ferrum 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,239 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cliver"
4
+ require "net/http"
5
+ require "json"
6
+ require "addressable"
7
+ require "tmpdir"
8
+
9
module Ferrum
  class Browser
    # Owns the Chrome/Chromium process behind a browser session.
    #
    # Either spawns a local browser with a fixed set of command line switches
    # and reads its DevTools websocket URL from the process output, or (when
    # +options[:url]+ is given) asks an already running browser for that URL
    # over HTTP and spawns nothing.
    class Process
      # Seconds to wait after the polite signal before sending KILL.
      KILL_TIMEOUT = 2
      # Default number of seconds to wait for the websocket URL to be printed.
      PROCESS_TIMEOUT = 1
      BROWSER_PATH = ENV["BROWSER_PATH"]
      BROWSER_HOST = "127.0.0.1"
      # NOTE(review): "0" presumably lets the browser pick a free port; the
      # real port is read back from the websocket URL. Confirm against Chrome docs.
      BROWSER_PORT = "0"
      # Command line switches; a nil value means the switch takes no argument.
      DEFAULT_OPTIONS = {
        "headless" => nil,
        "disable-gpu" => nil,
        "hide-scrollbars" => nil,
        "mute-audio" => nil,
        "enable-automation" => nil,
        "disable-web-security" => nil,
        "disable-session-crashed-bubble" => nil,
        "disable-breakpad" => nil,
        "disable-sync" => nil,
        "no-first-run" => nil,
        "use-mock-keychain" => nil,
        "keep-alive-for-test" => nil,
        "disable-popup-blocking" => nil,
        "disable-extensions" => nil,
        "disable-hang-monitor" => nil,
        "disable-features" => "site-per-process,TranslateUI",
        "disable-translate" => nil,
        "disable-background-networking" => nil,
        "enable-features" => "NetworkService,NetworkServiceInProcess",
        "disable-background-timer-throttling" => nil,
        "disable-backgrounding-occluded-windows" => nil,
        "disable-client-side-phishing-detection" => nil,
        "disable-default-apps" => nil,
        "disable-dev-shm-usage" => nil,
        "disable-ipc-flooding-protection" => nil,
        "disable-prompt-on-repost" => nil,
        "disable-renderer-backgrounding" => nil,
        "force-color-profile" => "srgb",
        "metrics-recording-only" => nil,
        "safebrowsing-disable-auto-update" => nil,
        "password-store" => "basic",
        # Note: --no-sandbox is not needed if you properly setup a user in the container.
        # https://github.com/ebidel/lighthouse-ci/blob/master/builder/Dockerfile#L35-L40
        # "no-sandbox" => nil,
      }.freeze

      NOT_FOUND = "Could not find an executable for chrome. Try to make it " \
                  "available on the PATH or set environment variable for " \
                  "example BROWSER_PATH=\"/Applications/Chromium.app/Contents/MacOS/Chromium\""

      attr_reader :host, :port, :ws_url, :pid, :path, :options, :cmd

      # Builds a process object and starts it in one step.
      #
      # @param args forwarded unchanged to {#initialize}
      # @return [Process] the started instance
      def self.start(*args)
        new(*args).tap(&:start)
      end

      # Returns a proc that terminates the process with the given pid.
      #
      # On Windows the process is killed outright. Elsewhere USR1 is sent
      # first and the process is polled every 50ms; if it has not exited after
      # KILL_TIMEOUT seconds it is sent KILL and reaped. A pid that no longer
      # exists, or is not our child, is ignored on purpose: this is best-effort
      # cleanup that also runs as a finalizer.
      #
      # @param pid [Integer]
      # @return [Proc]
      def self.process_killer(pid)
        proc do
          begin
            if Ferrum.windows?
              ::Process.kill("KILL", pid)
            else
              ::Process.kill("USR1", pid)
              start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
              while ::Process.wait(pid, ::Process::WNOHANG).nil?
                sleep 0.05
                next unless (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start) > KILL_TIMEOUT
                ::Process.kill("KILL", pid)
                ::Process.wait(pid)
                break
              end
            end
          rescue Errno::ESRCH, Errno::ECHILD
            # Process already gone or not ours to reap: nothing left to do.
          end
        end
      end

      # Looks for a browser executable.
      #
      # On macOS (RUBY_PLATFORM contains "darwin") the two well-known
      # application bundle paths are probed with File.exist?. Everywhere else
      # each candidate executable name is resolved through Cliver, in order.
      #
      # @return [String, nil] path of the first hit, or nil when nothing is found
      def self.detect_browser_path
        if RUBY_PLATFORM.include?("darwin")
          [
            "/Applications/Chromium.app/Contents/MacOS/Chromium",
            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
          ].find { |path| File.exist?(path) }
        else
          %w[chromium google-chrome-unstable google-chrome-beta google-chrome chrome chromium-browser google-chrome-stable].each do |exe|
            path = Cliver.detect(exe)
            return path if path
          end
          nil
        end
      end

      # @param options [Hash] recognised keys:
      #   :browser_path    - executable to run (falls back to BROWSER_PATH, then detection)
      #   :url             - base URL of an already running browser; when set,
      #                      only the websocket URL is fetched and nothing else is configured
      #   :window_size     - [width, height]; optional
      #   :port, :host     - remote debugging port/address (default BROWSER_PORT / BROWSER_HOST)
      #   :headless        - pass false to drop the headless and disable-gpu switches
      #   :process_timeout - seconds to wait for the websocket URL
      #   :browser_options - extra switches merged last, so they win over everything
      #   :logger          - object responding to #puts, used to dump output on failure
      def initialize(options)
        @options = {}

        @path = options[:browser_path] || BROWSER_PATH || self.class.detect_browser_path

        if options[:url]
          url = URI.join(options[:url].to_s, "/json/version")
          response = JSON.parse(::Net::HTTP.get(url))
          set_ws_url(response["webSocketDebuggerUrl"])
          return
        end

        # Doesn't work on MacOS, so we need to set it by CDP as well.
        # Skipped entirely when the caller gave no size instead of failing on nil.
        window_size = options[:window_size]
        @options["window-size"] = window_size.join(",") if window_size

        @options["remote-debugging-port"] = options.fetch(:port, BROWSER_PORT)
        @options["remote-debugging-address"] = options.fetch(:host, BROWSER_HOST)
        @options["user-data-dir"] = Dir.mktmpdir

        @options = DEFAULT_OPTIONS.merge(@options)

        unless options.fetch(:headless, true)
          @options.delete("headless")
          @options.delete("disable-gpu")
        end

        @process_timeout = options.fetch(:process_timeout, PROCESS_TIMEOUT)

        @options.merge!(options.fetch(:browser_options, {}))

        @logger = options[:logger]
      end

      # Spawns the browser and waits for its websocket URL.
      #
      # Does nothing when a websocket URL is already known (external browser,
      # or a process that is already running).
      #
      # @raise [Cliver::Dependency::NotFound] when no executable path is known
      # @raise [RuntimeError] when the URL is not printed within the timeout
      def start
        # Don't do anything as browser is already running as external process.
        return if ws_url

        raise Cliver::Dependency::NotFound, NOT_FOUND unless @path

        begin
          read_io, write_io = IO.pipe
          process_options = { in: File::NULL }
          process_options[:pgroup] = true unless Ferrum.windows?
          if Ferrum.mri?
            process_options[:out] = process_options[:err] = write_io
          end

          redirect_stdout(write_io) do
            @cmd = [@path] + @options.map { |k, v| v.nil? ? "--#{k}" : "--#{k}=#{v}" }
            @pid = ::Process.spawn(*@cmd, process_options)
            # Make sure the child is killed if this object is garbage collected.
            ObjectSpace.define_finalizer(self, self.class.process_killer(@pid))
          end

          parse_ws_url(read_io, @process_timeout)
        ensure
          close_io(read_io, write_io)
        end
      end

      # Kills the spawned browser, if any, and forgets its endpoint.
      #
      # A browser attached via :url has no pid here and is left untouched.
      def stop
        return unless @pid
        kill
        # The endpoint died with the process; clearing it lets #start (and so
        # #restart) spawn a fresh browser instead of returning early.
        @ws_url = @host = @port = nil
        ObjectSpace.undefine_finalizer(self)
      end

      # Stops the spawned browser and starts a new one with the same options.
      def restart
        stop
        start
      end

      private

      # Runs the block with the browser output captured into +write_io+.
      #
      # On MRI the block runs as is (the spawn options already redirect out/err).
      # On other engines STDOUT and $stdout are pointed at +write_io+ for the
      # duration of the block and restored afterwards.
      def redirect_stdout(write_io)
        if Ferrum.mri?
          yield
        else
          begin
            prev = STDOUT.dup
            $stdout = write_io
            STDOUT.reopen(write_io)
            yield
          ensure
            STDOUT.reopen(prev)
            $stdout = STDOUT
            prev.close
          end
        end
      end

      # Terminates the spawned process and forgets its pid.
      def kill
        self.class.process_killer(@pid).call
        @pid = nil
      end

      # Reads browser output until the DevTools websocket URL shows up.
      #
      # @param read_io [IO] readable end of the pipe the browser writes to
      # @param timeout [Numeric] seconds to wait
      # @raise [RuntimeError] when no URL was seen in time (collected output is
      #   sent to the logger first, if there is one)
      def parse_ws_url(read_io, timeout = PROCESS_TIMEOUT)
        output = ""
        start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
        max_time = start + timeout
        # Output arrives in 512 byte chunks, so the URL may be split across two
        # reads. Requiring the line terminator guarantees the whole URL has
        # arrived before it is accepted.
        regexp = /DevTools listening on (ws:\/\/.*)\n/
        while (now = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)) < max_time
          begin
            output += read_io.read_nonblock(512)
          rescue IO::WaitReadable
            IO.select([read_io], nil, nil, max_time - now)
          else
            match = output.match(regexp)
            if match
              set_ws_url(match[1].strip)
              break
            end
          end
        end

        return if ws_url

        @logger.puts output if @logger
        raise "Chrome process did not produce websocket url within #{timeout} seconds"
      end

      # Stores the parsed websocket URL along with its host and port.
      def set_ws_url(url)
        @ws_url = Addressable::URI.parse(url)
        @host = @ws_url.host
        @port = @ws_url.port
      end

      # Closes every given IO that is still open. Nil entries are skipped so
      # this is safe to call from an ensure even when the pipe was never created.
      # IOError raised by close is ignored on JRuby only.
      def close_io(*ios)
        ios.compact.each do |io|
          begin
            io.close unless io.closed?
          rescue IOError
            raise unless RUBY_ENGINE == "jruby"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "concurrent-ruby"
4
+
5
module Ferrum
  class Browser
    # Registry of event handlers keyed by event name.
    #
    # Handlers are registered with #on and invoked by #call, which receives a
    # decoded message hash and dispatches on its "method" entry.
    class Subscriber
      include Concurrent::Async

      def initialize
        super
        # Missing keys get their own fresh array on first write access.
        @on = Hash.new { |h, k| h[k] = [] }
      end

      # Registers a handler for an event.
      #
      # @param event [String] event name, matched against the message "method"
      # @yieldparam params the "params" entry of the dispatched message
      # @return [true]
      def on(event, &block)
        @on[event] << block
        true
      end

      # Invokes every handler registered for the message's "method", in
      # registration order, passing the message's "params".
      #
      # @param message [Hash] must respond to values_at with "method"/"params"
      # @return [Array] the handlers that were invoked (empty when none)
      def call(message)
        method, params = message.values_at("method", "params")
        # Look the handlers up without going through the default proc, so
        # events nobody subscribed to do not add empty entries to the registry.
        handlers = @on.key?(method) ? @on[method] : []
        handlers.each { |b| b.call(params) }
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "socket"
5
+ require "websocket/driver"
6
+
7
module Ferrum
  class Browser
    # Websocket client built from a TCPSocket and a WebSocket::Driver.
    #
    # A background thread feeds bytes read from the socket into the driver;
    # decoded JSON messages are pushed onto #messages. The instance itself is
    # handed to the driver as its IO object, which is why it exposes #url and
    # #write.
    class WebSocket
      WEBSOCKET_BUG_SLEEP = 0.01

      attr_reader :url, :messages

      # Connects to +url+ and starts the websocket handshake.
      #
      # @param url [String] websocket URL; host and port are taken from it
      # @param logger [#puts, nil] receives a trace of every message sent and received
      def initialize(url, logger)
        @url = url
        @logger = logger
        uri = URI.parse(@url)
        @sock = TCPSocket.new(uri.host, uri.port)
        @driver = ::WebSocket::Driver.client(self)
        @messages = Queue.new

        @driver.on(:open, &method(:on_open))
        @driver.on(:message, &method(:on_message))
        @driver.on(:close, &method(:on_close))

        @thread = Thread.new do
          begin
            while (data = @sock.readpartial(512))
              @driver.parse(data)
            end
          rescue IOError, Errno::ECONNRESET
            # IOError covers EOFError (peer hung up) and the "stream closed"
            # error raised when #on_close closes the socket from another
            # thread while this one is blocked reading.
            @messages.close
          end
        end

        @thread.priority = 1

        @driver.start
      end

      # Driver callback for a completed handshake.
      def on_open(_event)
        # https://github.com/faye/websocket-driver-ruby/issues/46
        sleep(WEBSOCKET_BUG_SLEEP)
      end

      # Driver callback: decodes the JSON payload, queues it, then logs the raw text.
      def on_message(event)
        data = JSON.parse(event.data)
        @messages.push(data)
        @logger&.puts("    ◀ #{event.data}\n")
      end

      # Driver callback for a closed connection: closes the queue, releases
      # the socket and stops the reader thread.
      def on_close(_event)
        @messages.close
        # Release the file descriptor; without this every connection leaks a socket.
        @sock.close unless @sock.closed?
        # Must stay last: when this callback runs on the reader thread itself,
        # nothing after the kill is executed.
        @thread.kill
      end

      # Serialises +data+ to JSON and sends it as a text frame.
      def send_message(data)
        json = data.to_json
        @driver.text(json)
        @logger&.puts("\n\n▶ #{json}")
      end

      # Called by the driver to put raw bytes on the wire.
      def write(data)
        @sock.write(data)
      end

      # Asks the driver to close the websocket connection.
      def close
        @driver.close
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module Ferrum
  # Read-only view over a cookie attribute hash with string keys
  # ("name", "value", "domain", "path", "size", "secure", "httpOnly",
  # "session", "expires"). Every reader returns the stored value as is, so a
  # missing key yields nil.
  class Cookie
    # @param attributes [Hash{String => Object}]
    def initialize(attributes)
      @attributes = attributes
    end

    def name
      @attributes["name"]
    end

    def value
      @attributes["value"]
    end

    def domain
      @attributes["domain"]
    end

    def path
      @attributes["path"]
    end

    def size
      @attributes["size"]
    end

    def secure?
      @attributes["secure"]
    end

    def httponly?
      @attributes["httpOnly"]
    end

    def session?
      @attributes["session"]
    end

    # Expiry of the cookie.
    #
    # @return [Time, nil] the "expires" attribute converted with Time.at, or
    #   nil when it is absent or not a positive number
    def expires
      timestamp = @attributes["expires"]
      # Guard against a missing attribute: nil has no #> and used to raise here.
      Time.at(timestamp) if timestamp && timestamp > 0
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
module Ferrum
  # These two derive from StandardError directly, not from Ferrum::Error.
  class NotImplemented < StandardError
  end

  class ModalNotFound < StandardError
  end

  # Root of the library's own error hierarchy.
  class Error < StandardError
  end

  class NoSuchWindowError < Error
  end

  class EmptyTargetsError < Error
  end

  class NoExecutionContext < Error
  end

  # Error built from a response hash; the hash stays available through
  # #response and its "message" entry becomes the exception message.
  class ClientError < Error
    attr_reader :response

    def initialize(response)
      super(response["message"])
      @response = response
    end
  end

  # Exposes the "code" and "data" entries of the response.
  class BrowserError < ClientError
    def code
      response["code"]
    end

    def data
      response["data"]
    end
  end

  # Carries the "className" and "description" entries of the response;
  # #message returns the description.
  class JavaScriptError < ClientError
    attr_reader :class_name, :message

    def initialize(response)
      super(response)
      @class_name = response["className"]
      @message = response["description"]
    end
  end

  # Message names the "url" entry of the response.
  class StatusFailError < ClientError
    def message
      format("Request to %s failed to reach server, check DNS and/or server status", response["url"])
    end
  end

  # The frame name is the first element of the response's "args".
  class FrameNotFound < ClientError
    def name
      response["args"][0]
    end

    def message
      %(The frame "#{name}" was not found.)
    end
  end

  # A client error tied to a particular node.
  class NodeError < ClientError
    attr_reader :node

    def initialize(node, response)
      super(response)
      @node = node
    end
  end

  class ObsoleteNode < NodeError
    def message
      "The element you are trying to interact with is either not part of the DOM, " \
        "or is not currently visible on the page (perhaps display: none is set). " \
        "It is possible the element has been replaced by another element " \
        "and you meant to interact with the new element. " \
        "If so you need to do a new find in order to get a reference to the new element."
    end
  end

  class TimeoutError < Error
    def message
      "Timed out waiting for response. " \
        "It's possible that this happened because something took a very long time " \
        "(for example a page load was slow). " \
        "If so, setting the :timeout option to a higher value might help."
    end
  end

  class ScriptTimeoutError < Error
    def message
      "Timed out waiting for evaluated script to return a value"
    end
  end

  # Uses "Browser is dead" as the message unless one is supplied.
  class DeadBrowser < Error
    def initialize(message = "Browser is dead")
      super(message)
    end
  end
end