ferrum 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +28 -0
- data/lib/ferrum.rb +25 -0
- data/lib/ferrum/browser.rb +145 -0
- data/lib/ferrum/browser/api.rb +14 -0
- data/lib/ferrum/browser/api/cookie.rb +46 -0
- data/lib/ferrum/browser/api/header.rb +32 -0
- data/lib/ferrum/browser/api/intercept.rb +32 -0
- data/lib/ferrum/browser/api/screenshot.rb +78 -0
- data/lib/ferrum/browser/client.rb +69 -0
- data/lib/ferrum/browser/process.rb +239 -0
- data/lib/ferrum/browser/subscriber.rb +26 -0
- data/lib/ferrum/browser/web_socket.rb +72 -0
- data/lib/ferrum/cookie.rb +47 -0
- data/lib/ferrum/errors.rb +94 -0
- data/lib/ferrum/network/error.rb +25 -0
- data/lib/ferrum/network/request.rb +33 -0
- data/lib/ferrum/network/response.rb +44 -0
- data/lib/ferrum/node.rb +175 -0
- data/lib/ferrum/page.rb +373 -0
- data/lib/ferrum/page/dom.rb +62 -0
- data/lib/ferrum/page/frame.rb +122 -0
- data/lib/ferrum/page/input.json +1341 -0
- data/lib/ferrum/page/input.rb +189 -0
- data/lib/ferrum/page/net.rb +92 -0
- data/lib/ferrum/page/runtime.rb +194 -0
- data/lib/ferrum/targets.rb +127 -0
- data/lib/ferrum/version.rb +5 -0
- metadata +245 -0
@@ -0,0 +1,239 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cliver"
|
4
|
+
require "net/http"
|
5
|
+
require "json"
|
6
|
+
require "addressable"
|
7
|
+
require "tmpdir"
|
8
|
+
|
9
|
+
module Ferrum
|
10
|
+
class Browser
|
11
|
+
class Process
|
12
|
+
# Seconds process_killer waits after the first signal before sending KILL.
KILL_TIMEOUT = 2
# Default number of seconds to wait for the browser to print its websocket URL.
PROCESS_TIMEOUT = 1
# Optional explicit browser executable, taken from the environment.
BROWSER_PATH = ENV["BROWSER_PATH"]
BROWSER_HOST = "127.0.0.1"
# NOTE(review): port "0" presumably lets the browser pick a free port — confirm.
BROWSER_PORT = "0"

# Command-line switches passed to the browser by default. A nil value is
# rendered as a bare "--flag"; any other value as "--flag=value".
DEFAULT_OPTIONS = {
  "headless" => nil,
  "disable-gpu" => nil,
  "hide-scrollbars" => nil,
  "mute-audio" => nil,
  "enable-automation" => nil,
  "disable-web-security" => nil,
  "disable-session-crashed-bubble" => nil,
  "disable-breakpad" => nil,
  "disable-sync" => nil,
  "no-first-run" => nil,
  "use-mock-keychain" => nil,
  "keep-alive-for-test" => nil,
  "disable-popup-blocking" => nil,
  "disable-extensions" => nil,
  "disable-hang-monitor" => nil,
  "disable-features" => "site-per-process,TranslateUI",
  "disable-translate" => nil,
  "disable-background-networking" => nil,
  "enable-features" => "NetworkService,NetworkServiceInProcess",
  "disable-background-timer-throttling" => nil,
  "disable-backgrounding-occluded-windows" => nil,
  "disable-client-side-phishing-detection" => nil,
  "disable-default-apps" => nil,
  "disable-dev-shm-usage" => nil,
  "disable-ipc-flooding-protection" => nil,
  "disable-prompt-on-repost" => nil,
  "disable-renderer-backgrounding" => nil,
  "force-color-profile" => "srgb",
  "metrics-recording-only" => nil,
  "safebrowsing-disable-auto-update" => nil,
  "password-store" => "basic",
  # Note: --no-sandbox is not needed if you properly setup a user in the container.
  # https://github.com/ebidel/lighthouse-ci/blob/master/builder/Dockerfile#L35-L40
  # "no-sandbox" => nil,
}.freeze

# Message raised when no browser executable could be located.
NOT_FOUND = "Could not find an executable for chrome. Try to make it " \
            "available on the PATH or set environment variable for " \
            "example BROWSER_PATH=\"/Applications/Chromium.app/Contents/MacOS/Chromium\""
|
57
|
+
|
58
|
+
|
59
|
+
attr_reader :host, :port, :ws_url, :pid, :path, :options, :cmd
|
60
|
+
|
61
|
+
# Builds an instance from the given arguments, starts it and returns the
# instance (not the result of #start).
def self.start(*args)
  instance = new(*args)
  instance.start
  instance
end
|
64
|
+
|
65
|
+
# Returns a proc that terminates the process with the given pid.
#
# On Windows the process is sent KILL straight away. Elsewhere it is sent
# USR1 first and polled every 50ms; if it has not been reaped after
# KILL_TIMEOUT seconds it is sent KILL and waited for. A pid that no longer
# exists, or is not a child of this process, is silently ignored.
#
# The proc only closes over +pid+, so it can be used as a finalizer.
def self.process_killer(pid)
  proc do
    begin
      if Ferrum.windows?
        ::Process.kill("KILL", pid)
      else
        ::Process.kill("USR1", pid)
        signalled_at = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
        until ::Process.wait(pid, ::Process::WNOHANG)
          sleep 0.05
          waited = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - signalled_at
          if waited > KILL_TIMEOUT
            ::Process.kill("KILL", pid)
            ::Process.wait(pid)
            break
          end
        end
      end
    rescue Errno::ESRCH, Errno::ECHILD
    end
  end
end
|
85
|
+
|
86
|
+
# Looks for a browser executable.
#
# On darwin the two well-known application bundle paths are checked for
# existence; on other platforms each known executable name is handed to
# Cliver.detect in order. Returns the first hit, or nil when nothing is found.
def self.detect_browser_path
  if RUBY_PLATFORM.include?("darwin")
    bundles = [
      "/Applications/Chromium.app/Contents/MacOS/Chromium",
      "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
    ]
    return bundles.find { |bundle| File.exist?(bundle) }
  end

  %w[chromium google-chrome-unstable google-chrome-beta google-chrome chrome chromium-browser google-chrome-stable].each do |executable|
    located = Cliver.detect(executable)
    return located if located
  end

  nil
end
|
99
|
+
|
100
|
+
# Prepares either a connection to an already running browser (when :url is
# given) or the command-line switches for a browser to be spawned by #start.
#
# @param options [Hash] keys read here: :browser_path, :url, :window_size,
#   :port, :host, :headless, :process_timeout, :browser_options, :logger
def initialize(options)
  @options = {}

  @path = options[:browser_path] || BROWSER_PATH || self.class.detect_browser_path

  # Remote mode: ask the running browser for its websocket endpoint and stop;
  # none of the spawn-related state below is set up.
  if options[:url]
    url = URI.join(options[:url].to_s, "/json/version")
    response = JSON.parse(::Net::HTTP.get(url))
    set_ws_url(response["webSocketDebuggerUrl"])
    return
  end

  # Doesn't work on MacOS, so we need to set it by CDP as well
  # The switch is omitted when no size is given, instead of failing on nil.
  window_size = options[:window_size]
  @options.merge!("window-size" => window_size.join(",")) if window_size

  port = options.fetch(:port, BROWSER_PORT)
  @options.merge!("remote-debugging-port" => port)

  host = options.fetch(:host, BROWSER_HOST)
  @options.merge!("remote-debugging-address" => host)

  @options.merge!("user-data-dir" => Dir.mktmpdir)

  # Values computed above win over the defaults.
  @options = DEFAULT_OPTIONS.merge(@options)

  unless options.fetch(:headless, true)
    @options.delete("headless")
    @options.delete("disable-gpu")
  end

  @process_timeout = options.fetch(:process_timeout, PROCESS_TIMEOUT)

  # Caller-supplied switches are merged last and override everything else.
  @options.merge!(options.fetch(:browser_options, {}))

  @logger = options[:logger]
end
|
136
|
+
|
137
|
+
# Spawns the browser and waits for it to announce its websocket URL.
#
# The browser's output is routed into a pipe: directly through the spawn
# options on MRI, via redirect_stdout otherwise. A finalizer is registered so
# the spawned process is killed when this object is collected. Both pipe ends
# are closed on the way out.
#
# @raise [Cliver::Dependency::NotFound] when no browser path is known
def start
  # Don't do anything as browser is already running as external process.
  return if ws_url

  begin
    read_io, write_io = IO.pipe

    spawn_options = { in: File::NULL }
    spawn_options[:pgroup] = true unless Ferrum.windows?
    spawn_options[:out] = spawn_options[:err] = write_io if Ferrum.mri?

    raise Cliver::Dependency::NotFound.new(NOT_FOUND) unless @path

    redirect_stdout(write_io) do
      switches = @options.map do |name, value|
        value.nil? ? "--#{name}" : "--#{name}=#{value}"
      end
      @cmd = [@path] + switches
      @pid = ::Process.spawn(*@cmd, spawn_options)
      ObjectSpace.define_finalizer(self, self.class.process_killer(@pid))
    end

    parse_ws_url(read_io, @process_timeout)
  ensure
    close_io(read_io, write_io)
  end
end
|
162
|
+
|
163
|
+
# Kills the browser spawned by #start and drops its finalizer.
#
# Does nothing when no process was spawned by this object (for instance when
# it is attached to an external browser), so that endpoint is left untouched.
def stop
  return unless @pid
  kill
  # The endpoint belonged to the process just killed. Forget it, otherwise
  # #start sees a stale ws_url and returns without spawning a new browser.
  @ws_url = @host = @port = nil
  ObjectSpace.undefine_finalizer(self)
end

# Stops the spawned browser, if any, and starts it again.
def restart
  stop
  start
end
|
173
|
+
|
174
|
+
private
|
175
|
+
|
176
|
+
# Runs the block with standard output pointed at +write_io+.
#
# On MRI the block is simply yielded to (the pipe is wired up through the
# spawn options instead). On other engines STDOUT and $stdout are redirected
# for the duration of the block and restored afterwards, even if it raises.
def redirect_stdout(write_io)
  return yield if Ferrum.mri?

  begin
    saved_stdout = STDOUT.dup
    $stdout = write_io
    STDOUT.reopen(write_io)
    yield
  ensure
    STDOUT.reopen(saved_stdout)
    $stdout = STDOUT
    saved_stdout.close
  end
end
|
192
|
+
|
193
|
+
# Runs the class-level killer proc for the current pid right away, then
# forgets the pid.
def kill
  killer = self.class.process_killer(@pid)
  killer.call
  @pid = nil
end
|
197
|
+
|
198
|
+
# Reads the browser's output from +read_io+ until it announces its DevTools
# websocket URL, then stores it via set_ws_url.
#
# @param read_io [IO] readable end of the pipe the browser writes to
# @param timeout [Numeric] seconds to wait for the announcement
# @raise [RuntimeError] when no URL was seen within +timeout+ seconds; the
#   collected output is written to the logger first, if one is set
def parse_ws_url(read_io, timeout = PROCESS_TIMEOUT)
  output = ""
  start = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
  max_time = start + timeout
  # Output arrives in chunks of up to 512 bytes, so the URL may be split
  # across reads. Only accept the line once its terminating newline has
  # arrived, otherwise a truncated URL could be captured.
  regexp = /DevTools listening on (ws:\/\/.*)\r?\n/
  while (now = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)) < max_time
    begin
      output += read_io.read_nonblock(512)
    rescue IO::WaitReadable
      # Nothing to read yet: sleep until data arrives or the deadline passes.
      IO.select([read_io], nil, nil, max_time - now)
    else
      found = output.match(regexp)
      if found
        set_ws_url(found[1].strip)
        break
      end
    end
  end

  unless ws_url
    @logger.puts output if @logger
    raise "Chrome process did not produce websocket url within #{timeout} seconds"
  end
end
|
221
|
+
|
222
|
+
# Parses +url+ with Addressable and remembers the parsed URI together with
# its host and port.
def set_ws_url(url)
  parsed = Addressable::URI.parse(url)
  @ws_url = parsed
  @host = parsed.host
  @port = parsed.port
end
|
227
|
+
|
228
|
+
# Closes every given IO that is still open.
#
# nil entries are skipped: the caller invokes this from an ensure clause, and
# when creating the pipe itself failed the variables are still nil. Raising
# NoMethodError here would hide the original error.
#
# An IOError raised while closing is swallowed on JRuby only and re-raised on
# every other engine.
def close_io(*ios)
  ios.each do |io|
    next if io.nil?

    begin
      io.close unless io.closed?
    rescue IOError
      raise unless RUBY_ENGINE == 'jruby'
    end
  end
end
|
237
|
+
end
|
238
|
+
end
|
239
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "concurrent-ruby"
|
4
|
+
|
5
|
+
module Ferrum
|
6
|
+
class Browser
|
7
|
+
# Registry of event handlers keyed by event name. Includes Concurrent::Async.
class Subscriber
  include Concurrent::Async

  def initialize
    super
    # Unknown events get an empty handler list on first access.
    @on = Hash.new { |registry, event| registry[event] = [] }
  end

  # Registers +block+ as a handler for +event+. Always returns true.
  def on(event, &block)
    @on[event].push(block)
    true
  end

  # Dispatches a message: every handler registered for message["method"] is
  # called, in registration order, with message["params"].
  def call(message)
    event, payload = message.values_at("method", "params")
    @on[event].each { |handler| handler.call(payload) }
  end
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
require "socket"
|
5
|
+
require "websocket/driver"
|
6
|
+
|
7
|
+
module Ferrum
|
8
|
+
class Browser
|
9
|
+
# Websocket client built on a raw TCPSocket and WebSocket::Driver.
#
# Incoming frames are read on a background thread, parsed as JSON and pushed
# onto the +messages+ queue; outgoing messages are serialized to JSON and sent
# as text frames. The instance itself is the driver's IO target (see #write).
class WebSocket
  WEBSOCKET_BUG_SLEEP = 0.01

  attr_reader :url, :messages

  # Connects to +url+ and starts the websocket handshake.
  #
  # @param url [String] websocket endpoint; host and port are taken from it
  # @param logger [#puts, nil] receives a line per message sent or received
  def initialize(url, logger)
    @url = url
    @logger = logger
    uri = URI.parse(@url)
    @sock = TCPSocket.new(uri.host, uri.port)
    @driver = ::WebSocket::Driver.client(self)
    @messages = Queue.new

    @driver.on(:open, &method(:on_open))
    @driver.on(:message, &method(:on_message))
    @driver.on(:close, &method(:on_close))

    @thread = Thread.new do
      begin
        while (data = @sock.readpartial(512))
          @driver.parse(data)
        end
      rescue IOError, Errno::ECONNRESET
        # IOError covers EOFError (peer closed) as well as the socket being
        # closed by another thread; either way no more messages will come.
        @messages.close
      end
    end

    @thread.priority = 1

    @driver.start
  end

  def on_open(_event)
    # https://github.com/faye/websocket-driver-ruby/issues/46
    sleep(WEBSOCKET_BUG_SLEEP)
  end

  # Parses the frame payload as JSON, queues it, then logs the raw payload.
  def on_message(event)
    data = JSON.parse(event.data)
    @messages.push(data)
    @logger&.puts(" ◀ #{event.data}\n")
  end

  # Tears the connection down: closes the queue, releases the socket and
  # stops the reader thread.
  def on_close(_event)
    @messages.close
    # Close the socket BEFORE killing the thread: this handler may be running
    # on the reader thread itself, in which case nothing after the kill runs.
    @sock.close unless @sock.closed?
    @thread.kill
  end

  # Serializes +data+ to JSON, sends it as a text frame and logs it.
  def send_message(data)
    json = data.to_json
    @driver.text(json)
    @logger&.puts("\n\n▶ #{json}")
  end

  # Called by the driver to put raw bytes on the wire.
  def write(data)
    @sock.write(data)
  end

  # Asks the driver to close the websocket connection.
  def close
    @driver.close
  end
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ferrum
|
4
|
+
# Read-only wrapper around a hash of cookie attributes with string keys.
class Cookie
  # @param attributes [Hash] keys read by this class: "name", "value",
  #   "domain", "path", "size", "secure", "httpOnly", "session", "expires"
  def initialize(attributes)
    @attributes = attributes
  end

  def name
    @attributes["name"]
  end

  def value
    @attributes["value"]
  end

  def domain
    @attributes["domain"]
  end

  def path
    @attributes["path"]
  end

  def size
    @attributes["size"]
  end

  def secure?
    @attributes["secure"]
  end

  def httponly?
    @attributes["httpOnly"]
  end

  def session?
    @attributes["session"]
  end

  # Expiry as a Time, or nil when the "expires" attribute is missing or not
  # positive.
  #
  # @return [Time, nil]
  def expires
    timestamp = @attributes["expires"]
    # A missing attribute is treated like a non-positive one rather than
    # raising NoMethodError on nil.
    Time.at(timestamp) if timestamp && timestamp > 0
  end
end
|
47
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Ferrum
  # Standalone errors deriving directly from StandardError.
  class NotImplemented < StandardError
  end

  class ModalNotFound < StandardError
  end

  # Common ancestor of the remaining errors in this module.
  class Error < StandardError
  end

  class NoSuchWindowError < Error
  end

  class EmptyTargetsError < Error
  end

  class NoExecutionContext < Error
  end

  # Error built from a response hash; the hash is kept in +response+ and its
  # "message" entry becomes the exception message.
  class ClientError < Error
    attr_reader :response

    def initialize(response)
      @response = response
      super(response["message"])
    end
  end

  # Exposes the "code" and "data" entries of the response.
  class BrowserError < ClientError
    def code
      response["code"]
    end

    def data
      response["data"]
    end
  end

  # Takes its class name and message from the "className" and "description"
  # entries of the response.
  class JavaScriptError < ClientError
    attr_reader :class_name, :message

    def initialize(response)
      super(response)
      @class_name = response["className"]
      @message = response["description"]
    end
  end

  class StatusFailError < ClientError
    def message
      url = response["url"]
      "Request to #{url} failed to reach server, check DNS and/or server status"
    end
  end

  class FrameNotFound < ClientError
    # First element of the response's "args" entry.
    def name
      response["args"].first
    end

    def message
      %(The frame "#{name}" was not found.)
    end
  end

  # Client error that also carries the node it relates to.
  class NodeError < ClientError
    attr_reader :node

    def initialize(node, response)
      @node = node
      super(response)
    end
  end

  class ObsoleteNode < NodeError
    def message
      "The element you are trying to interact with is either not part of the DOM, " \
        "or is not currently visible on the page (perhaps display: none is set). " \
        "It is possible the element has been replaced by another element " \
        "and you meant to interact with the new element. " \
        "If so you need to do a new find in order to get a reference to the new element."
    end
  end

  class TimeoutError < Error
    def message
      "Timed out waiting for response. " \
        "It's possible that this happened because something took a very long time " \
        "(for example a page load was slow). " \
        "If so, setting the :timeout option to a higher value might help."
    end
  end

  class ScriptTimeoutError < Error
    def message
      "Timed out waiting for evaluated script to return a value"
    end
  end

  # Defaults its message to "Browser is dead".
  class DeadBrowser < Error
    def initialize(message = "Browser is dead")
      super(message)
    end
  end
end
|