ferrum 0.9 → 0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +110 -74
- data/lib/ferrum.rb +17 -8
- data/lib/ferrum/browser.rb +6 -5
- data/lib/ferrum/browser/client.rb +4 -0
- data/lib/ferrum/browser/command.rb +1 -1
- data/lib/ferrum/browser/process.rb +5 -3
- data/lib/ferrum/browser/subscriber.rb +4 -0
- data/lib/ferrum/context.rb +3 -3
- data/lib/ferrum/frame.rb +1 -0
- data/lib/ferrum/frame/dom.rb +30 -30
- data/lib/ferrum/frame/runtime.rb +54 -71
- data/lib/ferrum/network.rb +23 -6
- data/lib/ferrum/network/error.rb +8 -15
- data/lib/ferrum/node.rb +11 -0
- data/lib/ferrum/page.rb +23 -11
- data/lib/ferrum/page/frames.rb +5 -2
- data/lib/ferrum/page/screenshot.rb +62 -10
- data/lib/ferrum/rbga.rb +38 -0
- data/lib/ferrum/version.rb +1 -1
- metadata +6 -5
data/lib/ferrum.rb
CHANGED
@@ -10,14 +10,20 @@ module Ferrum
|
|
10
10
|
class NotImplementedError < Error; end
|
11
11
|
|
12
12
|
class StatusError < Error
|
13
|
+
def initialize(url, message = nil)
|
14
|
+
super(message || "Request to #{url} failed to reach server, check DNS and server status")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class PendingConnectionsError < StatusError
|
19
|
+
attr_reader :pendings
|
20
|
+
|
13
21
|
def initialize(url, pendings = [])
|
14
|
-
|
15
|
-
"Request to #{url} failed to reach server, check DNS and/or server status"
|
16
|
-
else
|
17
|
-
"Request to #{url} reached server, but there are still pending connections: #{pendings.join(', ')}"
|
18
|
-
end
|
22
|
+
@pendings = pendings
|
19
23
|
|
20
|
-
|
24
|
+
message = "Request to #{url} reached server, but there are still pending connections: #{pendings.join(', ')}"
|
25
|
+
|
26
|
+
super(url, message)
|
21
27
|
end
|
22
28
|
end
|
23
29
|
|
@@ -37,8 +43,11 @@ module Ferrum
|
|
37
43
|
end
|
38
44
|
|
39
45
|
class ProcessTimeoutError < Error
|
40
|
-
|
41
|
-
|
46
|
+
attr_reader :output
|
47
|
+
|
48
|
+
def initialize(timeout, output)
|
49
|
+
@output = output
|
50
|
+
super("Browser did not produce websocket url within #{timeout} seconds, try to increase `:process_timeout`. See https://github.com/rubycdp/ferrum#customization")
|
42
51
|
end
|
43
52
|
end
|
44
53
|
|
data/lib/ferrum/browser.rb
CHANGED
@@ -17,19 +17,19 @@ module Ferrum
|
|
17
17
|
extend Forwardable
|
18
18
|
delegate %i[default_context] => :contexts
|
19
19
|
delegate %i[targets create_target create_page page pages windows] => :default_context
|
20
|
-
delegate %i[
|
20
|
+
delegate %i[go_to back forward refresh reload stop wait_for_reload
|
21
21
|
at_css at_xpath css xpath current_url current_title url title
|
22
22
|
body doctype set_content
|
23
23
|
headers cookies network
|
24
24
|
mouse keyboard
|
25
|
-
screenshot pdf viewport_size
|
25
|
+
screenshot pdf mhtml viewport_size
|
26
26
|
frames frame_by main_frame
|
27
|
-
evaluate evaluate_on evaluate_async execute
|
27
|
+
evaluate evaluate_on evaluate_async execute evaluate_func
|
28
28
|
add_script_tag add_style_tag bypass_csp
|
29
|
-
on] => :page
|
29
|
+
on goto] => :page
|
30
30
|
delegate %i[default_user_agent] => :process
|
31
31
|
|
32
|
-
attr_reader :client, :process, :contexts, :logger, :js_errors,
|
32
|
+
attr_reader :client, :process, :contexts, :logger, :js_errors, :pending_connection_errors,
|
33
33
|
:slowmo, :base_url, :options, :window_size, :ws_max_receive_size
|
34
34
|
attr_writer :timeout
|
35
35
|
|
@@ -44,6 +44,7 @@ module Ferrum
|
|
44
44
|
@logger, @timeout, @ws_max_receive_size =
|
45
45
|
@options.values_at(:logger, :timeout, :ws_max_receive_size)
|
46
46
|
@js_errors = @options.fetch(:js_errors, false)
|
47
|
+
@pending_connection_errors = @options.fetch(:pending_connection_errors, true)
|
47
48
|
@slowmo = @options[:slowmo].to_f
|
48
49
|
|
49
50
|
if @options.key?(:base_url)
|
data/lib/ferrum/browser/command.rb
CHANGED
@@ -4,7 +4,7 @@ module Ferrum
|
|
4
4
|
class Browser
|
5
5
|
class Command
|
6
6
|
NOT_FOUND = "Could not find an executable for the browser. Try to make " \
|
7
|
-
"it available on the PATH or set environment
|
7
|
+
"it available on the PATH or set environment variable for " \
|
8
8
|
"example BROWSER_PATH=\"/usr/bin/chrome\"".freeze
|
9
9
|
|
10
10
|
# Currently only these browsers support CDP:
|
data/lib/ferrum/browser/process.rb
CHANGED
@@ -16,7 +16,7 @@ module Ferrum
|
|
16
16
|
class Process
|
17
17
|
KILL_TIMEOUT = 2
|
18
18
|
WAIT_KILLED = 0.05
|
19
|
-
PROCESS_TIMEOUT = ENV.fetch("FERRUM_PROCESS_TIMEOUT",
|
19
|
+
PROCESS_TIMEOUT = ENV.fetch("FERRUM_PROCESS_TIMEOUT", 10).to_i
|
20
20
|
|
21
21
|
attr_reader :host, :port, :ws_url, :pid, :command,
|
22
22
|
:default_user_agent, :browser_version, :protocol_version,
|
@@ -64,11 +64,13 @@ module Ferrum
|
|
64
64
|
return
|
65
65
|
end
|
66
66
|
|
67
|
+
@pid = @xvfb = @user_data_dir = nil
|
67
68
|
@logger = options[:logger]
|
68
69
|
@process_timeout = options.fetch(:process_timeout, PROCESS_TIMEOUT)
|
69
70
|
|
70
|
-
tmpdir = Dir.mktmpdir
|
71
|
+
tmpdir = Dir.mktmpdir("ferrum_user_data_dir_")
|
71
72
|
ObjectSpace.define_finalizer(self, self.class.directory_remover(tmpdir))
|
73
|
+
@user_data_dir = tmpdir
|
72
74
|
@command = Command.build(options, tmpdir)
|
73
75
|
end
|
74
76
|
|
@@ -144,7 +146,7 @@ module Ferrum
|
|
144
146
|
|
145
147
|
unless ws_url
|
146
148
|
@logger.puts(output) if @logger
|
147
|
-
raise ProcessTimeoutError.new(timeout)
|
149
|
+
raise ProcessTimeoutError.new(timeout, output)
|
148
150
|
end
|
149
151
|
end
|
150
152
|
|
data/lib/ferrum/context.rb
CHANGED
@@ -42,9 +42,9 @@ module Ferrum
|
|
42
42
|
end
|
43
43
|
|
44
44
|
def create_target
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
@browser.command("Target.createTarget",
|
46
|
+
browserContextId: @id,
|
47
|
+
url: "about:blank")
|
48
48
|
target = @pendings.take(@browser.timeout)
|
49
49
|
raise NoSuchTargetError unless target.is_a?(Target)
|
50
50
|
@targets[target.id] = target
|
data/lib/ferrum/frame.rb
CHANGED
data/lib/ferrum/frame/dom.rb
CHANGED
@@ -37,54 +37,54 @@ module Ferrum
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def xpath(selector, within: nil)
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
expr = <<~JS
|
41
|
+
function(selector, within) {
|
42
|
+
let results = [];
|
43
|
+
within ||= document
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
46
|
+
for (let i = 0; i < xpath.snapshotLength; i++) {
|
47
|
+
results.push(xpath.snapshotItem(i));
|
48
|
+
}
|
49
49
|
|
50
|
-
|
50
|
+
return results;
|
51
|
+
}
|
51
52
|
JS
|
52
53
|
|
53
|
-
|
54
|
+
evaluate_func(expr, selector, within)
|
54
55
|
end
|
55
56
|
|
56
57
|
def at_xpath(selector, within: nil)
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
expr = <<~JS
|
59
|
+
function(selector, within) {
|
60
|
+
within ||= document
|
61
|
+
let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
62
|
+
return xpath.snapshotItem(0);
|
63
|
+
}
|
63
64
|
JS
|
64
|
-
|
65
|
-
evaluate_async(code, @page.timeout, selector, within)
|
65
|
+
evaluate_func(expr, selector, within)
|
66
66
|
end
|
67
67
|
|
68
68
|
def css(selector, within: nil)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
69
|
+
expr = <<~JS
|
70
|
+
function(selector, within) {
|
71
|
+
within ||= document
|
72
|
+
return Array.from(within.querySelectorAll(selector));
|
73
|
+
}
|
74
74
|
JS
|
75
75
|
|
76
|
-
|
76
|
+
evaluate_func(expr, selector, within)
|
77
77
|
end
|
78
78
|
|
79
79
|
def at_css(selector, within: nil)
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
80
|
+
expr = <<~JS
|
81
|
+
function(selector, within) {
|
82
|
+
within ||= document
|
83
|
+
return within.querySelector(selector);
|
84
|
+
}
|
85
85
|
JS
|
86
86
|
|
87
|
-
|
87
|
+
evaluate_func(expr, selector, within)
|
88
88
|
end
|
89
89
|
end
|
90
90
|
end
|
data/lib/ferrum/frame/runtime.rb
CHANGED
@@ -3,36 +3,19 @@
|
|
3
3
|
require "singleton"
|
4
4
|
|
5
5
|
module Ferrum
|
6
|
+
class CyclicObject
|
7
|
+
include Singleton
|
8
|
+
|
9
|
+
def inspect
|
10
|
+
%(#<#{self.class} JavaScript object that cannot be represented in Ruby>)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
6
14
|
class Frame
|
7
15
|
module Runtime
|
8
16
|
INTERMITTENT_ATTEMPTS = ENV.fetch("FERRUM_INTERMITTENT_ATTEMPTS", 6).to_i
|
9
17
|
INTERMITTENT_SLEEP = ENV.fetch("FERRUM_INTERMITTENT_SLEEP", 0.1).to_f
|
10
18
|
|
11
|
-
EXECUTE_OPTIONS = {
|
12
|
-
returnByValue: true,
|
13
|
-
functionDeclaration: %(function() { %s })
|
14
|
-
}.freeze
|
15
|
-
DEFAULT_OPTIONS = {
|
16
|
-
functionDeclaration: %(function() { return %s })
|
17
|
-
}.freeze
|
18
|
-
EVALUATE_ASYNC_OPTIONS = {
|
19
|
-
awaitPromise: true,
|
20
|
-
functionDeclaration: %(
|
21
|
-
function() {
|
22
|
-
return new Promise((__resolve, __reject) => {
|
23
|
-
try {
|
24
|
-
arguments[arguments.length] = r => __resolve(r);
|
25
|
-
arguments.length = arguments.length + 1;
|
26
|
-
setTimeout(() => __reject(new Error("timed out promise")), %s);
|
27
|
-
%s
|
28
|
-
} catch(error) {
|
29
|
-
__reject(error);
|
30
|
-
}
|
31
|
-
});
|
32
|
-
}
|
33
|
-
)
|
34
|
-
}.freeze
|
35
|
-
|
36
19
|
SCRIPT_SRC_TAG = <<~JS
|
37
20
|
const script = document.createElement("script");
|
38
21
|
script.src = arguments[0];
|
@@ -63,37 +46,45 @@ module Ferrum
|
|
63
46
|
JS
|
64
47
|
|
65
48
|
def evaluate(expression, *args)
|
66
|
-
|
49
|
+
expression = "function() { return %s }" % expression
|
50
|
+
call(expression: expression, arguments: args)
|
67
51
|
end
|
68
52
|
|
69
|
-
def evaluate_async(expression,
|
70
|
-
|
53
|
+
def evaluate_async(expression, wait, *args)
|
54
|
+
template = <<~JS
|
55
|
+
function() {
|
56
|
+
return new Promise((__f, __r) => {
|
57
|
+
try {
|
58
|
+
arguments[arguments.length] = r => __f(r);
|
59
|
+
arguments.length = arguments.length + 1;
|
60
|
+
setTimeout(() => __r(new Error("timed out promise")), %s);
|
61
|
+
%s
|
62
|
+
} catch(error) {
|
63
|
+
__r(error);
|
64
|
+
}
|
65
|
+
});
|
66
|
+
}
|
67
|
+
JS
|
68
|
+
|
69
|
+
expression = template % [wait * 1000, expression]
|
70
|
+
call(expression: expression, arguments: args, awaitPromise: true)
|
71
71
|
end
|
72
72
|
|
73
73
|
def execute(expression, *args)
|
74
|
-
|
74
|
+
expression = "function() { %s }" % expression
|
75
|
+
call(expression: expression, arguments: args, handle: false, returnByValue: true)
|
75
76
|
true
|
76
77
|
end
|
77
78
|
|
78
|
-
def
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
|
83
|
-
response = @page.command("DOM.resolveNode", nodeId: node.node_id)
|
84
|
-
object_id = response.dig("object", "objectId")
|
85
|
-
options = DEFAULT_OPTIONS.merge(objectId: object_id)
|
86
|
-
options[:functionDeclaration] = options[:functionDeclaration] % expression
|
87
|
-
options.merge!(returnByValue: by_value)
|
88
|
-
|
89
|
-
response = @page.command("Runtime.callFunctionOn",
|
90
|
-
wait: wait, slowmoable: true,
|
91
|
-
**options)
|
92
|
-
handle_error(response)
|
93
|
-
response = response["result"]
|
79
|
+
def evaluate_func(expression, *args, on: nil)
|
80
|
+
call(expression: expression, arguments: args, on: on)
|
81
|
+
end
|
94
82
|
|
95
|
-
|
96
|
-
|
83
|
+
def evaluate_on(node:, expression:, by_value: true, wait: 0)
|
84
|
+
options = { handle: true }
|
85
|
+
expression = "function() { return %s }" % expression
|
86
|
+
options = { handle: false, returnByValue: true } if by_value
|
87
|
+
call(expression: expression, on: node, wait: wait, **options)
|
97
88
|
end
|
98
89
|
|
99
90
|
def add_script_tag(url: nil, path: nil, content: nil, type: "text/javascript")
|
@@ -126,27 +117,29 @@ module Ferrum
|
|
126
117
|
|
127
118
|
private
|
128
119
|
|
129
|
-
def call(
|
120
|
+
def call(expression:, arguments: [], on: nil, wait: 0, handle: true, **options)
|
130
121
|
errors = [NodeNotFoundError, NoExecutionContextError]
|
131
122
|
attempts, sleep = INTERMITTENT_ATTEMPTS, INTERMITTENT_SLEEP
|
132
123
|
|
133
124
|
Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
params = params.merge(executionContextId: execution_id)
|
125
|
+
if on
|
126
|
+
response = @page.command("DOM.resolveNode", nodeId: on.node_id)
|
127
|
+
object_id = response.dig("object", "objectId")
|
128
|
+
options.merge!(objectId: object_id)
|
129
|
+
else
|
130
|
+
options.merge!(executionContextId: execution_id)
|
141
131
|
end
|
142
132
|
|
133
|
+
options.merge!(functionDeclaration: expression,
|
134
|
+
arguments: prepare_args(arguments))
|
135
|
+
|
143
136
|
response = @page.command("Runtime.callFunctionOn",
|
144
|
-
slowmoable: true,
|
145
|
-
**
|
137
|
+
wait: wait, slowmoable: true,
|
138
|
+
**options)
|
146
139
|
handle_error(response)
|
147
140
|
response = response["result"]
|
148
141
|
|
149
|
-
handle ? handle_response(response) : response
|
142
|
+
handle ? handle_response(response) : response.dig("value")
|
150
143
|
end
|
151
144
|
end
|
152
145
|
|
@@ -239,15 +232,13 @@ module Ferrum
|
|
239
232
|
return false;
|
240
233
|
}
|
241
234
|
|
242
|
-
|
243
|
-
function detectCycle(obj) {
|
235
|
+
function detectCycle(obj, seen) {
|
244
236
|
if (typeof obj === "object") {
|
245
237
|
if (seen.indexOf(obj) !== -1) {
|
246
238
|
return true;
|
247
239
|
}
|
248
|
-
seen.push(obj);
|
249
240
|
for (let key in obj) {
|
250
|
-
if (obj.hasOwnProperty(key) && detectCycle(obj[key])) {
|
241
|
+
if (obj.hasOwnProperty(key) && detectCycle(obj[key], seen.concat([obj]))) {
|
251
242
|
return true;
|
252
243
|
}
|
253
244
|
}
|
@@ -256,7 +247,7 @@ module Ferrum
|
|
256
247
|
return false;
|
257
248
|
}
|
258
249
|
|
259
|
-
return detectCycle(this);
|
250
|
+
return detectCycle(this, []);
|
260
251
|
}
|
261
252
|
JS
|
262
253
|
)
|
@@ -267,12 +258,4 @@ module Ferrum
|
|
267
258
|
end
|
268
259
|
end
|
269
260
|
end
|
270
|
-
|
271
|
-
class CyclicObject
|
272
|
-
include Singleton
|
273
|
-
|
274
|
-
def inspect
|
275
|
-
%(#<#{self.class} JavaScript object that cannot be represented in Ruby>)
|
276
|
-
end
|
277
|
-
end
|
278
261
|
end
|
data/lib/ferrum/network.rb
CHANGED
@@ -83,19 +83,21 @@ module Ferrum
|
|
83
83
|
@page.command("Fetch.enable", handleAuthRequests: true, patterns: [pattern])
|
84
84
|
end
|
85
85
|
|
86
|
-
def authorize(user:, password:, type: :server)
|
86
|
+
def authorize(user:, password:, type: :server, &block)
|
87
87
|
unless AUTHORIZE_TYPE.include?(type)
|
88
88
|
raise ArgumentError, ":type should be in #{AUTHORIZE_TYPE}"
|
89
89
|
end
|
90
90
|
|
91
|
+
if !block_given? && !@page.subscribed?("Fetch.requestPaused")
|
92
|
+
raise ArgumentError, "Block is missing, call `authorize(...) { |r| r.continue } or subscribe to `on(:request)` events before calling it"
|
93
|
+
end
|
94
|
+
|
91
95
|
@authorized_ids ||= {}
|
92
96
|
@authorized_ids[type] ||= []
|
93
97
|
|
94
98
|
intercept
|
95
99
|
|
96
|
-
@page.on(:request)
|
97
|
-
request.continue
|
98
|
-
end
|
100
|
+
@page.on(:request, &block)
|
99
101
|
|
100
102
|
@page.on(:auth) do |request, index, total|
|
101
103
|
if request.auth_challenge?(type)
|
@@ -157,12 +159,27 @@ module Ferrum
|
|
157
159
|
end
|
158
160
|
end
|
159
161
|
|
162
|
+
@page.on("Network.loadingFailed") do |params|
|
163
|
+
exchange = select(params["requestId"]).last
|
164
|
+
exchange.error ||= Network::Error.new
|
165
|
+
|
166
|
+
exchange.error.id = params["requestId"]
|
167
|
+
exchange.error.type = params["type"]
|
168
|
+
exchange.error.error_text = params["errorText"]
|
169
|
+
exchange.error.monotonic_time = params["timestamp"]
|
170
|
+
exchange.error.canceled = params["canceled"]
|
171
|
+
end
|
172
|
+
|
160
173
|
@page.on("Log.entryAdded") do |params|
|
161
174
|
entry = params["entry"] || {}
|
162
175
|
if entry["source"] == "network" && entry["level"] == "error"
|
163
176
|
exchange = select(entry["networkRequestId"]).last
|
164
|
-
error
|
165
|
-
|
177
|
+
exchange.error ||= Network::Error.new
|
178
|
+
|
179
|
+
exchange.error.id = entry["networkRequestId"]
|
180
|
+
exchange.error.url = entry["url"]
|
181
|
+
exchange.error.description = entry["text"]
|
182
|
+
exchange.error.timestamp = entry["timestamp"]
|
166
183
|
end
|
167
184
|
end
|
168
185
|
end
|