ferrum 0.9 → 0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +110 -74
- data/lib/ferrum.rb +17 -8
- data/lib/ferrum/browser.rb +6 -5
- data/lib/ferrum/browser/client.rb +4 -0
- data/lib/ferrum/browser/command.rb +1 -1
- data/lib/ferrum/browser/process.rb +5 -3
- data/lib/ferrum/browser/subscriber.rb +4 -0
- data/lib/ferrum/context.rb +3 -3
- data/lib/ferrum/frame.rb +1 -0
- data/lib/ferrum/frame/dom.rb +30 -30
- data/lib/ferrum/frame/runtime.rb +54 -71
- data/lib/ferrum/network.rb +23 -6
- data/lib/ferrum/network/error.rb +8 -15
- data/lib/ferrum/node.rb +11 -0
- data/lib/ferrum/page.rb +23 -11
- data/lib/ferrum/page/frames.rb +5 -2
- data/lib/ferrum/page/screenshot.rb +62 -10
- data/lib/ferrum/rbga.rb +38 -0
- data/lib/ferrum/version.rb +1 -1
- metadata +6 -5
data/lib/ferrum.rb
CHANGED
@@ -10,14 +10,20 @@ module Ferrum
|
|
10
10
|
class NotImplementedError < Error; end
|
11
11
|
|
12
12
|
class StatusError < Error
|
13
|
+
def initialize(url, message = nil)
|
14
|
+
super(message || "Request to #{url} failed to reach server, check DNS and server status")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class PendingConnectionsError < StatusError
|
19
|
+
attr_reader :pendings
|
20
|
+
|
13
21
|
def initialize(url, pendings = [])
|
14
|
-
|
15
|
-
"Request to #{url} failed to reach server, check DNS and/or server status"
|
16
|
-
else
|
17
|
-
"Request to #{url} reached server, but there are still pending connections: #{pendings.join(', ')}"
|
18
|
-
end
|
22
|
+
@pendings = pendings
|
19
23
|
|
20
|
-
|
24
|
+
message = "Request to #{url} reached server, but there are still pending connections: #{pendings.join(', ')}"
|
25
|
+
|
26
|
+
super(url, message)
|
21
27
|
end
|
22
28
|
end
|
23
29
|
|
@@ -37,8 +43,11 @@ module Ferrum
|
|
37
43
|
end
|
38
44
|
|
39
45
|
class ProcessTimeoutError < Error
|
40
|
-
|
41
|
-
|
46
|
+
attr_reader :output
|
47
|
+
|
48
|
+
def initialize(timeout, output)
|
49
|
+
@output = output
|
50
|
+
super("Browser did not produce websocket url within #{timeout} seconds, try to increase `:process_timeout`. See https://github.com/rubycdp/ferrum#customization")
|
42
51
|
end
|
43
52
|
end
|
44
53
|
|
data/lib/ferrum/browser.rb
CHANGED
@@ -17,19 +17,19 @@ module Ferrum
|
|
17
17
|
extend Forwardable
|
18
18
|
delegate %i[default_context] => :contexts
|
19
19
|
delegate %i[targets create_target create_page page pages windows] => :default_context
|
20
|
-
delegate %i[
|
20
|
+
delegate %i[go_to back forward refresh reload stop wait_for_reload
|
21
21
|
at_css at_xpath css xpath current_url current_title url title
|
22
22
|
body doctype set_content
|
23
23
|
headers cookies network
|
24
24
|
mouse keyboard
|
25
|
-
screenshot pdf viewport_size
|
25
|
+
screenshot pdf mhtml viewport_size
|
26
26
|
frames frame_by main_frame
|
27
|
-
evaluate evaluate_on evaluate_async execute
|
27
|
+
evaluate evaluate_on evaluate_async execute evaluate_func
|
28
28
|
add_script_tag add_style_tag bypass_csp
|
29
|
-
on] => :page
|
29
|
+
on goto] => :page
|
30
30
|
delegate %i[default_user_agent] => :process
|
31
31
|
|
32
|
-
attr_reader :client, :process, :contexts, :logger, :js_errors,
|
32
|
+
attr_reader :client, :process, :contexts, :logger, :js_errors, :pending_connection_errors,
|
33
33
|
:slowmo, :base_url, :options, :window_size, :ws_max_receive_size
|
34
34
|
attr_writer :timeout
|
35
35
|
|
@@ -44,6 +44,7 @@ module Ferrum
|
|
44
44
|
@logger, @timeout, @ws_max_receive_size =
|
45
45
|
@options.values_at(:logger, :timeout, :ws_max_receive_size)
|
46
46
|
@js_errors = @options.fetch(:js_errors, false)
|
47
|
+
@pending_connection_errors = @options.fetch(:pending_connection_errors, true)
|
47
48
|
@slowmo = @options[:slowmo].to_f
|
48
49
|
|
49
50
|
if @options.key?(:base_url)
|
data/lib/ferrum/browser/command.rb
CHANGED
@@ -4,7 +4,7 @@ module Ferrum
|
|
4
4
|
class Browser
|
5
5
|
class Command
|
6
6
|
NOT_FOUND = "Could not find an executable for the browser. Try to make " \
|
7
|
-
"it available on the PATH or set environment
|
7
|
+
"it available on the PATH or set environment variable for " \
|
8
8
|
"example BROWSER_PATH=\"/usr/bin/chrome\"".freeze
|
9
9
|
|
10
10
|
# Currently only these browsers support CDP:
|
data/lib/ferrum/browser/process.rb
CHANGED
@@ -16,7 +16,7 @@ module Ferrum
|
|
16
16
|
class Process
|
17
17
|
KILL_TIMEOUT = 2
|
18
18
|
WAIT_KILLED = 0.05
|
19
|
-
PROCESS_TIMEOUT = ENV.fetch("FERRUM_PROCESS_TIMEOUT",
|
19
|
+
PROCESS_TIMEOUT = ENV.fetch("FERRUM_PROCESS_TIMEOUT", 10).to_i
|
20
20
|
|
21
21
|
attr_reader :host, :port, :ws_url, :pid, :command,
|
22
22
|
:default_user_agent, :browser_version, :protocol_version,
|
@@ -64,11 +64,13 @@ module Ferrum
|
|
64
64
|
return
|
65
65
|
end
|
66
66
|
|
67
|
+
@pid = @xvfb = @user_data_dir = nil
|
67
68
|
@logger = options[:logger]
|
68
69
|
@process_timeout = options.fetch(:process_timeout, PROCESS_TIMEOUT)
|
69
70
|
|
70
|
-
tmpdir = Dir.mktmpdir
|
71
|
+
tmpdir = Dir.mktmpdir("ferrum_user_data_dir_")
|
71
72
|
ObjectSpace.define_finalizer(self, self.class.directory_remover(tmpdir))
|
73
|
+
@user_data_dir = tmpdir
|
72
74
|
@command = Command.build(options, tmpdir)
|
73
75
|
end
|
74
76
|
|
@@ -144,7 +146,7 @@ module Ferrum
|
|
144
146
|
|
145
147
|
unless ws_url
|
146
148
|
@logger.puts(output) if @logger
|
147
|
-
raise ProcessTimeoutError.new(timeout)
|
149
|
+
raise ProcessTimeoutError.new(timeout, output)
|
148
150
|
end
|
149
151
|
end
|
150
152
|
|
data/lib/ferrum/context.rb
CHANGED
@@ -42,9 +42,9 @@ module Ferrum
|
|
42
42
|
end
|
43
43
|
|
44
44
|
def create_target
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
@browser.command("Target.createTarget",
|
46
|
+
browserContextId: @id,
|
47
|
+
url: "about:blank")
|
48
48
|
target = @pendings.take(@browser.timeout)
|
49
49
|
raise NoSuchTargetError unless target.is_a?(Target)
|
50
50
|
@targets[target.id] = target
|
data/lib/ferrum/frame.rb
CHANGED
data/lib/ferrum/frame/dom.rb
CHANGED
@@ -37,54 +37,54 @@ module Ferrum
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def xpath(selector, within: nil)
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
expr = <<~JS
|
41
|
+
function(selector, within) {
|
42
|
+
let results = [];
|
43
|
+
within ||= document
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
46
|
+
for (let i = 0; i < xpath.snapshotLength; i++) {
|
47
|
+
results.push(xpath.snapshotItem(i));
|
48
|
+
}
|
49
49
|
|
50
|
-
|
50
|
+
return results;
|
51
|
+
}
|
51
52
|
JS
|
52
53
|
|
53
|
-
|
54
|
+
evaluate_func(expr, selector, within)
|
54
55
|
end
|
55
56
|
|
56
57
|
def at_xpath(selector, within: nil)
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
58
|
+
expr = <<~JS
|
59
|
+
function(selector, within) {
|
60
|
+
within ||= document
|
61
|
+
let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
62
|
+
return xpath.snapshotItem(0);
|
63
|
+
}
|
63
64
|
JS
|
64
|
-
|
65
|
-
evaluate_async(code, @page.timeout, selector, within)
|
65
|
+
evaluate_func(expr, selector, within)
|
66
66
|
end
|
67
67
|
|
68
68
|
def css(selector, within: nil)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
69
|
+
expr = <<~JS
|
70
|
+
function(selector, within) {
|
71
|
+
within ||= document
|
72
|
+
return Array.from(within.querySelectorAll(selector));
|
73
|
+
}
|
74
74
|
JS
|
75
75
|
|
76
|
-
|
76
|
+
evaluate_func(expr, selector, within)
|
77
77
|
end
|
78
78
|
|
79
79
|
def at_css(selector, within: nil)
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
80
|
+
expr = <<~JS
|
81
|
+
function(selector, within) {
|
82
|
+
within ||= document
|
83
|
+
return within.querySelector(selector);
|
84
|
+
}
|
85
85
|
JS
|
86
86
|
|
87
|
-
|
87
|
+
evaluate_func(expr, selector, within)
|
88
88
|
end
|
89
89
|
end
|
90
90
|
end
|
data/lib/ferrum/frame/runtime.rb
CHANGED
@@ -3,36 +3,19 @@
|
|
3
3
|
require "singleton"
|
4
4
|
|
5
5
|
module Ferrum
|
6
|
+
class CyclicObject
|
7
|
+
include Singleton
|
8
|
+
|
9
|
+
def inspect
|
10
|
+
%(#<#{self.class} JavaScript object that cannot be represented in Ruby>)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
6
14
|
class Frame
|
7
15
|
module Runtime
|
8
16
|
INTERMITTENT_ATTEMPTS = ENV.fetch("FERRUM_INTERMITTENT_ATTEMPTS", 6).to_i
|
9
17
|
INTERMITTENT_SLEEP = ENV.fetch("FERRUM_INTERMITTENT_SLEEP", 0.1).to_f
|
10
18
|
|
11
|
-
EXECUTE_OPTIONS = {
|
12
|
-
returnByValue: true,
|
13
|
-
functionDeclaration: %(function() { %s })
|
14
|
-
}.freeze
|
15
|
-
DEFAULT_OPTIONS = {
|
16
|
-
functionDeclaration: %(function() { return %s })
|
17
|
-
}.freeze
|
18
|
-
EVALUATE_ASYNC_OPTIONS = {
|
19
|
-
awaitPromise: true,
|
20
|
-
functionDeclaration: %(
|
21
|
-
function() {
|
22
|
-
return new Promise((__resolve, __reject) => {
|
23
|
-
try {
|
24
|
-
arguments[arguments.length] = r => __resolve(r);
|
25
|
-
arguments.length = arguments.length + 1;
|
26
|
-
setTimeout(() => __reject(new Error("timed out promise")), %s);
|
27
|
-
%s
|
28
|
-
} catch(error) {
|
29
|
-
__reject(error);
|
30
|
-
}
|
31
|
-
});
|
32
|
-
}
|
33
|
-
)
|
34
|
-
}.freeze
|
35
|
-
|
36
19
|
SCRIPT_SRC_TAG = <<~JS
|
37
20
|
const script = document.createElement("script");
|
38
21
|
script.src = arguments[0];
|
@@ -63,37 +46,45 @@ module Ferrum
|
|
63
46
|
JS
|
64
47
|
|
65
48
|
def evaluate(expression, *args)
|
66
|
-
|
49
|
+
expression = "function() { return %s }" % expression
|
50
|
+
call(expression: expression, arguments: args)
|
67
51
|
end
|
68
52
|
|
69
|
-
def evaluate_async(expression,
|
70
|
-
|
53
|
+
def evaluate_async(expression, wait, *args)
|
54
|
+
template = <<~JS
|
55
|
+
function() {
|
56
|
+
return new Promise((__f, __r) => {
|
57
|
+
try {
|
58
|
+
arguments[arguments.length] = r => __f(r);
|
59
|
+
arguments.length = arguments.length + 1;
|
60
|
+
setTimeout(() => __r(new Error("timed out promise")), %s);
|
61
|
+
%s
|
62
|
+
} catch(error) {
|
63
|
+
__r(error);
|
64
|
+
}
|
65
|
+
});
|
66
|
+
}
|
67
|
+
JS
|
68
|
+
|
69
|
+
expression = template % [wait * 1000, expression]
|
70
|
+
call(expression: expression, arguments: args, awaitPromise: true)
|
71
71
|
end
|
72
72
|
|
73
73
|
def execute(expression, *args)
|
74
|
-
|
74
|
+
expression = "function() { %s }" % expression
|
75
|
+
call(expression: expression, arguments: args, handle: false, returnByValue: true)
|
75
76
|
true
|
76
77
|
end
|
77
78
|
|
78
|
-
def
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
|
83
|
-
response = @page.command("DOM.resolveNode", nodeId: node.node_id)
|
84
|
-
object_id = response.dig("object", "objectId")
|
85
|
-
options = DEFAULT_OPTIONS.merge(objectId: object_id)
|
86
|
-
options[:functionDeclaration] = options[:functionDeclaration] % expression
|
87
|
-
options.merge!(returnByValue: by_value)
|
88
|
-
|
89
|
-
response = @page.command("Runtime.callFunctionOn",
|
90
|
-
wait: wait, slowmoable: true,
|
91
|
-
**options)
|
92
|
-
handle_error(response)
|
93
|
-
response = response["result"]
|
79
|
+
def evaluate_func(expression, *args, on: nil)
|
80
|
+
call(expression: expression, arguments: args, on: on)
|
81
|
+
end
|
94
82
|
|
95
|
-
|
96
|
-
|
83
|
+
def evaluate_on(node:, expression:, by_value: true, wait: 0)
|
84
|
+
options = { handle: true }
|
85
|
+
expression = "function() { return %s }" % expression
|
86
|
+
options = { handle: false, returnByValue: true } if by_value
|
87
|
+
call(expression: expression, on: node, wait: wait, **options)
|
97
88
|
end
|
98
89
|
|
99
90
|
def add_script_tag(url: nil, path: nil, content: nil, type: "text/javascript")
|
@@ -126,27 +117,29 @@ module Ferrum
|
|
126
117
|
|
127
118
|
private
|
128
119
|
|
129
|
-
def call(
|
120
|
+
def call(expression:, arguments: [], on: nil, wait: 0, handle: true, **options)
|
130
121
|
errors = [NodeNotFoundError, NoExecutionContextError]
|
131
122
|
attempts, sleep = INTERMITTENT_ATTEMPTS, INTERMITTENT_SLEEP
|
132
123
|
|
133
124
|
Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
params = params.merge(executionContextId: execution_id)
|
125
|
+
if on
|
126
|
+
response = @page.command("DOM.resolveNode", nodeId: on.node_id)
|
127
|
+
object_id = response.dig("object", "objectId")
|
128
|
+
options.merge!(objectId: object_id)
|
129
|
+
else
|
130
|
+
options.merge!(executionContextId: execution_id)
|
141
131
|
end
|
142
132
|
|
133
|
+
options.merge!(functionDeclaration: expression,
|
134
|
+
arguments: prepare_args(arguments))
|
135
|
+
|
143
136
|
response = @page.command("Runtime.callFunctionOn",
|
144
|
-
slowmoable: true,
|
145
|
-
**
|
137
|
+
wait: wait, slowmoable: true,
|
138
|
+
**options)
|
146
139
|
handle_error(response)
|
147
140
|
response = response["result"]
|
148
141
|
|
149
|
-
handle ? handle_response(response) : response
|
142
|
+
handle ? handle_response(response) : response.dig("value")
|
150
143
|
end
|
151
144
|
end
|
152
145
|
|
@@ -239,15 +232,13 @@ module Ferrum
|
|
239
232
|
return false;
|
240
233
|
}
|
241
234
|
|
242
|
-
|
243
|
-
function detectCycle(obj) {
|
235
|
+
function detectCycle(obj, seen) {
|
244
236
|
if (typeof obj === "object") {
|
245
237
|
if (seen.indexOf(obj) !== -1) {
|
246
238
|
return true;
|
247
239
|
}
|
248
|
-
seen.push(obj);
|
249
240
|
for (let key in obj) {
|
250
|
-
if (obj.hasOwnProperty(key) && detectCycle(obj[key])) {
|
241
|
+
if (obj.hasOwnProperty(key) && detectCycle(obj[key], seen.concat([obj]))) {
|
251
242
|
return true;
|
252
243
|
}
|
253
244
|
}
|
@@ -256,7 +247,7 @@ module Ferrum
|
|
256
247
|
return false;
|
257
248
|
}
|
258
249
|
|
259
|
-
return detectCycle(this);
|
250
|
+
return detectCycle(this, []);
|
260
251
|
}
|
261
252
|
JS
|
262
253
|
)
|
@@ -267,12 +258,4 @@ module Ferrum
|
|
267
258
|
end
|
268
259
|
end
|
269
260
|
end
|
270
|
-
|
271
|
-
class CyclicObject
|
272
|
-
include Singleton
|
273
|
-
|
274
|
-
def inspect
|
275
|
-
%(#<#{self.class} JavaScript object that cannot be represented in Ruby>)
|
276
|
-
end
|
277
|
-
end
|
278
261
|
end
|
data/lib/ferrum/network.rb
CHANGED
@@ -83,19 +83,21 @@ module Ferrum
|
|
83
83
|
@page.command("Fetch.enable", handleAuthRequests: true, patterns: [pattern])
|
84
84
|
end
|
85
85
|
|
86
|
-
def authorize(user:, password:, type: :server)
|
86
|
+
def authorize(user:, password:, type: :server, &block)
|
87
87
|
unless AUTHORIZE_TYPE.include?(type)
|
88
88
|
raise ArgumentError, ":type should be in #{AUTHORIZE_TYPE}"
|
89
89
|
end
|
90
90
|
|
91
|
+
if !block_given? && !@page.subscribed?("Fetch.requestPaused")
|
92
|
+
raise ArgumentError, "Block is missing, call `authorize(...) { |r| r.continue } or subscribe to `on(:request)` events before calling it"
|
93
|
+
end
|
94
|
+
|
91
95
|
@authorized_ids ||= {}
|
92
96
|
@authorized_ids[type] ||= []
|
93
97
|
|
94
98
|
intercept
|
95
99
|
|
96
|
-
@page.on(:request)
|
97
|
-
request.continue
|
98
|
-
end
|
100
|
+
@page.on(:request, &block)
|
99
101
|
|
100
102
|
@page.on(:auth) do |request, index, total|
|
101
103
|
if request.auth_challenge?(type)
|
@@ -157,12 +159,27 @@ module Ferrum
|
|
157
159
|
end
|
158
160
|
end
|
159
161
|
|
162
|
+
@page.on("Network.loadingFailed") do |params|
|
163
|
+
exchange = select(params["requestId"]).last
|
164
|
+
exchange.error ||= Network::Error.new
|
165
|
+
|
166
|
+
exchange.error.id = params["requestId"]
|
167
|
+
exchange.error.type = params["type"]
|
168
|
+
exchange.error.error_text = params["errorText"]
|
169
|
+
exchange.error.monotonic_time = params["timestamp"]
|
170
|
+
exchange.error.canceled = params["canceled"]
|
171
|
+
end
|
172
|
+
|
160
173
|
@page.on("Log.entryAdded") do |params|
|
161
174
|
entry = params["entry"] || {}
|
162
175
|
if entry["source"] == "network" && entry["level"] == "error"
|
163
176
|
exchange = select(entry["networkRequestId"]).last
|
164
|
-
error
|
165
|
-
|
177
|
+
exchange.error ||= Network::Error.new
|
178
|
+
|
179
|
+
exchange.error.id = entry["networkRequestId"]
|
180
|
+
exchange.error.url = entry["url"]
|
181
|
+
exchange.error.description = entry["text"]
|
182
|
+
exchange.error.timestamp = entry["timestamp"]
|
166
183
|
end
|
167
184
|
end
|
168
185
|
end
|