ferrum 0.9 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ferrum.rb CHANGED
@@ -10,14 +10,20 @@ module Ferrum
10
10
  class NotImplementedError < Error; end
11
11
 
12
12
  class StatusError < Error
13
+ def initialize(url, message = nil)
14
+ super(message || "Request to #{url} failed to reach server, check DNS and server status")
15
+ end
16
+ end
17
+
18
+ class PendingConnectionsError < StatusError
19
+ attr_reader :pendings
20
+
13
21
  def initialize(url, pendings = [])
14
- message = if pendings.empty?
15
- "Request to #{url} failed to reach server, check DNS and/or server status"
16
- else
17
- "Request to #{url} reached server, but there are still pending connections: #{pendings.join(', ')}"
18
- end
22
+ @pendings = pendings
19
23
 
20
- super(message)
24
+ message = "Request to #{url} reached server, but there are still pending connections: #{pendings.join(', ')}"
25
+
26
+ super(url, message)
21
27
  end
22
28
  end
23
29
 
@@ -37,8 +43,11 @@ module Ferrum
37
43
  end
38
44
 
39
45
  class ProcessTimeoutError < Error
40
- def initialize(timeout)
41
- super("Browser did not produce websocket url within #{timeout} seconds")
46
+ attr_reader :output
47
+
48
+ def initialize(timeout, output)
49
+ @output = output
50
+ super("Browser did not produce websocket url within #{timeout} seconds, try to increase `:process_timeout`. See https://github.com/rubycdp/ferrum#customization")
42
51
  end
43
52
  end
44
53
 
@@ -17,19 +17,19 @@ module Ferrum
17
17
  extend Forwardable
18
18
  delegate %i[default_context] => :contexts
19
19
  delegate %i[targets create_target create_page page pages windows] => :default_context
20
- delegate %i[goto back forward refresh reload stop wait_for_reload
20
+ delegate %i[go_to back forward refresh reload stop wait_for_reload
21
21
  at_css at_xpath css xpath current_url current_title url title
22
22
  body doctype set_content
23
23
  headers cookies network
24
24
  mouse keyboard
25
- screenshot pdf viewport_size
25
+ screenshot pdf mhtml viewport_size
26
26
  frames frame_by main_frame
27
- evaluate evaluate_on evaluate_async execute
27
+ evaluate evaluate_on evaluate_async execute evaluate_func
28
28
  add_script_tag add_style_tag bypass_csp
29
- on] => :page
29
+ on goto] => :page
30
30
  delegate %i[default_user_agent] => :process
31
31
 
32
- attr_reader :client, :process, :contexts, :logger, :js_errors,
32
+ attr_reader :client, :process, :contexts, :logger, :js_errors, :pending_connection_errors,
33
33
  :slowmo, :base_url, :options, :window_size, :ws_max_receive_size
34
34
  attr_writer :timeout
35
35
 
@@ -44,6 +44,7 @@ module Ferrum
44
44
  @logger, @timeout, @ws_max_receive_size =
45
45
  @options.values_at(:logger, :timeout, :ws_max_receive_size)
46
46
  @js_errors = @options.fetch(:js_errors, false)
47
+ @pending_connection_errors = @options.fetch(:pending_connection_errors, true)
47
48
  @slowmo = @options[:slowmo].to_f
48
49
 
49
50
  if @options.key?(:base_url)
@@ -58,6 +58,10 @@ module Ferrum
58
58
  end
59
59
  end
60
60
 
61
+ def subscribed?(event)
62
+ [@interruptor, @subscriber].any? { |s| s.subscribed?(event) }
63
+ end
64
+
61
65
  def close
62
66
  @ws.close
63
67
  # Give a thread some time to handle a tail of messages
@@ -4,7 +4,7 @@ module Ferrum
4
4
  class Browser
5
5
  class Command
6
6
  NOT_FOUND = "Could not find an executable for the browser. Try to make " \
7
- "it available on the PATH or set environment varible for " \
7
+ "it available on the PATH or set environment variable for " \
8
8
  "example BROWSER_PATH=\"/usr/bin/chrome\"".freeze
9
9
 
10
10
  # Currently only these browsers support CDP:
@@ -16,7 +16,7 @@ module Ferrum
16
16
  class Process
17
17
  KILL_TIMEOUT = 2
18
18
  WAIT_KILLED = 0.05
19
- PROCESS_TIMEOUT = ENV.fetch("FERRUM_PROCESS_TIMEOUT", 2).to_i
19
+ PROCESS_TIMEOUT = ENV.fetch("FERRUM_PROCESS_TIMEOUT", 10).to_i
20
20
 
21
21
  attr_reader :host, :port, :ws_url, :pid, :command,
22
22
  :default_user_agent, :browser_version, :protocol_version,
@@ -64,11 +64,13 @@ module Ferrum
64
64
  return
65
65
  end
66
66
 
67
+ @pid = @xvfb = @user_data_dir = nil
67
68
  @logger = options[:logger]
68
69
  @process_timeout = options.fetch(:process_timeout, PROCESS_TIMEOUT)
69
70
 
70
- tmpdir = Dir.mktmpdir
71
+ tmpdir = Dir.mktmpdir("ferrum_user_data_dir_")
71
72
  ObjectSpace.define_finalizer(self, self.class.directory_remover(tmpdir))
73
+ @user_data_dir = tmpdir
72
74
  @command = Command.build(options, tmpdir)
73
75
  end
74
76
 
@@ -144,7 +146,7 @@ module Ferrum
144
146
 
145
147
  unless ws_url
146
148
  @logger.puts(output) if @logger
147
- raise ProcessTimeoutError.new(timeout)
149
+ raise ProcessTimeoutError.new(timeout, output)
148
150
  end
149
151
  end
150
152
 
@@ -21,6 +21,10 @@ module Ferrum
21
21
  true
22
22
  end
23
23
 
24
+ def subscribed?(event)
25
+ @on.key?(event)
26
+ end
27
+
24
28
  def call(message)
25
29
  method, params = message.values_at("method", "params")
26
30
  total = @on[method].size
@@ -42,9 +42,9 @@ module Ferrum
42
42
  end
43
43
 
44
44
  def create_target
45
- target_id = @browser.command("Target.createTarget",
46
- browserContextId: @id,
47
- url: "about:blank")["targetId"]
45
+ @browser.command("Target.createTarget",
46
+ browserContextId: @id,
47
+ url: "about:blank")
48
48
  target = @pendings.take(@browser.timeout)
49
49
  raise NoSuchTargetError unless target.is_a?(Target)
50
50
  @targets[target.id] = target
data/lib/ferrum/frame.rb CHANGED
@@ -11,6 +11,7 @@ module Ferrum
11
11
  attr_accessor :id, :name
12
12
 
13
13
  def initialize(id, page, parent_id = nil)
14
+ @execution_id = nil
14
15
  @id, @page, @parent_id = id, page, parent_id
15
16
  end
16
17
 
@@ -37,54 +37,54 @@ module Ferrum
37
37
  end
38
38
 
39
39
  def xpath(selector, within: nil)
40
- code = <<~JS
41
- let selector = arguments[0];
42
- let within = arguments[1] || document;
43
- let results = [];
40
+ expr = <<~JS
41
+ function(selector, within) {
42
+ let results = [];
43
+ within ||= document
44
44
 
45
- let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
46
- for (let i = 0; i < xpath.snapshotLength; i++) {
47
- results.push(xpath.snapshotItem(i));
48
- }
45
+ let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
46
+ for (let i = 0; i < xpath.snapshotLength; i++) {
47
+ results.push(xpath.snapshotItem(i));
48
+ }
49
49
 
50
- arguments[2](results);
50
+ return results;
51
+ }
51
52
  JS
52
53
 
53
- evaluate_async(code, @page.timeout, selector, within)
54
+ evaluate_func(expr, selector, within)
54
55
  end
55
56
 
56
57
  def at_xpath(selector, within: nil)
57
- code = <<~JS
58
- let selector = arguments[0];
59
- let within = arguments[1] || document;
60
- let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
61
- let result = xpath.snapshotItem(0);
62
- arguments[2](result);
58
+ expr = <<~JS
59
+ function(selector, within) {
60
+ within ||= document
61
+ let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
62
+ return xpath.snapshotItem(0);
63
+ }
63
64
  JS
64
-
65
- evaluate_async(code, @page.timeout, selector, within)
65
+ evaluate_func(expr, selector, within)
66
66
  end
67
67
 
68
68
  def css(selector, within: nil)
69
- code = <<~JS
70
- let selector = arguments[0];
71
- let within = arguments[1] || document;
72
- let results = within.querySelectorAll(selector);
73
- arguments[2](results);
69
+ expr = <<~JS
70
+ function(selector, within) {
71
+ within ||= document
72
+ return Array.from(within.querySelectorAll(selector));
73
+ }
74
74
  JS
75
75
 
76
- evaluate_async(code, @page.timeout, selector, within)
76
+ evaluate_func(expr, selector, within)
77
77
  end
78
78
 
79
79
  def at_css(selector, within: nil)
80
- code = <<~JS
81
- let selector = arguments[0];
82
- let within = arguments[1] || document;
83
- let result = within.querySelector(selector);
84
- arguments[2](result);
80
+ expr = <<~JS
81
+ function(selector, within) {
82
+ within ||= document
83
+ return within.querySelector(selector);
84
+ }
85
85
  JS
86
86
 
87
- evaluate_async(code, @page.timeout, selector, within)
87
+ evaluate_func(expr, selector, within)
88
88
  end
89
89
  end
90
90
  end
@@ -3,36 +3,19 @@
3
3
  require "singleton"
4
4
 
5
5
  module Ferrum
6
+ class CyclicObject
7
+ include Singleton
8
+
9
+ def inspect
10
+ %(#<#{self.class} JavaScript object that cannot be represented in Ruby>)
11
+ end
12
+ end
13
+
6
14
  class Frame
7
15
  module Runtime
8
16
  INTERMITTENT_ATTEMPTS = ENV.fetch("FERRUM_INTERMITTENT_ATTEMPTS", 6).to_i
9
17
  INTERMITTENT_SLEEP = ENV.fetch("FERRUM_INTERMITTENT_SLEEP", 0.1).to_f
10
18
 
11
- EXECUTE_OPTIONS = {
12
- returnByValue: true,
13
- functionDeclaration: %(function() { %s })
14
- }.freeze
15
- DEFAULT_OPTIONS = {
16
- functionDeclaration: %(function() { return %s })
17
- }.freeze
18
- EVALUATE_ASYNC_OPTIONS = {
19
- awaitPromise: true,
20
- functionDeclaration: %(
21
- function() {
22
- return new Promise((__resolve, __reject) => {
23
- try {
24
- arguments[arguments.length] = r => __resolve(r);
25
- arguments.length = arguments.length + 1;
26
- setTimeout(() => __reject(new Error("timed out promise")), %s);
27
- %s
28
- } catch(error) {
29
- __reject(error);
30
- }
31
- });
32
- }
33
- )
34
- }.freeze
35
-
36
19
  SCRIPT_SRC_TAG = <<~JS
37
20
  const script = document.createElement("script");
38
21
  script.src = arguments[0];
@@ -63,37 +46,45 @@ module Ferrum
63
46
  JS
64
47
 
65
48
  def evaluate(expression, *args)
66
- call(*args, expression: expression)
49
+ expression = "function() { return %s }" % expression
50
+ call(expression: expression, arguments: args)
67
51
  end
68
52
 
69
- def evaluate_async(expression, wait_time, *args)
70
- call(*args, expression: expression, wait_time: wait_time * 1000, **EVALUATE_ASYNC_OPTIONS)
53
+ def evaluate_async(expression, wait, *args)
54
+ template = <<~JS
55
+ function() {
56
+ return new Promise((__f, __r) => {
57
+ try {
58
+ arguments[arguments.length] = r => __f(r);
59
+ arguments.length = arguments.length + 1;
60
+ setTimeout(() => __r(new Error("timed out promise")), %s);
61
+ %s
62
+ } catch(error) {
63
+ __r(error);
64
+ }
65
+ });
66
+ }
67
+ JS
68
+
69
+ expression = template % [wait * 1000, expression]
70
+ call(expression: expression, arguments: args, awaitPromise: true)
71
71
  end
72
72
 
73
73
  def execute(expression, *args)
74
- call(*args, expression: expression, handle: false, **EXECUTE_OPTIONS)
74
+ expression = "function() { %s }" % expression
75
+ call(expression: expression, arguments: args, handle: false, returnByValue: true)
75
76
  true
76
77
  end
77
78
 
78
- def evaluate_on(node:, expression:, by_value: true, wait: 0)
79
- errors = [NodeNotFoundError, NoExecutionContextError]
80
- attempts, sleep = INTERMITTENT_ATTEMPTS, INTERMITTENT_SLEEP
81
-
82
- Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
83
- response = @page.command("DOM.resolveNode", nodeId: node.node_id)
84
- object_id = response.dig("object", "objectId")
85
- options = DEFAULT_OPTIONS.merge(objectId: object_id)
86
- options[:functionDeclaration] = options[:functionDeclaration] % expression
87
- options.merge!(returnByValue: by_value)
88
-
89
- response = @page.command("Runtime.callFunctionOn",
90
- wait: wait, slowmoable: true,
91
- **options)
92
- handle_error(response)
93
- response = response["result"]
79
+ def evaluate_func(expression, *args, on: nil)
80
+ call(expression: expression, arguments: args, on: on)
81
+ end
94
82
 
95
- by_value ? response.dig("value") : handle_response(response)
96
- end
83
+ def evaluate_on(node:, expression:, by_value: true, wait: 0)
84
+ options = { handle: true }
85
+ expression = "function() { return %s }" % expression
86
+ options = { handle: false, returnByValue: true } if by_value
87
+ call(expression: expression, on: node, wait: wait, **options)
97
88
  end
98
89
 
99
90
  def add_script_tag(url: nil, path: nil, content: nil, type: "text/javascript")
@@ -126,27 +117,29 @@ module Ferrum
126
117
 
127
118
  private
128
119
 
129
- def call(*args, expression:, wait_time: nil, handle: true, **options)
120
+ def call(expression:, arguments: [], on: nil, wait: 0, handle: true, **options)
130
121
  errors = [NodeNotFoundError, NoExecutionContextError]
131
122
  attempts, sleep = INTERMITTENT_ATTEMPTS, INTERMITTENT_SLEEP
132
123
 
133
124
  Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
134
- arguments = prepare_args(args)
135
- params = DEFAULT_OPTIONS.merge(options)
136
- expression = [wait_time, expression] if wait_time
137
- params[:functionDeclaration] = params[:functionDeclaration] % expression
138
- params = params.merge(arguments: arguments)
139
- unless params[:executionContextId]
140
- params = params.merge(executionContextId: execution_id)
125
+ if on
126
+ response = @page.command("DOM.resolveNode", nodeId: on.node_id)
127
+ object_id = response.dig("object", "objectId")
128
+ options.merge!(objectId: object_id)
129
+ else
130
+ options.merge!(executionContextId: execution_id)
141
131
  end
142
132
 
133
+ options.merge!(functionDeclaration: expression,
134
+ arguments: prepare_args(arguments))
135
+
143
136
  response = @page.command("Runtime.callFunctionOn",
144
- slowmoable: true,
145
- **params)
137
+ wait: wait, slowmoable: true,
138
+ **options)
146
139
  handle_error(response)
147
140
  response = response["result"]
148
141
 
149
- handle ? handle_response(response) : response
142
+ handle ? handle_response(response) : response.dig("value")
150
143
  end
151
144
  end
152
145
 
@@ -239,15 +232,13 @@ module Ferrum
239
232
  return false;
240
233
  }
241
234
 
242
- const seen = [];
243
- function detectCycle(obj) {
235
+ function detectCycle(obj, seen) {
244
236
  if (typeof obj === "object") {
245
237
  if (seen.indexOf(obj) !== -1) {
246
238
  return true;
247
239
  }
248
- seen.push(obj);
249
240
  for (let key in obj) {
250
- if (obj.hasOwnProperty(key) && detectCycle(obj[key])) {
241
+ if (obj.hasOwnProperty(key) && detectCycle(obj[key], seen.concat([obj]))) {
251
242
  return true;
252
243
  }
253
244
  }
@@ -256,7 +247,7 @@ module Ferrum
256
247
  return false;
257
248
  }
258
249
 
259
- return detectCycle(this);
250
+ return detectCycle(this, []);
260
251
  }
261
252
  JS
262
253
  )
@@ -267,12 +258,4 @@ module Ferrum
267
258
  end
268
259
  end
269
260
  end
270
-
271
- class CyclicObject
272
- include Singleton
273
-
274
- def inspect
275
- %(#<#{self.class} JavaScript object that cannot be represented in Ruby>)
276
- end
277
- end
278
261
  end
@@ -83,19 +83,21 @@ module Ferrum
83
83
  @page.command("Fetch.enable", handleAuthRequests: true, patterns: [pattern])
84
84
  end
85
85
 
86
- def authorize(user:, password:, type: :server)
86
+ def authorize(user:, password:, type: :server, &block)
87
87
  unless AUTHORIZE_TYPE.include?(type)
88
88
  raise ArgumentError, ":type should be in #{AUTHORIZE_TYPE}"
89
89
  end
90
90
 
91
+ if !block_given? && !@page.subscribed?("Fetch.requestPaused")
92
+ raise ArgumentError, "Block is missing, call `authorize(...) { |r| r.continue } or subscribe to `on(:request)` events before calling it"
93
+ end
94
+
91
95
  @authorized_ids ||= {}
92
96
  @authorized_ids[type] ||= []
93
97
 
94
98
  intercept
95
99
 
96
- @page.on(:request) do |request|
97
- request.continue
98
- end
100
+ @page.on(:request, &block)
99
101
 
100
102
  @page.on(:auth) do |request, index, total|
101
103
  if request.auth_challenge?(type)
@@ -157,12 +159,27 @@ module Ferrum
157
159
  end
158
160
  end
159
161
 
162
+ @page.on("Network.loadingFailed") do |params|
163
+ exchange = select(params["requestId"]).last
164
+ exchange.error ||= Network::Error.new
165
+
166
+ exchange.error.id = params["requestId"]
167
+ exchange.error.type = params["type"]
168
+ exchange.error.error_text = params["errorText"]
169
+ exchange.error.monotonic_time = params["timestamp"]
170
+ exchange.error.canceled = params["canceled"]
171
+ end
172
+
160
173
  @page.on("Log.entryAdded") do |params|
161
174
  entry = params["entry"] || {}
162
175
  if entry["source"] == "network" && entry["level"] == "error"
163
176
  exchange = select(entry["networkRequestId"]).last
164
- error = Network::Error.new(entry)
165
- exchange.error = error
177
+ exchange.error ||= Network::Error.new
178
+
179
+ exchange.error.id = entry["networkRequestId"]
180
+ exchange.error.url = entry["url"]
181
+ exchange.error.description = entry["text"]
182
+ exchange.error.timestamp = entry["timestamp"]
166
183
  end
167
184
  end
168
185
  end