ferrum 0.9 → 0.10

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ferrum.rb CHANGED
@@ -10,14 +10,20 @@ module Ferrum
10
10
  class NotImplementedError < Error; end
11
11
 
12
12
  class StatusError < Error
13
+ def initialize(url, message = nil)
14
+ super(message || "Request to #{url} failed to reach server, check DNS and server status")
15
+ end
16
+ end
17
+
18
+ class PendingConnectionsError < StatusError
19
+ attr_reader :pendings
20
+
13
21
  def initialize(url, pendings = [])
14
- message = if pendings.empty?
15
- "Request to #{url} failed to reach server, check DNS and/or server status"
16
- else
17
- "Request to #{url} reached server, but there are still pending connections: #{pendings.join(', ')}"
18
- end
22
+ @pendings = pendings
19
23
 
20
- super(message)
24
+ message = "Request to #{url} reached server, but there are still pending connections: #{pendings.join(', ')}"
25
+
26
+ super(url, message)
21
27
  end
22
28
  end
23
29
 
@@ -37,8 +43,11 @@ module Ferrum
37
43
  end
38
44
 
39
45
  class ProcessTimeoutError < Error
40
- def initialize(timeout)
41
- super("Browser did not produce websocket url within #{timeout} seconds")
46
+ attr_reader :output
47
+
48
+ def initialize(timeout, output)
49
+ @output = output
50
+ super("Browser did not produce websocket url within #{timeout} seconds, try to increase `:process_timeout`. See https://github.com/rubycdp/ferrum#customization")
42
51
  end
43
52
  end
44
53
 
@@ -17,19 +17,19 @@ module Ferrum
17
17
  extend Forwardable
18
18
  delegate %i[default_context] => :contexts
19
19
  delegate %i[targets create_target create_page page pages windows] => :default_context
20
- delegate %i[goto back forward refresh reload stop wait_for_reload
20
+ delegate %i[go_to back forward refresh reload stop wait_for_reload
21
21
  at_css at_xpath css xpath current_url current_title url title
22
22
  body doctype set_content
23
23
  headers cookies network
24
24
  mouse keyboard
25
- screenshot pdf viewport_size
25
+ screenshot pdf mhtml viewport_size
26
26
  frames frame_by main_frame
27
- evaluate evaluate_on evaluate_async execute
27
+ evaluate evaluate_on evaluate_async execute evaluate_func
28
28
  add_script_tag add_style_tag bypass_csp
29
- on] => :page
29
+ on goto] => :page
30
30
  delegate %i[default_user_agent] => :process
31
31
 
32
- attr_reader :client, :process, :contexts, :logger, :js_errors,
32
+ attr_reader :client, :process, :contexts, :logger, :js_errors, :pending_connection_errors,
33
33
  :slowmo, :base_url, :options, :window_size, :ws_max_receive_size
34
34
  attr_writer :timeout
35
35
 
@@ -44,6 +44,7 @@ module Ferrum
44
44
  @logger, @timeout, @ws_max_receive_size =
45
45
  @options.values_at(:logger, :timeout, :ws_max_receive_size)
46
46
  @js_errors = @options.fetch(:js_errors, false)
47
+ @pending_connection_errors = @options.fetch(:pending_connection_errors, true)
47
48
  @slowmo = @options[:slowmo].to_f
48
49
 
49
50
  if @options.key?(:base_url)
@@ -58,6 +58,10 @@ module Ferrum
58
58
  end
59
59
  end
60
60
 
61
+ def subscribed?(event)
62
+ [@interruptor, @subscriber].any? { |s| s.subscribed?(event) }
63
+ end
64
+
61
65
  def close
62
66
  @ws.close
63
67
  # Give a thread some time to handle a tail of messages
@@ -4,7 +4,7 @@ module Ferrum
4
4
  class Browser
5
5
  class Command
6
6
  NOT_FOUND = "Could not find an executable for the browser. Try to make " \
7
- "it available on the PATH or set environment varible for " \
7
+ "it available on the PATH or set environment variable for " \
8
8
  "example BROWSER_PATH=\"/usr/bin/chrome\"".freeze
9
9
 
10
10
  # Currently only these browsers support CDP:
@@ -16,7 +16,7 @@ module Ferrum
16
16
  class Process
17
17
  KILL_TIMEOUT = 2
18
18
  WAIT_KILLED = 0.05
19
- PROCESS_TIMEOUT = ENV.fetch("FERRUM_PROCESS_TIMEOUT", 2).to_i
19
+ PROCESS_TIMEOUT = ENV.fetch("FERRUM_PROCESS_TIMEOUT", 10).to_i
20
20
 
21
21
  attr_reader :host, :port, :ws_url, :pid, :command,
22
22
  :default_user_agent, :browser_version, :protocol_version,
@@ -64,11 +64,13 @@ module Ferrum
64
64
  return
65
65
  end
66
66
 
67
+ @pid = @xvfb = @user_data_dir = nil
67
68
  @logger = options[:logger]
68
69
  @process_timeout = options.fetch(:process_timeout, PROCESS_TIMEOUT)
69
70
 
70
- tmpdir = Dir.mktmpdir
71
+ tmpdir = Dir.mktmpdir("ferrum_user_data_dir_")
71
72
  ObjectSpace.define_finalizer(self, self.class.directory_remover(tmpdir))
73
+ @user_data_dir = tmpdir
72
74
  @command = Command.build(options, tmpdir)
73
75
  end
74
76
 
@@ -144,7 +146,7 @@ module Ferrum
144
146
 
145
147
  unless ws_url
146
148
  @logger.puts(output) if @logger
147
- raise ProcessTimeoutError.new(timeout)
149
+ raise ProcessTimeoutError.new(timeout, output)
148
150
  end
149
151
  end
150
152
 
@@ -21,6 +21,10 @@ module Ferrum
21
21
  true
22
22
  end
23
23
 
24
+ def subscribed?(event)
25
+ @on.key?(event)
26
+ end
27
+
24
28
  def call(message)
25
29
  method, params = message.values_at("method", "params")
26
30
  total = @on[method].size
@@ -42,9 +42,9 @@ module Ferrum
42
42
  end
43
43
 
44
44
  def create_target
45
- target_id = @browser.command("Target.createTarget",
46
- browserContextId: @id,
47
- url: "about:blank")["targetId"]
45
+ @browser.command("Target.createTarget",
46
+ browserContextId: @id,
47
+ url: "about:blank")
48
48
  target = @pendings.take(@browser.timeout)
49
49
  raise NoSuchTargetError unless target.is_a?(Target)
50
50
  @targets[target.id] = target
data/lib/ferrum/frame.rb CHANGED
@@ -11,6 +11,7 @@ module Ferrum
11
11
  attr_accessor :id, :name
12
12
 
13
13
  def initialize(id, page, parent_id = nil)
14
+ @execution_id = nil
14
15
  @id, @page, @parent_id = id, page, parent_id
15
16
  end
16
17
 
@@ -37,54 +37,54 @@ module Ferrum
37
37
  end
38
38
 
39
39
  def xpath(selector, within: nil)
40
- code = <<~JS
41
- let selector = arguments[0];
42
- let within = arguments[1] || document;
43
- let results = [];
40
+ expr = <<~JS
41
+ function(selector, within) {
42
+ let results = [];
43
+ within ||= document
44
44
 
45
- let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
46
- for (let i = 0; i < xpath.snapshotLength; i++) {
47
- results.push(xpath.snapshotItem(i));
48
- }
45
+ let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
46
+ for (let i = 0; i < xpath.snapshotLength; i++) {
47
+ results.push(xpath.snapshotItem(i));
48
+ }
49
49
 
50
- arguments[2](results);
50
+ return results;
51
+ }
51
52
  JS
52
53
 
53
- evaluate_async(code, @page.timeout, selector, within)
54
+ evaluate_func(expr, selector, within)
54
55
  end
55
56
 
56
57
  def at_xpath(selector, within: nil)
57
- code = <<~JS
58
- let selector = arguments[0];
59
- let within = arguments[1] || document;
60
- let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
61
- let result = xpath.snapshotItem(0);
62
- arguments[2](result);
58
+ expr = <<~JS
59
+ function(selector, within) {
60
+ within ||= document
61
+ let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
62
+ return xpath.snapshotItem(0);
63
+ }
63
64
  JS
64
-
65
- evaluate_async(code, @page.timeout, selector, within)
65
+ evaluate_func(expr, selector, within)
66
66
  end
67
67
 
68
68
  def css(selector, within: nil)
69
- code = <<~JS
70
- let selector = arguments[0];
71
- let within = arguments[1] || document;
72
- let results = within.querySelectorAll(selector);
73
- arguments[2](results);
69
+ expr = <<~JS
70
+ function(selector, within) {
71
+ within ||= document
72
+ return Array.from(within.querySelectorAll(selector));
73
+ }
74
74
  JS
75
75
 
76
- evaluate_async(code, @page.timeout, selector, within)
76
+ evaluate_func(expr, selector, within)
77
77
  end
78
78
 
79
79
  def at_css(selector, within: nil)
80
- code = <<~JS
81
- let selector = arguments[0];
82
- let within = arguments[1] || document;
83
- let result = within.querySelector(selector);
84
- arguments[2](result);
80
+ expr = <<~JS
81
+ function(selector, within) {
82
+ within ||= document
83
+ return within.querySelector(selector);
84
+ }
85
85
  JS
86
86
 
87
- evaluate_async(code, @page.timeout, selector, within)
87
+ evaluate_func(expr, selector, within)
88
88
  end
89
89
  end
90
90
  end
@@ -3,36 +3,19 @@
3
3
  require "singleton"
4
4
 
5
5
  module Ferrum
6
+ class CyclicObject
7
+ include Singleton
8
+
9
+ def inspect
10
+ %(#<#{self.class} JavaScript object that cannot be represented in Ruby>)
11
+ end
12
+ end
13
+
6
14
  class Frame
7
15
  module Runtime
8
16
  INTERMITTENT_ATTEMPTS = ENV.fetch("FERRUM_INTERMITTENT_ATTEMPTS", 6).to_i
9
17
  INTERMITTENT_SLEEP = ENV.fetch("FERRUM_INTERMITTENT_SLEEP", 0.1).to_f
10
18
 
11
- EXECUTE_OPTIONS = {
12
- returnByValue: true,
13
- functionDeclaration: %(function() { %s })
14
- }.freeze
15
- DEFAULT_OPTIONS = {
16
- functionDeclaration: %(function() { return %s })
17
- }.freeze
18
- EVALUATE_ASYNC_OPTIONS = {
19
- awaitPromise: true,
20
- functionDeclaration: %(
21
- function() {
22
- return new Promise((__resolve, __reject) => {
23
- try {
24
- arguments[arguments.length] = r => __resolve(r);
25
- arguments.length = arguments.length + 1;
26
- setTimeout(() => __reject(new Error("timed out promise")), %s);
27
- %s
28
- } catch(error) {
29
- __reject(error);
30
- }
31
- });
32
- }
33
- )
34
- }.freeze
35
-
36
19
  SCRIPT_SRC_TAG = <<~JS
37
20
  const script = document.createElement("script");
38
21
  script.src = arguments[0];
@@ -63,37 +46,45 @@ module Ferrum
63
46
  JS
64
47
 
65
48
  def evaluate(expression, *args)
66
- call(*args, expression: expression)
49
+ expression = "function() { return %s }" % expression
50
+ call(expression: expression, arguments: args)
67
51
  end
68
52
 
69
- def evaluate_async(expression, wait_time, *args)
70
- call(*args, expression: expression, wait_time: wait_time * 1000, **EVALUATE_ASYNC_OPTIONS)
53
+ def evaluate_async(expression, wait, *args)
54
+ template = <<~JS
55
+ function() {
56
+ return new Promise((__f, __r) => {
57
+ try {
58
+ arguments[arguments.length] = r => __f(r);
59
+ arguments.length = arguments.length + 1;
60
+ setTimeout(() => __r(new Error("timed out promise")), %s);
61
+ %s
62
+ } catch(error) {
63
+ __r(error);
64
+ }
65
+ });
66
+ }
67
+ JS
68
+
69
+ expression = template % [wait * 1000, expression]
70
+ call(expression: expression, arguments: args, awaitPromise: true)
71
71
  end
72
72
 
73
73
  def execute(expression, *args)
74
- call(*args, expression: expression, handle: false, **EXECUTE_OPTIONS)
74
+ expression = "function() { %s }" % expression
75
+ call(expression: expression, arguments: args, handle: false, returnByValue: true)
75
76
  true
76
77
  end
77
78
 
78
- def evaluate_on(node:, expression:, by_value: true, wait: 0)
79
- errors = [NodeNotFoundError, NoExecutionContextError]
80
- attempts, sleep = INTERMITTENT_ATTEMPTS, INTERMITTENT_SLEEP
81
-
82
- Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
83
- response = @page.command("DOM.resolveNode", nodeId: node.node_id)
84
- object_id = response.dig("object", "objectId")
85
- options = DEFAULT_OPTIONS.merge(objectId: object_id)
86
- options[:functionDeclaration] = options[:functionDeclaration] % expression
87
- options.merge!(returnByValue: by_value)
88
-
89
- response = @page.command("Runtime.callFunctionOn",
90
- wait: wait, slowmoable: true,
91
- **options)
92
- handle_error(response)
93
- response = response["result"]
79
+ def evaluate_func(expression, *args, on: nil)
80
+ call(expression: expression, arguments: args, on: on)
81
+ end
94
82
 
95
- by_value ? response.dig("value") : handle_response(response)
96
- end
83
+ def evaluate_on(node:, expression:, by_value: true, wait: 0)
84
+ options = { handle: true }
85
+ expression = "function() { return %s }" % expression
86
+ options = { handle: false, returnByValue: true } if by_value
87
+ call(expression: expression, on: node, wait: wait, **options)
97
88
  end
98
89
 
99
90
  def add_script_tag(url: nil, path: nil, content: nil, type: "text/javascript")
@@ -126,27 +117,29 @@ module Ferrum
126
117
 
127
118
  private
128
119
 
129
- def call(*args, expression:, wait_time: nil, handle: true, **options)
120
+ def call(expression:, arguments: [], on: nil, wait: 0, handle: true, **options)
130
121
  errors = [NodeNotFoundError, NoExecutionContextError]
131
122
  attempts, sleep = INTERMITTENT_ATTEMPTS, INTERMITTENT_SLEEP
132
123
 
133
124
  Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
134
- arguments = prepare_args(args)
135
- params = DEFAULT_OPTIONS.merge(options)
136
- expression = [wait_time, expression] if wait_time
137
- params[:functionDeclaration] = params[:functionDeclaration] % expression
138
- params = params.merge(arguments: arguments)
139
- unless params[:executionContextId]
140
- params = params.merge(executionContextId: execution_id)
125
+ if on
126
+ response = @page.command("DOM.resolveNode", nodeId: on.node_id)
127
+ object_id = response.dig("object", "objectId")
128
+ options.merge!(objectId: object_id)
129
+ else
130
+ options.merge!(executionContextId: execution_id)
141
131
  end
142
132
 
133
+ options.merge!(functionDeclaration: expression,
134
+ arguments: prepare_args(arguments))
135
+
143
136
  response = @page.command("Runtime.callFunctionOn",
144
- slowmoable: true,
145
- **params)
137
+ wait: wait, slowmoable: true,
138
+ **options)
146
139
  handle_error(response)
147
140
  response = response["result"]
148
141
 
149
- handle ? handle_response(response) : response
142
+ handle ? handle_response(response) : response.dig("value")
150
143
  end
151
144
  end
152
145
 
@@ -239,15 +232,13 @@ module Ferrum
239
232
  return false;
240
233
  }
241
234
 
242
- const seen = [];
243
- function detectCycle(obj) {
235
+ function detectCycle(obj, seen) {
244
236
  if (typeof obj === "object") {
245
237
  if (seen.indexOf(obj) !== -1) {
246
238
  return true;
247
239
  }
248
- seen.push(obj);
249
240
  for (let key in obj) {
250
- if (obj.hasOwnProperty(key) && detectCycle(obj[key])) {
241
+ if (obj.hasOwnProperty(key) && detectCycle(obj[key], seen.concat([obj]))) {
251
242
  return true;
252
243
  }
253
244
  }
@@ -256,7 +247,7 @@ module Ferrum
256
247
  return false;
257
248
  }
258
249
 
259
- return detectCycle(this);
250
+ return detectCycle(this, []);
260
251
  }
261
252
  JS
262
253
  )
@@ -267,12 +258,4 @@ module Ferrum
267
258
  end
268
259
  end
269
260
  end
270
-
271
- class CyclicObject
272
- include Singleton
273
-
274
- def inspect
275
- %(#<#{self.class} JavaScript object that cannot be represented in Ruby>)
276
- end
277
- end
278
261
  end
@@ -83,19 +83,21 @@ module Ferrum
83
83
  @page.command("Fetch.enable", handleAuthRequests: true, patterns: [pattern])
84
84
  end
85
85
 
86
- def authorize(user:, password:, type: :server)
86
+ def authorize(user:, password:, type: :server, &block)
87
87
  unless AUTHORIZE_TYPE.include?(type)
88
88
  raise ArgumentError, ":type should be in #{AUTHORIZE_TYPE}"
89
89
  end
90
90
 
91
+ if !block_given? && !@page.subscribed?("Fetch.requestPaused")
92
+ raise ArgumentError, "Block is missing, call `authorize(...) { |r| r.continue } or subscribe to `on(:request)` events before calling it"
93
+ end
94
+
91
95
  @authorized_ids ||= {}
92
96
  @authorized_ids[type] ||= []
93
97
 
94
98
  intercept
95
99
 
96
- @page.on(:request) do |request|
97
- request.continue
98
- end
100
+ @page.on(:request, &block)
99
101
 
100
102
  @page.on(:auth) do |request, index, total|
101
103
  if request.auth_challenge?(type)
@@ -157,12 +159,27 @@ module Ferrum
157
159
  end
158
160
  end
159
161
 
162
+ @page.on("Network.loadingFailed") do |params|
163
+ exchange = select(params["requestId"]).last
164
+ exchange.error ||= Network::Error.new
165
+
166
+ exchange.error.id = params["requestId"]
167
+ exchange.error.type = params["type"]
168
+ exchange.error.error_text = params["errorText"]
169
+ exchange.error.monotonic_time = params["timestamp"]
170
+ exchange.error.canceled = params["canceled"]
171
+ end
172
+
160
173
  @page.on("Log.entryAdded") do |params|
161
174
  entry = params["entry"] || {}
162
175
  if entry["source"] == "network" && entry["level"] == "error"
163
176
  exchange = select(entry["networkRequestId"]).last
164
- error = Network::Error.new(entry)
165
- exchange.error = error
177
+ exchange.error ||= Network::Error.new
178
+
179
+ exchange.error.id = entry["networkRequestId"]
180
+ exchange.error.url = entry["url"]
181
+ exchange.error.description = entry["text"]
182
+ exchange.error.timestamp = entry["timestamp"]
166
183
  end
167
184
  end
168
185
  end