ferrum 0.7 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,7 +13,7 @@ module Ferrum
13
13
 
14
14
  def initialize
15
15
  super
16
- @on = Hash.new { |h, k| h[k] = [] }
16
+ @on = Concurrent::Hash.new { |h, k| h[k] = Concurrent::Array.new }
17
17
  end
18
18
 
19
19
  def on(event, &block)
@@ -21,6 +21,10 @@ module Ferrum
21
21
  true
22
22
  end
23
23
 
24
+ def subscribed?(event)
25
+ @on.key?(event)
26
+ end
27
+
24
28
  def call(message)
25
29
  method, params = message.values_at("method", "params")
26
30
  total = @on[method].size
@@ -8,24 +8,32 @@ module Ferrum
8
8
  class Browser
9
9
  class WebSocket
10
10
  WEBSOCKET_BUG_SLEEP = 0.01
11
+ SKIP_LOGGING_SCREENSHOTS = !ENV["FERRUM_LOGGING_SCREENSHOTS"]
11
12
 
12
13
  attr_reader :url, :messages
13
14
 
14
- def initialize(url, logger)
15
+ def initialize(url, max_receive_size, logger)
15
16
  @url = url
16
17
  @logger = logger
17
18
  uri = URI.parse(@url)
18
19
  @sock = TCPSocket.new(uri.host, uri.port)
19
- @driver = ::WebSocket::Driver.client(self)
20
+ max_receive_size ||= ::WebSocket::Driver::MAX_LENGTH
21
+ @driver = ::WebSocket::Driver.client(self, max_length: max_receive_size)
20
22
  @messages = Queue.new
21
23
 
24
+ if SKIP_LOGGING_SCREENSHOTS
25
+ @screenshot_commands = Concurrent::Hash.new
26
+ end
27
+
22
28
  @driver.on(:open, &method(:on_open))
23
29
  @driver.on(:message, &method(:on_message))
24
30
  @driver.on(:close, &method(:on_close))
25
31
 
26
32
  @thread = Thread.new do
27
33
  Thread.current.abort_on_exception = true
28
- Thread.current.report_on_exception = true if Thread.current.respond_to?(:report_on_exception=)
34
+ if Thread.current.respond_to?(:report_on_exception=)
35
+ Thread.current.report_on_exception = true
36
+ end
29
37
 
30
38
  begin
31
39
  while data = @sock.readpartial(512)
@@ -47,7 +55,14 @@ module Ferrum
47
55
  def on_message(event)
48
56
  data = JSON.parse(event.data)
49
57
  @messages.push(data)
50
- @logger&.puts(" ◀ #{Ferrum.elapsed_time} #{event.data}\n")
58
+
59
+ output = event.data
60
+ if SKIP_LOGGING_SCREENSHOTS && @screenshot_commands[data["id"]]
61
+ @screenshot_commands.delete(data["id"])
62
+ output.sub!(/{"data":"(.*)"}/, %("Set FERRUM_LOGGING_SCREENSHOTS=true to see screenshots in Base64"))
63
+ end
64
+
65
+ @logger&.puts(" ◀ #{Ferrum.elapsed_time} #{output}\n")
51
66
  end
52
67
 
53
68
  def on_close(_event)
@@ -56,6 +71,10 @@ module Ferrum
56
71
  end
57
72
 
58
73
  def send_message(data)
74
+ if SKIP_LOGGING_SCREENSHOTS
75
+ @screenshot_commands[data[:id]] = true
76
+ end
77
+
59
78
  json = data.to_json
60
79
  @driver.text(json)
61
80
  @logger&.puts("\n\n▶ #{Ferrum.elapsed_time} #{json}")
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ferrum
4
+ class Browser
5
+ class Xvfb
6
+ NOT_FOUND = "Could not find an executable for the Xvfb. Try to install " \
7
+ "it with your package manager".freeze
8
+
9
+ def self.start(*args)
10
+ new(*args).tap(&:start)
11
+ end
12
+
13
+ def self.xvfb_path
14
+ Cliver.detect("Xvfb")
15
+ end
16
+
17
+ attr_reader :screen_size, :display_id, :pid
18
+
19
+ def initialize(options)
20
+ @path = self.class.xvfb_path
21
+ raise Cliver::Dependency::NotFound.new(NOT_FOUND) unless @path
22
+
23
+ @screen_size = options.fetch(:window_size, [1024, 768]).join("x") + "x24"
24
+ @display_id = (Time.now.to_f * 1000).to_i % 100_000_000
25
+ end
26
+
27
+ def start
28
+ @pid = ::Process.spawn("#{@path} :#{display_id} -screen 0 #{screen_size}")
29
+ ::Process.detach(@pid)
30
+ end
31
+
32
+ def to_env
33
+ { "DISPLAY" => ":#{display_id}" }
34
+ end
35
+ end
36
+ end
37
+ end
@@ -42,9 +42,9 @@ module Ferrum
42
42
  end
43
43
 
44
44
  def create_target
45
- target_id = @browser.command("Target.createTarget",
46
- browserContextId: @id,
47
- url: "about:blank")["targetId"]
45
+ @browser.command("Target.createTarget",
46
+ browserContextId: @id,
47
+ url: "about:blank")
48
48
  target = @pendings.take(@browser.timeout)
49
49
  raise NoSuchTargetError unless target.is_a?(Target)
50
50
  @targets[target.id] = target
@@ -23,6 +23,10 @@ module Ferrum
23
23
  @attributes["path"]
24
24
  end
25
25
 
26
+ def samesite
27
+ @attributes["sameSite"]
28
+ end
29
+
26
30
  def size
27
31
  @attributes["size"]
28
32
  end
@@ -65,6 +69,9 @@ module Ferrum
65
69
  cookie[:value] ||= value
66
70
  cookie[:domain] ||= default_domain
67
71
 
72
+ cookie[:httpOnly] = cookie.delete(:httponly) if cookie.key?(:httponly)
73
+ cookie[:sameSite] = cookie.delete(:samesite) if cookie.key?(:samesite)
74
+
68
75
  expires = cookie.delete(:expires).to_i
69
76
  cookie[:expires] = expires if expires > 0
70
77
 
data/lib/ferrum/dialog.rb CHANGED
@@ -14,11 +14,11 @@ module Ferrum
14
14
  options = { accept: true }
15
15
  response = prompt_text || default_prompt
16
16
  options.merge!(promptText: response) if response
17
- @page.command("Page.handleJavaScriptDialog", **options)
17
+ @page.command("Page.handleJavaScriptDialog", slowmoable: true, **options)
18
18
  end
19
19
 
20
20
  def dismiss
21
- @page.command("Page.handleJavaScriptDialog", accept: false)
21
+ @page.command("Page.handleJavaScriptDialog", slowmoable: true, accept: false)
22
22
  end
23
23
 
24
24
  def match?(regexp)
data/lib/ferrum/frame.rb CHANGED
@@ -7,11 +7,11 @@ module Ferrum
7
7
  class Frame
8
8
  include DOM, Runtime
9
9
 
10
- attr_reader :id, :page, :parent_id, :state
11
- attr_writer :execution_id
12
- attr_accessor :name
10
+ attr_reader :page, :parent_id, :state
11
+ attr_accessor :id, :name
13
12
 
14
13
  def initialize(id, page, parent_id = nil)
14
+ @execution_id = nil
15
15
  @id, @page, @parent_id = id, page, parent_id
16
16
  end
17
17
 
@@ -35,6 +35,15 @@ module Ferrum
35
35
  @parent_id.nil?
36
36
  end
37
37
 
38
+ def set_content(html)
39
+ evaluate_async(%(
40
+ document.open();
41
+ document.write(arguments[0]);
42
+ document.close();
43
+ arguments[1](true);
44
+ ), @page.timeout, html)
45
+ end
46
+
38
47
  def execution_id?(execution_id)
39
48
  @execution_id == execution_id
40
49
  end
@@ -47,6 +56,14 @@ module Ferrum
47
56
  @page.event.wait(@page.timeout) ? retry : raise
48
57
  end
49
58
 
59
+ def set_execution_id(value)
60
+ @execution_id ||= value
61
+ end
62
+
63
+ def reset_execution_id
64
+ @execution_id = nil
65
+ end
66
+
50
67
  def inspect
51
68
  %(#<#{self.class} @id=#{@id.inspect} @parent_id=#{@parent_id.inspect} @name=#{@name.inspect} @state=#{@state.inspect} @execution_id=#{@execution_id.inspect}>)
52
69
  end
@@ -29,65 +29,62 @@ module Ferrum
29
29
  end
30
30
 
31
31
  def doctype
32
- evaluate("new XMLSerializer().serializeToString(document.doctype)")
32
+ evaluate("document.doctype && new XMLSerializer().serializeToString(document.doctype)")
33
33
  end
34
34
 
35
35
  def body
36
36
  evaluate("document.documentElement.outerHTML")
37
37
  end
38
38
 
39
- def at_xpath(selector, within: nil)
40
- xpath(selector, within: within).first
41
- end
42
-
43
- # FIXME: Check within
44
39
  def xpath(selector, within: nil)
45
- evaluate_async(%(
46
- try {
47
- let selector = arguments[0];
48
- let within = arguments[1] || document;
40
+ expr = <<~JS
41
+ function(selector, within) {
49
42
  let results = [];
43
+ within ||= document
50
44
 
51
45
  let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
52
46
  for (let i = 0; i < xpath.snapshotLength; i++) {
53
47
  results.push(xpath.snapshotItem(i));
54
48
  }
55
49
 
56
- arguments[2](results);
57
- } catch (error) {
58
- // DOMException.INVALID_EXPRESSION_ERR is undefined, using pure code
59
- if (error.code == DOMException.SYNTAX_ERR || error.code == 51) {
60
- throw "Invalid Selector";
61
- } else {
62
- throw error;
63
- }
64
- }), @page.timeout, selector, within)
65
- end
50
+ return results;
51
+ }
52
+ JS
66
53
 
67
- # FIXME css doesn't work for a frame w/o execution_id
68
- def css(selector, within: nil)
69
- node_id = within&.node_id || @page.document_id
54
+ evaluate_func(expr, selector, within)
55
+ end
70
56
 
71
- ids = @page.command("DOM.querySelectorAll",
72
- nodeId: node_id,
73
- selector: selector)["nodeIds"]
74
- ids.map { |id| build_node(id) }.compact
57
+ def at_xpath(selector, within: nil)
58
+ expr = <<~JS
59
+ function(selector, within) {
60
+ within ||= document
61
+ let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
62
+ return xpath.snapshotItem(0);
63
+ }
64
+ JS
65
+ evaluate_func(expr, selector, within)
75
66
  end
76
67
 
77
- def at_css(selector, within: nil)
78
- node_id = within&.node_id || @page.document_id
68
+ def css(selector, within: nil)
69
+ expr = <<~JS
70
+ function(selector, within) {
71
+ within ||= document
72
+ return Array.from(within.querySelectorAll(selector));
73
+ }
74
+ JS
79
75
 
80
- id = @page.command("DOM.querySelector",
81
- nodeId: node_id,
82
- selector: selector)["nodeId"]
83
- build_node(id)
76
+ evaluate_func(expr, selector, within)
84
77
  end
85
78
 
86
- private
79
+ def at_css(selector, within: nil)
80
+ expr = <<~JS
81
+ function(selector, within) {
82
+ within ||= document
83
+ return within.querySelector(selector);
84
+ }
85
+ JS
87
86
 
88
- def build_node(node_id)
89
- description = @page.command("DOM.describeNode", nodeId: node_id)
90
- Node.new(self, @page.target_id, node_id, description["node"])
87
+ evaluate_func(expr, selector, within)
91
88
  end
92
89
  end
93
90
  end
@@ -1,36 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "singleton"
4
+
3
5
  module Ferrum
6
+ class CyclicObject
7
+ include Singleton
8
+
9
+ def inspect
10
+ %(#<#{self.class} JavaScript object that cannot be represented in Ruby>)
11
+ end
12
+ end
13
+
4
14
  class Frame
5
15
  module Runtime
6
16
  INTERMITTENT_ATTEMPTS = ENV.fetch("FERRUM_INTERMITTENT_ATTEMPTS", 6).to_i
7
17
  INTERMITTENT_SLEEP = ENV.fetch("FERRUM_INTERMITTENT_SLEEP", 0.1).to_f
8
18
 
9
- EXECUTE_OPTIONS = {
10
- returnByValue: true,
11
- functionDeclaration: %(function() { %s })
12
- }.freeze
13
- DEFAULT_OPTIONS = {
14
- functionDeclaration: %(function() { return %s })
15
- }.freeze
16
- EVALUATE_ASYNC_OPTIONS = {
17
- awaitPromise: true,
18
- functionDeclaration: %(
19
- function() {
20
- return new Promise((__resolve, __reject) => {
21
- try {
22
- arguments[arguments.length] = r => __resolve(r);
23
- arguments.length = arguments.length + 1;
24
- setTimeout(() => __reject(new Error("timed out promise")), %s);
25
- %s
26
- } catch(error) {
27
- __reject(error);
28
- }
29
- });
30
- }
31
- )
32
- }.freeze
33
-
34
19
  SCRIPT_SRC_TAG = <<~JS
35
20
  const script = document.createElement("script");
36
21
  script.src = arguments[0];
@@ -61,35 +46,45 @@ module Ferrum
61
46
  JS
62
47
 
63
48
  def evaluate(expression, *args)
64
- call(*args, expression: expression)
49
+ expression = "function() { return %s }" % expression
50
+ call(expression: expression, arguments: args)
65
51
  end
66
52
 
67
- def evaluate_async(expression, wait_time, *args)
68
- call(*args, expression: expression, wait_time: wait_time * 1000, **EVALUATE_ASYNC_OPTIONS)
53
+ def evaluate_async(expression, wait, *args)
54
+ template = <<~JS
55
+ function() {
56
+ return new Promise((__f, __r) => {
57
+ try {
58
+ arguments[arguments.length] = r => __f(r);
59
+ arguments.length = arguments.length + 1;
60
+ setTimeout(() => __r(new Error("timed out promise")), %s);
61
+ %s
62
+ } catch(error) {
63
+ __r(error);
64
+ }
65
+ });
66
+ }
67
+ JS
68
+
69
+ expression = template % [wait * 1000, expression]
70
+ call(expression: expression, arguments: args, awaitPromise: true)
69
71
  end
70
72
 
71
73
  def execute(expression, *args)
72
- call(*args, expression: expression, handle: false, **EXECUTE_OPTIONS)
74
+ expression = "function() { %s }" % expression
75
+ call(expression: expression, arguments: args, handle: false, returnByValue: true)
73
76
  true
74
77
  end
75
78
 
76
- def evaluate_on(node:, expression:, by_value: true, wait: 0)
77
- errors = [NodeNotFoundError, NoExecutionContextError]
78
- attempts, sleep = INTERMITTENT_ATTEMPTS, INTERMITTENT_SLEEP
79
-
80
- Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
81
- response = @page.command("DOM.resolveNode", nodeId: node.node_id)
82
- object_id = response.dig("object", "objectId")
83
- options = DEFAULT_OPTIONS.merge(objectId: object_id)
84
- options[:functionDeclaration] = options[:functionDeclaration] % expression
85
- options.merge!(returnByValue: by_value)
86
-
87
- response = @page.command("Runtime.callFunctionOn",
88
- wait: wait, **options)["result"]
89
- .tap { |r| handle_error(r) }
79
+ def evaluate_func(expression, *args, on: nil)
80
+ call(expression: expression, arguments: args, on: on)
81
+ end
90
82
 
91
- by_value ? response.dig("value") : handle_response(response)
92
- end
83
+ def evaluate_on(node:, expression:, by_value: true, wait: 0)
84
+ options = { handle: true }
85
+ expression = "function() { return %s }" % expression
86
+ options = { handle: false, returnByValue: true } if by_value
87
+ call(expression: expression, on: node, wait: wait, **options)
93
88
  end
94
89
 
95
90
  def add_script_tag(url: nil, path: nil, content: nil, type: "text/javascript")
@@ -122,29 +117,37 @@ module Ferrum
122
117
 
123
118
  private
124
119
 
125
- def call(*args, expression:, wait_time: nil, handle: true, **options)
120
+ def call(expression:, arguments: [], on: nil, wait: 0, handle: true, **options)
126
121
  errors = [NodeNotFoundError, NoExecutionContextError]
127
122
  attempts, sleep = INTERMITTENT_ATTEMPTS, INTERMITTENT_SLEEP
128
123
 
129
124
  Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
130
- arguments = prepare_args(args)
131
- params = DEFAULT_OPTIONS.merge(options)
132
- expression = [wait_time, expression] if wait_time
133
- params[:functionDeclaration] = params[:functionDeclaration] % expression
134
- params = params.merge(arguments: arguments)
135
- unless params[:executionContextId]
125
+ params = options.dup
126
+
127
+ if on
128
+ response = @page.command("DOM.resolveNode", nodeId: on.node_id)
129
+ object_id = response.dig("object", "objectId")
130
+ params = params.merge(objectId: object_id)
131
+ end
132
+
133
+ if params[:executionContextId].nil? && params[:objectId].nil?
136
134
  params = params.merge(executionContextId: execution_id)
137
135
  end
138
136
 
139
137
  response = @page.command("Runtime.callFunctionOn",
140
- **params)["result"].tap { |r| handle_error(r) }
138
+ wait: wait, slowmoable: true,
139
+ **params.merge(functionDeclaration: expression,
140
+ arguments: prepare_args(arguments)))
141
+ handle_error(response)
142
+ response = response["result"]
141
143
 
142
- handle ? handle_response(response) : response
144
+ handle ? handle_response(response) : response.dig("value")
143
145
  end
144
146
  end
145
147
 
146
148
  # FIXME: We should have a central place to handle all type of errors
147
- def handle_error(result)
149
+ def handle_error(response)
150
+ result = response["result"]
148
151
  return if result["subtype"] != "error"
149
152
 
150
153
  case result["description"]
@@ -209,7 +212,7 @@ module Ferrum
209
212
 
210
213
  def reduce_props(object_id, to)
211
214
  if cyclic?(object_id).dig("result", "value")
212
- return "(cyclic structure)"
215
+ return to.is_a?(Array) ? [cyclic_object] : cyclic_object
213
216
  else
214
217
  props = @page.command("Runtime.getProperties", ownProperties: true, objectId: object_id)
215
218
  props["result"].reduce(to) do |memo, prop|
@@ -220,37 +223,40 @@ module Ferrum
220
223
  end
221
224
 
222
225
  def cyclic?(object_id)
223
- @page.command("Runtime.callFunctionOn",
224
- objectId: object_id,
225
- returnByValue: true,
226
- functionDeclaration: <<~JS
227
- function() {
228
- if (Array.isArray(this) &&
229
- this.every(e => e instanceof Node)) {
230
- return false;
231
- }
226
+ @page.command(
227
+ "Runtime.callFunctionOn",
228
+ objectId: object_id,
229
+ returnByValue: true,
230
+ functionDeclaration: <<~JS
231
+ function() {
232
+ if (Array.isArray(this) &&
233
+ this.every(e => e instanceof Node)) {
234
+ return false;
235
+ }
232
236
 
233
- const seen = [];
234
- function detectCycle(obj) {
235
- if (typeof obj === "object") {
236
- if (seen.indexOf(obj) !== -1) {
237
- return true;
238
- }
239
- seen.push(obj);
240
- for (let key in obj) {
241
- if (obj.hasOwnProperty(key) && detectCycle(obj[key])) {
242
- return true;
243
- }
244
- }
245
- }
246
-
247
- return false;
237
+ function detectCycle(obj, seen) {
238
+ if (typeof obj === "object") {
239
+ if (seen.indexOf(obj) !== -1) {
240
+ return true;
241
+ }
242
+ for (let key in obj) {
243
+ if (obj.hasOwnProperty(key) && detectCycle(obj[key], seen.concat([obj]))) {
244
+ return true;
248
245
  }
249
-
250
- return detectCycle(this);
251
246
  }
252
- JS
253
- )
247
+ }
248
+
249
+ return false;
250
+ }
251
+
252
+ return detectCycle(this, []);
253
+ }
254
+ JS
255
+ )
256
+ end
257
+
258
+ def cyclic_object
259
+ CyclicObject.instance
254
260
  end
255
261
  end
256
262
  end