ferrum 0.6.2 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,9 +7,13 @@ module Ferrum
7
7
  class Subscriber
8
8
  include Concurrent::Async
9
9
 
10
+ def self.build(size)
11
+ (0..size).map { new }
12
+ end
13
+
10
14
  def initialize
11
15
  super
12
- @on = Hash.new { |h, k| h[k] = [] }
16
+ @on = Concurrent::Hash.new { |h, k| h[k] = Concurrent::Array.new }
13
17
  end
14
18
 
15
19
  def on(event, &block)
@@ -17,6 +21,10 @@ module Ferrum
17
21
  true
18
22
  end
19
23
 
24
+ def subscribed?(event)
25
+ @on.key?(event)
26
+ end
27
+
20
28
  def call(message)
21
29
  method, params = message.values_at("method", "params")
22
30
  total = @on[method].size
@@ -8,24 +8,32 @@ module Ferrum
8
8
  class Browser
9
9
  class WebSocket
10
10
  WEBSOCKET_BUG_SLEEP = 0.01
11
+ SKIP_LOGGING_SCREENSHOTS = !ENV["FERRUM_LOGGING_SCREENSHOTS"]
11
12
 
12
13
  attr_reader :url, :messages
13
14
 
14
- def initialize(url, logger)
15
+ def initialize(url, max_receive_size, logger)
15
16
  @url = url
16
17
  @logger = logger
17
18
  uri = URI.parse(@url)
18
19
  @sock = TCPSocket.new(uri.host, uri.port)
19
- @driver = ::WebSocket::Driver.client(self)
20
+ max_receive_size ||= ::WebSocket::Driver::MAX_LENGTH
21
+ @driver = ::WebSocket::Driver.client(self, max_length: max_receive_size)
20
22
  @messages = Queue.new
21
23
 
24
+ if SKIP_LOGGING_SCREENSHOTS
25
+ @screenshot_commands = Concurrent::Hash.new
26
+ end
27
+
22
28
  @driver.on(:open, &method(:on_open))
23
29
  @driver.on(:message, &method(:on_message))
24
30
  @driver.on(:close, &method(:on_close))
25
31
 
26
32
  @thread = Thread.new do
27
33
  Thread.current.abort_on_exception = true
28
- Thread.current.report_on_exception = true if Thread.current.respond_to?(:report_on_exception=)
34
+ if Thread.current.respond_to?(:report_on_exception=)
35
+ Thread.current.report_on_exception = true
36
+ end
29
37
 
30
38
  begin
31
39
  while data = @sock.readpartial(512)
@@ -47,7 +55,14 @@ module Ferrum
47
55
  def on_message(event)
48
56
  data = JSON.parse(event.data)
49
57
  @messages.push(data)
50
- @logger&.puts(" ◀ #{Ferrum.elapsed_time} #{event.data}\n")
58
+
59
+ output = event.data
60
+ if SKIP_LOGGING_SCREENSHOTS && @screenshot_commands[data["id"]]
61
+ @screenshot_commands.delete(data["id"])
62
+ output.sub!(/{"data":"(.*)"}/, %("Set FERRUM_LOGGING_SCREENSHOTS=true to see screenshots in Base64"))
63
+ end
64
+
65
+ @logger&.puts(" ◀ #{Ferrum.elapsed_time} #{output}\n")
51
66
  end
52
67
 
53
68
  def on_close(_event)
@@ -56,6 +71,10 @@ module Ferrum
56
71
  end
57
72
 
58
73
  def send_message(data)
74
+ if SKIP_LOGGING_SCREENSHOTS
75
+ @screenshot_commands[data[:id]] = true
76
+ end
77
+
59
78
  json = data.to_json
60
79
  @driver.text(json)
61
80
  @logger&.puts("\n\n▶ #{Ferrum.elapsed_time} #{json}")
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ferrum
4
+ class Browser
5
+ class Xvfb
6
+ NOT_FOUND = "Could not find an executable for the Xvfb. Try to install " \
7
+ "it with your package manager".freeze
8
+
9
+ def self.start(*args)
10
+ new(*args).tap(&:start)
11
+ end
12
+
13
+ def self.xvfb_path
14
+ Cliver.detect("Xvfb")
15
+ end
16
+
17
+ attr_reader :screen_size, :display_id, :pid
18
+
19
+ def initialize(options)
20
+ @path = self.class.xvfb_path
21
+ raise Cliver::Dependency::NotFound.new(NOT_FOUND) unless @path
22
+
23
+ @screen_size = options.fetch(:window_size, [1024, 768]).join("x") + "x24"
24
+ @display_id = (Time.now.to_f * 1000).to_i % 100_000_000
25
+ end
26
+
27
+ def start
28
+ @pid = ::Process.spawn("#{@path} :#{display_id} -screen 0 #{screen_size}")
29
+ ::Process.detach(@pid)
30
+ end
31
+
32
+ def to_env
33
+ { "DISPLAY" => ":#{display_id}" }
34
+ end
35
+ end
36
+ end
37
+ end
@@ -42,9 +42,9 @@ module Ferrum
42
42
  end
43
43
 
44
44
  def create_target
45
- target_id = @browser.command("Target.createTarget",
46
- browserContextId: @id,
47
- url: "about:blank")["targetId"]
45
+ @browser.command("Target.createTarget",
46
+ browserContextId: @id,
47
+ url: "about:blank")
48
48
  target = @pendings.take(@browser.timeout)
49
49
  raise NoSuchTargetError unless target.is_a?(Target)
50
50
  @targets[target.id] = target
@@ -23,6 +23,10 @@ module Ferrum
23
23
  @attributes["path"]
24
24
  end
25
25
 
26
+ def samesite
27
+ @attributes["sameSite"]
28
+ end
29
+
26
30
  def size
27
31
  @attributes["size"]
28
32
  end
@@ -65,6 +69,9 @@ module Ferrum
65
69
  cookie[:value] ||= value
66
70
  cookie[:domain] ||= default_domain
67
71
 
72
+ cookie[:httpOnly] = cookie.delete(:httponly) if cookie.key?(:httponly)
73
+ cookie[:sameSite] = cookie.delete(:samesite) if cookie.key?(:samesite)
74
+
68
75
  expires = cookie.delete(:expires).to_i
69
76
  cookie[:expires] = expires if expires > 0
70
77
 
data/lib/ferrum/dialog.rb CHANGED
@@ -14,11 +14,11 @@ module Ferrum
14
14
  options = { accept: true }
15
15
  response = prompt_text || default_prompt
16
16
  options.merge!(promptText: response) if response
17
- @page.command("Page.handleJavaScriptDialog", **options)
17
+ @page.command("Page.handleJavaScriptDialog", slowmoable: true, **options)
18
18
  end
19
19
 
20
20
  def dismiss
21
- @page.command("Page.handleJavaScriptDialog", accept: false)
21
+ @page.command("Page.handleJavaScriptDialog", slowmoable: true, accept: false)
22
22
  end
23
23
 
24
24
  def match?(regexp)
data/lib/ferrum/frame.rb CHANGED
@@ -7,19 +7,17 @@ module Ferrum
7
7
  class Frame
8
8
  include DOM, Runtime
9
9
 
10
- attr_reader :id, :page, :parent_id, :state
11
- attr_writer :execution_id
12
- attr_accessor :name
10
+ attr_reader :page, :parent_id, :state
11
+ attr_accessor :id, :name
13
12
 
14
13
  def initialize(id, page, parent_id = nil)
14
+ @execution_id = nil
15
15
  @id, @page, @parent_id = id, page, parent_id
16
16
  end
17
17
 
18
18
  # Can be one of:
19
19
  # * started_loading
20
20
  # * navigated
21
- # * scheduled_navigation
22
- # * cleared_scheduled_navigation
23
21
  # * stopped_loading
24
22
  def state=(value)
25
23
  @state = value
@@ -37,6 +35,15 @@ module Ferrum
37
35
  @parent_id.nil?
38
36
  end
39
37
 
38
+ def set_content(html)
39
+ evaluate_async(%(
40
+ document.open();
41
+ document.write(arguments[0]);
42
+ document.close();
43
+ arguments[1](true);
44
+ ), @page.timeout, html)
45
+ end
46
+
40
47
  def execution_id?(execution_id)
41
48
  @execution_id == execution_id
42
49
  end
@@ -49,6 +56,14 @@ module Ferrum
49
56
  @page.event.wait(@page.timeout) ? retry : raise
50
57
  end
51
58
 
59
+ def set_execution_id(value)
60
+ @execution_id ||= value
61
+ end
62
+
63
+ def reset_execution_id
64
+ @execution_id = nil
65
+ end
66
+
52
67
  def inspect
53
68
  %(#<#{self.class} @id=#{@id.inspect} @parent_id=#{@parent_id.inspect} @name=#{@name.inspect} @state=#{@state.inspect} @execution_id=#{@execution_id.inspect}>)
54
69
  end
@@ -29,65 +29,62 @@ module Ferrum
29
29
  end
30
30
 
31
31
  def doctype
32
- evaluate("new XMLSerializer().serializeToString(document.doctype)")
32
+ evaluate("document.doctype && new XMLSerializer().serializeToString(document.doctype)")
33
33
  end
34
34
 
35
35
  def body
36
36
  evaluate("document.documentElement.outerHTML")
37
37
  end
38
38
 
39
- def at_xpath(selector, within: nil)
40
- xpath(selector, within: within).first
41
- end
42
-
43
- # FIXME: Check within
44
39
  def xpath(selector, within: nil)
45
- evaluate_async(%(
46
- try {
47
- let selector = arguments[0];
48
- let within = arguments[1] || document;
40
+ expr = <<~JS
41
+ function(selector, within) {
49
42
  let results = [];
43
+ within ||= document
50
44
 
51
45
  let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
52
46
  for (let i = 0; i < xpath.snapshotLength; i++) {
53
47
  results.push(xpath.snapshotItem(i));
54
48
  }
55
49
 
56
- arguments[2](results);
57
- } catch (error) {
58
- // DOMException.INVALID_EXPRESSION_ERR is undefined, using pure code
59
- if (error.code == DOMException.SYNTAX_ERR || error.code == 51) {
60
- throw "Invalid Selector";
61
- } else {
62
- throw error;
63
- }
64
- }), @page.timeout, selector, within)
65
- end
50
+ return results;
51
+ }
52
+ JS
66
53
 
67
- # FIXME css doesn't work for a frame w/o execution_id
68
- def css(selector, within: nil)
69
- node_id = within&.node_id || @page.document_id
54
+ evaluate_func(expr, selector, within)
55
+ end
70
56
 
71
- ids = @page.command("DOM.querySelectorAll",
72
- nodeId: node_id,
73
- selector: selector)["nodeIds"]
74
- ids.map { |id| build_node(id) }.compact
57
+ def at_xpath(selector, within: nil)
58
+ expr = <<~JS
59
+ function(selector, within) {
60
+ within ||= document
61
+ let xpath = document.evaluate(selector, within, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
62
+ return xpath.snapshotItem(0);
63
+ }
64
+ JS
65
+ evaluate_func(expr, selector, within)
75
66
  end
76
67
 
77
- def at_css(selector, within: nil)
78
- node_id = within&.node_id || @page.document_id
68
+ def css(selector, within: nil)
69
+ expr = <<~JS
70
+ function(selector, within) {
71
+ within ||= document
72
+ return Array.from(within.querySelectorAll(selector));
73
+ }
74
+ JS
79
75
 
80
- id = @page.command("DOM.querySelector",
81
- nodeId: node_id,
82
- selector: selector)["nodeId"]
83
- build_node(id)
76
+ evaluate_func(expr, selector, within)
84
77
  end
85
78
 
86
- private
79
+ def at_css(selector, within: nil)
80
+ expr = <<~JS
81
+ function(selector, within) {
82
+ within ||= document
83
+ return within.querySelector(selector);
84
+ }
85
+ JS
87
86
 
88
- def build_node(node_id)
89
- description = @page.command("DOM.describeNode", nodeId: node_id)
90
- Node.new(self, @page.target_id, node_id, description["node"])
87
+ evaluate_func(expr, selector, within)
91
88
  end
92
89
  end
93
90
  end
@@ -1,36 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "singleton"
4
+
3
5
  module Ferrum
6
+ class CyclicObject
7
+ include Singleton
8
+
9
+ def inspect
10
+ %(#<#{self.class} JavaScript object that cannot be represented in Ruby>)
11
+ end
12
+ end
13
+
4
14
  class Frame
5
15
  module Runtime
6
16
  INTERMITTENT_ATTEMPTS = ENV.fetch("FERRUM_INTERMITTENT_ATTEMPTS", 6).to_i
7
17
  INTERMITTENT_SLEEP = ENV.fetch("FERRUM_INTERMITTENT_SLEEP", 0.1).to_f
8
18
 
9
- EXECUTE_OPTIONS = {
10
- returnByValue: true,
11
- functionDeclaration: %(function() { %s })
12
- }.freeze
13
- DEFAULT_OPTIONS = {
14
- functionDeclaration: %(function() { return %s })
15
- }.freeze
16
- EVALUATE_ASYNC_OPTIONS = {
17
- awaitPromise: true,
18
- functionDeclaration: %(
19
- function() {
20
- return new Promise((__resolve, __reject) => {
21
- try {
22
- arguments[arguments.length] = r => __resolve(r);
23
- arguments.length = arguments.length + 1;
24
- setTimeout(() => __reject(new Error("timed out promise")), %s);
25
- %s
26
- } catch(error) {
27
- __reject(error);
28
- }
29
- });
30
- }
31
- )
32
- }.freeze
33
-
34
19
  SCRIPT_SRC_TAG = <<~JS
35
20
  const script = document.createElement("script");
36
21
  script.src = arguments[0];
@@ -61,35 +46,45 @@ module Ferrum
61
46
  JS
62
47
 
63
48
  def evaluate(expression, *args)
64
- call(*args, expression: expression)
49
+ expression = "function() { return %s }" % expression
50
+ call(expression: expression, arguments: args)
65
51
  end
66
52
 
67
- def evaluate_async(expression, wait_time, *args)
68
- call(*args, expression: expression, wait_time: wait_time * 1000, **EVALUATE_ASYNC_OPTIONS)
53
+ def evaluate_async(expression, wait, *args)
54
+ template = <<~JS
55
+ function() {
56
+ return new Promise((__f, __r) => {
57
+ try {
58
+ arguments[arguments.length] = r => __f(r);
59
+ arguments.length = arguments.length + 1;
60
+ setTimeout(() => __r(new Error("timed out promise")), %s);
61
+ %s
62
+ } catch(error) {
63
+ __r(error);
64
+ }
65
+ });
66
+ }
67
+ JS
68
+
69
+ expression = template % [wait * 1000, expression]
70
+ call(expression: expression, arguments: args, awaitPromise: true)
69
71
  end
70
72
 
71
73
  def execute(expression, *args)
72
- call(*args, expression: expression, handle: false, **EXECUTE_OPTIONS)
74
+ expression = "function() { %s }" % expression
75
+ call(expression: expression, arguments: args, handle: false, returnByValue: true)
73
76
  true
74
77
  end
75
78
 
76
- def evaluate_on(node:, expression:, by_value: true, wait: 0)
77
- errors = [NodeNotFoundError, NoExecutionContextError]
78
- attempts, sleep = INTERMITTENT_ATTEMPTS, INTERMITTENT_SLEEP
79
-
80
- Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
81
- response = @page.command("DOM.resolveNode", nodeId: node.node_id)
82
- object_id = response.dig("object", "objectId")
83
- options = DEFAULT_OPTIONS.merge(objectId: object_id)
84
- options[:functionDeclaration] = options[:functionDeclaration] % expression
85
- options.merge!(returnByValue: by_value)
86
-
87
- response = @page.command("Runtime.callFunctionOn",
88
- wait: wait, **options)["result"]
89
- .tap { |r| handle_error(r) }
79
+ def evaluate_func(expression, *args, on: nil)
80
+ call(expression: expression, arguments: args, on: on)
81
+ end
90
82
 
91
- by_value ? response.dig("value") : handle_response(response)
92
- end
83
+ def evaluate_on(node:, expression:, by_value: true, wait: 0)
84
+ options = { handle: true }
85
+ expression = "function() { return %s }" % expression
86
+ options = { handle: false, returnByValue: true } if by_value
87
+ call(expression: expression, on: node, wait: wait, **options)
93
88
  end
94
89
 
95
90
  def add_script_tag(url: nil, path: nil, content: nil, type: "text/javascript")
@@ -122,29 +117,37 @@ module Ferrum
122
117
 
123
118
  private
124
119
 
125
- def call(*args, expression:, wait_time: nil, handle: true, **options)
120
+ def call(expression:, arguments: [], on: nil, wait: 0, handle: true, **options)
126
121
  errors = [NodeNotFoundError, NoExecutionContextError]
127
122
  attempts, sleep = INTERMITTENT_ATTEMPTS, INTERMITTENT_SLEEP
128
123
 
129
124
  Ferrum.with_attempts(errors: errors, max: attempts, wait: sleep) do
130
- arguments = prepare_args(args)
131
- params = DEFAULT_OPTIONS.merge(options)
132
- expression = [wait_time, expression] if wait_time
133
- params[:functionDeclaration] = params[:functionDeclaration] % expression
134
- params = params.merge(arguments: arguments)
135
- unless params[:executionContextId]
125
+ params = options.dup
126
+
127
+ if on
128
+ response = @page.command("DOM.resolveNode", nodeId: on.node_id)
129
+ object_id = response.dig("object", "objectId")
130
+ params = params.merge(objectId: object_id)
131
+ end
132
+
133
+ if params[:executionContextId].nil? && params[:objectId].nil?
136
134
  params = params.merge(executionContextId: execution_id)
137
135
  end
138
136
 
139
137
  response = @page.command("Runtime.callFunctionOn",
140
- **params)["result"].tap { |r| handle_error(r) }
138
+ wait: wait, slowmoable: true,
139
+ **params.merge(functionDeclaration: expression,
140
+ arguments: prepare_args(arguments)))
141
+ handle_error(response)
142
+ response = response["result"]
141
143
 
142
- handle ? handle_response(response) : response
144
+ handle ? handle_response(response) : response.dig("value")
143
145
  end
144
146
  end
145
147
 
146
148
  # FIXME: We should have a central place to handle all type of errors
147
- def handle_error(result)
149
+ def handle_error(response)
150
+ result = response["result"]
148
151
  return if result["subtype"] != "error"
149
152
 
150
153
  case result["description"]
@@ -209,7 +212,7 @@ module Ferrum
209
212
 
210
213
  def reduce_props(object_id, to)
211
214
  if cyclic?(object_id).dig("result", "value")
212
- return "(cyclic structure)"
215
+ return to.is_a?(Array) ? [cyclic_object] : cyclic_object
213
216
  else
214
217
  props = @page.command("Runtime.getProperties", ownProperties: true, objectId: object_id)
215
218
  props["result"].reduce(to) do |memo, prop|
@@ -220,37 +223,40 @@ module Ferrum
220
223
  end
221
224
 
222
225
  def cyclic?(object_id)
223
- @page.command("Runtime.callFunctionOn",
224
- objectId: object_id,
225
- returnByValue: true,
226
- functionDeclaration: <<~JS
227
- function() {
228
- if (Array.isArray(this) &&
229
- this.every(e => e instanceof Node)) {
230
- return false;
231
- }
226
+ @page.command(
227
+ "Runtime.callFunctionOn",
228
+ objectId: object_id,
229
+ returnByValue: true,
230
+ functionDeclaration: <<~JS
231
+ function() {
232
+ if (Array.isArray(this) &&
233
+ this.every(e => e instanceof Node)) {
234
+ return false;
235
+ }
232
236
 
233
- const seen = [];
234
- function detectCycle(obj) {
235
- if (typeof obj === 'object') {
236
- if (seen.indexOf(obj) !== -1) {
237
- return true;
238
- }
239
- seen.push(obj);
240
- for (let key in obj) {
241
- if (obj.hasOwnProperty(key) && detectCycle(obj[key])) {
242
- return true;
243
- }
244
- }
245
- }
246
-
247
- return false;
237
+ function detectCycle(obj, seen) {
238
+ if (typeof obj === "object") {
239
+ if (seen.indexOf(obj) !== -1) {
240
+ return true;
241
+ }
242
+ for (let key in obj) {
243
+ if (obj.hasOwnProperty(key) && detectCycle(obj[key], seen.concat([obj]))) {
244
+ return true;
248
245
  }
249
-
250
- return detectCycle(this);
251
246
  }
252
- JS
253
- )
247
+ }
248
+
249
+ return false;
250
+ }
251
+
252
+ return detectCycle(this, []);
253
+ }
254
+ JS
255
+ )
256
+ end
257
+
258
+ def cyclic_object
259
+ CyclicObject.instance
254
260
  end
255
261
  end
256
262
  end