ferrum 0.13 → 0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/README.md +288 -154
  4. data/lib/ferrum/browser/command.rb +8 -0
  5. data/lib/ferrum/browser/options/chrome.rb +17 -5
  6. data/lib/ferrum/browser/options.rb +38 -25
  7. data/lib/ferrum/browser/process.rb +44 -17
  8. data/lib/ferrum/browser.rb +34 -52
  9. data/lib/ferrum/client/subscriber.rb +76 -0
  10. data/lib/ferrum/{browser → client}/web_socket.rb +36 -22
  11. data/lib/ferrum/client.rb +169 -0
  12. data/lib/ferrum/context.rb +19 -15
  13. data/lib/ferrum/contexts.rb +46 -12
  14. data/lib/ferrum/cookies/cookie.rb +57 -0
  15. data/lib/ferrum/cookies.rb +40 -4
  16. data/lib/ferrum/downloads.rb +60 -0
  17. data/lib/ferrum/errors.rb +2 -1
  18. data/lib/ferrum/frame.rb +1 -0
  19. data/lib/ferrum/headers.rb +1 -1
  20. data/lib/ferrum/network/exchange.rb +29 -2
  21. data/lib/ferrum/network/intercepted_request.rb +8 -17
  22. data/lib/ferrum/network/request.rb +23 -39
  23. data/lib/ferrum/network/request_params.rb +57 -0
  24. data/lib/ferrum/network/response.rb +25 -5
  25. data/lib/ferrum/network.rb +43 -16
  26. data/lib/ferrum/node.rb +21 -1
  27. data/lib/ferrum/page/frames.rb +5 -5
  28. data/lib/ferrum/page/screenshot.rb +42 -24
  29. data/lib/ferrum/page.rb +183 -131
  30. data/lib/ferrum/proxy.rb +1 -1
  31. data/lib/ferrum/target.rb +25 -5
  32. data/lib/ferrum/utils/elapsed_time.rb +0 -2
  33. data/lib/ferrum/utils/event.rb +19 -0
  34. data/lib/ferrum/utils/platform.rb +4 -0
  35. data/lib/ferrum/utils/thread.rb +18 -0
  36. data/lib/ferrum/version.rb +1 -1
  37. data/lib/ferrum.rb +3 -0
  38. metadata +14 -114
  39. data/lib/ferrum/browser/client.rb +0 -102
  40. data/lib/ferrum/browser/subscriber.rb +0 -36
@@ -0,0 +1,169 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "forwardable"
4
+ require "ferrum/client/subscriber"
5
+ require "ferrum/client/web_socket"
6
+
7
+ module Ferrum
8
#
# A wrapper around a shared client that is bound to one session id.
#
# Commands sent through it get `sessionId` merged into the message, and event
# names passed to {#on} / {#subscribed?} are suffixed with the session id.
# Any other method is forwarded to the wrapped client.
#
class SessionClient
  attr_reader :client, :session_id

  #
  # Builds the subscription key for an event within a session.
  #
  # @param [String] event
  #
  # @param [String, nil] session_id
  #
  # @return [String]
  #   `event` and `session_id` joined with "_"; a nil part is dropped.
  #
  def self.event_name(event, session_id)
    [event, session_id].compact.join("_")
  end

  #
  # @param [Object] client
  #   The client that messages and subscriptions are forwarded to.
  #
  # @param [String, nil] session_id
  #
  def initialize(client, session_id)
    @client = client
    @session_id = session_id
  end

  #
  # Builds a message for `method`, tags it with the session id and sends it
  # through the wrapped client.
  #
  # @param [String] method
  #
  # @param [Boolean] async
  #   Passed through to the wrapped client's `send_message`.
  #
  # @param [Hash] params
  #
  # @return [Object]
  #   Whatever the wrapped client's `send_message` returns.
  #
  def command(method, async: false, **params)
    message = build_message(method, params)
    @client.send_message(message, async: async)
  end

  #
  # Subscribes `block` to `event` scoped to this session.
  #
  def on(event, &block)
    @client.on(event_name(event), &block)
  end

  #
  # Asks the wrapped client whether `event`, scoped to this session, has a
  # subscription.
  #
  def subscribed?(event)
    @client.subscribed?(event_name(event))
  end

  # Advertises the wrapped client's methods as our own, deferring to the
  # default answer for anything the client does not provide either.
  def respond_to_missing?(name, include_private = false)
    @client.respond_to?(name, include_private) || super
  end

  # Forwards unknown calls to the wrapped client. Private client methods stay
  # reachable (`send` is kept on purpose); names the client does not know are
  # handed to `super` so the resulting NoMethodError names this object rather
  # than the wrapped client.
  def method_missing(name, ...)
    return super unless @client.respond_to?(name, true)

    @client.send(name, ...)
  end

  #
  # Clears this session's entries from the wrapped client's subscriber.
  #
  def close
    @client.subscriber.clear(session_id: session_id)
  end

  private

  # The wrapped client's message with `sessionId` merged in.
  def build_message(method, params)
    @client.build_message(method, params).merge(sessionId: session_id)
  end

  # Session-scoped name for `event`.
  def event_name(event)
    self.class.event_name(event, session_id)
  end
end
55
+
56
#
# Sends commands over a web socket and routes incoming messages: messages
# with a "method" key are pushed to the subscriber, all others are matched to
# the pending command with the same "id".
#
class Client
  extend Forwardable
  delegate %i[timeout timeout=] => :options

  attr_reader :ws_url, :options, :subscriber

  #
  # @param [String] ws_url
  #
  # @param [Object] options
  #   Must respond to `ws_max_receive_size`, `logger`, `timeout` and `timeout=`.
  #
  def initialize(ws_url, options)
    @command_id = 0
    @command_id_lock = Mutex.new
    @ws_url = ws_url
    @options = options
    @pendings = Concurrent::Hash.new
    @ws = WebSocket.new(ws_url, options.ws_max_receive_size, options.logger)
    @subscriber = Subscriber.new

    start
  end

  #
  # Builds a message for `method` and sends it.
  #
  # @param [String] method
  #
  # @param [Boolean] async
  #   When true the call does not wait for a response.
  #
  # @param [Hash] params
  #
  # @return [true, Object]
  #   `true` for async commands, otherwise the "result" of the response.
  #
  def command(method, async: false, **params)
    message = build_message(method, params)
    send_message(message, async: async)
  end

  #
  # Sends an already built message.
  #
  # @param [Hash] message
  #   Must carry an `:id` when `async` is false.
  #
  # @param [Boolean] async
  #
  # @return [true, Object]
  #   `true` for async messages, otherwise the "result" of the response.
  #
  # @raise [DeadBrowserError]
  #   No response arrived and the socket's message queue is closed.
  #
  # @raise [TimeoutError]
  #   No response arrived within `timeout`.
  #
  # @raise [BrowserError]
  #   The response carried an "error" (see the private error mapping below).
  #
  def send_message(message, async:)
    if async
      @ws.send_message(message)
      return true
    end

    data = await_response(message)

    raise DeadBrowserError if data.nil? && @ws.messages.closed?
    raise TimeoutError unless data

    error, response = data.values_at("error", "result")
    raise_browser_error(error) if error
    response
  end

  # Subscribes `block` to `event` on the subscriber.
  def on(event, &block)
    @subscriber.on(event, &block)
  end

  # Asks the subscriber whether `event` has a subscription.
  def subscribed?(event)
    @subscriber.subscribed?(event)
  end

  # Returns a SessionClient bound to `session_id` that shares this client.
  def session(session_id)
    SessionClient.new(self, session_id)
  end

  #
  # Closes the socket, drops pending commands and stops the reader thread.
  #
  def close
    @ws.close
    @pendings.clear
    # Give a thread some time to handle a tail of messages
    @thread.kill unless @thread.join(1)
    @subscriber.close
  end

  def inspect
    "#<#{self.class} " \
      "@command_id=#{@command_id.inspect} " \
      "@pendings=#{@pendings.inspect} " \
      "@ws=#{@ws.inspect}>"
  end

  #
  # @return [Hash]
  #   `{ method:, params:, id: }` where `id` is the next command id.
  #
  def build_message(method, params)
    { method: method, params: params }.merge(id: next_command_id)
  end

  private

  # Reader loop: pops messages until the queue yields nil, handing events to
  # the subscriber and responses to the matching pending command.
  def start
    @thread = Utils::Thread.spawn do
      loop do
        message = @ws.messages.pop
        break unless message

        if message.key?("method")
          @subscriber << message
        else
          @pendings[message["id"]]&.set(message)
        end
      end
    end
  end

  # Registers a pending slot for the message, sends it and waits up to
  # `timeout` for the reader thread to fill the slot. The slot is removed even
  # when sending or waiting raises, so failed commands do not pile up in
  # @pendings.
  def await_response(message)
    id = message[:id]
    pending = Concurrent::IVar.new
    @pendings[id] = pending

    begin
      @ws.send_message(message)
      pending.value!(timeout)
    ensure
      @pendings.delete(id)
    end
  end

  # Ids pair responses with commands, so the increment is guarded to keep two
  # threads from being handed the same id.
  def next_command_id
    @command_id_lock.synchronize { @command_id += 1 }
  end

  # Maps the "message" of an error response to an error class.
  def raise_browser_error(error)
    case error["message"]
    # Node has disappeared while we were trying to get it
    when "No node with given id found",
         "Could not find node with given id",
         "Inspected target navigated or closed"
      raise NodeNotFoundError, error
    # Context is lost, page is reloading
    when "Cannot find context with specified id"
      raise NoExecutionContextError, error
    when "No target with given id found"
      raise NoSuchPageError
    when /Could not compute content quads/
      raise CoordinatesNotFoundError
    else
      raise BrowserError, error
    end
  end
end
169
+ end
@@ -8,9 +8,9 @@ module Ferrum
8
8
 
9
9
  attr_reader :id, :targets
10
10
 
11
- def initialize(browser, contexts, id)
11
+ def initialize(client, contexts, id)
12
12
  @id = id
13
- @browser = browser
13
+ @client = client
14
14
  @contexts = contexts
15
15
  @targets = Concurrent::Map.new
16
16
  @pendings = Concurrent::MVar.new
@@ -46,33 +46,37 @@ module Ferrum
46
46
  end
47
47
 
48
48
  def create_target
49
- @browser.command("Target.createTarget",
50
- browserContextId: @id,
51
- url: "about:blank")
52
- target = @pendings.take(@browser.timeout)
49
+ @client.command("Target.createTarget", browserContextId: @id, url: "about:blank")
50
+ target = @pendings.take(@client.timeout)
53
51
  raise NoSuchTargetError unless target.is_a?(Target)
54
52
 
55
- @targets.put_if_absent(target.id, target)
56
53
  target
57
54
  end
58
55
 
59
- def add_target(params)
60
- target = Target.new(@browser, params)
61
- if target.window?
62
- @targets.put_if_absent(target.id, target)
63
- else
64
- @pendings.put(target, @browser.timeout)
65
- end
56
+ def add_target(params:, session_id: nil)
57
+ new_target = Target.new(@client, session_id, params)
58
+ target = @targets.put_if_absent(new_target.id, new_target)
59
+ target ||= new_target # `put_if_absent` returns nil if added a new value or existing if there was one already
60
+ @pendings.put(target, @client.timeout) if @pendings.empty?
61
+ target
66
62
  end
67
63
 
68
64
  def update_target(target_id, params)
69
- @targets[target_id].update(params)
65
+ @targets[target_id]&.update(params)
70
66
  end
71
67
 
72
68
  def delete_target(target_id)
73
69
  @targets.delete(target_id)
74
70
  end
75
71
 
72
+ def close_targets_connection
73
+ @targets.each_value do |target|
74
+ next unless target.connected?
75
+
76
+ target.page.close_connection
77
+ end
78
+ end
79
+
76
80
  def dispose
77
81
  @contexts.dispose(@id)
78
82
  end
@@ -4,12 +4,15 @@ require "ferrum/context"
4
4
 
5
5
  module Ferrum
6
6
  class Contexts
7
+ include Enumerable
8
+
7
9
  attr_reader :contexts
8
10
 
9
- def initialize(browser)
11
+ def initialize(client)
10
12
  @contexts = Concurrent::Map.new
11
- @browser = browser
13
+ @client = client
12
14
  subscribe
15
+ auto_attach
13
16
  discover
14
17
  end
15
18
 
@@ -17,6 +20,16 @@ module Ferrum
17
20
  @default_context ||= create
18
21
  end
19
22
 
23
+ def each(&block)
24
+ return enum_for(__method__) unless block_given?
25
+
26
+ @contexts.each(&block)
27
+ end
28
+
29
+ def [](id)
30
+ @contexts[id]
31
+ end
32
+
20
33
  def find_by(target_id:)
21
34
  context = nil
22
35
  @contexts.each_value { |c| context = c if c.target?(target_id) }
@@ -24,21 +37,25 @@ module Ferrum
24
37
  end
25
38
 
26
39
  def create(**options)
27
- response = @browser.command("Target.createBrowserContext", **options)
40
+ response = @client.command("Target.createBrowserContext", **options)
28
41
  context_id = response["browserContextId"]
29
- context = Context.new(@browser, self, context_id)
42
+ context = Context.new(@client, self, context_id)
30
43
  @contexts[context_id] = context
31
44
  context
32
45
  end
33
46
 
34
47
  def dispose(context_id)
35
48
  context = @contexts[context_id]
36
- @browser.command("Target.disposeBrowserContext",
37
- browserContextId: context.id)
49
+ context.close_targets_connection
50
+ @client.command("Target.disposeBrowserContext", browserContextId: context.id)
38
51
  @contexts.delete(context_id)
39
52
  true
40
53
  end
41
54
 
55
+ def close_connections
56
+ @contexts.each_value(&:close_targets_connection)
57
+ end
58
+
42
59
  def reset
43
60
  @default_context = nil
44
61
  @contexts.each_key { |id| dispose(id) }
@@ -51,15 +68,26 @@ module Ferrum
51
68
  private
52
69
 
53
70
  def subscribe
54
- @browser.client.on("Target.targetCreated") do |params|
71
+ @client.on("Target.attachedToTarget") do |params|
72
+ info, session_id = params.values_at("targetInfo", "sessionId")
73
+ next unless info["type"] == "page"
74
+
75
+ context_id = info["browserContextId"]
76
+ @contexts[context_id]&.add_target(session_id: session_id, params: info)
77
+ if params["waitingForDebugger"]
78
+ @client.session(session_id).command("Runtime.runIfWaitingForDebugger", async: true)
79
+ end
80
+ end
81
+
82
+ @client.on("Target.targetCreated") do |params|
55
83
  info = params["targetInfo"]
56
84
  next unless info["type"] == "page"
57
85
 
58
86
  context_id = info["browserContextId"]
59
- @contexts[context_id]&.add_target(info)
87
+ @contexts[context_id]&.add_target(params: info)
60
88
  end
61
89
 
62
- @browser.client.on("Target.targetInfoChanged") do |params|
90
+ @client.on("Target.targetInfoChanged") do |params|
63
91
  info = params["targetInfo"]
64
92
  next unless info["type"] == "page"
65
93
 
@@ -67,19 +95,25 @@ module Ferrum
67
95
  @contexts[context_id]&.update_target(target_id, info)
68
96
  end
69
97
 
70
- @browser.client.on("Target.targetDestroyed") do |params|
98
+ @client.on("Target.targetDestroyed") do |params|
71
99
  context = find_by(target_id: params["targetId"])
72
100
  context&.delete_target(params["targetId"])
73
101
  end
74
102
 
75
- @browser.client.on("Target.targetCrashed") do |params|
103
+ @client.on("Target.targetCrashed") do |params|
76
104
  context = find_by(target_id: params["targetId"])
77
105
  context&.delete_target(params["targetId"])
78
106
  end
79
107
  end
80
108
 
81
109
  def discover
82
- @browser.command("Target.setDiscoverTargets", discover: true)
110
+ @client.command("Target.setDiscoverTargets", discover: true)
111
+ end
112
+
113
+ def auto_attach
114
+ return unless @client.options.flatten
115
+
116
+ @client.command("Target.setAutoAttach", autoAttach: true, waitForDebuggerOnStart: true, flatten: true)
83
117
  end
84
118
  end
85
119
  end
@@ -113,6 +113,38 @@ module Ferrum
113
113
  Time.at(attributes["expires"]) if attributes["expires"].positive?
114
114
  end
115
115
 
116
+ #
117
+ # The priority of the cookie.
118
+ #
119
+ # @return [String]
120
+ #
121
+ def priority
122
+ @attributes["priority"]
123
+ end
124
+
125
+ #
126
+ # @return [Boolean]
127
+ #
128
+ def sameparty?
129
+ @attributes["sameParty"]
130
+ end
131
+
132
+ alias same_party? sameparty?
133
+
134
+ #
135
+ # @return [String]
136
+ #
137
+ def source_scheme
138
+ @attributes["sourceScheme"]
139
+ end
140
+
141
+ #
142
+ # @return [Integer]
143
+ #
144
+ def source_port
145
+ @attributes["sourcePort"]
146
+ end
147
+
116
148
  #
117
149
  # Compares different cookie objects.
118
150
  #
@@ -121,6 +153,31 @@ module Ferrum
121
153
  def ==(other)
122
154
  other.class == self.class && other.attributes == attributes
123
155
  end
156
+
157
#
# Converts the cookie back into a raw cookie String.
#
# Attributes are appended in the order they appear in the attributes hash.
# `Expires` is only emitted for a positive timestamp, matching the `expires`
# reader; a missing or non-positive value is rendered without `Expires`
# instead of as a date around 1970.
#
# @return [String]
#   The raw cookie string.
#
def to_s
  string = String.new("#{@attributes['name']}=#{@attributes['value']}")

  @attributes.each do |key, value|
    case key
    when "name", "value" # no-op
    when "domain" then string << "; Domain=#{value}"
    when "path" then string << "; Path=#{value}"
    when "expires"
      # Skip nil and non-positive timestamps: they do not denote an expiry date.
      string << "; Expires=#{Time.at(value).httpdate}" if value.is_a?(Numeric) && value.positive?
    when "httpOnly" then string << "; httpOnly" if value
    when "secure" then string << "; Secure" if value
    end
  end

  string
end
179
+
180
+ alias to_h attributes
124
181
  end
125
182
  end
126
183
  end
@@ -4,10 +4,34 @@ require "ferrum/cookies/cookie"
4
4
 
5
5
  module Ferrum
6
6
  class Cookies
7
+ include Enumerable
8
+
7
9
  def initialize(page)
8
10
  @page = page
9
11
  end
10
12
 
13
+ #
14
+ # Enumerates over all cookies.
15
+ #
16
+ # @yield [cookie]
17
+ # The given block will be passed each cookie.
18
+ #
19
+ # @yieldparam [Cookie] cookie
20
+ # A cookie in the browser.
21
+ #
22
+ # @return [Enumerator]
23
+ # If no block is given, an Enumerator object will be returned.
24
+ #
25
+ def each
26
+ return enum_for(__method__) unless block_given?
27
+
28
+ cookies = @page.command("Network.getAllCookies")["cookies"]
29
+
30
+ cookies.each do |c|
31
+ yield Cookie.new(c)
32
+ end
33
+ end
34
+
11
35
  #
12
36
  # Returns cookies hash.
13
37
  #
@@ -22,8 +46,9 @@ module Ferrum
22
46
  # # }
23
47
  #
24
48
  def all
25
- cookies = @page.command("Network.getAllCookies")["cookies"]
26
- cookies.to_h { |c| [c["name"], Cookie.new(c)] }
49
+ each.to_h do |cookie|
50
+ [cookie.name, cookie]
51
+ end
27
52
  end
28
53
 
29
54
  #
@@ -44,7 +69,7 @@ module Ferrum
44
69
  # # }>
45
70
  #
46
71
  def [](name)
47
- all[name]
72
+ find { |cookie| cookie.name == name }
48
73
  end
49
74
 
50
75
  #
@@ -53,23 +78,34 @@ module Ferrum
53
78
  # @param [Hash{Symbol => Object}, Cookie] options
54
79
  #
55
80
  # @option options [String] :name
81
+ # The cookie param name.
56
82
  #
57
83
  # @option options [String] :value
84
+ # The cookie param value.
58
85
  #
59
86
  # @option options [String] :domain
87
+ # The domain the cookie belongs to.
60
88
  #
61
89
  # @option options [String] :path
90
+ # The path that the cookie is bound to.
62
91
  #
63
92
  # @option options [Integer] :expires
93
+ # When the cookie will expire.
64
94
  #
65
95
  # @option options [Integer] :size
96
+ # The size of the cookie.
66
97
  #
67
98
  # @option options [Boolean] :httponly
99
+ # Specifies whether the cookie `HttpOnly`.
68
100
  #
69
101
  # @option options [Boolean] :secure
102
+ # Specifies whether the cookie is marked as `Secure`.
70
103
  #
71
104
  # @option options [String] :samesite
105
+ # Specifies whether the cookie is `SameSite`.
72
106
  #
107
+ # @option options [Boolean] :session
108
+ # Specifies whether the cookie is a session cookie.
73
109
  #
74
110
  # @example
75
111
  # browser.cookies.set(name: "stealth", value: "omg", domain: "google.com") # => true
@@ -136,7 +172,7 @@ module Ferrum
136
172
  private
137
173
 
138
174
  def default_domain
139
- URI.parse(@page.browser.base_url).host if @page.browser.base_url
175
+ URI.parse(@page.base_url).host if @page.base_url
140
176
  end
141
177
  end
142
178
  end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ferrum
4
#
# Tracks downloads announced through `Browser.downloadWillBegin` and
# `Browser.downloadProgress` events of a page, keyed by the event's "guid".
#
class Downloads
  VALID_BEHAVIOR = %i[deny allow allowAndName default].freeze

  #
  # @param [Object] page
  #   Must respond to `on`, `command` and `context_id`.
  #
  def initialize(page)
    @page = page
    # Starts out set, so there is nothing to wait for until a download begins.
    @event = Utils::Event.new.tap(&:set)
    @files = {}
  end

  #
  # @return [Array<Hash>]
  #   The accumulated event params of every download seen so far.
  #
  def files
    @files.values
  end

  #
  # Resets the event, runs the optional block, then waits up to `timeout` for
  # the event to be set again (the progress handler sets it on "completed" or
  # "canceled"). The event is left set afterwards.
  #
  # @param [Numeric] timeout
  #
  def wait(timeout = 5)
    @event.reset
    yield if block_given?
    @event.wait(timeout)
    @event.set
  end

  #
  # Sends `Browser.setDownloadBehavior` for the page's browser context with
  # events enabled.
  #
  # @param [String, Pathname] save_path
  #   Must be an absolute path.
  #
  # @param [Symbol, String] behavior
  #   One of {VALID_BEHAVIOR}.
  #
  # @raise [ArgumentError]
  #   `behavior` is not one of {VALID_BEHAVIOR}.
  #
  # @raise [Error]
  #   `save_path` is not absolute.
  #
  def set_behavior(save_path:, behavior: :allow)
    unless VALID_BEHAVIOR.include?(behavior.to_sym)
      raise ArgumentError, "unknown behavior #{behavior.inspect}, expected one of: #{VALID_BEHAVIOR.join(', ')}"
    end
    raise Error, "supply absolute path for `:save_path` option" unless Pathname.new(save_path.to_s).absolute?

    @page.command("Browser.setDownloadBehavior",
                  browserContextId: @page.context_id,
                  downloadPath: save_path,
                  behavior: behavior,
                  eventsEnabled: true)
  end

  # Registers both download event handlers on the page.
  def subscribe
    subscribe_download_will_begin
    subscribe_download_progress
  end

  # Records the params of a starting download and resets the event.
  def subscribe_download_will_begin
    @page.on("Browser.downloadWillBegin") do |params|
      @event.reset
      @files[params["guid"]] = params
    end
  end

  # Merges progress params into the download's record and sets the event once
  # the state is "completed" or "canceled"; any other state resets it.
  def subscribe_download_progress
    @page.on("Browser.downloadProgress") do |params|
      # A progress event may arrive for a guid that has no recorded
      # downloadWillBegin; start a record for it instead of failing on nil.
      (@files[params["guid"]] ||= {}).merge!(params)

      case params["state"]
      when "completed", "canceled"
        @event.set
      else
        @event.reset
      end
    end
  end
end
60
+ end
data/lib/ferrum/errors.rb CHANGED
@@ -6,7 +6,8 @@ module Ferrum
6
6
  class NoSuchTargetError < Error; end
7
7
  class NotImplementedError < Error; end
8
8
  class BinaryNotFoundError < Error; end
9
- class EmptyPathError < Error; end
9
+ class EmptyPathError < Error; end
10
+ class ServerError < Error; end
10
11
 
11
12
  class StatusError < Error
12
13
  def initialize(url, message = nil)
data/lib/ferrum/frame.rb CHANGED
@@ -113,6 +113,7 @@ module Ferrum
113
113
  document.close();
114
114
  arguments[1](true);
115
115
  ), @page.timeout, html)
116
+ @page.document_node_id
116
117
  end
117
118
  alias set_content content=
118
119
 
@@ -68,7 +68,7 @@ module Ferrum
68
68
 
69
69
  def set_overrides(user_agent: nil, accept_language: nil, platform: nil)
70
70
  options = {}
71
- options[:userAgent] = user_agent || @page.browser.default_user_agent
71
+ options[:userAgent] = user_agent || @page.default_user_agent
72
72
  options[:acceptLanguage] = accept_language if accept_language
73
73
  options[:platform] if platform
74
74
 
@@ -51,7 +51,7 @@ module Ferrum
51
51
  # @return [Boolean]
52
52
  #
53
53
  def navigation_request?(frame_id)
54
- request.type?(:document) && request&.frame_id == frame_id
54
+ request&.type?(:document) && request&.frame_id == frame_id
55
55
  end
56
56
 
57
57
  #
@@ -79,7 +79,7 @@ module Ferrum
79
79
  # @return [Boolean]
80
80
  #
81
81
  def finished?
82
- blocked? || !response.nil? || !error.nil?
82
+ blocked? || response&.loaded? || !error.nil? || ping?
83
83
  end
84
84
 
85
85
  #
@@ -100,6 +100,33 @@ module Ferrum
100
100
  !intercepted_request.nil?
101
101
  end
102
102
 
103
+ #
104
+ # Determines if the exchange is XHR.
105
+ #
106
+ # @return [Boolean]
107
+ #
108
+ def xhr?
109
+ !!request&.xhr?
110
+ end
111
+
112
+ #
113
#
# Determines if the exchange is a redirect.
#
# @return [Boolean]
#   `false` when `response` is nil.
#
def redirect?
  # Coerce to a strict boolean, as `xhr?` and `ping?` do, so a missing
  # response yields false rather than nil.
  !!response&.redirect?
end
121
+ #
122
+ # Determines if the exchange is ping.
123
+ #
124
+ # @return [Boolean]
125
+ #
126
+ def ping?
127
+ !!request&.ping?
128
+ end
129
+
103
130
  #
104
131
  # Returns request's URL.
105
132
  #