ferrum 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Ferrum
  module Network
    # Read-only view over a browser log entry that reports a failed network
    # request. Page builds one from the `entry` hash of a `Log.entryAdded`
    # event whose source is "network" and whose level is "error".
    class Error
      # @param data [Hash] log entry with string keys; the keys read here are
      #   "networkRequestId", "url", "text" and "timestamp"
      def initialize(data)
        @data = data
      end

      # @return [Object, nil] id of the network request the entry refers to
      def id
        @data["networkRequestId"]
      end

      # @return [Object, nil] URL stored in the entry
      def url
        @data["url"]
      end

      # @return [Object, nil] message text stored in the entry
      def description
        @data["text"]
      end

      # Moment the entry was logged (memoized).
      #
      # The DevTools protocol reports log entry timestamps in milliseconds
      # since the epoch, so the value is scaled down to seconds before the
      # Time is built. Parsing it as seconds would yield a date tens of
      # thousands of years in the future.
      #
      # @return [Time]
      # @raise [ArgumentError] when the timestamp is missing or not numeric
      def time
        # `to_s` keeps a missing value (nil) an ArgumentError rather than a
        # TypeError, matching what parsing an empty string raised before.
        @time ||= Time.at(Float(@data["timestamp"].to_s) / 1000)
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "time"
4
+
5
module Ferrum
  module Network
    # Read-only view over one outgoing request. Page builds it from the
    # "request" hash of a `Network.requestWillBeSent` event, merged with the
    # request id under "id" and the event's wall time under "time".
    #
    # The namespace is opened with explicit nesting so that loading this file
    # does not depend on Ferrum having been defined beforehand.
    class Request
      # Response and error objects attached later, once the matching events
      # arrive; both stay nil until then.
      attr_accessor :response, :error

      # @param data [Hash] request data with string keys; the keys read here
      #   are "id", "url", "method", "headers" and "time"
      def initialize(data)
        @data = data
      end

      # @return [Object, nil] request id
      def id
        @data["id"]
      end

      # @return [Object, nil] requested URL
      def url
        @data["url"]
      end

      # HTTP method of the request. Note that this replaces Object#method on
      # instances of this class.
      #
      # @return [Object, nil]
      def method
        @data["method"]
      end

      # @return [Object, nil] request headers
      def headers
        @data["headers"]
      end

      # Time the request was sent (memoized). The stored value is read as
      # seconds since the epoch; "%s" parsing stops at the decimal point, so
      # only whole seconds are kept.
      #
      # @return [Time]
      # @raise [ArgumentError] when no time was stored
      def time
        @time ||= Time.strptime(@data["time"].to_s, "%s")
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module Ferrum
  module Network
    # Read-only view over one received response. Page builds it from the
    # "response" hash of a `Network.responseReceived` event, merged with the
    # request id under "id".
    class Response
      # Size of the body; assigned by Page once loading has finished.
      attr_accessor :body_size

      # @param data [Hash] response data with string keys; the keys read here
      #   are "id", "url", "status", "statusText", "headers",
      #   "encodedDataLength" and "redirectURL"
      def initialize(data)
        @data = data
      end

      # @return [Object, nil] id of the request this response answers
      def id
        @data["id"]
      end

      # @return [Object, nil] response URL
      def url
        @data["url"]
      end

      # @return [Object, nil] HTTP status code
      def status
        @data["status"]
      end

      # @return [Object, nil] HTTP status text
      def status_text
        @data["statusText"]
      end

      # @return [Hash, nil] response headers
      def headers
        @data["headers"]
      end

      # Value stored under "encodedDataLength". Page subtracts it from the
      # total reported when loading finishes to derive the body size.
      #
      # @return [Object, nil]
      def headers_size
        @data["encodedDataLength"]
      end

      # FIXME: didn't check if we have it on redirect response
      def redirect_url
        @data["redirectURL"]
      end

      # Media type from the Content-Type header, without parameters such as
      # the charset ("text/html; charset=utf-8" becomes "text/html").
      #
      # Header names are matched case-insensitively, and the legacy
      # "contentType" spelling is still accepted.
      #
      # @return [String, nil] nil when the response carries no such header
      def content_type
        @content_type ||= begin
          _name, value = (headers || {}).find do |name, _value|
            # Dropping dashes lets "Content-Type", "content-type" and
            # "contentType" all normalise to the same key.
            name.to_s.delete("-").downcase == "contenttype"
          end
          value&.sub(/;.*\z/, "")
        end
      end
    end
  end
end
@@ -0,0 +1,175 @@
1
+ # frozen_string_literal: true
2
+
3
module Ferrum
  # Handle to one DOM node of a page target. Almost every operation is
  # forwarded to the owning page, which receives the node itself as the first
  # argument.
  class Node
    attr_reader :page, :target_id, :node_id, :desc

    # @param page [Object] page that executes commands for this node
    # @param target_id [Object] id of the target the node belongs to
    # @param node_id [Object] node id
    # @param desc [Hash] node description; the keys read by this class are
    #   "nodeType", "nodeName" and "backendNodeId"
    def initialize(page, target_id, node_id, desc)
      @page = page
      @target_id = target_id
      @node_id = node_id
      @desc = desc
    end

    # Whether the description marks an element (nodeType 1). A text node, for
    # example, has nodeType 3 and nodeName "#text".
    def node?
      node_type = desc["nodeType"]
      node_type == 1
    end

    # Calls `page.<name>(self, *args)`. A BrowserError whose message says the
    # node id is unknown is re-raised as ObsoleteNode; any other BrowserError
    # propagates unchanged.
    def page_send(name, *args)
      page.send(name, self, *args)
    rescue BrowserError => e
      raise unless e.message == "No node with given id found"

      raise ObsoleteNode.new(self, e.response)
    end

    # Finders scoped to this node: at_xpath, at_css, xpath and css each take a
    # selector and call the page method of the same name with `within: self`.
    %i[at_xpath at_css xpath css].each do |finder|
      define_method(finder) do |selector|
        page.public_send(finder, selector, within: self)
      end
    end

    # Result of evaluating `this.textContent` on the node.
    def text
      evaluate_on_self("this.textContent")
    end

    # Property `name` as reported by the page.
    def property(name)
      page_send(:property, name)
    end

    # Looks up `name` as a property first and falls back to the attribute
    # when the property is nil or a Hash.
    #
    # Although the attribute matters, the property is consistent, so for
    # "src" on images and "href" on links the property is returned whenever
    # the attribute exists.
    def [](name)
      image_source = tag_name == "img" && name == "src"
      link_target = tag_name == "a" && name == "href"
      if image_source || link_target
        # The attribute only acts as an existence check here.
        return page_send(:attribute, name) && page_send(:property, name)
      end

      found = property(name)
      return found unless found.nil? || found.is_a?(Hash)

      page_send(:attribute, name)
    end

    # Attributes of the node as reported by the page.
    def attributes
      page_send(:attributes)
    end

    # Result of evaluating `this.value` on the node.
    def value
      evaluate_on_self("this.value")
    end

    # Assigns `value` in the way that suits the node:
    # * radio inputs are clicked;
    # * checkboxes are clicked only when `value` differs from `checked?`;
    # * file inputs receive the path, or the list of paths, as strings;
    # * other inputs and textareas receive `value.to_s`;
    # * any other node with a truthy isContentEditable has its text deleted
    #   and `value.to_s` sent as keys.
    # Nothing happens for any other node.
    def set(value)
      case tag_name
      when "input"
        case self[:type]
        when "radio" then click
        when "checkbox" then (click if value != checked?)
        when "file"
          paths = if value.respond_to?(:to_ary)
                    value.to_ary.map { |file| file.to_s }
                  else
                    value.to_s
                  end
          page_send(:select_file, paths)
        else
          page_send(:set, value.to_s)
        end
      when "textarea"
        page_send(:set, value.to_s)
      else
        return unless self[:isContentEditable]

        # FIXME:
        page_send(:delete_text)
        send_keys(value.to_s)
      end
    end

    # Asks the page to select this node (passes `true`).
    def select_option
      page_send(:select, true)
    end

    # Not supported: always raises NotImplemented.
    def unselect_option
      raise NotImplemented
    end

    # Lower-cased "nodeName" from the description (memoized).
    def tag_name
      @tag_name ||= desc["nodeName"].downcase
    end

    def visible?
      page_send(:visible?)
    end

    # Raw `checked` value as returned by #[]; not coerced to a boolean.
    def checked?
      self[:checked]
    end

    # `selected` value coerced to true/false.
    def selected?
      self[:selected] ? true : false
    end

    def disabled?
      page_send(:disabled?)
    end

    # Mouse actions: `keys` and `offset` are handed to the page untouched.
    def click(keys = [], offset = {})
      page_send(:click, keys, offset)
    end

    def right_click(keys = [], offset = {})
      page_send(:right_click, keys, offset)
    end

    def double_click(keys = [], offset = {})
      page_send(:double_click, keys, offset)
    end

    def hover
      page_send(:hover)
    end

    # Asks the page to trigger `event` on the node.
    def trigger(event)
      page_send(:trigger, event)
    end

    # Dispatches to one of three scroll helpers: by element when `element` is
    # a Node, by named location when `location` is a Symbol, otherwise by the
    # coordinates in `position`. Returns self.
    #
    # NOTE(review): scroll_element_to_location, scroll_to_location and
    # scroll_to_coords are not defined in this class - confirm where they are
    # expected to come from.
    def scroll_to(element, location, position = nil)
      if element.is_a?(Node)
        scroll_element_to_location(element, location)
        return self
      end

      location.is_a?(Symbol) ? scroll_to_location(location) : scroll_to_coords(*position)
      self
    end

    # Two nodes are equal when they share a target and a backendNodeId.
    #
    # We compare backendNodeId because once nodeId is sent to frontend backend
    # never returns same nodeId sending 0. In other words frontend is
    # responsible for keeping track of node ids.
    def ==(other)
      other.is_a?(Node) &&
        target_id == other.target_id &&
        desc["backendNodeId"] == other.desc["backendNodeId"]
    end

    # Sends `keys` to the node; the page receives them as a single array.
    def send_keys(*keys)
      page_send(:send_keys, keys)
    end
    alias send_key send_keys

    def path
      page_send(:path)
    end

    def inspect
      "#<#{self.class} @target_id=#{@target_id.inspect} @node_id=#{@node_id} @desc=#{@desc.inspect}>"
    end

    private

    # Evaluates a JavaScript expression on the page with this node as `node`.
    def evaluate_on_self(expression)
      page.evaluate_on(node: self, expression: expression)
    end
  end
end
@@ -0,0 +1,373 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum/page/dom"
4
+ require "ferrum/page/input"
5
+ require "ferrum/page/runtime"
6
+ require "ferrum/page/frame"
7
+ require "ferrum/page/net"
8
+ require "ferrum/browser/client"
9
+ require "ferrum/network/error"
10
+ require "ferrum/network/request"
11
+ require "ferrum/network/response"
12
+
13
+ # RemoteObjectId is from a JavaScript world, and corresponds to any JavaScript
14
+ # object, including JS wrappers for DOM nodes. There is a way to convert between
15
+ # node ids and remote object ids (DOM.requestNode and DOM.resolveNode).
16
+ #
17
+ # NodeId is used for inspection, when backend tracks the node and sends updates to
18
+ # the frontend. If you somehow got NodeId over protocol, backend should have
19
+ # pushed to the frontend all of its ancestors up to the Document node via
20
+ # DOM.setChildNodes. After that, frontend is always kept up-to-date about anything
21
+ # happening to the node.
22
+ #
23
+ # BackendNodeId is just a unique identifier for a node. Obtaining it does not send
24
+ # any updates, for example, the node may be destroyed without any notification.
25
+ # This is a way to keep a reference to the Node, when you don't necessarily want
26
+ # to keep track of it. One example would be linking to the node from performance
27
+ # data (e.g. relayout root node). BackendNodeId may be either resolved to
28
+ # inspected node (DOM.pushNodesByBackendIdsToFrontend) or described in more
29
+ # details (DOM.describeNode).
30
+ module Ferrum
31
+ class Page # One attached browser target: tracks page loads, JavaScript dialogs and network traffic.
32
+ NEW_WINDOW_BUG_SLEEP = 0.3 # seconds slept before attaching when the page is a new window
33
+
34
+ include Input, DOM, Runtime, Frame, Net
35
+
36
+ attr_accessor :referrer
37
+ attr_reader :target_id, :status, :response_headers
38
+ # Attaches to target `target_id` through `browser`, opens a client for it, then subscribes to events and prepares the page.
39
+ def initialize(target_id, browser, new_window = false)
40
+ @target_id, @browser = target_id, browser
41
+ @network_traffic = []
42
+ @event = Concurrent::Event.new.tap(&:set)
43
+ # Frame tracking and dialog bookkeeping start out empty.
44
+ @frames = {}
45
+ @waiting_frames ||= Set.new
46
+ @frame_stack = []
47
+ @accept_modal = []
48
+ @modal_messages = []
49
+
50
+ # Dirty hack because new window doesn't have events at all
51
+ sleep(NEW_WINDOW_BUG_SLEEP) if new_window
52
+ # Attach to the target; a "No target with given id found" error is reported as NoSuchWindowError.
53
+ begin
54
+ @session_id = @browser.command("Target.attachToTarget", targetId: @target_id)["sessionId"]
55
+ rescue BrowserError => e
56
+ if e.message == "No target with given id found"
57
+ raise NoSuchWindowError
58
+ else
59
+ raise
60
+ end
61
+ end
62
+ # The page gets its own client; the URL is built from the browser process host/port and the target id.
63
+ host = @browser.process.host
64
+ port = @browser.process.port
65
+ ws_url = "ws://#{host}:#{port}/devtools/page/#{@target_id}"
66
+ @client = Browser::Client.new(browser, ws_url, 1000)
67
+ # Handlers are registered before prepare_page enables the protocol domains.
68
+ subscribe
69
+ prepare_page
70
+ end
71
+ # Timeout configured on the owning browser.
72
+ def timeout
73
+ @browser.timeout
74
+ end
75
+ # Navigates to `url` (resolved by combine_url!, with `referrer` when set) and returns the response's frameId; raises StatusFailError for the net errors listed below.
76
+ def goto(url = nil)
77
+ options = { url: combine_url!(url) }
78
+ options.merge!(referrer: referrer) if referrer
79
+ response = command("Page.navigate", timeout: timeout, **options)
80
+ # https://cs.chromium.org/chromium/src/net/base/net_error_list.h
81
+ if %w[net::ERR_NAME_NOT_RESOLVED
82
+ net::ERR_NAME_RESOLUTION_FAILED
83
+ net::ERR_INTERNET_DISCONNECTED
84
+ net::ERR_CONNECTION_TIMED_OUT].include?(response["errorText"])
85
+ raise StatusFailError, "url" => options[:url]
86
+ end
87
+ response["frameId"]
88
+ end
89
+ # Detaches from the target and closes it through the browser, then closes this page's client.
90
+ def close
91
+ @browser.command("Target.detachFromTarget", sessionId: @session_id)
92
+ @browser.command("Target.closeTarget", targetId: @target_id)
93
+ close_connection
94
+ end
95
+ # Closes this page's client only; the target itself is left alone.
96
+ def close_connection
97
+ @client.close
98
+ end
99
+ # Makes the window fullscreen, or a normal window of `width` x `height` with a matching device-metrics override.
100
+ def resize(width: nil, height: nil, fullscreen: false)
101
+ result = @browser.command("Browser.getWindowForTarget", targetId: @target_id)
102
+ @window_id, @bounds = result.values_at("windowId", "bounds")
103
+
104
+ if fullscreen
105
+ @browser.command("Browser.setWindowBounds", windowId: @window_id, bounds: { windowState: "fullscreen" })
106
+ else
107
+ @browser.command("Browser.setWindowBounds", windowId: @window_id, bounds: { windowState: "normal" })
108
+ @browser.command("Browser.setWindowBounds", windowId: @window_id, bounds: { width: width, height: height, windowState: "normal" })
109
+ command("Emulation.setDeviceMetricsOverride", width: width, height: height, deviceScaleFactor: 1, mobile: false)
110
+ end
111
+ end
112
+ # Reloads the page, then waits on the load event as described at #command.
113
+ def refresh
114
+ command("Page.reload", timeout: timeout)
115
+ end
116
+ # Recorded requests: "all" returns every one, "blocked" those without a response, anything else (including nil) those with a response.
117
+ def network_traffic(type = nil)
118
+ case type.to_s
119
+ when "all"
120
+ @network_traffic
121
+ when "blocked"
122
+ @network_traffic.select { |r| r.response.nil? } # when request blocked
123
+ else
124
+ @network_traffic.select { |r| r.response } # when request isn't blocked
125
+ end
126
+ end
127
+ # Discards all recorded requests.
128
+ def clear_network_traffic
129
+ @network_traffic = []
130
+ end
131
+ # Navigates one history entry back, if there is one.
132
+ def go_back
133
+ go(-1)
134
+ end
135
+ # Navigates one history entry forward, if there is one.
136
+ def go_forward
137
+ go(1)
138
+ end
139
+ # Queues "accept" as the answer for an upcoming dialog.
140
+ def accept_confirm
141
+ @accept_modal << true
142
+ end
143
+ # Queues "dismiss" as the answer for an upcoming dialog.
144
+ def dismiss_confirm
145
+ @accept_modal << false
146
+ end
147
+ # Queues "accept" for an upcoming dialog and stores `modal_response` as the prompt text to send with it.
148
+ def accept_prompt(modal_response)
149
+ @accept_modal << true
150
+ @modal_response = modal_response
151
+ end
152
+ # Queues "dismiss" as the answer for an upcoming dialog.
153
+ def dismiss_prompt
154
+ @accept_modal << false
155
+ end
156
+ # Shifts recorded dialog messages every 0.05s until one matches options[:text] (String or Regexp, any message when absent); raises ModalNotFound after options[:wait] seconds. NOTE(review): the default wait calls `session_wait_time`, which is not defined in this class - confirm an included module provides it.
157
+ def find_modal(options)
158
+ start_time = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
159
+ timeout_sec = options.fetch(:wait) { session_wait_time }
160
+ expect_text = options[:text]
161
+ expect_regexp = expect_text.is_a?(Regexp) ? expect_text : Regexp.escape(expect_text.to_s)
162
+ not_found_msg = "Unable to find modal dialog"
163
+ not_found_msg += " with #{expect_text}" if expect_text
164
+ # Each attempt consumes the oldest recorded message, so messages that do not match are dropped.
165
+ begin
166
+ modal_text = @modal_messages.shift
167
+ raise ModalNotFound if modal_text.nil? || (expect_text && !modal_text.match(expect_regexp))
168
+ rescue ModalNotFound => e
169
+ raise e, not_found_msg if (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start_time) >= timeout_sec
170
+ sleep(0.05)
171
+ retry
172
+ end
173
+
174
+ modal_text
175
+ end
176
+ # Clears queued dialog answers, the stored prompt response and the recorded dialog messages.
177
+ def reset_modals
178
+ @accept_modal = []
179
+ @modal_response = nil
180
+ @modal_messages = []
181
+ end
182
+ # Sends `method` with `params` through this page's client and returns the result; with a positive `timeout` it then resets the load event and waits on it for up to `timeout`.
183
+ def command(method, timeout: 0, **params)
184
+ result = @client.command(method, params)
185
+ # The event is reset only after the command has returned; the handlers in #subscribe set it again.
186
+ if timeout > 0
187
+ @event.reset
188
+ @event.wait(timeout)
189
+ end
190
+
191
+ result
192
+ end
193
+
194
+ private
195
+ # Registers this page's event handlers on the client, after the ones registered by `super`.
196
+ def subscribe
197
+ super
198
+ # With a logger configured, write the "value" of every console API argument to it.
199
+ if @browser.logger
200
+ @client.on("Runtime.consoleAPICalled") do |params|
201
+ params["args"].each { |r| @browser.logger.puts(r["value"]) }
202
+ end
203
+ end
204
+ # With js_errors enabled, exceptions thrown in the page are raised as JavaScriptError in the main thread.
205
+ if @browser.js_errors
206
+ @client.on("Runtime.exceptionThrown") do |params|
207
+ Thread.main.raise JavaScriptError.new(params.dig("exceptionDetails", "exception"))
208
+ end
209
+ end
210
+ # Dialogs are answered from the queued answers, most recently queued first; with nothing queued a warning is printed and the dialog is accepted.
211
+ @client.on("Page.javascriptDialogOpening") do |params|
212
+ accept_modal = @accept_modal.last
213
+ if accept_modal == true || accept_modal == false
214
+ @accept_modal.pop
215
+ @modal_messages << params["message"]
216
+ options = { accept: accept_modal }
217
+ response = @modal_response || params["defaultPrompt"]
218
+ options.merge!(promptText: response) if response
219
+ @client.command("Page.handleJavaScriptDialog", **options)
220
+ else
221
+ warn "Modal window has been opened, but you didn't wrap your code into (`accept_prompt` | `dismiss_prompt` | `accept_confirm` | `dismiss_confirm` | `accept_alert`), accepting by default"
222
+ options = { accept: true }
223
+ response = params["defaultPrompt"]
224
+ options.merge!(promptText: response) if response
225
+ @client.command("Page.handleJavaScriptDialog", **options)
226
+ end
227
+ end
228
+ # Refresh the browser's targets when this page opens a window.
229
+ @client.on("Page.windowOpen") do
230
+ @browser.targets.refresh
231
+ end
232
+ # A same-document navigation releases waiters unless frames are still being waited for.
233
+ @client.on("Page.navigatedWithinDocument") do
234
+ @event.set if @waiting_frames.empty?
235
+ end
236
+ # For non-200 statuses the waiters are released here and the document id is fetched again.
237
+ @client.on("Page.domContentEventFired") do |params|
238
+ # `frameStoppedLoading` doesn't occur if status isn't success
239
+ if @status != 200
240
+ @event.set
241
+ @document_id = get_document_id
242
+ end
243
+ end
244
+ # Every request is recorded; a Document request in the frame identified by @frame_id additionally resets the load event and remembers its request id.
245
+ @client.on("Network.requestWillBeSent") do |params|
246
+ if params["frameId"] == @frame_id
247
+ # Possible types:
248
+ # Document, Stylesheet, Image, Media, Font, Script, TextTrack, XHR,
249
+ # Fetch, EventSource, WebSocket, Manifest, SignedExchange, Ping,
250
+ # CSPViolationReport, Other
251
+ if params["type"] == "Document"
252
+ @event.reset
253
+ @request_id = params["requestId"]
254
+ end
255
+ end
256
+ # Merge the request id and wall time into the request hash under "id" and "time".
257
+ id, time = params.values_at("requestId", "wallTime")
258
+ params = params["request"].merge("id" => id, "time" => time)
259
+ @network_traffic << Network::Request.new(params)
260
+ end
261
+ # Status and headers are kept for the remembered document request; the response is attached to its recorded request when one is found.
262
+ @client.on("Network.responseReceived") do |params|
263
+ if params["requestId"] == @request_id
264
+ @response_headers = params.dig("response", "headers")
265
+ @status = params.dig("response", "status")
266
+ end
267
+
268
+ if request = @network_traffic.find { |r| r.id == params["requestId"] }
269
+ params = params["response"].merge("id" => params["requestId"])
270
+ request.response = Network::Response.new(params)
271
+ end
272
+ end
273
+ # Once loading has finished, the body size is derived for requests that already have a response.
274
+ @client.on("Network.loadingFinished") do |params|
275
+ if request = @network_traffic.find { |r| r.id == params["requestId"] }
276
+ # Sometimes we never get the Network.responseReceived event.
277
+ # See https://crbug.com/883475
278
+ #
279
+ # Network.loadingFinished's encodedDataLength contains both body and headers
280
+ # sizes received by wire. See https://crbug.com/764946
281
+ if response = request.response
282
+ response.body_size = params["encodedDataLength"] - response.headers_size
283
+ end
284
+ end
285
+ end
286
+ # Log entries with source "network" and level "error" are attached to the matching recorded request.
287
+ @client.on("Log.entryAdded") do |params|
288
+ source = params.dig("entry", "source")
289
+ level = params.dig("entry", "level")
290
+ if source == "network" && level == "error"
291
+ id = params.dig("entry", "networkRequestId")
292
+ if request = @network_traffic.find { |r| r.id == id }
293
+ request.error = Network::Error.new(params["entry"])
294
+ end
295
+ end
296
+ end
297
+ end
298
+ # Enables the protocol domains and applies the browser's options: download path, extensions, window size and URL filtering.
299
+ def prepare_page
300
+ command("Page.enable")
301
+ command("DOM.enable")
302
+ command("CSS.enable")
303
+ command("Runtime.enable")
304
+ command("Log.enable")
305
+ command("Network.enable")
306
+ # Downloads are allowed only when a save path is configured.
307
+ if @browser.options[:save_path]
308
+ command("Page.setDownloadBehavior", behavior: "allow", downloadPath: @browser.options[:save_path])
309
+ end
310
+ # Extensions are registered for new documents and, below, also evaluated right away.
311
+ @browser.extensions.each do |extension|
312
+ @client.command("Page.addScriptToEvaluateOnNewDocument", source: extension)
313
+ end
314
+
315
+ inject_extensions
316
+
317
+ width, height = @browser.window_size
318
+ resize(width: width, height: height)
319
+ # Requests are intercepted only when a whitelist or blacklist is configured.
320
+ url_whitelist = Array(@browser.url_whitelist)
321
+ url_blacklist = Array(@browser.url_blacklist)
322
+ intercept_request("*") if !url_whitelist.empty? || !url_blacklist.empty?
323
+
324
+ response = command("Page.getNavigationHistory")
325
+ if response.dig("entries", 0, "transitionType") != "typed"
326
+ # If we create page by clicking links, submitting forms and so on it
327
+ # opens a new window for which `frameStoppedLoading` event never
328
+ # occurs and thus search for nodes cannot be completed. Here we check
329
+ # the history and if the transitionType for example `link` then
330
+ # content is already loaded and we can try to get the document.
331
+ @document_id = get_document_id
332
+ end
333
+ end
334
+ # Evaluates every browser extension script in the context given by execution_context_id.
335
+ def inject_extensions
336
+ @browser.extensions.each do |extension|
337
+ # https://github.com/GoogleChrome/puppeteer/issues/1443
338
+ # https://github.com/ChromeDevTools/devtools-protocol/issues/77
339
+ # https://github.com/cyrus-and/chrome-remote-interface/issues/319
340
+ # We also evaluate script just in case because
341
+ # `Page.addScriptToEvaluateOnNewDocument` doesn't work in popups.
342
+ @client.command("Runtime.evaluate", expression: extension,
343
+ contextId: execution_context_id,
344
+ returnByValue: true)
345
+ end
346
+ end
347
+ # Moves `delta` entries through the navigation history when the target entry exists; otherwise does nothing.
348
+ def go(delta)
349
+ history = command("Page.getNavigationHistory")
350
+ index, entries = history.values_at("currentIndex", "entries")
351
+
352
+ if entry = entries[index + delta]
353
+ # Potential wait because of network event
354
+ command("Page.navigateToHistoryEntry", timeout: 0.05, entryId: entry["id"])
355
+ end
356
+ end
357
+ # Returns the parsed URL when it is absolute, otherwise joins it onto the browser's base_url; raises when a nil or relative input has no base_url to join onto.
358
+ def combine_url!(url_or_path)
359
+ url = Addressable::URI.parse(url_or_path)
360
+ nil_or_relative = url.nil? || url.relative?
361
+
362
+ if nil_or_relative && !@browser.base_url
363
+ raise "Set :base_url browser's option or use absolute url in `goto`, you passed: #{url_or_path}"
364
+ end
365
+
366
+ nil_or_relative ? @browser.base_url.join(url.to_s) : url
367
+ end
368
+ # Node id of the root returned by DOM.getDocument.
369
+ def get_document_id
370
+ command("DOM.getDocument", depth: 0).dig("root", "nodeId")
371
+ end
372
+ end
373
+ end