ferrum 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
# `Time.strptime` is provided by the stdlib "time" library, not by core Time.
require "time"

module Ferrum
  module Network
    # Read-only view over the raw hash describing a network error log entry.
    # All values are read from the hash with String keys.
    class Error
      # @param data [Hash] raw entry hash (String keys)
      def initialize(data)
        @data = data
      end

      # @return [Object, nil] value stored under "networkRequestId"
      def id
        @data["networkRequestId"]
      end

      # @return [Object, nil] value stored under "url"
      def url
        @data["url"]
      end

      # @return [Object, nil] value stored under "text"
      def description
        @data["text"]
      end

      # Parses the "timestamp" value as seconds since the Unix epoch and
      # memoizes the result.
      #
      # NOTE(review): the value is interpreted as *seconds*; confirm the
      # protocol does not report this timestamp in milliseconds.
      #
      # @return [Time]
      # @raise [ArgumentError] if the timestamp cannot be parsed with "%s"
      def time
        @time ||= ::Time.strptime(@data["timestamp"].to_s, "%s")
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "time"
4
+
5
module Ferrum
  module Network
    # Read-only view over the raw hash describing an outgoing request, plus
    # two writable slots (`response`, `error`) that are filled in later by
    # whoever tracks the traffic.
    class Request
      # response: the response object attached to this request (nil until set)
      # error:    the error object attached to this request (nil until set)
      attr_accessor :response, :error

      # @param data [Hash] raw request hash (String keys)
      def initialize(data)
        @data = data
      end

      # @return [Object, nil] value stored under "id"
      def id
        @data["id"]
      end

      # @return [Object, nil] value stored under "url"
      def url
        @data["url"]
      end

      # NOTE: this intentionally public reader shadows `Object#method` for
      # instances of this class; it is kept for interface compatibility.
      #
      # @return [Object, nil] value stored under "method"
      def method
        @data["method"]
      end

      # @return [Object, nil] value stored under "headers"
      def headers
        @data["headers"]
      end

      # Parses the "time" value as seconds since the Unix epoch and memoizes
      # the result.
      #
      # @return [Time]
      # @raise [ArgumentError] if the value cannot be parsed with "%s"
      def time
        @time ||= ::Time.strptime(@data["time"].to_s, "%s")
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module Ferrum
  module Network
    # Read-only view over the raw hash describing a response, plus a writable
    # `body_size` that is assigned once the transfer size is known.
    class Response
      # Header names (lower-cased) accepted by #content_type. "contenttype" is
      # kept so data using the legacy "contentType" key continues to work.
      CONTENT_TYPE_KEYS = %w[content-type contenttype].freeze

      # body_size: size of the response body; nil until assigned by the caller
      attr_accessor :body_size

      # @param data [Hash] raw response hash (String keys)
      def initialize(data)
        @data = data
      end

      # @return [Object, nil] value stored under "id"
      def id
        @data["id"]
      end

      # @return [Object, nil] value stored under "url"
      def url
        @data["url"]
      end

      # @return [Object, nil] value stored under "status"
      def status
        @data["status"]
      end

      # @return [Object, nil] value stored under "statusText"
      def status_text
        @data["statusText"]
      end

      # @return [Hash, nil] value stored under "headers"
      def headers
        @data["headers"]
      end

      # @return [Object, nil] value stored under "encodedDataLength"
      def headers_size
        @data["encodedDataLength"]
      end

      # FIXME: didn't check if we have it on redirect response
      #
      # @return [Object, nil] value stored under "redirectURL"
      def redirect_url
        @data["redirectURL"]
      end

      # Media type from the content-type header with any parameters
      # (everything from the first ";") removed.
      #
      # The header name is matched case-insensitively. When there are no
      # headers or no content-type header the result is nil instead of a
      # NoMethodError on nil.
      #
      # @return [String, nil]
      def content_type
        @content_type ||= begin
          _, raw = Array(headers).find do |name, _value|
            CONTENT_TYPE_KEYS.include?(name.to_s.downcase)
          end
          raw&.sub(/;.*\z/, "")
        end
      end
    end
  end
end
@@ -0,0 +1,175 @@
1
+ # frozen_string_literal: true
2
+
3
module Ferrum
  # Handle to a DOM node. Almost every operation is delegated to the owning
  # `page`, which receives this node as the first argument.
  class Node
    attr_reader :page, :target_id, :node_id, :desc

    # @param page [Object] object that performs the actual operations
    # @param target_id [Object] id of the target the node belongs to
    # @param node_id [Object] id of the node
    # @param desc [Hash] node description (read with String keys such as
    #   "nodeType", "nodeName", "backendNodeId")
    def initialize(page, target_id, node_id, desc)
      @page = page
      @target_id = target_id
      @node_id = node_id
      @desc = desc
    end

    # @return [Boolean] true when the description's nodeType is 1
    def node?
      desc["nodeType"] == 1 # nodeType: 3, nodeName: "#text" e.g.
    end

    # Calls `name` on the page with this node prepended to `args`.
    #
    # @raise [ObsoleteNode] when the page reports "No node with given id found"
    # @raise [BrowserError] any other browser error is re-raised untouched
    def page_send(name, *args)
      page.send(name, self, *args)
    rescue BrowserError => e
      case e.message
      when "No node with given id found"
        raise ObsoleteNode.new(self, e.response)
      else
        raise
      end
    end

    def at_xpath(selector)
      page.at_xpath(selector, within: self)
    end

    def at_css(selector)
      page.at_css(selector, within: self)
    end

    def xpath(selector)
      page.xpath(selector, within: self)
    end

    def css(selector)
      page.css(selector, within: self)
    end

    def text
      page.evaluate_on(node: self, expression: "this.textContent")
    end

    def property(name)
      page_send(:property, name)
    end

    # Reads `name` from the node, preferring the property over the attribute.
    #
    # `name` may be a String or a Symbol (this class itself calls
    # `self[:type]` and `self[:checked]`), so the special-case check below
    # compares on `name.to_s`; the original `name` is still what is passed to
    # the page.
    def [](name)
      key = name.to_s

      # Although the attribute matters, the property is consistent. Return that in
      # preference to the attribute for links and images.
      if (tag_name == "img" && key == "src") || (tag_name == "a" && key == "href")
        # if attribute exists get the property
        return page_send(:attribute, name) && page_send(:property, name)
      end

      value = property(name)
      # Fall back to the attribute when there is no property value or when the
      # property came back as a Hash.
      value = page_send(:attribute, name) if value.nil? || value.is_a?(Hash)

      value
    end

    def attributes
      page_send(:attributes)
    end

    def value
      page.evaluate_on(node: self, expression: "this.value")
    end

    # Sets the node's value depending on what kind of element it is:
    # radio inputs are clicked, checkboxes are clicked only when their state
    # differs from `value`, file inputs receive file path(s), other inputs and
    # textareas receive `value.to_s`, and content-editable nodes are cleared
    # and then typed into. Any other node is left untouched.
    def set(value)
      if tag_name == "input"
        case self[:type]
        when "radio"
          click
        when "checkbox"
          click if value != checked?
        when "file"
          files = value.respond_to?(:to_ary) ? value.to_ary.map(&:to_s) : value.to_s
          page_send(:select_file, files)
        else
          page_send(:set, value.to_s)
        end
      elsif tag_name == "textarea"
        page_send(:set, value.to_s)
      elsif self[:isContentEditable]
        # FIXME:
        page_send(:delete_text)
        send_keys(value.to_s)
      end
    end

    def select_option
      page_send(:select, true)
    end

    # NOTE(review): `NotImplemented` is not defined in this file and is not a
    # Ruby core constant (core has `NotImplementedError`); confirm it is
    # declared elsewhere in the project.
    def unselect_option
      raise NotImplemented
    end

    # @return [String] lower-cased "nodeName" from the description (memoized)
    def tag_name
      @tag_name ||= desc["nodeName"].downcase
    end

    def visible?
      page_send(:visible?)
    end

    def checked?
      self[:checked]
    end

    def selected?
      !!self[:selected]
    end

    def disabled?
      page_send(:disabled?)
    end

    def click(keys = [], offset = {})
      page_send(:click, keys, offset)
    end

    def right_click(keys = [], offset = {})
      page_send(:right_click, keys, offset)
    end

    def double_click(keys = [], offset = {})
      page_send(:double_click, keys, offset)
    end

    def hover
      page_send(:hover)
    end

    def trigger(event)
      page_send(:trigger, event)
    end

    # Dispatches to one of three scroll helpers based on the argument types
    # and returns self.
    #
    # NOTE(review): `scroll_element_to_location`, `scroll_to_location` and
    # `scroll_to_coords` are not defined in this class; confirm they are
    # provided elsewhere.
    def scroll_to(element, location, position = nil)
      if element.is_a?(Node)
        scroll_element_to_location(element, location)
      elsif location.is_a?(Symbol)
        scroll_to_location(location)
      else
        scroll_to_coords(*position)
      end
      self
    end

    def ==(other)
      return false unless other.is_a?(Node)
      # We compare backendNodeId because once nodeId is sent to frontend backend
      # never returns same nodeId sending 0. In other words frontend is
      # responsible for keeping track of node ids.
      target_id == other.target_id && desc["backendNodeId"] == other.desc["backendNodeId"]
    end

    # Forwards all given keys to the page as a single Array argument.
    def send_keys(*keys)
      page_send(:send_keys, keys)
    end
    alias_method :send_key, :send_keys

    def path
      page_send(:path)
    end

    def inspect
      %(#<#{self.class} @target_id=#{@target_id.inspect} @node_id=#{@node_id} @desc=#{@desc.inspect}>)
    end
  end
end
@@ -0,0 +1,373 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum/page/dom"
4
+ require "ferrum/page/input"
5
+ require "ferrum/page/runtime"
6
+ require "ferrum/page/frame"
7
+ require "ferrum/page/net"
8
+ require "ferrum/browser/client"
9
+ require "ferrum/network/error"
10
+ require "ferrum/network/request"
11
+ require "ferrum/network/response"
12
+
13
+ # RemoteObjectId is from a JavaScript world, and corresponds to any JavaScript
14
+ # object, including JS wrappers for DOM nodes. There is a way to convert between
15
+ # node ids and remote object ids (DOM.requestNode and DOM.resolveNode).
16
+ #
17
+ # NodeId is used for inspection, when backend tracks the node and sends updates to
18
+ # the frontend. If you somehow got NodeId over protocol, backend should have
19
+ # pushed to the frontend all of its ancestors up to the Document node via
20
+ # DOM.setChildNodes. After that, frontend is always kept up-to-date about anything
21
+ # happening to the node.
22
+ #
23
+ # BackendNodeId is just a unique identifier for a node. Obtaining it does not send
24
+ # any updates, for example, the node may be destroyed without any notification.
25
+ # This is a way to keep a reference to the Node, when you don't necessarily want
26
+ # to keep track of it. One example would be linking to the node from performance
27
+ # data (e.g. relayout root node). BackendNodeId may be either resolved to
28
+ # inspected node (DOM.pushNodesByBackendIdsToFrontend) or described in more
29
+ # details (DOM.describeNode).
30
module Ferrum
  # A single browser target. On construction it attaches to the target, opens
  # its own client connection, subscribes to protocol events and prepares the
  # page (enables domains, injects extensions, resizes the window).
  class Page
    # Seconds to sleep before attaching to a freshly opened window.
    NEW_WINDOW_BUG_SLEEP = 0.3

    include Input, DOM, Runtime, Frame, Net

    attr_accessor :referrer
    attr_reader :target_id, :status, :response_headers

    # @param target_id [String] id of the target to attach to
    # @param browser [Object] owning browser (provides `command`, `process`,
    #   `timeout`, `logger`, `extensions`, ...)
    # @param new_window [Boolean] when true, sleep NEW_WINDOW_BUG_SLEEP first
    # @raise [NoSuchWindowError] when the browser reports
    #   "No target with given id found"
    def initialize(target_id, browser, new_window = false)
      @target_id, @browser = target_id, browser
      @network_traffic = []
      # Starts in the "set" state so waiting returns immediately until
      # something resets it.
      @event = Concurrent::Event.new.tap(&:set)

      @frames = {}
      @waiting_frames = Set.new
      @frame_stack = []
      @accept_modal = []
      @modal_messages = []

      # Dirty hack because new window doesn't have events at all
      sleep(NEW_WINDOW_BUG_SLEEP) if new_window

      begin
        @session_id = @browser.command("Target.attachToTarget", targetId: @target_id)["sessionId"]
      rescue BrowserError => e
        raise NoSuchWindowError if e.message == "No target with given id found"

        raise
      end

      host = @browser.process.host
      port = @browser.process.port
      ws_url = "ws://#{host}:#{port}/devtools/page/#{@target_id}"
      @client = Browser::Client.new(browser, ws_url, 1000)

      subscribe
      prepare_page
    end

    # @return [Object] the browser's timeout
    def timeout
      @browser.timeout
    end

    # Navigates to `url` (combined with the browser's base url when relative).
    #
    # @return [Object] the "frameId" of the navigation response
    # @raise [StatusFailError] for name-resolution / connectivity errors
    def goto(url = nil)
      options = { url: combine_url!(url) }
      options.merge!(referrer: referrer) if referrer
      response = command("Page.navigate", timeout: timeout, **options)
      # https://cs.chromium.org/chromium/src/net/base/net_error_list.h
      if %w[net::ERR_NAME_NOT_RESOLVED
            net::ERR_NAME_RESOLUTION_FAILED
            net::ERR_INTERNET_DISCONNECTED
            net::ERR_CONNECTION_TIMED_OUT].include?(response["errorText"])
        raise StatusFailError, "url" => options[:url]
      end
      response["frameId"]
    end

    # Detaches from the target, closes it and then closes the page client.
    def close
      @browser.command("Target.detachFromTarget", sessionId: @session_id)
      @browser.command("Target.closeTarget", targetId: @target_id)
      close_connection
    end

    def close_connection
      @client.close
    end

    # Resizes the window, or switches it to fullscreen.
    def resize(width: nil, height: nil, fullscreen: false)
      result = @browser.command("Browser.getWindowForTarget", targetId: @target_id)
      @window_id, @bounds = result.values_at("windowId", "bounds")

      if fullscreen
        @browser.command("Browser.setWindowBounds", windowId: @window_id, bounds: { windowState: "fullscreen" })
      else
        # The window state is set to "normal" first, then the bounds are applied.
        @browser.command("Browser.setWindowBounds", windowId: @window_id, bounds: { windowState: "normal" })
        @browser.command("Browser.setWindowBounds", windowId: @window_id, bounds: { width: width, height: height, windowState: "normal" })
        command("Emulation.setDeviceMetricsOverride", width: width, height: height, deviceScaleFactor: 1, mobile: false)
      end
    end

    def refresh
      command("Page.reload", timeout: timeout)
    end

    # @param type [#to_s, nil] "all" for everything, "blocked" for requests
    #   without a response, anything else for requests that have a response
    # @return [Array<Network::Request>]
    def network_traffic(type = nil)
      case type.to_s
      when "all"
        @network_traffic
      when "blocked"
        @network_traffic.select { |r| r.response.nil? } # when request blocked
      else
        @network_traffic.select { |r| r.response } # when request isn't blocked
      end
    end

    def clear_network_traffic
      @network_traffic = []
    end

    def go_back
      go(-1)
    end

    def go_forward
      go(1)
    end

    # Queues acceptance of the next dialog.
    def accept_confirm
      @accept_modal << true
    end

    # Queues dismissal of the next dialog.
    def dismiss_confirm
      @accept_modal << false
    end

    # Queues acceptance of the next dialog and remembers the text to answer with.
    def accept_prompt(modal_response)
      @accept_modal << true
      @modal_response = modal_response
    end

    def dismiss_prompt
      @accept_modal << false
    end

    # Polls the recorded dialog messages until one is available (and matches
    # `options[:text]` when given) or the wait time elapses.
    #
    # NOTE(review): `session_wait_time` is not defined in this class; confirm
    # it is provided elsewhere, otherwise `:wait` is effectively required.
    #
    # @param options [Hash] :wait (seconds), :text (String or Regexp)
    # @return [String] the matching dialog message
    # @raise [ModalNotFound] when nothing matched in time
    def find_modal(options)
      start_time = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
      timeout_sec = options.fetch(:wait) { session_wait_time }
      expect_text = options[:text]
      expect_regexp = expect_text.is_a?(Regexp) ? expect_text : Regexp.escape(expect_text.to_s)
      not_found_msg = "Unable to find modal dialog"
      not_found_msg += " with #{expect_text}" if expect_text

      begin
        modal_text = @modal_messages.shift
        raise ModalNotFound if modal_text.nil? || (expect_text && !modal_text.match(expect_regexp))
      rescue ModalNotFound => e
        # The retry is bounded by the elapsed-time check on the next line.
        raise e, not_found_msg if (::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - start_time) >= timeout_sec
        sleep(0.05)
        retry
      end

      modal_text
    end

    def reset_modals
      @accept_modal = []
      @modal_response = nil
      @modal_messages = []
    end

    # Sends `method` with `params` through the page client. When `timeout` is
    # positive, the page event is reset afterwards and this call waits up to
    # `timeout` seconds for it to be set again.
    #
    # @return [Object] the client's result for the command
    def command(method, timeout: 0, **params)
      result = @client.command(method, params)

      if timeout > 0
        @event.reset
        @event.wait(timeout)
      end

      result
    end

    private

    # Registers the event handlers of this page on its client (after the
    # handlers registered by `super`).
    def subscribe
      super

      if @browser.logger
        @client.on("Runtime.consoleAPICalled") do |params|
          params["args"].each { |r| @browser.logger.puts(r["value"]) }
        end
      end

      if @browser.js_errors
        @client.on("Runtime.exceptionThrown") do |params|
          Thread.main.raise JavaScriptError.new(params.dig("exceptionDetails", "exception"))
        end
      end

      @client.on("Page.javascriptDialogOpening") do |params|
        accept_modal = @accept_modal.last
        if accept_modal == true || accept_modal == false
          # A decision was queued: consume it and record the dialog message.
          @accept_modal.pop
          @modal_messages << params["message"]
          options = { accept: accept_modal }
          response = @modal_response || params["defaultPrompt"]
        else
          warn "Modal window has been opened, but you didn't wrap your code into (`accept_prompt` | `dismiss_prompt` | `accept_confirm` | `dismiss_confirm` | `accept_alert`), accepting by default"
          options = { accept: true }
          response = params["defaultPrompt"]
        end
        options.merge!(promptText: response) if response
        @client.command("Page.handleJavaScriptDialog", **options)
      end

      @client.on("Page.windowOpen") do
        @browser.targets.refresh
      end

      @client.on("Page.navigatedWithinDocument") do
        @event.set if @waiting_frames.empty?
      end

      @client.on("Page.domContentEventFired") do
        # `frameStoppedLoading` doesn't occur if status isn't success
        if @status != 200
          @event.set
          @document_id = get_document_id
        end
      end

      @client.on("Network.requestWillBeSent") do |params|
        # Possible types:
        # Document, Stylesheet, Image, Media, Font, Script, TextTrack, XHR,
        # Fetch, EventSource, WebSocket, Manifest, SignedExchange, Ping,
        # CSPViolationReport, Other
        if params["frameId"] == @frame_id && params["type"] == "Document"
          @event.reset
          @request_id = params["requestId"]
        end

        id, time = params.values_at("requestId", "wallTime")
        request_data = params["request"].merge("id" => id, "time" => time)
        @network_traffic << Network::Request.new(request_data)
      end

      @client.on("Network.responseReceived") do |params|
        if params["requestId"] == @request_id
          @response_headers = params.dig("response", "headers")
          @status = params.dig("response", "status")
        end

        request = @network_traffic.find { |r| r.id == params["requestId"] }
        if request
          response_data = params["response"].merge("id" => params["requestId"])
          request.response = Network::Response.new(response_data)
        end
      end

      @client.on("Network.loadingFinished") do |params|
        request = @network_traffic.find { |r| r.id == params["requestId"] }
        # Sometimes we never get the Network.responseReceived event.
        # See https://crbug.com/883475
        #
        # Network.loadingFinished's encodedDataLength contains both body and headers
        # sizes received by wire. See https://crbug.com/764946
        response = request&.response
        if response
          response.body_size = params["encodedDataLength"] - response.headers_size
        end
      end

      @client.on("Log.entryAdded") do |params|
        source = params.dig("entry", "source")
        level = params.dig("entry", "level")
        if source == "network" && level == "error"
          id = params.dig("entry", "networkRequestId")
          request = @network_traffic.find { |r| r.id == id }
          request.error = Network::Error.new(params["entry"]) if request
        end
      end
    end

    # Enables the protocol domains used by the page, applies browser options
    # (download path, extensions, window size, url filtering) and fetches the
    # document id for pages that were not opened by typing a url.
    def prepare_page
      command("Page.enable")
      command("DOM.enable")
      command("CSS.enable")
      command("Runtime.enable")
      command("Log.enable")
      command("Network.enable")

      if @browser.options[:save_path]
        command("Page.setDownloadBehavior", behavior: "allow", downloadPath: @browser.options[:save_path])
      end

      @browser.extensions.each do |extension|
        @client.command("Page.addScriptToEvaluateOnNewDocument", source: extension)
      end

      inject_extensions

      width, height = @browser.window_size
      resize(width: width, height: height)

      url_whitelist = Array(@browser.url_whitelist)
      url_blacklist = Array(@browser.url_blacklist)
      intercept_request("*") if !url_whitelist.empty? || !url_blacklist.empty?

      response = command("Page.getNavigationHistory")
      if response.dig("entries", 0, "transitionType") != "typed"
        # If we create page by clicking links, submitting forms and so on it
        # opens a new window for which `frameStoppedLoading` event never
        # occurs and thus search for nodes cannot be completed. Here we check
        # the history and if the transitionType is for example `link` then
        # content is already loaded and we can try to get the document.
        @document_id = get_document_id
      end
    end

    def inject_extensions
      @browser.extensions.each do |extension|
        # https://github.com/GoogleChrome/puppeteer/issues/1443
        # https://github.com/ChromeDevTools/devtools-protocol/issues/77
        # https://github.com/cyrus-and/chrome-remote-interface/issues/319
        # We also evaluate script just in case because
        # `Page.addScriptToEvaluateOnNewDocument` doesn't work in popups.
        @client.command("Runtime.evaluate", expression: extension,
                                            contextId: execution_context_id,
                                            returnByValue: true)
      end
    end

    # Moves `delta` entries through the navigation history. Does nothing when
    # the target entry does not exist.
    #
    # @param delta [Integer] -1 for back, 1 for forward
    def go(delta)
      history = command("Page.getNavigationHistory")
      index, entries = history.values_at("currentIndex", "entries")

      target = index + delta
      # A negative Array index counts from the end, so going back from the
      # first entry would otherwise jump to the *last* history entry.
      return if target.negative?

      entry = entries[target]
      return unless entry

      # Potential wait because of network event
      command("Page.navigateToHistoryEntry", timeout: 0.05, entryId: entry["id"])
    end

    # Resolves `url_or_path` against the browser's base url when it is nil or
    # relative; absolute urls are returned as parsed.
    #
    # @raise [RuntimeError] when a base url is needed but not configured
    def combine_url!(url_or_path)
      url = Addressable::URI.parse(url_or_path)
      nil_or_relative = url.nil? || url.relative?

      if nil_or_relative && !@browser.base_url
        raise "Set :base_url browser's option or use absolute url in `goto`, you passed: #{url_or_path}"
      end

      nil_or_relative ? @browser.base_url.join(url.to_s) : url
    end

    # @return [Object, nil] nodeId of the document root
    def get_document_id
      command("DOM.getDocument", depth: 0).dig("root", "nodeId")
    end
  end
end