capybara-lightpanda 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +50 -0
- data/LICENSE.txt +27 -0
- data/NOTICE.md +101 -0
- data/README.md +215 -0
- data/lib/capybara/lightpanda/binary.rb +190 -0
- data/lib/capybara/lightpanda/browser.rb +963 -0
- data/lib/capybara/lightpanda/client/subscriber.rb +44 -0
- data/lib/capybara/lightpanda/client/web_socket.rb +160 -0
- data/lib/capybara/lightpanda/client.rb +124 -0
- data/lib/capybara/lightpanda/cookies.rb +181 -0
- data/lib/capybara/lightpanda/driver.rb +252 -0
- data/lib/capybara/lightpanda/errors.rb +76 -0
- data/lib/capybara/lightpanda/frame.rb +33 -0
- data/lib/capybara/lightpanda/javascripts/index.js +1108 -0
- data/lib/capybara/lightpanda/keyboard.rb +142 -0
- data/lib/capybara/lightpanda/logger.rb +37 -0
- data/lib/capybara/lightpanda/network.rb +92 -0
- data/lib/capybara/lightpanda/node.rb +726 -0
- data/lib/capybara/lightpanda/options.rb +63 -0
- data/lib/capybara/lightpanda/process.rb +252 -0
- data/lib/capybara/lightpanda/utils/event.rb +37 -0
- data/lib/capybara/lightpanda/version.rb +7 -0
- data/lib/capybara/lightpanda/xpath_polyfill.rb +10 -0
- data/lib/capybara-lightpanda.rb +42 -0
- metadata +119 -0
|
@@ -0,0 +1,963 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "forwardable"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "concurrent-ruby"
|
|
6
|
+
|
|
7
|
+
module Capybara
|
|
8
|
+
module Lightpanda
|
|
9
|
+
class Browser
|
|
10
|
+
extend Forwardable
|
|
11
|
+
|
|
12
|
+
attr_reader :options, :process, :client, :target_id, :session_id, :frame_stack
|
|
13
|
+
|
|
14
|
+
delegate %i[on off] => :client
|
|
15
|
+
|
|
16
|
+
# Lightpanda binary version (e.g. "lightpanda 0.2.9 nightly.5267") and
|
|
17
|
+
# parsed nightly build number, captured at Process startup. nil when
|
|
18
|
+
# the gem is connecting to an externally-managed Lightpanda via ws_url.
|
|
19
|
+
def version
  # Without a managed process (ws_url mode) there is no binary to report on.
  return nil if @process.nil?

  @process.version
end
|
|
22
|
+
|
|
23
|
+
# Nightly build number reported by the managed process; nil when there is none.
def nightly_build
  return nil if @process.nil?

  @process.nightly_build
end
|
|
26
|
+
|
|
27
|
+
# Build the browser state and immediately start (or connect to) Lightpanda.
#
# options - Hash forwarded to Options.new.
def initialize(options = {})
  @options = Options.new(options)

  # Connection state, filled in by #start / #create_page.
  @process = @client = @target_id = @session_id = nil
  @started = false
  @page_events_enabled = false

  # Dialog bookkeeping (see #prepare_modals / #find_modal).
  @modal_responses = []
  @modal_messages = []
  @modal_handler_installed = false

  # Frame tracking: the switch_to_frame stack plus event-fed metadata.
  @frame_stack = []
  @frames = Concurrent::Hash.new

  # The Turbo event starts out set so waits return immediately until a
  # "busy" signal resets it.
  @turbo_event = Utils::Event.new
  @turbo_event.set

  @visited_origins = Concurrent::Set.new

  start
end
|
|
46
|
+
|
|
47
|
+
# Set of `scheme://host:port` strings the browser has navigated to during
|
|
48
|
+
# this session. Used by Cookies#clear to enumerate cookies across all
|
|
49
|
+
# domains: Lightpanda's `Network.getCookies` (no urls param) is scoped
|
|
50
|
+
# to the current page's origin, so without tracked origins we'd miss
|
|
51
|
+
# cookies set on previously-visited domains.
|
|
52
|
+
attr_reader :visited_origins
|
|
53
|
+
|
|
54
|
+
# Launch (or connect to) Lightpanda and open the initial page.
# No-op when already started.
#
# If any step fails after a process or client was created, everything is torn
# down through #quit before the error is re-raised, so a failed start cannot
# leave an orphaned process or a half-initialised client behind.
def start
  return if @started

  begin
    if @options.ws_url?
      # Externally managed browser: connect only, no process to own.
      @client = Client.new(@options.ws_url, @options)
    else
      @process = Process.new(@options)
      @process.start
      @client = Client.new(@process.ws_url, @options)
    end

    create_page
  rescue StandardError
    quit
    raise
  end

  @started = true
end
|
|
69
|
+
|
|
70
|
+
# Open a blank target, attach to it with a flattened session, and wire up
# every per-page subscription.
def create_page
  target = @client.command("Target.createTarget", { url: "about:blank" })
  @target_id = target["targetId"]

  attachment = @client.command("Target.attachToTarget", { targetId: @target_id, flatten: true })
  @session_id = attachment["sessionId"]

  # Fresh page: drop stale frame metadata and mark Turbo idle before the
  # listeners are registered.
  @frames.clear
  @turbo_event.set

  subscribe_to_console_logs
  subscribe_to_execution_context
  subscribe_to_frame_events
  subscribe_to_turbo_signals
  register_auto_scripts
end
|
|
85
|
+
|
|
86
|
+
# Full teardown followed by a fresh start with the same options.
def restart
  quit
  start
end
|
|
90
|
+
|
|
91
|
+
# Recover after a WebSocket disconnect or process crash during navigation.
|
|
92
|
+
# Restarts the process if it died, then creates a fresh client and page.
|
|
93
|
+
def reconnect
  close_client_silently
  restart_process_if_dead

  ws_url = @options.ws_url? ? @options.ws_url : @process&.ws_url
  raise DeadBrowserError, "Cannot reconnect: no WebSocket URL" unless ws_url

  # Domain enablement and event handlers belong to the client/session being
  # replaced. Clear both flags *before* building the new page so that
  #   * Page.enable is re-sent on the new session, and
  #   * prepare_modals re-registers its dialog listener on the new client
  # even if create_page raises part-way through.
  @page_events_enabled = false
  @modal_handler_installed = false

  @client = Client.new(ws_url, @options)
  create_page
  nil
end
|
|
104
|
+
|
|
105
|
+
# Best-effort shutdown: close the client, stop the process, and reset all
# per-connection state so a later #start begins from a clean slate.
# Errors from either teardown step are deliberately ignored.
def quit
  begin
    @client&.close
  rescue StandardError
    nil
  end
  begin
    @process&.stop
  rescue StandardError
    nil
  end

  @client = nil
  @process = nil
  @started = false
  # Page.enable is per session; without this reset #enable_page_events would
  # short-circuit after a restart and the new session would never get it.
  @page_events_enabled = false
  @modal_handler_installed = false
  @frame_stack.clear
end
|
|
122
|
+
|
|
123
|
+
# Send a browser-level CDP command (no session id attached).
def command(method, **params)
  @client.command(method, params)
end
|
|
126
|
+
|
|
127
|
+
# Send a CDP command scoped to the attached page session.
def page_command(method, **params)
  @client.command(method, params, session_id: @session_id)
end
|
|
130
|
+
|
|
131
|
+
# Navigation with readyState fallback.
|
|
132
|
+
#
|
|
133
|
+
# Lightpanda may never fire Page.loadEventFired on complex JS pages
|
|
134
|
+
# (lightpanda-io/browser#1801, #1832). When the event times out,
|
|
135
|
+
# we poll document.readyState as a fallback.
|
|
136
|
+
#
|
|
137
|
+
# Page.navigate is sent asynchronously because Lightpanda may not
|
|
138
|
+
# return the command result until the page is fully loaded (unlike
|
|
139
|
+
# Chrome which returns immediately with frameId/loaderId). If we
|
|
140
|
+
# waited synchronously, the readyState fallback would never be
|
|
141
|
+
# reached on pages that fail to fully load.
|
|
142
|
+
#
|
|
143
|
+
# Uses a single shared deadline so the worst-case wait is 1x timeout,
|
|
144
|
+
# not 2x (lightpanda-io/browser#1849).
|
|
145
|
+
def go_to(url, wait: true, retried: false)
  enable_page_events

  # Either block until the page is considered loaded, or fire and forget.
  wait ? wait_for_page_load(url, retried: retried) : page_command("Page.navigate", url: url)

  record_visited_origin(url)
end
|
|
156
|
+
alias goto go_to
|
|
157
|
+
|
|
158
|
+
# Send Page.enable once per session; later calls are no-ops until the flag
# is cleared again.
def enable_page_events
  unless @page_events_enabled
    page_command("Page.enable")
    @page_events_enabled = true
  end
end
|
|
164
|
+
|
|
165
|
+
# Block up to `timeout` seconds for a default V8 execution context to
|
|
166
|
+
# exist. Returns true if available (immediately or after waiting),
|
|
167
|
+
# false if the timeout elapses with no executionContextCreated event.
|
|
168
|
+
def wait_for_default_context(timeout = 1.0)
  # Delegates to the event toggled in subscribe_to_execution_context.
  @default_context_event.wait(timeout)
end
|
|
171
|
+
|
|
172
|
+
# Run the block; if it raises NoExecutionContextError (the navigation
|
|
173
|
+
# race window — lightpanda-io/browser#2187), wait for the next default
|
|
174
|
+
# context to be signaled by Runtime.executionContextCreated, then
|
|
175
|
+
# retry once. Replaces blind 100 ms sleep retries.
|
|
176
|
+
def with_default_context_wait(timeout: 1.0)
  yield
rescue NoExecutionContextError
  # One retry only, and only once a new default context has been signalled;
  # otherwise surface the original error.
  if wait_for_default_context(timeout)
    yield
  else
    raise
  end
end
|
|
183
|
+
|
|
184
|
+
# Go one entry back in session history and wait for the navigation.
def back
  wait_for_navigation do
    execute("history.back()")
  end
end
|
|
187
|
+
|
|
188
|
+
# Go one entry forward in session history and wait for the navigation.
def forward
  wait_for_navigation do
    execute("history.forward()")
  end
end
|
|
191
|
+
|
|
192
|
+
# Reload the current page via CDP and wait for the navigation.
def refresh
  wait_for_navigation do
    page_command("Page.reload")
  end
end
|
|
195
|
+
alias reload refresh
|
|
196
|
+
|
|
197
|
+
# URL of the current document, read from window.location.
def current_url
  evaluate("window.location.href")
end
|
|
200
|
+
|
|
201
|
+
# Title of the current document.
def title
  evaluate("document.title")
end
|
|
204
|
+
|
|
205
|
+
# Serialized HTML of the whole document (outerHTML of the root element).
def body
  evaluate("document.documentElement.outerHTML")
end
|
|
208
|
+
alias html body
|
|
209
|
+
|
|
210
|
+
# Evaluate JS and return a serialized value.
|
|
211
|
+
# No-args fast path uses Runtime.evaluate; with args we wrap as a function
|
|
212
|
+
# and dispatch via Runtime.callFunctionOn so `arguments[i]` is bound.
|
|
213
|
+
# Both paths use `returnByValue: false` and unwrap so DOM-node returns
|
|
214
|
+
# come back as `{ "__lightpanda_node__" => ... }` for the Driver to wrap.
|
|
215
|
+
def evaluate(expression, *args)
  # With arguments: wrap as a function so `arguments[i]` is bound.
  return call_with_args("function() { return #{expression} }", args) unless args.empty?

  # Without arguments: plain Runtime.evaluate.
  response = page_command("Runtime.evaluate", expression: expression, returnByValue: false, awaitPromise: true)
  raise JavaScriptError, response if response["exceptionDetails"]

  unwrap_call_result(response["result"])
end
|
|
226
|
+
|
|
227
|
+
# Execute JS without returning a value.
|
|
228
|
+
# Runs `expression` for its side effects and always returns nil.
#
# Raises JavaScriptError when the script throws. Previously only the
# with-arguments path (via call_with_args) surfaced exceptions; the
# no-arguments path silently discarded `exceptionDetails`.
def execute(expression, *args)
  if args.empty?
    response = page_command("Runtime.evaluate", expression: expression, returnByValue: false, awaitPromise: false)
    raise JavaScriptError, response if response.is_a?(Hash) && response["exceptionDetails"]
  else
    call_with_args("function() { #{expression} }", args, return_by_value: false)
  end

  nil
end
|
|
238
|
+
|
|
239
|
+
# Evaluate async JS with a callback. The user's script receives
|
|
240
|
+
# the callback as its last argument (`arguments[arguments.length - 1]`),
|
|
241
|
+
# matching Capybara's evaluate_async_script contract.
|
|
242
|
+
def evaluate_async(expression, *args, wait: @options.timeout)
  # The JS-side timer rejects the promise when the callback is never invoked.
  timeout_ms = (wait * 1000).to_i

  # The user script runs with the original args plus a trailing `done`
  # callback; calling it resolves the promise with the supplied value.
  wrapper = <<~JS
    function() {
      var __args = Array.prototype.slice.call(arguments);
      return new Promise(function(__resolve, __reject) {
        var __timer = setTimeout(function() {
          __reject(new Error('Async script timeout after #{timeout_ms}ms'));
        }, #{timeout_ms});
        var __done = function(val) { clearTimeout(__timer); __resolve(val); };
        __args.push(__done);
        (function() { #{expression} }).apply(null, __args);
      });
    }
  JS

  call_with_args(wrapper, args)
end
|
|
259
|
+
|
|
260
|
+
# Evaluate JS and return a RemoteObject reference (for DOM nodes, arrays).
|
|
261
|
+
def evaluate_with_ref(expression)
  response = page_command("Runtime.evaluate", expression: expression, returnByValue: false, awaitPromise: true)
  raise JavaScriptError, response if response["exceptionDetails"]

  # `undefined` maps to nil; everything else is returned as the raw
  # RemoteObject hash.
  remote = response["result"]
  remote["type"] == "undefined" ? nil : remote
end
|
|
270
|
+
|
|
271
|
+
# Call a function on a remote object via Runtime.callFunctionOn.
|
|
272
|
+
# Binds `this` to the DOM element referenced by remote_object_id.
|
|
273
|
+
def call_function_on(remote_object_id, function_declaration, *args, return_by_value: true)
  request = {
    objectId: remote_object_id,
    functionDeclaration: function_declaration,
    returnByValue: return_by_value,
    awaitPromise: true,
  }
  # `arguments` is only sent when at least one argument was given.
  request[:arguments] = args.map { |arg| serialize_argument(arg) } if args.size.positive?

  response = page_command("Runtime.callFunctionOn", **request)
  raise JavaScriptError, response if response["exceptionDetails"]

  remote = response["result"]
  if remote["type"] == "undefined"
    nil
  elsif return_by_value
    remote["value"]
  else
    remote
  end
end
|
|
290
|
+
|
|
291
|
+
# Get properties of a remote object (used to extract array elements).
|
|
292
|
+
def get_object_properties(remote_object_id)
  # ownProperties: true limits the listing to the object's own properties.
  page_command("Runtime.getProperties", objectId: remote_object_id, ownProperties: true)
end
|
|
295
|
+
|
|
296
|
+
# Release a remote object reference to free V8 memory.
|
|
297
|
+
def release_object(remote_object_id)
  begin
    page_command("Runtime.releaseObject", objectId: remote_object_id)
  rescue BrowserError, NoExecutionContextError
    # The handle may already be gone (released earlier or its context was
    # destroyed); nothing to do in that case.
    nil
  end
end
|
|
302
|
+
|
|
303
|
+
# Find elements in the current context (top frame or active frame).
|
|
304
|
+
# Returns an array of remote object ID strings.
|
|
305
|
+
def find(method, selector)
  # An empty frame stack means we are at the top-level document.
  return find_in_document(method, selector) if @frame_stack.empty?

  find_in_frame(method, selector)
end
|
|
312
|
+
|
|
313
|
+
# Find child elements within a specific node.
|
|
314
|
+
# Returns an array of remote object ID strings.
|
|
315
|
+
def find_within(remote_object_id, method, selector)
  # The JS returns an array; keep it as a remote reference so the elements
  # can be pulled out as individual object ids.
  remote_array = call_function_on(remote_object_id, FIND_WITHIN_JS, method, selector, return_by_value: false)
  extract_node_object_ids(remote_array)
end
|
|
319
|
+
|
|
320
|
+
# objectId of document.activeElement, or nil if none/document detached.
|
|
321
|
+
def active_element
  reference = evaluate_with_ref("document.activeElement")
  reference&.dig("objectId")
end
|
|
325
|
+
|
|
326
|
+
# Resolve an objectId to its stable per-page backendNodeId.
|
|
327
|
+
# objectIds are transient (re-issued per Runtime call) but backendNodeId is stable,
|
|
328
|
+
# so this is what we compare for cross-query node equality.
|
|
329
|
+
def backend_node_id(remote_object_id)
  description = page_command("DOM.describeNode", objectId: remote_object_id)
  description.dig("node", "backendNodeId")
end
|
|
332
|
+
|
|
333
|
+
# CDP node ids of all document nodes matching `selector`; [] when the
# response carries no "nodeIds".
def css(selector)
  response = page_command("DOM.querySelectorAll", nodeId: document_node_id, selector: selector)
  response["nodeIds"] || []
end
|
|
337
|
+
|
|
338
|
+
# CDP node id of the first document node matching `selector`.
def at_css(selector)
  page_command("DOM.querySelector", nodeId: document_node_id, selector: selector)["nodeId"]
end
|
|
343
|
+
|
|
344
|
+
# Capture a screenshot through Page.captureScreenshot.
#
# path      - when given (and encoding is not base64) the decoded image is
#             written there and the path is returned.
# format    - image format, Symbol or String (:png, :jpeg, "jpeg", ...).
# quality   - only forwarded for the jpeg format.
# full_page - clip to the layout's reported content size instead of the
#             default capture area.
# encoding  - :base64 returns the raw base64 payload; anything else returns
#             decoded bytes (or `path` when a path was given).
def screenshot(path: nil, format: :png, quality: nil, full_page: false, encoding: :binary)
  image_format = format.to_s
  params = { format: image_format }
  # Compare as strings so `format: "jpeg"` keeps its quality just like
  # `format: :jpeg` does.
  params[:quality] = quality if quality && image_format == "jpeg"

  if full_page
    content_size = page_command("Page.getLayoutMetrics")["contentSize"]
    # Fall back to the default capture area when no content size is
    # reported, rather than failing with NoMethodError on nil.
    if content_size
      params[:clip] = {
        x: 0,
        y: 0,
        width: content_size["width"],
        height: content_size["height"],
        scale: 1,
      }
    end
  end

  data = page_command("Page.captureScreenshot", **params)["data"]
  return data if encoding.to_s == "base64"

  # String#unpack1("m") is exactly what Base64.decode64 does, without
  # depending on the base64 library having been required elsewhere.
  decoded = data.unpack1("m")
  return decoded unless path

  File.binwrite(path, decoded)
  path
end
|
|
377
|
+
|
|
378
|
+
# Wait for any pending Turbo operations to complete. Event-driven: the
|
|
379
|
+
# injected JS in index.js calls `console.debug('__lightpanda_turbo_busy')`
|
|
380
|
+
# when the pending-ops counter rises above 0 and `_idle` when it returns
|
|
381
|
+
# to 0. We toggle @turbo_event accordingly (see subscribe_to_turbo_signals).
|
|
382
|
+
#
|
|
383
|
+
# Pages without Turbo never trigger _turboStart, so no sentinels fire and
|
|
384
|
+
# @turbo_event stays set (initial state) — wait returns immediately. Same
|
|
385
|
+
# for Turbo-loaded pages that have no pending work.
|
|
386
|
+
def wait_for_turbo
  # Bounded by the configured timeout; the event is toggled by the handlers
  # registered in subscribe_to_turbo_signals.
  @turbo_event.wait(@options.timeout)
end
|
|
389
|
+
|
|
390
|
+
# Wait for the page to settle after an action that may have kicked off
|
|
391
|
+
# a Turbo fetch OR a full-page navigation. Used by Node#click and
|
|
392
|
+
# Node#implicit_submit so callers can immediately read updated state
|
|
393
|
+
# (title, current_url, …) without racing the navigation lifecycle.
|
|
394
|
+
#
|
|
395
|
+
# Sniff window: the action returns synchronously, but the CDP events
|
|
396
|
+
# signalling its async fallout (Runtime.executionContextsCleared for
|
|
397
|
+
# full nav; the turbo sentinel for Turbo) arrive later on the dispatch
|
|
398
|
+
# thread. We poll briefly for either signal — if neither fires within
|
|
399
|
+
# the window, assume the action was inert and exit fast.
|
|
400
|
+
SNIFF_WINDOW = 0.05
|
|
401
|
+
private_constant :SNIFF_WINDOW
|
|
402
|
+
|
|
403
|
+
def wait_for_idle
  baseline_iteration = @default_context_event.iteration
  sniff_deadline = monotonic_time + SNIFF_WINDOW

  # Sniff phase: spin briefly until a new default context has been
  # signalled, Turbo reports busy, or the window closes.
  until @default_context_event.iteration > baseline_iteration ||
        !@turbo_event.set? ||
        monotonic_time > sniff_deadline
    sleep 0.001
  end

  # Settle phase: block on whichever signal is pending (both return
  # immediately when already set).
  @default_context_event.wait(@options.timeout)
  @turbo_event.wait(@options.timeout)
end
|
|
417
|
+
|
|
418
|
+
# Lazily built, memoized Keyboard helper bound to this browser.
def keyboard
  @keyboard ||= Keyboard.new(self)
end
|
|
421
|
+
|
|
422
|
+
# Lazily built, memoized Network helper bound to this browser.
def network
  @network ||= Network.new(self)
end
|
|
425
|
+
|
|
426
|
+
# Lazily built, memoized Cookies helper bound to this browser.
def cookies
  @cookies ||= Cookies.new(self)
end
|
|
429
|
+
|
|
430
|
+
# -- Frame Support --
|
|
431
|
+
# Two parallel views of frames:
|
|
432
|
+
#
|
|
433
|
+
# * `frame_stack` (Array<Node>) — the Capybara `switch_to_frame` stack;
|
|
434
|
+
# drives where `find` resolves selectors. Stored as Nodes so
|
|
435
|
+
# callFunctionOn can scope to the iframe's contentDocument.
|
|
436
|
+
#
|
|
437
|
+
# * `@frames` (Concurrent::Hash<String, Frame>) — metadata view
|
|
438
|
+
# populated from Page.frame{Attached,Navigated,Detached,...} events.
|
|
439
|
+
# Used for diagnostics / introspection (frames, main_frame, frame_by).
|
|
440
|
+
# Lightpanda's frame events are not reliable enough to drive
|
|
441
|
+
# navigation waits, so this is read-only metadata.
|
|
442
|
+
|
|
443
|
+
# Enter a frame: `node` becomes the innermost entry of the frame stack.
def push_frame(node)
  @frame_stack.push(node)
end
|
|
446
|
+
|
|
447
|
+
# Leave the innermost frame; returns the removed node (nil when empty).
def pop_frame
  @frame_stack.pop
end
|
|
450
|
+
|
|
451
|
+
# Return to the top-level document by emptying the frame stack.
def clear_frames
  @frame_stack.clear
end
|
|
454
|
+
|
|
455
|
+
# All frames currently attached to the page (main frame + iframes).
|
|
456
|
+
def frames
  # Returns a fresh array of the tracked frame objects.
  @frames.values
end
|
|
459
|
+
|
|
460
|
+
# The top-level frame, or nil if it hasn't been registered yet (events
|
|
461
|
+
# arrive asynchronously after Page.enable).
|
|
462
|
+
def main_frame
  @frames.each_value.find { |frame| frame.main? }
end
|
|
465
|
+
|
|
466
|
+
# Look up a tracked frame by id (takes precedence) or by name.
# Returns nil when neither key is given or nothing matches.
def frame_by(id: nil, name: nil)
  return @frames[id] if id
  return nil unless name

  @frames.each_value.find { |frame| frame.name == name }
end
|
|
473
|
+
|
|
474
|
+
# -- Modal/Dialog Support --
|
|
475
|
+
# Lightpanda auto-dismisses dialogs in headless mode: alert→OK,
|
|
476
|
+
# confirm→false, prompt→null. Page.javascriptDialogOpening fires
|
|
477
|
+
# (since 2026-04-03), so we capture messages for find_modal, but
|
|
478
|
+
# Page.handleJavaScriptDialog always errors with "No dialog is showing"
|
|
479
|
+
# and we never call it (the dispatch thread cannot make synchronous
|
|
480
|
+
# CDP calls without deadlocking). @modal_responses is retained so
|
|
481
|
+
# accept_modal/dismiss_modal preserve their API contract; the
|
|
482
|
+
# accept/dismiss choice is informational only.
|
|
483
|
+
|
|
484
|
+
def prepare_modals
  return if @modal_handler_installed

  enable_page_events

  # Record every dialog that opens and consume one queued response for it.
  on("Page.javascriptDialogOpening") do |dialog|
    @modal_messages << { type: dialog["type"], message: dialog["message"] }
    @modal_responses.shift
  end

  @modal_handler_installed = true
end
|
|
496
|
+
|
|
497
|
+
# Queue an "accept" response (with optional prompt text) for the next dialog.
def accept_modal(type, text: nil)
  prepare_modals
  @modal_responses.push({ accept: true, text: text, type: type.to_s })
end
|
|
501
|
+
|
|
502
|
+
# Queue a "dismiss" response for the next dialog.
def dismiss_modal(type)
  prepare_modals
  @modal_responses.push({ accept: false, type: type.to_s })
end
|
|
506
|
+
|
|
507
|
+
# Wait up to `wait` seconds for a captured dialog of `type` whose message
# matches `text` (String substring or Regexp; nil matches anything), remove
# it from the captured list and return its message.
#
# Every captured dialog of the requested type is considered on each poll, so
# a matching dialog is found even when an earlier, non-matching one of the
# same type is still queued. Only the matched entry is removed; other
# dialogs with an identical type and message stay queued.
#
# Raises Capybara::ModalNotFound (via raise_modal_not_found) on timeout.
def find_modal(type, text: nil, wait: options.timeout)
  wanted_type = type.to_s
  regexp = text.is_a?(Regexp) ? text : (text && Regexp.new(Regexp.escape(text.to_s)))
  deadline = monotonic_time + wait
  last_message = nil

  loop do
    candidates = @modal_messages.select { |m| m[:type] == wanted_type }
    # `to_s` keeps a dialog with a nil message from raising NoMethodError.
    match = candidates.find { |m| regexp.nil? || m[:message].to_s.match?(regexp) }

    if match
      # Remove by identity: Array#delete would drop every equal entry.
      index = @modal_messages.index { |m| m.equal?(match) }
      @modal_messages.delete_at(index) if index
      return match[:message]
    end

    # Remember the latest same-type message for the error text.
    last_message = candidates.last[:message] unless candidates.empty?
    break if monotonic_time > deadline

    sleep 0.05
  end

  raise_modal_not_found(text, last_message)
end
|
|
526
|
+
|
|
527
|
+
# Forget all queued responses and captured dialog messages.
def reset_modals
  @modal_responses.clear
  @modal_messages.clear
end
|
|
531
|
+
|
|
532
|
+
private
|
|
533
|
+
|
|
534
|
+
# Raise Capybara::ModalNotFound; the message mentions the last dialog text
# seen when there was one.
def raise_modal_not_found(text, last_message)
  message =
    if last_message
      "Unable to find modal dialog with #{text} - found '#{last_message}' instead."
    else
      "Unable to find modal dialog#{" with #{text}" if text}"
    end

  raise Capybara::ModalNotFound, message
end
|
|
541
|
+
|
|
542
|
+
# JS function for finding elements within a node.
|
|
543
|
+
# Works in any execution context (top frame or iframe).
|
|
544
|
+
FIND_WITHIN_JS = <<~JS
|
|
545
|
+
function(method, selector) {
|
|
546
|
+
if (method === 'xpath') {
|
|
547
|
+
if (typeof _lightpanda !== 'undefined') return _lightpanda.xpathFind(selector, this);
|
|
548
|
+
return [];
|
|
549
|
+
}
|
|
550
|
+
try { return Array.from(this.querySelectorAll(selector)); } catch(e) { return []; }
|
|
551
|
+
}
|
|
552
|
+
JS
|
|
553
|
+
|
|
554
|
+
# JS function for finding elements in an iframe's contentDocument.
|
|
555
|
+
FIND_IN_FRAME_JS = <<~JS
|
|
556
|
+
function(method, selector) {
|
|
557
|
+
var doc;
|
|
558
|
+
try { doc = this.contentDocument || (this.contentWindow && this.contentWindow.document); } catch(e) {}
|
|
559
|
+
if (!doc) return [];
|
|
560
|
+
if (method === 'xpath') {
|
|
561
|
+
if (typeof _lightpanda !== 'undefined') return _lightpanda.xpathFind(selector, doc);
|
|
562
|
+
return [];
|
|
563
|
+
}
|
|
564
|
+
try { return Array.from(doc.querySelectorAll(selector)); } catch(e) { return []; }
|
|
565
|
+
}
|
|
566
|
+
JS
|
|
567
|
+
|
|
568
|
+
# Query the top-level document and return matching nodes' object ids.
# `method` "xpath" goes through the injected _lightpanda helper (yielding no
# matches when it is absent); anything else is a querySelectorAll lookup.
def find_in_document(method, selector)
  with_default_context_wait do
    # Stringify first: a Symbol's #inspect (`:p`) would otherwise end up in
    # the JS source as a bare token.
    quoted = selector.to_s.inspect

    script =
      if method == "xpath"
        "(typeof _lightpanda !== 'undefined') ? _lightpanda.xpathFind(#{quoted}, document) : []"
      else
        "(function() { try { return Array.from(document.querySelectorAll(#{quoted})); } " \
          "catch(e) { return []; } })()"
      end

    extract_node_object_ids(evaluate_with_ref(script))
  end
end
|
|
584
|
+
|
|
585
|
+
# Query inside the innermost frame on the frame stack.
def find_in_frame(method, selector)
  iframe = @frame_stack.last
  remote_array = call_function_on(
    iframe.remote_object_id, FIND_IN_FRAME_JS, method, selector, return_by_value: false
  )
  extract_node_object_ids(remote_array)
end
|
|
591
|
+
|
|
592
|
+
# Extract individual node objectIds from a remote array reference.
|
|
593
|
+
# Turn a remote array reference into the objectIds of its elements, in index
# order. Returns [] when `result` carries no objectId or when reading the
# array's properties fails (best-effort, as before).
#
# The array handle is released in `ensure`, so it no longer leaks when
# property extraction raises, and a failure while releasing no longer throws
# away ids that were already collected.
def extract_node_object_ids(result)
  array_id = result && result["objectId"]
  return [] unless array_id

  begin
    properties = get_object_properties(array_id)["result"] || []

    properties
      .select { |prop| prop["name"].to_s.match?(/\A\d+\z/) } # index keys only
      .sort_by { |prop| prop["name"].to_i }
      .filter_map { |prop| prop.dig("value", "objectId") }
  rescue StandardError
    []
  ensure
    begin
      release_object(array_id)
    rescue StandardError
      nil
    end
  end
end
|
|
609
|
+
|
|
610
|
+
# Register the XPath polyfill source to be evaluated on every new document.
def register_auto_scripts
  page_command("Page.addScriptToEvaluateOnNewDocument", source: XPathPolyfill::JS)
end
|
|
613
|
+
|
|
614
|
+
# Forward page console output to the configured logger, if any.
# Turbo sentinel strings are internal signalling and are not logged.
def subscribe_to_console_logs
  sink = @options.logger
  return unless sink

  on("Runtime.consoleAPICalled") do |params|
    params["args"]&.each do |remote_arg|
      payload = remote_arg["value"]
      sink.puts(payload) unless payload.is_a?(String) && payload.start_with?(TURBO_SENTINEL_PREFIX)
    end
  end
end
|
|
627
|
+
|
|
628
|
+
TURBO_SENTINEL_PREFIX = "__lightpanda_turbo_"
|
|
629
|
+
private_constant :TURBO_SENTINEL_PREFIX
|
|
630
|
+
|
|
631
|
+
# Wire @turbo_event to the JS-side _signalTurbo emissions. The JS calls
|
|
632
|
+
# console.debug('__lightpanda_turbo_busy') / '_idle' on transitions across
|
|
633
|
+
# zero pending ops; Lightpanda forwards those to Runtime.consoleAPICalled.
|
|
634
|
+
# Idle → set the event (wakes any waiter); busy → reset.
|
|
635
|
+
#
|
|
636
|
+
# On Runtime.executionContextsCleared (navigation), unconditionally set
|
|
637
|
+
# the event: if we navigated away mid-busy state, no further idle signal
|
|
638
|
+
# would ever come from the old context, and we'd block for the full
|
|
639
|
+
# timeout. The new context will signal busy again if Turbo is active.
|
|
640
|
+
def subscribe_to_turbo_signals
  on("Runtime.consoleAPICalled") do |params|
    console_args = params["args"]
    next unless console_args.is_a?(Array)

    # Only the first console argument can carry a sentinel.
    marker = console_args.first&.dig("value")
    next unless marker.is_a?(String) && marker.start_with?(TURBO_SENTINEL_PREFIX)

    if marker == "#{TURBO_SENTINEL_PREFIX}busy"
      @turbo_event.reset
    elsif marker == "#{TURBO_SENTINEL_PREFIX}idle"
      @turbo_event.set
    end
  end

  # Contexts cleared: no idle signal can arrive from the old context
  # anymore, so release any waiter unconditionally.
  on("Runtime.executionContextsCleared") { @turbo_event.set }
end
|
|
655
|
+
|
|
656
|
+
# Maintain @frames from Page.frame* events. Subscribed once per page
|
|
657
|
+
# (create_page resets @frames and re-subscribes on a fresh client, so
|
|
658
|
+
# handlers don't accumulate across reconnects). Loading-state events
|
|
659
|
+
# are best-effort: Lightpanda's Page.frameStoppedLoading is unreliable
|
|
660
|
+
# on complex pages (#1801), so we track state for diagnostics only.
|
|
661
|
+
def subscribe_to_frame_events
  on("Page.frameAttached") { |event| handle_frame_attached(event) }
  on("Page.frameNavigated") { |event| handle_frame_navigated(event) }

  # The two loading events only record a state label on an already-known frame.
  {
    "Page.frameStartedLoading" => :started_loading,
    "Page.frameStoppedLoading" => :stopped_loading,
  }.each do |event_name, state|
    on(event_name) { |event| set_frame_state(event["frameId"], state) }
  end

  on("Page.frameDetached") { |event| handle_frame_detached(event) }
end
|
|
668
|
+
|
|
669
|
+
# Register a newly attached frame unless it is already tracked.
def handle_frame_attached(params)
  frame_id = params["frameId"]
  @frames[frame_id] ||= Frame.new(frame_id, params["parentFrameId"])
end
|
|
673
|
+
|
|
674
|
+
# Record name/url for a navigated frame, creating the entry when the frame
# was not seen before. Events without a frame id are ignored.
def handle_frame_navigated(params)
  payload = params["frame"] || {}
  id = payload["id"]
  return unless id

  tracked = (@frames[id] ||= Frame.new(id, payload["parentId"]))
  tracked.name = payload["name"]
  tracked.url = payload["url"]
  tracked.state = :navigated
end
|
|
684
|
+
|
|
685
|
+
# Stop tracking a detached frame and mark the removed object as :detached.
def handle_frame_detached(params)
  removed = @frames.delete(params["frameId"])
  removed.state = :detached if removed
end
|
|
689
|
+
|
|
690
|
+
# Update the state of a tracked frame; unknown frame ids are ignored.
def set_frame_state(frame_id, state)
  tracked = @frames[frame_id]
  return unless tracked

  tracked.state = state
end
|
|
694
|
+
|
|
695
|
+
# Track default-execution-context availability via Runtime events.
|
|
696
|
+
# Lightpanda destroys the V8 default context at navigation start (long
|
|
697
|
+
# before frameNavigated fires), then re-creates it once the new page
|
|
698
|
+
# commits. During the gap, Runtime.evaluate / callFunctionOn rejects
|
|
699
|
+
# with "Cannot find default execution context"
|
|
700
|
+
# (lightpanda-io/browser#2187). We watch executionContextsCleared /
|
|
701
|
+
# executionContextCreated and use the resulting Utils::Event to
|
|
702
|
+
# gate retries deterministically instead of blind sleeping.
|
|
703
|
+
def subscribe_to_execution_context
  # Starts out "available"; it is reset when contexts are cleared and set
  # again once a default context is reported as created.
  @default_context_event = Utils::Event.new
  @default_context_event.set

  on("Runtime.executionContextsCleared") { @default_context_event.reset }

  on("Runtime.executionContextCreated") do |params|
    # Only contexts flagged as default count.
    next unless params.dig("context", "auxData", "isDefault")

    @default_context_event.set
  end

  # Enabled last, after both handlers are registered.
  page_command("Runtime.enable")
end
|
|
714
|
+
|
|
715
|
+
# Convert a Ruby argument into a CDP CallArgument: objects exposing
# `remote_object_id` are passed by reference, everything else by value.
def serialize_argument(arg)
  return { objectId: arg.remote_object_id } if arg.respond_to?(:remote_object_id)

  { value: arg }
end
|
|
722
|
+
|
|
723
|
+
# CDP node id of the document root, fetched via DOM.getDocument on each call.
def document_node_id
  page_command("DOM.getDocument").dig("root", "nodeId")
end
|
|
728
|
+
|
|
729
|
+
# Unpack a by-value Runtime response: raise on script exceptions, map
# `undefined` to nil, otherwise return the serialized value.
def handle_evaluate_response(response)
  raise JavaScriptError, response if response["exceptionDetails"]

  payload = response["result"]
  payload["type"] == "undefined" ? nil : payload["value"]
end
|
|
737
|
+
|
|
738
|
+
# Invoke `function_declaration` through Runtime.callFunctionOn so that the JS
# `arguments` object is populated. `document` is used as the call's `this`.
#
# Each entry of `args` goes through `serialize_argument` (Nodes → objectId,
# scalars → value). With `return_by_value: false` (the default) the result is
# passed to `unwrap_call_result`, so DOM nodes come back as
# `{ "__lightpanda_node__" => ... }` hashes the Driver can wrap as Capybara
# nodes; with `return_by_value: true` it goes through `handle_evaluate_response`.
#
# Raises JavaScriptError when the response contains `exceptionDetails`.
#
# NOTE(review): the `document` handle resolved for each call is not released
# here — confirm whether that is intentional.
def call_with_args(function_declaration, args, return_by_value: false)
  this_id = document_object_id
  call_arguments = args.map { |arg| serialize_argument(arg) }

  response = page_command(
    "Runtime.callFunctionOn",
    objectId: this_id,
    functionDeclaration: function_declaration,
    returnByValue: return_by_value,
    awaitPromise: true,
    arguments: call_arguments
  )
  raise JavaScriptError, response if response["exceptionDetails"]

  return handle_evaluate_response(response) if return_by_value

  unwrap_call_result(response["result"])
end
|
|
756
|
+
|
|
757
|
+
# Turn a by-reference Runtime result into a plain Ruby value.
#
# * "undefined" type or "null" subtype → nil
# * node handle   → `{ "__lightpanda_node__" => objectId }` so the Driver can
#   wrap it. A sentinel key is used (rather than a plain "objectId") so that
#   user JS legitimately returning `{ objectId: "x" }` is not misclassified.
# * array handle  → `serialize_remote_array`
# * any other object handle → `serialize_remote_object`
# * everything else → the result's "value" entry
def unwrap_call_result(result)
  type = result["type"]
  subtype = result["subtype"]
  return nil if type == "undefined" || subtype == "null"

  handle = result["objectId"]
  if handle
    # Subtype is checked before the generic object fallback.
    case subtype
    when "node" then return { "__lightpanda_node__" => handle }
    when "array" then return serialize_remote_array(handle)
    end
    return serialize_remote_object(handle) if type == "object"
  end

  result["value"]
end
|
|
774
|
+
|
|
775
|
+
# Fetch a plain remote object again as a JSON-serializable value by calling an
# identity function on it with `returnByValue: true`. Cheaper than walking
# properties and good enough for shared specs.
#
# The original handle is released afterwards (also when the call raises) so
# long-lived sessions don't accumulate leaked objectIds.
def serialize_remote_object(object_id)
  identity_call = {
    objectId: object_id,
    functionDeclaration: "function() { return this }",
    returnByValue: true
  }
  handle_evaluate_response(page_command("Runtime.callFunctionOn", **identity_call))
ensure
  release_object(object_id)
end
|
|
789
|
+
|
|
790
|
+
# Read a remote array element by element via `Runtime.getProperties`.
#
# Only enumerable properties whose name is purely numeric are kept, ordered by
# numeric index, and each one is run through `unwrap_call_result` so DOM-node
# entries surface as `{ "__lightpanda_node__" => ... }` instead of being
# flattened to `{}` by `returnByValue: true`. A property without a "value"
# entry is unwrapped from an empty hash.
#
# The outer array's objectId is released once its elements are harvested
# (also when harvesting raises).
def serialize_remote_array(object_id)
  indexed = get_object_properties(object_id).fetch("result", []).select do |prop|
    prop["enumerable"] && /\A\d+\z/.match?(prop["name"])
  end

  indexed
    .sort_by { |prop| prop["name"].to_i }
    .map { |prop| unwrap_call_result(prop["value"] || {}) }
ensure
  release_object(object_id)
end
|
|
804
|
+
|
|
805
|
+
# Resolve the objectId of `document` by evaluating it by reference. It serves
# as the `this` context for callFunctionOn when we need `arguments` binding but
# don't care about `this`. Looked up on every call because navigation
# invalidates the document objectId.
def document_object_id
  page_command("Runtime.evaluate", expression: "document", returnByValue: false)
    .dig("result", "objectId")
end
|
|
812
|
+
|
|
813
|
+
# Navigate to `url` and block until the page looks loaded or the
# `@options.timeout` budget is spent, then run the crash check.
#
# url     - address passed to Page.navigate.
# retried - forwarded to `handle_navigation_crash`; true when this call is
#           already the retry after a crash.
#
# Returns whatever `handle_navigation_crash` returns, and propagates anything
# it raises.
def wait_for_page_load(url, retried:)
  starting_url = safe_current_url
  deadline = monotonic_time + @options.timeout
  loaded = Utils::Event.new

  handler = proc { loaded.set }
  @client.on("Page.loadEventFired", &handler)

  begin
    @client.command("Page.navigate", { url: url }, async: true, session_id: @session_id)

    # Give loadEventFired a brief window (fast path), then fall back
    # to readyState polling with the remaining budget.
    unless loaded.wait([2, @options.timeout].min)
      remaining = deadline - monotonic_time
      poll_ready_state(remaining, loaded_event: loaded, starting_url: starting_url) if remaining.positive?
    end
  ensure
    # Unsubscribe even when the navigate command or the poll raises, so a
    # failed navigation does not leave a stale handler on the client. This
    # runs before the crash check because that step may reconnect.
    @client.off("Page.loadEventFired", handler)
  end

  handle_navigation_crash(url, deadline, retried: retried)
end
|
|
833
|
+
|
|
834
|
+
# Recover from Lightpanda killing the WebSocket or crashing during complex
# page navigation (lightpanda-io/browser#1849, #1854).
#
# When the client is closed and this is the first attempt, reconnect and, if
# any of the `deadline` budget is left, navigate again via `go_to` with
# `retried: true`. A DeadBrowserError from that retry propagates; any other
# StandardError from the reconnect/retry is swallowed.
#
# If the client is still closed afterwards, one more best-effort reconnect is
# made and DeadBrowserError is raised, so callers get a clear error instead of
# a client left in a dead state. Returns nil when the client is open.
#
# NOTE(review): when reconnect succeeds but no budget remains, the method
# returns without retrying or raising — confirm this is intended.
def handle_navigation_crash(url, deadline, retried:)
  if @client.closed? && !retried
    begin
      reconnect
      budget = deadline - monotonic_time
      go_to(url, wait: true, retried: true) if budget.positive?
    rescue StandardError => e
      raise if e.is_a?(DeadBrowserError)
      # Anything else (e.g. reconnect itself failed: process won't restart,
      # port stuck, etc.) is ignored; the closed-client check below decides.
    end
  end

  return unless @client.closed?

  begin
    reconnect
  rescue StandardError
    nil
  end
  raise DeadBrowserError, "Lightpanda crashed navigating to #{url}"
end
|
|
860
|
+
|
|
861
|
+
# Closes the current client, if there is one, ignoring any StandardError the
# close raises. Returns the result of `close`, or nil when there is no client
# or the close failed.
def close_client_silently
  client = @client
  return if client.nil?

  client.close
rescue StandardError
  nil
end
|
|
866
|
+
|
|
867
|
+
# Restarts the browser process when one is configured and it reports that it
# is no longer alive. `stop` is attempted first and any StandardError it
# raises is ignored; errors from `start` propagate. Does nothing (returns nil)
# when there is no process or it is still alive.
def restart_process_if_dead
  return if !@process || @process.alive?

  begin
    @process.stop
  rescue StandardError
    nil
  end

  @process.start
end
|
|
877
|
+
|
|
878
|
+
# Best-effort variant of `current_url`: returns its value, or nil when it
# raises a StandardError.
def safe_current_url
  url = current_url
  url
rescue StandardError
  nil
end
|
|
883
|
+
|
|
884
|
+
# Wait for a navigation triggered by the given block.
# Uses the same loadEventFired + readyState fallback as go_to: after the block
# runs, Page.loadEventFired gets up to min(2, @options.timeout) seconds, then
# readyState is polled with whatever remains of the timeout budget.
#
# The temporary Page.loadEventFired handler is always removed, including when
# the block (or the polling) raises; the exception itself still propagates.
# The return value is not meaningful.
def wait_for_navigation
  enable_page_events

  starting_url = safe_current_url
  deadline = monotonic_time + @options.timeout
  loaded = Utils::Event.new
  handler = proc { loaded.set }
  @client.on("Page.loadEventFired", &handler)

  begin
    yield

    unless loaded.wait([2, @options.timeout].min)
      remaining = deadline - monotonic_time
      poll_ready_state(remaining, loaded_event: loaded, starting_url: starting_url) if remaining.positive?
    end
  ensure
    # The yielded block runs arbitrary caller code; without this a raising
    # block would leave the handler subscribed on the client.
    @client.off("Page.loadEventFired", handler)
  end
end
|
|
904
|
+
|
|
905
|
+
# Fallback for when Page.loadEventFired doesn't fire: repeatedly check the
# page's readiness, pausing 0.1s between attempts, until one of these holds
# (checked in this order):
#
# 1. `loaded_event` (if given) has been set,
# 2. the client is closed,
# 3. `page_ready?` reports the page ready,
# 4. `timeout` seconds have passed since the poll started.
#
# When starting_url is provided it is handed to `page_ready?`, so readyState
# values from the old page are ignored (e.g. about:blank reports "complete"
# while the new page is still loading in the background).
def poll_ready_state(timeout, loaded_event: nil, starting_url: nil)
  deadline = monotonic_time + timeout
  # Each evaluation gets its own timeout (a fifth of the budget, at least 2s)
  # because Lightpanda may block all commands while navigating. Without this,
  # a single evaluate() call would consume the entire @options.timeout, making
  # the poll loop effectively a single attempt.
  per_command_timeout = [timeout / 5.0, 2].max

  loop do
    finished = loaded_event&.set? ||
               @client.closed? ||
               page_ready?(per_command_timeout, starting_url) ||
               monotonic_time > deadline
    break if finished

    sleep 0.1
  end
end
|
|
926
|
+
|
|
927
|
+
# JavaScript expression evaluated by page_ready?. It returns an object with
# the document's readyState under `r` and the current location.href under `u`.
POLL_STATE_JS = "(function(){return{r:document.readyState,u:location.href}})()"
|
|
928
|
+
|
|
929
|
+
# Single readiness probe used by the readyState poll.
#
# Evaluates POLL_STATE_JS by value with `cmd_timeout` as the command timeout
# and returns true only when the reported readyState is "complete" or
# "interactive" and — if `starting_url` is given — the reported URL differs
# from it. Returns false when no value comes back or when the command raises
# any StandardError.
def page_ready?(cmd_timeout, starting_url)
  evaluation = { expression: POLL_STATE_JS, returnByValue: true, awaitPromise: true }
  reply = @client.command("Runtime.evaluate", evaluation, session_id: @session_id, timeout: cmd_timeout)

  snapshot = reply.dig("result", "value")
  return false unless snapshot
  # Still on the page we started from: its readyState says nothing about the new one.
  return false unless starting_url.nil? || snapshot["u"] != starting_url

  %w[complete interactive].include?(snapshot["r"])
rescue StandardError
  false
end
|
|
944
|
+
|
|
945
|
+
# Current reading of the monotonic clock in (float) seconds; used for
# deadlines so wall-clock adjustments don't affect them. `::Process` is
# written with a leading `::` to reference the top-level Process module.
def monotonic_time
  ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, :float_second)
end
|
|
948
|
+
|
|
949
|
+
# Capture `scheme://host:port` from a navigated URL so Cookies#clear can
# enumerate cookies across all visited domains.
#
# Nothing is recorded for URLs without a scheme or without a usable host:
# opaque URLs (about:blank, data:, etc.), `file:` URLs (whose host parses as
# an empty string rather than nil), and anything the URI parser rejects.
# When the parsed URI has no port, 443 is used for https and 80 otherwise.
#
# Returns the result of appending to @visited_origins, or nil when skipped.
def record_visited_origin(url)
  uri = URI.parse(url)
  host = uri.host
  # An empty host is truthy in Ruby, so check it explicitly; otherwise
  # file:///path would be stored as the meaningless origin "file://:80".
  return if uri.scheme.nil? || host.nil? || host.empty?

  port = uri.port || (uri.scheme == "https" ? 443 : 80)
  @visited_origins << "#{uri.scheme}://#{host}:#{port}"
rescue URI::InvalidURIError, NoMethodError
  nil
end
|
|
961
|
+
end
|
|
962
|
+
end
|
|
963
|
+
end
|