capybara-lightpanda 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Capybara
4
+ module Lightpanda
5
+ class Browser
6
+ # Navigation lifecycle: go_to / back / forward / refresh and the
7
+ # Page.loadEventFired + readyState-polling machinery behind them
8
+ # (the polling fallback is load-bearing — see CLAUDE.md).
9
+ module Navigation
10
# Navigate the page to `url`.
#
# With `wait: true` (the default) the call goes through
# `wait_for_page_load`, which sends Page.navigate asynchronously and then
# waits on Page.loadEventFired with a document.readyState polling fallback.
# Background:
#
# * Lightpanda may never fire Page.loadEventFired on complex JS pages
#   (lightpanda-io/browser#1801, #1832), hence the readyState fallback.
# * Page.navigate is sent asynchronously because Lightpanda may not return
#   the command result until the page is fully loaded (unlike Chrome, which
#   returns immediately with frameId/loaderId). A synchronous send would
#   keep the fallback from being reached on pages that never fully load.
# * A single shared deadline covers both phases so the worst-case wait is
#   1x timeout, not 2x (lightpanda-io/browser#1849).
#
# With `wait: false` the command is issued through `page_command` and its
# result is returned with no load waiting.
# NOTE(review): this path does not use the async send described above —
# confirm `page_command` cannot block on slow pages.
#
# Page events are enabled first in both cases. `goto` is an alias.
def go_to(url, wait: true)
  enable_page_events
  return wait_for_page_load(url) if wait

  page_command("Page.navigate", url: url)
end
alias goto go_to
34
+
35
# Step one entry back in the session history and wait for the resulting
# navigation to settle.
def back
  wait_for_navigation do
    navigate_history(-1)
  end
end
38
+
39
# Step one entry forward in the session history and wait for the resulting
# navigation to settle.
def forward
  wait_for_navigation do
    navigate_history(+1)
  end
end
42
+
43
# Reload the current page via Page.reload and wait for the navigation to
# settle. `reload` is an alias.
def refresh
  wait_for_navigation do
    page_command("Page.reload")
  end
end
alias reload refresh
47
+
48
+ private
49
+
50
# Send Page.navigate asynchronously inside `await_navigation`, then hand
# the returned deadline to `handle_navigation_crash` so it can decide
# whether a reconnect-and-retry is needed. `retried` marks the second
# attempt so recovery happens at most once.
def wait_for_page_load(url, retried: false)
  navigate = proc do
    @client.command("Page.navigate", { url: url }, async: true, session_id: @session_id)
  end
  deadline = await_navigation(&navigate)
  handle_navigation_crash(url, deadline, retried: retried)
end
56
+
57
# Crash recovery after a navigation attempt.
#
# Lightpanda may kill the WebSocket or crash during complex page
# navigation (lightpanda-io/browser#1849, #1854). When the client is
# closed and this is the first attempt, reconnect and, if the shared
# deadline still has budget, re-run the navigation once
# (`retried: true`). If the client is still closed afterwards, raise
# DeadBrowserError rather than leave the caller with a dead session.
#
# Returns nil when the client is open on exit.
def handle_navigation_crash(url, deadline, retried:)
  crashed = @client.closed?
  if crashed && !retried
    begin
      reconnect
      if (deadline - monotonic_time).positive?
        # Equivalent of re-entering go_to without exposing the retry flag
        # on its public signature. Page events must be enabled again:
        # reconnect's clear_session_state reset the flag.
        enable_page_events
        wait_for_page_load(url, retried: true)
      end
    rescue StandardError => e
      # A DeadBrowserError from the retry propagates untouched. Anything
      # else means reconnect itself failed (process won't restart, port
      # stuck, etc.); fall through to the final check instead of
      # attempting a second immediate reconnect.
      raise if e.is_a?(DeadBrowserError)
    end
  end

  raise DeadBrowserError, "Lightpanda crashed navigating to #{url}" if @client.closed?

  nil
end
86
+
87
# Best-effort read of the current URL: returns whatever `current_url`
# returns, or nil if it raises any StandardError.
def safe_current_url
  begin
    current_url
  rescue StandardError
    nil
  end
end
92
+
93
# Run the given block as a navigation trigger and wait for it to settle,
# using the same loadEventFired + readyState fallback as go_to. Page
# events are enabled first; the result of `await_navigation` is returned.
def wait_for_navigation(&trigger)
  enable_page_events
  await_navigation(&trigger)
end
99
+
100
# Move through the session history by `offset` (-1 = back, +1 = forward)
# with native CDP: `Page.getNavigationHistory` supplies the entry list and
# `currentIndex`, and `Page.navigateToHistoryEntry` jumps to the target
# entry's `id`.
#
# Returns nil without sending a second command when the target index falls
# outside the entry list, matching `history.back()` / `history.forward()`
# on a bounded session history.
# NOTE(review): callers run this inside wait_for_navigation, which still
# waits for a load signal when nothing was sent — confirm the no-op case
# does not stall until the timeout.
def navigate_history(offset)
  history = page_command("Page.getNavigationHistory")
  target = history["currentIndex"] + offset
  entries = history["entries"]
  out_of_range = target.negative? || entries.length <= target
  return if out_of_range

  page_command("Page.navigateToHistoryEntry", entryId: entries[target]["id"])
end
114
+
115
# Navigation lifecycle shared by `wait_for_page_load` (fresh
# `Page.navigate`) and `wait_for_navigation` (back / forward / reload).
#
# Steps: record the current URL, fix a deadline of now + @options.timeout,
# subscribe to Page.loadEventFired, run the trigger block, wait up to
# min(2, timeout) seconds for the event, and if it has not fired spend
# whatever budget remains in `poll_ready_state`.
#
# The subscription is removed in `ensure`, so a raising trigger cannot
# leak a handler onto the next navigation. Returns the deadline so the
# caller can decide whether to attempt crash recovery.
def await_navigation
  url_before = safe_current_url
  deadline = monotonic_time + @options.timeout
  load_fired = Utils::Event.new
  on_load = proc { load_fired.set }
  @client.on("Page.loadEventFired", &on_load)

  begin
    yield

    fired = load_fired.wait([2, @options.timeout].min)
    unless fired
      left = deadline - monotonic_time
      poll_ready_state(left, loaded_event: load_fired, starting_url: url_before) if left.positive?
    end
  ensure
    @client.off("Page.loadEventFired", on_load)
  end

  deadline
end
142
+
143
# Poll document.readyState as a fallback when Page.loadEventFired doesn't
# fire (CLAUDE.md rules call this out as load-bearing — do not remove).
#
# `timeout` is the remaining navigation budget in seconds. Polling stops as
# soon as `loaded_event` is set, the client is closed, or `page_ready?`
# reports true. When `starting_url` is given it is forwarded to
# `page_ready?` so readyState values from the old page are ignored (e.g.
# about:blank reports "complete" while the new page is still loading).
#
# Returns the result of Utils::Wait.until, or nil when the budget runs out:
# TimeoutError is expected here, and the caller (await_navigation) carries
# on so handle_navigation_crash can decide whether the session is
# recoverable.
def poll_ready_state(timeout, loaded_event: nil, starting_url: nil)
  # Use a short per-evaluation timeout because Lightpanda may block all
  # commands while navigating; otherwise a single evaluate could consume
  # the whole budget and the loop would be a single attempt. The value is
  # max(timeout / 5, 2s), clamped to `timeout` itself so one command can
  # never outlive the remaining budget and push the total wait past the
  # shared deadline.
  poll_cmd_timeout = [[timeout / 5.0, 2].max, timeout].min

  Utils::Wait.until(timeout: timeout, interval: 0.1) do
    loaded_event&.set? || @client.closed? || page_ready?(poll_cmd_timeout, starting_url)
  end
rescue TimeoutError
  nil
end
163
+
164
+ POLL_STATE_JS = "(function(){return{r:document.readyState,u:location.href}})()"
165
+ private_constant :POLL_STATE_JS
166
+
167
# One readiness probe: evaluate POLL_STATE_JS with a per-command timeout of
# `cmd_timeout` and inspect the returned `r` (document.readyState) and `u`
# (location.href) values.
#
# Returns true only when readyState is "complete" or "interactive" and
# either `starting_url` is nil or the reported URL differs from it.
# Returns false when the evaluation yields no value or raises Error.
# NOTE(review): with a non-nil starting_url a navigation that lands on the
# same URL (e.g. a reload) can never satisfy the URL check — confirm that
# relying on loadEventFired alone is acceptable there.
def page_ready?(cmd_timeout, starting_url)
  reply = @client.command(
    "Runtime.evaluate",
    { expression: POLL_STATE_JS, returnByValue: true, awaitPromise: true },
    session_id: @session_id,
    timeout: cmd_timeout
  )
  snapshot = reply.dig("result", "value")
  if snapshot
    on_new_page = starting_url.nil? || snapshot["u"] != starting_url
    on_new_page && %w[complete interactive].include?(snapshot["r"])
  else
    false
  end
rescue Error
  false
end
182
+ end
183
+ end
184
+ end
185
+ end
@@ -0,0 +1,258 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Capybara
4
+ module Lightpanda
5
+ class Browser
6
+ # JS evaluation and RemoteObject plumbing: Runtime.evaluate /
7
+ # callFunctionOn dispatch, result serialization (Ferrum's
8
+ # Frame::Runtime is the peer-gem equivalent).
9
+ module Runtime
10
# Evaluate JS and return a serialized value.
#
# Without args the fast path uses Runtime.evaluate; with args the
# expression is wrapped as a function and dispatched via
# Runtime.callFunctionOn (through `call_with_args`) so `arguments[i]` is
# bound. Both paths use `returnByValue: false` and unwrap the result, so
# DOM-node returns come back as `{ "__lightpanda_node__" => ... }` for the
# Driver to wrap.
#
# The no-args path sends the user's text verbatim with `replMode: true`
# (V8's DevTools-console REPL mode — Lightpanda forwards Runtime.evaluate
# to the V8 inspector, which handles the flag natively). Without it,
# top-level `const`/`let` persist in the global lexical environment across
# classic scripts — per spec, and Chrome behaves identically — so a second
# `const sel = ...` raises `SyntaxError: Identifier 'sel' has already been
# declared`. REPL mode keeps the bindings (visible to later calls, like the
# DevTools console) but allows redeclaration. Completion-value semantics
# cover a bare expression (`'foo'`), a `throw` statement, and
# multi-statement scripts alike.
#
# Raises JavaScriptError (via `raise_on_js_error!`) when the response
# carries exceptionDetails.
def evaluate(expression, *args)
  if args.empty?
    response = page_command("Runtime.evaluate", expression: expression, returnByValue: false,
                                                awaitPromise: true, replMode: true)
    raise_on_js_error!("evaluate", expression, response)

    return unwrap_call_result(response["result"])
  end

  # Strip surrounding whitespace so a leading newline cannot turn
  # `return <expr>` into a bare `return;` through automatic semicolon
  # insertion, and close the brace on its own line so a trailing
  # `// comment` in the expression cannot comment it out.
  wrapped = "function() { return #{expression.to_s.strip}\n}"
  call_with_args(wrapped, args)
end
38
+
39
# Execute JS without returning a value; always returns nil.
#
# Like `evaluate`, the no-args path uses `replMode: true` so top-level
# `const`/`let` redeclarations across calls don't raise. It also raises on
# JS exceptions so silent failures don't mask test bugs (the previous fast
# path swallowed them because `awaitPromise: false` was checked but
# `exceptionDetails` was not).
#
# With args the script is wrapped in a function body (no implicit return)
# and dispatched through `call_with_args` so `arguments[i]` is bound.
def execute(expression, *args)
  if args.empty?
    response = page_command("Runtime.evaluate", expression: expression, returnByValue: false,
                                                awaitPromise: false, replMode: true)
    raise_on_js_error!("execute", expression, response)
    return nil
  end

  # Close the brace on its own line so a script ending in a `// comment`
  # cannot comment out the end of the wrapper.
  wrapped = "function() { #{expression}\n}"
  call_with_args(wrapped, args, return_by_value: false)
  nil
end
58
+
59
# Central exceptionDetails check for Runtime responses. When the response
# carries "exceptionDetails", emit the optional LIGHTPANDA_DEBUG dump and
# raise JavaScriptError with the response; otherwise return nil.
# `site` names the calling method for the debug output.
def raise_on_js_error!(site, expression, response)
  if response["exceptionDetails"]
    debug_js_failure(site, expression, response)
    raise JavaScriptError, response
  end
end
67
+
68
# Debug aid: when the LIGHTPANDA_DEBUG environment variable is set (e.g.
# LIGHTPANDA_DEBUG=1), write the JS expression and the full CDP response
# for a failed call to STDERR via `warn`, tagged with the calling `site`.
# Useful for isolating exactly which JS triggers an upstream Lightpanda
# bug. Does nothing when the variable is unset.
def debug_js_failure(site, expression, response)
  return unless ENV["LIGHTPANDA_DEBUG"]

  tag = "[lightpanda:#{site}]"
  warn "#{tag} expression:\n#{expression}\n#{tag} response:\n#{response.inspect}\n"
end
76
+
77
# Evaluate async JS with a callback. The user's script receives the
# callback as its last argument (`arguments[arguments.length - 1]`),
# matching Capybara's evaluate_async_script contract.
#
# The script runs inside a Promise that resolves with the value passed to
# the callback and rejects with 'Async script timeout after <n>ms' once
# `wait` seconds (default: @options.timeout) have elapsed. The wrapper is
# dispatched through `call_with_args`, whose result is returned.
def evaluate_async(expression, *args, wait: @options.timeout)
  timeout_ms = (wait * 1000).to_i
  # The user script sits on its own line inside the inner function so a
  # trailing `// comment` cannot comment out the closing `}).apply(...)`.
  wrapped = <<~JS
    function() {
      var __args = Array.prototype.slice.call(arguments);
      return new Promise(function(__resolve, __reject) {
        var __timer = setTimeout(function() {
          __reject(new Error('Async script timeout after #{timeout_ms}ms'));
        }, #{timeout_ms});
        var __done = function(val) { clearTimeout(__timer); __resolve(val); };
        __args.push(__done);
        (function() {
          #{expression}
        }).apply(null, __args);
      });
    }
  JS
  call_with_args(wrapped, args)
end
97
+
98
# Evaluate JS and return the raw RemoteObject description (for DOM nodes,
# arrays) instead of a serialized value. Returns nil when the result type
# is "undefined"; raises JavaScriptError via `raise_on_js_error!` when the
# response carries exceptionDetails.
def evaluate_with_ref(expression)
  reply = page_command("Runtime.evaluate", expression: expression, returnByValue: false, awaitPromise: true)
  raise_on_js_error!("evaluate_with_ref", expression, reply)

  remote = reply["result"]
  remote["type"] == "undefined" ? nil : remote
end
108
+
109
# Call `function_declaration` via Runtime.callFunctionOn with `this` bound
# to the object referenced by `remote_object_id`. Extra `args` are
# converted with `serialize_argument`; the `arguments` parameter is only
# sent when at least one was given.
#
# Returns nil for an undefined result, the plain value when
# `return_by_value` is true (the default), or the raw RemoteObject hash
# otherwise. Raises JavaScriptError via `raise_on_js_error!` on JS errors.
def call_function_on(remote_object_id, function_declaration, *args, return_by_value: true)
  request = {
    objectId: remote_object_id,
    functionDeclaration: function_declaration,
    returnByValue: return_by_value,
    awaitPromise: true
  }
  request[:arguments] = args.map { |value| serialize_argument(value) } if args.size.positive?

  reply = page_command("Runtime.callFunctionOn", **request)
  raise_on_js_error!("call_function_on", function_declaration, reply)

  remote = reply["result"]
  if remote["type"] == "undefined"
    nil
  elsif return_by_value
    remote["value"]
  else
    remote
  end
end
128
+
129
# Fetch the own properties of a remote object via Runtime.getProperties
# (used to extract array elements). Returns the raw command response.
def get_object_properties(remote_object_id)
  query = { objectId: remote_object_id, ownProperties: true }
  page_command("Runtime.getProperties", **query)
end
133
+
134
# Release a remote object reference to free V8 memory.
#
# Cleanup is best-effort: callers invoke this from `ensure` blocks, so an
# Error here (object already released, context destroyed, CDP call timed
# out or failed in transport) is swallowed and nil is returned rather than
# burying the original failure.
def release_object(remote_object_id)
  begin
    page_command("Runtime.releaseObject", objectId: remote_object_id)
  rescue Error
    nil
  end
end
144
+
145
+ private
146
+
147
# Convert one Ruby argument into a CDP call argument: anything responding
# to `remote_object_id` is passed by reference as `{ objectId: ... }`,
# everything else by value as `{ value: ... }`.
def serialize_argument(arg)
  return { value: arg } unless arg.respond_to?(:remote_object_id)

  { objectId: arg.remote_object_id }
end
154
+
155
# Pull the by-value result out of an already-issued Runtime call. Raises
# JavaScriptError via `raise_on_js_error!` when the response carries
# exceptionDetails; returns nil for an undefined result, otherwise the
# result's "value".
def handle_evaluate_response(response, expression)
  raise_on_js_error!("handle_evaluate_response", expression, response)

  payload = response["result"]
  payload["type"] == "undefined" ? nil : payload["value"]
end
164
+
165
# Run a wrapped function via Runtime.callFunctionOn with `arguments` bound.
#
# `args` are converted with `serialize_argument` (nodes → objectId, scalars
# → value) and the function is invoked with `document` as `this`. With
# `return_by_value: false` (the default) the result goes through
# `unwrap_call_result`, so DOM nodes come back as
# `{ "__lightpanda_node__" => ... }` hashes the Driver can wrap as Capybara
# nodes; with `return_by_value: true` it goes through
# `handle_evaluate_response`.
#
# document_object_id returns a fresh RemoteObject handle every call, so
# the handle is released in `ensure` to keep long-running shared-spec
# sessions from accumulating orphaned V8 handles between resets.
def call_with_args(function_declaration, args, return_by_value: false)
  doc_oid = document_object_id
  response = page_command(
    "Runtime.callFunctionOn",
    objectId: doc_oid,
    functionDeclaration: function_declaration,
    returnByValue: return_by_value,
    awaitPromise: true,
    arguments: args.map { |value| serialize_argument(value) }
  )
  raise_on_js_error!("call_with_args", function_declaration, response)

  if return_by_value
    handle_evaluate_response(response, function_declaration)
  else
    unwrap_call_result(response["result"])
  end
ensure
  release_object(doc_oid) if doc_oid
end
191
+
192
# Translate a non-by-value Runtime result into a plain Ruby value.
#
# * "undefined" type or "null" subtype → nil.
# * No objectId → the result's "value".
# * DOM node → `{ NODE_MARKER => objectId }` so the Driver can wrap it. The
#   sentinel key (rather than a plain "objectId") prevents misclassifying
#   user JS that legitimately returns `{ objectId: "x" }`.
# * Array subtype → `serialize_remote_array`.
# * Any other result of type "object" → `serialize_remote_object`.
# * Any remaining handle (e.g. a function) is released so V8 doesn't
#   accumulate orphaned references across long sessions, and the result's
#   "value" is returned.
def unwrap_call_result(result)
  return nil if result["type"] == "undefined" || result["subtype"] == "null"

  handle = result["objectId"]
  return result["value"] unless handle

  subtype = result["subtype"]
  if subtype == "node"
    { NODE_MARKER => handle }
  elsif subtype == "array"
    serialize_remote_array(handle)
  elsif result["type"] == "object"
    serialize_remote_object(handle)
  else
    release_object(handle)
    result["value"]
  end
end
215
+
216
# Re-fetch a remote object as a JSON-serializable value by calling an
# identity function on it with `returnByValue: true`. Cheaper than walking
# properties and good enough for shared specs. The original handle is
# always released so long-lived sessions don't accumulate leaked objectIds.
def serialize_remote_object(object_id)
  identity_fn = "function() { return this }"
  reply = page_command(
    "Runtime.callFunctionOn",
    objectId: object_id,
    functionDeclaration: identity_fn,
    returnByValue: true
  )
  handle_evaluate_response(reply, identity_fn)
ensure
  release_object(object_id)
end
230
+
231
# Convert a remote array into a Ruby array.
#
# Walks the array's own properties via `Runtime.getProperties`, keeps the
# enumerable ones whose name is all digits, orders them numerically and
# unwraps each element through `unwrap_call_result`, so DOM-node entries
# surface as `{ "__lightpanda_node__" => ... }` instead of being flattened
# to `{}` by `returnByValue: true`. A property without a "value" is
# unwrapped as an empty result. The outer array's objectId is always
# released, including when harvesting raises.
def serialize_remote_array(object_id)
  # Regexp#match? is a pure predicate (no MatchData, no $~) and returns
  # false for a nil name.
  index_name = /\A\d+\z/
  get_object_properties(object_id)
    .fetch("result", [])
    .select { |prop| prop["enumerable"] && index_name.match?(prop["name"]) }
    .sort_by { |prop| prop["name"].to_i }
    .map { |prop| unwrap_call_result(prop["value"] || {}) }
ensure
  release_object(object_id)
end
245
+
246
# objectId of `document`, used as the `this` context for callFunctionOn
# when `arguments` binding is needed but `this` is irrelevant. Resolved on
# every call because the document objectId is invalidated by navigation.
# Returns nil when the response carries no objectId.
def document_object_id
  page_command("Runtime.evaluate", expression: "document", returnByValue: false)
    .dig("result", "objectId")
end
253
+
254
+ private :raise_on_js_error!, :debug_js_failure, :get_object_properties
255
+ end
256
+ end
257
+ end
258
+ end