capybara-simulated 0.0.6 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +303 -158
  3. data/lib/capybara/simulated/asset_cache.rb +232 -0
  4. data/lib/capybara/simulated/browser.rb +3409 -845
  5. data/lib/capybara/simulated/driver.rb +341 -134
  6. data/lib/capybara/simulated/errors.rb +9 -5
  7. data/lib/capybara/simulated/js/bridge.bundle.js +19409 -0
  8. data/lib/capybara/simulated/js/snapshot_stubs.js +110 -0
  9. data/lib/capybara/simulated/node.rb +151 -163
  10. data/lib/capybara/simulated/quickjs_runtime.rb +424 -0
  11. data/lib/capybara/simulated/runtime_shared.rb +183 -0
  12. data/lib/capybara/simulated/script_cache.rb +168 -0
  13. data/lib/capybara/simulated/sourcemap.rb +119 -0
  14. data/lib/capybara/simulated/stack_resolver.rb +97 -0
  15. data/lib/capybara/simulated/trace.rb +111 -0
  16. data/lib/capybara/simulated/v8_runtime.rb +987 -0
  17. data/lib/capybara/simulated/version.rb +3 -1
  18. data/lib/capybara/simulated/webauthn_state.rb +367 -0
  19. data/lib/capybara/simulated/whitespace_normalizer.rb +45 -0
  20. data/lib/capybara/simulated/worker_runtime.rb +30 -0
  21. data/lib/capybara/simulated.rb +31 -4
  22. data/lib/capybara-simulated.rb +2 -0
  23. data/vendor/js/vendor.bundle.js +13 -0
  24. metadata +24 -32
  25. data/vendor/esbuild-wasm/LICENSE.md +0 -21
  26. data/vendor/esbuild-wasm/bin/esbuild +0 -91
  27. data/vendor/esbuild-wasm/esbuild.wasm +0 -0
  28. data/vendor/esbuild-wasm/lib/main.js +0 -2337
  29. data/vendor/esbuild-wasm/wasm_exec.js +0 -575
  30. data/vendor/esbuild-wasm/wasm_exec_node.js +0 -40
  31. data/vendor/js/bundle-modules.mjs +0 -168
  32. data/vendor/js/csim.bundle.js +0 -91560
  33. data/vendor/js/entry.mjs +0 -23
  34. data/vendor/js/prelude.js +0 -186
  35. data/vendor/js/runtime.js +0 -2174
@@ -0,0 +1,424 @@
1
+ # frozen_string_literal: true
2
+
3
+ # QuickJS-backed Runtime, alternate to `V8Runtime`. The DOM still lives
4
+ # in JS (same bridge.js, same vendor bundle); this class swaps the engine to
5
+ # trade JIT speed for ~10× smaller per-VM footprint — useful when the
6
+ # scaling target is "many parallel workers on a fixed RAM budget"
7
+ # rather than absolute per-spec wall time.
8
+ #
9
+ # Surface mirrors `V8Runtime` exactly: `eval` / `call` / `drain_timers`
10
+ # / `drain_microtasks` / `settle_gen` / `has_ready_timer?` /
11
+ # `reset_timers` / `rebuild_ctx` / `reset_page`. Browser code is
12
+ # engine-agnostic.
13
+
14
+ require 'digest'
15
+ require 'quickjs'
16
+
17
+ require_relative 'runtime_shared'
18
+ require_relative 'worker_runtime'
19
+
20
+ module Capybara
21
+ module Simulated
22
+ class QuickJSRuntime
23
+ # Compile the vendor bundle + bridge.js into bytecode once per process.
24
+ # Every per-visit VM replays this in ~10–20 ms (PR 31's microbench: 504KB
25
+ # bundle in ~4 ms; vendor + bridge is ~10× larger). Side effects (class
26
+ # definitions, the xpathway `Document.prototype.evaluate` install) run on
27
+ # each new VM — `compile` itself is pure (`COMPILE_ONLY` flag).
28
+ @@bridge_lock = Mutex.new
29
+ @@bridge_runnable = nil
30
+
31
+ def self.bridge_runnable
32
+ @@bridge_lock.synchronize { @@bridge_runnable ||= Quickjs::VM.new.compile(RuntimeShared.snapshot_src, filename: 'csim_bridge.js') }
33
+ end
34
+
35
+ # Process-wide cache of compiled `<script>` bodies (classic + ESM
36
+ # factory wrappers). Bridge.js routes each body through
37
+ # `__csim_runScript`; first encounter compiles into bytecode, every
38
+ # subsequent visit replays the cached `Runnable` against the
39
+ # current VM. The compile (PR 31 microbench: 504 KB → ~4 ms; Avo's
40
+ # bundle is ~10×) is the cost we're skipping.
41
+ #
42
+ # No size cap: typical app surface is a few hundred unique bodies
43
+ # (jQuery, Stimulus, Turbo, app bundle, per-page inlines). If a
44
+ # test suite generates pathological cardinality we can add LRU
45
+ # later — for now the parser-overhead saving dwarfs the cache RSS.
46
+ @@runnable_cache_lock = Mutex.new
47
+ @@runnable_cache = {}
48
+
49
+ # Sharing one compiler VM serialises compile calls, but compilation
50
+ # is CPU-bound C and parallel workers each have their own
51
+ # `QuickJSRuntime` class state (Ruby's `@@` is per-class, shared
52
+ # in-process). One compile-only VM is enough; creating a fresh
53
+ # `Quickjs::VM.new` per compile (~140 ms each for POLYFILL_INTL)
54
+ # would dwarf the compile itself.
55
+ @@compiler_lock = Mutex.new
56
+ @@compiler_vm = nil
57
+
58
+ def self.runnable_for(body, label)
59
+ key = Digest::SHA256.hexdigest(body)
60
+ cached = @@runnable_cache_lock.synchronize { @@runnable_cache[key] }
61
+ return cached if cached
62
+ fresh = @@compiler_lock.synchronize {
63
+ @@compiler_vm ||= Quickjs::VM.new
64
+ @@compiler_vm.compile(body, filename: label.to_s)
65
+ }
66
+ @@runnable_cache_lock.synchronize { @@runnable_cache[key] ||= fresh }
67
+ end
68
+
69
+ # Pre-warmed pool of bare `Quickjs::VM` instances. `Quickjs::VM.new`
70
+ # with `POLYFILL_INTL` is ~140 ms (FormatJS locale tables + IANA TZ
71
+ # bytecode); quickjs.rb #36 released the GVL during construction,
72
+ # so warmer threads build in parallel with the main thread.
73
+ # `build_vm` pops a pre-built VM and only pays for bridge replay +
74
+ # host-fn attach (~30 ms) on the hot path.
75
+ class VmPool
76
+ # 4 warmers × ~140 ms ≈ 28 VMs/sec — covers sustained demand for
77
+ # shared-spec-shaped runs. CAPACITY buffers short bursts before
78
+ # warmers backfill.
79
+ WARMER_COUNT = 4
80
+ CAPACITY = 6
81
+
82
+ def initialize(vm_options)
83
+ @vm_options = vm_options
84
+ @queue = SizedQueue.new(CAPACITY)
85
+ @threads = WARMER_COUNT.times.map {|i|
86
+ Thread.new { warmer_loop }.tap {|t| t.name = "csim-qjs-warmer-#{i}" }
87
+ }
88
+ end
89
+
90
+ def checkout = @queue.pop
91
+
92
+ # SizedQueue#close unblocks pushers + makes future pops return
93
+ # nil — necessary at process exit because a warmer mid-`VM.new`
94
+ # has the GVL released and would SEGV on interpreter teardown.
95
+ def shutdown
96
+ @queue.close
97
+ @threads.each {|t| t.join(2) }
98
+ end
99
+
100
+ private
101
+
102
+ def warmer_loop
103
+ loop { @queue.push(Quickjs::VM.new(**@vm_options)) }
104
+ rescue ClosedQueueError
105
+ # process exit
106
+ end
107
+ end
108
+
109
+ @@pool_lock = Mutex.new
110
+ @@pool = nil
111
+
112
+ def self.pool
113
+ @@pool_lock.synchronize { @@pool ||= VmPool.new(VM_OPTIONS) }
114
+ end
115
+
116
+ at_exit do
117
+ @@pool_lock.synchronize { @@pool&.shutdown }
118
+ rescue StandardError
119
+ # Best-effort at process exit.
120
+ end
121
+
122
+ def initialize(browser)
123
+ @browser = browser
124
+ @vm = nil
125
+ @runnable = self.class.bridge_runnable
126
+ self.class.pool # eager-start the warmers on first Browser
127
+ end
128
+
129
+ def eval(code)
130
+ v = vm
131
+ result = v.eval_code(code.to_s)
132
+ v.drain_jobs!
133
+ normalize(result)
134
+ end
135
+
136
+ # The V8 engine drains its microtask queue at
137
+ # the end of every call (V8's default microtask policy). QuickJS
138
+ # does not: `js_std_await` only pumps pending jobs while it's
139
+ # waiting for an actual Promise to resolve, and host-fn returns
140
+ # are plain values. Without a manual pump after every call,
141
+ # Promise.then chains queued during a host-fn body (Turbo's
142
+ # await fetch / Stimulus controllers, `evaluate_async_script`
143
+ # test scripts) stall until the next async boundary.
144
+ # `drain_jobs!` (quickjs.rb 0.18+) wraps `JS_ExecutePendingJob`
145
+ # in a loop to empty the queue, bounded by the VM's `timeout_msec`.
146
+ def call(name, *args)
147
+ v = vm
148
+ result = v.call(name.to_s, *args)
149
+ v.drain_jobs!
150
+ normalize(result)
151
+ end
152
+
153
+ # bridge.js owns the virtual clock; we drive it from Ruby because
154
+ # Capybara's polling cadence is wall-clock-anchored.
155
+ def drain_timers(max_ms = nil)
156
+ max_ms.nil? ? vm.call('__drainTimers') : vm.call('__drainTimers', max_ms.to_i)
157
+ end
158
+
159
+ # One event-loop step; returns `{ 'fired', 'gen', 'dirtied' }` (see
160
+ # V8Runtime#run_loop_step). `dirtied` = settleGen changed during the step.
161
+ def run_loop_step(max_ms, max_iter = 10_000, yield_on_gen: false)
162
+ r = vm.call('__runLoopStep', max_ms.to_i, max_iter.to_i, !!yield_on_gen)
163
+ r.is_a?(Hash) ? r : { 'fired' => 0, 'gen' => 0, 'dirtied' => false }
164
+ end
165
+
166
+ # `drain_jobs!` loops to queue-empty — one call is a full checkpoint,
167
+ # same contract as `V8Runtime#drain_microtasks`.
168
+ def drain_microtasks
169
+ vm.drain_jobs!
170
+ end
171
+
172
+ # No binary marshaler: QuickJS reinterprets high-bit bytes as UTF-8 and
173
+ # corrupts them, so binary payloads cross as base64 and the JS shim's
174
+ # `fetchedToBytes` atob's them back (see Browser#transfer_buffer_fetch_for_js).
175
+ def wrap_binary(bytes)
176
+ Base64.strict_encode64(bytes)
177
+ end
178
+
179
+ def settle_gen
180
+ vm.call('__settleGenGet').to_i
181
+ end
182
+
183
+ def has_ready_timer?
184
+ return false if @vm.nil?
185
+ !!vm.call('__hasReadyTimer')
186
+ end
187
+
188
+ # Delay (ms) until the nearest scheduled timer relative to the virtual
189
+ # clock, or -1 if none. Drives the horizon-gated fast-forward in
190
+ # `Browser#tick_real_time`.
191
+ def next_timer_delay_ms
192
+ return -1 if @vm.nil?
193
+ vm.call('__nextTimerDelay').to_i
194
+ end
195
+
196
+ def reset_timers
197
+ return if @vm.nil?
198
+ vm.call('__resetTimers')
199
+ end
200
+
201
+ # Tear down the current VM and build a fresh one from the
202
+ # precompiled bytecode. Partial in-VM resets carry the same
203
+ # library-init-leak hazards V8Runtime documents.
204
+ #
205
+ # We don't `@vm&.dispose!` before swapping: per-visit rebuilds
206
+ # happen on every spec example, and `dispose!` blocks on the
207
+ # quickjs GC running with the GVL held. Ruby GC will eventually
208
+ # reach the unreferenced VM and the gem's dfree handler frees
209
+ # the JSRuntime. The transient C-heap growth between GCs is the
210
+ # tradeoff for not paying ~hundreds of ms per spec.
211
+ def rebuild_ctx
212
+ @vm = build_vm
213
+ end
214
+
215
+ # Same operation as `rebuild_ctx` since per-visit rebuilds are
216
+ # already the inter-test reset point.
217
+ def reset_page = rebuild_ctx
218
+
219
+ # bridge.js patches `Intl.DateTimeFormat`; rusty_racer ships ICU
220
+ # built-in but QuickJS gates it behind a polyfill flag. Other JS
221
+ # surfaces bridge.js touches (URL / TextEncoder / atob/btoa /
222
+ # crypto) are already routed through Ruby-side host fns, so
223
+ # POLYFILL_INTL is the only one we strictly need.
224
+ #
225
+ # `max_stack_size: 0` — `JS_SetMaxStackSize` measures C stack
226
+ # delta from runtime construction; Ruby callers reach QuickJS
227
+ # through deep stacks (Capybara `synchronize` + RSpec matchers +
228
+ # bridge.js's class init closures), so the default 4 MB trips on
229
+ # routine `check_stale → __csimAlive` calls. `0` disables the
230
+ # check; OS thread stack is the real ceiling.
231
+ #
232
+ # `timeout_msec: 30_000` — quickjs.rb default eval timeout is
233
+ # 100 ms; bridge.js's `__csimEvaluateXPath` / `__csimDispatchEvent`
234
+ # chains routinely exceed that on Avo-scale documents under
235
+ # QuickJS's interpreter. 0 means "interrupt immediately" (the
236
+ # handler returns `elapsed >= limit_ms`, so 0 fires on the first
237
+ # check), so it cannot serve as "no limit"; see the inline note below.
238
+ VM_OPTIONS = {
239
+ features: [Quickjs::POLYFILL_INTL].freeze,
240
+ max_stack_size: 0,
241
+ # quickjs.rb's 128 MB default trips "out of memory in regexp
242
+ # execution" on class-attribute-heavy polls and the heaviest
243
+ # Mastodon hydrate (cumulative heap, not a single allocation).
244
+ # 512 MB clears the ceiling without idle cost — `JS_SetMemoryLimit`
245
+ # is a malloc ceiling, not a reservation.
246
+ memory_limit: 512 * 1024 * 1024,
247
+ # `drain_jobs!` loops `JS_ExecutePendingJob` until the
248
+ # queue empties — but Forem's article-feed render schedules
249
+ # new microtasks faster than they drain, so without a timer
250
+ # the call never returns. Real per-spec eval rarely runs over
251
+ # a second, and a 30 s ceiling is far below "hung CI worker"
252
+ # while leaving headroom for the heaviest Mastodon hydrate.
253
+ timeout_msec: 30_000
254
+ }.freeze
255
+
256
+ # Evaluates `url` as an ES module. For external `<script
257
+ # type="module" src="…">`, pass `src=nil` so QuickJS goes through
258
+ # `module_loader` to fetch — the URL becomes the module's
259
+ # identity. For inline `<script type="module">{…}</script>`,
260
+ # pass the body as `src` and let QuickJS compile it inline (the
261
+ # synthesised `#inline-…` URL becomes the module's identity, but
262
+ # transitive imports still go through `module_loader`).
263
+ #
264
+ # `quickjs.rb`'s `vm.import` distinguishes these by which keyword
265
+ # arg you pass: `filename:` alone → loader fetch; `from:` alone →
266
+ # inline compile. Passing both makes the gem ignore the body.
267
+ def eval_esm_module(url, src = nil)
268
+ v = vm
269
+ opts = src ? { from: src.to_s } : { filename: url.to_s }
270
+ opts[:code_to_expose] = ''
271
+ v.import("* as __csim_entry_#{rand(1 << 32)}", **opts)
272
+ v.drain_jobs!
273
+ end
274
+
275
+ private
276
+
277
+ def vm
278
+ @vm ||= build_vm
279
+ end
280
+
281
+ def build_vm
282
+ v = self.class.pool.checkout
283
+ @runnable.run(on: v)
284
+ attach_host_fns(v)
285
+ attach_module_loader(v)
286
+ attach_rejection_tracker(v)
287
+ v.eval_code('__csim_installWorker();')
288
+ v
289
+ end
290
+
291
+ def attach_host_fns(v)
292
+ self.class.attach_host_fns(v, @browser)
293
+ # Re-enter the same VM to run the body. `Runnable` is portable
294
+ # bytecode (no scope binding); replaying on `v` evaluates at
295
+ # globalThis, matching `(0, eval)(body)` semantics that
296
+ # bridge.js previously used directly. Script-throwing errors
297
+ # propagate as JS exceptions for bridge.js's caller-side
298
+ # try/catch.
299
+ v.define_function('__csim_runScript') {|label, body|
300
+ self.class.runnable_for(body.to_s, label).run(on: v)
301
+ nil
302
+ }
303
+ # Native-ESM entry. `bridge.js`'s `runModuleScript` calls this
304
+ # for every `<script type="module">`; V8 registers it too via
305
+ # `V8Runtime#attach_native_module_loader`.
306
+ browser = @browser
307
+ v.define_function('__csim_evalEsmEntry') {|url, inline_src|
308
+ RuntimeShared.safe_call { browser.eval_esm_module(url, inline_src) }
309
+ nil
310
+ }
311
+ end
312
+
313
+ # Class-level attach so Worker isolates (per-thread VMs that
314
+ # don't have a Runtime instance) reuse the same host-fn table.
315
+ def self.attach_host_fns(v, browser)
316
+ RuntimeShared::BROWSER_HOST_FNS.each {|name, body|
317
+ v.define_function(name) {|*a| RuntimeShared.safe_call { body.call(browser, *a) } }
318
+ }
319
+ RuntimeShared::STDLIB_HOST_FNS.each {|name, body|
320
+ v.define_function(name, &body)
321
+ }
322
+ # `dispatchEventForUserAction` calls this between listener
323
+ # invocations. `drain_jobs!` loops `JS_ExecutePendingJob` until
324
+ # the queue is empty, matching V8's
325
+ # `MicrotasksScope::PerformCheckpoint`. Older quickjs.rb
326
+ # without `drain_jobs!` falls back to a no-op.
327
+ if v.respond_to?(:drain_jobs!)
328
+ v.define_function('__csim_yield') { v.drain_jobs!; nil }
329
+ else
330
+ v.define_function('__csim_yield') { nil }
331
+ end
332
+ end
333
+
334
+ # Worker-isolate factory: fresh VM, bridge bytecode replayed,
335
+ # host fns attached *after* the replay (so snapshot_stubs.js's
336
+ # no-ops don't overwrite real ones), `__csim_isWorker` set, and
337
+ # the per-worker postMessage routed through `post_back`.
338
+ def self.build_worker(browser, post_back)
339
+ vm = Quickjs::VM.new(**VM_OPTIONS)
340
+ bridge_runnable.run(on: vm)
341
+ attach_host_fns(vm, browser)
342
+ vm.define_function('__csim_workerPostMessage') {|data| post_back.call(data); nil }
343
+ # Override main's __setTimersActive so worker's empty-timer-map
344
+ # flip doesn't race main's `polling?` gate. See v8_runtime's
345
+ # build_worker for the long-form rationale.
346
+ vm.define_function('__setTimersActive') {|_flag| nil }
347
+ vm.eval_code('__csim_installWorkerScope();')
348
+ vm.drain_jobs!
349
+ WorkerRuntime.new(
350
+ eval_fn: ->(s) { v = vm.eval_code(s.to_s); vm.drain_jobs!; v },
351
+ call_fn: ->(n, *a) { v = vm.call(n.to_s, *a); vm.drain_jobs!; v },
352
+ drain_microtasks: -> { vm.drain_jobs! },
353
+ drain_timers: -> { vm.call('__drainTimers', 50) },
354
+ has_ready_timer: -> { !!vm.call('__hasReadyTimer') },
355
+ # quickjs.rb has no explicit dispose; GC reclaims the VM.
356
+ dispose: -> { nil }
357
+ )
358
+ end
359
+
360
+ # QuickJS's native ESM loader. The Ruby block returns the source
361
+ # for each module URL; QuickJS handles parsing, live bindings,
362
+ # `import.meta`, and `import()` natively. quickjs.rb 0.18 passes
363
+ # the raw specifier + importer URL. Bare specifiers go through
364
+ # `Browser#resolve_module_specifier` so importmap entries
365
+ # (Stimulus / unbundled apps) resolve correctly; relative paths
366
+ # resolve against the importer. `nil` from `rack_fetch_body`
367
+ # propagates to QuickJS which raises a ReferenceError mirroring
368
+ # a real-browser 404.
369
+ def attach_module_loader(v)
370
+ browser = @browser
371
+ v.module_loader = ->(specifier, importer) {
372
+ resolved = browser.resolve_module_specifier(specifier, importer)
373
+ body = browser.rack_fetch_body(resolved)
374
+ return nil unless body
375
+ # `.json` (and `?import` JSON) imports come from Vite's
376
+ # `import.meta.glob` and `import x from './data.json'`
377
+ # patterns. quickjs.rb's loader passes the source through
378
+ # `JS_EVAL_TYPE_MODULE` regardless of extension, so we wrap
379
+ # JSON bodies in `export default …` ourselves. Real
380
+ # browsers gate on `with { type: 'json' }`; Vite's bundler
381
+ # output already injected that, but the resulting module
382
+ # source is just the raw JSON so we need the wrap either way.
383
+ code = resolved.to_s.match?(/\.json(?:\?|$)/) ? "export default #{body};" : body
384
+ # Return `{ code:, as: }` so QuickJS caches by the resolved
385
+ # absolute URL — necessary for subsequent relative imports
386
+ # to use this URL as their importer, not the raw specifier.
387
+ {code: code, as: resolved.to_s}
388
+ }
389
+ end
390
+
391
+ # Funnel unhandled Promise rejections into `console.error` so
392
+ # they show up in trace output / user-installed `log_console`
393
+ # overrides. Without this, an async chain that rejects without
394
+ # a `.catch` disappears silently — the same class of bug that
395
+ # cost half a session debugging Intl.Collator (the throw fired
396
+ # inside a module body we couldn't see).
397
+ def attach_rejection_tracker(v)
398
+ browser = @browser
399
+ v.on_unhandled_rejection do |reason|
400
+ msg = "#{reason.class}: #{reason.message}"
401
+ stack = reason.backtrace&.any? ? "\n#{reason.backtrace.first(20).join("\n")}" : ''
402
+ browser.log_console('error', "unhandled rejection: #{msg}#{stack}")
403
+ end
404
+ end
405
+
406
+ # QuickJS marshals JS `undefined` as the symbol
407
+ # `Quickjs::Value::UNDEFINED`; rusty_racer marshals it as `nil`. The
408
+ # rest of the gem expects `nil`, so normalize at the boundary.
409
+ # NaN gets the same treatment for consistency (the bridge never
410
+ # surfaces it as a load-bearing value).
411
+ UNDEFINED = Quickjs::Value::UNDEFINED
412
+ NAN = Quickjs::Value::NAN
413
+
414
+ def normalize(value)
415
+ case value
416
+ when UNDEFINED, NAN then nil
417
+ when Hash then value.transform_values {|v| normalize(v) }
418
+ when Array then value.map {|v| normalize(v) }
419
+ else value
420
+ end
421
+ end
422
+ end
423
+ end
424
+ end
@@ -0,0 +1,183 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'base64'
4
+ require 'openssl'
5
+ require 'securerandom'
6
+
7
+ require_relative 'webauthn_state'
8
+
9
+
10
+ module Capybara
11
+ module Simulated
12
+ # Bits common to `V8Runtime` and `QuickJSRuntime` — JS asset paths,
13
+ # the host-fn table that bridge.js reaches back through, the
14
+ # error-swallowing wrapper. Each engine plugs the table into its
15
+ # own attach API (rusty_racer's `Context#attach` vs quickjs.rb's
16
+ # `Quickjs::VM#define_function`).
17
+ module RuntimeShared
18
+ BRIDGE_JS = File.expand_path('js/bridge.bundle.js', __dir__).freeze
19
+ SNAPSHOT_STUBS_JS = File.expand_path('js/snapshot_stubs.js', __dir__).freeze
20
+ VENDOR_BUNDLE_JS = File.expand_path('../../../vendor/js/vendor.bundle.js', __dir__).freeze
21
+
22
+ def self.snapshot_stubs_src = File.read(SNAPSHOT_STUBS_JS)
23
+ def self.bridge_src = File.read(BRIDGE_JS)
24
+ def self.vendor_bundle_src = File.read(VENDOR_BUNDLE_JS)
25
+
26
+ # Combined source baked into the V8 Snapshot / QuickJS bytecode.
27
+ # Order matters: stubs first (so bridge's IIFE can reference the
28
+ # `globalThis.__rackFetch` etc. slots), then the vendor bundle
29
+ # (so bridge can reference `globalThis.__csimVendor.cssSelect` and
30
+ # `.xpathway`), then bridge proper — which installs the xpathway-backed
31
+ # `Document.prototype.evaluate` itself (see js/src/xpath.js). The
32
+ # standalone xpathway engine in the vendor blob replaces the old wgxpath.
33
+ def self.snapshot_src
34
+ snapshot_stubs_src +
35
+ vendor_bundle_src + ";\n" +
36
+ bridge_src
37
+ end
38
+
39
+ # Host fns whose body touches `Browser` — wrap with `safe_call`
40
+ # so a Ruby-side bug in the Browser path doesn't propagate as a
41
+ # JS exception that crashes the whole script chain. Bodies take
42
+ # `(browser, *js_args)` and return whatever the JS caller expects.
43
+ BROWSER_HOST_FNS = {
44
+ '__rackFetch' => ->(b, *a) { b.rack_fetch(a[0], a[1], a[2], a[3], a[4]) },
45
+ '__csimExternalAsset' => ->(b, *a) { b.external_asset_source(a[0]) },
46
+ '__locationAssign' => ->(b, *a) { b.location_assign(a[0]); nil },
47
+ '__locationReload' => ->(b, *_) { b.location_reload; nil },
48
+ '__setTimersActive' => ->(b, *a) { b.timers_active = !!a[0]; nil },
49
+ '__setCurrentUrl' => ->(b, *a) { b.history_state(a[0], a[1]); nil },
50
+ '__pushHistoryEntry' => ->(b, *a) { b.history_push(a[0], a[1]); nil },
51
+ '__historyGo' => ->(b, *a) { b.history_go(a[0]); nil },
52
+ '__historyLength' => ->(b, *_) { b.history_length },
53
+ '__csimReadFilePick' => ->(b, *a) { b.read_file_pick(a[0], a[1], a[2], a[3]) },
54
+ '__getDocumentCookie' => ->(b, *_) { b.document_cookie },
55
+ '__setDocumentCookie' => ->(b, *a) { b.write_document_cookie(a[0].to_s); nil },
56
+ '__getDocumentReferrer' => ->(b, *_) { b.current_referer },
57
+ '__csim_storageGet' => ->(b, *a) { b.storage_get(a[0], a[1]) },
58
+ '__csim_storageSet' => ->(b, *a) { b.storage_set(a[0], a[1], a[2]); nil },
59
+ '__csim_storageRemove' => ->(b, *a) { b.storage_remove(a[0], a[1]); nil },
60
+ '__csim_storageClear' => ->(b, *a) { b.storage_clear(a[0]); nil },
61
+ '__csim_storageKey' => ->(b, *a) { b.storage_key(a[0], a[1]) },
62
+ '__csim_storageLength' => ->(b, *a) { b.storage_length(a[0]) },
63
+ '__csimGeolocationState' => ->(b, *_) { b.geolocation_state_json },
64
+ '__modalDialog' => ->(b, *a) { b.handle_modal(a[0], a[1], a[2]) },
65
+ '__csim_pushImportmap' => ->(b, *a) { b.set_importmap(a[0]); nil },
66
+ '__csim_logConsole' => ->(b, *a) { b.log_console(a[0], a[1]); nil },
67
+ '__csim_eventSourceOpen' => ->(b, *a) { b.event_source_open(a[0]) },
68
+ '__csim_eventSourceClose' => ->(b, *a) { b.event_source_close(a[0]); nil },
69
+ '__csim_rackFetchAsync' => ->(b, *a) { b.rack_fetch_async(a[0], a[1], a[2], a[3]) },
70
+ '__csim_rackFetchAsyncAbort' => ->(b, *a) { b.rack_fetch_async_abort(a[0]); nil },
71
+ '__csim_workerSpawn' => ->(b, *a) { b.worker_spawn(a[0]) },
72
+ '__csim_workerPostToWorker' => ->(b, *a) { b.worker_post_to_worker(a[0], a[1]); nil },
73
+ '__csim_workerTerminate' => ->(b, *a) { b.worker_terminate(a[0]); nil },
74
+ '__csim_decodeImage' => ->(b, *a) { b.decode_image(a[0], a[1], a[2]) },
75
+ '__csim_blobRegister' => ->(b, *a) { b.blob_register(a[0], a[1]); nil },
76
+ '__csim_blobResolve' => ->(b, *a) { b.blob_resolve(a[0]) },
77
+ '__csim_blobUnregister' => ->(b, *a) { b.blob_unregister(a[0]); nil },
78
+ '__csim_transferStash' => ->(b, *a) { b.transfer_buffer_stash(a[0]) },
79
+ '__csim_transferFetch' => ->(b, *a) { b.transfer_buffer_fetch_for_js(a[0]) },
80
+ '__csim_decodeVideoFrame' => ->(b, *a) { b.decode_video_frame(a[0]) },
81
+ '__csim_encodeImage' => ->(b, *a) { b.encode_image(a[0], a[1], a[2], a[3], a[4]) },
82
+ # WebAuthn create / get raise `WebauthnState::Error` carrying
83
+ # the DOMException name (`InvalidStateError`, …); rescue here
84
+ # so the JS shim sees `{error:, name:}` instead of the
85
+ # `safe_call`-flattened nil that would collapse every failure
86
+ # to a generic NotAllowedError.
87
+ '__csimWebauthnCreate' => ->(b, *a) {
88
+ begin b.webauthn.create(a[0]); rescue WebauthnState::Error => e
89
+ {'error' => e.message, 'name' => e.webauthn_name}
90
+ end
91
+ },
92
+ '__csimWebauthnGet' => ->(b, *a) {
93
+ begin b.webauthn.get(a[0]); rescue WebauthnState::Error => e
94
+ {'error' => e.message, 'name' => e.webauthn_name}
95
+ end
96
+ },
97
+ '__csimWebauthnAddVirtualAuthenticator' => ->(b, *a) { b.webauthn.add_virtual_authenticator(a[0]) },
98
+ '__csimWebauthnRemoveVirtualAuthenticator' => ->(b, *a) { b.webauthn.remove_virtual_authenticator(a[0]); nil },
99
+ '__csimWebauthnAddCredential' => ->(b, *a) { b.webauthn.add_credential(a[0], a[1]); nil },
100
+ '__csimWebauthnRemoveCredential' => ->(b, *a) { b.webauthn.remove_credential(a[0], a[1]); nil },
101
+ '__csimWebauthnGetCredentials' => ->(b, *a) { b.webauthn.get_credentials(a[0]) },
102
+ '__csimWebauthnSetUserVerified' => ->(b, *a) { b.webauthn.set_user_verified(a[0], a[1]); nil }
103
+ }.freeze
104
+
105
+ # `STDLIB_HOST_FNS` (defined after the two caches below): host fns that route
106
+ # to pure stdlib — no Browser surface, nothing to safe_call, no allocation
107
+ # needed for the wrap. They skip the rescue overhead on every per-find / per-event invocation.
108
+ # Process-wide cascade-rule cache (mirrors the script bytecode cache). The
109
+ # built {hide, layout} rules are deterministic per (stylesheet-set,
110
+ # viewport), so the JS side caches the serialized rules keyed by a digest of
111
+ # the sheet sources and skips the ~12-15 ms css-tree parse + per-rule
112
+ # specificity + terminalKey rebuild on every per-visit VM rebuild. Lives in
113
+ # Ruby (not the VM) so it survives `rebuild_ctx`. Key space is tiny (one app
114
+ # ships one stylesheet set), so the map stays small; no eviction needed.
115
+ CASCADE_RULE_CACHE = {}
116
+ CASCADE_RULE_CACHE_MUTEX = Mutex.new
117
+
118
+ # Process-wide PER-SHEET parse cache (companion to CASCADE_RULE_CACHE). The
119
+ # built whole-cascade is cached above, but it misses whenever a page's inline
120
+ # `<style>` changes (Avo injects per-page styles), forcing a rebuild that
121
+ # re-parses every sheet — including unchanged linked bundles (avo.base.css).
122
+ # `parseSheet` is pure, so the JS side caches its serialized `{hide,layout}`
123
+ # keyed by (cssText hash, viewport) here, surviving the per-visit VM rebuild
124
+ # that wipes the in-VM `__sheetCache` — the CSS analogue of the JS bytecode
125
+ # cache. Keyed by content, so a content change yields a new key. Capped.
126
+ SHEET_PARSE_CACHE = {}
127
+ SHEET_PARSE_CACHE_MUTEX = Mutex.new
128
+ SHEET_PARSE_CACHE_MAX = 2048
129
+
130
+ STDLIB_HOST_FNS = {
131
+ '__csimCascadeCacheGet' => ->(*a) { CASCADE_RULE_CACHE_MUTEX.synchronize { CASCADE_RULE_CACHE[a[0].to_s] } },
132
+ '__csimCascadeCachePut' => lambda {|*a|
133
+ CASCADE_RULE_CACHE_MUTEX.synchronize { CASCADE_RULE_CACHE[a[0].to_s] = a[1].to_s }
134
+ nil
135
+ },
136
+ '__csimSheetCacheGet' => ->(*a) { SHEET_PARSE_CACHE_MUTEX.synchronize { SHEET_PARSE_CACHE[a[0].to_s] } },
137
+ '__csimSheetCachePut' => lambda {|*a|
138
+ SHEET_PARSE_CACHE_MUTEX.synchronize {
139
+ SHEET_PARSE_CACHE.clear if SHEET_PARSE_CACHE.size >= SHEET_PARSE_CACHE_MAX
140
+ SHEET_PARSE_CACHE[a[0].to_s] = a[1].to_s
141
+ }
142
+ nil
143
+ },
144
+ '__csim_randomUUID' => ->(*_) { SecureRandom.uuid },
145
+ '__csim_randomBytes' => ->(*a) { SecureRandom.bytes(a[0].to_i).bytes },
146
+ '__csim_atob' => ->(*a) { Base64.decode64(a[0].to_s) },
147
+ '__csim_btoa' => ->(*a) { Base64.strict_encode64(a[0].to_s) },
148
+ '__csim_utf8Encode' => ->(*a) { a[0].to_s.b.bytes },
149
+ '__csim_utf8Decode' => ->(*a) { a[0].pack('C*').force_encoding('UTF-8') },
150
+ # `__csim_parseUrl` is defined in JS now (js/src/url-parse.js, backed by
151
+ # the vendored whatwg-url) — spec-correct + no V8↔Ruby boundary per parse.
152
+ # Web Crypto SubtleCrypto.digest — algo is "SHA-1"/"SHA-256"/etc.
153
+ # JS hands us the byte array; we return the digest as bytes.
154
+ '__csim_subtleDigest' => lambda {|*a|
155
+ algo = a[0].to_s.upcase.tr('-', '')
156
+ bytes = a[1].is_a?(Array) ? a[1].pack('C*') : a[1].to_s
157
+ OpenSSL::Digest.new(algo).digest(bytes).bytes
158
+ }
159
+ }.freeze
160
+
161
+ def self.safe_call
162
+ yield
163
+ rescue StandardError => e
164
+ warn "[capybara-simulated] host fn error: #{e.class}: #{e.message[0, 200]}"
165
+ warn " at #{e.backtrace&.first(4)&.join("\n at ")}" if ENV['CSIM_HOSTFN_TRACE'] == '1'
166
+ nil
167
+ end
168
+
169
+ # Re-tag a text string for the Ruby→JS crossing. Marshalling is
170
+ # tag-driven: a BINARY-tagged String crosses as a Uint8Array, and a
171
+ # UTF-8-tagged string with invalid bytes raises — but Rack bodies,
172
+ # socket reads, and header values all arrive BINARY-tagged even when
173
+ # they ARE text. Decoding response bytes into text is the document
174
+ # layer's job (csim owns the charset knowledge; the contract is UTF-8),
175
+ # so every text crossing funnels through here: re-tag as UTF-8, scrub
176
+ # only actually-invalid bytes.
177
+ def self.utf8_text(s)
178
+ s = s.dup.force_encoding(Encoding::UTF_8) unless s.encoding == Encoding::UTF_8
179
+ s.valid_encoding? ? s : s.scrub
180
+ end
181
+ end
182
+ end
183
+ end