capybara-simulated 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@ require 'socket'
13
13
  require 'thread'
14
14
  require 'time'
15
15
  require 'uri'
16
+ require 'uri/idna' # WHATWG/UTS46 domain-to-ASCII/Unicode (uri-idna gem)
16
17
  require_relative 'asset_cache'
17
18
  require_relative 'errors'
18
19
  require_relative 'stack_resolver'
@@ -315,11 +316,23 @@ module Capybara
315
316
  # so long-running compute (e.g. mozjpeg over an 8900×8900 frame)
316
317
  # isn't starved by the settle_gen idle gate.
317
318
  @worker_in_flight = 0
319
+ # Workers whose initial script hasn't finished running yet. A worker that
320
+ # posts immediately on spawn (no main->worker message first) would leave
321
+ # `@worker_in_flight` at 0, so `worker_pending?` would be false in the gap
322
+ # between spawn and that first post — and settle / tick_real_time would
323
+ # stop waiting before the message lands. Count spawned-but-not-initialised
324
+ # workers so the async drain holds until the initial script has run.
325
+ @worker_initializing = 0
326
+ @worker_init_lock = Mutex.new
318
327
  # Cross-isolate `blob:` store. Worker isolates can't see the
319
328
  # main scope's `__csimBlobs` Map, so we mirror bytes here and
320
329
  # workers resolve them through a host fn.
321
330
  @blob_registry = {}
322
331
  @blob_registry_lock = Mutex.new
332
+ # url => owner key ("w:<worker handle>" or "r:<frame realm id>", see blob_register) for blob URLs created inside a worker or a frame realm. A
333
+ # worker's blob URL store dies with it, so terminating the worker revokes
334
+ # them (url-lifetime "Terminating worker revokes its URLs").
335
+ @blob_owners = {}
323
336
  # Postmessage transferable-buffer store. Large Uint8Array /
324
337
  # ArrayBuffer payloads cross isolates as a Ruby-side byte ID
325
338
  # rather than a JSON base64 string, so peak JS heap stays flat.
@@ -340,6 +353,9 @@ module Capybara
340
353
  # `message` event the next time it's active and settles/ticks. Plain
341
354
  # array (same thread — windows aren't background-threaded like workers).
342
355
  @window_inbox = []
356
+ # Cross-window BroadcastChannel messages from OTHER windows, delivered to
357
+ # this window's matching channels on settle. [{name, data}] (same thread).
358
+ @broadcast_inbox = []
343
359
  end
344
360
 
345
361
  # Worker thread polling and termination intervals — split so a
@@ -881,6 +897,7 @@ module Capybara
881
897
  # downloads from `click_link` complete inside the click
882
898
  # action.
883
899
  consume_pending_location
900
+ consume_pending_frame_nav
884
901
  return
885
902
  end
886
903
  case action['kind']
@@ -903,7 +920,7 @@ module Capybara
903
920
  # to `noopener` (so `window.opener` is null), unlike JS `window.open`
904
921
  # which keeps the opener — see `open_window_from_js`.
905
922
  elsif !target.empty? && !%w[_self _top _parent].include?(target.downcase) && @driver.respond_to?(:open_aux_window)
906
- @driver.open_aux_window(resolve_against_current(url, use_base: true))
923
+ @driver.open_aux_window(resolve_against_current(url, use_base: true), source: self, blob_snapshot: action['blob'])
907
924
  # In-page anchor links (`#frag` / current-page + `#frag`) move
908
925
  # the hash but don't fetch a new document. Pure-fragment also
909
926
  # short-circuits the `<a>`s test fixtures use as click sinks.
@@ -943,6 +960,7 @@ module Capybara
943
960
  # pending; if `@current_url` already changed mid-drain (the
944
961
  # navigate landed during a timer fire), skip the form submit
945
962
  # entirely — its form handle is in a stale VM by now.
963
+ consume_pending_frame_nav
946
964
  if @pending_location
947
965
  consume_pending_location
948
966
  elsif @current_url != submit_baseline_url
@@ -1457,7 +1475,7 @@ module Capybara
1457
1475
  url = pending['url'].to_s
1458
1476
  target = pending['target'].to_s
1459
1477
  if !target.empty? && !%w[_self _top _parent].include?(target.downcase) && @driver.respond_to?(:open_aux_window)
1460
- @driver.open_aux_window(resolve_against_current(url, use_base: true))
1478
+ @driver.open_aux_window(resolve_against_current(url, use_base: true), source: self, blob_snapshot: pending['blob'])
1461
1479
  elsif pure_fragment_navigation?(url)
1462
1480
  update_current_hash(url)
1463
1481
  else
@@ -1786,7 +1804,7 @@ module Capybara
1786
1804
  @stack_resolver ||= StackResolver.new(self)
1787
1805
  end
1788
1806
 
1789
- def log_network(method, url, status) = @trace&.log_network(method, url, status)
1807
+ def log_network(method, url, status, **extra) = @trace&.log_network(method, url, status, **extra)
1790
1808
 
1791
1809
  # `tag#id.class` short description of the handle, for trace
1792
1810
  # `description` fields. One V8 round-trip; only paid when a step
@@ -2277,10 +2295,11 @@ module Capybara
2277
2295
  reset_workers
2278
2296
  reset_websockets
2279
2297
  @window_inbox.clear
2298
+ @broadcast_inbox.clear
2280
2299
  # Free any zero-copy transfer backing stores that went unimported
2281
2300
  # (worker killed before draining its inbox, etc.) before the rebuild.
2282
2301
  drop_pending_transfers
2283
- @blob_registry_lock.synchronize { @blob_registry.clear }
2302
+ @blob_registry_lock.synchronize { @blob_registry.clear; @blob_owners.clear }
2284
2303
  # Drop volatile entries from the class-level HTTP asset cache
2285
2304
  # so test-local DB state (TranslationOverride, etc.) reaches
2286
2305
  # the app on subsequent visits. Fingerprinted assets
@@ -2309,6 +2328,14 @@ module Capybara
2309
2328
  reset_hijacked_fetches
2310
2329
  reset_websockets
2311
2330
  @window_inbox.clear
2331
+ @broadcast_inbox.clear
2332
+ # Dispose the JS runtime/isolate itself — for an auxiliary window this
2333
+ # Browser is the isolate's last owner, but V8Runtime registers every
2334
+ # isolate in a process-wide `@@live` set (for at_exit cleanup), which
2335
+ # pins it past a bare GC. Without this, each closed window leaked a live
2336
+ # V8 isolate (RSS climbed across a long suite). Only reached on teardown,
2337
+ # never on the per-test `reset!` path (which keeps the runtime).
2338
+ @runtime.dispose if @runtime.respond_to?(:dispose)
2312
2339
  rescue StandardError
2313
2340
  nil
2314
2341
  end
@@ -3037,7 +3064,7 @@ module Capybara
3037
3064
  # worker's `__csim_workerPostMessage` host fn closes over its
3038
3065
  # handle and routes outgoing messages onto a shared outbox the
3039
3066
  # main settle drains.
3040
- def worker_spawn(url)
3067
+ def worker_spawn(url, shared: false)
3041
3068
  handle = (@worker_seq += 1)
3042
3069
  inbox = Thread::Queue.new
3043
3070
  outbox = @worker_outbox
@@ -3049,9 +3076,11 @@ module Capybara
3049
3076
  # non-owning thread SEGVs (V8 isolates are thread-
3050
3077
  # bound; quickjs.rb's VM is similarly per-thread).
3051
3078
  body = fetch_worker_script(target)
3079
+ # Pending until the worker's initial script has run (see @worker_initializing).
3080
+ @worker_init_lock.synchronize { @worker_initializing += 1 }
3052
3081
  thread = Thread.new do
3053
3082
  Thread.current.report_on_exception = false
3054
- run_worker(handle, target, body, inbox, outbox, engine_class)
3083
+ run_worker(handle, target, body, inbox, outbox, engine_class, shared: shared)
3055
3084
  end
3056
3085
  @workers[handle] = {thread: thread, inbox: inbox}
3057
3086
  handle
@@ -3069,13 +3098,21 @@ module Capybara
3069
3098
  return unless w
3070
3099
  w[:inbox] << :terminate
3071
3100
  # Most clean shutdowns are <10 ms; the kill is the fallback
3072
- # for blocked workers.
3101
+ # for blocked workers. Join again AFTER the kill so the thread is actually
3102
+ # dead before we revoke its URLs — `Thread#kill` is async, and a worker
3103
+ # still running a `createObjectURL` could otherwise re-register a URL after
3104
+ # the revoke and leak it.
3073
3105
  w[:thread].join(WORKER_TERMINATE_GRACE)
3074
- w[:thread].kill if w[:thread].alive?
3106
+ if w[:thread].alive?
3107
+ w[:thread].kill
3108
+ w[:thread].join(WORKER_TERMINATE_GRACE)
3109
+ end
3075
3110
  # A blocked worker that never returned messages leaves
3076
3111
  # `@worker_in_flight` permanently > 0; reset when no workers
3077
3112
  # remain so `polling?` can short-circuit again.
3078
3113
  @worker_in_flight = 0 if @workers.empty?
3114
+ # The worker is gone — revoke the blob URLs it created.
3115
+ revoke_worker_blobs(handle.to_i)
3079
3116
  end
3080
3117
 
3081
3118
  def deliver_worker_messages
@@ -3089,7 +3126,7 @@ module Capybara
3089
3126
  events.size
3090
3127
  end
3091
3128
 
3092
- def worker_pending? = !@worker_outbox.empty? || @worker_in_flight > 0
3129
+ def worker_pending? = !@worker_outbox.empty? || @worker_in_flight > 0 || @worker_init_lock.synchronize { @worker_initializing } > 0
3093
3130
 
3094
3131
  # ── Cross-window messaging (window.open / opener / postMessage) ──
3095
3132
  # Each window is a separate Browser/VM/isolate, so a reference to another
@@ -3113,6 +3150,17 @@ module Capybara
3113
3150
  end
3114
3151
 
3115
3152
  def window_location_of(handle) = @driver.respond_to?(:window_location) ? @driver.window_location(handle.to_s).to_s : ''
3153
+ # Cross-window property reads (a WindowProxy `win.foo` / `win.document.foo`):
3154
+ # route to the Driver, which reads a PRIMITIVE off the target window's VM.
3155
+ def window_get(handle, prop) = (@driver.respond_to?(:window_read) ? @driver.window_read(handle.to_s, prop.to_s, doc: false) : nil)
3156
+ def window_doc_get(handle, prop) = (@driver.respond_to?(:window_read) ? @driver.window_read(handle.to_s, prop.to_s, doc: true) : nil)
3157
+ # Read a primitive property off THIS window's globalThis / document — called
3158
+ # by the Driver to serve another window's cross-window proxy read.
3159
+ def read_property(prop, doc: false)
3160
+ @runtime.call('__csimReadWindowProp', doc, prop.to_s)
3161
+ rescue StandardError
3162
+ nil
3163
+ end
3116
3164
  def set_window_location(handle, url) = (@driver.window_set_location(handle.to_s, url.to_s) if @driver.respond_to?(:window_set_location))
3117
3165
  def window_closed?(handle) = @driver.respond_to?(:window_closed?) ? @driver.window_closed?(handle.to_s) : true
3118
3166
  def close_child_window(handle) = (@driver.close_window(handle.to_s) if @driver.respond_to?(:close_window))
@@ -3125,14 +3173,34 @@ module Capybara
3125
3173
  @window_inbox << {'data' => data, 'origin' => origin.to_s, 'sourceHandle' => source_handle.to_s}
3126
3174
  end
3127
3175
 
3128
- def window_message_pending? = !@window_inbox.empty?
3176
+ # Covers both cross-window postMessage AND BroadcastChannel — the two
3177
+ # cross-window event channels share these drain/pending hooks.
3178
+ def window_message_pending? = !@window_inbox.empty? || !@broadcast_inbox.empty?
3129
3179
 
3130
- # Fire queued cross-window messages as `message` events on window.
3180
+ # A BroadcastChannel message from another window, queued for delivery to
3181
+ # this window's channels with the same name.
3182
+ def enqueue_broadcast(name, data) = (@broadcast_inbox << {'name' => name.to_s, 'data' => data})
3183
+
3184
+ # Fire queued cross-window messages (postMessage + BroadcastChannel).
3131
3185
  def deliver_window_messages
3132
- return 0 if @window_inbox.empty?
3133
- events = @window_inbox.slice!(0, @window_inbox.length)
3134
- @runtime.call('__csim_deliverWindowMessages', events)
3135
- events.size
3186
+ n = 0
3187
+ unless @window_inbox.empty?
3188
+ events = @window_inbox.slice!(0, @window_inbox.length)
3189
+ @runtime.call('__csim_deliverWindowMessages', events)
3190
+ n += events.size
3191
+ end
3192
+ unless @broadcast_inbox.empty?
3193
+ events = @broadcast_inbox.slice!(0, @broadcast_inbox.length)
3194
+ @runtime.call('__csim_deliverBroadcasts', events)
3195
+ n += events.size
3196
+ end
3197
+ n
3198
+ end
3199
+
3200
+ # `BroadcastChannel.postMessage` in THIS window — fan out to every OTHER
3201
+ # window's matching channels (same-window delivery happens in-VM).
3202
+ def broadcast_to_windows(name, data)
3203
+ @driver.broadcast_channel(self, name.to_s, data) if @driver.respond_to?(:broadcast_channel)
3136
3204
  end
3137
3205
 
3138
3206
  # ── Image decode (libvips) ─────────────────────────────────────
@@ -3195,8 +3263,24 @@ module Capybara
3195
3263
  }
3196
3264
  end
3197
3265
 
3198
- def blob_register(url, body_b64)
3199
- @blob_registry_lock.synchronize { @blob_registry[url.to_s] = body_b64.to_s }
3266
+ def blob_register(url, body_b64, owner_realm = nil)
3267
+ # Tag the creating context so the URL is revoked when that context goes
3268
+ # away: a WORKER (separate thread, tagged via Thread.current) when it
3269
+ # terminates ("Terminating worker"), or a FRAME REALM (owner_realm passed
3270
+ # from JS createObjectURL) when the iframe is removed ("Removing an
3271
+ # iframe"). Namespaced ('w:' / 'r:') so a worker handle and a realm id
3272
+ # never collide. Main-realm blobs (no owner) live until clear_volatile.
3273
+ worker = Thread.current[:csim_worker_handle]
3274
+ key = if worker then "w:#{worker}"
3275
+ elsif owner_realm && owner_realm.to_i != 0 then "r:#{owner_realm.to_i}"
3276
+ end
3277
+ @blob_registry_lock.synchronize do
3278
+ @blob_registry[url.to_s] = body_b64.to_s
3279
+ # Keep ownership in sync both ways: a (re-)registration with no owner
3280
+ # (main thread / main realm) must DROP any prior owner, else revoking
3281
+ # that context would wrongly revoke a now-page-owned URL.
3282
+ if key then @blob_owners[url.to_s] = key else @blob_owners.delete(url.to_s) end
3283
+ end
3200
3284
  nil
3201
3285
  end
3202
3286
 
@@ -3204,11 +3288,73 @@ module Capybara
3204
3288
  @blob_registry_lock.synchronize { @blob_registry[url.to_s] }
3205
3289
  end
3206
3290
 
3291
+ # WHATWG URL "domain to ASCII" — the JS tr46 stub delegates non-ASCII / xn--
3292
+ # hosts here (the ASCII fast path stays in-VM). Returns the punycode form, or
3293
+ # nil on an IDNA failure (so whatwg-url reports "domain to ASCII failed").
3294
+ # `be_strict: false` is the URL parser's mode (UseSTD3ASCIIRules and
3295
+ # VerifyDnsLength off) — empty middle labels (`x..y`) and `_`/etc. are
3296
+ # allowed, matching whatwg-url's `domainToASCII(domain, false)`.
3297
+ def domain_to_ascii(domain)
3298
+ URI::IDNA.whatwg_to_ascii(domain.to_s, be_strict: false)
3299
+ rescue URI::IDNA::Error
3300
+ nil # a genuine IDNA failure (bad punycode / disallowed codepoint) — let
3301
+ # whatwg-url report "domain to ASCII failed". Non-IDNA errors propagate.
3302
+ end
3303
+
3304
+ # WHATWG URL "domain to Unicode" — best-effort (never fails the parse per
3305
+ # spec), so on an IDNA error fall back to the input domain (unlike to_ascii,
3306
+ # which signals failure with nil — the asymmetry is intentional).
3307
+ def domain_to_unicode(domain)
3308
+ URI::IDNA.whatwg_to_unicode(domain.to_s, be_strict: false)
3309
+ rescue URI::IDNA::Error
3310
+ domain.to_s
3311
+ end
3312
+
3313
+ # Read a blob URL's bytes + content type from THIS window's VM (its local
3314
+ # blob store) — the Driver uses it to load a blob: document into a fresh aux
3315
+ # window opened by this window. Returns {bytes:, type:} or nil.
3316
+ def read_blob_for_window(url)
3317
+ r = @runtime.call('__csimReadBlobForWindow', url.to_s)
3318
+ return nil unless r.is_a?(Hash) && r['b64']
3319
+ { bytes: Base64.decode64(r['b64'].to_s), type: r['type'].to_s }
3320
+ rescue StandardError
3321
+ nil
3322
+ end
3323
+
3324
+ # Load a blob: document (bytes from the opener) as THIS window's top-level
3325
+ # document — for `window.open(blobURL)` / a blob: aux-window navigation,
3326
+ # where the blob isn't rack-navigable and lives in the opener's isolate.
3327
+ def boot_blob_document(url, bytes, content_type)
3328
+ @current_url = url.to_s
3329
+ ct = content_type.to_s.empty? ? 'text/html' : content_type.to_s
3330
+ # Blob string parts are UTF-8-encoded; when the Blob type carries no
3331
+ # charset, decode the document as UTF-8 (not the windows-1252 HTML locale
3332
+ # default, which is an HTTP concept that doesn't apply to in-memory blobs —
3333
+ # matches the iframe blob: path's decodeBlobBody). A charset in the Blob
3334
+ # type (url-charset) is preserved so it can override <meta charset>.
3335
+ ct = "#{ct};charset=utf-8" unless ct.downcase.include?('charset')
3336
+ record_response(200, {'content-type' => ct})
3337
+ boot_response_into_ctx(bytes)
3338
+ end
3339
+
3207
3340
  def blob_unregister(url)
3208
- @blob_registry_lock.synchronize { @blob_registry.delete(url.to_s) }
3341
+ @blob_registry_lock.synchronize { @blob_registry.delete(url.to_s); @blob_owners.delete(url.to_s) }
3209
3342
  nil
3210
3343
  end
3211
3344
 
3345
+ # Revoke every blob URL owned by a context that's going away (its blob URL
3346
+ # store is part of the global being torn down).
3347
+ def revoke_owned_blobs(key)
3348
+ @blob_registry_lock.synchronize do
3349
+ urls = @blob_owners.select {|_url, owner| owner == key }.keys
3350
+ urls.each {|url| @blob_registry.delete(url); @blob_owners.delete(url) }
3351
+ end
3352
+ end
3353
+ # Keys are normalized with `.to_i` on BOTH sides (register tags
3354
+ # "r:#{owner_realm.to_i}") so a marshalled Float/String id still matches.
3355
+ def revoke_worker_blobs(handle) = revoke_owned_blobs("w:#{handle.to_i}")
3356
+ def revoke_realm_blobs(realm_id) = revoke_owned_blobs("r:#{realm_id.to_i}")
3357
+
3212
3358
  # ── postMessage transferable-buffer registry ───────────────────
3213
3359
  #
3214
3360
  # Large Uint8Array / ArrayBuffer payloads cross isolates by ID;
@@ -3347,7 +3493,20 @@ module Capybara
3347
3493
  # `build_worker` factory, evaluates the worker script, then
3348
3494
  # loops draining microtasks + timers + inbox until `:terminate`
3349
3495
  # lands or an exception propagates.
3350
- private def run_worker(handle, url, body, inbox, outbox, engine_class)
3496
+ private def run_worker(handle, url, body, inbox, outbox, engine_class, shared: false)
3497
+ # Release the spawn-time `@worker_initializing` count exactly once, however
3498
+ # this method exits (normal start, `self.close()`, or an exception), so
3499
+ # worker_pending? doesn't stay stuck true forever.
3500
+ initializing = true
3501
+ release_init = lambda do
3502
+ next unless initializing
3503
+ initializing = false
3504
+ @worker_init_lock.synchronize { @worker_initializing -= 1 }
3505
+ end
3506
+ # Tag this thread so blob URLs created by the worker's script are owned by
3507
+ # this handle and revoked on terminate (see blob_register / revoke_worker_blobs).
3508
+ Thread.current[:csim_worker_handle] = handle
3509
+ rt = nil
3351
3510
  raise "worker script not found: #{url}" unless body
3352
3511
  # The worker SCRIPT is text; the Rack-fetched body arrives
3353
3512
  # BINARY-tagged (see `RuntimeShared.utf8_text`).
@@ -3360,16 +3519,34 @@ module Capybara
3360
3519
  # the snapshot-time `http://placeholder/`.
3361
3520
  rt.eval("globalThis.__csimUpdateLocation(#{JSON.generate(url.to_s)});")
3362
3521
  rt.eval(body)
3363
- loop do
3364
- msg = pop_with_timeout(inbox, WORKER_POLL_INTERVAL)
3365
- break if msg == :terminate
3366
- rt.call('__csim_workerOnMessage', msg) if msg
3522
+ rt.drain_microtasks
3523
+ # A SharedWorker fires `connect` AFTER its script set `self.onconnect`; the
3524
+ # connect handler's port post lands in the outbox before release_init, so
3525
+ # worker_pending? stays true until it's delivered.
3526
+ if shared
3527
+ rt.eval('typeof __csimFireSharedWorkerConnect === "function" && __csimFireSharedWorkerConnect();')
3367
3528
  rt.drain_microtasks
3368
- rt.drain_timers if rt.has_ready_timer?
3529
+ end
3530
+ # Initial script has run (and any immediate postMessage is in the outbox).
3531
+ release_init.call
3532
+ # A worker that called `self.close()` in its top-level script stops here —
3533
+ # the script ran (and may have posted), but no further messages are pulled.
3534
+ unless rt.eval('!!globalThis.__csimWorkerClosed')
3535
+ loop do
3536
+ msg = pop_with_timeout(inbox, WORKER_POLL_INTERVAL)
3537
+ break if msg == :terminate
3538
+ if msg
3539
+ rt.call('__csim_workerOnMessage', msg)
3540
+ rt.drain_microtasks
3541
+ rt.drain_timers if rt.has_ready_timer?
3542
+ break if rt.eval('!!globalThis.__csimWorkerClosed')
3543
+ end
3544
+ end
3369
3545
  end
3370
3546
  rescue StandardError => e
3371
3547
  outbox << {handle: handle, kind: '__error', message: "#{e.class}: #{e.message}"}
3372
3548
  ensure
3549
+ release_init.call # guarantee the init count is released on an early raise
3373
3550
  rt&.dispose
3374
3551
  end
3375
3552
 
@@ -3379,10 +3556,31 @@ module Capybara
3379
3556
  # circuit to the JS-side blob registry instead. Http(s) URLs
3380
3557
  # fall through to the regular Rack path.
3381
3558
  private def fetch_worker_script(url)
3382
- return rack_fetch_body(url) unless url.to_s.start_with?('blob:')
3383
- b64 = @runtime.call('__csimReadBlobBase64', url)
3384
- return nil unless b64
3385
- Base64.decode64(b64.to_s)
3559
+ u = url.to_s
3560
+ if u.start_with?('blob:')
3561
+ b64 = @runtime.call('__csimReadBlobBase64', u)
3562
+ return nil unless b64
3563
+ return Base64.decode64(b64.to_s)
3564
+ end
3565
+ # `data:[<mediatype>][;base64],<data>` worker scripts (a worker created
3566
+ # from a data: URL — its origin is opaque, so its blob: URLs serialize
3567
+ # with a 'null' origin). Decode inline; Rack can't serve a data: URL.
3568
+ return decode_data_url_body(u) if u.start_with?('data:')
3569
+ rack_fetch_body(u)
3570
+ end
3571
+
3572
+ # The decoded body of a `data:[<mediatype>][;base64],<data>` URL (RFC 2397):
3573
+ # base64-decoded when the `;base64` flag is present, else decoded with `CGI.unescape` (form decoding: `%XX` escapes are decoded AND a literal `+` becomes a space).
3574
+ private def decode_data_url_body(url)
3575
+ comma = url.index(',')
3576
+ return '' unless comma
3577
+ meta = url[5...comma]
3578
+ payload = url[(comma + 1)..]
3579
+ if meta =~ /;base64\s*\z/i
3580
+ Base64.decode64(payload)
3581
+ else
3582
+ CGI.unescape(payload)
3583
+ end
3386
3584
  end
3387
3585
 
3388
3586
  # `Thread::Queue#pop(timeout:)` blocks releasing the GVL — fine
@@ -3467,12 +3665,14 @@ module Capybara
3467
3665
  headers = headers.reject {|k, _| k == 'X-Csim-Body-B64' }
3468
3666
  end
3469
3667
  MAX_FETCH_REDIRECTS.times do
3668
+ t0 = @trace && Process.clock_gettime(Process::CLOCK_MONOTONIC)
3470
3669
  # GET-only cache shortcut (RFC 9111). Fresh hit → skip @app.call
3471
3670
  # entirely; stale-but-revalidatable → fall through with conditional
3472
3671
  # headers added so the server can return 304.
3473
3672
  cache_entry = method == 'GET' ? @@asset_cache.lookup(target) : nil
3474
3673
  if cache_entry&.fresh?
3475
- log_network(method, target, cache_entry.status)
3674
+ # Cached static asset — log headers/type/size but skip the (boring) body.
3675
+ trace_network(method, target, cache_entry.status, headers, body, cache_entry.headers, nil, t0, false)
3476
3676
  return response_hash(cache_entry.status, cache_entry.headers, cache_entry.body, target, redirected)
3477
3677
  end
3478
3678
 
@@ -3483,14 +3683,16 @@ module Capybara
3483
3683
  env.merge!(env_extras) if env_extras
3484
3684
  status, resp_headers, resp_body = dispatch_rack_or_http(target, env, method: method, body: body)
3485
3685
  merge_set_cookie(resp_headers)
3486
- log_network(method, target, status)
3487
3686
  if status == 304 && cache_entry
3687
+ trace_network(method, target, cache_entry.status, headers, body, cache_entry.headers, nil, t0, false)
3488
3688
  resp_body.close if resp_body.respond_to?(:close)
3489
3689
  @@asset_cache.refresh(cache_entry, resp_headers)
3490
3690
  return response_hash(cache_entry.status, cache_entry.headers, cache_entry.body, target, redirected)
3491
3691
  end
3492
3692
  if redirect_mode != 'manual' && (loc = redirect_location(status, resp_headers))
3493
3693
  raise StandardError, '[capybara-simulated] fetch: redirect blocked by redirect=error mode' if redirect_mode == 'error'
3694
+ # Log this hop (3xx) before method/body are rewritten for the next.
3695
+ trace_network(method, target, status, headers, body, resp_headers, nil, t0, true)
3494
3696
  redirected = true
3495
3697
  preserve = [307, 308].include?(status)
3496
3698
  next_url = resolve_against(loc, target)
@@ -3501,6 +3703,7 @@ module Capybara
3501
3703
  next
3502
3704
  end
3503
3705
  body_str = read_rack_body(resp_body)
3706
+ trace_network(method, target, status, headers, body, resp_headers, body_str, t0, false)
3504
3707
  @@asset_cache.store(target, status, resp_headers, body_str) if method == 'GET'
3505
3708
  return response_hash(status, resp_headers, body_str, target, redirected)
3506
3709
  end
@@ -3510,6 +3713,63 @@ module Capybara
3510
3713
  nil
3511
3714
  end
3512
3715
 
3716
+ # Cap per-body capture so one big asset/response can't bloat the
3717
+ # trace. Generous (this is a local debugging artifact).
3718
+ NETWORK_BODY_CAP = 256 * 1024
3719
+
3720
+ # Enriched network log for the trace: response content-type / byte
3721
+ # size / elapsed ms / redirect flag, plus request + response headers
3722
+ # and bodies (devtools-style). No-ops — and skips all the lookups —
3723
+ # unless a trace is recording, so the fetch hot path is unaffected
3724
+ # when tracing is off.
3725
+ def trace_network(method, url, status, req_headers, req_body, resp_headers, resp_body, t0, redirected)
3726
+ return unless @trace
3727
+ ct = resp_headers && (resp_headers['content-type'] || resp_headers['Content-Type'])
3728
+ ct = ct.first if ct.is_a?(Array) # Rack 3 permits array-valued header fields
3729
+ ct = ct.split(';', 2).first.strip if ct.is_a?(String)
3730
+ size = if resp_body
3731
+ resp_body.bytesize
3732
+ elsif (cl = resp_headers && (resp_headers['content-length'] || resp_headers['Content-Length']))
3733
+ (cl.is_a?(Array) ? cl.first : cl).to_i
3734
+ end
3735
+ log_network(method, url, status,
3736
+ content_type: (ct if ct.is_a?(String)),
3737
+ size: size,
3738
+ duration_ms: (t0 && ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0) * 1000).round),
3739
+ redirected: (redirected || nil),
3740
+ request_headers: normalize_trace_headers(req_headers),
3741
+ request_body: (req_body && !req_body.to_s.empty? ? cap_trace_body(req_body) : nil),
3742
+ response_headers: normalize_trace_headers(resp_headers),
3743
+ response_body: (resp_body ? cap_trace_body(resp_body) : nil))
3744
+ rescue StandardError => e
3745
+ # A trace-logging bug must NEVER break the real fetch: rack_fetch's
3746
+ # own `rescue StandardError` would otherwise swallow it and return
3747
+ # nil, so the asset (e.g. jQuery) silently fails to load. Drop the
3748
+ # log entry instead.
3749
+ warn "capybara-simulated: trace network log failed: #{e.class}: #{e.message}"
3750
+ end
3751
+
3752
+ # JSON-safe body for the trace: binary (non-UTF-8) bodies become a
3753
+ # placeholder rather than mojibake, and long bodies are truncated
3754
+ # (scrubbed so a mid-codepoint cut can't yield invalid UTF-8).
3755
+ #
3756
+ # Rack response bodies are ASCII-8BIT (BINARY); reinterpret the bytes as
3757
+ # UTF-8 up front and keep working in UTF-8 throughout. Otherwise a body
3758
+ # whose bytes ARE valid UTF-8 but stays BINARY-tagged would flow out of
3759
+ # here still BINARY, and the first concat with a UTF-8 string (the
3760
+ # truncation marker here, or the trace-buffer / JSON serialization
3761
+ # downstream) raises Encoding::CompatibilityError on any byte ≥ 0x80.
3762
+ def cap_trace_body(body)
3763
+ s = body.to_s.dup.force_encoding('UTF-8')
3764
+ return "[binary, #{s.bytesize} bytes]" unless s.valid_encoding?
3765
+ s.bytesize > NETWORK_BODY_CAP ? (s.byteslice(0, NETWORK_BODY_CAP).scrub + "\n…[truncated, #{s.bytesize} bytes total]") : s
3766
+ end
3767
+
3768
+ def normalize_trace_headers(headers)
3769
+ return nil unless headers
3770
+ headers.each_with_object({}) {|(k, v), out| out[k.to_s] = v.is_a?(Array) ? v.join(', ') : v.to_s }
3771
+ end
3772
+
3513
3773
  # CGI convention: `Content-Type` and `Content-Length` land in env
3514
3774
  # *without* the HTTP_ prefix. Rails / Rack params parsing reads
3515
3775
  # `CONTENT_TYPE` and dispatches JSON / multipart parsers off it;
@@ -3546,7 +3806,14 @@ module Capybara
3546
3806
  # encoding per the HTML "decode" algorithm and is removed). The real
3547
3807
  # bytes for binary consumers ride `body_b64`; the Rack body arrives
3548
3808
  # BINARY-tagged (see `RuntimeShared.utf8_text`).
3549
- text = RuntimeShared.utf8_text(is_text ? decode_response_bom(raw) : raw)
3809
+ bom_charset = nil
3810
+ text =
3811
+ if is_text
3812
+ decoded, bom_charset = decode_response_bom(raw)
3813
+ RuntimeShared.utf8_text(decoded)
3814
+ else
3815
+ RuntimeShared.utf8_text(raw)
3816
+ end
3550
3817
  out = {
3551
3818
  'status' => status,
3552
3819
  'headers' => hdrs,
@@ -3555,29 +3822,66 @@ module Capybara
3555
3822
  'redirected' => redirected,
3556
3823
  'type' => 'basic'
3557
3824
  }
3825
+ # The BOM-detected encoding (if any) — a frame load pins its document's
3826
+ # characterSet to it (see __csimFrameWindow); highest-precedence signal.
3827
+ out['charset'] = bom_charset if bom_charset
3558
3828
  out['body_b64'] = Base64.strict_encode64(raw) unless is_text
3559
3829
  out
3560
3830
  end
3561
3831
 
3562
- # Strip + decode a single leading byte-order mark, mapping the body to a
3563
- # UTF-8 Ruby string. No BOM → return the bytes untouched (the hot path:
3564
- # just a 2–3 byte prefix check). One BOM is consumed; any further BOMs are
3565
- # ordinary U+FEFF characters in the decoded text (per spec the parser does
3566
- # not strip them again).
3832
+ # (`decode_response_bom`, defined below.) Strip + decode a single leading byte-order mark, returning
3833
+ # `[utf8_text, charset]`. `charset` is the BOM-selected Encoding-standard
3834
+ # name (highest-precedence encoding signal) or nil when there's no BOM (the
3835
+ # hot path: just a 2–3 byte prefix check). One BOM is consumed; any further
3836
+ # BOMs are ordinary U+FEFF characters in the decoded text (per spec the
3837
+ # parser does not strip them again).
3838
+ # An XML-family document (XHTML / SVG / application+text/xml). Its encoding
3839
+ # default is UTF-8 — the windows-1252 locale default is HTML-only.
3840
+ def xml_content_type?(content_type)
3841
+ mime = content_type.to_s.split(';', 2).first.to_s.strip.downcase
3842
+ mime.end_with?('+xml') || mime == 'application/xml' || mime == 'text/xml'
3843
+ end
3844
+
3845
+ # Does the response carry an explicit encoding signal (so the default
3846
+ # windows-1252 decode must NOT apply)? A `charset=` in the Content-Type, or
3847
+ # a `<meta charset>` / `<meta http-equiv=content-type … charset=…>` in the
3848
+ # HTML prescan window (the first 1024 bytes, per the HTML sniffing algorithm).
3849
+ # The `charset` must start a real attribute / content-charset (preceded by
3850
+ # whitespace, a quote, or `;`), so hyphenated look-alikes — `data-charset=`,
3851
+ # `accept-charset=` — don't false-trigger the signal.
3852
+ def html_charset_signal?(content_type, raw)
3853
+ return true if /;\s*charset\s*=/i.match?(content_type.to_s)
3854
+ head = raw.to_s.b[0, 1024].to_s
3855
+ /<meta\b[^>]*[\s"';]charset\s*=/i.match?(head)
3856
+ end
3857
+
3858
+ # Decode bytes as windows-1252 (the HTML locale-default encoding) to a UTF-8
3859
+ # Ruby string. Replaces undefined slots rather than raising.
3860
+ def decode_windows1252(s)
3861
+ s.to_s.b.dup.force_encoding(Encoding::WINDOWS_1252)
3862
+ .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
3863
+ rescue StandardError
3864
+ RuntimeShared.utf8_text(s)
3865
+ end
3866
+
3567
3867
  def decode_response_bom(s)
3568
3868
  b = s.b
3569
3869
  if b.start_with?("\xEF\xBB\xBF".b)
3570
- b.byteslice(3..).force_encoding(Encoding::UTF_8)
3870
+ [b.byteslice(3..).force_encoding(Encoding::UTF_8), 'UTF-8']
3571
3871
  elsif b.start_with?("\xFF\xFE".b) || b.start_with?("\xFE\xFF".b)
3572
3872
  # Generic UTF-16: the BOM picks endianness and is dropped by the decoder.
3573
3873
  # Replace malformed units rather than raising (a truncated/odd-length
3574
3874
  # body still yields readable UTF-8 instead of falling back to raw bytes).
3575
- b.force_encoding(Encoding::UTF_16).encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
3875
+ # A UTF-32LE BOM (FF FE 00 00) is matched here as UTF-16LE too — which is
3876
+ # exactly what browsers do (UTF-32 unsupported; the leading FF FE is read
3877
+ # as the UTF-16LE BOM).
3878
+ charset = b.start_with?("\xFF\xFE".b) ? 'UTF-16LE' : 'UTF-16BE'
3879
+ [b.force_encoding(Encoding::UTF_16).encode(Encoding::UTF_8, invalid: :replace, undef: :replace), charset]
3576
3880
  else
3577
- s
3881
+ [s, nil]
3578
3882
  end
3579
3883
  rescue StandardError
3580
- s
3884
+ [s, nil]
3581
3885
  end
3582
3886
 
3583
3887
  def text_response?(headers)
@@ -3623,6 +3927,42 @@ module Capybara
3623
3927
  navigate(url)
3624
3928
  end
3625
3929
  end
3930
# Record that a nested browsing context wants to navigate its OWN `location`
# (the frame's `location.href` / assign / replace / `location=`, including a
# cross-frame `iframe.contentWindow.location.href = ...`). `realm_id` is the
# frame's realm.
#
# The navigation is only stashed here, like location_assign: applying it
# re-navigates the owning iframe, which disposes that realm, and that is
# illegal while the frame's location setter is still on the V8 stack. The
# stash is drained later by `consume_pending_frame_nav`.
#
# Calls with a nil or zero realm id are ignored.
def frame_navigate_self(url, realm_id)
  missing_realm = realm_id.nil? || realm_id.zero?
  return if missing_realm

  # One slot per realm id (the last URL wins for a given frame), so two
  # different frames navigating in the same turn are both kept.
  @pending_frame_nav ||= {}
  @pending_frame_nav.store(realm_id, url.to_s)
end
3942
# Apply every frame self-navigation stashed by `frame_navigate_self`.
# The stash is cleared before any navigation runs. A failure for one frame is
# logged as a console warning and does not stop the remaining frames.
def consume_pending_frame_nav
  queued = @pending_frame_nav
  return if queued.nil? || queued.empty?

  @pending_frame_nav = nil
  queued.each do |realm_id, url|
    begin
      invalidate_find_cache
      # A frame on the entered `within_frame` stack goes through
      # `navigate_frame`: the full fetch (redirects / downloads / cookies) that
      # also updates `@frame_stack` / `@current_realm_id`, so the enclosing
      # `within_frame` block sees the new document. Any other frame (a parent's
      # `iframe.contentWindow.location.href = ...`) is re-navigated by realm id
      # through the src-reassignment path. Top-level frames live in the main
      # document; a nested non-entered frame's element is in its parent
      # realm's DOM (not yet routed - documented gap).
      entered = @frame_stack.find { |frame| frame[:realm_id] == realm_id }
      if entered
        navigate_frame(url, entry: entered)
      else
        @runtime.call('__csimNavigateFrameByRealm', realm_id, url)
      end
    rescue StandardError => e
      log_console('warn', "frame self-navigation failed: #{e.message}")
    end
  end
end
3626
3966
  # Mirror of `location_assign`'s deferral for `location.reload()`:
3627
3967
  # the JS call lands here from `__locationReload`; running
3628
3968
  # `browser.refresh` directly would `navigate` (rebuilding the
@@ -3635,10 +3975,156 @@ module Capybara
3635
3975
  @pending_reload = false
3636
3976
  refresh
3637
3977
  end
3978
# Record a `frame.contentWindow.location.reload()` issued from a nested
# browsing context. As with `frame_navigate_self`, the JS side flags the
# initiating realm here and the reload is deferred, so the child realm is not
# disposed in the middle of its own reload(). Realm ids are queued
# individually so two frames reloading in one turn both apply.
#
# Calls with a nil or zero realm id are ignored.
def frame_reload_self(realm_id)
  missing_realm = realm_id.nil? || realm_id.zero?
  return if missing_realm

  @pending_frame_reload ||= []
  @pending_frame_reload.push(realm_id)
end
3986
# Apply every frame reload queued by `frame_reload_self`, once per distinct
# realm id. The queue is cleared before any reload runs; a failure for one
# frame is logged as a console warning and the loop continues.
def consume_pending_frame_reload
  flagged = @pending_frame_reload
  return if flagged.nil? || flagged.empty?

  @pending_frame_reload = nil
  flagged.uniq.each do |realm_id|
    begin
      invalidate_find_cache
      entered = @frame_stack.find { |frame| frame[:realm_id] == realm_id }
      # The current document URL is read from the realm only when the frame is
      # on the entered stack and its realm is still alive; otherwise it stays
      # empty and the by-realm path below is taken.
      href =
        if entered && @runtime.frame_realm_alive?(realm_id)
          @runtime.realm_call(realm_id, '__csimLocationHref').to_s
        else
          ''
        end

      if href.empty?
        # A parent's `iframe.contentWindow.location.reload()`, an empty href,
        # or a realm torn down between flag and drain: re-navigate the owning
        # iframe by realm id JS-side, reusing the retained content so blob
        # bytes survive a revoke.
        @runtime.call('__csimReloadFrameByRealm', realm_id)
      else
        # An entered `within_frame` frame reloads through `navigate_frame`,
        # which keeps the frame stack in sync. (A blob: URL entered this way is
        # re-fetched through Rack and does NOT reuse retained bytes - reloading
        # an entered revoked-blob frame is an accepted gap.)
        navigate_frame(href, entry: entered)
      end
    rescue StandardError => e
      log_console('warn', "frame self-reload failed: #{e.message}")
    end
  end
end
4012
# Record that a <form> was submitted from INSIDE a nested browsing context
# (a frame realm reached via `contentWindow`, not an entered `within_frame`
# block). The pending-submit slot lives on the initiating realm's globalThis,
# which no top-page drain reads, so the JS side flags the realm here (mirrors
# `frame_navigate_self`). The submission is deferred and drained from
# `drain_pending_navigation`, so nothing is serialized or navigated while the
# form's `submit()` is still on the V8 stack.
#
# Calls with a nil or zero realm id are ignored.
def frame_submit_self(realm_id)
  missing_realm = realm_id.nil? || realm_id.zero?
  return if missing_realm

  @pending_frame_submit ||= []
  @pending_frame_submit.push(realm_id)
end
4023
# Drain the realms flagged by `frame_submit_self`, once per distinct realm id.
# For each realm that is still alive, take its pending form submission and
# route it through `submit_form_in_realm`. The queue is cleared up front; a
# failure for one realm is logged as a console warning and the loop continues.
def consume_pending_frame_submit
  flagged = @pending_frame_submit
  return if flagged.nil? || flagged.empty?

  @pending_frame_submit = nil
  flagged.uniq.each do |realm_id|
    begin
      next unless @runtime.frame_realm_alive?(realm_id)

      pending = @runtime.realm_call(realm_id, '__csimTakePendingFormSubmit')
      # Nothing to do unless the realm handed back a hash naming a form.
      next unless pending.is_a?(Hash) && pending['formHandle']

      invalidate_find_cache
      submit_form_in_realm(realm_id, pending['formHandle'], pending['submitterHandle'])
    rescue StandardError => e
      log_console('warn', "nested-context form submission failed: #{e.message}")
    end
  end
end
4037
# Serialize and route a form submitted inside frame realm `realm_id`.
#
# Serialization happens in the INITIATING realm (so shadow-tree controls are
# excluded and relative URLs resolve against that document). The result is
# then routed by the form's target:
#   - self / _self / '' : navigate the initiating frame itself via
#     `navigate_realm_self`;
#   - _parent / _top / _blank : not modeled from a nested context - logged;
#   - any other name, GET : treated as a named frame in the submitting
#     document and navigated there; a miss is logged;
#   - any other name, non-GET : not modeled - logged.
#
# Does nothing when the serializer does not return a Hash.
def submit_form_in_realm(realm_id, form_handle, submitter_handle)
  spec = @runtime.realm_call(realm_id, '__csimFormSerialize', form_handle, submitter_handle || 0)
  return unless spec.is_a?(Hash)

  verb = spec['method'].to_s.upcase
  verb = 'GET' if verb.empty?
  target = spec['target'].to_s
  action = spec['action'].to_s

  entries = (spec['fields'] || []).map { |entry| [entry[0].to_s, entry[1].to_s] }
  # Non-multipart file inputs contribute the filename only (mirrors
  # submit_form_handle's GET path).
  (spec['fileInputs'] || []).each do |input|
    chosen = (@file_picks && @file_picks[input['handle'].to_i]) || []
    first_pick = chosen.first
    entries << [input['name'].to_s, first_pick ? File.basename(first_pick) : '']
  end

  query = URI.encode_www_form(entries)
  get_url = form_get_url(action, query)

  if frame_self_target?(target)
    return navigate_realm_self(realm_id, get_url, action, verb, query, spec['enctype'].to_s)
  end

  if %w[_parent _top _blank].include?(target.downcase)
    return log_console('warn', "nested-context form submit (target=#{target.inspect}) is not modeled")
  end

  unless verb == 'GET'
    return log_console('warn', "nested-context form submit (target=#{target.inspect}, method=POST) is not modeled")
  end

  # Named sibling frame, GET. realm_call returns false when no frame of that
  # name exists in the initiating document (e.g. it lives in an ancestor/top
  # context, which HTML target resolution would reach but this code does not);
  # surface that rather than dropping it silently.
  found = @runtime.realm_call(realm_id, '__csimNavigateNamedFrame', target, get_url)
  return if found

  log_console('warn', "nested-context form submit: no frame named #{target.inspect} in the submitting document")
end
4077
# HTML form-submission "mutate action URL" for GET: REPLACE the action URL's
# query with the serialized entry list (dropping any pre-existing query),
# preserving a trailing #fragment.
#
# The query is always replaced, including when `body` is empty: the HTML
# algorithm sets the query to the serialized entry list unconditionally, so an
# empty entry list yields an empty query ("/path?") rather than leaving the
# old query in place.
#
# String-based so it works on the raw (possibly relative) action attribute
# without URI.parse fragility.
#
# action - the raw action attribute value (String, possibly relative/empty).
# body   - the application/x-www-form-urlencoded entry list (String).
#
# Returns the rewritten URL String.
def form_get_url(action, body)
  base, _hash, frag = action.partition('#')
  # `"".split('?', 2)` is `[]`, so `first` can be nil for an empty base.
  path = base.split('?', 2).first.to_s
  url = "#{path}?#{body}"
  frag.empty? ? url : "#{url}##{frag}"
end
4089
# Navigate the initiating frame realm itself (a self-targeted form submit).
#
# GET: an entered frame goes through `navigate_frame`; a frame that is not on
# the entered stack is re-navigated by realm id.
# Any other method: only an entered frame is handled (via
# `navigate_frame_post`); otherwise a console warning is logged.
def navigate_realm_self(realm_id, get_url, action, method, body, enctype)
  entered = @frame_stack.find { |frame| frame[:realm_id] == realm_id }

  unless method == 'GET'
    if entered
      return navigate_frame_post(resolve_against_current(action), body, enctype, entry: entered)
    end
    return log_console('warn', "nested-context self-form POST (realm #{realm_id}) is not modeled")
  end

  return navigate_frame(resolve_against_current(get_url), entry: entered) if entered

  # A frame reached via contentWindow (not on the entered stack): its owning
  # iframe lives in the parent document - re-navigate by realm id (a relative
  # get_url resolves against the frame's base on rebuild).
  @runtime.call('__csimNavigateFrameByRealm', realm_id, get_url)
end
3638
4107
# Run every deferred-navigation drain, in a fixed order: top-level location,
# frame navigations, frame form submits, frame reloads, top-level reload,
# history traversal, then aux-window opens. Returns whatever the last drain
# returns.
def drain_pending_navigation
  drains = %i[
    consume_pending_location
    consume_pending_frame_nav
    consume_pending_frame_submit
    consume_pending_frame_reload
    consume_pending_reload
    consume_pending_history_traverse
    consume_pending_aux_window
  ]
  drains.map { |drain| send(drain) }.last
end
4116
+
4117
# Open an auxiliary window for a script-driven `anchor.click()` /
# `target=_blank` navigation that has no Capybara action behind it (e.g. a WPT
# test), from the event-loop drain. Per the original note this is safe
# mid-call because it builds a separate Browser; same-window / frame
# navigations are left untouched (handled by drain_after_user_action).
#
# Does nothing unless the runtime hands back a Hash with a 'url' and the
# driver responds to `open_aux_window`. Any error is logged as a console
# warning instead of propagating.
def consume_pending_aux_window
  request = @runtime.call('__csimTakePendingAuxWindow')
  return unless request.is_a?(Hash)
  return unless request['url'] && @driver.respond_to?(:open_aux_window)

  destination = resolve_against_current(request['url'].to_s, use_base: true)
  @driver.open_aux_window(destination, source: self, blob_snapshot: request['blob'])
rescue StandardError => e
  log_console('warn', "aux-window open failed: #{e.message}")
end
3643
4129
  # POST-after-POST resubmits with the original body; GET-after-GET
3644
4130
  # is just a re-GET. Replay the current history entry.
@@ -3987,11 +4473,31 @@ module Capybara
3987
4473
  # `within_frame` scope is now stale — fall back to the main document.
3988
4474
  reset_frame_scope
3989
4475
  reset_timer_state
3990
- # The DOCUMENT is text; the Rack body arrives BINARY-tagged (see
3991
- # `RuntimeShared.utf8_text`). Charset-header-driven decode is the
3992
- # fuller story; UTF-8 + scrub matches observable browser behavior
3993
- # for the suites we run.
3994
- html = RuntimeShared.utf8_text(html)
4476
+ # The response content type drives both the parser choice (XML vs HTML —
4477
+ # XHTML/XML/SVG parse case-sensitively, no html/head/body skeleton,
4478
+ # `isHtmlDocument` false) and the encoding's HTTP-charset signal.
4479
+ ct = (@last_response_headers || {}).find {|k, _| k.to_s.downcase == 'content-type' }&.last
4480
+ ct = ct.first if ct.is_a?(Array)
4481
+ # HTML document encoding sniffing (the body arrives BINARY-tagged; see
4482
+ # `RuntimeShared.utf8_text`). A leading BOM wins (over <meta charset>) and
4483
+ # is stripped. Otherwise, for an HTML document with NO encoding signal — no
4484
+ # charset in the Content-Type AND no <meta charset> in the prescan — the
4485
+ # locale default is windows-1252 and the bytes decode as such; there is NO
4486
+ # UTF-8 sniffing (WPT encoding/sniffing). A declared charset keeps the
4487
+ # UTF-8 + scrub path (the JS side reports it from the meta; a declared
4488
+ # non-UTF-8 multibyte charset is still UTF-8-decoded — legacy multibyte
4489
+ # tables are out of scope). The windows-1252 default is HTML-only: an XML
4490
+ # document (XHTML/SVG/application+text/xml) defaults to UTF-8, and an empty
4491
+ # body (about:blank, a blank 200) stays UTF-8 too.
4492
+ decoded, doc_charset = decode_response_bom(html)
4493
+ if doc_charset
4494
+ html = RuntimeShared.utf8_text(decoded)
4495
+ elsif html.to_s.empty? || xml_content_type?(ct) || html_charset_signal?(ct, html)
4496
+ html = RuntimeShared.utf8_text(html)
4497
+ else
4498
+ html = decode_windows1252(html)
4499
+ doc_charset = 'windows-1252'
4500
+ end
3995
4501
  opts = {
3996
4502
  'traceActive' => !@trace.nil?,
3997
4503
  'timezone' => ENV['TZ'].to_s,
@@ -3999,12 +4505,13 @@ module Capybara
3999
4505
  'url' => @current_url.to_s,
4000
4506
  'html' => html
4001
4507
  }
4002
- # Carry the response content type so the JS side can pick the XML vs
4003
- # HTML parser (XHTML / XML / SVG documents parse case-sensitively, with
4004
- # no html/head/body skeleton, and report `isHtmlDocument` false).
4005
- ct = (@last_response_headers || {}).find {|k, _| k.to_s.downcase == 'content-type' }&.last
4006
- ct = ct.first if ct.is_a?(Array)
4007
4508
  opts['contentType'] = ct.to_s if ct && !ct.to_s.empty?
4509
+ # The detected document encoding pins document.characterSet (over meta).
4510
+ opts['charset'] = doc_charset if doc_charset
4511
+ # `document.lastModified` reflects the response Last-Modified header (parsed
4512
+ # to local time); absent → the current time (handled JS-side).
4513
+ lm = response_headers['Last-Modified'] # response_headers normalizes keys to Capitalized-Dash form
4514
+ opts['lastModified'] = lm if lm && !lm.to_s.empty?
4008
4515
  if @viewport_width && @viewport_height
4009
4516
  opts['viewportW'] = @viewport_width
4010
4517
  opts['viewportH'] = @viewport_height
@@ -4025,9 +4532,12 @@ module Capybara
4025
4532
  # timer can't abort loading the next page (the page it would affect is
4026
4533
  # being discarded on the very next line).
4027
4534
  def flush_outgoing_page_init
4028
- saved_location = @pending_location
4029
- saved_reload = @pending_reload
4030
- saved_traverse = @pending_history_traverse
4535
+ saved_location = @pending_location
4536
+ saved_reload = @pending_reload
4537
+ saved_traverse = @pending_history_traverse
4538
+ saved_frame_nav = @pending_frame_nav
4539
+ saved_frame_submit = @pending_frame_submit
4540
+ saved_frame_reload = @pending_frame_reload
4031
4541
  begin
4032
4542
  @runtime.run_loop_step(0, SETTLE_MAX_ITER_TASKS, yield_on_gen: false)
4033
4543
  rescue StandardError
@@ -4036,6 +4546,13 @@ module Capybara
4036
4546
  @pending_location = saved_location
4037
4547
  @pending_reload = saved_reload
4038
4548
  @pending_history_traverse = saved_traverse
4549
+ # Don't let a frame-nav / frame-submit / frame-reload intent stashed by
4550
+ # an outgoing-page timer leak into the fresh page — its realm id belongs
4551
+ # to the discarded page (and a reused context id could mis-fire against
4552
+ # an unrelated realm on the new page).
4553
+ @pending_frame_nav = saved_frame_nav
4554
+ @pending_frame_submit = saved_frame_submit
4555
+ @pending_frame_reload = saved_frame_reload
4039
4556
  end
4040
4557
  end
4041
4558