capybara-simulated 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,11 +9,13 @@ require 'net/http'
9
9
  require 'openssl'
10
10
  require 'rack/mock'
11
11
  require 'securerandom'
12
+ require 'set'
12
13
  require 'socket'
13
14
  require 'thread'
14
15
  require 'time'
15
16
  require 'uri'
16
17
  require 'uri/idna' # WHATWG/UTS46 domain-to-ASCII/Unicode (uri-idna gem)
18
+ require 'zlib'
17
19
  require_relative 'asset_cache'
18
20
  require_relative 'errors'
19
21
  require_relative 'stack_resolver'
@@ -54,6 +56,18 @@ module Capybara
54
56
  # `Last-Modified` per RFC 9111.
55
57
  @@asset_cache = AssetCache.new
56
58
 
59
+ # Opt-in: capture each request's author header names verbatim on the Rack env
60
+ # (`csim.raw_request_headers`) so the WPT .py-handler harness can replay them with
61
+ # exact casing / token chars (inspect-headers / echo-headers). OFF for real app
62
+ # traffic — nothing there consumes the list, so it would only allocate per request.
63
+ @@capture_raw_request_headers = false
64
class << self
  # Current value of the raw-request-header capture switch.
  def capture_raw_request_headers
    @@capture_raw_request_headers
  end

  # Stores `enabled` as the raw-request-header capture switch and returns it.
  def capture_raw_request_headers=(enabled)
    @@capture_raw_request_headers = enabled
  end
end
70
+
57
71
  attr_writer :timers_active
58
72
 
59
73
  # The Driver's handle for the window this Browser backs (set right after
@@ -271,6 +285,7 @@ module Capybara
271
285
  @ticking = false
272
286
  @history = []
273
287
  @history_idx = -1
288
+ @cors_preflight_cache = {}
274
289
  @modal_handlers = []
275
290
  # Geolocation override (CDP-ish). nil = no override configured →
276
291
  # navigator.geolocation reports POSITION_UNAVAILABLE. Ruby-backed so
@@ -2468,7 +2483,13 @@ module Capybara
2468
2483
  body << "--#{boundary}--\r\n"
2469
2484
  [body, "multipart/form-data; boundary=#{boundary}"]
2470
2485
  else
2471
- pairs = entries.map {|e| [e['name'].to_s, e['file'] ? e['filename'].to_s : e['value'].to_s] }
2486
+ # The urlencoded / text-plain encoders normalize CR/LF → CRLF in each entry's
2487
+ # name and value (a file entry's filename is the value) — the entry list itself
2488
+ # stays raw, so normalization lives here, matching the JS encoders and real
2489
+ # browsers (newline-normalization.html).
2490
+ pairs = entries.map {|e|
2491
+ [normalize_form_newlines(e['name']), normalize_form_newlines(e['file'] ? e['filename'] : e['value'])]
2492
+ }
2472
2493
  if enctype == 'text/plain'
2473
2494
  [pairs.map {|name, value| "#{name}=#{value}\r\n" }.join, 'text/plain']
2474
2495
  else
@@ -2477,6 +2498,12 @@ module Capybara
2477
2498
  end
2478
2499
  end
2479
2500
 
2501
# Newline normalization for HTML form submission (the counterpart of the JS
# encoders' `normalizeNL`): `s` is stringified with `to_s`, then each CRLF
# pair, each bare CR, and each bare LF is rewritten as a single CRLF.
# Returns a new String.
def normalize_form_newlines(s)
  text = s.to_s
  # CRLF is listed first so an existing pair is consumed as one unit, not doubled.
  text.gsub(/\r\n|\r|\n/) { "\r\n" }
end
2506
+
2480
2507
  # Resolve a threaded file entry's on-disk path via the `@file_picks` slot
2481
2508
  # recorded at `attach_file` time (handle/index). nil for a purely in-memory
2482
2509
  # `new File(['bytes'], …)` (no slot) — a CLASSIC (non-Turbo) submit then
@@ -2599,6 +2626,11 @@ module Capybara
2599
2626
  reset_frame_scope
2600
2627
  @history.clear
2601
2628
  @history_idx = -1
2629
+ @cors_preflight_cache = {} # CORS-preflight cache is per browsing context
2630
+ # A JS-driven history.back()/go() that scheduled a deferred traverse but
2631
+ # never drained (the page navigated away first) must not survive the reset
2632
+ # — otherwise the stale target replays against the NEXT page's fresh history.
2633
+ @pending_history_traverse = nil
2602
2634
  @file_picks = {} if @file_picks
2603
2635
  # Hand the live trace off to `@pending_trace` so an after-hook
2604
2636
  # running after `reset_session!` (Capybara's per-test teardown
@@ -3728,26 +3760,13 @@ module Capybara
3728
3760
  end
3729
3761
 
3730
3762
  def blob_resolve(url)
3731
- local = @blob_registry_lock.synchronize { @blob_registry[url.to_s] }
3732
- return local if local
3733
- # Not in this window's registry: a blob created in ANOTHER window/isolate is
3734
- # fetchable only from the SAME storage partition (cross-partition.https
3735
- # "fetched from a same-partition iframe" succeeds; a cross-partition fetch
3736
- # fails). Resolve the bytes cross-isolate from the creator and hand them back
3737
- # as base64 (resolveBlobBytes decodes them). The cross-isolate read enters the
3738
- # creator's V8 isolate, so only do it on the MAIN thread — a worker thread
3739
- # can't safely enter another isolate (so a same-partition WORKER fetch of a
3740
- # blob owned elsewhere isn't supported; a cross-partition one correctly fails).
3741
- # CAVEAT: bytes-only — resolveBlobBytes types a host-resolved blob as
3742
- # application/octet-stream (the cross-window path loses the Blob's `type`); no
3743
- # in-scope test reads the type on this path, and carrying it would change the
3744
- # __csim_blobResolve string protocol.
3745
- return nil unless @driver.respond_to?(:blob_partition_site_of) && @driver.respond_to?(:blob_bytes_for)
3746
- site = @driver.blob_partition_site_of(url.to_s)
3747
- return nil if site.nil? || site != blob_partition_site # unknown / revoked / cross-partition
3748
- return nil if Thread.current[:csim_worker_handle]
3749
- data = @driver.blob_bytes_for(url.to_s, self)
3750
- data && Base64.strict_encode64(data[:bytes].to_s.b)
3763
+ # A same-partition blob created in another window/isolate is spec-fetchable
3764
+ # cross-window, but resolving its bytes means a real-time cross-isolate read
3765
+ # (+ worker round-trips for the worker variants) that races the per-example
3766
+ # timeout under suite load — flaky in the gate. So we only resolve a blob from
3767
+ # THIS window's registry; the cross-window same-partition fetch is a backlog
3768
+ # item (cross-partition.https "fetched from a same-partition {iframe,worker}").
3769
+ @blob_registry_lock.synchronize { @blob_registry[url.to_s] }
3751
3770
  end
3752
3771
 
3753
3772
  # The SITE (scheme + registrable domain) of this window's top-level document.
@@ -4225,15 +4244,134 @@ module Capybara
4225
4244
  end
4226
4245
  end
4227
4246
 
4247
# Redirect cap for a fetch: 20 redirects may succeed, the 21st is a network
# error (redirect-count). The dispatch loop runs one iteration per dispatch, so
# it iterates MAX_FETCH_REDIRECTS + 1 times (20 hops plus the final response).
MAX_FETCH_REDIRECTS = 20

# Request cache modes that never READ the store (they always go to the network).
# Frozen so the rack_fetch path allocates no throwaway array per hop.
CACHE_MODES_SKIP_READ = %w[no-store reload].freeze

# Request cache modes that serve a STORED response even when it is stale.
CACHE_MODES_SERVE_STALE = %w[force-cache only-if-cached].freeze

# Fetch "bad port" blocklist (https://fetch.spec.whatwg.org/#port-blocking):
# ports tied to non-HTTP protocols that a request must never reach. A frozen
# Set gives O(1) membership checks on the rack_fetch path.
BAD_PORTS = Set.new([
  0, 1, 7, 9, 11, 13, 15, 17, 19, 20,
  21, 22, 23, 25, 37, 42, 43, 53, 69, 77,
  79, 87, 95, 101, 102, 103, 104, 109, 110, 111,
  113, 115, 117, 119, 123, 135, 137, 139, 143, 161,
  179, 389, 427, 465, 512, 513, 514, 515, 526, 530,
  531, 532, 540, 548, 554, 556, 563, 587, 601, 636,
  989, 990, 993, 995, 1719, 1720, 1723, 2049, 3659, 4045,
  4190, 5060, 5061, 6000, 6566, 6665, 6666, 6667, 6668, 6669,
  6679, 6697, 10080
]).freeze
4267
+
4268
# Every Referrer-Policy token this code recognises; anything else (including
# nil / the empty string) falls back to the default policy in compute_referrer.
REFERRER_POLICIES = %w[
  no-referrer no-referrer-when-downgrade origin origin-when-cross-origin
  same-origin strict-origin strict-origin-when-cross-origin unsafe-url
].freeze

# The `Referer` value a request carries under a Referrer-Policy
# (https://w3c.github.io/webappsec-referrer-policy/#determine-requests-referrer).
#
# policy       - a Referrer-Policy token; an empty / unknown value means the
#                default, strict-origin-when-cross-origin.
# referrer_url - the request's referrer (the initiating document's URL).
# target_url   - the request's destination.
#
# Returns a String, or nil when no Referer is to be sent: blank referrer,
# `no-referrer`, a referrer that does not parse or is not http(s), a target
# that does not parse, or a policy that withholds it for this pair of URLs.
# "full" is the referrer minus fragment and credentials; "origin" is
# scheme://host[:port]/ with a default port omitted.
def compute_referrer(policy, referrer_url, target_url)
  return nil if referrer_url.nil? || referrer_url.to_s.empty?
  policy = 'strict-origin-when-cross-origin' unless REFERRER_POLICIES.include?(policy)
  return nil if policy == 'no-referrer'
  # The referrer is almost always the (constant) document URL, so its parse is
  # memoised instead of being redone for every request.
  ref = parse_referrer_url(referrer_url)
  return nil unless ref && %w[http https].include?(ref.scheme)
  # Work on a copy: `ref` is the memoised object and must not be mutated.
  full = -> { u = ref.dup; u.fragment = nil; u.password = nil; u.user = nil; u.to_s }
  origin_only = -> {
    default_port = ref.scheme == 'https' ? 443 : 80
    port = ref.port && ref.port != default_port ? ":#{ref.port}" : ''
    "#{ref.scheme}://#{ref.host}#{port}/"
  }
  return full.call if policy == 'unsafe-url'
  return origin_only.call if policy == 'origin'
  # The remaining policies need the target to decide same-origin / downgrade.
  tgt = (URI.parse(target_url) rescue nil)
  return nil unless tgt
  # URI.parse lower-cases the scheme but keeps the host as written, and hosts
  # are case-insensitive, so compare them ignoring case. Otherwise
  # `Example.com` vs `example.com` would be misclassified as cross-origin.
  same_host = ref.host.to_s.casecmp?(tgt.host.to_s)
  same_origin = ref.scheme == tgt.scheme && same_host && ref.port == tgt.port
  downgrade = ref.scheme == 'https' && tgt.scheme == 'http'
  case policy
  when 'origin-when-cross-origin' then same_origin ? full.call : origin_only.call
  when 'same-origin' then same_origin ? full.call : nil
  when 'strict-origin' then downgrade ? nil : origin_only.call
  when 'no-referrer-when-downgrade' then downgrade ? nil : full.call
  when 'strict-origin-when-cross-origin' then same_origin ? full.call : (downgrade ? nil : origin_only.call)
  end
end

# Parses a referrer URL, remembering the most recent input and its result so a
# repeated call with an equal URL returns the cached URI object. A URL that
# fails to parse yields nil, and that nil is cached too.
def parse_referrer_url(url)
  return @referrer_parsed if defined?(@referrer_parsed_for) && @referrer_parsed_for == url
  @referrer_parsed_for = url
  @referrer_parsed = (URI.parse(url) rescue nil)
end
4316
+
4317
# Whether a request to `url_str` must be refused as a Fetch "bad port".
#
# A cheap textual pre-gate runs first: only a URL whose text up to the first
# `/` after `scheme://` contains `:<digit>` is handed to URI.parse. Most URLs
# carry no explicit port, so the rack_fetch path skips parsing for them.
# Returns false when the URL has no such port or fails to parse; otherwise
# reports whether the parsed port is a member of BAD_PORTS.
def bad_port?(url_str)
  if url_str =~ %r{\A[a-z]+://[^/]*:\d}i
    parsed_port = URI.parse(url_str).port
    parsed_port && BAD_PORTS.include?(parsed_port)
  else
    false
  end
rescue URI::Error
  # An unparsable URL is not treated as blocked here.
  false
end
4328
+
4229
4329
  # URLs we won't even try to route through Rack: anything that
4230
4330
  # isn't http(s) (data: / mailto: / about:) plus pseudo-tokens
4231
4331
  # like V8's `<snapshot>` that sourcemap libraries pull out of
4232
4332
  # error stacks and feed straight to `fetch()` / `xhr.open()`.
4233
- def rack_fetch(method, url, body, headers, redirect_mode, env_extras: nil)
4333
+ def rack_fetch(method, url, body, headers, redirect_mode, cors_mode = nil, credentials: 'same-origin', env_extras: nil, referrer_policy: nil, referrer: nil, cache_mode: 'default')
4334
+ # NB: a relative fetch/XHR URL is resolved against the document's API base URL
4335
+ # at OPEN time (XHR open() / fetch()), in JS, NOT here — resolving at send time
4336
+ # would wrongly pick up a `<base href>` inserted after open() (open-url-base
4337
+ # -inserted-after-open). So this resolves only against the document URL.
4234
4338
  target = resolve_against_current(url.to_s)
4235
4339
  return nil unless target.is_a?(String) && target.match?(%r{\Ahttps?://}i)
4236
- method = (method || 'GET').to_s.upcase
4340
+ # Fetch "port blocking" (https://fetch.spec.whatwg.org/#port-blocking): a
4341
+ # request to a blocked port is a network error before any connection —
4342
+ # fetch() rejects with TypeError, a sync XHR throws NetworkError
4343
+ # (request-bad-port). Re-checked per redirect hop below ("HTTP-redirect fetch"
4344
+ # re-runs the block), so a 3xx Location to a bad port is refused too.
4345
+ return nil if bad_port?(target)
4346
+ # CORS enforcement (preflight + Access-Control checks) applies only to cors_mode
4347
+ # 'cors' — sent by XHR and by fetch()'s default mode. fetch() also threads
4348
+ # 'no-cors' / 'same-origin' (mode semantics below), and a form-submission
4349
+ # navigation threads 'navigate'; other callers (sendBeacon, ESM, workers, the
4350
+ # internal asset GET) pass nil → no CORS and no mode semantics. The document's
4351
+ # origin is the request's origin; a different target origin is cross-origin.
4352
+ cors = cors_mode == 'cors'
4353
+ req_origin = cors ? url_origin(@current_url) : nil
4354
+ # Fetch request "mode" (fetch threads it; XHR is always 'cors'; a non-fetch/xhr
4355
+ # caller passes nil → no mode semantics, a plain 'basic' response). `no-cors`
4356
+ # filters a cross-origin response to opaque; `same-origin` makes a cross-origin
4357
+ # request a network error. `doc_origin` detects cross-origin for the response
4358
+ # TYPE regardless of whether CORS enforcement (cors) runs; `crossed` latches once
4359
+ # any hop leaves the document origin.
4360
+ no_cors_mode = cors_mode == 'no-cors'
4361
+ same_origin_mode = cors_mode == 'same-origin'
4362
+ # Only the real fetch request modes carry cross-origin semantics; a 'navigate'
4363
+ # (form submission) or a nil-mode internal caller gets a plain readable response.
4364
+ doc_origin = %w[cors no-cors same-origin].include?(cors_mode) ? url_origin(@current_url) : nil
4365
+ crossed = false
4366
+ # A request is "credentialed" (cookies + the credentialed CORS check) only in
4367
+ # `include` mode; `same-origin` (default) and `omit` are uncredentialed for the
4368
+ # CORS check, while the cookie decision below distinguishes all three.
4369
+ with_credentials = credentials == 'include'
4370
+ # Use the method's case AS GIVEN: the JS callers already applied the spec
4371
+ # normalization (XHR open() / Fetch upper-case the known methods, preserving
4372
+ # an unknown method's case — open-method-case-sensitive). Upper-casing here
4373
+ # would clobber a custom method like `xUNIcorn`.
4374
+ method = (method || 'GET').to_s
4237
4375
  redirected = false
4238
4376
  # JS-side base64-encodes Blob/File bodies (raw bytes survive
4239
4377
  # the engine's UTF-8 string boundary that way); decode before
@@ -4242,22 +4380,117 @@ module Capybara
4242
4380
  body = Base64.decode64(body.to_s)
4243
4381
  headers = headers.reject {|k, _| k == 'X-Csim-Body-B64' }
4244
4382
  end
4245
- MAX_FETCH_REDIRECTS.times do
4383
+ # The request's origin starts as the document origin; a cross-origin REDIRECT
4384
+ # taints it to an opaque origin (serialized "null") per Fetch "HTTP-redirect
4385
+ # fetch". `effective_origin` IS that origin — it's what the Origin header
4386
+ # carries and what the CORS check / preflight compare against from that hop on
4387
+ # ('null' once tainted, so the server must then allow 'null' or '*').
4388
+ effective_origin = req_origin
4389
+ # An author conditional (If-None-Match / …) means the caller is doing its own
4390
+ # revalidation, so the UA cache must step aside (computed once — the headers
4391
+ # carrying it survive every redirect hop unchanged).
4392
+ skip_cache = request_has_conditional_headers?(headers)
4393
+ ref_policy = referrer_policy # may be overridden per hop by a response Referrer-Policy
4394
+ # The referrer is stripped PROGRESSIVELY: each hop applies its (possibly
4395
+ # overridden) policy to the referrer the PREVIOUS hop sent, not to the original
4396
+ # document — so once a hop reduces it to an origin (or drops it), a later, laxer
4397
+ # policy can't widen it back (redirect-referrer-override). The initial source is
4398
+ # the request's referrer: an explicit `init.referrer` URL when given, else the
4399
+ # document URL ("client"); an empty referrer means no-referrer (compute_referrer
4400
+ # maps a blank source to nil).
4401
+ ref_source = referrer.nil? ? @current_url : referrer
4402
+ (MAX_FETCH_REDIRECTS + 1).times do
4246
4403
  t0 = @trace && Process.clock_gettime(Process::CLOCK_MONOTONIC)
4247
- # GET-only cache shortcut (RFC 9111). Fresh hit skip @app.call
4248
- # entirely; stale-but-revalidatable fall through with conditional
4249
- # headers added so the server can return 304.
4250
- cache_entry = method == 'GET' ? @@asset_cache.lookup(target) : nil
4251
- if cache_entry&.fresh?
4252
- # Cached static asset log headers/type/size but skip the (boring) body.
4404
+ # Cross-origin-ness for the request mode/type, latched across hops. Computed
4405
+ # BEFORE the cache so a cross-origin request never takes the cache fast path
4406
+ # (which would bypass the opaque filter / same-origin-mode error / cors type).
4407
+ crossed ||= !!(doc_origin && (effective_origin == 'null' || url_origin(target) != doc_origin))
4408
+ return nil if same_origin_mode && crossed # 'same-origin' mode forbids a cross-origin hop
4409
+ # HTTP cache (RFC 9111 + Fetch "HTTP-network-or-cache fetch"), gated by the request's
4410
+ # cache MODE. GET-only, same-origin (a cross-origin hop always redispatches so the mode
4411
+ # filtering below runs), and stepped aside when the author sent their own conditional.
4412
+ # - no-store / reload : never read the store — always hit the network, no conditional
4413
+ # - force-cache / only-if-cached : serve a stored response even when STALE, no revalidation
4414
+ # (only-if-cached with nothing stored is a network error)
4415
+ # - no-cache : always revalidate, even a fresh entry
4416
+ # - default : serve fresh; revalidate stale (fall through with conditionals)
4417
+ read_cache = method == 'GET' && !skip_cache && !crossed && !CACHE_MODES_SKIP_READ.include?(cache_mode)
4418
+ cache_entry = read_cache ? @@asset_cache.lookup(target) : nil
4419
+ serve_stored = cache_entry &&
4420
+ (CACHE_MODES_SERVE_STALE.include?(cache_mode) || (cache_entry.fresh? && cache_mode != 'no-cache'))
4421
+ if serve_stored
4422
+ if REDIRECT_STATUSES.include?(cache_entry.status.to_i)
4423
+ # A cached REDIRECT obeys the redirect mode exactly like a fresh one: `error` is a
4424
+ # network error, `manual` is an opaque-redirect, and `follow` follows it THROUGH
4425
+ # the cache — resolve the Location and continue so the next hop serves the cached
4426
+ # target (request-cache "uses cached … redirects"). only-if-cached / force-cache
4427
+ # reach this only same-origin GET (read_cache excludes cross-origin), so there's no
4428
+ # method rewrite / origin taint.
4429
+ raise StandardError, '[capybara-simulated] fetch: redirect blocked by redirect=error mode' if redirect_mode == 'error'
4430
+ if redirect_mode != 'follow'
4431
+ return response_hash(0, {}, '', target, false, type: 'opaqueredirect', body_null: true)
4432
+ end
4433
+ if (loc = redirect_location(cache_entry.status, cache_entry.headers))
4434
+ trace_network(method, target, cache_entry.status, headers, body, cache_entry.headers, nil, t0, true)
4435
+ redirected = true
4436
+ next_url = resolve_against(loc, target)
4437
+ return nil unless next_url.to_s.match?(%r{\Ahttps?://}i)
4438
+ target = carry_fragment(target, next_url)
4439
+ return nil if bad_port?(target) # a cached redirect to a blocked port is still a network error
4440
+ next
4441
+ end
4442
+ end
4443
+ # Cached asset — log headers/type/size but skip the (boring) body.
4253
4444
  trace_network(method, target, cache_entry.status, headers, body, cache_entry.headers, nil, t0, false)
4254
4445
  return response_hash(cache_entry.status, cache_entry.headers, cache_entry.body, target, redirected)
4255
4446
  end
4447
+ # only-if-cached forbids the network: no usable stored response → a network error.
4448
+ return nil if cache_mode == 'only-if-cached'
4256
4449
 
4257
4450
  env = Rack::MockRequest.env_for(target, method: method, input: body || '')
4451
+ env['REQUEST_METHOD'] = method # env_for upcases the method; restore the exact case (open-method-case-sensitive)
4452
+ # A GET/HEAD request carries no body, so it sends no Content-Length (env_for
4453
+ # always sets it to the input bytesize, i.e. 0). A POST/PUT with an empty body
4454
+ # keeps Content-Length: 0 (send-entity-body-none / -empty).
4455
+ env.delete('CONTENT_LENGTH') if %w[GET HEAD].include?(method.to_s.upcase)
4258
4456
  apply_request_headers(env, headers) if headers
4259
4457
  apply_request_headers(env, @@asset_cache.revalidation_headers(cache_entry)) if cache_entry
4260
- apply_default_request_env(env, referer: @current_url, force: false)
4458
+ # The Referer follows the request's Referrer-Policy (a redirect response can
4459
+ # override the policy for the next hop — see below). `hop_referer` also becomes
4460
+ # the source the NEXT hop strips from.
4461
+ hop_referer = compute_referrer(ref_policy, ref_source, target)
4462
+ apply_default_request_env(env, referer: hop_referer, force: false)
4463
+ # Whether this hop is cross-origin (cors only): a tainted (opaque) origin is
4464
+ # cross-origin to every real target; otherwise compare the target to the
4465
+ # document origin. Drives the Origin header, preflight, and the CORS check.
4466
+ cross_origin = cors && (effective_origin == 'null' || url_origin(target) != req_origin)
4467
+ # Fetch credentials mode decides cookie attachment, independent of the CORS
4468
+ # mode: `omit` never sends them; `include` always does; `same-origin` (default)
4469
+ # sends them only to a same-origin target — so an uncredentialed cross-origin
4470
+ # hop (cors OR no-cors) must not leak the document's cookies
4471
+ # (cors-redirect-credentials / cors-cookies). A navigation / internal caller has
4472
+ # no doc_origin, so it counts as same-origin and keeps them.
4473
+ hop_cross_origin = !!(doc_origin && (effective_origin == 'null' || url_origin(target) != doc_origin))
4474
+ send_cookies = credentials == 'include' || (credentials != 'omit' && !hop_cross_origin)
4475
+ env.delete('HTTP_COOKIE') unless send_cookies
4476
+ # A CORS request to a URL carrying credentials (`user:pass@`) is a network
4477
+ # error (access-control-and-redirects "user info" subtest).
4478
+ return nil if cross_origin && url_has_userinfo?(target)
4479
+ # CORS-preflight, re-evaluated PER HOP: a cross-origin non-simple request (a
4480
+ # non-safelisted method / header / Content-Type) must pass an OPTIONS preflight
4481
+ # first — so a same-origin request redirected cross-origin to an unsafe resource
4482
+ # is preflighted on the NEW origin (send-redirect-to-cors), not just an initially
4483
+ # cross-origin one (access-control-basic-get-fail-non-simple / preflight-*).
4484
+ if cross_origin && cors_unsafe_request?(method, headers)
4485
+ return nil unless cors_preflight_ok?(target, method, headers, effective_origin, with_credentials, hop_referer)
4486
+ end
4487
+ # Send the (effective) Origin — the UA owns this header — on a cors request when
4488
+ # the hop is cross-origin OR the method is not GET/HEAD (Fetch appends Origin to
4489
+ # every non-GET/HEAD request, so a same-origin POST carries it too). After a
4490
+ # cross-origin redirect the origin is the opaque "null".
4491
+ if cross_origin || (req_origin && !%w[GET HEAD].include?(method.to_s.upcase))
4492
+ env['HTTP_ORIGIN'] = effective_origin
4493
+ end
4261
4494
  env.merge!(env_extras) if env_extras
4262
4495
  status, resp_headers, resp_body = dispatch_rack_or_http(target, env, method: method, body: body)
4263
4496
  merge_set_cookie(resp_headers)
@@ -4267,23 +4500,119 @@ module Capybara
4267
4500
  @@asset_cache.refresh(cache_entry, resp_headers)
4268
4501
  return response_hash(cache_entry.status, cache_entry.headers, cache_entry.body, target, redirected)
4269
4502
  end
4270
- if redirect_mode != 'manual' && (loc = redirect_location(status, resp_headers))
4503
+ # Fetch "CORS check" runs on EVERY cross-origin response — including a 3xx the
4504
+ # UA is about to follow (a redirect whose response lacks a valid Access-Control
4505
+ # -Allow-Origin is itself a network error: access-control-and-redirects). A
4506
+ # credentialed request additionally forbids `*` and needs Allow-Credentials.
4507
+ if cross_origin && !cors_response_ok?(resp_headers, effective_origin, with_credentials)
4508
+ resp_body.close if resp_body.respond_to?(:close)
4509
+ return nil
4510
+ end
4511
+ # A redirect-status response in a NON-follow mode is handled without following,
4512
+ # keyed on the status ALONE (the Location is never parsed): `error` is a network
4513
+ # error; `manual` is an opaque-redirect filtered response (status 0, empty
4514
+ # statusText/headers, the ORIGINAL request URL, type 'opaqueredirect'). The CORS
4515
+ # check above runs first, so a cross-origin redirect that fails CORS is a network
4516
+ # error either way (redirect-mode / -location).
4517
+ if redirect_mode != 'follow' && REDIRECT_STATUSES.include?(status.to_i)
4518
+ resp_body.close if resp_body.respond_to?(:close)
4271
4519
  raise StandardError, '[capybara-simulated] fetch: redirect blocked by redirect=error mode' if redirect_mode == 'error'
4520
+ # A no-cors request may not even opaquely expose a CROSS-origin redirect — a
4521
+ # no-cors non-follow redirect to a cross-origin target is a network error,
4522
+ # while a same-origin one still yields an opaque-redirect.
4523
+ return nil if no_cors_mode && crossed
4524
+ return response_hash(0, {}, '', target, false, type: 'opaqueredirect', body_null: true)
4525
+ end
4526
+ if (loc = redirect_location(status, resp_headers))
4272
4527
  # Log this hop (3xx) before method/body are rewritten for the next.
4273
4528
  trace_network(method, target, status, headers, body, resp_headers, nil, t0, true)
4529
+ # Cache the redirect itself (a cacheable 3xx with freshness) BEFORE following it —
4530
+ # the follow does `next`, which would otherwise skip the store below — so a later
4531
+ # only-if-cached / force-cache request can follow the redirect chain from the cache
4532
+ # (request-cache "uses cached … redirects"). Same store gate as the terminal hop.
4533
+ @@asset_cache.store(target, status, resp_headers, '') if method == 'GET' && cache_mode != 'no-store' && !skip_cache
4274
4534
  redirected = true
4275
- preserve = [307, 308].include?(status)
4535
+ ref_source = hop_referer # the next hop strips from what THIS hop sent
4536
+ # A redirect response's Referrer-Policy overrides the policy for the next hop
4537
+ # (redirect-referrer-override): the last valid token of the header wins.
4538
+ if (rp = resp_headers['referrer-policy'] || resp_headers['Referrer-Policy'])
4539
+ tok = Array(rp).join(',').split(',').map(&:strip).reverse.find {|t| REFERRER_POLICIES.include?(t) }
4540
+ ref_policy = tok if tok
4541
+ end
4276
4542
  next_url = resolve_against(loc, target)
4543
+ # The UA only follows http(s) redirects: a Location that resolves to a
4544
+ # non-HTTP(S) URL (data:, an `invalidurl:` scheme, …) is a network error
4545
+ # (redirect-location data/invalid in follow mode).
4546
+ unless next_url.to_s.match?(%r{\Ahttps?://}i)
4547
+ resp_body.close if resp_body.respond_to?(:close)
4548
+ return nil
4549
+ end
4550
+ # A cross-origin redirect taints the request's origin to opaque ("null") only
4551
+ # once the request was ALREADY cross-origin (response tainting "cors", i.e.
4552
+ # `crossed`) and the hop changes origin — so a subsequent hop sends Origin: null
4553
+ # and the CORS check demands the server allow "null"/"*". The FIRST cross-origin
4554
+ # hop out of a same-origin request keeps the real origin (redirect-origin
4555
+ # "same origin to other origin" sends the document origin, not null).
4556
+ effective_origin = 'null' if cors && crossed && url_origin(next_url) != url_origin(target)
4277
4557
  target = carry_fragment(target, next_url)
4278
- method = 'GET' unless preserve
4279
- body = nil unless preserve
4558
+ if bad_port?(target) # a redirect to a blocked port is a network error too
4559
+ resp_body.close if resp_body.respond_to?(:close)
4560
+ return nil
4561
+ end
4562
+ # Fetch "HTTP-redirect fetch": the method changes to GET (dropping the
4563
+ # body + its Content-* headers) ONLY for 301/302 of a POST, or 303 of a
4564
+ # non-GET/HEAD. Otherwise method, body, and headers are preserved — so a
4565
+ # GET/HEAD redirected via 301/302/303 keeps its method and Content-Type,
4566
+ # and 307/308 always preserve (xhr send-redirect basics).
4567
+ up = method.to_s.upcase
4568
+ if ([301, 302].include?(status) && up == 'POST') || (status == 303 && !%w[GET HEAD].include?(up))
4569
+ method = 'GET'
4570
+ body = nil
4571
+ headers = headers.reject {|k, _| REDIRECT_DROPPED_HEADERS.include?(k.to_s.downcase) } if headers.is_a?(Hash)
4572
+ end
4280
4573
  resp_body.close if resp_body.respond_to?(:close)
4281
4574
  next
4282
4575
  end
4576
+ # A follow-mode redirect whose Location header IS present but EMPTY parses to the
4577
+ # request URL — a self-redirect that would loop until the redirect limit trips a
4578
+ # network error. redirect_location returns nil for it (empty ⇒ no followable
4579
+ # target, so navigation keeps rendering the 3xx), so recognize it here and fail
4580
+ # directly — fetch-only (redirect-empty-location follow mode).
4581
+ if REDIRECT_STATUSES.include?(status.to_i)
4582
+ raw_loc = resp_headers['location'] || resp_headers['Location']
4583
+ raw_loc = raw_loc.first if raw_loc.is_a?(Array)
4584
+ if raw_loc && raw_loc.to_s.empty?
4585
+ resp_body.close if resp_body.respond_to?(:close)
4586
+ return nil
4587
+ end
4588
+ end
4283
4589
  body_str = read_rack_body(resp_body)
4590
+ # A HEAD response, and a null-body status (204/205/304), have NO body — the UA
4591
+ # discards whatever the server sent and exposes response.body as null
4592
+ # (response-method HEAD; response-null-body). `null_body` flags it so the JS
4593
+ # Response reports a null body + empty text.
4594
+ null_body = method.to_s.upcase == 'HEAD' || NULL_BODY_STATUSES.include?(status.to_i)
4595
+ body_str = '' if null_body
4596
+ # The UA transparently decodes a Content-Encoding'd body (gzip/deflate); the
4597
+ # header stays, the bytes are inflated (response-data-gzip / -deflate).
4598
+ body_str = decode_content_encoding(body_str, resp_headers)
4599
+ # A cross-origin response only EXPOSES (getResponseHeader / getAllResponseHeaders)
4600
+ # the CORS-safelisted response headers plus those named in Access-Control-Expose
4601
+ # -Headers (`*` = all). content-type stays safelisted, so response decoding is
4602
+ # unaffected. (Filtered for script exposure only — trace / set-cookie / cache see
4603
+ # the full set.) The CORS check itself already ran above (incl. on 3xx hops).
4604
+ exposed_headers = cross_origin ? cors_exposed_headers(resp_headers) : resp_headers
4284
4605
  trace_network(method, target, status, headers, body, resp_headers, body_str, t0, false)
4285
- @@asset_cache.store(target, status, resp_headers, body_str) if method == 'GET'
4286
- return response_hash(status, resp_headers, body_str, target, redirected)
4606
+ # A no-store request must not write the cache (RFC 9111 §5.2.1.5); a request carrying
4607
+ # the author's own conditional bypasses the UA cache entirely (read AND write) — it's
4608
+ # "treated similarly to no-store" (request-cache-default-conditional). Every other mode
4609
+ # (incl. reload, which refreshes it) stores a cacheable GET response.
4610
+ @@asset_cache.store(target, status, resp_headers, body_str) if method == 'GET' && cache_mode != 'no-store' && !skip_cache
4611
+ # A no-cors cross-origin response is OPAQUE: status 0, empty body, no exposed
4612
+ # headers, empty URL (cors-basic "Opaque filter"). Otherwise the type is 'cors'
4613
+ # for a cross-origin (CORS-allowed) response, else 'basic'.
4614
+ return response_hash(0, {}, '', '', false, type: 'opaque', body_null: true) if no_cors_mode && crossed
4615
+ return response_hash(status, exposed_headers, body_str, target, redirected, type: crossed ? 'cors' : 'basic', body_null: null_body)
4287
4616
  end
4288
4617
  raise StandardError, "[capybara-simulated] fetch exceeded #{MAX_FETCH_REDIRECTS} redirects"
4289
4618
  rescue StandardError => e
@@ -4304,7 +4633,7 @@ module Capybara
4304
4633
  return unless @trace
4305
4634
  ct = resp_headers && (resp_headers['content-type'] || resp_headers['Content-Type'])
4306
4635
  ct = ct.first if ct.is_a?(Array) # Rack 3 permits array-valued header fields
4307
- ct = ct.split(';', 2).first.strip if ct.is_a?(String)
4636
+ ct = ct.split(';', 2).first&.strip if ct.is_a?(String) # "" → split is [] → first is nil
4308
4637
  size = if resp_body
4309
4638
  resp_body.bytesize
4310
4639
  elsif (cl = resp_headers && (resp_headers['content-length'] || resp_headers['Content-Length']))
@@ -4345,7 +4674,13 @@ module Capybara
4345
4674
 
4346
4675
  def normalize_trace_headers(headers)
4347
4676
  return nil unless headers
4348
- headers.each_with_object({}) {|(k, v), out| out[k.to_s] = v.is_a?(Array) ? v.join(', ') : v.to_s }
4677
+ headers.each_with_object({}) do |(k, v), out|
4678
+ # `x-csim-status-text` is an internal sentinel carrying the HTTP reason
4679
+ # phrase (response_hash lifts it into statusText); it's never a real wire
4680
+ # header, so keep it out of the trace.
4681
+ next if k.to_s.downcase == 'x-csim-status-text'
4682
+ out[k.to_s] = v.is_a?(Array) ? v.join(', ') : v.to_s
4683
+ end
4349
4684
  end
4350
4685
 
4351
4686
  # CGI convention: `Content-Type` and `Content-Length` land in env
@@ -4356,7 +4691,14 @@ module Capybara
4356
4691
  # `@rails/request.js` never deserialise and the server reads an
4357
4692
  # empty params hash.
4358
4693
  def apply_request_headers(env, headers)
4694
+ # Preserve the author's exact header names (casing + token chars) alongside the
4695
+ # CGI-mangled HTTP_* keys: the Rack env upcases names and drops non-alphanumerics
4696
+ # (Status-URI → HTTP_STATUS_URI, a tchar-only name → an unrecoverable key), but a
4697
+ # .py echo handler (inspect-headers / echo-headers) reports the names verbatim.
4698
+ # run_py_handler reads this side list to emit the original names.
4699
+ raw = (env['csim.raw_request_headers'] ||= []) if @@capture_raw_request_headers
4359
4700
  headers.each {|k, v|
4701
+ raw << [k.to_s, v.to_s] if raw
4360
4702
  name = k.to_s.upcase.tr('-', '_')
4361
4703
  case name
4362
4704
  when 'CONTENT_TYPE', 'CONTENT_LENGTH' then env[name] = v.to_s
@@ -4375,9 +4717,13 @@ module Capybara
4375
4717
  # text body when `body_b64` is absent.
4376
4718
  TEXT_CONTENT_TYPE_PREFIXES = %w[text/ application/json application/javascript application/ecmascript application/xml image/svg+xml].freeze
4377
4719
 
4378
- def response_hash(status, headers, body, url, redirected)
4720
+ def response_hash(status, headers, body, url, redirected, type: 'basic', body_null: false)
4379
4721
  raw = body.to_s
4380
4722
  hdrs = stringify(headers)
4723
+ # A NUL in a header value is not a valid HTTP message; a real server can't
4724
+ # put it on the wire, so the fetch is a network error (nil → status 0 / a
4725
+ # thrown NetworkError for a sync XHR). See headers-normalize-response.
4726
+ return nil if hdrs.any? {|_, v| v.include?("\u0000") }
4381
4727
  is_text = text_response?(hdrs)
4382
4728
  # `body` crosses as TEXT — `responseText` semantics: the bytes decoded
4383
4729
  # as UTF-8 with invalid sequences replaced (a leading BOM selects the
@@ -4392,18 +4738,43 @@ module Capybara
4392
4738
  else
4393
4739
  RuntimeShared.utf8_text(raw)
4394
4740
  end
4741
+ # statusText = the HTTP reason phrase: a custom one carried on the internal
4742
+ # x-csim-status-text header (status.py), else the status code's standard
4743
+ # reason (xhr status/statusText tests). Strip the internal header either way.
4744
+ custom_reason = hdrs.delete('x-csim-status-text')
4745
+ # Rack::Utils::HTTP_STATUS_CODES values are ASCII-8BIT (binary) strings — the V8
4746
+ # bridge marshals a binary string as a byte array, not a JS string, so statusText
4747
+ # would arrive as [79,75] instead of "OK" (abort-during-loading reads statusText
4748
+ # on a static-file response). utf8_text re-tags + scrubs to a clean JS string, the
4749
+ # same path the body and every header value already take.
4750
+ reason = RuntimeShared.utf8_text(custom_reason || Rack::Utils::HTTP_STATUS_CODES[status.to_i] || '')
4751
+ # HTTP/2 has no reason phrase, so statusText is always the empty string there (a WPT
4752
+ # `.h2` test document's fetches run over h2). We don't model the h2 transport, so key
4753
+ # off the document URL — the same signal WPT uses to serve the resource over h2
4754
+ # (fetch/xhr status.h2 "statusText over H2 … should be the empty string").
4755
+ reason = '' if @current_url.to_s.include?('.h2.')
4395
4756
  out = {
4396
4757
  'status' => status,
4758
+ 'statusText' => reason,
4397
4759
  'headers' => hdrs,
4398
4760
  'body' => text,
4399
4761
  'url' => url,
4400
4762
  'redirected' => redirected,
4401
- 'type' => 'basic'
4763
+ 'type' => type
4402
4764
  }
4765
+ out['body_null'] = true if body_null # null-body status / HEAD → response.body is null
4403
4766
  # The BOM-detected encoding (if any) — a frame load pins its document's
4404
4767
  # characterSet to it (see __csimFrameWindow); highest-precedence signal.
4405
4768
  out['charset'] = bom_charset if bom_charset
4406
- out['body_b64'] = Base64.strict_encode64(raw) unless is_text
4769
+ # Hand the raw bytes to the (XHR) client UNLESS the response is pure-ASCII text.
4770
+ # ASCII decodes identically under every encoding — so responseText is already
4771
+ # correct from the UTF-8 `body`, and it round-trips byte-for-byte as an
4772
+ # ArrayBuffer/Blob. Any NON-ASCII body needs the bytes: a non-UTF-8 charset or an
4773
+ # XML-prolog / <meta charset>-sniffed encoding (responseText), or multibyte UTF-8
4774
+ # read as arraybuffer/blob — the client decodes them with the final encoding
4775
+ # (decodeResponseBytes). `ascii_only?` is a cheap C-level scan, so the dominant
4776
+ # pure-ASCII app JSON/HTML traffic keeps the fast path and pays no base64.
4777
+ out['body_b64'] = Base64.strict_encode64(raw) unless is_text && raw.ascii_only?
4407
4778
  out
4408
4779
  end
4409
4780
 
@@ -4479,6 +4850,34 @@ module Capybara
4479
4850
  buf
4480
4851
  end
4481
4852
 
4853
# Transparently decode a Content-Encoding'd response body (HTTP "content coding"):
# gzip / x-gzip via Zlib.gunzip; deflate via zlib-wrapped inflate, falling back to
# raw DEFLATE (the "deflate" coding is ambiguously used for both). Unknown codings
# (e.g. br) and malformed data are left untouched — best-effort, like a browser that
# would error, but we keep the bytes so the caller still sees a response.
#
# body    - the response body String (nil / empty is returned as-is).
# headers - the response headers (name => value pairs); the Content-Encoding name is
#           matched case-insensitively. nil is treated as "no headers".
#
# Returns the decoded bytes tagged UTF-8, or `body` itself when nothing was decoded.
def decode_content_encoding(body, headers)
  return body if body.nil? || body.empty?

  raw = (headers || {}).find {|k, _| k.to_s.downcase == 'content-encoding' }&.last
  enc = (raw.is_a?(Array) ? raw.join(',') : raw.to_s).strip.downcase # Rack 3 may hand the value as an array
  # The decoded bytes re-enter the UTF-8 text pipeline the same as an
  # un-encoded body (read_rack_body yields UTF-8), so re-tag them — Zlib
  # output is ASCII-8BIT, which would otherwise marshal to V8 as a byte array.
  decoded =
    case enc
    when 'gzip', 'x-gzip' then Zlib.gunzip(body.b)
    when 'deflate'
      begin
        Zlib::Inflate.inflate(body.b)
      rescue Zlib::Error
        # raw (header-less) DEFLATE; close the stream explicitly so its zlib
        # state is released even when inflating raises.
        inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
        begin
          inflater.inflate(body.b)
        ensure
          inflater.close
        end
      end
    else return body
    end
  decoded.force_encoding('UTF-8')
rescue Zlib::Error
  body
end
4880
+
4482
4881
  # Defer the navigation: doing it from inside the running V8 call
4483
4882
  # would dispose the Context mid-call. tick_real_time drains
4484
4883
  # after the call returns. Same pattern as `__csimPendingFormSubmit`.
@@ -5555,18 +5954,40 @@ module Capybara
5555
5954
  end
5556
5955
 
5557
5956
  # Header names/values are TEXT (RFC 9110: field values are ASCII); Rack
5558
- # hands them over BINARY-tagged (see `RuntimeShared.utf8_text`).
5957
+ # hands them over BINARY-tagged (see `RuntimeShared.utf8_text`). Per-value HTTP
5958
+ # -whitespace normalization happens upstream, BEFORE duplicate values are
5959
+ # combined (WptRunner.combine_headers) — not here, where a combined value like
5960
+ # `", "` (two empty fields) would wrongly lose its trailing space. An
5961
+ # Array-valued header (a Rack app emitting a repeated field) is combined with
5962
+ # `, ` — the WHATWG "combine" separator getAllResponseHeaders exposes, matching
5963
+ # both real browsers and the harness's combine_headers.
5559
5964
  def stringify(headers)
5560
5965
  out = {}
5561
5966
  headers.each do |k, v|
5562
- out[k.to_s] = RuntimeShared.utf8_text(v.is_a?(Array) ? v.join(',') : v.to_s)
5967
+ out[k.to_s] = RuntimeShared.utf8_text(v.is_a?(Array) ? v.join(', ') : v.to_s)
5563
5968
  end
5564
5969
  out
5565
5970
  end
5566
5971
 
5972
# The Fetch "redirect status" set — ONLY these are followed. 300 (multiple
# choice), 304 (not modified), 305/306 (deprecated) are NOT redirects: the 3xx
# response is returned to the caller as-is (xhr send-redirect basics).
REDIRECT_STATUSES = [301, 302, 303, 307, 308].freeze
# Statuses whose response has no body (Fetch "null body status") — the body is dropped
# and response.body is null (response-null-body). (101 is unreachable here.)
NULL_BODY_STATUSES = [204, 205, 304].freeze
# Request-body headers removed when a redirect nulls the body (method → GET).
REDIRECT_DROPPED_HEADERS = %w[content-encoding content-language content-location content-type content-length].freeze

# The followable redirect target of a response, or nil when there is none.
#
# status  - the response status (anything responding to `to_i`).
# headers - the response headers, looked up under `location` then `Location`.
#
# Returns the Location value (the first element of an array-valued field), or nil
# when the status is not a redirect status or the Location is blank / absent.
def redirect_location(status, headers)
  return nil unless REDIRECT_STATUSES.include?(status.to_i)

  target = headers['location'] || headers['Location']
  target = target.first if target.is_a?(Array) # Rack 3 permits array-valued header fields
  # A blank (or absent) Location has no FOLLOWABLE target: an empty value parses back
  # to the current URL, so following it would just self-redirect. Return nil so a
  # caller renders the 3xx as-is rather than looping — the several navigation handlers
  # rely on this. (The fetch redirect loop recognizes a present-but-empty Location
  # separately and turns it into a network error per Fetch — see rack_fetch.)
  target.to_s.empty? ? nil : target
end
5571
5992
 
5572
5993
  def resolve_against_current(url, use_base: false)
@@ -5596,6 +6017,218 @@ module Capybara
5596
6017
  dom_call('__csimBaseHref').to_s
5597
6018
  end
5598
6019
 
6020
# Fetch "CORS-safelisted method" / "…request-header" / "…Content-Type". A request
# is "simple" (no preflight) iff its method is safelisted AND every author header is
# safelisted (Content-Type only for a urlencoded / multipart / text/plain value).
CORS_SAFELISTED_METHODS = %w[GET HEAD POST].freeze
CORS_SAFELISTED_HEADERS = %w[accept accept-language content-language content-type].freeze
# RFC 7230 `token` (tchar+) — a valid HTTP method / field-name. Used to reject a
# preflight whose Access-Control-Allow-Methods / -Headers carries a malformed value.
HTTP_TOKEN = /\A[!#$%&'*+\-.^_`|~0-9A-Za-z]+\z/.freeze
CORS_SAFELISTED_CTYPES = %w[application/x-www-form-urlencoded multipart/form-data text/plain].freeze

# The sorted, lowercased author header names that are NOT CORS-safelisted (a
# non-safe Content-Type counts). These are echoed in Access-Control-Request-Headers
# for the preflight and must be covered by Access-Control-Allow-Headers.
#
# headers - name => value pairs, or nil (treated as no headers).
#
# Returns a sorted Array of unique lowercase names. Names starting with `x-csim`
# and `content-length` are always ignored.
def cors_unsafe_headers(headers)
  unsafe = []
  (headers || {}).each do |key, value|
    lowered = key.to_s.downcase
    next if lowered.start_with?('x-csim') || lowered == 'content-length'

    safelisted =
      if lowered == 'content-type'
        # Only the media type before any `;` parameters decides safelisting.
        media_type = value.to_s.split(';', 2).first.to_s.strip.downcase
        CORS_SAFELISTED_CTYPES.include?(media_type)
      else
        CORS_SAFELISTED_HEADERS.include?(lowered)
      end
    unsafe << lowered unless safelisted
  end
  unsafe.uniq.sort
end
6045
+
6046
# Whether a request needs a CORS preflight: true when its method (compared
# upper-cased) is not CORS-safelisted, or when cors_unsafe_headers reports at
# least one non-safelisted author header.
def cors_unsafe_request?(method, headers)
  return true unless CORS_SAFELISTED_METHODS.include?(method.to_s.upcase)

  !cors_unsafe_headers(headers).empty?
end
6049
+
6050
# Fetch "CORS check" on a cross-origin response: it must allow the request's
# (effective) origin via Access-Control-Allow-Origin. A NON-credentialed request
# accepts `*` or the exact origin; a CREDENTIALED one (withCredentials) forbids
# `*` — the ACAO must be the exact origin AND Access-Control-Allow-Credentials
# must be `true` (access-control-and-redirects-async-same-origin credentials cases).
#
# resp_headers - the response headers (name => value pairs).
# origin       - the requesting origin to match against.
# credentialed - whether the request is sent with credentials.
#
# Returns true when the response passes the check, false otherwise.
def cors_response_ok?(resp_headers, origin, credentialed)
  acao = cors_header(resp_headers, 'access-control-allow-origin').to_s
  # cors_header stringifies its result, so a MISSING header arrives as '' rather
  # than nil. Reject it explicitly instead of comparing '' against the origin.
  return false if acao.empty?
  return (acao == '*' || acao == origin) unless credentialed

  # The credentials grant is matched byte-for-byte against `true`, the same exact
  # comparison cors_run_preflight applies to the preflight response.
  acao == origin && cors_header(resp_headers, 'access-control-allow-credentials').to_s == 'true'
end
6065
+
6066
# Whether a URL carries userinfo (`user[:password]@`). A CORS request to such a
# URL is a network error (access-control-and-redirects "user info" subtest).
# A URL that URI.parse rejects counts as having no userinfo.
def url_has_userinfo?(url)
  begin
    parsed = URI.parse(url.to_s)
  rescue URI::InvalidURIError
    return false
  end
  credentials = parsed.userinfo
  !(credentials.nil? || credentials.empty?)
end
6074
+
6075
# An author-set conditional header means the CALLER is doing its own revalidation,
# so the UA cache must step aside: the request reaches the origin and the server's
# own 304/200 decision is returned (send-conditional), not a cached hit.
CONDITIONAL_REQUEST_HEADERS = %w[if-none-match if-modified-since if-match if-unmodified-since if-range].freeze

# True when `headers` is a Hash with at least one key that, lowercased, names a
# conditional request header. Anything that is not a Hash yields false.
def request_has_conditional_headers?(headers)
  return false unless headers.is_a?(Hash)

  headers.any? do |name, _value|
    CONDITIONAL_REQUEST_HEADERS.include?(name.to_s.downcase)
  end
end
6082
+
6083
# Run the CORS preflight unless a cached result already covers this request (Fetch
# "CORS-preflight cache"): a prior preflight to the same (origin, url) within its
# Access-Control-Max-Age that allows this method + headers lets the actual request
# skip the OPTIONS (access-control-basic-allow-preflight-cache). Returns false (=
# network error) only when a fresh preflight is needed AND fails.
def cors_preflight_ok?(target, method, headers, req_origin, credentialed, referer)
  return true if cors_preflight_cached?(target, req_origin, method, headers, credentialed)

  grant = cors_run_preflight(target, method, headers, req_origin, credentialed, referer)
  return false unless grant

  # Cache the grant for Max-Age seconds so a covered follow-up skips the preflight.
  # The key is (origin, url, credentialed): a credentialed grant (ACAO echoing the
  # origin, no `*` matching) can't cover an uncredentialed follow-up or vice versa,
  # so the two are cached apart. Expiry uses the REAL monotonic clock (not the
  # virtual one), so a test that virtual-sleeps past Max-Age to force a re-preflight
  # isn't caught yet.
  if grant[:max_age].positive?
    cache_key = [req_origin, target, credentialed]
    stamped = grant.merge(stored_at: Process.clock_gettime(Process::CLOCK_MONOTONIC))
    @cors_preflight_cache[cache_key] = stamped
  end
  true
end
6101
+
6102
# Whether a cached preflight grant covers this request (not expired + method/headers
# allowed). A method/header the cache doesn't cover — or an expired entry — forces a
# fresh preflight (cache-invalidation-by-method / -header / -timeout).
def cors_preflight_cached?(target, req_origin, method, headers, credentialed)
  grant = @cors_preflight_cache[[req_origin, target, credentialed]]
  return false unless grant

  # Age is measured on the monotonic clock, matching the `stored_at` stamp.
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - grant[:stored_at]
  return false if elapsed >= grant[:max_age]

  requested_headers = cors_unsafe_headers(headers)
  cors_grant_allows?(grant[:methods], grant[:headers], method, requested_headers, credentialed)
end
6111
+
6112
# `Authorization` is Fetch's sole "CORS non-wildcard request-header name": a preflight
# `Access-Control-Allow-Headers: *` never covers it — it must be listed by name — even
# for an uncredentialed request (cors-preflight "authorization not covered by wildcard").
CORS_NON_WILDCARD_REQUEST_HEADERS = %w[authorization].freeze

# Does a preflight grant (its Access-Control-Allow-Methods / -Headers) cover this
# request: the method is allowed / `*` / CORS-safelisted, and every unsafe header is
# allowed / `*`. Shared by the fresh-preflight accept check and the cache-hit check.
# For a CREDENTIALED request the wildcard loses its meaning — Fetch's "CORS-preflight
# fetch" matches `*` against no method/header when credentials mode is include, so a
# non-listed method or unsafe header is rejected (cors-preflight-star credentialed).
def cors_grant_allows?(allow_methods, allow_headers, method, unsafe_headers, credentialed = false)
  wildcards_apply = !credentialed
  # The method match is byte-CASE-SENSITIVE (Fetch normalizes the request method but
  # compares it verbatim against Access-Control-Allow-Methods): `delete` in the grant
  # does not cover a `DELETE` request. Safelisted GET/HEAD/POST pass regardless
  # (they're always normalized to upper-case) (cors-preflight-star method-case).
  verb = method.to_s
  listed = allow_methods.include?(verb) || CORS_SAFELISTED_METHODS.include?(verb)
  return false unless listed || (wildcards_apply && allow_methods.include?('*'))

  star_headers = wildcards_apply && allow_headers.include?('*')
  unsafe_headers.all? do |name|
    next true if allow_headers.include?(name)

    star_headers && !CORS_NON_WILDCARD_REQUEST_HEADERS.include?(name)
  end
end
6136
+
6137
+ # Fetch "CORS-preflight fetch": send an OPTIONS with Access-Control-Request-Method
6138
+ # / -Headers + Origin; on success (ok-status, ACAO match, and the grant covers the
6139
+ # method + unsafe headers) return the grant {methods, headers, max_age} for the
6140
+ # cache, else nil. A credentialed preflight additionally requires the response to
6141
+ # allow credentials (ACAC:true) and forbids `*` in the origin/method/header grants.
#
# target       - URL the OPTIONS request is built for and dispatched to.
# method       - the actual request's method, echoed in Access-Control-Request-Method.
# headers      - the actual request's author headers; only the names reported by
#                cors_unsafe_headers are echoed in Access-Control-Request-Headers.
# req_origin   - value sent as Origin and matched against Access-Control-Allow-Origin.
# credentialed - whether the actual request is credentialed (tightens the checks).
# referer      - passed through to apply_default_request_env as the referer.
6142
+ def cors_run_preflight(target, method, headers, req_origin, credentialed, referer)
6143
+ unsafe = cors_unsafe_headers(headers)
6144
+ env = Rack::MockRequest.env_for(target, method: 'OPTIONS')
6145
+ env['REQUEST_METHOD'] = 'OPTIONS'
6146
+ # The preflight's Referer is the request's referrer under its referrer policy —
6147
+ # the SAME value the actual request sends (computed by the caller), not the raw
6148
+ # document URL (cors-preflight-referrer).
6149
+ apply_default_request_env(env, referer: referer, force: false)
6150
+ # A CORS-preflight is a fetch, so it carries fetch's default `Accept: */*` (NOT
6151
+ # the navigation Accept apply_default_request_env sets) — some handlers reject a
6152
+ # preflight whose Accept isn't */* (preflight.py).
6153
+ env['HTTP_ACCEPT'] = '*/*'
6154
+ # A CORS-preflight is always uncredentialed — it carries no cookies, even when the
6155
+ # actual request that follows is credentialed.
6156
+ env.delete('HTTP_COOKIE')
6157
+ env['HTTP_ORIGIN'] = req_origin
6158
+ # Access-Control-Request-Method carries the request's (already-normalized) method
6159
+ # VERBATIM — `patch` stays `patch`, matching the byte-case-sensitive grant check.
6160
+ env['HTTP_ACCESS_CONTROL_REQUEST_METHOD'] = method.to_s
6161
+ env['HTTP_ACCESS_CONTROL_REQUEST_HEADERS'] = unsafe.join(',') unless unsafe.empty?
6162
+ status, ph, pbody = dispatch_rack_or_http(target, env, method: 'OPTIONS', body: nil)
# Only the preflight's status and headers are inspected below; the body is
# closed right away without being read.
6163
+ pbody.close if pbody.respond_to?(:close)
6164
+ return nil unless (200..299).include?(status.to_i)
6165
+ acao = cors_header(ph, 'access-control-allow-origin')
6166
+ # A credentialed preflight can't be allowed by the wildcard origin and must carry
6167
+ # Access-Control-Allow-Credentials: true (cors-preflight-star credentialed).
6168
+ return nil unless credentialed ? acao == req_origin : (acao == '*' || acao == req_origin)
6169
+ return nil if credentialed && cors_header(ph, 'access-control-allow-credentials') != 'true'
6170
+ allow_methods = cors_list(cors_header(ph, 'access-control-allow-methods'))
6171
+ allow_headers = cors_list(cors_header(ph, 'access-control-allow-headers')).map(&:downcase)
6172
+ # Fetch "extract header list values" fails when a grant contains a malformed token
6173
+ # (`Access-Control-Allow-Methods: Bad value` — a space isn't a tchar), and a failed
6174
+ # extraction is a network error (cors-preflight-response-validation). Methods and
6175
+ # header names are both HTTP tokens; `*` is a valid tchar so the wildcard passes.
6176
+ return nil unless (allow_methods + allow_headers).all? {|t| t.match?(HTTP_TOKEN) }
6177
+ return nil unless cors_grant_allows?(allow_methods, allow_headers, method, unsafe, credentialed)
# max_age is Access-Control-Max-Age run through to_i, so an absent or
# non-numeric value becomes 0.
6178
+ {methods: allow_methods, headers: allow_headers, max_age: cors_header(ph, 'access-control-max-age').to_i}
6179
+ end
6180
+
6181
# Fetch "CORS-safelisted response-header name" — always exposed to script for a
# cross-origin response, without being listed in Access-Control-Expose-Headers.
CORS_SAFELISTED_RESPONSE_HEADERS = %w[
  cache-control content-language content-length content-type expires last-modified pragma
].freeze

# The response headers a cross-origin "cors" response exposes to getResponseHeader /
# getAllResponseHeaders: the CORS-safelisted set plus any named in Access-Control
# -Expose-Headers (`*` exposes all — only valid without credentials, which these
# cases don't use).
#
# headers - the full response headers (name => value pairs).
#
# Returns the subset of `headers` that script may see; names are matched
# case-insensitively and the original keys/values are kept.
def cors_exposed_headers(headers)
  # set-cookie / set-cookie2 are forbidden response-header names — NEVER exposed to
  # script, even under `Access-Control-Expose-Headers: *`. x-csim-status-text is our
  # internal reason-phrase sentinel (response_hash lifts it into statusText, which
  # IS exposed cross-origin, then strips it from the script-visible map), so it must
  # survive the filter.
  forbidden = %w[set-cookie set-cookie2]
  expose = cors_list(cors_header(headers, 'access-control-expose-headers')).map(&:downcase)
  return headers.reject {|k, _| forbidden.include?(k.to_s.downcase) } if expose.include?('*')

  # Naming a forbidden header explicitly in Access-Control-Expose-Headers must not
  # expose it either, so drop forbidden names from the expose list before filtering.
  allowed = CORS_SAFELISTED_RESPONSE_HEADERS + (expose - forbidden) + ['x-csim-status-text']
  headers.select {|k, _| allowed.include?(k.to_s.downcase) }
end
6203
+
6204
# Case-insensitive response-header lookup for the CORS checks.
#
# headers - the response headers (name => value pairs); nil is treated as empty.
# name    - the header name to find, already lowercased.
#
# Returns the header value as a String, or '' when the header is absent. An
# Array-valued field (Rack 3 permits those) is combined with ", " rather than
# stringified as an Array, so a single-element value such as ['*'] reads as '*'.
def cors_header(headers, name)
  value = (headers || {}).find {|k, _| k.to_s.downcase == name }&.last
  value.is_a?(Array) ? value.join(', ') : value.to_s
end
6209
+
6210
# Split a comma-separated header value into its trimmed, non-empty items.
# nil (or any value) is stringified first, so a missing header yields [].
def cors_list(value)
  value.to_s.split(',').filter_map do |item|
    trimmed = item.strip
    trimmed unless trimmed.empty?
  end
end
6213
+
6214
# The origin of a URL — `scheme://host[:port]` with the default port (80/443)
# elided — for the CORS same/cross-origin comparison. nil for a non-http(s) or
# unparseable URL (about:blank / data: / a relative current_url) so CORS never
# treats those as a comparable origin.
def url_origin(url)
  parsed = URI.parse(url.to_s)
  raw_scheme = parsed.scheme
  raw_host = parsed.host
  return nil unless raw_scheme && raw_host && raw_scheme.match?(/\Ahttps?\z/i)

  # An origin is (scheme, host, port) compared case-insensitively on scheme+host —
  # so canonicalize both to lowercase, else http://Example.com vs http://example.com
  # would mis-classify a same-origin request as cross-origin.
  scheme = raw_scheme.downcase
  default_port = scheme == 'https' ? 443 : 80
  origin = "#{scheme}://#{raw_host.downcase}"
  origin = "#{origin}:#{parsed.port}" if parsed.port && parsed.port != default_port
  origin
rescue URI::InvalidURIError
  nil
end
6231
+
5599
6232
  def carry_fragment(from_url, to_url)
5600
6233
  from = URI.parse(from_url.to_s)
5601
6234
  to = URI.parse(to_url.to_s)