capybara-simulated 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/capybara/simulated/asset_cache.rb +15 -5
- data/lib/capybara/simulated/browser.rb +679 -46
- data/lib/capybara/simulated/driver.rb +13 -1
- data/lib/capybara/simulated/js/bridge.bundle.js +15586 -13715
- data/lib/capybara/simulated/runtime_shared.rb +1 -1
- data/lib/capybara/simulated/version.rb +1 -1
- data/vendor/js/vendor.bundle.js +12 -11
- metadata +1 -1
|
@@ -9,11 +9,13 @@ require 'net/http'
|
|
|
9
9
|
require 'openssl'
|
|
10
10
|
require 'rack/mock'
|
|
11
11
|
require 'securerandom'
|
|
12
|
+
require 'set'
|
|
12
13
|
require 'socket'
|
|
13
14
|
require 'thread'
|
|
14
15
|
require 'time'
|
|
15
16
|
require 'uri'
|
|
16
17
|
require 'uri/idna' # WHATWG/UTS46 domain-to-ASCII/Unicode (uri-idna gem)
|
|
18
|
+
require 'zlib'
|
|
17
19
|
require_relative 'asset_cache'
|
|
18
20
|
require_relative 'errors'
|
|
19
21
|
require_relative 'stack_resolver'
|
|
@@ -54,6 +56,18 @@ module Capybara
|
|
|
54
56
|
# `Last-Modified` per RFC 9111.
|
|
55
57
|
@@asset_cache = AssetCache.new
|
|
56
58
|
|
|
59
|
+
# Opt-in: capture each request's author header names verbatim on the Rack env
|
|
60
|
+
# (`csim.raw_request_headers`) so the WPT .py-handler harness can replay them with
|
|
61
|
+
# exact casing / token chars (inspect-headers / echo-headers). OFF for real app
|
|
62
|
+
# traffic — nothing there consumes the list, so it would only allocate per request.
|
|
63
|
+
@@capture_raw_request_headers = false
|
|
64
|
+
def self.capture_raw_request_headers
|
|
65
|
+
@@capture_raw_request_headers
|
|
66
|
+
end
|
|
67
|
+
def self.capture_raw_request_headers=(v)
|
|
68
|
+
@@capture_raw_request_headers = v
|
|
69
|
+
end
|
|
70
|
+
|
|
57
71
|
attr_writer :timers_active
|
|
58
72
|
|
|
59
73
|
# The Driver's handle for the window this Browser backs (set right after
|
|
@@ -271,6 +285,7 @@ module Capybara
|
|
|
271
285
|
@ticking = false
|
|
272
286
|
@history = []
|
|
273
287
|
@history_idx = -1
|
|
288
|
+
@cors_preflight_cache = {}
|
|
274
289
|
@modal_handlers = []
|
|
275
290
|
# Geolocation override (CDP-ish). nil = no override configured →
|
|
276
291
|
# navigator.geolocation reports POSITION_UNAVAILABLE. Ruby-backed so
|
|
@@ -2468,7 +2483,13 @@ module Capybara
|
|
|
2468
2483
|
body << "--#{boundary}--\r\n"
|
|
2469
2484
|
[body, "multipart/form-data; boundary=#{boundary}"]
|
|
2470
2485
|
else
|
|
2471
|
-
|
|
2486
|
+
# The urlencoded / text-plain encoders normalize CR/LF → CRLF in each entry's
|
|
2487
|
+
# name and value (a file entry's filename is the value) — the entry list itself
|
|
2488
|
+
# stays raw, so normalization lives here, matching the JS encoders and real
|
|
2489
|
+
# browsers (newline-normalization.html).
|
|
2490
|
+
pairs = entries.map {|e|
|
|
2491
|
+
[normalize_form_newlines(e['name']), normalize_form_newlines(e['file'] ? e['filename'] : e['value'])]
|
|
2492
|
+
}
|
|
2472
2493
|
if enctype == 'text/plain'
|
|
2473
2494
|
[pairs.map {|name, value| "#{name}=#{value}\r\n" }.join, 'text/plain']
|
|
2474
2495
|
else
|
|
@@ -2477,6 +2498,12 @@ module Capybara
|
|
|
2477
2498
|
end
|
|
2478
2499
|
end
|
|
2479
2500
|
|
|
2501
|
+
# HTML form-submission newline normalization: every lone CR, lone LF, and CRLF in an
|
|
2502
|
+
# entry name/value becomes a CRLF (the JS encoders' `normalizeNL` counterpart).
|
|
2503
|
+
def normalize_form_newlines(s)
|
|
2504
|
+
s.to_s.gsub(/\r\n?|\n/, "\r\n")
|
|
2505
|
+
end
|
|
2506
|
+
|
|
2480
2507
|
# Resolve a threaded file entry's on-disk path via the `@file_picks` slot
|
|
2481
2508
|
# recorded at `attach_file` time (handle/index). nil for a purely in-memory
|
|
2482
2509
|
# `new File(['bytes'], …)` (no slot) — a CLASSIC (non-Turbo) submit then
|
|
@@ -2599,6 +2626,11 @@ module Capybara
|
|
|
2599
2626
|
reset_frame_scope
|
|
2600
2627
|
@history.clear
|
|
2601
2628
|
@history_idx = -1
|
|
2629
|
+
@cors_preflight_cache = {} # CORS-preflight cache is per browsing context
|
|
2630
|
+
# A JS-driven history.back()/go() that scheduled a deferred traverse but
|
|
2631
|
+
# never drained (the page navigated away first) must not survive the reset
|
|
2632
|
+
# — otherwise the stale target replays against the NEXT page's fresh history.
|
|
2633
|
+
@pending_history_traverse = nil
|
|
2602
2634
|
@file_picks = {} if @file_picks
|
|
2603
2635
|
# Hand the live trace off to `@pending_trace` so an after-hook
|
|
2604
2636
|
# running after `reset_session!` (Capybara's per-test teardown
|
|
@@ -3728,26 +3760,13 @@ module Capybara
|
|
|
3728
3760
|
end
|
|
3729
3761
|
|
|
3730
3762
|
def blob_resolve(url)
|
|
3731
|
-
|
|
3732
|
-
|
|
3733
|
-
#
|
|
3734
|
-
#
|
|
3735
|
-
#
|
|
3736
|
-
#
|
|
3737
|
-
|
|
3738
|
-
# creator's V8 isolate, so only do it on the MAIN thread — a worker thread
|
|
3739
|
-
# can't safely enter another isolate (so a same-partition WORKER fetch of a
|
|
3740
|
-
# blob owned elsewhere isn't supported; a cross-partition one correctly fails).
|
|
3741
|
-
# CAVEAT: bytes-only — resolveBlobBytes types a host-resolved blob as
|
|
3742
|
-
# application/octet-stream (the cross-window path loses the Blob's `type`); no
|
|
3743
|
-
# in-scope test reads the type on this path, and carrying it would change the
|
|
3744
|
-
# __csim_blobResolve string protocol.
|
|
3745
|
-
return nil unless @driver.respond_to?(:blob_partition_site_of) && @driver.respond_to?(:blob_bytes_for)
|
|
3746
|
-
site = @driver.blob_partition_site_of(url.to_s)
|
|
3747
|
-
return nil if site.nil? || site != blob_partition_site # unknown / revoked / cross-partition
|
|
3748
|
-
return nil if Thread.current[:csim_worker_handle]
|
|
3749
|
-
data = @driver.blob_bytes_for(url.to_s, self)
|
|
3750
|
-
data && Base64.strict_encode64(data[:bytes].to_s.b)
|
|
3763
|
+
# A same-partition blob created in another window/isolate is spec-fetchable
|
|
3764
|
+
# cross-window, but resolving its bytes means a real-time cross-isolate read
|
|
3765
|
+
# (+ worker round-trips for the worker variants) that races the per-example
|
|
3766
|
+
# timeout under suite load — flaky in the gate. So we only resolve a blob from
|
|
3767
|
+
# THIS window's registry; the cross-window same-partition fetch is a backlog
|
|
3768
|
+
# item (cross-partition.https "fetched from a same-partition {iframe,worker}").
|
|
3769
|
+
@blob_registry_lock.synchronize { @blob_registry[url.to_s] }
|
|
3751
3770
|
end
|
|
3752
3771
|
|
|
3753
3772
|
# The SITE (scheme + registrable domain) of this window's top-level document.
|
|
@@ -4225,15 +4244,134 @@ module Capybara
|
|
|
4225
4244
|
end
|
|
4226
4245
|
end
|
|
4227
4246
|
|
|
4247
|
+
# Fetch caps a request at 20 redirects: the 21st is a network error (redirect-count).
|
|
4248
|
+
# The loop below runs one iteration PER dispatch, so it needs 20 redirect hops plus
|
|
4249
|
+
# the final response — MAX_FETCH_REDIRECTS + 1 iterations — to let exactly 20 succeed.
|
|
4228
4250
|
MAX_FETCH_REDIRECTS = 20
|
|
4251
|
+
# Request cache modes that never READ the store (always hit the network), and modes that
|
|
4252
|
+
# serve a STORED response even when stale. Frozen so the hot rack_fetch path allocates no
|
|
4253
|
+
# throwaway arrays per hop (perf).
|
|
4254
|
+
CACHE_MODES_SKIP_READ = %w[no-store reload].freeze
|
|
4255
|
+
CACHE_MODES_SERVE_STALE = %w[force-cache only-if-cached].freeze
|
|
4256
|
+
# Fetch "bad port" blocklist (https://fetch.spec.whatwg.org/#port-blocking) —
|
|
4257
|
+
# ports tied to non-HTTP protocols a request must never reach. Frozen Set for
|
|
4258
|
+
# O(1) membership on the rack_fetch path.
|
|
4259
|
+
BAD_PORTS = Set[
|
|
4260
|
+
0, 1, 7, 9, 11, 13, 15, 17, 19, 20, 21, 22, 23, 25, 37, 42, 43, 53, 69, 77,
|
|
4261
|
+
79, 87, 95, 101, 102, 103, 104, 109, 110, 111, 113, 115, 117, 119, 123, 135,
|
|
4262
|
+
137, 139, 143, 161, 179, 389, 427, 465, 512, 513, 514, 515, 526, 530, 531,
|
|
4263
|
+
532, 540, 548, 554, 556, 563, 587, 601, 636, 989, 990, 993, 995, 1719, 1720,
|
|
4264
|
+
1723, 2049, 3659, 4045, 4190, 5060, 5061, 6000, 6566, 6665, 6666, 6667, 6668,
|
|
4265
|
+
6669, 6679, 6697, 10080
|
|
4266
|
+
].freeze
|
|
4267
|
+
|
|
4268
|
+
REFERRER_POLICIES = %w[
|
|
4269
|
+
no-referrer no-referrer-when-downgrade origin origin-when-cross-origin
|
|
4270
|
+
same-origin strict-origin strict-origin-when-cross-origin unsafe-url
|
|
4271
|
+
].freeze
|
|
4272
|
+
|
|
4273
|
+
# The `Referer` value a request carries under a Referrer-Policy — nil = send none
|
|
4274
|
+
# (https://w3c.github.io/webappsec-referrer-policy/#determine-requests-referrer).
|
|
4275
|
+
# `referrer_url` is the request's referrer (the initiating document); `target_url`
|
|
4276
|
+
# its destination. "full" is the referrer stripped of fragment + credentials;
|
|
4277
|
+
# "origin" is scheme://host[:port]/. An empty / unknown policy → the default
|
|
4278
|
+
# (strict-origin-when-cross-origin).
|
|
4279
|
+
def compute_referrer(policy, referrer_url, target_url)
|
|
4280
|
+
return nil if referrer_url.nil? || referrer_url.to_s.empty?
|
|
4281
|
+
policy = 'strict-origin-when-cross-origin' unless REFERRER_POLICIES.include?(policy)
|
|
4282
|
+
return nil if policy == 'no-referrer'
|
|
4283
|
+
# The referrer is almost always the (constant) document URL — memoise its parse
|
|
4284
|
+
# so the rack_fetch hot path doesn't re-parse it per request (rule 3).
|
|
4285
|
+
ref = parse_referrer_url(referrer_url)
|
|
4286
|
+
return nil unless ref && %w[http https].include?(ref.scheme)
|
|
4287
|
+
full = -> { u = ref.dup; u.fragment = nil; u.password = nil; u.user = nil; u.to_s }
|
|
4288
|
+
origin_only = -> {
|
|
4289
|
+
default_port = ref.scheme == 'https' ? 443 : 80
|
|
4290
|
+
port = ref.port && ref.port != default_port ? ":#{ref.port}" : ''
|
|
4291
|
+
"#{ref.scheme}://#{ref.host}#{port}/"
|
|
4292
|
+
}
|
|
4293
|
+
return full.call if policy == 'unsafe-url'
|
|
4294
|
+
return origin_only.call if policy == 'origin'
|
|
4295
|
+
# The remaining policies need the target to know same-origin / downgrade.
|
|
4296
|
+
tgt = (URI.parse(target_url) rescue nil)
|
|
4297
|
+
return nil unless tgt
|
|
4298
|
+
same_origin = ref.scheme == tgt.scheme && ref.host == tgt.host && ref.port == tgt.port
|
|
4299
|
+
downgrade = ref.scheme == 'https' && tgt.scheme == 'http'
|
|
4300
|
+
case policy
|
|
4301
|
+
when 'origin-when-cross-origin' then same_origin ? full.call : origin_only.call
|
|
4302
|
+
when 'same-origin' then same_origin ? full.call : nil
|
|
4303
|
+
when 'strict-origin' then downgrade ? nil : origin_only.call
|
|
4304
|
+
when 'no-referrer-when-downgrade' then downgrade ? nil : full.call
|
|
4305
|
+
when 'strict-origin-when-cross-origin' then same_origin ? full.call : (downgrade ? nil : origin_only.call)
|
|
4306
|
+
end
|
|
4307
|
+
end
|
|
4308
|
+
|
|
4309
|
+
# Parse a referrer URL, memoising the last one (the referrer is the document URL
|
|
4310
|
+
# for nearly every request, so this caches across the whole page's subresources).
|
|
4311
|
+
def parse_referrer_url(url)
|
|
4312
|
+
return @referrer_parsed if defined?(@referrer_parsed_for) && @referrer_parsed_for == url
|
|
4313
|
+
@referrer_parsed_for = url
|
|
4314
|
+
@referrer_parsed = (URI.parse(url) rescue nil)
|
|
4315
|
+
end
|
|
4316
|
+
|
|
4317
|
+
# Whether a request to `url_str` must be blocked as a Fetch "bad port". Cheap
|
|
4318
|
+
# pre-gate: only URLs whose authority carries an explicit `:<digit>` are parsed
|
|
4319
|
+
# (the vast majority don't), so the rack_fetch hot path — every asset / xhr /
|
|
4320
|
+
# fetch, cache hits included — skips URI.parse entirely.
|
|
4321
|
+
def bad_port?(url_str)
|
|
4322
|
+
return false unless url_str =~ %r{\A[a-z]+://[^/]*:\d}i
|
|
4323
|
+
port = URI.parse(url_str).port
|
|
4324
|
+
port && BAD_PORTS.include?(port)
|
|
4325
|
+
rescue URI::Error
|
|
4326
|
+
false
|
|
4327
|
+
end
|
|
4328
|
+
|
|
4229
4329
|
# URLs we won't even try to route through Rack: anything that
|
|
4230
4330
|
# isn't http(s) (data: / mailto: / about:) plus pseudo-tokens
|
|
4231
4331
|
# like V8's `<snapshot>` that sourcemap libraries pull out of
|
|
4232
4332
|
# error stacks and feed straight to `fetch()` / `xhr.open()`.
|
|
4233
|
-
def rack_fetch(method, url, body, headers, redirect_mode, env_extras: nil)
|
|
4333
|
+
def rack_fetch(method, url, body, headers, redirect_mode, cors_mode = nil, credentials: 'same-origin', env_extras: nil, referrer_policy: nil, referrer: nil, cache_mode: 'default')
|
|
4334
|
+
# NB: a relative fetch/XHR URL is resolved against the document's API base URL
|
|
4335
|
+
# at OPEN time (XHR open() / fetch()), in JS, NOT here — resolving at send time
|
|
4336
|
+
# would wrongly pick up a `<base href>` inserted after open() (open-url-base
|
|
4337
|
+
# -inserted-after-open). So this resolves only against the document URL.
|
|
4234
4338
|
target = resolve_against_current(url.to_s)
|
|
4235
4339
|
return nil unless target.is_a?(String) && target.match?(%r{\Ahttps?://}i)
|
|
4236
|
-
|
|
4340
|
+
# Fetch "port blocking" (https://fetch.spec.whatwg.org/#port-blocking): a
|
|
4341
|
+
# request to a blocked port is a network error before any connection —
|
|
4342
|
+
# fetch() rejects with TypeError, a sync XHR throws NetworkError
|
|
4343
|
+
# (request-bad-port). Re-checked per redirect hop below ("HTTP-redirect fetch"
|
|
4344
|
+
# re-runs the block), so a 3xx Location to a bad port is refused too.
|
|
4345
|
+
return nil if bad_port?(target)
|
|
4346
|
+
# CORS enforcement (preflight + Access-Control checks) applies only to cors_mode
|
|
4347
|
+
# 'cors' — sent by XHR and by fetch()'s default mode. fetch() also threads
|
|
4348
|
+
# 'no-cors' / 'same-origin' (mode semantics below), and a form-submission
|
|
4349
|
+
# navigation threads 'navigate'; other callers (sendBeacon, ESM, workers, the
|
|
4350
|
+
# internal asset GET) pass nil → no CORS and no mode semantics. The document's
|
|
4351
|
+
# origin is the request's origin; a different target origin is cross-origin.
|
|
4352
|
+
cors = cors_mode == 'cors'
|
|
4353
|
+
req_origin = cors ? url_origin(@current_url) : nil
|
|
4354
|
+
# Fetch request "mode" (fetch threads it; XHR is always 'cors'; a non-fetch/xhr
|
|
4355
|
+
# caller passes nil → no mode semantics, a plain 'basic' response). `no-cors`
|
|
4356
|
+
# filters a cross-origin response to opaque; `same-origin` makes a cross-origin
|
|
4357
|
+
# request a network error. `doc_origin` detects cross-origin for the response
|
|
4358
|
+
# TYPE regardless of whether CORS enforcement (cors) runs; `crossed` latches once
|
|
4359
|
+
# any hop leaves the document origin.
|
|
4360
|
+
no_cors_mode = cors_mode == 'no-cors'
|
|
4361
|
+
same_origin_mode = cors_mode == 'same-origin'
|
|
4362
|
+
# Only the real fetch request modes carry cross-origin semantics; a 'navigate'
|
|
4363
|
+
# (form submission) or a nil-mode internal caller gets a plain readable response.
|
|
4364
|
+
doc_origin = %w[cors no-cors same-origin].include?(cors_mode) ? url_origin(@current_url) : nil
|
|
4365
|
+
crossed = false
|
|
4366
|
+
# A request is "credentialed" (cookies + the credentialed CORS check) only in
|
|
4367
|
+
# `include` mode; `same-origin` (default) and `omit` are uncredentialed for the
|
|
4368
|
+
# CORS check, while the cookie decision below distinguishes all three.
|
|
4369
|
+
with_credentials = credentials == 'include'
|
|
4370
|
+
# Use the method's case AS GIVEN: the JS callers already applied the spec
|
|
4371
|
+
# normalization (XHR open() / Fetch upper-case the known methods, preserving
|
|
4372
|
+
# an unknown method's case — open-method-case-sensitive). Upper-casing here
|
|
4373
|
+
# would clobber a custom method like `xUNIcorn`.
|
|
4374
|
+
method = (method || 'GET').to_s
|
|
4237
4375
|
redirected = false
|
|
4238
4376
|
# JS-side base64-encodes Blob/File bodies (raw bytes survive
|
|
4239
4377
|
# the engine's UTF-8 string boundary that way); decode before
|
|
@@ -4242,22 +4380,117 @@ module Capybara
|
|
|
4242
4380
|
body = Base64.decode64(body.to_s)
|
|
4243
4381
|
headers = headers.reject {|k, _| k == 'X-Csim-Body-B64' }
|
|
4244
4382
|
end
|
|
4245
|
-
|
|
4383
|
+
# The request's origin starts as the document origin; a cross-origin REDIRECT of an already cross-origin request
|
|
4384
|
+
# taints it to an opaque origin (serialized "null") per Fetch "HTTP-redirect
|
|
4385
|
+
# fetch". `effective_origin` IS that origin — it's what the Origin header
|
|
4386
|
+
# carries and what the CORS check / preflight compare against from that hop on
|
|
4387
|
+
# ('null' once tainted, so the server must then allow 'null' or '*').
|
|
4388
|
+
effective_origin = req_origin
|
|
4389
|
+
# An author conditional (If-None-Match / …) means the caller is doing its own
|
|
4390
|
+
# revalidation, so the UA cache must step aside (computed once — the headers
|
|
4391
|
+
# carrying it survive every redirect hop unchanged).
|
|
4392
|
+
skip_cache = request_has_conditional_headers?(headers)
|
|
4393
|
+
ref_policy = referrer_policy # may be overridden per hop by a response Referrer-Policy
|
|
4394
|
+
# The referrer is stripped PROGRESSIVELY: each hop applies its (possibly
|
|
4395
|
+
# overridden) policy to the referrer the PREVIOUS hop sent, not to the original
|
|
4396
|
+
# document — so once a hop reduces it to an origin (or drops it), a later, laxer
|
|
4397
|
+
# policy can't widen it back (redirect-referrer-override). The initial source is
|
|
4398
|
+
# the request's referrer: an explicit `init.referrer` URL when given, else the
|
|
4399
|
+
# document URL ("client"); an empty referrer means no-referrer (compute_referrer
|
|
4400
|
+
# maps a blank source to nil).
|
|
4401
|
+
ref_source = referrer.nil? ? @current_url : referrer
|
|
4402
|
+
(MAX_FETCH_REDIRECTS + 1).times do
|
|
4246
4403
|
t0 = @trace && Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
4247
|
-
#
|
|
4248
|
-
#
|
|
4249
|
-
#
|
|
4250
|
-
|
|
4251
|
-
if
|
|
4252
|
-
|
|
4404
|
+
# Cross-origin-ness for the request mode/type, latched across hops. Computed
|
|
4405
|
+
# BEFORE the cache so a cross-origin request never takes the cache fast path
|
|
4406
|
+
# (which would bypass the opaque filter / same-origin-mode error / cors type).
|
|
4407
|
+
crossed ||= !!(doc_origin && (effective_origin == 'null' || url_origin(target) != doc_origin))
|
|
4408
|
+
return nil if same_origin_mode && crossed # 'same-origin' mode forbids a cross-origin hop
|
|
4409
|
+
# HTTP cache (RFC 9111 + Fetch "HTTP-network-or-cache fetch"), gated by the request's
|
|
4410
|
+
# cache MODE. GET-only, same-origin (a cross-origin hop always redispatches so the mode
|
|
4411
|
+
# filtering below runs), and stepped aside when the author sent their own conditional.
|
|
4412
|
+
# - no-store / reload : never read the store — always hit the network, no conditional
|
|
4413
|
+
# - force-cache / only-if-cached : serve a stored response even when STALE, no revalidation
|
|
4414
|
+
# (only-if-cached with nothing stored is a network error)
|
|
4415
|
+
# - no-cache : always revalidate, even a fresh entry
|
|
4416
|
+
# - default : serve fresh; revalidate stale (fall through with conditionals)
|
|
4417
|
+
read_cache = method == 'GET' && !skip_cache && !crossed && !CACHE_MODES_SKIP_READ.include?(cache_mode)
|
|
4418
|
+
cache_entry = read_cache ? @@asset_cache.lookup(target) : nil
|
|
4419
|
+
serve_stored = cache_entry &&
|
|
4420
|
+
(CACHE_MODES_SERVE_STALE.include?(cache_mode) || (cache_entry.fresh? && cache_mode != 'no-cache'))
|
|
4421
|
+
if serve_stored
|
|
4422
|
+
if REDIRECT_STATUSES.include?(cache_entry.status.to_i)
|
|
4423
|
+
# A cached REDIRECT obeys the redirect mode exactly like a fresh one: `error` is a
|
|
4424
|
+
# network error, `manual` is an opaque-redirect, and `follow` follows it THROUGH
|
|
4425
|
+
# the cache — resolve the Location and continue so the next hop serves the cached
|
|
4426
|
+
# target (request-cache "uses cached … redirects"). only-if-cached / force-cache
|
|
4427
|
+
# reach this only for a same-origin GET (read_cache excludes cross-origin), so there's no
|
|
4428
|
+
# method rewrite / origin taint.
|
|
4429
|
+
raise StandardError, '[capybara-simulated] fetch: redirect blocked by redirect=error mode' if redirect_mode == 'error'
|
|
4430
|
+
if redirect_mode != 'follow'
|
|
4431
|
+
return response_hash(0, {}, '', target, false, type: 'opaqueredirect', body_null: true)
|
|
4432
|
+
end
|
|
4433
|
+
if (loc = redirect_location(cache_entry.status, cache_entry.headers))
|
|
4434
|
+
trace_network(method, target, cache_entry.status, headers, body, cache_entry.headers, nil, t0, true)
|
|
4435
|
+
redirected = true
|
|
4436
|
+
next_url = resolve_against(loc, target)
|
|
4437
|
+
return nil unless next_url.to_s.match?(%r{\Ahttps?://}i)
|
|
4438
|
+
target = carry_fragment(target, next_url)
|
|
4439
|
+
return nil if bad_port?(target) # a cached redirect to a blocked port is still a network error
|
|
4440
|
+
next
|
|
4441
|
+
end
|
|
4442
|
+
end
|
|
4443
|
+
# Cached asset — log headers/type/size but skip the (boring) body.
|
|
4253
4444
|
trace_network(method, target, cache_entry.status, headers, body, cache_entry.headers, nil, t0, false)
|
|
4254
4445
|
return response_hash(cache_entry.status, cache_entry.headers, cache_entry.body, target, redirected)
|
|
4255
4446
|
end
|
|
4447
|
+
# only-if-cached forbids the network: no usable stored response → a network error.
|
|
4448
|
+
return nil if cache_mode == 'only-if-cached'
|
|
4256
4449
|
|
|
4257
4450
|
env = Rack::MockRequest.env_for(target, method: method, input: body || '')
|
|
4451
|
+
env['REQUEST_METHOD'] = method # env_for upcases the method; restore the exact case (open-method-case-sensitive)
|
|
4452
|
+
# A GET/HEAD request carries no body, so it sends no Content-Length (env_for
|
|
4453
|
+
# always sets it to the input bytesize, i.e. 0). A POST/PUT with an empty body
|
|
4454
|
+
# keeps Content-Length: 0 (send-entity-body-none / -empty).
|
|
4455
|
+
env.delete('CONTENT_LENGTH') if %w[GET HEAD].include?(method.to_s.upcase)
|
|
4258
4456
|
apply_request_headers(env, headers) if headers
|
|
4259
4457
|
apply_request_headers(env, @@asset_cache.revalidation_headers(cache_entry)) if cache_entry
|
|
4260
|
-
|
|
4458
|
+
# The Referer follows the request's Referrer-Policy (a redirect response can
|
|
4459
|
+
# override the policy for the next hop — see below). `hop_referer` also becomes
|
|
4460
|
+
# the source the NEXT hop strips from.
|
|
4461
|
+
hop_referer = compute_referrer(ref_policy, ref_source, target)
|
|
4462
|
+
apply_default_request_env(env, referer: hop_referer, force: false)
|
|
4463
|
+
# Whether this hop is cross-origin (cors only): a tainted (opaque) origin is
|
|
4464
|
+
# cross-origin to every real target; otherwise compare the target to the
|
|
4465
|
+
# document origin. Drives the Origin header, preflight, and the CORS check.
|
|
4466
|
+
cross_origin = cors && (effective_origin == 'null' || url_origin(target) != req_origin)
|
|
4467
|
+
# Fetch credentials mode decides cookie attachment, independent of the CORS
|
|
4468
|
+
# mode: `omit` never sends them; `include` always does; `same-origin` (default)
|
|
4469
|
+
# sends them only to a same-origin target — so an uncredentialed cross-origin
|
|
4470
|
+
# hop (cors OR no-cors) must not leak the document's cookies
|
|
4471
|
+
# (cors-redirect-credentials / cors-cookies). A navigation / internal caller has
|
|
4472
|
+
# no doc_origin, so it counts as same-origin and keeps them.
|
|
4473
|
+
hop_cross_origin = !!(doc_origin && (effective_origin == 'null' || url_origin(target) != doc_origin))
|
|
4474
|
+
send_cookies = credentials == 'include' || (credentials != 'omit' && !hop_cross_origin)
|
|
4475
|
+
env.delete('HTTP_COOKIE') unless send_cookies
|
|
4476
|
+
# A CORS request to a URL carrying credentials (`user:pass@`) is a network
|
|
4477
|
+
# error (access-control-and-redirects "user info" subtest).
|
|
4478
|
+
return nil if cross_origin && url_has_userinfo?(target)
|
|
4479
|
+
# CORS-preflight, re-evaluated PER HOP: a cross-origin non-simple request (a
|
|
4480
|
+
# non-safelisted method / header / Content-Type) must pass an OPTIONS preflight
|
|
4481
|
+
# first — so a same-origin request redirected cross-origin to an unsafe resource
|
|
4482
|
+
# is preflighted on the NEW origin (send-redirect-to-cors), not just an initially
|
|
4483
|
+
# cross-origin one (access-control-basic-get-fail-non-simple / preflight-*).
|
|
4484
|
+
if cross_origin && cors_unsafe_request?(method, headers)
|
|
4485
|
+
return nil unless cors_preflight_ok?(target, method, headers, effective_origin, with_credentials, hop_referer)
|
|
4486
|
+
end
|
|
4487
|
+
# Send the (effective) Origin — the UA owns this header — on a cors request when
|
|
4488
|
+
# the hop is cross-origin OR the method is not GET/HEAD (Fetch appends Origin to
|
|
4489
|
+
# every non-GET/HEAD request, so a same-origin POST carries it too). After a
|
|
4490
|
+
# cross-origin redirect the origin is the opaque "null".
|
|
4491
|
+
if cross_origin || (req_origin && !%w[GET HEAD].include?(method.to_s.upcase))
|
|
4492
|
+
env['HTTP_ORIGIN'] = effective_origin
|
|
4493
|
+
end
|
|
4261
4494
|
env.merge!(env_extras) if env_extras
|
|
4262
4495
|
status, resp_headers, resp_body = dispatch_rack_or_http(target, env, method: method, body: body)
|
|
4263
4496
|
merge_set_cookie(resp_headers)
|
|
@@ -4267,23 +4500,119 @@ module Capybara
|
|
|
4267
4500
|
@@asset_cache.refresh(cache_entry, resp_headers)
|
|
4268
4501
|
return response_hash(cache_entry.status, cache_entry.headers, cache_entry.body, target, redirected)
|
|
4269
4502
|
end
|
|
4270
|
-
|
|
4503
|
+
# Fetch "CORS check" runs on EVERY cross-origin response — including a 3xx the
|
|
4504
|
+
# UA is about to follow (a redirect whose response lacks a valid Access-Control
|
|
4505
|
+
# -Allow-Origin is itself a network error: access-control-and-redirects). A
|
|
4506
|
+
# credentialed request additionally forbids `*` and needs Allow-Credentials.
|
|
4507
|
+
if cross_origin && !cors_response_ok?(resp_headers, effective_origin, with_credentials)
|
|
4508
|
+
resp_body.close if resp_body.respond_to?(:close)
|
|
4509
|
+
return nil
|
|
4510
|
+
end
|
|
4511
|
+
# A redirect-status response in a NON-follow mode is handled without following,
|
|
4512
|
+
# keyed on the status ALONE (the Location is never parsed): `error` is a network
|
|
4513
|
+
# error; `manual` is an opaque-redirect filtered response (status 0, empty
|
|
4514
|
+
# statusText/headers, the ORIGINAL request URL, type 'opaqueredirect'). The CORS
|
|
4515
|
+
# check above runs first, so a cross-origin redirect that fails CORS is a network
|
|
4516
|
+
# error either way (redirect-mode / -location).
|
|
4517
|
+
if redirect_mode != 'follow' && REDIRECT_STATUSES.include?(status.to_i)
|
|
4518
|
+
resp_body.close if resp_body.respond_to?(:close)
|
|
4271
4519
|
raise StandardError, '[capybara-simulated] fetch: redirect blocked by redirect=error mode' if redirect_mode == 'error'
|
|
4520
|
+
# A no-cors request may not even opaquely expose a CROSS-origin redirect — a
|
|
4521
|
+
# no-cors non-follow redirect to a cross-origin target is a network error,
|
|
4522
|
+
# while a same-origin one still yields an opaque-redirect.
|
|
4523
|
+
return nil if no_cors_mode && crossed
|
|
4524
|
+
return response_hash(0, {}, '', target, false, type: 'opaqueredirect', body_null: true)
|
|
4525
|
+
end
|
|
4526
|
+
if (loc = redirect_location(status, resp_headers))
|
|
4272
4527
|
# Log this hop (3xx) before method/body are rewritten for the next.
|
|
4273
4528
|
trace_network(method, target, status, headers, body, resp_headers, nil, t0, true)
|
|
4529
|
+
# Cache the redirect itself (a cacheable 3xx with freshness) BEFORE following it —
|
|
4530
|
+
# the follow does `next`, which would otherwise skip the store below — so a later
|
|
4531
|
+
# only-if-cached / force-cache request can follow the redirect chain from the cache
|
|
4532
|
+
# (request-cache "uses cached … redirects"). Same store gate as the terminal hop.
|
|
4533
|
+
@@asset_cache.store(target, status, resp_headers, '') if method == 'GET' && cache_mode != 'no-store' && !skip_cache
|
|
4274
4534
|
redirected = true
|
|
4275
|
-
|
|
4535
|
+
ref_source = hop_referer # the next hop strips from what THIS hop sent
|
|
4536
|
+
# A redirect response's Referrer-Policy overrides the policy for the next hop
|
|
4537
|
+
# (redirect-referrer-override): the last valid token of the header wins.
|
|
4538
|
+
if (rp = resp_headers['referrer-policy'] || resp_headers['Referrer-Policy'])
|
|
4539
|
+
tok = Array(rp).join(',').split(',').map(&:strip).reverse.find {|t| REFERRER_POLICIES.include?(t) }
|
|
4540
|
+
ref_policy = tok if tok
|
|
4541
|
+
end
|
|
4276
4542
|
next_url = resolve_against(loc, target)
|
|
4543
|
+
# The UA only follows http(s) redirects: a Location that resolves to a
|
|
4544
|
+
# non-HTTP(S) URL (data:, an `invalidurl:` scheme, …) is a network error
|
|
4545
|
+
# (redirect-location data/invalid in follow mode).
|
|
4546
|
+
unless next_url.to_s.match?(%r{\Ahttps?://}i)
|
|
4547
|
+
resp_body.close if resp_body.respond_to?(:close)
|
|
4548
|
+
return nil
|
|
4549
|
+
end
|
|
4550
|
+
# A cross-origin redirect taints the request's origin to opaque ("null") only
|
|
4551
|
+
# once the request was ALREADY cross-origin (response tainting "cors", i.e.
|
|
4552
|
+
# `crossed`) and the hop changes origin — so a subsequent hop sends Origin: null
|
|
4553
|
+
# and the CORS check demands the server allow "null"/"*". The FIRST cross-origin
|
|
4554
|
+
# hop out of a same-origin request keeps the real origin (redirect-origin
|
|
4555
|
+
# "same origin to other origin" sends the document origin, not null).
|
|
4556
|
+
effective_origin = 'null' if cors && crossed && url_origin(next_url) != url_origin(target)
|
|
4277
4557
|
target = carry_fragment(target, next_url)
|
|
4278
|
-
|
|
4279
|
-
|
|
4558
|
+
if bad_port?(target) # a redirect to a blocked port is a network error too
|
|
4559
|
+
resp_body.close if resp_body.respond_to?(:close)
|
|
4560
|
+
return nil
|
|
4561
|
+
end
|
|
4562
|
+
# Fetch "HTTP-redirect fetch": the method changes to GET (dropping the
|
|
4563
|
+
# body + its Content-* headers) ONLY for 301/302 of a POST, or 303 of a
|
|
4564
|
+
# non-GET/HEAD. Otherwise method, body, and headers are preserved — so a
|
|
4565
|
+
# GET/HEAD redirected via 301/302/303 keeps its method and Content-Type,
|
|
4566
|
+
# and 307/308 always preserve (xhr send-redirect basics).
|
|
4567
|
+
up = method.to_s.upcase
|
|
4568
|
+
if ([301, 302].include?(status) && up == 'POST') || (status == 303 && !%w[GET HEAD].include?(up))
|
|
4569
|
+
method = 'GET'
|
|
4570
|
+
body = nil
|
|
4571
|
+
headers = headers.reject {|k, _| REDIRECT_DROPPED_HEADERS.include?(k.to_s.downcase) } if headers.is_a?(Hash)
|
|
4572
|
+
end
|
|
4280
4573
|
resp_body.close if resp_body.respond_to?(:close)
|
|
4281
4574
|
next
|
|
4282
4575
|
end
|
|
4576
|
+
# A follow-mode redirect whose Location header IS present but EMPTY parses to the
|
|
4577
|
+
# request URL — a self-redirect that would loop until the redirect limit trips a
|
|
4578
|
+
# network error. redirect_location returns nil for it (empty ⇒ no followable
|
|
4579
|
+
# target, so navigation keeps rendering the 3xx), so recognize it here and fail
|
|
4580
|
+
# directly — fetch-only (redirect-empty-location follow mode).
|
|
4581
|
+
if REDIRECT_STATUSES.include?(status.to_i)
|
|
4582
|
+
raw_loc = resp_headers['location'] || resp_headers['Location']
|
|
4583
|
+
raw_loc = raw_loc.first if raw_loc.is_a?(Array)
|
|
4584
|
+
if raw_loc && raw_loc.to_s.empty?
|
|
4585
|
+
resp_body.close if resp_body.respond_to?(:close)
|
|
4586
|
+
return nil
|
|
4587
|
+
end
|
|
4588
|
+
end
|
|
4283
4589
|
body_str = read_rack_body(resp_body)
|
|
4590
|
+
# A HEAD response, and a null-body status (204/205/304), have NO body — the UA
|
|
4591
|
+
# discards whatever the server sent and exposes response.body as null
|
|
4592
|
+
# (response-method HEAD; response-null-body). `null_body` flags it so the JS
|
|
4593
|
+
# Response reports a null body + empty text.
|
|
4594
|
+
null_body = method.to_s.upcase == 'HEAD' || NULL_BODY_STATUSES.include?(status.to_i)
|
|
4595
|
+
body_str = '' if null_body
|
|
4596
|
+
# The UA transparently decodes a Content-Encoding'd body (gzip/deflate); the
|
|
4597
|
+
# header stays, the bytes are inflated (response-data-gzip / -deflate).
|
|
4598
|
+
body_str = decode_content_encoding(body_str, resp_headers)
|
|
4599
|
+
# A cross-origin response only EXPOSES (getResponseHeader / getAllResponseHeaders)
|
|
4600
|
+
# the CORS-safelisted response headers plus those named in Access-Control-Expose
|
|
4601
|
+
# -Headers (`*` = all). content-type stays safelisted, so response decoding is
|
|
4602
|
+
# unaffected. (Filtered for script exposure only — trace / set-cookie / cache see
|
|
4603
|
+
# the full set.) The CORS check itself already ran above (incl. on 3xx hops).
|
|
4604
|
+
exposed_headers = cross_origin ? cors_exposed_headers(resp_headers) : resp_headers
|
|
4284
4605
|
trace_network(method, target, status, headers, body, resp_headers, body_str, t0, false)
|
|
4285
|
-
|
|
4286
|
-
|
|
4606
|
+
# A no-store request must not write the cache (RFC 9111 §5.2.1.5); a request carrying
|
|
4607
|
+
# the author's own conditional bypasses the UA cache entirely (read AND write) — it's
|
|
4608
|
+
# "treated similarly to no-store" (request-cache-default-conditional). Every other mode
|
|
4609
|
+
# (incl. reload, which refreshes it) stores a cacheable GET response.
|
|
4610
|
+
@@asset_cache.store(target, status, resp_headers, body_str) if method == 'GET' && cache_mode != 'no-store' && !skip_cache
|
|
4611
|
+
# A no-cors cross-origin response is OPAQUE: status 0, empty body, no exposed
|
|
4612
|
+
# headers, empty URL (cors-basic "Opaque filter"). Otherwise the type is 'cors'
|
|
4613
|
+
# for a cross-origin (CORS-allowed) response, else 'basic'.
|
|
4614
|
+
return response_hash(0, {}, '', '', false, type: 'opaque', body_null: true) if no_cors_mode && crossed
|
|
4615
|
+
return response_hash(status, exposed_headers, body_str, target, redirected, type: crossed ? 'cors' : 'basic', body_null: null_body)
|
|
4287
4616
|
end
|
|
4288
4617
|
raise StandardError, "[capybara-simulated] fetch exceeded #{MAX_FETCH_REDIRECTS} redirects"
|
|
4289
4618
|
rescue StandardError => e
|
|
@@ -4304,7 +4633,7 @@ module Capybara
|
|
|
4304
4633
|
return unless @trace
|
|
4305
4634
|
ct = resp_headers && (resp_headers['content-type'] || resp_headers['Content-Type'])
|
|
4306
4635
|
ct = ct.first if ct.is_a?(Array) # Rack 3 permits array-valued header fields
|
|
4307
|
-
ct = ct.split(';', 2).first
|
|
4636
|
+
ct = ct.split(';', 2).first&.strip if ct.is_a?(String) # "" → split is [] → first is nil
|
|
4308
4637
|
size = if resp_body
|
|
4309
4638
|
resp_body.bytesize
|
|
4310
4639
|
elsif (cl = resp_headers && (resp_headers['content-length'] || resp_headers['Content-Length']))
|
|
@@ -4345,7 +4674,13 @@ module Capybara
|
|
|
4345
4674
|
|
|
4346
4675
|
# Flatten a header collection into a String => String hash for the network
# trace. Returns nil when no headers were given. Array values (a repeated
# field) are combined with ", "; everything else is stringified.
def normalize_trace_headers(headers)
  return nil unless headers

  flattened = {}
  headers.each do |name, value|
    key = name.to_s
    # `x-csim-status-text` is the internal sentinel that carries the HTTP
    # reason phrase; it is never a real wire header, so it stays out of the
    # trace.
    next if key.downcase == 'x-csim-status-text'

    flattened[key] = value.is_a?(Array) ? value.join(', ') : value.to_s
  end
  flattened
end
|
|
4350
4685
|
|
|
4351
4686
|
# CGI convention: `Content-Type` and `Content-Length` land in env
|
|
@@ -4356,7 +4691,14 @@ module Capybara
|
|
|
4356
4691
|
# `@rails/request.js` never deserialise and the server reads an
|
|
4357
4692
|
# empty params hash.
|
|
4358
4693
|
def apply_request_headers(env, headers)
|
|
4694
|
+
# Preserve the author's exact header names (casing + token chars) alongside the
|
|
4695
|
+
# CGI-mangled HTTP_* keys: the Rack env upcases names and drops non-alphanumerics
|
|
4696
|
+
# (Status-URI → HTTP_STATUS_URI, a tchar-only name → an unrecoverable key), but a
|
|
4697
|
+
# .py echo handler (inspect-headers / echo-headers) reports the names verbatim.
|
|
4698
|
+
# run_py_handler reads this side list to emit the original names.
|
|
4699
|
+
raw = (env['csim.raw_request_headers'] ||= []) if @@capture_raw_request_headers
|
|
4359
4700
|
headers.each {|k, v|
|
|
4701
|
+
raw << [k.to_s, v.to_s] if raw
|
|
4360
4702
|
name = k.to_s.upcase.tr('-', '_')
|
|
4361
4703
|
case name
|
|
4362
4704
|
when 'CONTENT_TYPE', 'CONTENT_LENGTH' then env[name] = v.to_s
|
|
@@ -4375,9 +4717,13 @@ module Capybara
|
|
|
4375
4717
|
# text body when `body_b64` is absent.
|
|
4376
4718
|
TEXT_CONTENT_TYPE_PREFIXES = [
  'text/',
  'application/json',
  'application/javascript',
  'application/ecmascript',
  'application/xml',
  'image/svg+xml'
].freeze
|
|
4377
4719
|
|
|
4378
|
-
def response_hash(status, headers, body, url, redirected)
|
|
4720
|
+
def response_hash(status, headers, body, url, redirected, type: 'basic', body_null: false)
|
|
4379
4721
|
raw = body.to_s
|
|
4380
4722
|
hdrs = stringify(headers)
|
|
4723
|
+
# A NUL in a header value is not a valid HTTP message; a real server can't
|
|
4724
|
+
# put it on the wire, so the fetch is a network error (nil → status 0 / a
|
|
4725
|
+
# thrown NetworkError for a sync XHR). See headers-normalize-response.
|
|
4726
|
+
return nil if hdrs.any? {|_, v| v.include?("\u0000") }
|
|
4381
4727
|
is_text = text_response?(hdrs)
|
|
4382
4728
|
# `body` crosses as TEXT — `responseText` semantics: the bytes decoded
|
|
4383
4729
|
# as UTF-8 with invalid sequences replaced (a leading BOM selects the
|
|
@@ -4392,18 +4738,43 @@ module Capybara
|
|
|
4392
4738
|
else
|
|
4393
4739
|
RuntimeShared.utf8_text(raw)
|
|
4394
4740
|
end
|
|
4741
|
+
# statusText = the HTTP reason phrase: a custom one carried on the internal
|
|
4742
|
+
# x-csim-status-text header (status.py), else the status code's standard
|
|
4743
|
+
# reason (xhr status/statusText tests). Strip the internal header either way.
|
|
4744
|
+
custom_reason = hdrs.delete('x-csim-status-text')
|
|
4745
|
+
# Rack::Utils::HTTP_STATUS_CODES values are ASCII-8BIT (binary) strings — the V8
|
|
4746
|
+
# bridge marshals a binary string as a byte array, not a JS string, so statusText
|
|
4747
|
+
# would arrive as [79,75] instead of "OK" (abort-during-loading reads statusText
|
|
4748
|
+
# on a static-file response). utf8_text re-tags + scrubs to a clean JS string, the
|
|
4749
|
+
# same path the body and every header value already take.
|
|
4750
|
+
reason = RuntimeShared.utf8_text(custom_reason || Rack::Utils::HTTP_STATUS_CODES[status.to_i] || '')
|
|
4751
|
+
# HTTP/2 has no reason phrase, so statusText is always the empty string there (a WPT
|
|
4752
|
+
# `.h2` test document's fetches run over h2). We don't model the h2 transport, so key
|
|
4753
|
+
# off the document URL — the same signal WPT uses to serve the resource over h2
|
|
4754
|
+
# (fetch/xhr status.h2 "statusText over H2 … should be the empty string").
|
|
4755
|
+
reason = '' if @current_url.to_s.include?('.h2.')
|
|
4395
4756
|
out = {
|
|
4396
4757
|
'status' => status,
|
|
4758
|
+
'statusText' => reason,
|
|
4397
4759
|
'headers' => hdrs,
|
|
4398
4760
|
'body' => text,
|
|
4399
4761
|
'url' => url,
|
|
4400
4762
|
'redirected' => redirected,
|
|
4401
|
-
'type' =>
|
|
4763
|
+
'type' => type
|
|
4402
4764
|
}
|
|
4765
|
+
out['body_null'] = true if body_null # null-body status / HEAD → response.body is null
|
|
4403
4766
|
# The BOM-detected encoding (if any) — a frame load pins its document's
|
|
4404
4767
|
# characterSet to it (see __csimFrameWindow); highest-precedence signal.
|
|
4405
4768
|
out['charset'] = bom_charset if bom_charset
|
|
4406
|
-
|
|
4769
|
+
# Hand the raw bytes to the (XHR) client UNLESS the response is pure-ASCII text.
|
|
4770
|
+
# ASCII decodes identically under every encoding — so responseText is already
|
|
4771
|
+
# correct from the UTF-8 `body`, and it round-trips byte-for-byte as an
|
|
4772
|
+
# ArrayBuffer/Blob. Any NON-ASCII body needs the bytes: a non-UTF-8 charset or an
|
|
4773
|
+
# XML-prolog / <meta charset>-sniffed encoding (responseText), or multibyte UTF-8
|
|
4774
|
+
# read as arraybuffer/blob — the client decodes them with the final encoding
|
|
4775
|
+
# (decodeResponseBytes). `ascii_only?` is a cheap C-level scan, so the dominant
|
|
4776
|
+
# pure-ASCII app JSON/HTML traffic keeps the fast path and pays no base64.
|
|
4777
|
+
out['body_b64'] = Base64.strict_encode64(raw) unless is_text && raw.ascii_only?
|
|
4407
4778
|
out
|
|
4408
4779
|
end
|
|
4409
4780
|
|
|
@@ -4479,6 +4850,34 @@ module Capybara
|
|
|
4479
4850
|
buf
|
|
4480
4851
|
end
|
|
4481
4852
|
|
|
4853
|
+
# Transparently decode a Content-Encoding'd response body (HTTP "content coding").
#
# * gzip / x-gzip -> Zlib.gunzip
# * deflate       -> zlib-wrapped inflate first, then raw (header-less) DEFLATE,
#                    because the "deflate" coding is ambiguously used for both
#
# Unknown codings (e.g. br) and malformed data are left untouched: this is
# best-effort, so the caller still sees a response with the original bytes.
#
# body    - the response body String (nil / empty is returned as-is)
# headers - the response headers; looked up case-insensitively, and the value may
#           be an Array (Rack 3). nil is treated as "no headers".
#
# Returns the decoded body tagged UTF-8, or `body` itself when nothing was decoded.
def decode_content_encoding(body, headers)
  return body if body.nil? || body.empty?

  raw = (headers || {}).find { |k, _| k.to_s.downcase == 'content-encoding' }&.last
  enc = (raw.is_a?(Array) ? raw.join(',') : raw.to_s).strip.downcase
  bytes = body.b
  decoded =
    case enc
    when 'gzip', 'x-gzip' then Zlib.gunzip(bytes)
    when 'deflate'
      begin
        Zlib::Inflate.inflate(bytes)
      rescue Zlib::Error
        # Raw DEFLATE needs an explicit stream (negative window bits). Close it
        # ourselves instead of leaving the native zstream to the GC finalizer.
        inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
        begin
          inflater.inflate(bytes)
        ensure
          inflater.close
        end
      end
    else return body
    end
  # Zlib output is ASCII-8BIT; re-tag it so the decoded bytes re-enter the UTF-8
  # text pipeline the same way an un-encoded body does.
  decoded.force_encoding('UTF-8')
rescue Zlib::Error
  body
end
|
|
4880
|
+
|
|
4482
4881
|
# Defer the navigation: doing it from inside the running V8 call
|
|
4483
4882
|
# would dispose the Context mid-call. tick_real_time drains
|
|
4484
4883
|
# after the call returns. Same pattern as `__csimPendingFormSubmit`.
|
|
@@ -5555,18 +5954,40 @@ module Capybara
|
|
|
5555
5954
|
end
|
|
5556
5955
|
|
|
5557
5956
|
# Convert a header collection to a plain String => String hash of TEXT values.
# Header names/values are text (RFC 9110: field values are ASCII) but Rack hands
# them over BINARY-tagged, so each value goes through `RuntimeShared.utf8_text`.
#
# Per-value HTTP-whitespace normalization is NOT done here: it happens upstream,
# before duplicate values are combined (WptRunner.combine_headers), because a
# combined value such as `", "` (two empty fields) would wrongly lose its
# trailing space. An Array-valued header (a repeated field) is combined with
# `, `, the WHATWG "combine" separator that getAllResponseHeaders exposes.
def stringify(headers)
  text_headers = {}
  headers.each do |name, value|
    combined = value.is_a?(Array) ? value.join(', ') : value.to_s
    text_headers[name.to_s] = RuntimeShared.utf8_text(combined)
  end
  text_headers
end
|
|
5566
5971
|
|
|
5972
|
+
# Fetch "redirect status" set: the ONLY statuses that are followed. 300 (multiple
# choice), 304 (not modified) and 305/306 (deprecated) are not redirects, so such
# a 3xx response is handed back to the caller as-is (xhr send-redirect basics).
REDIRECT_STATUSES = [301, 302, 303, 307, 308].freeze

# Fetch "null body status": the response has no body, so it is dropped and
# response.body is null (response-null-body). (101 is unreachable here.)
NULL_BODY_STATUSES = [204, 205, 304].freeze

# Request-body headers removed when a redirect nulls the body (method -> GET).
REDIRECT_DROPPED_HEADERS = [
  'content-encoding',
  'content-language',
  'content-location',
  'content-type',
  'content-length'
].freeze
|
|
5567
5981
|
# The followable redirect target of a response, or nil.
#
# Only statuses in REDIRECT_STATUSES count. The Location header is read under the
# `location` / `Location` spellings; an Array value (Rack 3 permits array-valued
# fields) contributes its first element.
#
# A blank or absent Location has no followable target: an empty value parses back
# to the current URL, so following it would self-redirect. nil is returned so the
# caller renders the 3xx as-is instead of looping; the navigation handlers rely on
# this. (The fetch redirect loop detects a present-but-empty Location separately
# and turns it into a network error per Fetch; see rack_fetch.)
def redirect_location(status, headers)
  return nil unless REDIRECT_STATUSES.include?(status.to_i)

  target = headers['location'] || headers['Location']
  target = target.first if target.is_a?(Array)
  target.to_s.empty? ? nil : target
end
|
|
5571
5992
|
|
|
5572
5993
|
def resolve_against_current(url, use_base: false)
|
|
@@ -5596,6 +6017,218 @@ module Capybara
|
|
|
5596
6017
|
dom_call('__csimBaseHref').to_s
|
|
5597
6018
|
end
|
|
5598
6019
|
|
|
6020
|
+
# Fetch "CORS-safelisted method" / "...request-header" / "...Content-Type". A
# request is "simple" (needs no preflight) iff its method is safelisted AND every
# author header is safelisted (Content-Type only for a urlencoded / multipart /
# text/plain value).
CORS_SAFELISTED_METHODS = ['GET', 'HEAD', 'POST'].freeze
CORS_SAFELISTED_HEADERS = ['accept', 'accept-language', 'content-language', 'content-type'].freeze

# RFC 7230 `token` (tchar+): a valid HTTP method / field-name. Used to reject a
# preflight whose Access-Control-Allow-Methods / -Headers carries a malformed value.
HTTP_TOKEN = /\A[!#$%&'*+\-.^_`|~0-9A-Za-z]+\z/.freeze

CORS_SAFELISTED_CTYPES = [
  'application/x-www-form-urlencoded',
  'multipart/form-data',
  'text/plain'
].freeze
|
|
6029
|
+
|
|
6030
|
+
# The sorted, de-duplicated, lowercased author header names that are NOT
# CORS-safelisted. These are echoed in Access-Control-Request-Headers for the
# preflight and must be covered by Access-Control-Allow-Headers.
#
# Internal `x-csim*` headers and `content-length` are ignored. `content-type` is
# unsafe unless its essence (the part before any `;`) is one of
# CORS_SAFELISTED_CTYPES. A nil `headers` yields an empty list.
def cors_unsafe_headers(headers)
  return [] unless headers

  unsafe = []
  headers.each do |key, value|
    header = key.to_s.downcase
    next if header.start_with?('x-csim') || header == 'content-length'

    safelisted =
      if header == 'content-type'
        essence = value.to_s.split(';', 2).first.to_s.strip.downcase
        CORS_SAFELISTED_CTYPES.include?(essence)
      else
        CORS_SAFELISTED_HEADERS.include?(header)
      end
    unsafe << header unless safelisted
  end
  unsafe.uniq.sort
end
|
|
6045
|
+
|
|
6046
|
+
# True when the request is not "simple": its method (compared upper-cased) is not
# in CORS_SAFELISTED_METHODS, or it carries at least one non-safelisted author
# header (see cors_unsafe_headers).
def cors_unsafe_request?(method, headers)
  return true unless CORS_SAFELISTED_METHODS.include?(method.to_s.upcase)

  cors_unsafe_headers(headers).any?
end
|
|
6049
|
+
|
|
6050
|
+
# Fetch "CORS check" on a cross-origin response: the response must allow the
# request's (effective) origin via Access-Control-Allow-Origin.
#
# * NON-credentialed request: ACAO may be `*` or the exact origin.
# * CREDENTIALED request (withCredentials): `*` is forbidden. ACAO must be the
#   exact origin AND Access-Control-Allow-Credentials must be exactly `true`.
#   The comparison is byte-case-sensitive, matching the preflight check in
#   cors_run_preflight.
#
# resp_headers - response headers, read through cors_header
# origin       - the request's origin string (may be nil)
# credentialed - whether the request carries credentials
#
# Returns true or false.
def cors_response_ok?(resp_headers, origin, credentialed)
  acao = cors_header(resp_headers, 'access-control-allow-origin').to_s
  # cors_header yields '' for a missing header, so test for emptiness (a nil check
  # alone never fires); a missing ACAO must never match, whatever the origin.
  return false if acao.empty?
  return (acao == '*' || acao == origin) unless credentialed

  acao == origin && cors_header(resp_headers, 'access-control-allow-credentials') == 'true'
end
|
|
6065
|
+
|
|
6066
|
+
# Whether a URL carries userinfo (`user[:password]@`). A CORS request to such a
# URL is a network error (access-control-and-redirects "user info" subtest).
# An unparseable URL counts as having none.
def url_has_userinfo?(url)
  begin
    credentials = URI.parse(url.to_s).userinfo
  rescue URI::InvalidURIError
    return false
  end
  !credentials.to_s.empty?
end
|
|
6074
|
+
|
|
6075
|
+
# An author-set conditional header means the CALLER is doing its own revalidation,
# so the UA cache must step aside: the request reaches the origin and the server's
# own 304/200 decision is returned (send-conditional), not a cached hit.
CONDITIONAL_REQUEST_HEADERS = [
  'if-none-match',
  'if-modified-since',
  'if-match',
  'if-unmodified-since',
  'if-range'
].freeze

# True when `headers` is a Hash holding at least one conditional request header
# (names compared case-insensitively); false for anything that is not a Hash.
def request_has_conditional_headers?(headers)
  return false unless headers.is_a?(Hash)

  headers.each_key.any? { |name| CONDITIONAL_REQUEST_HEADERS.include?(name.to_s.downcase) }
end
|
|
6082
|
+
|
|
6083
|
+
# Run the CORS preflight unless a cached result already covers this request (Fetch
# "CORS-preflight cache"): a prior preflight to the same (origin, url) within its
# Access-Control-Max-Age that allows this method + headers lets the actual request
# skip the OPTIONS (access-control-basic-allow-preflight-cache).
#
# Returns false (= network error) only when a fresh preflight is needed AND fails;
# true otherwise.
def cors_preflight_ok?(target, method, headers, req_origin, credentialed, referer)
  return true if cors_preflight_cached?(target, req_origin, method, headers, credentialed)

  grant = cors_run_preflight(target, method, headers, req_origin, credentialed, referer)
  return false unless grant

  # Cache the grant for Max-Age seconds so a covered follow-up skips the preflight.
  # The key is (origin, url, credentialed): a credentialed grant (ACAO echoing the
  # origin, no `*` matching) can't cover an uncredentialed follow-up or vice versa,
  # so the two are cached apart. Expiry uses the REAL monotonic clock (not the
  # virtual one), so a test that virtual-sleeps past Max-Age to force a
  # re-preflight isn't caught yet.
  if grant[:max_age].positive?
    cache_key = [req_origin, target, credentialed]
    stamped = grant.merge(stored_at: Process.clock_gettime(Process::CLOCK_MONOTONIC))
    @cors_preflight_cache[cache_key] = stamped
  end
  true
end
|
|
6101
|
+
|
|
6102
|
+
# Whether a cached preflight grant covers this request: an entry exists for
# (origin, url, credentialed), it has not outlived its max-age on the monotonic
# clock, and it allows the method + unsafe headers. A method/header the cache
# doesn't cover, or an expired entry, forces a fresh preflight
# (cache-invalidation-by-method / -header / -timeout).
def cors_preflight_cached?(target, req_origin, method, headers, credentialed)
  grant = @cors_preflight_cache[[req_origin, target, credentialed]]
  return false unless grant

  age = Process.clock_gettime(Process::CLOCK_MONOTONIC) - grant[:stored_at]
  return false if age >= grant[:max_age]

  unsafe = cors_unsafe_headers(headers)
  cors_grant_allows?(grant[:methods], grant[:headers], method, unsafe, credentialed)
end
|
|
6111
|
+
|
|
6112
|
+
# Fetch's "CORS non-wildcard request-header name" list, whose sole member is
# `Authorization`: a preflight `Access-Control-Allow-Headers: *` never covers it.
# It must be listed by name, even for an uncredentialed request (cors-preflight
# "authorization not covered by wildcard").
CORS_NON_WILDCARD_REQUEST_HEADERS = ['authorization'].freeze
|
|
6116
|
+
|
|
6117
|
+
# Does a preflight grant (its Access-Control-Allow-Methods / -Headers) cover this
# request? The method must be listed, CORS-safelisted, or matched by `*`; every
# unsafe header must be listed or matched by `*`. Shared by the fresh-preflight
# accept check and the cache-hit check.
#
# For a CREDENTIALED request the wildcard loses its meaning: Fetch's
# "CORS-preflight fetch" matches `*` against no method/header when credentials
# mode is include, so a non-listed method or unsafe header is rejected
# (cors-preflight-star credentialed). Headers in
# CORS_NON_WILDCARD_REQUEST_HEADERS are never matched by `*`.
def cors_grant_allows?(allow_methods, allow_headers, method, unsafe_headers, credentialed = false)
  # The method match is byte-CASE-SENSITIVE (Fetch normalizes the request method
  # but compares it verbatim against Access-Control-Allow-Methods): `delete` in
  # the grant does not cover a `DELETE` request. Safelisted GET/HEAD/POST pass
  # regardless, since they are always normalized to upper-case
  # (cors-preflight-star method-case).
  verb = method.to_s
  wildcard_usable = !credentialed
  listed = allow_methods.include?(verb) || CORS_SAFELISTED_METHODS.include?(verb)
  return false unless listed || (wildcard_usable && allow_methods.include?('*'))

  header_wildcard = wildcard_usable && allow_headers.include?('*')
  unsafe_headers.none? do |name|
    next false if allow_headers.include?(name)

    # Not listed by name: only the wildcard can still cover it.
    !header_wildcard || CORS_NON_WILDCARD_REQUEST_HEADERS.include?(name)
  end
end
|
|
6136
|
+
|
|
6137
|
+
# Fetch "CORS-preflight fetch": send an OPTIONS carrying Origin and
# Access-Control-Request-Method / -Headers. On success (ok-status, ACAO match, and
# a grant covering the method + unsafe headers) return the grant
# `{methods:, headers:, max_age:}` for the cache; otherwise nil. A credentialed
# preflight additionally requires Access-Control-Allow-Credentials: true and
# forbids `*` in the origin/method/header grants.
def cors_run_preflight(target, method, headers, req_origin, credentialed, referer)
  requested_headers = cors_unsafe_headers(headers)

  env = Rack::MockRequest.env_for(target, method: 'OPTIONS')
  env['REQUEST_METHOD'] = 'OPTIONS'
  # The preflight's Referer is the request's referrer under its referrer policy:
  # the SAME value the actual request sends (computed by the caller), not the raw
  # document URL (cors-preflight-referrer).
  apply_default_request_env(env, referer: referer, force: false)
  # A preflight is a fetch, so it carries fetch's default `Accept: */*` rather
  # than the navigation Accept apply_default_request_env sets; some handlers
  # reject a preflight whose Accept isn't */* (preflight.py).
  env['HTTP_ACCEPT'] = '*/*'
  # A preflight is always uncredentialed: no cookies, even when the actual request
  # that follows is credentialed.
  env.delete('HTTP_COOKIE')
  env['HTTP_ORIGIN'] = req_origin
  # The (already-normalized) method goes out VERBATIM: `patch` stays `patch`,
  # matching the byte-case-sensitive grant check.
  env['HTTP_ACCESS_CONTROL_REQUEST_METHOD'] = method.to_s
  env['HTTP_ACCESS_CONTROL_REQUEST_HEADERS'] = requested_headers.join(',') unless requested_headers.empty?

  status, grant_headers, preflight_body = dispatch_rack_or_http(target, env, method: 'OPTIONS', body: nil)
  preflight_body.close if preflight_body.respond_to?(:close)
  return nil unless status.to_i.between?(200, 299)

  # A credentialed preflight can't be allowed by the wildcard origin and must
  # carry Access-Control-Allow-Credentials: true (cors-preflight-star credentialed).
  allowed_origin = cors_header(grant_headers, 'access-control-allow-origin')
  origin_ok = allowed_origin == req_origin || (!credentialed && allowed_origin == '*')
  return nil unless origin_ok
  return nil if credentialed && cors_header(grant_headers, 'access-control-allow-credentials') != 'true'

  allow_methods = cors_list(cors_header(grant_headers, 'access-control-allow-methods'))
  allow_headers = cors_list(cors_header(grant_headers, 'access-control-allow-headers')).map(&:downcase)
  # Fetch "extract header list values" fails when a grant contains a malformed
  # token (`Access-Control-Allow-Methods: Bad value`; a space isn't a tchar), and
  # a failed extraction is a network error (cors-preflight-response-validation).
  # Methods and header names are both HTTP tokens; `*` is a valid tchar, so the
  # wildcard passes.
  tokens = allow_methods + allow_headers
  return nil unless tokens.all? { |token| token.match?(HTTP_TOKEN) }
  return nil unless cors_grant_allows?(allow_methods, allow_headers, method, requested_headers, credentialed)

  {
    methods: allow_methods,
    headers: allow_headers,
    max_age: cors_header(grant_headers, 'access-control-max-age').to_i
  }
end
|
|
6180
|
+
|
|
6181
|
+
# Fetch "CORS-safelisted response-header name": always exposed to script for a
# cross-origin response, without being listed in Access-Control-Expose-Headers.
CORS_SAFELISTED_RESPONSE_HEADERS = [
  'cache-control',
  'content-language',
  'content-length',
  'content-type',
  'expires',
  'last-modified',
  'pragma'
].freeze
|
|
6186
|
+
|
|
6187
|
+
# The response headers a cross-origin "cors" response exposes to getResponseHeader
# / getAllResponseHeaders: the CORS-safelisted set plus any named in
# Access-Control-Expose-Headers (`*` exposes all; that is only valid without
# credentials, which the callers of this path don't use).
#
# headers - the full response header Hash
#
# Returns a new Hash holding only the script-visible entries.
def cors_exposed_headers(headers)
  # set-cookie / set-cookie2 are forbidden response-header names: NEVER exposed to
  # script, neither under `Access-Control-Expose-Headers: *` nor when the server
  # lists them by name, so they are removed on both paths.
  forbidden = %w[set-cookie set-cookie2]
  expose = cors_list(cors_header(headers, 'access-control-expose-headers')).map(&:downcase)
  return headers.reject { |k, _| forbidden.include?(k.to_s.downcase) } if expose.include?('*')

  # x-csim-status-text is the internal reason-phrase sentinel (response_hash lifts
  # it into statusText, which IS exposed cross-origin, then strips it from the
  # script-visible map), so it must survive the filter.
  allowed = (CORS_SAFELISTED_RESPONSE_HEADERS + expose + ['x-csim-status-text']) - forbidden
  headers.select { |k, _| allowed.include?(k.to_s.downcase) }
end
|
|
6203
|
+
|
|
6204
|
+
# Case-insensitive response-header lookup for the CORS checks.
#
# headers - a Hash (or list of [name, value] pairs); nil is treated as empty
# name    - the header name to find, already lowercased
#
# Returns the value as a String, or '' when the header is absent. An Array value
# (Rack 3 permits array-valued header fields) is combined with ", " rather than
# stringified as an Array, so `['*']` reads back as `*`.
def cors_header(headers, name)
  _, value = (headers || {}).find { |k, _| k.to_s.downcase == name }
  value.is_a?(Array) ? value.join(', ') : value.to_s
end
|
|
6209
|
+
|
|
6210
|
+
# Split a comma-separated header value into its trimmed, non-empty items.
# nil (or any value whose string form is empty) yields an empty list.
def cors_list(value)
  value.to_s.split(',').filter_map do |item|
    token = item.strip
    token unless token.empty?
  end
end
|
|
6213
|
+
|
|
6214
|
+
# The origin of a URL — `scheme://host[:port]` with the default port (80/443)
|
|
6215
|
+
# elided — for the CORS same/cross-origin comparison. nil for a non-http(s) or
|
|
6216
|
+
# unparseable URL (about:blank / data: / a relative current_url) so CORS never
|
|
6217
|
+
# treats those as a comparable origin.
|
|
6218
|
+
def url_origin(url)
|
|
6219
|
+
u = URI.parse(url.to_s)
|
|
6220
|
+
return nil unless u.scheme && u.host && u.scheme.match?(/\Ahttps?\z/i)
|
|
6221
|
+
# An origin is (scheme, host, port) compared case-insensitively on scheme+host —
|
|
6222
|
+
# so canonicalize both to lowercase, else http://Example.com vs http://example.com
|
|
6223
|
+
# would mis-classify a same-origin request as cross-origin.
|
|
6224
|
+
scheme = u.scheme.downcase
|
|
6225
|
+
default = scheme == 'https' ? 443 : 80
|
|
6226
|
+
port = u.port && u.port != default ? ":#{u.port}" : ''
|
|
6227
|
+
"#{scheme}://#{u.host.downcase}#{port}"
|
|
6228
|
+
rescue URI::InvalidURIError
|
|
6229
|
+
nil
|
|
6230
|
+
end
|
|
6231
|
+
|
|
5599
6232
|
def carry_fragment(from_url, to_url)
|
|
5600
6233
|
from = URI.parse(from_url.to_s)
|
|
5601
6234
|
to = URI.parse(to_url.to_s)
|