@mochi.js/core 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/src/__tests__/geo-consistency.test.ts +277 -0
- package/src/__tests__/geo-probe.test.ts +415 -0
- package/src/__tests__/inject.test.ts +4 -0
- package/src/__tests__/integration.e2e.test.ts +24 -0
- package/src/__tests__/piercing.test.ts +164 -0
- package/src/__tests__/proc.test.ts +383 -0
- package/src/__tests__/selector.test.ts +188 -0
- package/src/__tests__/window-size.e2e.test.ts +130 -0
- package/src/cdp/types.ts +47 -0
- package/src/geo-consistency.ts +343 -0
- package/src/geo-probe.ts +603 -0
- package/src/index.ts +11 -0
- package/src/launch.ts +145 -9
- package/src/page/element-handle.ts +110 -0
- package/src/page/piercing.ts +135 -0
- package/src/page/selector.ts +423 -0
- package/src/page.ts +152 -1
- package/src/proc.ts +386 -41
- package/src/session.ts +358 -12
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Task 0252 conformance E2E — verify the OS-level outer-window pin
|
|
3
|
+
* (`--window-size=<W>,<H>`, derived from `matrix.display.{width,height}`)
|
|
4
|
+
* is honored under `--headless=new` such that
|
|
5
|
+
* `window.outerWidth === matrix.display.width`.
|
|
6
|
+
*
|
|
7
|
+
* UDC issue #2242 documents that `--window-size` is honored at the OS
|
|
8
|
+
* level under headless, but the JS API surface (`window.outerWidth/Height`)
|
|
9
|
+
* historically did not reflect it without a CDP `Browser.setWindowBounds`
|
|
10
|
+
* follow-up. This test is the canonical check that the leak is closed
|
|
11
|
+
* end-to-end on the Chromium versions we care about. If `outerWidth`
|
|
12
|
+
* comes back as 800 (the legacy headless default) the test fails loudly
|
|
13
|
+
* and the orchestrator knows to layer in the CDP fix.
|
|
14
|
+
*
|
|
15
|
+
* Mochi's inject layer also defines `window.outerWidth/outerHeight` from
|
|
16
|
+
* `matrix.uaCh["window-viewport"]` (R-029). On macOS the R-029 outerWidth
|
|
17
|
+
* equals `display.width` exactly (OS_CHROME_WIDTH = 0), so the assertion
|
|
18
|
+
* holds regardless of whether the OS-level honoring works as promised.
|
|
19
|
+
* The OS-level fix is what hardens the surface against:
|
|
20
|
+
* - inject-bypassed flows (`bypassInject: true`, `mochi capture`)
|
|
21
|
+
* - cross-realm reads where the spoof hasn't installed yet
|
|
22
|
+
*
|
|
23
|
+
* Gated by `MOCHI_E2E=1`. Set `MOCHI_CHROMIUM_PATH` to a real binary.
|
|
24
|
+
*
|
|
25
|
+
* @see tasks/0252-window-size-flag-from-matrix.md
|
|
26
|
+
* @see UDC `__init__.py:410-411`, UDC issue #2242
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { describe, expect, it } from "bun:test";
|
|
30
|
+
import { mochi } from "../index";
|
|
31
|
+
|
|
32
|
+
/** Per-test timeout handed to `it(...)`, in milliseconds. */
const TEST_TIMEOUT_MS = 15_000;

// The suite is registered as skipped unless MOCHI_E2E=1 is set in the environment.
const suite = process.env.MOCHI_E2E === "1" ? describe : describe.skip;

/**
 * Self-contained probe document: an inline script serialises the window and
 * screen dimensions as JSON into `#p`, where the test reads them back.
 */
const PROBE_HTML = `<!doctype html><html><body><pre id="p"></pre><script>
document.getElementById("p").textContent = JSON.stringify({
outerWidth: window.outerWidth,
outerHeight: window.outerHeight,
screenWidth: screen.width,
screenHeight: screen.height,
});
</script></body></html>`;

/** The probe document packaged as a `data:` URL so no server is required. */
const PROBE_DATA_URL = `data:text/html;charset=utf-8,${encodeURIComponent(PROBE_HTML)}`;

/** Shape of the JSON payload the probe script writes into `#p`. */
type ProbeShape = Record<"outerWidth" | "outerHeight" | "screenWidth" | "screenHeight", number>;

suite("@mochi.js/core --window-size E2E (MOCHI_E2E=1) — task 0252", () => {
  it(
    "window.outerWidth matches matrix.display.width under --headless=new",
    async () => {
      const session = await mochi.launch({
        seed: "task-0252-window-size",
        headless: true,
        profile: {
          id: "window-size-e2e-fixture",
          version: "0.0.0-e2e",
          engine: "chromium",
          browser: { name: "chrome", channel: "stable", minVersion: "131", maxVersion: "133" },
          os: { name: "macos", version: "14", arch: "arm64" },
          device: {
            vendor: "Apple",
            model: "Mac14,2",
            cpuFamily: "apple-silicon-m2",
            cores: 8,
            memoryGB: 16,
          },
          // Deliberately unusual dimensions: a fallback to the 800×600 default cannot go unnoticed.
          display: { width: 1728, height: 1117, dpr: 2, colorDepth: 30, pixelDepth: 30 },
          gpu: {
            vendor: "Apple Inc.",
            renderer: "Apple M2",
            webglUnmaskedVendor: "Google Inc. (Apple)",
            webglUnmaskedRenderer:
              "ANGLE (Apple, ANGLE Metal Renderer: Apple M2, Unspecified Version)",
            webglMaxTextureSize: 16384,
            webglMaxColorAttachments: 8,
            webglExtensions: [],
          },
          audio: {
            contextSampleRate: 48000,
            audioWorkletLatency: 0.005,
            destinationMaxChannelCount: 2,
          },
          fonts: { family: "macos-baseline", list: ["Helvetica"] },
          timezone: "America/Los_Angeles",
          locale: "en-US",
          languages: ["en-US", "en"],
          behavior: { hand: "right", tremor: 0.18, wpm: 60, scrollStyle: "smooth" },
          wreqPreset: "chrome_131_macos",
          userAgent:
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.6778.86 Safari/537.36",
          uaCh: {},
          entropyBudget: { fixed: [], perSeed: [] },
        },
      });

      try {
        const profile = session.profile;
        const page = await session.newPage();
        await page.goto(PROBE_DATA_URL);

        const rawJson = await page.text("#p");
        if (rawJson === null) {
          throw new Error("[mochi e2e] probe element produced no textContent");
        }
        const measured = JSON.parse(rawJson) as ProbeShape;

        // Task 0252 success criterion #4 (probe-time conformance). An
        // outerWidth of 800 is the legacy headless default leaking through:
        // neither the OS-level pin nor the inject spoof took effect, and the
        // CDP `Browser.setWindowBounds` follow-up (UDC issue #2242) is needed.
        expect(measured.outerWidth).toBe(profile.display.width);
        expect(measured.outerWidth).not.toBe(800);

        // screen.* is populated through a separate path (inject layer R-010).
        expect(measured.screenWidth).toBe(profile.display.width);
        expect(measured.screenHeight).toBe(profile.display.height);
      } finally {
        // Always tear the browser down, even when an assertion above fails.
        await session.close();
      }
    },
    TEST_TIMEOUT_MS,
  );
});
|
package/src/cdp/types.ts
CHANGED
|
@@ -68,6 +68,53 @@ export interface DomNode {
|
|
|
68
68
|
nodeName: string;
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
+
/**
|
|
72
|
+
* Wider subset of `DOM.Node` used by the closed-shadow piercing locator
|
|
73
|
+
* (`Page.querySelectorPiercing`).
|
|
74
|
+
*
|
|
75
|
+
* Returned by `DOM.getDocument({ depth: -1, pierce: true })` — `pierce: true`
|
|
76
|
+
* yields shadow descendants under `shadowRoots[]` for *both* open and closed
|
|
77
|
+
* roots, and iframe descendants under `contentDocument`. Element-node fields
|
|
78
|
+
* (`localName`, `attributes`) drive selector matching in JS without round-
|
|
79
|
+
* tripping each candidate through `DOM.querySelector` (which would not pierce
|
|
80
|
+
* closed shadows even when called against the parent's document node).
|
|
81
|
+
*
|
|
82
|
+
* Reference: <https://chromedevtools.github.io/devtools-protocol/tot/DOM/#type-Node>
|
|
83
|
+
*
|
|
84
|
+
* @see PLAN.md §8.2 — `DOM.getDocument` and `DOM.resolveNode` are not on the
|
|
85
|
+
* forbidden list; both are fine to use.
|
|
86
|
+
* @see tasks/0253-closed-shadow-piercing-locator.md
|
|
87
|
+
*/
|
|
88
|
+
export interface PierceDomNode {
|
|
89
|
+
nodeId: number;
|
|
90
|
+
backendNodeId: number;
|
|
91
|
+
/** 1 = ELEMENT, 3 = TEXT, 9 = DOCUMENT, 11 = DOCUMENT_FRAGMENT, etc. */
|
|
92
|
+
nodeType: number;
|
|
93
|
+
/** Upper-case tag for element nodes (e.g. `"DIV"`); `"#document"` for the document. */
|
|
94
|
+
nodeName: string;
|
|
95
|
+
/** Lower-case tag (`"div"`) — only present on element nodes. */
|
|
96
|
+
localName?: string;
|
|
97
|
+
/** Flat `[name, value, name, value, ...]` array — only on element nodes. */
|
|
98
|
+
attributes?: string[];
|
|
99
|
+
/** Element / document children. */
|
|
100
|
+
children?: PierceDomNode[];
|
|
101
|
+
/**
|
|
102
|
+
* Shadow-root subtrees attached to this element. CDP yields BOTH open and
|
|
103
|
+
* closed shadows here when `pierce: true` is set; `shadowRootType` is
|
|
104
|
+
* `"open" | "closed" | "user-agent"`. The piercing walker traverses all of
|
|
105
|
+
* them — that's the whole point of this type vs. `DomNode`.
|
|
106
|
+
*/
|
|
107
|
+
shadowRoots?: PierceDomNode[];
|
|
108
|
+
/** `"open" | "closed" | "user-agent"` — present on shadow-root nodes. */
|
|
109
|
+
shadowRootType?: "open" | "closed" | "user-agent";
|
|
110
|
+
/** iframe descendant tree. CDP yields it as a single `DOM.Node` (the frame's document), not an array. */
|
|
111
|
+
contentDocument?: PierceDomNode;
|
|
112
|
+
/** Pseudo-element children (::before, ::after) — element nodes only. */
|
|
113
|
+
pseudoElements?: PierceDomNode[];
|
|
114
|
+
/** Template content fragment — present on `<template>` elements. */
|
|
115
|
+
templateContent?: PierceDomNode;
|
|
116
|
+
}
|
|
117
|
+
|
|
71
118
|
/** Subset of `Page.Frame`. */
|
|
72
119
|
export interface PageFrame {
|
|
73
120
|
id: string;
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Geo-consistency reconciler — cross-references the matrix's declared
|
|
3
|
+
* `(timezone, locale)` against the probed exit-IP geolocation and
|
|
4
|
+
* applies a `LaunchOptions.geoConsistency` policy on mismatch.
|
|
5
|
+
*
|
|
6
|
+
* The default policy is `"privacy-fallback"`: on mismatch (or probe
|
|
7
|
+
* failure), override the matrix to `UTC` + `en-US`. The session then
|
|
8
|
+
* fingerprints as a privacy-conscious user (Tor / Brave / hardened-FF
|
|
9
|
+
* style), which is benign in most threat models — across thousands of
|
|
10
|
+
* real users, mismatched-tz-vs-IP is the canonical bot signature; UTC
|
|
11
|
+
* + en-US looks like every Tor user.
|
|
12
|
+
*
|
|
13
|
+
* @see PLAN.md §9 — relational consistency, IP/TZ/Locale axis
|
|
14
|
+
* @see tasks/0262-ip-tz-locale-exit-consistency.md
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import type { MatrixV1 } from "@mochi.js/consistency";
|
|
18
|
+
import type { ExitGeo } from "./geo-probe";
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Reconciliation modes for `(matrix.timezone, matrix.locale)` vs exit IP.
|
|
22
|
+
*
|
|
23
|
+
* - `"privacy-fallback"` *(default)* — on mismatch (or probe failure),
|
|
24
|
+
* override to `UTC` + `en-US`. Fingerprints as a Tor-class user. UTC
|
|
25
|
+
* + en-US is the failure-mode-of-least-tampering: it identifies the
|
|
26
|
+
* user as privacy-aware, not as automated.
|
|
27
|
+
* - `"auto-correct"` — on mismatch, override the matrix's timezone with
|
|
28
|
+
* the IP's timezone and the locale's region with the IP's country.
|
|
29
|
+
* Most "stealth" but trusts mochi's IP-derived defaults over the
|
|
30
|
+
* user's declared profile.
|
|
31
|
+
* - `"strict"` — throw on mismatch. The user must change profile or
|
|
32
|
+
* change proxy.
|
|
33
|
+
* - `"off"` — skip the probe entirely. Used by tests and by users with
|
|
34
|
+
* rate-limit problems.
|
|
35
|
+
*/
|
|
36
|
+
export type GeoConsistencyMode = "privacy-fallback" | "auto-correct" | "strict" | "off";
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Outcome of a reconciliation pass — exposed for diagnostics + the
|
|
40
|
+
* planned `_internalReconcile` test seam. An `action` of `"ok"`, `"no-probe"`
|
|
41
|
+
* or `"off"` means the matrix passes through unchanged; `"privacy-fallback"`
|
|
42
|
+
* and `"auto-correct"` mean we adjusted the matrix per the policy. Strict-mode
|
|
43
|
+
* mismatches have no result value — `GeoMismatchError` is thrown instead.
|
|
44
|
+
*/
|
|
45
|
+
export interface GeoReconcileResult {
|
|
46
|
+
/** Possibly-adjusted matrix (always a fresh object when adjusted). */
|
|
47
|
+
readonly matrix: MatrixV1;
|
|
48
|
+
/** What happened. `"ok"` is the no-mismatch fast path. */
|
|
49
|
+
readonly action: "ok" | "no-probe" | "off" | "privacy-fallback" | "auto-correct";
|
|
50
|
+
/** The geo result that drove this decision (null for `"no-probe"` / `"off"`, and for `"privacy-fallback"` after a failed probe). */
|
|
51
|
+
readonly geo: ExitGeo | null;
|
|
52
|
+
/** Human-readable mismatch summary, when applicable. */
|
|
53
|
+
readonly reason?: string;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/**
 * Error raised by {@link reconcileGeoConsistency} in `"strict"` mode when the
 * probed exit-IP geo disagrees with the matrix. Carries the declared
 * timezone/locale, the probed geo and the mismatch summary so callers can
 * report (or act on) exactly what diverged.
 */
export class GeoMismatchError extends Error {
  /** Timezone and locale the matrix declared. */
  readonly matrix: { timezone: string; locale: string };
  /** Probed exit-IP geolocation that contradicted the matrix. */
  readonly geo: ExitGeo;
  /** Human-readable description of the mismatching axis/axes. */
  readonly reason: string;

  constructor(matrix: { timezone: string; locale: string }, geo: ExitGeo, reason: string) {
    super(
      [
        `[mochi] geoConsistency: strict — exit-IP geo (${geo.country}/${geo.timezone}, `,
        `via ${geo.source}) does not match matrix (${matrix.locale}/${matrix.timezone}): `,
        `${reason}. Change the profile to match the proxy egress, change the proxy, `,
        `or pass geoConsistency: "privacy-fallback" | "auto-correct" | "off".`,
      ].join(""),
    );
    this.name = "GeoMismatchError";
    this.reason = reason;
    this.geo = geo;
    this.matrix = matrix;
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Compute the **integer minutes offset** from UTC of an IANA timezone at a
 * given reference instant (positive = east of Greenwich).
 *
 * Uses `Intl.DateTimeFormat(...).formatToParts(...)` with
 * `timeZoneName: "longOffset"` and parses the resulting `"GMT±HH:MM"` label.
 * This works for both fixed-offset zones (`UTC`, `Etc/GMT+8`) and DST-aware
 * zones (`America/New_York`).
 *
 * Offsets, not zone names, are what must be compared: `America/New_York`
 * and `America/Detroit` share an offset and are equivalent for
 * fingerprinting.
 *
 * Sub-minute offsets (historical local-mean-time values such as
 * `GMT-04:56:02`) are accepted; the seconds component is discarded so the
 * result stays a whole number of minutes. The result is never `-0`.
 *
 * @param zone IANA timezone identifier.
 * @param ref  Instant at which to evaluate the offset; defaults to now.
 * @returns Offset in minutes, or `null` when the zone is not recognised, the
 *   reference date is invalid, or the offset label has an unexpected shape.
 *   `reconcileGeoConsistency` treats a `null` offset as "not comparable",
 *   i.e. as no timezone mismatch.
 */
export function tzOffsetMinutes(zone: string, ref: Date = new Date()): number | null {
  let label: string | undefined;
  try {
    label = new Intl.DateTimeFormat("en-US", {
      timeZone: zone,
      timeZoneName: "longOffset",
    })
      .formatToParts(ref)
      .find((part) => part.type === "timeZoneName")?.value;
  } catch {
    // RangeError: unknown zone identifier or invalid reference date.
    return null;
  }
  if (label === undefined) return null;

  // longOffset shape: "GMT+05:30", "GMT-08:00", "GMT" (== 0); optionally with
  // a trailing ":SS" for sub-minute offsets.
  if (label === "GMT" || label === "UTC") return 0;
  const m = /^(?:GMT|UTC)([+-])(\d{1,2})(?::?(\d{2}))?(?::?\d{2})?$/.exec(label);
  if (m === null) return null;

  // The regex only admits digit runs, so parseInt cannot yield NaN here.
  const hours = Number.parseInt(m[2] ?? "0", 10);
  const mins = Number.parseInt(m[3] ?? "0", 10);
  const total = hours * 60 + mins;
  // Guard the zero case so a "-00:00"-style label never produces -0.
  if (total === 0) return 0;
  return m[1] === "-" ? -total : total;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Pull the region subtag out of a BCP-47 locale using `Intl.Locale`, returned
 * in upper case.
 *
 * Examples: `"en-US"` → `"US"`, `"de-DE"` → `"DE"`, `"en"` → `null`.
 *
 * @param locale BCP-47 language tag.
 * @returns The region code, or `null` when the tag carries no region or
 *   cannot be parsed at all.
 */
export function localeRegion(locale: string): string | null {
  let parsed: Intl.Locale;
  try {
    parsed = new Intl.Locale(locale);
  } catch {
    // Malformed tag — nothing to extract.
    return null;
  }
  const { region } = parsed;
  return region ? region.toUpperCase() : null;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Country code → primary locale table backing `auto-correct` mode.
 *
 * Only the major proxy-egress countries are listed, ordered by how commonly
 * they appear as residential-proxy egress destinations. Countries absent
 * from the table are handled by `primaryLocaleFor`, which synthesises
 * `en-<CC>` — inaccurate for some countries, but a lower-stealth-ceiling
 * fallback rather than a hard failure.
 */
const PRIMARY_LOCALE_BY_COUNTRY: Readonly<Record<string, string>> = {
  US: "en-US",
  GB: "en-GB",
  CA: "en-CA",
  AU: "en-AU",
  IE: "en-IE",
  NZ: "en-NZ",
  DE: "de-DE",
  AT: "de-AT",
  CH: "de-CH",
  FR: "fr-FR",
  BE: "fr-BE",
  IT: "it-IT",
  ES: "es-ES",
  MX: "es-MX",
  AR: "es-AR",
  BR: "pt-BR",
  PT: "pt-PT",
  NL: "nl-NL",
  PL: "pl-PL",
  RU: "ru-RU",
  UA: "uk-UA",
  CN: "zh-CN",
  HK: "zh-HK",
  TW: "zh-TW",
  JP: "ja-JP",
  KR: "ko-KR",
  IN: "hi-IN",
  ID: "id-ID",
  TH: "th-TH",
  VN: "vi-VN",
  TR: "tr-TR",
  IL: "he-IL",
  SA: "ar-SA",
  AE: "ar-AE",
  EG: "ar-EG",
  ZA: "en-ZA",
  SG: "en-SG",
  MY: "ms-MY",
  PH: "en-PH",
  SE: "sv-SE",
  NO: "nb-NO",
  DK: "da-DK",
  FI: "fi-FI",
  CZ: "cs-CZ",
  HU: "hu-HU",
  RO: "ro-RO",
  GR: "el-GR",
};

/**
 * Look up the best-effort primary locale for an ISO-3166-1 alpha-2 country
 * code. The lookup is case-insensitive; unlisted countries yield `en-<CC>`
 * with the code upper-cased.
 */
function primaryLocaleFor(country: string): string {
  const code = country.toUpperCase();
  const listed = PRIMARY_LOCALE_BY_COUNTRY[code];
  return listed ?? `en-${code}`;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Build a new matrix whose geo-axis fields (`timezone`, `locale`,
 * `languages`) come from `overrides`; every other field is copied from
 * `matrix` via object spread, so the input is left untouched.
 *
 * `wreqPreset` and `userAgent` are deliberately not part of the override —
 * they describe OS/browser identity rather than geography.
 *
 * @param matrix    Source matrix.
 * @param overrides Replacement geo-axis values; `languages` is copied into a
 *   fresh array rather than shared with the caller.
 * @returns A fresh matrix object with the geo axis replaced.
 */
function withGeoOverride(
  matrix: MatrixV1,
  overrides: { timezone: string; locale: string; languages: readonly [string, ...string[]] },
): MatrixV1 {
  const { timezone, locale } = overrides;
  const [primary, ...secondary] = overrides.languages;
  return { ...matrix, timezone, locale, languages: [primary, ...secondary] };
}
|
|
215
|
+
|
|
216
|
+
/**
 * Reconcile the matrix against the probed exit-IP geo per the supplied
 * `mode`. Pure: never mutates the input matrix; returns a fresh object on
 * any override path.
 *
 * **Mismatch criteria**:
 *   - **Timezone**: matrix offset minutes ≠ IP offset minutes, both computed
 *     by `tzOffsetMinutes` at the same reference instant. Zone-name
 *     equivalence (e.g. `America/New_York` vs `America/Detroit`) is
 *     intentional — they share an offset and fingerprint identically. If
 *     either zone cannot be resolved to an offset, the timezone axis is
 *     treated as not mismatched.
 *   - **Locale**: region of `matrix.locale` ≠ IP country code, compared
 *     case-insensitively. A locale with no region (`"en"`) is treated as
 *     matching any country (we can't disprove it).
 *
 * **Per-mode behaviour** (matrix passes through unchanged unless
 * mismatch is detected):
 *
 * | Mode | probe = null | any mismatch (tz and/or locale) | both match |
 * |---|---|---|---|
 * | `privacy-fallback` | UTC+en-US | UTC+en-US | passthrough |
 * | `auto-correct` | passthrough (best effort) | IP tz + IP-country locale | passthrough |
 * | `strict` | passthrough (no probe → no mismatch) | THROW | passthrough |
 * | `off` | passthrough | n/a (probe ignored) | passthrough |
 *
 * Overrides are not per-axis: once either axis mismatches, `auto-correct`
 * replaces timezone, locale and languages together, and `privacy-fallback`
 * resets all three.
 *
 * The `strict` × `probe = null` case intentionally passes the matrix
 * through. A null probe means "we couldn't talk to any geo endpoint" —
 * which is most often a network blip, not a mismatch. Strict-mode users
 * who want to fail closed on probe failure should pair this with
 * external monitoring.
 *
 * @param matrix Declared profile matrix.
 * @param geo    Probed exit-IP geo, or `null` when the probe failed.
 * @param mode   Reconciliation policy.
 * @returns The (possibly overridden) matrix plus the action taken.
 * @throws {GeoMismatchError} when `mode === "strict"` and a real
 *   mismatch was detected.
 */
export function reconcileGeoConsistency(
  matrix: MatrixV1,
  geo: ExitGeo | null,
  mode: GeoConsistencyMode,
): GeoReconcileResult {
  if (mode === "off") {
    return { matrix, action: "off", geo: null };
  }
  if (geo === null) {
    if (mode === "privacy-fallback") {
      return {
        matrix: withGeoOverride(matrix, {
          timezone: "UTC",
          locale: "en-US",
          languages: ["en-US", "en"],
        }),
        action: "privacy-fallback",
        geo: null,
        reason: "probe returned null (all endpoints failed); falling back to UTC+en-US",
      };
    }
    // auto-correct + strict: nothing to act on. Pass through.
    return { matrix, action: "no-probe", geo: null };
  }

  // We have a probe result. Evaluate both offsets at one shared instant so a
  // DST transition between two clock reads cannot fabricate a mismatch.
  const now = new Date();
  const matrixOffset = tzOffsetMinutes(matrix.timezone, now);
  const ipOffset = tzOffsetMinutes(geo.timezone, now);
  const tzMismatch =
    matrixOffset !== null && ipOffset !== null && matrixOffset !== ipOffset
      ? `tz offset ${matrixOffset}min (matrix ${matrix.timezone}) ≠ ${ipOffset}min (IP ${geo.timezone})`
      : null;

  const matrixRegion = localeRegion(matrix.locale);
  // localeRegion() yields upper case; normalise the probed country the same
  // way (primaryLocaleFor does likewise) so "us" vs "US" is not a mismatch.
  const ipCountry = geo.country.toUpperCase();
  // matrixRegion === null => locale has no region (e.g. "en"); treat as
  // permissive match to avoid spurious mismatches.
  const localeMismatch =
    matrixRegion !== null && matrixRegion !== ipCountry
      ? `locale region ${matrixRegion} (matrix ${matrix.locale}) ≠ IP country ${ipCountry}`
      : null;

  if (tzMismatch === null && localeMismatch === null) {
    return { matrix, action: "ok", geo };
  }

  const reason = [tzMismatch, localeMismatch].filter((x): x is string => x !== null).join("; ");

  if (mode === "strict") {
    throw new GeoMismatchError({ timezone: matrix.timezone, locale: matrix.locale }, geo, reason);
  }
  if (mode === "auto-correct") {
    const newLocale = primaryLocaleFor(geo.country);
    return {
      matrix: withGeoOverride(matrix, {
        timezone: geo.timezone,
        locale: newLocale,
        // languages list: primary locale, its language root, then "en"
        // (deduped) — e.g. "de-DE", "de", "en". Sites read the root and the
        // English tail for fallback negotiation.
        languages: deriveLanguagesFor(newLocale),
      }),
      action: "auto-correct",
      geo,
      reason,
    };
  }
  // privacy-fallback
  return {
    matrix: withGeoOverride(matrix, {
      timezone: "UTC",
      locale: "en-US",
      languages: ["en-US", "en"],
    }),
    action: "privacy-fallback",
    geo,
    reason,
  };
}
|
|
326
|
+
|
|
327
|
+
/**
 * Build the `navigator.languages` list used by an `auto-correct` override.
 *
 * The list is `[locale, <language root>, "en"]` with duplicates removed and
 * order preserved; the language root is whatever precedes the first `-` and
 * is omitted when the locale has no subtag separator. The trailing `"en"`
 * mirrors what real Chrome instances ship for non-English locales — English
 * is a common secondary because Chrome defaults the menu language to English
 * on first install in many regions.
 *
 * @param locale Primary locale, always the first entry of the result.
 * @returns Non-empty, deduplicated language list.
 */
function deriveLanguagesFor(locale: string): readonly [string, ...string[]] {
  const sep = locale.indexOf("-");
  const fallbacks = sep > 0 ? [locale.slice(0, sep), "en"] : ["en"];
  // A Set keeps first-seen order while dropping repeats; the primary locale
  // is removed because it is already emitted at the head of the tuple.
  const extras = new Set<string>(fallbacks);
  extras.delete(locale);
  return [locale, ...extras];
}
|