@mochi.js/core 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -4
- package/src/__tests__/geo-consistency.test.ts +277 -0
- package/src/__tests__/geo-probe.test.ts +415 -0
- package/src/__tests__/inject.test.ts +2 -0
- package/src/__tests__/integration.e2e.test.ts +24 -0
- package/src/geo-consistency.ts +343 -0
- package/src/geo-probe.ts +603 -0
- package/src/index.ts +10 -0
- package/src/launch.ts +78 -7
- package/src/page.ts +10 -1
- package/src/session.ts +228 -10
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Geo-consistency reconciler — cross-references the matrix's declared
|
|
3
|
+
* `(timezone, locale)` against the probed exit-IP geolocation and
|
|
4
|
+
* applies a `LaunchOptions.geoConsistency` policy on mismatch.
|
|
5
|
+
*
|
|
6
|
+
* The default policy is `"privacy-fallback"`: on mismatch (or probe
|
|
7
|
+
* failure), override the matrix to `UTC` + `en-US`. The session then
|
|
8
|
+
* fingerprints as a privacy-conscious user (Tor / Brave / hardened-FF
|
|
9
|
+
* style), which is benign in most threat models — across thousands of
|
|
10
|
+
* real users, mismatched-tz-vs-IP is the canonical bot signature; UTC
|
|
11
|
+
* + en-US looks like every Tor user.
|
|
12
|
+
*
|
|
13
|
+
* @see PLAN.md §9 — relational consistency, IP/TZ/Locale axis
|
|
14
|
+
* @see tasks/0262-ip-tz-locale-exit-consistency.md
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import type { MatrixV1 } from "@mochi.js/consistency";
|
|
18
|
+
import type { ExitGeo } from "./geo-probe";
|
|
19
|
+
|
|
20
|
+
/**
 * Policy applied when the matrix's `(timezone, locale)` is reconciled
 * against the exit-IP geolocation.
 *
 * - `"privacy-fallback"` *(default)*: on mismatch or probe failure the
 *   matrix is overridden to `UTC` + `en-US`, so the session presents as a
 *   privacy-aware user rather than as an automated one.
 * - `"auto-correct"`: on mismatch the matrix takes the IP's timezone and a
 *   locale derived from the IP's country. Favours the IP-derived values
 *   over the profile the user declared.
 * - `"strict"`: a mismatch throws; the user has to change either the
 *   profile or the proxy.
 * - `"off"`: the probe is skipped entirely (tests, rate-limited users).
 */
export type GeoConsistencyMode =
  | "privacy-fallback"
  | "auto-correct"
  | "strict"
  | "off";
|
|
37
|
+
|
|
38
|
+
/**
 * Outcome of a reconciliation pass, exposed for diagnostics and tests.
 *
 * The `action` field says what happened:
 * - `"ok"`: a probe result was available and nothing mismatched; the
 *   matrix is passed through unchanged.
 * - `"off"`: reconciliation was disabled; the matrix is passed through.
 * - `"no-probe"`: no probe result was available and the mode does not act
 *   on that; the matrix is passed through.
 * - `"privacy-fallback"` / `"auto-correct"`: the matrix was adjusted per
 *   that policy.
 *
 * A strict-mode mismatch is not represented here: it is reported by
 * throwing `GeoMismatchError` instead of returning a result.
 */
export interface GeoReconcileResult {
  /** Possibly-adjusted matrix (always a fresh object when adjusted). */
  readonly matrix: MatrixV1;
  /** What happened. `"ok"` is the no-mismatch fast path. */
  readonly action:
    | "ok"
    | "no-probe"
    | "off"
    | "privacy-fallback"
    | "auto-correct";
  /** The geo result that drove this decision (`null` when no probe result was used). */
  readonly geo: ExitGeo | null;
  /** Human-readable mismatch summary, when applicable. */
  readonly reason?: string;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Error raised by `reconcileGeoConsistency` in `"strict"` mode when the
 * probed exit-IP geo does not agree with the matrix. It carries the
 * offending matrix fields, the probe result and the mismatch summary so
 * callers can inspect them programmatically.
 */
export class GeoMismatchError extends Error {
  /** The matrix's declared timezone and locale at the time of the check. */
  readonly matrix: { timezone: string; locale: string };
  /** The probe result the matrix was compared against. */
  readonly geo: ExitGeo;
  /** Human-readable description of which axis (or axes) mismatched. */
  readonly reason: string;

  constructor(matrix: { timezone: string; locale: string }, geo: ExitGeo, reason: string) {
    super(
      [
        `[mochi] geoConsistency: strict — exit-IP geo (${geo.country}/${geo.timezone}, via ${geo.source}) `,
        `does not match matrix (${matrix.locale}/${matrix.timezone}): ${reason}. `,
        `Change the profile to match the proxy egress, change the proxy, `,
        `or pass geoConsistency: "privacy-fallback" | "auto-correct" | "off".`,
      ].join(""),
    );
    this.reason = reason;
    this.geo = geo;
    this.matrix = matrix;
    this.name = "GeoMismatchError";
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Resolve an IANA timezone name to its UTC offset, in whole minutes, at
 * the instant `ref`.
 *
 * The offset is read from the `timeZoneName` part that
 * `Intl.DateTimeFormat` emits in `"longOffset"` style (`"GMT+05:30"`,
 * `"GMT-08:00"`, or bare `"GMT"` for zero). Comparing offsets rather than
 * zone names lets zones such as `America/New_York` and `America/Detroit`
 * count as equivalent.
 *
 * @param zone IANA timezone identifier, e.g. `"Europe/Berlin"`.
 * @param ref Instant at which the offset is evaluated; defaults to now.
 * @returns Minutes east of UTC (negative for west), or `null` when the
 *   zone is rejected by `Intl` or the offset label cannot be parsed.
 */
export function tzOffsetMinutes(zone: string, ref: Date = new Date()): number | null {
  let label: string | undefined;
  try {
    const formatter = new Intl.DateTimeFormat("en-US", {
      timeZone: zone,
      timeZoneName: "longOffset",
    });
    for (const part of formatter.formatToParts(ref)) {
      if (part.type === "timeZoneName") {
        label = part.value;
        break;
      }
    }
  } catch {
    // Unknown zone (or otherwise unformattable input): incomparable.
    return null;
  }

  if (label === undefined) return null;
  // A zero offset is rendered without a numeric suffix.
  if (label === "GMT" || label === "UTC") return 0;

  const match = /^(?:GMT|UTC)([+-])(\d{1,2})(?::?(\d{2}))?$/.exec(label);
  if (match === null) return null;

  const [, signChar, hourText = "0", minuteText = "0"] = match;
  const hours = Number.parseInt(hourText, 10);
  const minutes = Number.parseInt(minuteText, 10);
  if (!Number.isFinite(hours) || !Number.isFinite(minutes)) return null;

  const magnitude = hours * 60 + minutes;
  return signChar === "-" ? -magnitude : magnitude;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Extract the ISO-3166-1 alpha-2 region code from a BCP-47 locale via
 * `Intl.Locale`.
 *
 * `"en-US"` → `"US"`, `"de-DE"` → `"DE"`, `"en"` → `null` (no region).
 *
 * BCP-47 also allows three-digit UN M.49 area codes as the region subtag
 * (e.g. `"es-419"`, Latin American Spanish). Such a region names an area,
 * not a country, so it can never equal an ISO country code; it is reported
 * as `null` (no comparable region) instead of being returned verbatim.
 *
 * @param locale BCP-47 language tag.
 * @returns Upper-case alpha-2 region code, or `null` when the tag is
 *   malformed, has no region, or has a non-alpha-2 region.
 */
export function localeRegion(locale: string): string | null {
  let region: string | undefined;
  try {
    region = new Intl.Locale(locale).region;
  } catch {
    // Malformed tag: nothing to compare against.
    return null;
  }
  if (region === undefined) return null;

  const upper = region.toUpperCase();
  // Only two-letter codes are comparable with an IP-derived country code.
  return /^[A-Z]{2}$/.test(upper) ? upper : null;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Small country-code → primary-locale table used by `auto-correct` mode.
 *
 * Keys are upper-case ISO-3166-1 alpha-2 country codes; values are the
 * BCP-47 locale assumed for an exit IP in that country. The table is not
 * exhaustive: it covers the major proxy-egress countries, and callers are
 * expected to supply their own fallback for codes that are missing here.
 *
 * Entries are ordered roughly by how common the country is as a
 * residential-proxy egress destination.
 */
const PRIMARY_LOCALE_BY_COUNTRY: Readonly<Record<string, string>> = {
  // English-speaking
  US: "en-US",
  GB: "en-GB",
  CA: "en-CA",
  AU: "en-AU",
  IE: "en-IE",
  NZ: "en-NZ",
  // Western / Southern Europe and the Americas
  DE: "de-DE",
  AT: "de-AT",
  CH: "de-CH",
  FR: "fr-FR",
  BE: "fr-BE",
  IT: "it-IT",
  ES: "es-ES",
  MX: "es-MX",
  AR: "es-AR",
  BR: "pt-BR",
  PT: "pt-PT",
  NL: "nl-NL",
  // Eastern Europe
  PL: "pl-PL",
  RU: "ru-RU",
  UA: "uk-UA",
  // East, South and South-East Asia
  CN: "zh-CN",
  HK: "zh-HK",
  TW: "zh-TW",
  JP: "ja-JP",
  KR: "ko-KR",
  IN: "hi-IN",
  ID: "id-ID",
  TH: "th-TH",
  VN: "vi-VN",
  // Middle East and Africa
  TR: "tr-TR",
  IL: "he-IL",
  SA: "ar-SA",
  AE: "ar-AE",
  EG: "ar-EG",
  ZA: "en-ZA",
  // Maritime South-East Asia
  SG: "en-SG",
  MY: "ms-MY",
  PH: "en-PH",
  // Nordics and Central / South-East Europe
  SE: "sv-SE",
  NO: "nb-NO",
  DK: "da-DK",
  FI: "fi-FI",
  CZ: "cs-CZ",
  HU: "hu-HU",
  RO: "ro-RO",
  GR: "el-GR",
};
|
|
186
|
+
|
|
187
|
+
/**
 * Best-effort primary locale for an ISO-3166-1 alpha-2 country code.
 *
 * The code is upper-cased before lookup. Countries missing from
 * `PRIMARY_LOCALE_BY_COUNTRY` fall back to `en-<CC>`.
 */
function primaryLocaleFor(country: string): string {
  const code = country.toUpperCase();
  const known = PRIMARY_LOCALE_BY_COUNTRY[code];
  return known ?? `en-${code}`;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Build a copy of `matrix` whose geo-axis fields (`timezone`, `locale`,
 * `languages`) are replaced by `overrides`.
 *
 * Every other field of the matrix is carried over by a shallow spread,
 * so the result has the same shape as the input. The input matrix is not
 * modified, and the `languages` array in the result is a new array rather
 * than the one passed in.
 *
 * @param matrix Matrix to copy.
 * @param overrides Replacement geo-axis values; `languages` must be non-empty.
 * @returns A fresh matrix with the three geo-axis fields swapped.
 */
function withGeoOverride(
  matrix: MatrixV1,
  overrides: { timezone: string; locale: string; languages: readonly [string, ...string[]] },
): MatrixV1 {
  const { timezone, locale, languages } = overrides;
  // Rebuild the tuple so the result never aliases the caller's array.
  const [primary, ...rest] = languages;
  const patched: MatrixV1 = {
    ...matrix,
    timezone,
    locale,
    languages: [primary, ...rest],
  };
  return patched;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Reconcile the matrix's declared `(timezone, locale)` against the probed
 * exit-IP geo according to `mode`. Pure: the input matrix is never
 * mutated; every override path returns a fresh matrix object.
 *
 * **Mismatch criteria**
 * - **Timezone**: the matrix zone and the IP zone resolve (via
 *   `tzOffsetMinutes`) to different UTC offsets. Offsets are compared, not
 *   zone names, so `America/New_York` and `America/Detroit` are treated as
 *   equal. If either zone cannot be resolved, the axis is incomparable and
 *   is not reported as a mismatch.
 * - **Locale**: the matrix locale's region (via `localeRegion`) differs
 *   from the IP country code, compared case-insensitively. A locale with
 *   no comparable region (e.g. `"en"`) matches any country.
 *
 * **Per-mode behaviour**
 *
 * | Mode | probe = null | any mismatch | no mismatch |
 * |---|---|---|---|
 * | `privacy-fallback` | UTC + en-US | UTC + en-US | passthrough |
 * | `auto-correct` | passthrough | IP timezone + IP-country locale | passthrough |
 * | `strict` | passthrough | THROW | passthrough |
 * | `off` | passthrough | passthrough (geo ignored) | passthrough |
 *
 * In `strict` mode a `null` probe passes the matrix through: no probe
 * result means no detected mismatch. Callers that need to fail closed on
 * probe failure must check for `action === "no-probe"` themselves.
 *
 * In `auto-correct` mode the IP timezone is only adopted when it is a
 * zone `tzOffsetMinutes` can resolve; otherwise the matrix keeps its own
 * timezone so a valid zone is never replaced by an unrecognised one.
 *
 * @param matrix Declared fingerprint matrix.
 * @param geo Probed exit-IP geolocation, or `null` when the probe failed.
 * @param mode Reconciliation policy.
 * @returns The (possibly adjusted) matrix plus a description of what was done.
 * @throws {GeoMismatchError} when `mode === "strict"` and a mismatch was
 *   detected.
 */
export function reconcileGeoConsistency(
  matrix: MatrixV1,
  geo: ExitGeo | null,
  mode: GeoConsistencyMode,
): GeoReconcileResult {
  if (mode === "off") {
    return { matrix, action: "off", geo: null };
  }

  // Single definition of the privacy-fallback override (used on both the
  // probe-failure path and the mismatch path).
  const privacyFallbackMatrix = (): MatrixV1 =>
    withGeoOverride(matrix, {
      timezone: "UTC",
      locale: "en-US",
      languages: ["en-US", "en"],
    });

  if (geo === null) {
    if (mode === "privacy-fallback") {
      return {
        matrix: privacyFallbackMatrix(),
        action: "privacy-fallback",
        geo: null,
        reason: "probe returned null (all endpoints failed); falling back to UTC+en-US",
      };
    }
    // auto-correct + strict: nothing to act on. Pass through.
    return { matrix, action: "no-probe", geo: null };
  }

  // Timezone axis: compare offsets, and only when both zones resolve.
  const matrixOffset = tzOffsetMinutes(matrix.timezone);
  const ipOffset = tzOffsetMinutes(geo.timezone);
  const tzMismatch =
    matrixOffset !== null && ipOffset !== null && matrixOffset !== ipOffset
      ? `tz offset ${matrixOffset}min (matrix ${matrix.timezone}) ≠ ${ipOffset}min (IP ${geo.timezone})`
      : null;

  // Locale axis: localeRegion() yields an upper-case code, so normalise the
  // IP country the same way before comparing. A null region is a permissive
  // match to avoid spurious mismatches.
  const matrixRegion = localeRegion(matrix.locale);
  const ipCountry = geo.country.toUpperCase();
  const localeMismatch =
    matrixRegion !== null && matrixRegion !== ipCountry
      ? `locale region ${matrixRegion} (matrix ${matrix.locale}) ≠ IP country ${geo.country}`
      : null;

  if (tzMismatch === null && localeMismatch === null) {
    return { matrix, action: "ok", geo };
  }

  const reason = [tzMismatch, localeMismatch].filter((x): x is string => x !== null).join("; ");

  if (mode === "strict") {
    throw new GeoMismatchError({ timezone: matrix.timezone, locale: matrix.locale }, geo, reason);
  }

  if (mode === "auto-correct") {
    const newLocale = primaryLocaleFor(geo.country);
    return {
      matrix: withGeoOverride(matrix, {
        // Never write a zone Intl could not resolve into the matrix.
        timezone: ipOffset !== null ? geo.timezone : matrix.timezone,
        locale: newLocale,
        // Languages list is derived from the new primary locale.
        languages: deriveLanguagesFor(newLocale),
      }),
      action: "auto-correct",
      geo,
      reason,
    };
  }

  // privacy-fallback
  return {
    matrix: privacyFallbackMatrix(),
    action: "privacy-fallback",
    geo,
    reason,
  };
}
|
|
326
|
+
|
|
327
|
+
/**
 * Build the language list that accompanies an `auto-correct` locale
 * override.
 *
 * The list is `[locale, language root, "en"]` with duplicates removed and
 * first occurrences kept: `"de-DE"` → `["de-DE", "de", "en"]`,
 * `"en-US"` → `["en-US", "en"]`. The language root is the text before the
 * first `-`; a locale with no `-` (or one that starts with `-`)
 * contributes no root.
 *
 * @param locale Primary locale; always the first element of the result.
 * @returns Non-empty, de-duplicated language list.
 */
function deriveLanguagesFor(locale: string): readonly [string, ...string[]] {
  const dashAt = locale.indexOf("-");
  const fallbacks = dashAt > 0 ? [locale.slice(0, dashAt), "en"] : ["en"];
  // Keep each fallback once, and never repeat the primary locale itself.
  const extras = fallbacks.filter(
    (tag, position) => tag !== locale && fallbacks.indexOf(tag) === position,
  );
  return [locale, ...extras];
}
|