@mochi.js/core 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,343 @@
1
+ /**
2
+ * Geo-consistency reconciler — cross-references the matrix's declared
3
+ * `(timezone, locale)` against the probed exit-IP geolocation and
4
+ * applies a `LaunchOptions.geoConsistency` policy on mismatch.
5
+ *
6
+ * The default policy is `"privacy-fallback"`: on mismatch (or probe
7
+ * failure), override the matrix to `UTC` + `en-US`. The session then
8
+ * fingerprints as a privacy-conscious user (Tor / Brave / hardened-FF
9
+ * style), which is benign in most threat models — across thousands of
10
+ * real users, mismatched-tz-vs-IP is the canonical bot signature; UTC
11
+ * + en-US looks like every Tor user.
12
+ *
13
+ * @see PLAN.md §9 — relational consistency, IP/TZ/Locale axis
14
+ * @see tasks/0262-ip-tz-locale-exit-consistency.md
15
+ */
16
+
17
+ import type { MatrixV1 } from "@mochi.js/consistency";
18
+ import type { ExitGeo } from "./geo-probe";
19
+
20
/**
 * Policy applied when the matrix's declared `(timezone, locale)` is
 * checked against the exit-IP geolocation.
 *
 * - `"privacy-fallback"` *(default)* — a mismatch or a failed probe
 *   rewrites the matrix to `UTC` + `en-US`, so the session reads as a
 *   privacy-aware user rather than an automated one.
 * - `"auto-correct"` — a mismatch is repaired from the IP-derived
 *   timezone and country. Highest stealth, but it trusts IP-derived
 *   defaults over the profile the user declared.
 * - `"strict"` — a mismatch throws; the user has to change either the
 *   profile or the proxy.
 * - `"off"` — no probe at all. Intended for tests and for users hitting
 *   rate limits on the geo endpoints.
 */
export type GeoConsistencyMode =
  | "privacy-fallback"
  | "auto-correct"
  | "strict"
  | "off";
37
+
38
/**
 * Outcome of a reconciliation pass — exposed for diagnostics and the
 * planned `_internalReconcile` test seam.
 *
 * The `action` field says what happened:
 * - `"ok"` — probe succeeded and nothing mismatched; matrix unchanged.
 * - `"off"` — reconciliation was disabled; matrix unchanged.
 * - `"no-probe"` — no geo result was available and the mode does not
 *   act on that; matrix unchanged.
 * - `"privacy-fallback"` / `"auto-correct"` — the matrix was adjusted
 *   per the corresponding policy.
 *
 * Strict-mode mismatches have no `action` value: they surface as a
 * thrown {@link GeoMismatchError} instead of a result object.
 */
export interface GeoReconcileResult {
  /** Possibly-adjusted matrix (always a fresh object when adjusted). */
  readonly matrix: MatrixV1;
  /** What happened. `"ok"` is the no-mismatch fast path. */
  readonly action: "ok" | "no-probe" | "off" | "privacy-fallback" | "auto-correct";
  /**
   * The geo result that drove this decision. `null` for `"no-probe"`,
   * `"off"`, and for a `"privacy-fallback"` triggered by a failed probe.
   */
  readonly geo: ExitGeo | null;
  /** Human-readable mismatch summary, when applicable. */
  readonly reason?: string;
}
55
+
56
/**
 * Error raised by {@link reconcileGeoConsistency} in `"strict"` mode when
 * the probed exit-IP geo disagrees with the matrix. It carries the
 * declared `(timezone, locale)`, the probed geo and the mismatch summary
 * so callers can report or act on them.
 */
export class GeoMismatchError extends Error {
  /** The matrix's declared geo-axis fields at the time of the check. */
  readonly matrix: { timezone: string; locale: string };
  /** The probe result that contradicted the matrix. */
  readonly geo: ExitGeo;
  /** Human-readable description of what mismatched. */
  readonly reason: string;

  constructor(matrix: { timezone: string; locale: string }, geo: ExitGeo, reason: string) {
    const observed = `${geo.country}/${geo.timezone}`;
    const declared = `${matrix.locale}/${matrix.timezone}`;
    const message = [
      `[mochi] geoConsistency: strict — exit-IP geo (${observed}, `,
      `via ${geo.source}) does not match matrix (${declared}): `,
      `${reason}. Change the profile to match the proxy egress, change the proxy, `,
      `or pass geoConsistency: "privacy-fallback" | "auto-correct" | "off".`,
    ].join("");
    super(message);

    this.name = "GeoMismatchError";
    this.matrix = matrix;
    this.geo = geo;
    this.reason = reason;
  }
}
78
+
79
/**
 * Resolve an IANA timezone to its UTC offset, in whole minutes, at the
 * instant `ref`.
 *
 * The offset is read from the `timeZoneName: "longOffset"` part produced
 * by `Intl.DateTimeFormat`, which covers fixed-offset zones (`UTC`,
 * `Etc/GMT+8`) as well as DST-aware ones (`America/New_York`). Comparing
 * offsets rather than zone names is deliberate: zones such as
 * `America/New_York` and `America/Detroit` share an offset and are
 * indistinguishable for fingerprinting.
 *
 * @param zone IANA timezone identifier.
 * @param ref Instant at which the offset is evaluated; defaults to now.
 * @returns Signed offset in minutes (east of UTC is positive), or `null`
 *   when the zone is rejected by `Intl` or the offset label has an
 *   unexpected shape — callers treat that as "incomparable".
 */
export function tzOffsetMinutes(zone: string, ref: Date = new Date()): number | null {
  let label: string | undefined;
  try {
    const formatter = new Intl.DateTimeFormat("en-US", {
      timeZone: zone,
      timeZoneName: "longOffset",
    });
    label = formatter.formatToParts(ref).find((part) => part.type === "timeZoneName")?.value;
  } catch {
    // Unknown zone or unformattable date.
    return null;
  }
  if (label === undefined) return null;

  // Label shapes: "GMT+05:30", "GMT-08:00", or bare "GMT" for a zero offset.
  if (label === "GMT" || label === "UTC") return 0;

  const match = /^(?:GMT|UTC)([+-])(\d{1,2})(?::?(\d{2}))?$/.exec(label);
  if (match === null) return null;

  const hours = Number.parseInt(match[2] ?? "0", 10);
  const mins = Number.parseInt(match[3] ?? "0", 10);
  if (!Number.isFinite(hours) || !Number.isFinite(mins)) return null;

  const total = hours * 60 + mins;
  return match[1] === "-" ? -1 * total : total;
}
114
+
115
/**
 * Read the region subtag of a BCP-47 locale through `Intl.Locale`.
 *
 * Examples: `"en-US"` → `"US"`, `"de-DE"` → `"DE"`, `"en"` → `null`.
 *
 * @param locale BCP-47 language tag.
 * @returns The upper-cased region, or `null` when the tag carries no
 *   region or `Intl.Locale` rejects it.
 */
export function localeRegion(locale: string): string | null {
  let region: string | undefined;
  try {
    region = new Intl.Locale(locale).region;
  } catch {
    // Malformed tag.
    return null;
  }
  // Both `undefined` and the empty string mean "no region".
  return region ? region.toUpperCase() : null;
}
128
+
129
/**
 * Country-code → primary-locale table consulted by `auto-correct` mode.
 *
 * Keys are upper-case ISO-3166-1 alpha-2 codes; values are the BCP-47
 * locale assumed for an exit IP in that country. The table is
 * deliberately small: it lists the major proxy-egress countries, ordered
 * by how common they are as residential-proxy destinations. A country
 * missing from the table has no entry here; the lookup helper decides
 * what to do in that case.
 */
const PRIMARY_LOCALE_BY_COUNTRY: Readonly<Record<string, string>> = {
  // English
  US: "en-US", GB: "en-GB", CA: "en-CA", AU: "en-AU", IE: "en-IE", NZ: "en-NZ",
  // German
  DE: "de-DE", AT: "de-AT", CH: "de-CH",
  // French
  FR: "fr-FR", BE: "fr-BE",
  // Italian / Spanish
  IT: "it-IT", ES: "es-ES", MX: "es-MX", AR: "es-AR",
  // Portuguese
  BR: "pt-BR", PT: "pt-PT",
  // Dutch / Polish / Russian / Ukrainian
  NL: "nl-NL", PL: "pl-PL", RU: "ru-RU", UA: "uk-UA",
  // Chinese
  CN: "zh-CN", HK: "zh-HK", TW: "zh-TW",
  // East / South / South-East Asia
  JP: "ja-JP", KR: "ko-KR", IN: "hi-IN", ID: "id-ID", TH: "th-TH", VN: "vi-VN",
  // Turkey / Israel
  TR: "tr-TR", IL: "he-IL",
  // Arabic
  SA: "ar-SA", AE: "ar-AE", EG: "ar-EG",
  // South Africa / Singapore / Malaysia / Philippines
  ZA: "en-ZA", SG: "en-SG", MY: "ms-MY", PH: "en-PH",
  // Nordics
  SE: "sv-SE", NO: "nb-NO", DK: "da-DK", FI: "fi-FI",
  // Central / South-East Europe
  CZ: "cs-CZ", HU: "hu-HU", RO: "ro-RO", GR: "el-GR",
};
186
+
187
/**
 * Best-effort primary locale for an ISO-3166-1 alpha-2 country code.
 *
 * The code is upper-cased before the table lookup, so `"de"` and `"DE"`
 * behave the same. A country missing from the table yields `en-<CC>`.
 */
function primaryLocaleFor(country: string): string {
  const code = country.toUpperCase();
  const known = PRIMARY_LOCALE_BY_COUNTRY[code];
  return known ?? `en-${code}`;
}
191
+
192
/**
 * Build a copy of `matrix` whose geo-axis fields (`timezone`, `locale`,
 * `languages`) are taken from `overrides`.
 *
 * Every other field is carried over by the spread, so the non-geo parts
 * of the profile stay exactly as declared. The input matrix is not
 * mutated, and the returned `languages` array is a new array rather than
 * the one passed in.
 *
 * @param matrix Source matrix; left untouched.
 * @param overrides Replacement geo-axis values; `languages` must be non-empty.
 * @returns A fresh matrix with the three geo-axis fields replaced.
 */
function withGeoOverride(
  matrix: MatrixV1,
  overrides: { timezone: string; locale: string; languages: readonly [string, ...string[]] },
): MatrixV1 {
  const { timezone, locale, languages } = overrides;
  // Rebuild the tuple so the result never aliases the caller's array.
  const [primary, ...rest] = languages;
  const adjusted: MatrixV1 = {
    ...matrix,
    timezone,
    locale,
    languages: [primary, ...rest],
  };
  return adjusted;
}
215
+
216
/**
 * Reconcile the matrix against the probed exit-IP geo per the supplied
 * `mode`. Pure: never mutates the input matrix; returns a fresh object on
 * any override path.
 *
 * **Mismatch criteria**:
 * - **Timezone**: matrix offset minutes ≠ IP offset minutes, both
 *   evaluated at the same instant via `tzOffsetMinutes`. Zone-name
 *   equivalence (e.g. `America/New_York` vs `America/Detroit`) is
 *   intentional — they share an offset and fingerprint identically. If
 *   either zone cannot be resolved, the timezone axis is treated as
 *   incomparable (no mismatch).
 * - **Locale**: the matrix locale's region ≠ IP country code, compared
 *   case-insensitively. A locale with no region (`"en"`) is treated as
 *   matching any country (we can't disprove it).
 *
 * **Per-mode behaviour** (matrix passes through unchanged unless
 * mismatch is detected):
 *
 * | Mode | probe = null | tz mismatch | locale mismatch | both match |
 * |---|---|---|---|---|
 * | `privacy-fallback` | UTC+en-US | UTC+en-US | UTC+en-US | passthrough |
 * | `auto-correct` | passthrough (best effort) | IP tz | IP locale | passthrough |
 * | `strict` | passthrough (no probe → no mismatch) | THROW | THROW | passthrough |
 * | `off` | passthrough | n/a (no probe) | n/a | passthrough |
 *
 * `auto-correct` details:
 * - The IP timezone is adopted on any mismatch, but only when it is a
 *   zone `tzOffsetMinutes` could resolve; an unrecognised IP zone is
 *   never written into the matrix.
 * - `locale` / `languages` are replaced only when the locale axis itself
 *   mismatched, so a locale whose region already agrees with the IP
 *   country (e.g. `fr-CA` on a Canadian exit) survives a timezone-only
 *   correction.
 *
 * The `strict` × `probe = null` case intentionally passes the matrix
 * through. A null probe means "we couldn't talk to any geo endpoint" —
 * which is most often a network blip, not a mismatch. Strict-mode users
 * who want to fail closed on probe failure should pair this with
 * external monitoring.
 *
 * @param matrix Declared profile matrix; never mutated.
 * @param geo Probed exit-IP geo, or `null` when the probe failed.
 * @param mode Reconciliation policy to apply.
 * @returns The (possibly adjusted) matrix plus what was done and why.
 * @throws {GeoMismatchError} when `mode === "strict"` and a real
 *   mismatch was detected.
 */
export function reconcileGeoConsistency(
  matrix: MatrixV1,
  geo: ExitGeo | null,
  mode: GeoConsistencyMode,
): GeoReconcileResult {
  if (mode === "off") {
    return { matrix, action: "off", geo: null };
  }
  if (geo === null) {
    if (mode === "privacy-fallback") {
      return privacyFallbackResult(
        matrix,
        null,
        "probe returned null (all endpoints failed); falling back to UTC+en-US",
      );
    }
    // auto-correct + strict: nothing to act on. Pass through.
    return { matrix, action: "no-probe", geo: null };
  }

  // Evaluate both zones at one shared instant so a DST transition landing
  // between two clock reads cannot manufacture (or hide) a mismatch.
  const now = new Date();
  const matrixOffset = tzOffsetMinutes(matrix.timezone, now);
  const ipOffset = tzOffsetMinutes(geo.timezone, now);
  const tzMismatch =
    matrixOffset !== null && ipOffset !== null && matrixOffset !== ipOffset
      ? `tz offset ${matrixOffset}min (matrix ${matrix.timezone}) ≠ ${ipOffset}min (IP ${geo.timezone})`
      : null;

  const matrixRegion = localeRegion(matrix.locale);
  // localeRegion() upper-cases its result, so normalise the probe's country
  // the same way before comparing; the locale lookup for auto-correct is
  // case-insensitive too.
  const ipCountry = geo.country.toUpperCase();
  // matrixRegion === null => locale has no region (e.g. "en"); treat as
  // permissive match to avoid spurious mismatches.
  const localeMismatch =
    matrixRegion !== null && matrixRegion !== ipCountry
      ? `locale region ${matrixRegion} (matrix ${matrix.locale}) ≠ IP country ${geo.country}`
      : null;

  if (tzMismatch === null && localeMismatch === null) {
    return { matrix, action: "ok", geo };
  }

  const reason = [tzMismatch, localeMismatch].filter((x): x is string => x !== null).join("; ");

  if (mode === "strict") {
    throw new GeoMismatchError({ timezone: matrix.timezone, locale: matrix.locale }, geo, reason);
  }

  if (mode === "auto-correct") {
    // Only adopt the IP zone when Intl recognised it; otherwise keep the
    // declared zone rather than injecting an unusable identifier.
    const timezone = ipOffset !== null ? geo.timezone : matrix.timezone;

    if (localeMismatch === null) {
      // Timezone-only correction: the declared locale already agrees with
      // the exit country, so leave locale/languages exactly as declared.
      return {
        matrix: { ...matrix, timezone },
        action: "auto-correct",
        geo,
        reason,
      };
    }

    const newLocale = primaryLocaleFor(geo.country);
    return {
      matrix: withGeoOverride(matrix, {
        timezone,
        locale: newLocale,
        // languages list: primary locale + its language root (e.g. "de-DE",
        // "de"). Keeps the language root present which sites read for
        // fallback negotiation.
        languages: deriveLanguagesFor(newLocale),
      }),
      action: "auto-correct",
      geo,
      reason,
    };
  }

  // privacy-fallback
  return privacyFallbackResult(matrix, geo, reason);
}

/** Build the `"privacy-fallback"` result: matrix forced to UTC + en-US. */
function privacyFallbackResult(
  matrix: MatrixV1,
  geo: ExitGeo | null,
  reason: string,
): GeoReconcileResult {
  return {
    matrix: withGeoOverride(matrix, {
      timezone: "UTC",
      locale: "en-US",
      languages: ["en-US", "en"],
    }),
    action: "privacy-fallback",
    geo,
    reason,
  };
}
326
+
327
/**
 * Build the `navigator.languages` list used by an `auto-correct` override.
 *
 * The list is the locale itself, then its language root (the text before
 * the first `-`, when there is one), then `"en"`, with duplicates
 * removed while keeping first-seen order. So `"de-DE"` gives
 * `["de-DE", "de", "en"]` and `"en-US"` gives `["en-US", "en"]`.
 *
 * The trailing `"en"` is there because non-English Chrome installs
 * commonly list English as a secondary language.
 *
 * @param locale Primary locale placed at the head of the list.
 * @returns A fresh, non-empty language list.
 */
function deriveLanguagesFor(locale: string): readonly [string, ...string[]] {
  const cut = locale.indexOf("-");
  // Candidates to append after the primary locale, in priority order.
  const candidates = cut > 0 ? [locale.slice(0, cut), "en"] : ["en"];
  const extras = candidates.filter(
    (tag, position) => tag !== locale && candidates.indexOf(tag) === position,
  );
  return [locale, ...extras];
}