@mochi.js/core 0.2.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,112 @@
1
+ /**
2
+ * Auto-pick the host-OS-matching profile when `LaunchOptions.profile` is
3
+ * omitted (task 0272). The function below is the pure decision table the
4
+ * launcher consults; tests stub `(platform, arch)` and assert the mapping
5
+ * without spinning a Chromium.
6
+ *
7
+ * ## Why
8
+ *
9
+ * Task 0271 documents the strategic thesis: spoofing Windows from a Linux
10
+ * server is the wrong default. Linux is a real-user signal, not a bot
11
+ * signal. WAFs trained on real traffic do not penalize Linux UAs because
12
+ * Linux desktops are massively overrepresented in high-LTV segments
13
+ * (developers, engineers, researchers). The signal was always
14
+ * `HeadlessChrome`, never Linux.
15
+ *
16
+ * Lifting host-OS-matching from "user types `profile: 'linux-chrome-stable'`
17
+ * by hand" into a default removes the entire class of "user accidentally
18
+ * spoofed Windows from a Linux DC and looked weird to the WAF" failures —
19
+ * the same argument that drove `detectLinuxServerEnv` for headless mode in
20
+ * task 0259.
21
+ *
22
+ * ## Mapping
23
+ *
24
+ * The host pairs `(process.platform, process.arch)` we currently support:
25
+ *
26
+ * - `linux/x64` → `linux-chrome-stable`
27
+ * - `darwin/arm64` → `mac-m4-chrome-stable`
28
+ * - `darwin/x64` → `mac-chrome-stable`
29
+ * - `win32/x64` → `windows-chrome-stable`
30
+ *
31
+ * Everything else (linux/arm64, freebsd, alpine-musl detection, win32/arm64,
32
+ * etc.) returns `null` — the launcher then throws with a precise diagnostic
33
+ * listing the six explicit profile IDs and a pointer to the
34
+ * choose-your-profile guide. We never silently fall back to a placeholder.
35
+ *
36
+ * ## Caveat — darwin/x64
37
+ *
38
+ * The current profile catalog (`packages/profiles/data/`) ships
39
+ * `mac-chrome-stable` as a darwin/arm64 capture (its `os.arch === "arm64"`
40
+ * in `profile.json`). The mapping above still routes darwin/x64 to
41
+ * `mac-chrome-stable` per task 0272's success criteria; users on Intel Macs
42
+ * who want a strict arch match should pass `profile` explicitly until an
43
+ * `mac-intel-chrome-stable` capture lands.
44
+ *
45
+ * @see tasks/0271-the-linux-os-thesis.md — the strategic thesis + evidence
46
+ * @see tasks/0272-host-os-profile-auto-default.md — engineering brief
47
+ */
48
+
49
+ import type { ProfileId } from "./launch";
50
+
/**
 * Profile id that best matches the machine this process is running on.
 *
 * Reads `process.platform` and `process.arch` and hands them to
 * `resolveDefaultProfileForHost`, which owns the actual mapping. The
 * result is `null` when the resolver has no entry for the host pair, so
 * the launcher can raise a precise diagnostic instead of guessing.
 *
 * Does no I/O and no logging, which keeps it cheap to call for
 * inspection (e.g. `console.log(mochi.defaultProfileForHost())`).
 *
 * @returns the host-matching profile id, or `null` for unsupported hosts.
 */
export function defaultProfileForHost(): ProfileId | null {
  const { platform, arch } = process;
  return resolveDefaultProfileForHost(platform, arch);
}
63
+
/**
 * Host-pair → profile-id decision table, taking the pair as arguments so
 * unit tests can exercise it without stubbing the global `process`.
 *
 * Only four `(platform, arch)` pairs map to a profile; every other
 * combination yields `null`.
 *
 * @param platform value in the shape of `process.platform`.
 * @param arch value in the shape of `process.arch`.
 * @returns the matching profile id, or `null` when the pair is unsupported.
 *
 * @internal
 */
export function resolveDefaultProfileForHost(
  platform: NodeJS.Platform,
  arch: string,
): ProfileId | null {
  // Neither side of a supported pair contains "/", so the joined key is
  // unambiguous.
  switch (`${platform}/${arch}`) {
    case "linux/x64":
      return "linux-chrome-stable";
    case "darwin/arm64":
      return "mac-m4-chrome-stable";
    case "darwin/x64":
      return "mac-chrome-stable";
    case "win32/x64":
      return "windows-chrome-stable";
    default:
      return null;
  }
}
81
+
82
+ /**
83
+ * The six real-device profile IDs a user can pass explicitly — a superset
84
+ * of what `defaultProfileForHost` returns — listed by the unsupported-host
85
+ * diagnostic. Order matches the task 0272 brief verbatim (stable message).
86
+ *
87
+ * @internal
88
+ */
89
+ export const EXPLICIT_PROFILE_IDS = [
90
+ "mac-m4-chrome-stable",
91
+ "mac-chrome-stable",
92
+ "mac-chrome-beta",
93
+ "windows-chrome-stable",
94
+ "linux-chrome-stable",
95
+ "mac-brave-stable",
96
+ ] as const satisfies readonly ProfileId[];
97
+
/**
 * Compose the diagnostic shown when `profile` was omitted and the host
 * pair has no default. The text is three newline-separated parts: a
 * headline naming the platform/arch, one bullet per entry of
 * `EXPLICIT_PROFILE_IDS`, and a pointer to the choose-your-profile guide.
 * The wording is pinned by task 0272, so keep it stable.
 *
 * @param platform host platform to echo in the message.
 * @param arch host architecture to echo in the message.
 * @returns the full multi-line diagnostic string.
 *
 * @internal
 */
export function unsupportedHostMessage(platform: NodeJS.Platform, arch: string): string {
  const bullets: string[] = [];
  for (const id of EXPLICIT_PROFILE_IDS) {
    bullets.push(` - ${id}`);
  }
  const headline =
    `[mochi] launch: no profile supplied and no host-matching default for ` +
    `platform=${platform} arch=${arch}. Pick one explicitly:`;
  const pointer = `See https://mochijs.com/docs/guides/choose-your-profile for the decision aid.`;
  // The bullet list is joined on its own first so the overall layout is
  // always headline / list / pointer.
  return [headline, bullets.join("\n"), pointer].join("\n");
}
@@ -0,0 +1,343 @@
1
+ /**
2
+ * Geo-consistency reconciler — cross-references the matrix's declared
3
+ * `(timezone, locale)` against the probed exit-IP geolocation and
4
+ * applies a `LaunchOptions.geoConsistency` policy on mismatch.
5
+ *
6
+ * The default policy is `"privacy-fallback"`: on mismatch (or probe
7
+ * failure), override the matrix to `UTC` + `en-US`. The session then
8
+ * fingerprints as a privacy-conscious user (Tor / Brave / hardened-FF
9
+ * style), which is benign in most threat models — across thousands of
10
+ * real users, mismatched-tz-vs-IP is the canonical bot signature; UTC
11
+ * + en-US looks like every Tor user.
12
+ *
13
+ * @see PLAN.md §9 — relational consistency, IP/TZ/Locale axis
14
+ * @see tasks/0262-ip-tz-locale-exit-consistency.md
15
+ */
16
+
17
+ import type { MatrixV1 } from "@mochi.js/consistency";
18
+ import type { ExitGeo } from "./geo-probe";
19
+
20
+ /**
21
+ * Reconciliation modes for `(matrix.timezone, matrix.locale)` vs exit IP.
22
+ *
23
+ * - `"privacy-fallback"` *(default)* — on mismatch (or probe failure),
24
+ * override to `UTC` + `en-US`. Fingerprints as a Tor-class user. UTC
25
+ * + en-US is the failure-mode-of-least-tampering: it identifies the
26
+ * user as privacy-aware, not as automated.
27
+ * - `"auto-correct"` — on mismatch, override the matrix's timezone with
28
+ * the IP's timezone and its locale/languages with the primary locale of
29
+ * the IP's country. Most "stealth" but trusts mochi's IP-derived defaults
30
+ * over the user's declared profile. A failed probe passes through.
31
+ * - `"strict"` — throw on mismatch; a failed probe passes through. The
32
+ * user must change profile or change proxy.
33
+ * - `"off"` — skip the probe entirely. Used by tests and by users with
34
+ * rate-limit problems.
35
+ */
36
+ export type GeoConsistencyMode = "privacy-fallback" | "auto-correct" | "strict" | "off";
37
+
38
+ /**
39
+ * Outcome of a reconciliation pass — exposed for diagnostics + the
40
+ * planned `_internalReconcile` test seam. `action` is `"ok"`, `"off"` or
41
+ * `"no-probe"` when the matrix passes through unchanged, and
42
+ * `"privacy-fallback"` / `"auto-correct"` when it was overridden per the
43
+ * policy. A strict-mode mismatch yields no result (the reconciler throws).
44
+ */
45
+ export interface GeoReconcileResult {
46
+ /** Possibly-adjusted matrix (always a fresh object when adjusted). */
47
+ readonly matrix: MatrixV1;
48
+ /** What happened. `"ok"` is the no-mismatch fast path. */
49
+ readonly action: "ok" | "no-probe" | "off" | "privacy-fallback" | "auto-correct";
50
+ /** The probed geo behind this decision; null when the probe was skipped or failed. */
51
+ readonly geo: ExitGeo | null;
52
+ /** Human-readable mismatch / fallback summary, when applicable. */
53
+ readonly reason?: string;
54
+ }
55
+
/**
 * Error raised by {@link reconcileGeoConsistency} in `"strict"` mode when
 * the probed exit-IP geo disagrees with the matrix. The message tells the
 * user to change the profile, change the proxy, or pick another
 * `geoConsistency` mode; the inputs are kept on the instance for
 * programmatic inspection.
 */
export class GeoMismatchError extends Error {
  /** Timezone + locale the matrix declared when the mismatch was found. */
  readonly matrix: { timezone: string; locale: string };
  /** Probe result the matrix was compared against. */
  readonly geo: ExitGeo;
  /** Human-readable description of which axis (or axes) mismatched. */
  readonly reason: string;

  constructor(matrix: { timezone: string; locale: string }, geo: ExitGeo, reason: string) {
    // Pre-format the two sides of the comparison; `this` is not touched
    // until after `super(...)`.
    const probed = `${geo.country}/${geo.timezone}, via ${geo.source}`;
    const declared = `${matrix.locale}/${matrix.timezone}`;
    super(
      [
        `[mochi] geoConsistency: strict — exit-IP geo (${probed}) does not match matrix (${declared}): `,
        `${reason}. Change the profile to match the proxy egress, change the proxy, `,
        `or pass geoConsistency: "privacy-fallback" | "auto-correct" | "off".`,
      ].join(""),
    );
    this.name = "GeoMismatchError";
    this.matrix = matrix;
    this.geo = geo;
    this.reason = reason;
  }
}
78
+
/**
 * UTC offset, in whole minutes, of an IANA timezone at a given instant.
 *
 * The offset is read from the `timeZoneName: "longOffset"` part produced
 * by `Intl.DateTimeFormat` (`"GMT+05:30"`, `"GMT-08:00"`, or a bare
 * `"GMT"` for zero). Comparing offsets rather than zone names lets zones
 * such as `America/New_York` and `America/Detroit` count as equal.
 *
 * @param zone IANA timezone identifier.
 * @param ref instant at which to evaluate the offset; defaults to now.
 * @returns minutes east of UTC (negative for zones west of it), or `null`
 *   when the formatter throws or the offset label has an unexpected
 *   shape. Callers treat `null` as "incomparable".
 */
export function tzOffsetMinutes(zone: string, ref: Date = new Date()): number | null {
  let label: string | undefined;
  try {
    const formatter = new Intl.DateTimeFormat("en-US", {
      timeZone: zone,
      timeZoneName: "longOffset",
    });
    for (const part of formatter.formatToParts(ref)) {
      if (part.type === "timeZoneName") {
        label = part.value;
        break;
      }
    }
  } catch {
    // Formatter construction or formatting failed (e.g. unknown zone).
    return null;
  }

  if (label === undefined) return null;
  // A zero offset is rendered without a numeric suffix.
  if (label === "GMT" || label === "UTC") return 0;

  const match = /^(?:GMT|UTC)([+-])(\d{1,2})(?::?(\d{2}))?$/.exec(label);
  if (match === null) return null;

  // The minutes group is optional, hence the "0" default.
  const [, signChar, hourDigits = "0", minuteDigits = "0"] = match;
  const hours = Number.parseInt(hourDigits, 10);
  const minutes = Number.parseInt(minuteDigits, 10);
  if (!Number.isFinite(hours) || !Number.isFinite(minutes)) return null;

  const magnitude = hours * 60 + minutes;
  return signChar === "-" ? -magnitude : magnitude;
}
114
+
/**
 * Region subtag of a BCP-47 locale, upper-cased, as parsed by
 * `Intl.Locale`: `"en-US"` → `"US"`, `"de-DE"` → `"DE"`.
 *
 * @param locale BCP-47 language tag.
 * @returns the region code, or `null` when the tag carries no region
 *   (`"en"`) or `Intl.Locale` rejects the tag.
 */
export function localeRegion(locale: string): string | null {
  let region: string | undefined;
  try {
    ({ region } = new Intl.Locale(locale));
  } catch {
    // Malformed tag.
    return null;
  }
  // Missing and empty regions both mean "no region".
  return region ? region.toUpperCase() : null;
}
128
+
129
+ /**
130
+ * Tiny country-code → primary-locale lookup for `auto-correct` mode.
131
+ * Covers the major proxy-egress countries; falls back to `en-<CC>` for
132
+ * unknown codes (which is wrong for, say, Bulgaria, but is at most a
133
+ * lower-stealth-ceiling fallback rather than a hard fail).
134
+ *
135
+ * Order chosen for the most common residential-proxy egress destinations.
136
+ */
137
+ const PRIMARY_LOCALE_BY_COUNTRY: Readonly<Record<string, string>> = {
138
+ US: "en-US",
139
+ GB: "en-GB",
140
+ CA: "en-CA",
141
+ AU: "en-AU",
142
+ IE: "en-IE",
143
+ NZ: "en-NZ",
144
+ DE: "de-DE",
145
+ AT: "de-AT",
146
+ CH: "de-CH",
147
+ FR: "fr-FR",
148
+ BE: "fr-BE",
149
+ IT: "it-IT",
150
+ ES: "es-ES",
151
+ MX: "es-MX",
152
+ AR: "es-AR",
153
+ BR: "pt-BR",
154
+ PT: "pt-PT",
155
+ NL: "nl-NL",
156
+ PL: "pl-PL",
157
+ RU: "ru-RU",
158
+ UA: "uk-UA",
159
+ CN: "zh-CN",
160
+ HK: "zh-HK",
161
+ TW: "zh-TW",
162
+ JP: "ja-JP",
163
+ KR: "ko-KR",
164
+ IN: "hi-IN",
165
+ ID: "id-ID",
166
+ TH: "th-TH",
167
+ VN: "vi-VN",
168
+ TR: "tr-TR",
169
+ IL: "he-IL",
170
+ SA: "ar-SA",
171
+ AE: "ar-AE",
172
+ EG: "ar-EG",
173
+ ZA: "en-ZA",
174
+ SG: "en-SG",
175
+ MY: "ms-MY",
176
+ PH: "en-PH",
177
+ SE: "sv-SE",
178
+ NO: "nb-NO",
179
+ DK: "da-DK",
180
+ FI: "fi-FI",
181
+ CZ: "cs-CZ",
182
+ HU: "hu-HU",
183
+ RO: "ro-RO",
184
+ GR: "el-GR",
185
+ };
186
+
/**
 * Best-effort primary locale for an ISO-3166-1 alpha-2 country code.
 * The code is upper-cased before the lookup; codes missing from
 * `PRIMARY_LOCALE_BY_COUNTRY` yield `en-<CC>`.
 */
function primaryLocaleFor(country: string): string {
  const code = country.toUpperCase();
  const known = PRIMARY_LOCALE_BY_COUNTRY[code];
  return known ?? `en-${code}`;
}
191
+
/**
 * Copy `matrix` with only its geo-axis fields (`timezone`, `locale`,
 * `languages`) replaced. Every other field is carried over by the object
 * spread, so nothing outside the geo axis is modified here.
 *
 * The input matrix is not mutated, and the returned `languages` is a new
 * array rather than the caller's instance.
 *
 * @param matrix matrix to copy.
 * @param overrides replacement geo-axis values; `languages` must be
 *   non-empty.
 * @returns a fresh matrix object with the overrides applied.
 */
function withGeoOverride(
  matrix: MatrixV1,
  overrides: { timezone: string; locale: string; languages: readonly [string, ...string[]] },
): MatrixV1 {
  const { timezone, locale, languages } = overrides;
  const adjusted: MatrixV1 = {
    ...matrix,
    timezone,
    locale,
    // Copy so later edits to the caller's list cannot leak into the matrix.
    languages: [...languages],
  };
  return adjusted;
}
215
+
/**
 * Reconcile the matrix against the probed exit-IP geo per the supplied
 * `mode`. Never mutates the input matrix; every override path goes
 * through `withGeoOverride` and hands back a fresh object.
 *
 * **Mismatch criteria**:
 *   - **Timezone**: matrix offset minutes ≠ IP offset minutes, both taken
 *     from `tzOffsetMinutes` at one shared reference instant. Zone-name
 *     equivalence (e.g. `America/New_York` vs `America/Detroit`) is
 *     intentional — they share an offset. If either offset is `null` the
 *     axis is treated as incomparable, not as a mismatch.
 *   - **Locale**: `localeRegion(matrix.locale)` ≠ IP country code,
 *     compared case-insensitively. A locale with no region (`"en"`) is
 *     treated as matching any country (we can't disprove it).
 *
 * **Per-mode behaviour**:
 *
 * | Mode | probe = null | tz and/or locale mismatch | both match |
 * |---|---|---|---|
 * | `privacy-fallback` | UTC+en-US | UTC+en-US | passthrough |
 * | `auto-correct` | passthrough | IP tz + primary locale of IP country | passthrough |
 * | `strict` | passthrough | THROW | passthrough |
 * | `off` | passthrough | n/a (probe ignored) | passthrough |
 *
 * `auto-correct` rewrites both geo axes whenever either one mismatches:
 * the timezone becomes the IP's zone, and the locale/languages are
 * derived from the IP's country.
 *
 * The `strict` × `probe = null` case intentionally passes the matrix
 * through: with no probe result there is nothing to compare against.
 *
 * @param matrix matrix whose `timezone` / `locale` are checked.
 * @param geo probed exit-IP geo, or `null` when the probe produced nothing.
 * @param mode reconciliation policy to apply.
 * @returns the (possibly overridden) matrix plus the action taken.
 * @throws {GeoMismatchError} when `mode === "strict"` and a real
 *   mismatch was detected.
 */
export function reconcileGeoConsistency(
  matrix: MatrixV1,
  geo: ExitGeo | null,
  mode: GeoConsistencyMode,
): GeoReconcileResult {
  if (mode === "off") {
    return { matrix, action: "off", geo: null };
  }

  // Shared builder for the two places that fall back to UTC + en-US.
  const privacyFallback = (probe: ExitGeo | null, why: string): GeoReconcileResult => ({
    matrix: withGeoOverride(matrix, {
      timezone: "UTC",
      locale: "en-US",
      languages: ["en-US", "en"],
    }),
    action: "privacy-fallback",
    geo: probe,
    reason: why,
  });

  if (geo === null) {
    if (mode === "privacy-fallback") {
      return privacyFallback(
        null,
        "probe returned null (all endpoints failed); falling back to UTC+en-US",
      );
    }
    // auto-correct + strict: nothing to act on. Pass through.
    return { matrix, action: "no-probe", geo: null };
  }

  // We have a probe result. Evaluate both offsets at the same instant so
  // the comparison cannot straddle a DST transition between two clock reads.
  const now = new Date();
  const matrixOffset = tzOffsetMinutes(matrix.timezone, now);
  const ipOffset = tzOffsetMinutes(geo.timezone, now);
  const tzMismatch =
    matrixOffset !== null && ipOffset !== null && matrixOffset !== ipOffset
      ? `tz offset ${matrixOffset}min (matrix ${matrix.timezone}) ≠ ${ipOffset}min (IP ${geo.timezone})`
      : null;

  // `localeRegion` upper-cases its result, so normalise the probe's
  // country code the same way before comparing.
  const ipCountry = geo.country.toUpperCase();
  const matrixRegion = localeRegion(matrix.locale);
  // matrixRegion === null => locale has no region (e.g. "en"); treat as
  // permissive match to avoid spurious mismatches.
  const localeMismatch =
    matrixRegion !== null && matrixRegion !== ipCountry
      ? `locale region ${matrixRegion} (matrix ${matrix.locale}) ≠ IP country ${geo.country}`
      : null;

  if (tzMismatch === null && localeMismatch === null) {
    return { matrix, action: "ok", geo };
  }

  const reason = [tzMismatch, localeMismatch].filter((x): x is string => x !== null).join("; ");

  if (mode === "strict") {
    throw new GeoMismatchError({ timezone: matrix.timezone, locale: matrix.locale }, geo, reason);
  }

  if (mode === "auto-correct") {
    const newLocale = primaryLocaleFor(geo.country);
    return {
      matrix: withGeoOverride(matrix, {
        timezone: geo.timezone,
        locale: newLocale,
        // languages list: primary locale, then its language root, then
        // "en" (deduped) — see `deriveLanguagesFor`.
        languages: deriveLanguagesFor(newLocale),
      }),
      action: "auto-correct",
      geo,
      reason,
    };
  }

  // privacy-fallback
  return privacyFallback(geo, reason);
}
326
+
/**
 * Build the `languages` list used by an `auto-correct` override.
 *
 * The list is the locale itself, then its language root (the text before
 * the first `-`, when there is one), then `"en"`, with duplicates
 * removed: `"de-DE"` → `["de-DE", "de", "en"]`, `"en-US"` →
 * `["en-US", "en"]`, `"en"` → `["en"]`.
 *
 * @param locale primary locale to lead the list with.
 * @returns a non-empty list whose first entry is `locale`.
 */
function deriveLanguagesFor(locale: string): readonly [string, ...string[]] {
  const separator = locale.indexOf("-");
  const candidates = separator > 0 ? [locale.slice(0, separator), "en"] : ["en"];
  // Drop anything equal to the primary locale or already listed earlier.
  const extras = candidates.filter(
    (tag, position) => tag !== locale && candidates.indexOf(tag) === position,
  );
  return [locale, ...extras];
}