@mochi.js/core 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +3 -4
- package/src/__tests__/cookies-jar.test.ts +2 -3
- package/src/__tests__/default-profile.test.ts +6 -8
- package/src/__tests__/dx-cluster.e2e.test.ts +1 -2
- package/src/__tests__/geo-consistency.test.ts +0 -1
- package/src/__tests__/geo-probe.test.ts +13 -13
- package/src/__tests__/init-injector.e2e.test.ts +0 -1
- package/src/__tests__/init-injector.test.ts +1 -2
- package/src/__tests__/inject.test.ts +1 -2
- package/src/__tests__/page-dx-cluster.test.ts +3 -4
- package/src/__tests__/piercing.test.ts +1 -1
- package/src/__tests__/proc-linux-server.test.ts +4 -4
- package/src/__tests__/proc.test.ts +3 -3
- package/src/__tests__/proxy-auth.test.ts +1 -2
- package/src/__tests__/screenshot.e2e.test.ts +1 -1
- package/src/__tests__/screenshot.test.ts +1 -1
- package/src/__tests__/window-size.e2e.test.ts +0 -1
- package/src/cdp/types.ts +0 -1
- package/src/default-profile.ts +6 -8
- package/src/geo-consistency.ts +0 -1
- package/src/geo-probe.ts +37 -32
- package/src/index.ts +1 -1
- package/src/launch.ts +38 -52
- package/src/page/element-handle.ts +0 -1
- package/src/page/piercing.ts +0 -1
- package/src/page/selector.ts +0 -1
- package/src/page.ts +31 -14
- package/src/proc.ts +5 -6
- package/src/proxy-auth.ts +1 -3
- package/src/session.ts +489 -124
- package/src/version.ts +1 -1
package/src/session.ts
CHANGED
|
@@ -19,12 +19,6 @@ import {
|
|
|
19
19
|
} from "@mochi.js/challenges";
|
|
20
20
|
import type { MatrixV1 } from "@mochi.js/consistency";
|
|
21
21
|
import { buildPayload, type PayloadResult } from "@mochi.js/inject";
|
|
22
|
-
import {
|
|
23
|
-
openCtx as defaultOpenCtx,
|
|
24
|
-
requestOnCtx as defaultRequestOnCtx,
|
|
25
|
-
type NetCtx,
|
|
26
|
-
type NetFetchInit,
|
|
27
|
-
} from "@mochi.js/net";
|
|
28
22
|
import {
|
|
29
23
|
type InitInjectorHandle,
|
|
30
24
|
installInitInjector,
|
|
@@ -36,23 +30,6 @@ import { Page } from "./page";
|
|
|
36
30
|
import type { ChromiumProcess } from "./proc";
|
|
37
31
|
import { VERSION } from "./version";
|
|
38
32
|
|
|
39
|
-
/**
|
|
40
|
-
* Injection seam for the network FFI. Session uses this internally so tests
|
|
41
|
-
* can stub the FFI layer without spinning up the cdylib. Production code
|
|
42
|
-
* defaults to `@mochi.js/net`.
|
|
43
|
-
*
|
|
44
|
-
* @internal
|
|
45
|
-
*/
|
|
46
|
-
export interface NetAdapter {
|
|
47
|
-
openCtx(spec: { preset: string; proxy?: string }): NetCtx;
|
|
48
|
-
requestOnCtx(ctx: NetCtx, url: string, init: NetFetchInit): Response;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
const defaultNetAdapter: NetAdapter = {
|
|
52
|
-
openCtx: defaultOpenCtx,
|
|
53
|
-
requestOnCtx: defaultRequestOnCtx,
|
|
54
|
-
};
|
|
55
|
-
|
|
56
33
|
/**
|
|
57
34
|
* Per-call timeout for the worker idOnly inject roundtrip. 5s, not the
|
|
58
35
|
* router's 30s default — workers spawned by sites like sannysoft,
|
|
@@ -97,16 +74,9 @@ export interface SessionInit {
|
|
|
97
74
|
* When true, skip {@link buildPayload} AND skip the init-injector install
|
|
98
75
|
* (no `Fetch.fulfillRequest` body splice on documents); worker targets
|
|
99
76
|
* receive no inject either. Intended for `mochi capture` and similar
|
|
100
|
-
* baseline-collection flows. PLAN.md §12.1,
|
|
77
|
+
* baseline-collection flows. PLAN.md §12.1.
|
|
101
78
|
*/
|
|
102
79
|
bypassInject?: boolean;
|
|
103
|
-
/**
|
|
104
|
-
* Optional outbound proxy URL forwarded to the network FFI for
|
|
105
|
-
* `Session.fetch` requests. Out-of-band requests honour this independently
|
|
106
|
-
* of the browser's `--proxy-server` flag (which already sees the proxy via
|
|
107
|
-
* the CDP launch path).
|
|
108
|
-
*/
|
|
109
|
-
netProxy?: string;
|
|
110
80
|
/**
|
|
111
81
|
* Optional proxy credentials. When set, the Session attaches a CDP
|
|
112
82
|
* `Fetch.authRequired` listener so HTTP / SOCKS5 proxy auth challenges
|
|
@@ -117,14 +87,6 @@ export interface SessionInit {
|
|
|
117
87
|
* @see proxy-auth.ts for the §8.2 invariant rationale.
|
|
118
88
|
*/
|
|
119
89
|
proxyAuth?: { username: string; password: string };
|
|
120
|
-
/**
|
|
121
|
-
* Network adapter override — tests inject a stub here to exercise the
|
|
122
|
-
* `Session.fetch` wiring without loading the cdylib. Production code does
|
|
123
|
-
* not pass this; the default uses `@mochi.js/net`.
|
|
124
|
-
*
|
|
125
|
-
* @internal
|
|
126
|
-
*/
|
|
127
|
-
netAdapter?: NetAdapter;
|
|
128
90
|
/**
|
|
129
91
|
* Convenience layer toggles surfaced via
|
|
130
92
|
* `LaunchOptions.challenges`. When `challenges.turnstile.autoClick` is
|
|
@@ -156,7 +118,7 @@ export interface StorageSnapshot {
|
|
|
156
118
|
sessionStorage: Record<string, Record<string, string>>;
|
|
157
119
|
}
|
|
158
120
|
|
|
159
|
-
// ---- cookie-jar persistence
|
|
121
|
+
// ---- cookie-jar persistence -------------------------------------
|
|
160
122
|
|
|
161
123
|
/**
|
|
162
124
|
* Current on-disk cookie-file format version. Bumped on incompatible header
|
|
@@ -170,7 +132,6 @@ export const COOKIE_JAR_FORMAT_VERSION = 1 as const;
|
|
|
170
132
|
* verbatim `Storage.getCookies` payload — every shipped Chromium revision
|
|
171
133
|
* agrees on this shape, so loading on a newer Chromium round-trips losslessly.
|
|
172
134
|
*
|
|
173
|
-
* @see tasks/0257-dx-cluster-cookies-storage-permissions.md (success criteria)
|
|
174
135
|
* @see https://chromedevtools.github.io/devtools-protocol/tot/Storage/#method-getCookies
|
|
175
136
|
*/
|
|
176
137
|
export interface CookieJarFile {
|
|
@@ -249,24 +210,30 @@ export class Session {
|
|
|
249
210
|
private readonly _pages: Page[] = [];
|
|
250
211
|
private closed = false;
|
|
251
212
|
/**
|
|
252
|
-
*
|
|
253
|
-
*
|
|
254
|
-
*
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
*
|
|
259
|
-
* client pool inside the Rust crate handles connection reuse for repeated
|
|
260
|
-
* calls. Closed on `Session.close`.
|
|
213
|
+
* Lazily-created scratch frame used by {@link fetch} to satisfy the
|
|
214
|
+
* `frameId` requirement of `Network.loadNetworkResource` AND to host the
|
|
215
|
+
* `page.evaluate("fetch(...)")` path for non-GET calls. The frame
|
|
216
|
+
* navigates `about:blank` once and is reused across every `Session.fetch`
|
|
217
|
+
* call. Closed on {@link close}.
|
|
218
|
+
*
|
|
219
|
+
* @internal
|
|
261
220
|
*/
|
|
262
|
-
private
|
|
221
|
+
private scratchFrame: { targetId: string; sessionId: string; frameId: string } | undefined;
|
|
263
222
|
/**
|
|
264
|
-
*
|
|
265
|
-
*
|
|
223
|
+
* Mutex for {@link ensureScratchFrame} — without it, two concurrent
|
|
224
|
+
* `Session.fetch` calls race on `Target.createTarget` and produce two
|
|
225
|
+
* scratch frames (only one tracked). The promise resolves once the first
|
|
226
|
+
* caller has finished setup; subsequent callers reuse the cached frame.
|
|
266
227
|
*
|
|
267
228
|
* @internal
|
|
268
229
|
*/
|
|
269
|
-
private
|
|
230
|
+
private scratchFramePromise:
|
|
231
|
+
| Promise<{
|
|
232
|
+
targetId: string;
|
|
233
|
+
sessionId: string;
|
|
234
|
+
frameId: string;
|
|
235
|
+
}>
|
|
236
|
+
| undefined;
|
|
270
237
|
/**
|
|
271
238
|
* The compiled inject payload for this session. Built once at construction
|
|
272
239
|
* from the resolved {@link MatrixV1}; reused across every new page and
|
|
@@ -282,7 +249,7 @@ export class Session {
|
|
|
282
249
|
/**
|
|
283
250
|
* Whether this session bypasses the inject pipeline (no `buildPayload`,
|
|
284
251
|
* no body splice via `Fetch.fulfillRequest`, no worker injection). Set
|
|
285
|
-
* from {@link SessionInit.bypassInject}. PLAN.md §12.1,
|
|
252
|
+
* from {@link SessionInit.bypassInject}. PLAN.md §12.1.
|
|
286
253
|
*
|
|
287
254
|
* @internal
|
|
288
255
|
*/
|
|
@@ -331,8 +298,6 @@ export class Session {
|
|
|
331
298
|
this.profile = init.matrix;
|
|
332
299
|
this.seed = init.seed;
|
|
333
300
|
this.bypassInject = init.bypassInject === true;
|
|
334
|
-
this.netProxy = init.netProxy;
|
|
335
|
-
this.netAdapter = init.netAdapter ?? defaultNetAdapter;
|
|
336
301
|
this.challengesOpts = init.challenges;
|
|
337
302
|
// Skip payload compilation entirely when bypassed — capture flows must
|
|
338
303
|
// not pay the build cost AND must not see the matrix-derived bytes.
|
|
@@ -590,81 +555,343 @@ export class Session {
|
|
|
590
555
|
}
|
|
591
556
|
|
|
592
557
|
/**
|
|
593
|
-
* Out-of-band fetch —
|
|
594
|
-
*
|
|
595
|
-
* own navigation/XHR/fetch are unaffected (they use Chromium's native
|
|
596
|
-
* TLS, which already matches a Chrome profile). Returns a standard Web
|
|
597
|
-
* `Response`. PLAN.md §5.4 / §10.
|
|
558
|
+
* Out-of-band fetch — routes through Chromium itself so JA4/JA3/H2 are
|
|
559
|
+
* real Chrome by definition. Returns a standard Web `Response`.
|
|
598
560
|
*
|
|
599
|
-
*
|
|
600
|
-
*
|
|
601
|
-
*
|
|
561
|
+
* ### Dual-mechanism routing
|
|
562
|
+
*
|
|
563
|
+
* The implementation picks one of two CDP paths based on the call shape.
|
|
564
|
+
* Both paths run inside the browser, so both inherit the session's
|
|
565
|
+
* cookie jar, proxy (`--proxy-server`), and TLS stack — the bytes a
|
|
566
|
+
* server observes are byte-identical to what Chromium sends on its own
|
|
567
|
+
* navigation.
|
|
568
|
+
*
|
|
569
|
+
* - **Mechanism A — `Network.loadNetworkResource`.** Used when the call
|
|
570
|
+
* is a simple GET (no `init.method` other than `"GET"`, no
|
|
571
|
+
* `init.headers`, no `init.body`). The CDP method bypasses the
|
|
572
|
+
* same-origin policy at the network layer — there is no CORS preflight
|
|
573
|
+
* and no `Origin` header is sent. Body is returned as an
|
|
574
|
+
* {@link IO.StreamHandle} which we drain via `IO.read` until EOF and
|
|
575
|
+
* then close. Requires a `frameId`; we lazily allocate an
|
|
576
|
+
* `about:blank` scratch frame and reuse it across calls.
|
|
577
|
+
*
|
|
578
|
+
* - **Mechanism B — `page.evaluate("fetch(url, init).then(...)")`.** Used
|
|
579
|
+
* for everything else (POST/PUT/DELETE, custom headers, request body).
|
|
580
|
+
* Full {@link RequestInit} semantics pass through: cookies inherit
|
|
581
|
+
* from the page's origin (the scratch frame is `about:blank`), CORS
|
|
582
|
+
* applies same as a real user's browser, redirects follow per
|
|
583
|
+
* `init.redirect`. Bodies are forwarded as `string` /
|
|
584
|
+
* `ArrayBuffer` / `URLSearchParams`; `Blob` / `FormData` /
|
|
585
|
+
* `ReadableStream` are not yet supported (rejected with a clear
|
|
586
|
+
* diagnostic). The response is reconstructed from a base64-encoded
|
|
587
|
+
* ArrayBuffer + a status / headers tuple.
|
|
588
|
+
*
|
|
589
|
+
* ### Cookie semantics (breaking change vs. 0.6)
|
|
590
|
+
*
|
|
591
|
+
* Both mechanisms share the browser's cookie jar. A cookie set via
|
|
592
|
+
* `Page.goto` or `session.cookies.set` is sent on the next
|
|
593
|
+
* `session.fetch` call to the same origin — no manual `Cookie` header
|
|
594
|
+
* propagation. The pre-0.7 wreq-routed `Session.fetch` was cookieless.
|
|
595
|
+
*
|
|
596
|
+
* ### What changed vs. 0.6
|
|
597
|
+
*
|
|
598
|
+
* - **No more Rust FFI.** The `@mochi.js/net` and `@mochi.js/net-rs`
|
|
599
|
+
* packages are gone; there is no cdylib to install or trust.
|
|
600
|
+
* - **Cookies inherit** (above).
|
|
601
|
+
* - **Non-GET respects CORS.** Mechanism B is a real `fetch` from the
|
|
602
|
+
* page's main world; cross-origin POSTs without `Access-Control-Allow-Origin`
|
|
603
|
+
* fail the same way they would for a user.
|
|
604
|
+
*
|
|
605
|
+
* @see PLAN.md §5.4 / §7
|
|
602
606
|
*/
|
|
603
607
|
async fetch(url: string, init?: RequestInit): Promise<Response> {
|
|
604
608
|
this.assertOpen();
|
|
605
|
-
const
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
609
|
+
const isSimpleGet =
|
|
610
|
+
init === undefined ||
|
|
611
|
+
((init.method === undefined || init.method.toUpperCase() === "GET") &&
|
|
612
|
+
init.headers === undefined &&
|
|
613
|
+
init.body === undefined);
|
|
614
|
+
if (isSimpleGet) return this.fetchViaLoadNetworkResource(url);
|
|
615
|
+
// Mechanism B: serialize the init eagerly so unsupported body shapes
|
|
616
|
+
// (FormData / Blob / ReadableStream) throw BEFORE we allocate any CDP
|
|
617
|
+
// resources — a no-op on the wire if the call would have failed
|
|
618
|
+
// anyway.
|
|
619
|
+
const initSerialized = serializeRequestInitForFetch(init as RequestInit);
|
|
620
|
+
return this.fetchViaPageEvaluate(url, initSerialized);
|
|
615
621
|
}
|
|
616
622
|
|
|
617
|
-
/**
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
623
|
+
/**
 * Mechanism A: issue `Network.loadNetworkResource` against the scratch
 * frame and turn the reply into a Web `Response`.
 *
 * The request is sent with `disableCache: false` and
 * `includeCredentials: true`, so the browser's cookies are attached. The
 * body, when present, arrives as an `IO.StreamHandle` that is drained by
 * {@link readIoStream}.
 *
 * NOTE(review): the original commentary states this CDP method works
 * without `Network.enable` and that a contract test pins that behaviour —
 * confirm against that test before relying on it.
 *
 * @param url - Absolute URL to load.
 * @returns A `Response` carrying the reported status, headers and body.
 * @throws Error when CDP reports `success: false`; the message includes the
 *   net error name and, when available, the HTTP status.
 *
 * @internal
 */
private async fetchViaLoadNetworkResource(url: string): Promise<Response> {
  const { frameId } = await this.ensureScratchFrame();
  const res = await this.router.send<{ resource: LoadNetworkResourcePageResult }>(
    "Network.loadNetworkResource",
    {
      frameId,
      url,
      options: { disableCache: false, includeCredentials: true },
    },
  );
  const resource = res.resource;
  if (!resource.success) {
    const name = resource.netErrorName ?? "fetch failed";
    const httpStatus =
      resource.httpStatusCode !== undefined ? ` (httpStatus=${resource.httpStatusCode})` : "";
    throw new Error(`[mochi] Session.fetch: ${name}${httpStatus}`);
  }
  // A missing or non-positive status is reported as 200, matching the
  // previous behaviour for successful loads.
  const status =
    typeof resource.httpStatusCode === "number" && resource.httpStatusCode > 0
      ? resource.httpStatusCode
      : 200;
  const headers = new Headers();
  if (resource.headers !== undefined) {
    for (const [k, v] of Object.entries(resource.headers)) {
      try {
        headers.append(k, String(v));
      } catch {
        // ignore unmappable header names
      }
    }
  }
  // Drain the stream whenever one was allocated so the IO handle is closed,
  // even if the bytes end up unused for a null-body status.
  const bytes =
    resource.stream === undefined
      ? new Uint8Array(0)
      : await this.readIoStream(resource.stream);
  // The `Response` constructor throws a TypeError when a non-null body is
  // combined with a null-body status, so hand it `null` for those.
  const isNullBodyStatus = status === 204 || status === 205 || status === 304;
  return new Response(isNullBodyStatus ? null : uint8ToArrayBuffer(bytes), { status, headers });
}
|
|
627
682
|
|
|
628
|
-
/**
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
683
|
+
/**
 * Drain an `IO.StreamHandle` produced by `Network.loadNetworkResource`.
 *
 * Each `IO.read` reply carries a `base64Encoded` flag: when it is `true` the
 * chunk is base64-encoded and is decoded to bytes; otherwise the chunk is
 * text and is UTF-8 encoded. Chunks are accumulated until `eof` is reported.
 *
 * The handle is closed in a `finally` block so it is released even when a
 * read rejects. `IO.close` itself is best-effort: a failure to close never
 * masks the read result or the read error.
 *
 * @param handle - Stream handle returned by `Network.loadNetworkResource`.
 * @returns All bytes read from the stream, concatenated in order.
 *
 * @internal
 */
private async readIoStream(handle: string): Promise<Uint8Array> {
  const chunks: Uint8Array[] = [];
  let totalLen = 0;
  // 64 KiB per read request.
  const READ_SIZE = 64 * 1024;
  // One encoder for the whole stream instead of one per chunk.
  const encoder = new TextEncoder();
  try {
    for (;;) {
      const r = await this.router.send<{ data: string; eof: boolean; base64Encoded?: boolean }>(
        "IO.read",
        { handle, size: READ_SIZE },
      );
      if (r.data.length > 0) {
        const bytes = r.base64Encoded === true ? base64ToBytes(r.data) : encoder.encode(r.data);
        chunks.push(bytes);
        totalLen += bytes.byteLength;
      }
      if (r.eof) break;
    }
  } finally {
    try {
      await this.router.send("IO.close", { handle });
    } catch {
      // best-effort — handle may have auto-released on EOF
    }
  }
  if (chunks.length === 0) return new Uint8Array(0);
  if (chunks.length === 1) return chunks[0] as Uint8Array;
  const out = new Uint8Array(totalLen);
  let offset = 0;
  for (const c of chunks) {
    out.set(c, offset);
    offset += c.byteLength;
  }
  return out;
}
|
|
731
|
+
|
|
732
|
+
/**
 * Mechanism B: run `fetch(url, init)` inside the scratch page via
 * `Runtime.callFunctionOn`, bound to the page's `document` object.
 *
 * The page-side function rebuilds the request body from the serialized
 * init (`__body` string or `__bodyB64` bytes), performs the fetch, and
 * returns `{ status, headers, bodyB64 }` by value. The body travels as
 * base64 so binary responses survive the JSON-only return channel.
 *
 * @param url - URL handed to the page-side `fetch`.
 * @param initSerialized - JSON produced by `serializeRequestInitForFetch`.
 * @returns A `Response` rebuilt from the page-side status, headers and body.
 * @throws Error when the page-side call reports `exceptionDetails` or
 *   returns no value.
 *
 * @internal
 */
private async fetchViaPageEvaluate(url: string, initSerialized: string): Promise<Response> {
  const { sessionId } = await this.ensureScratchFrame();
  const documentObjectId = await this.scratchDocumentObjectId(sessionId);
  // The function source is small and self-contained. We avoid any
  // `Runtime.evaluate` (per §8.2 / `Runtime.enable` is forbidden, plus
  // we want a deterministic context) and bind to the document objectId
  // so the call lands in the page's main world.
  const fnDeclaration = `async function(urlArg, initJson) {
    const init = JSON.parse(initJson);
    let bodyOut = init.__body;
    if (init.__bodyB64 !== undefined) {
      const bin = atob(init.__bodyB64);
      const bytes = new Uint8Array(bin.length);
      for (let i = 0; i < bin.length; i++) bytes[i] = bin.charCodeAt(i);
      bodyOut = bytes;
    }
    delete init.__body;
    delete init.__bodyB64;
    if (bodyOut !== undefined) init.body = bodyOut;
    const r = await fetch(urlArg, init);
    const buf = await r.arrayBuffer();
    let b64 = "";
    const view = new Uint8Array(buf);
    // Chunked btoa to keep intermediate strings small. The chunk size is a
    // multiple of 3 so only the final chunk can emit '=' padding and the
    // concatenation stays valid base64.
    const CHUNK = 0x7ffe;
    for (let i = 0; i < view.length; i += CHUNK) {
      let s = "";
      const end = Math.min(i + CHUNK, view.length);
      for (let j = i; j < end; j++) s += String.fromCharCode(view[j]);
      b64 += btoa(s);
    }
    const headers = {};
    r.headers.forEach((v, k) => { headers[k] = v; });
    return { status: r.status, headers, bodyB64: b64 };
  }`;
  const callRes = await this.router.send<{
    result: {
      value?: { status: number; headers: Record<string, string>; bodyB64: string };
      type: string;
    };
    exceptionDetails?: { exception?: { description?: string }; text?: string };
  }>(
    "Runtime.callFunctionOn",
    {
      functionDeclaration: fnDeclaration,
      objectId: documentObjectId,
      arguments: [{ value: url }, { value: initSerialized }],
      returnByValue: true,
      awaitPromise: true,
    },
    { sessionId },
  );
  if (callRes.exceptionDetails !== undefined) {
    const desc =
      callRes.exceptionDetails.exception?.description ??
      callRes.exceptionDetails.text ??
      "page-evaluate fetch threw";
    throw new Error(`[mochi] Session.fetch: ${desc}`);
  }
  const out = callRes.result.value;
  if (out === undefined) {
    throw new Error("[mochi] Session.fetch: page-evaluate fetch returned undefined");
  }
  const headers = new Headers();
  for (const [k, v] of Object.entries(out.headers)) {
    try {
      headers.append(k, v);
    } catch {
      // ignore unmappable header names
    }
  }
  // The `Response` constructor throws a TypeError when a non-null body is
  // combined with a null-body status, so hand it `null` for those.
  const isNullBodyStatus = out.status === 204 || out.status === 205 || out.status === 304;
  const body = isNullBodyStatus ? null : uint8ToArrayBuffer(base64ToBytes(out.bodyB64));
  return new Response(body, { status: out.status, headers });
}
|
|
649
820
|
|
|
650
821
|
/**
 * Lazily create the scratch frame used by {@link fetch}. The first call
 * creates an `about:blank` target, attaches a flat-mode session, sends
 * `Page.enable`, reads the main-frame id from `Page.getFrameTree`, and
 * caches the result in `scratchFrame`. Subsequent calls return the cache.
 *
 * Concurrent first-callers share the same in-flight promise so only one
 * `Target.createTarget` is issued. If setup fails after the target was
 * created, the target is closed (best-effort) so a later retry does not
 * leave an orphaned page behind; the in-flight promise is cleared either
 * way so the next call can retry.
 *
 * @returns The cached `{ targetId, sessionId, frameId }` triple.
 *
 * @internal
 */
private async ensureScratchFrame(): Promise<{
  targetId: string;
  sessionId: string;
  frameId: string;
}> {
  if (this.scratchFrame !== undefined) return this.scratchFrame;
  if (this.scratchFramePromise !== undefined) return this.scratchFramePromise;
  this.scratchFramePromise = (async () => {
    const created = await this.router.send<{ targetId: string }>("Target.createTarget", {
      url: "about:blank",
    });
    try {
      const attached = await this.router.send<{ sessionId: string }>("Target.attachToTarget", {
        targetId: created.targetId,
        flatten: true,
      });
      // `Page.enable` is sent before querying the frame tree; the main-frame
      // id itself comes from `Page.getFrameTree`, not from an event.
      await this.router.send("Page.enable", undefined, { sessionId: attached.sessionId });
      const tree = await this.router.send<{ frameTree: { frame: { id: string } } }>(
        "Page.getFrameTree",
        undefined,
        { sessionId: attached.sessionId },
      );
      this.scratchFrame = {
        targetId: created.targetId,
        sessionId: attached.sessionId,
        frameId: tree.frameTree.frame.id,
      };
      return this.scratchFrame;
    } catch (err) {
      // Setup failed part-way: release the half-initialised target so it is
      // not leaked, then surface the original error.
      try {
        await this.router.send("Target.closeTarget", { targetId: created.targetId });
      } catch {
        // best-effort — the target may already be gone
      }
      throw err;
    }
  })();
  try {
    const frame = await this.scratchFramePromise;
    return frame;
  } finally {
    this.scratchFramePromise = undefined;
  }
}
|
|
871
|
+
|
|
872
|
+
/**
|
|
873
|
+
* Resolve the scratch page's `document` objectId for `Runtime.callFunctionOn`.
|
|
874
|
+
* `DOM.getDocument` is the canonical "give me a fresh root NodeId"
|
|
875
|
+
* method; `DOM.resolveNode` then returns its `objectId`. Both are §8.2-
|
|
876
|
+
* clean (no `Runtime.enable`, no isolated worlds).
|
|
877
|
+
*
|
|
878
|
+
* @internal
|
|
879
|
+
*/
|
|
880
|
+
private async scratchDocumentObjectId(sessionId: string): Promise<string> {
|
|
881
|
+
const doc = await this.router.send<{ root: { nodeId: number } }>(
|
|
882
|
+
"DOM.getDocument",
|
|
883
|
+
{ depth: 0 },
|
|
884
|
+
{ sessionId },
|
|
667
885
|
);
|
|
886
|
+
const resolved = await this.router.send<{ object: { objectId?: string } }>(
|
|
887
|
+
"DOM.resolveNode",
|
|
888
|
+
{ nodeId: doc.root.nodeId },
|
|
889
|
+
{ sessionId },
|
|
890
|
+
);
|
|
891
|
+
if (resolved.object.objectId === undefined) {
|
|
892
|
+
throw new Error("[mochi] Session.fetch: scratch document objectId unresolved");
|
|
893
|
+
}
|
|
894
|
+
return resolved.object.objectId;
|
|
668
895
|
}
|
|
669
896
|
|
|
670
897
|
/**
|
|
@@ -693,16 +920,17 @@ export class Session {
|
|
|
693
920
|
// ignore — best-effort
|
|
694
921
|
}
|
|
695
922
|
}
|
|
696
|
-
//
|
|
697
|
-
//
|
|
698
|
-
//
|
|
699
|
-
if (this.
|
|
923
|
+
// Close the scratch frame used by Session.fetch (mechanisms A + B).
|
|
924
|
+
// `Target.closeTarget` is idempotent server-side; we only call when
|
|
925
|
+
// a scratch frame was actually opened.
|
|
926
|
+
if (this.scratchFrame !== undefined) {
|
|
927
|
+
const targetId = this.scratchFrame.targetId;
|
|
928
|
+
this.scratchFrame = undefined;
|
|
700
929
|
try {
|
|
701
|
-
this.
|
|
930
|
+
await this.router.send("Target.closeTarget", { targetId });
|
|
702
931
|
} catch (err) {
|
|
703
|
-
console.warn("[mochi]
|
|
932
|
+
if (!this.closed) console.warn("[mochi] scratch frame close failed:", err);
|
|
704
933
|
}
|
|
705
|
-
this.netCtx = undefined;
|
|
706
934
|
}
|
|
707
935
|
// Drop the unified init-injector subscription (and its `Fetch.disable`)
|
|
708
936
|
// BEFORE we tear down the router so the disable round-trip can still
|
|
@@ -823,7 +1051,7 @@ export class Session {
|
|
|
823
1051
|
* (no Page domain). PLAN.md §8.4 calls out that the worker target accepts
|
|
824
1052
|
* `Runtime.evaluate` even though `Runtime.enable` is forbidden by §8.2.
|
|
825
1053
|
*
|
|
826
|
-
* The Patchright-cited bootstrap
|
|
1054
|
+
* The Patchright-cited bootstrap (`crServiceWorkerPatch.ts:32-43`,
|
|
827
1055
|
* `crPagePatch.ts:404-417`) tightens the inject race window:
|
|
828
1056
|
* 1. `Runtime.evaluate("globalThis", { serialization: "idOnly" })` —
|
|
829
1057
|
* returns a `RemoteObject` whose `objectId` carries the worker's
|
|
@@ -1016,7 +1244,7 @@ export class Session {
|
|
|
1016
1244
|
}
|
|
1017
1245
|
}
|
|
1018
1246
|
|
|
1019
|
-
// ---- UA-CH metadata helpers
|
|
1247
|
+
// ---- UA-CH metadata helpers -------------------------------------
|
|
1020
1248
|
|
|
1021
1249
|
/**
|
|
1022
1250
|
* Single brand entry as accepted by `Network.setUserAgentOverride`'s
|
|
@@ -1191,7 +1419,7 @@ export function buildUserAgentMetadata(matrix: MatrixV1): {
|
|
|
1191
1419
|
};
|
|
1192
1420
|
}
|
|
1193
1421
|
|
|
1194
|
-
// ---- cookie-jar factory
|
|
1422
|
+
// ---- cookie-jar factory -----------------------------------------
|
|
1195
1423
|
|
|
1196
1424
|
/**
|
|
1197
1425
|
* Build the {@link CookieJar} returned by `Session.cookies`. Bound to one
|
|
@@ -1281,3 +1509,140 @@ function createCookieJar(session: Session): CookieJar {
|
|
|
1281
1509
|
},
|
|
1282
1510
|
};
|
|
1283
1511
|
}
|
|
1512
|
+
|
|
1513
|
+
// ---- Session.fetch helpers --------------------------------------
|
|
1514
|
+
|
|
1515
|
+
/**
|
|
1516
|
+
* Shape of the `Network.loadNetworkResource` reply per the CDP `tot`
|
|
1517
|
+
* spec. The `stream` handle, when present, is an {@link IO.StreamHandle}
|
|
1518
|
+
* that must be drained via `IO.read` until EOF and then `IO.close`d.
|
|
1519
|
+
*
|
|
1520
|
+
* @internal
|
|
1521
|
+
* @see https://chromedevtools.github.io/devtools-protocol/tot/Network/#method-loadNetworkResource
|
|
1522
|
+
*/
|
|
1523
|
+
interface LoadNetworkResourcePageResult {
|
|
1524
|
+
success: boolean;
|
|
1525
|
+
netError?: number;
|
|
1526
|
+
netErrorName?: string;
|
|
1527
|
+
httpStatusCode?: number;
|
|
1528
|
+
/** `IO.StreamHandle` — drain via `IO.read` until EOF. Undefined on empty body. */
|
|
1529
|
+
stream?: string;
|
|
1530
|
+
headers?: Record<string, string>;
|
|
1531
|
+
}
|
|
1532
|
+
|
|
1533
|
+
/**
 * Copy the bytes viewed by a `Uint8Array` into a fresh, exactly-sized
 * `ArrayBuffer`.
 *
 * `Response` is handed an `ArrayBuffer` rather than the typed array because
 * some DOM typings reject `Uint8Array<ArrayBufferLike>` as `BodyInit`. The
 * result is always a copy: it never aliases the input, and it is a real
 * `ArrayBuffer` even when the view is backed by a `SharedArrayBuffer`.
 *
 * @param bytes - View whose visible range (offset + length) is copied.
 * @returns A new `ArrayBuffer` of `bytes.byteLength` bytes.
 *
 * @internal
 */
function uint8ToArrayBuffer(bytes: Uint8Array): ArrayBuffer {
  const copy = new ArrayBuffer(bytes.byteLength);
  new Uint8Array(copy).set(bytes);
  return copy;
}
|
|
1545
|
+
|
|
1546
|
+
/**
|
|
1547
|
+
* Decode a base64-encoded string into a `Uint8Array`. Used by
|
|
1548
|
+
* {@link Session.fetch}'s mechanisms A (when `IO.read` returns
|
|
1549
|
+
* `base64Encoded: true`) and B (the page-evaluate path always returns
|
|
1550
|
+
* base64 so binary responses round-trip intact).
|
|
1551
|
+
*
|
|
1552
|
+
* Bun ships `atob` natively; the whole string is decoded in a single call.
|
|
1553
|
+
*
|
|
1554
|
+
* @internal
|
|
1555
|
+
*/
|
|
1556
|
+
function base64ToBytes(b64: string): Uint8Array {
|
|
1557
|
+
if (b64.length === 0) return new Uint8Array(0);
|
|
1558
|
+
const bin = atob(b64);
|
|
1559
|
+
const out = new Uint8Array(bin.length);
|
|
1560
|
+
for (let i = 0; i < bin.length; i++) out[i] = bin.charCodeAt(i);
|
|
1561
|
+
return out;
|
|
1562
|
+
}
|
|
1563
|
+
|
|
1564
|
+
/**
 * Serialize a {@link RequestInit} into a JSON string the page-evaluate
 * fetch path can consume.
 *
 * `method`, `headers`, `redirect`, `mode`, `credentials`, `referrer`,
 * `referrerPolicy`, `cache`, `integrity` and `keepalive` are copied when
 * defined. Other fields (for example `signal`) are not forwarded.
 *
 * Body handling:
 *
 *   - `string` → forwarded as the `__body` string field.
 *   - `URLSearchParams` → stringified into `__body`. Because the page then
 *     sees a plain string, a `content-type` of
 *     `application/x-www-form-urlencoded;charset=UTF-8` is added unless the
 *     caller already supplied a content type, matching what `fetch` does
 *     for a real `URLSearchParams` body.
 *   - `ArrayBuffer` / typed array → base64-encoded into `__bodyB64`; the
 *     page-side glue decodes it back to bytes.
 *   - `null` / `undefined` → no body field.
 *   - anything else (`Blob`, `FormData`, `ReadableStream`) → throws.
 *
 * @param init - Request options to serialize.
 * @returns JSON text of the serializable subset of `init`.
 * @throws Error when the body is of an unsupported type.
 *
 * @internal
 */
function serializeRequestInitForFetch(init: RequestInit): string {
  const out: Record<string, unknown> = {};
  const headers = init.headers !== undefined ? headersInitToRecord(init.headers) : undefined;
  if (init.method !== undefined) out.method = init.method;
  if (headers !== undefined) out.headers = headers;
  if (init.redirect !== undefined) out.redirect = init.redirect;
  if (init.mode !== undefined) out.mode = init.mode;
  if (init.credentials !== undefined) out.credentials = init.credentials;
  if (init.referrer !== undefined) out.referrer = init.referrer;
  if (init.referrerPolicy !== undefined) out.referrerPolicy = init.referrerPolicy;
  if (init.cache !== undefined) out.cache = init.cache;
  if (init.integrity !== undefined) out.integrity = init.integrity;
  if (init.keepalive !== undefined) out.keepalive = init.keepalive;
  const b = init.body;
  if (b !== undefined && b !== null) {
    if (typeof b === "string") {
      out.__body = b;
    } else if (b instanceof URLSearchParams) {
      out.__body = b.toString();
      // A string body would otherwise be sent as text/plain; restore the
      // content type `fetch` derives from a URLSearchParams body.
      const hasContentType =
        headers !== undefined &&
        Object.keys(headers).some((name) => name.toLowerCase() === "content-type");
      if (!hasContentType) {
        out.headers = {
          ...headers,
          "content-type": "application/x-www-form-urlencoded;charset=UTF-8",
        };
      }
    } else if (b instanceof ArrayBuffer) {
      out.__bodyB64 = bytesToBase64(new Uint8Array(b));
    } else if (ArrayBuffer.isView(b)) {
      const view = b as ArrayBufferView;
      out.__bodyB64 = bytesToBase64(new Uint8Array(view.buffer, view.byteOffset, view.byteLength));
    } else {
      // Blob / FormData / ReadableStream — would need a separate transport
      // (multipart / streaming) that the JSON-only page-evaluate seam can't
      // express today.
      throw new Error(
        "[mochi] Session.fetch: Blob, FormData, and ReadableStream bodies are not yet supported — " +
          "use string / ArrayBuffer / URLSearchParams or wait for the streaming-body PR.",
      );
    }
  }
  return JSON.stringify(out);
}
|
|
1615
|
+
|
|
1616
|
+
/**
 * Coerce a Web `Headers` / record / array-pair shape into a plain record.
 *
 * Every shape is normalised through the `Headers` class, so names come out
 * lower-cased and repeated names (for example two pairs with the same name
 * in the array form) are combined into one comma-separated value instead of
 * the last one silently winning.
 *
 * @param h - Headers in any `HeadersInit` form.
 * @returns A plain `name → value` record.
 * @throws TypeError when `Headers` rejects a header name or value.
 */
function headersInitToRecord(h: HeadersInit): Record<string, string> {
  const normalized = h instanceof Headers ? h : new Headers(h);
  const out: Record<string, string> = {};
  normalized.forEach((value, name) => {
    out[name] = value;
  });
  return out;
}
|
|
1636
|
+
|
|
1637
|
+
/**
 * Encode a `Uint8Array` to base64.
 *
 * The input is encoded in chunks to keep the intermediate binary strings
 * small. The chunk size must be a multiple of 3: base64 maps 3 bytes to 4
 * characters, so any other size makes every non-final chunk end in `=`
 * padding and the concatenated output is no longer valid base64.
 *
 * @param bytes - Bytes to encode.
 * @returns Standard base64 text; the empty string for empty input.
 */
function bytesToBase64(bytes: Uint8Array): string {
  // 32766 = 3 * 10922 — largest multiple of 3 not above 0x8000.
  const CHUNK = 0x7ffe;
  let out = "";
  for (let i = 0; i < bytes.length; i += CHUNK) {
    let s = "";
    const end = Math.min(i + CHUNK, bytes.length);
    for (let j = i; j < end; j++) s += String.fromCharCode(bytes[j] as number);
    out += btoa(s);
  }
  return out;
}
|