imprint-mcp 0.4.6 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/examples/google-flights/README.md +2 -0
- package/examples/google-flights/_shared/flights_request.ts +10 -4
- package/examples/google-flights/get_flight_booking_details/index.ts +5 -2
- package/examples/google-flights/get_flight_booking_details/parser.ts +8 -0
- package/examples/google-flights/get_flight_booking_details/workflow.json +5 -2
- package/examples/google-flights/get_flight_calendar_prices/index.ts +5 -2
- package/examples/google-flights/get_flight_calendar_prices/parser.ts +8 -4
- package/examples/google-flights/get_flight_calendar_prices/workflow.json +5 -2
- package/examples/google-flights/lookup_airport/index.ts +3 -0
- package/examples/google-flights/lookup_airport/parser.ts +8 -1
- package/examples/google-flights/lookup_airport/workflow.json +3 -0
- package/examples/google-flights/search_flights/index.ts +63 -8
- package/examples/google-flights/search_flights/parser.ts +10 -0
- package/examples/google-flights/search_flights/request-transform.ts +45 -0
- package/examples/google-flights/search_flights/workflow.json +63 -8
- package/package.json +1 -1
- package/prompts/build-planning.md +1 -1
- package/prompts/compile-agent.md +5 -3
- package/prompts/prereq-builder.md +2 -1
- package/src/imprint/backend-ladder.ts +436 -43
- package/src/imprint/cdp-browser-fetch.ts +176 -6
- package/src/imprint/cdp-jar-cache.ts +105 -10
- package/src/imprint/compile-tools.ts +2 -2
- package/src/imprint/mcp-server.ts +152 -65
- package/src/imprint/probe-backends.ts +41 -10
- package/src/imprint/runtime.ts +24 -12
- package/src/imprint/stealth-fetch.ts +71 -0
- package/src/imprint/stealth-token-cache.ts +38 -1
- package/src/imprint/types.ts +45 -0
|
@@ -28,6 +28,7 @@
|
|
|
28
28
|
* trusted browser session.
|
|
29
29
|
*/
|
|
30
30
|
|
|
31
|
+
import { Buffer } from 'node:buffer';
|
|
31
32
|
import CDP from 'chrome-remote-interface';
|
|
32
33
|
import { launchChromium, proxyUrl } from './chromium.ts';
|
|
33
34
|
import { createLog } from './log.ts';
|
|
@@ -52,6 +53,23 @@ export interface MintedJar {
|
|
|
52
53
|
/** The bootstrap page HTML, so callers can satisfy html_regex captures
|
|
53
54
|
* (e.g. csrf / csp-nonce scraped from the page) without the browser. */
|
|
54
55
|
html: string;
|
|
56
|
+
/** Browser-generated requests observed while the bootstrap page loaded. Lets
|
|
57
|
+
* workflows capture replay headers minted by page JavaScript for later XHRs. */
|
|
58
|
+
observedRequests?: Array<{
|
|
59
|
+
method: string;
|
|
60
|
+
url: string;
|
|
61
|
+
headers: Record<string, string>;
|
|
62
|
+
body?: string;
|
|
63
|
+
resourceType?: string;
|
|
64
|
+
source?: 'browser' | 'replay';
|
|
65
|
+
response?: {
|
|
66
|
+
status: number;
|
|
67
|
+
headers: Record<string, string>;
|
|
68
|
+
body?: string;
|
|
69
|
+
};
|
|
70
|
+
}>;
|
|
71
|
+
/** Exact page URL used to create the page-specific HTML / observed requests. */
|
|
72
|
+
bootstrapUrl?: string;
|
|
55
73
|
/** Date.now() at mint — the jar's validity is bounded (~2h fixed for Akamai). */
|
|
56
74
|
bootstrapEpoch: number;
|
|
57
75
|
/** The final `_abck` status field at capture (`0` = validated, `-1` = pending).
|
|
@@ -73,17 +91,36 @@ export interface MintedJar {
|
|
|
73
91
|
source?: 'mint' | 'recording';
|
|
74
92
|
}
|
|
75
93
|
|
|
76
|
-
/**
|
|
77
|
-
*
|
|
78
|
-
*
|
|
79
|
-
*
|
|
94
|
+
/**
 * Report whether a cookie jar looks like an Akamai-protected session.
 *
 * A jar counts as "Akamai-style" when at least one cookie name, compared
 * case-insensitively, is one of `_abck`, `bm_sv`, `ak_bmsc`, `bm_sz` or
 * `bm_mi`. Such jars must pass the stricter `jarCookiesValidated` check
 * before plain-fetch replay is trusted. Jars from other sites (a cached CDP
 * bootstrap page plus ordinary cookies) carry none of these names, so
 * Akamai-specific invalidation must not be applied to them.
 *
 * @param cookies Cookies to inspect; only `name` is read.
 * @returns `true` when any Akamai sensor/session cookie name is present.
 */
export function jarHasAkamaiValidationSignals(
  cookies: Array<{ name: string; value: string }>,
): boolean {
  const akamaiCookieNames = new Set(['_abck', 'bm_sv', 'ak_bmsc', 'bm_sz', 'bm_mi']);
  for (const cookie of cookies) {
    if (akamaiCookieNames.has(cookie.name.toLowerCase())) return true;
  }
  return false;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Decide whether a cookie jar is safe to replay.
 *
 * - A jar with no Akamai signal cookies is always considered replay-safe.
 * - An Akamai-style jar is replay-safe when the second `~`-separated field of
 *   its `_abck` cookie is `0` (validated), or when a `bm_sv` cookie (Akamai's
 *   validated-session marker) is present. `bm_sv` survives `_abck` rotating
 *   back to `~-1~`.
 *
 * Shared by the cdp mint and recording-seed paths so both judge "validated"
 * identically.
 *
 * NOTE: the `_abck` / `bm_sv` lookups here are exact (case-sensitive) matches,
 * while the Akamai-signal detection lowercases names first.
 *
 * @param cookies Cookies of the jar under test.
 * @returns `true` when the jar may be replayed without re-minting.
 */
export function jarCookiesValidated(cookies: Array<{ name: string; value: string }>): boolean {
  // Generic (non-Akamai) bootstrap artifacts have nothing to validate.
  if (!jarHasAkamaiValidationSignals(cookies)) return true;

  // Only the first `_abck` cookie is consulted; an empty value counts as absent.
  const abckCookie = cookies.find(({ name }) => name === '_abck');
  const abckFlag = abckCookie?.value ? abckCookie.value.split('~')[1] : undefined;

  return abckFlag === '0' || cookies.some(({ name }) => name === 'bm_sv');
}
|
|
85
117
|
|
|
86
118
|
export interface CdpBrowserFetch {
|
|
119
|
+
/** Actual page URL used to bootstrap this browser session. */
|
|
120
|
+
readonly bootstrapUrl: string;
|
|
121
|
+
/** Reuse the same Chrome process for a new bootstrap page. The next
|
|
122
|
+
* ensure/mint call navigates there and refreshes page-observed request state. */
|
|
123
|
+
setBootstrapUrl(bootstrapUrl: string): void;
|
|
87
124
|
/** typeof fetch — executes the request inside the live trusted Chrome page. */
|
|
88
125
|
readonly fetchImpl: typeof fetch;
|
|
89
126
|
/** Force the bootstrap navigation + `_abck` validation now; returns the
|
|
@@ -214,7 +251,7 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
214
251
|
// and never establishes the sensor session). Fall back to the origin root,
|
|
215
252
|
// which loads a real page and runs the Akamai sensor JS.
|
|
216
253
|
const baseLooksLikeApi = /\.act(\?|$)|\/api\//i.test(opts.baseUrl);
|
|
217
|
-
|
|
254
|
+
let navUrl = opts.bootstrapUrl ?? (baseLooksLikeApi ? `${baseOrigin}/` : opts.baseUrl);
|
|
218
255
|
const abckWaitMs = (opts.abckWaitSeconds ?? 25) * 1000;
|
|
219
256
|
const reqTimeoutMs = opts.requestTimeoutMs ?? 60_000;
|
|
220
257
|
const cdpCommandTimeoutMs = opts.cdpCommandTimeoutMs ?? 20_000;
|
|
@@ -224,6 +261,18 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
224
261
|
let client: CdpClient | null = null;
|
|
225
262
|
let bootstrapped = false;
|
|
226
263
|
let appliedUa: string | undefined;
|
|
264
|
+
let forceDocumentReset = false;
|
|
265
|
+
const observedRequests: NonNullable<MintedJar['observedRequests']> = [];
|
|
266
|
+
const observedByRequestId = new Map<string, NonNullable<MintedJar['observedRequests']>[number]>();
|
|
267
|
+
const pendingResponseCaptures = new Set<Promise<void>>();
|
|
268
|
+
let networkObserversAttached = false;
|
|
269
|
+
const replayRequestKeys = new Set<string>();
|
|
270
|
+
|
|
271
|
+
function resetObservedRequests(): void {
|
|
272
|
+
observedRequests.length = 0;
|
|
273
|
+
observedByRequestId.clear();
|
|
274
|
+
pendingResponseCaptures.clear();
|
|
275
|
+
}
|
|
227
276
|
|
|
228
277
|
async function close(): Promise<void> {
|
|
229
278
|
const c = client;
|
|
@@ -274,6 +323,65 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
274
323
|
await withTimeout(Runtime.enable(), 'CDP Runtime.enable', cdpCommandTimeoutMs);
|
|
275
324
|
await withTimeout(Network.enable(), 'CDP Network.enable', cdpCommandTimeoutMs);
|
|
276
325
|
await withTimeout(Page.enable(), 'CDP Page.enable', cdpCommandTimeoutMs);
|
|
326
|
+
// Attach the CDP network observers only while `networkObserversAttached` is
// unset, so repeated passes through this setup code do not register
// duplicate listeners.
//
// The observers maintain two views of the same entries:
//   - `observedRequests`: an ordered window capped at 100 entries, and
//   - `observedByRequestId`: requestId -> entry, used to attach the response
//     (status/headers, then body) to the right request later.
if (!networkObserversAttached) {
  networkObserversAttached = true;

  Network.requestWillBeSent((params) => {
    const req = params.request;
    // Keep only string-valued request headers.
    const headers: Record<string, string> = {};
    for (const [k, v] of Object.entries(req.headers ?? {})) {
      if (typeof v === 'string') headers[k] = v;
    }
    const postData = (req as { postData?: unknown }).postData;
    const entry: NonNullable<MintedJar['observedRequests']>[number] = {
      method: req.method,
      url: req.url,
      headers,
      // A key registered in `replayRequestKeys` marks this request as one we
      // issued ourselves; `delete` both tests and consumes that registration.
      source: replayRequestKeys.delete(observedRequestKey(req.method, req.url, postData))
        ? 'replay'
        : 'browser',
      ...(typeof postData === 'string' ? { body: postData } : {}),
      ...(typeof params.type === 'string' ? { resourceType: params.type } : {}),
    };
    observedRequests.push(entry);
    observedByRequestId.set(params.requestId, entry);
    if (observedRequests.length > 100) {
      // Evict the oldest entry from BOTH structures. Dropping it only from the
      // array would leave the map (and any response body captured for the
      // entry) growing without bound for as long as this browser is reused.
      const evicted = observedRequests.shift();
      for (const [requestId, tracked] of observedByRequestId) {
        if (tracked === evicted) {
          observedByRequestId.delete(requestId);
          break;
        }
      }
    }
  });

  Network.responseReceived((params) => {
    const entry = observedByRequestId.get(params.requestId);
    if (!entry) return;
    // Keep only string-valued response headers.
    const headers: Record<string, string> = {};
    for (const [k, v] of Object.entries(params.response.headers ?? {})) {
      if (typeof v === 'string') headers[k] = v;
    }
    entry.response = {
      status: params.response.status,
      headers,
    };
  });

  Network.loadingFinished((params) => {
    const entry = observedByRequestId.get(params.requestId);
    if (!entry || !shouldCaptureObservedBody(entry)) return;
    // Fetch the body in the background; the promise is tracked in
    // `pendingResponseCaptures` so callers can wait for in-flight captures.
    const pending = (async () => {
      try {
        const bodyResult = await withTimeout(
          Network.getResponseBody({ requestId: params.requestId }),
          'CDP Network.getResponseBody',
          shortCdpTimeoutMs,
        );
        if (!entry.response) {
          // No responseReceived was recorded for this entry; use a placeholder.
          entry.response = { status: 200, headers: {} };
        }
        entry.response.body = bodyResult.base64Encoded
          ? Buffer.from(bodyResult.body, 'base64').toString('utf8')
          : bodyResult.body;
      } catch {
        // best-effort — response reuse simply won't match without a body
      }
    })();
    pendingResponseCaptures.add(pending);
    void pending.finally(() => pendingResponseCaptures.delete(pending));
  });
}
|
|
277
385
|
// Plant the high-trust seed cookies (the recording's validated Akamai jar)
|
|
278
386
|
// BEFORE navigating, so the first request to the protected origin carries the
|
|
279
387
|
// trusted session. A synthetic mint can reach `_abck~0~` yet still get its
|
|
@@ -335,6 +443,20 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
335
443
|
// loads normally. Bound the CDP command and proceed regardless — _abck
|
|
336
444
|
// polling below tolerates a partial load.
|
|
337
445
|
try {
|
|
446
|
+
if (forceDocumentReset) {
|
|
447
|
+
forceDocumentReset = false;
|
|
448
|
+
await withTimeout(
|
|
449
|
+
Page.navigate({ url: 'about:blank' }),
|
|
450
|
+
'CDP Page.navigate(about:blank)',
|
|
451
|
+
Math.max(1, Math.min(abckWaitMs, 5_000)),
|
|
452
|
+
);
|
|
453
|
+
await withTimeout(
|
|
454
|
+
Page.loadEventFired(),
|
|
455
|
+
'CDP Page.loadEventFired(about:blank)',
|
|
456
|
+
Math.max(1, Math.min(abckWaitMs, 2_000)),
|
|
457
|
+
).catch(() => {});
|
|
458
|
+
await sleep(250);
|
|
459
|
+
}
|
|
338
460
|
await withTimeout(
|
|
339
461
|
Page.navigate({ url: navUrl }),
|
|
340
462
|
'CDP Page.navigate',
|
|
@@ -363,6 +485,7 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
363
485
|
const start = Date.now();
|
|
364
486
|
let i = 0;
|
|
365
487
|
let status = '?';
|
|
488
|
+
let absentAbckChecks = 0;
|
|
366
489
|
let pos = { x: rand(120, 1100), y: rand(120, 600) };
|
|
367
490
|
while (Date.now() - start < abckWaitMs) {
|
|
368
491
|
try {
|
|
@@ -425,7 +548,16 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
425
548
|
}
|
|
426
549
|
await sleep(rand(180, 520)); // non-uniform dwell between interaction bursts
|
|
427
550
|
const abck = await getCookie(client, '_abck');
|
|
428
|
-
|
|
551
|
+
if (abck === undefined) {
|
|
552
|
+
absentAbckChecks++;
|
|
553
|
+
if (absentAbckChecks >= 2) {
|
|
554
|
+
status = 'absent';
|
|
555
|
+
break;
|
|
556
|
+
}
|
|
557
|
+
} else {
|
|
558
|
+
absentAbckChecks = 0;
|
|
559
|
+
status = abck.split('~')[1] ?? '?';
|
|
560
|
+
}
|
|
429
561
|
if (abckIsValidated(abck)) break;
|
|
430
562
|
i++;
|
|
431
563
|
}
|
|
@@ -519,6 +651,7 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
519
651
|
|
|
520
652
|
// Execute the fetch INSIDE the trusted page. credentials:'include' so the
|
|
521
653
|
// browser attaches the validated session cookies.
|
|
654
|
+
replayRequestKeys.add(observedRequestKey(method, fullUrl, body));
|
|
522
655
|
const expr = `(async () => {
|
|
523
656
|
try {
|
|
524
657
|
const ctrl = new AbortController();
|
|
@@ -562,6 +695,17 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
562
695
|
}) as typeof fetch;
|
|
563
696
|
|
|
564
697
|
return {
|
|
698
|
+
get bootstrapUrl() {
|
|
699
|
+
return navUrl;
|
|
700
|
+
},
|
|
701
|
+
setBootstrapUrl(nextBootstrapUrl: string): void {
|
|
702
|
+
if (nextBootstrapUrl === navUrl) return;
|
|
703
|
+
log(`retargeting pooled Chrome bootstrap (${navUrl} → ${nextBootstrapUrl})`);
|
|
704
|
+
navUrl = nextBootstrapUrl;
|
|
705
|
+
bootstrapped = false;
|
|
706
|
+
forceDocumentReset = true;
|
|
707
|
+
resetObservedRequests();
|
|
708
|
+
},
|
|
565
709
|
fetchImpl,
|
|
566
710
|
async ensureBootstrapped() {
|
|
567
711
|
const c = await ensure();
|
|
@@ -581,6 +725,7 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
581
725
|
},
|
|
582
726
|
async mintJar(): Promise<MintedJar> {
|
|
583
727
|
const c = await ensure();
|
|
728
|
+
await settlePendingResponseCaptures(pendingResponseCaptures, shortCdpTimeoutMs);
|
|
584
729
|
const cookies: MintedJar['cookies'] = [];
|
|
585
730
|
try {
|
|
586
731
|
const res = await withTimeout(
|
|
@@ -623,6 +768,8 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
623
768
|
cookies,
|
|
624
769
|
ua: appliedUa ?? '',
|
|
625
770
|
html,
|
|
771
|
+
observedRequests: observedRequests.slice(),
|
|
772
|
+
bootstrapUrl: navUrl,
|
|
626
773
|
bootstrapEpoch: Date.now(),
|
|
627
774
|
abckFlag: abck?.split('~')[1] ?? '?',
|
|
628
775
|
validated: jarCookiesValidated(cookies),
|
|
@@ -637,6 +784,29 @@ function sleep(ms: number): Promise<void> {
|
|
|
637
784
|
return new Promise((r) => setTimeout(r, ms));
|
|
638
785
|
}
|
|
639
786
|
|
|
787
|
+
/**
 * Decide whether the response body of an observed request should be fetched.
 *
 * Bodies are captured for entries whose resource type (compared
 * case-insensitively) is `xhr` or `fetch`, and also for entries with no
 * resource type (or an empty one). Every other resource type is skipped.
 *
 * @param entry Observed request; only `resourceType` is read.
 * @returns `true` when the body should be captured.
 */
function shouldCaptureObservedBody(
  entry: NonNullable<MintedJar['observedRequests']>[number],
): boolean {
  const kind = entry.resourceType?.toLowerCase();
  // Unknown type: err on the side of capturing.
  if (!kind) return true;
  return kind === 'xhr' || kind === 'fetch';
}
|
|
794
|
+
|
|
795
|
+
/**
 * Build the string key used to match a request against an observed one.
 *
 * The key is `<METHOD> <url> <body>`: the method upper-cased, then the URL,
 * then the body when it is a string. A non-string body contributes an empty
 * segment, so the key then ends with a trailing space.
 *
 * @param method HTTP method in any letter case.
 * @param url Request URL, used verbatim.
 * @param body Request body; ignored unless it is a string.
 * @returns The composite lookup key.
 */
function observedRequestKey(method: string, url: string, body: unknown): string {
  const bodySegment = typeof body === 'string' ? body : '';
  return [method.toUpperCase(), url, bodySegment].join(' ');
}
|
|
798
|
+
|
|
799
|
+
/**
 * Wait until every in-flight response-body capture has settled, but never
 * longer than `timeoutMs`.
 *
 * The set is snapshotted on entry: captures added afterwards are not awaited.
 * Rejections are tolerated (`Promise.allSettled`), so this never throws
 * because a capture failed.
 *
 * @param pending Promises of the body captures currently in flight.
 * @param timeoutMs Upper bound, in milliseconds, on how long to wait.
 * @returns Resolves once all snapshotted captures settle or the deadline hits.
 */
async function settlePendingResponseCaptures(
  pending: Set<Promise<void>>,
  timeoutMs: number,
): Promise<void> {
  if (pending.size === 0) return;

  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<void>((resolve) => {
    timer = setTimeout(resolve, timeoutMs);
  });
  try {
    await Promise.race([Promise.allSettled([...pending]), deadline]);
  } finally {
    // Always disarm the deadline: when the captures win the race, a timer left
    // running would keep the event loop alive for up to `timeoutMs`.
    clearTimeout(timer);
  }
}
|
|
809
|
+
|
|
640
810
|
async function withTimeout<T>(promise: Promise<T>, label: string, timeoutMs: number): Promise<T> {
|
|
641
811
|
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) return promise;
|
|
642
812
|
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
@@ -25,7 +25,11 @@ import {
|
|
|
25
25
|
writeFileSync,
|
|
26
26
|
} from 'node:fs';
|
|
27
27
|
import { join as pathJoin } from 'node:path';
|
|
28
|
-
import {
|
|
28
|
+
import {
|
|
29
|
+
type MintedJar,
|
|
30
|
+
jarCookiesValidated,
|
|
31
|
+
jarHasAkamaiValidationSignals,
|
|
32
|
+
} from './cdp-browser-fetch.ts';
|
|
29
33
|
import { createLog } from './log.ts';
|
|
30
34
|
|
|
31
35
|
const log = createLog('cdp-jar');
|
|
@@ -55,27 +59,32 @@ function jarPath(siteDir: string): string {
|
|
|
55
59
|
* The cached `ua` is reused for replay verbatim; a UA drift (Chrome auto-update
|
|
56
60
|
* mid-window) is rare and self-heals reactively on a replay 403, so we do NOT
|
|
57
61
|
* launch Chrome just to gate on UA here. */
|
|
58
|
-
export function loadJar(siteDir: string): MintedJar | null {
|
|
62
|
+
export function loadJar(siteDir: string, bootstrapUrl?: string): MintedJar | null {
|
|
59
63
|
const p = jarPath(siteDir);
|
|
60
64
|
if (!existsSync(p)) return null;
|
|
61
65
|
try {
|
|
62
66
|
const raw = JSON.parse(readFileSync(p, 'utf8')) as Partial<MintedJar>;
|
|
63
67
|
if (!raw || !Array.isArray(raw.cookies) || typeof raw.bootstrapEpoch !== 'number') return null;
|
|
68
|
+
if (bootstrapUrl && raw.bootstrapUrl && raw.bootstrapUrl !== bootstrapUrl) {
|
|
69
|
+
log('cached jar bootstrap URL differs from current workflow page — re-mint');
|
|
70
|
+
return null;
|
|
71
|
+
}
|
|
64
72
|
const ageSeconds = (Date.now() - raw.bootstrapEpoch) / 1000;
|
|
65
73
|
const maxAge = jarMaxAgeSeconds();
|
|
66
74
|
if (ageSeconds >= maxAge) {
|
|
67
75
|
log(`cached jar in ${siteDir} is ${Math.round(ageSeconds)}s old (>= ${maxAge}s) — re-mint`);
|
|
68
76
|
return null;
|
|
69
77
|
}
|
|
70
|
-
//
|
|
71
|
-
//
|
|
72
|
-
//
|
|
73
|
-
|
|
74
|
-
|
|
78
|
+
// Akamai jars must be validated (`_abck~0~` OR `bm_sv`). Non-Akamai jars
|
|
79
|
+
// may still be valid generic bootstrap artifacts (HTML captures + ordinary
|
|
80
|
+
// cookies), so do not discard them just because they lack Akamai markers.
|
|
81
|
+
// Fall back to cookie inspection for caches written before `validated`.
|
|
82
|
+
const validated = raw.validated ?? jarCookiesValidated(raw.cookies);
|
|
83
|
+
if (!validated && jarHasAkamaiValidationSignals(raw.cookies)) {
|
|
75
84
|
log(`cached jar not validated (_abck~${raw.abckFlag}~, no bm_sv) — re-mint`);
|
|
76
85
|
return null;
|
|
77
86
|
}
|
|
78
|
-
return raw as MintedJar;
|
|
87
|
+
return stripDurableObservedResponseBodies(raw as MintedJar);
|
|
79
88
|
} catch {
|
|
80
89
|
return null;
|
|
81
90
|
}
|
|
@@ -87,13 +96,30 @@ export function saveJar(siteDir: string, jar: MintedJar): void {
|
|
|
87
96
|
mkdirSync(siteDir, { recursive: true });
|
|
88
97
|
const p = jarPath(siteDir);
|
|
89
98
|
const tmp = `${p}.${process.pid}.tmp`;
|
|
90
|
-
writeFileSync(tmp, `${JSON.stringify(jar)}\n`, 'utf8');
|
|
99
|
+
writeFileSync(tmp, `${JSON.stringify(stripDurableObservedResponseBodies(jar))}\n`, 'utf8');
|
|
91
100
|
renameSync(tmp, p);
|
|
92
101
|
} catch (err) {
|
|
93
102
|
log(`failed to persist jar to ${siteDir}: ${err instanceof Error ? err.message : String(err)}`);
|
|
94
103
|
}
|
|
95
104
|
}
|
|
96
105
|
|
|
106
|
+
/**
 * Return a jar whose observed requests carry no response bodies.
 *
 * Applied both when a jar is written to disk and when one is loaded, so
 * captured response bodies never appear in the durable copy. The input is not
 * mutated:
 * - a jar without `observedRequests` is returned as-is;
 * - entries with no response, or a response without a body, are reused as-is;
 * - entries with a body get a fresh `response` holding only `status` and
 *   `headers`.
 *
 * @param jar Jar to sanitize.
 * @returns The same jar, or a shallow copy with body-free observed requests.
 */
function stripDurableObservedResponseBodies(jar: MintedJar): MintedJar {
  const observed = jar.observedRequests;
  if (!observed) return jar;

  const withoutBodies = observed.map((entry) => {
    const response = entry.response;
    // Nothing to strip: no response recorded, or no body captured for it.
    if (response?.body === undefined) return entry;
    return {
      ...entry,
      response: { status: response.status, headers: response.headers },
    };
  });

  return { ...jar, observedRequests: withoutBodies };
}
|
|
122
|
+
|
|
97
123
|
/** Remove a cached jar (best-effort) — call on a replay 401/403/428/429 so the
|
|
98
124
|
* next call re-mints (reactive self-heal), or when a site's teach run ends. */
|
|
99
125
|
export function clearJar(siteDir: string): void {
|
|
@@ -203,6 +229,7 @@ export function seedJarFromRecording(
|
|
|
203
229
|
}));
|
|
204
230
|
const abck = cookies.find((c) => c.name === '_abck')?.value;
|
|
205
231
|
const abckFlag = abck?.split('~')[1] ?? '?';
|
|
232
|
+
const hasAkamaiValidationSignals = jarHasAkamaiValidationSignals(cookies);
|
|
206
233
|
// Validated = `_abck~0~` OR a `bm_sv` cookie (Akamai's validated-session
|
|
207
234
|
// marker). `_abck` rotates back to `~-1~` after clearing a request, so a real
|
|
208
235
|
// working recording often ends with `_abck~-1~` + `bm_sv` — that jar replays
|
|
@@ -241,21 +268,89 @@ export function seedJarFromRecording(
|
|
|
241
268
|
// captured HTML. Without this (the old `html: ''`), any workflow whose
|
|
242
269
|
// requests reference `${state.X}` from an html_regex capture STATE_MISSINGs.
|
|
243
270
|
const html = pickBootstrapHtml(session.requests ?? [], bootstrapUrl);
|
|
271
|
+
const observedRequests = collectObservedRequests(session.requests ?? []);
|
|
244
272
|
saveJar(siteDir, {
|
|
245
273
|
cookies,
|
|
246
274
|
ua,
|
|
247
275
|
html,
|
|
276
|
+
observedRequests,
|
|
277
|
+
...(bootstrapUrl ? { bootstrapUrl } : {}),
|
|
248
278
|
bootstrapEpoch: Math.round(newestMtime),
|
|
249
279
|
abckFlag,
|
|
250
280
|
validated: true, // gated above on jarCookiesValidated
|
|
251
281
|
source: 'recording',
|
|
252
282
|
});
|
|
283
|
+
const validationLabel = hasAkamaiValidationSignals ? 'bm_sv-validated' : 'generic-bootstrap';
|
|
253
284
|
log(
|
|
254
|
-
`seeded jar from recording ${newest} (${cookies.length} cookies, _abck~${abckFlag}~,
|
|
285
|
+
`seeded jar from recording ${newest} (${cookies.length} cookies, _abck~${abckFlag}~, ${validationLabel}, ua=${ua ? `${ua.slice(0, 40)}…` : '(none)'}, html=${html.length}b)`,
|
|
255
286
|
);
|
|
256
287
|
return true;
|
|
257
288
|
}
|
|
258
289
|
|
|
290
|
+
/**
 * Convert recorded requests into the jar's `observedRequests` shape.
 *
 * - Records without a URL are skipped.
 * - The method defaults to `GET` and is upper-cased.
 * - Headers come from `requestHeaders`, falling back to `headers`, and are
 *   normalized to a flat string map.
 * - Every entry is tagged `source: 'browser'`.
 * - `body` and `resourceType` are copied only when they are strings.
 * - A response is copied (status + headers, never the body) only when its
 *   status is a number.
 * - Only the most recent 100 entries are kept.
 *
 * @param requests Requests taken from a recording session.
 * @returns At most 100 observed-request entries, in recorded order.
 */
function collectObservedRequests(
  requests: Array<{
    requestHeaders?: unknown;
    headers?: unknown;
    body?: unknown;
    url?: string;
    method?: string;
    resourceType?: string;
    response?: {
      status?: number;
      headers?: Record<string, string>;
      body?: string;
    };
  }>,
): NonNullable<MintedJar['observedRequests']> {
  const kept: NonNullable<MintedJar['observedRequests']> = [];

  for (const recorded of requests) {
    const { url, method, body, resourceType, response } = recorded;
    if (!url) continue;

    const entry: NonNullable<MintedJar['observedRequests']>[number] = {
      method: (method ?? 'GET').toUpperCase(),
      url,
      headers: normalizeHeaders(recorded.requestHeaders ?? recorded.headers),
      source: 'browser',
    };
    if (typeof body === 'string') entry.body = body;
    if (typeof resourceType === 'string') entry.resourceType = resourceType;
    if (typeof response?.status === 'number') {
      entry.response = { status: response.status, headers: response.headers ?? {} };
    }
    kept.push(entry);
  }

  // Keep only the newest 100 entries.
  return kept.slice(-100);
}
|
|
329
|
+
|
|
330
|
+
/**
 * Coerce recorded headers into a flat `name -> value` string map.
 *
 * Two input shapes are understood:
 * - an array of `{ name, value }` objects: items that are not objects, or
 *   whose `name`/`value` are not both strings, are dropped, and a later
 *   duplicate name overrides an earlier one;
 * - a plain object: only string-valued properties are kept.
 *
 * Anything else (null, undefined, primitives) yields an empty map.
 *
 * @param raw Headers in either supported shape, or any other value.
 * @returns A new object containing only string header values.
 */
function normalizeHeaders(raw: unknown): Record<string, string> {
  if (Array.isArray(raw)) {
    const pairs: Array<[string, string]> = [];
    for (const item of raw) {
      if (!item || typeof item !== 'object') continue;
      if (!('name' in item) || !('value' in item)) continue;
      if (typeof item.name !== 'string' || typeof item.value !== 'string') continue;
      pairs.push([item.name, item.value]);
    }
    return Object.fromEntries(pairs);
  }

  const normalized: Record<string, string> = {};
  if (raw && typeof raw === 'object') {
    Object.entries(raw).forEach(([key, value]) => {
      if (typeof value === 'string') normalized[key] = value;
    });
  }
  return normalized;
}
|
|
353
|
+
|
|
259
354
|
/**
|
|
260
355
|
* Choose the recorded HTML to seed into jar.html for html_regex bootstrap
|
|
261
356
|
* captures. Preference order: (1) the recorded response for the exact bootstrap
|
|
@@ -1261,7 +1261,7 @@ export async function typecheckArtifacts(
|
|
|
1261
1261
|
|
|
1262
1262
|
try {
|
|
1263
1263
|
const result = await runCommand(
|
|
1264
|
-
'
|
|
1264
|
+
'bun x tsc --noEmit -p .imprint-typecheck.tsconfig.json',
|
|
1265
1265
|
dir,
|
|
1266
1266
|
120000,
|
|
1267
1267
|
);
|
|
@@ -2937,7 +2937,7 @@ export async function externalVerification(
|
|
|
2937
2937
|
const output = await typecheckArtifacts(toolDir, ['parser.ts', 'request-transform.ts']);
|
|
2938
2938
|
if (output.exitCode !== 0 || output.timedOut) {
|
|
2939
2939
|
failures.push(
|
|
2940
|
-
`generated TypeScript artifacts failed typecheck (
|
|
2940
|
+
`generated TypeScript artifacts failed typecheck (bun x tsc --noEmit -p .imprint-typecheck.tsconfig.json) exited ${output.exitCode}${output.timedOut ? ' after timing out' : ''}\nstdout:\n${output.stdout}\nstderr:\n${output.stderr}`,
|
|
2941
2941
|
);
|
|
2942
2942
|
}
|
|
2943
2943
|
}
|