imprint-mcp 0.4.7 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/examples/google-flights/README.md +0 -2
- package/examples/google-flights/_shared/flights_request.ts +4 -10
- package/examples/google-flights/get_flight_booking_details/index.ts +2 -5
- package/examples/google-flights/get_flight_booking_details/parser.ts +0 -8
- package/examples/google-flights/get_flight_booking_details/workflow.json +2 -5
- package/examples/google-flights/get_flight_calendar_prices/index.ts +2 -5
- package/examples/google-flights/get_flight_calendar_prices/parser.ts +11 -15
- package/examples/google-flights/get_flight_calendar_prices/workflow.json +2 -5
- package/examples/google-flights/lookup_airport/index.ts +0 -3
- package/examples/google-flights/lookup_airport/parser.ts +1 -8
- package/examples/google-flights/lookup_airport/workflow.json +0 -3
- package/examples/google-flights/search_flights/index.ts +7 -62
- package/examples/google-flights/search_flights/request-transform.ts +4 -47
- package/examples/google-flights/search_flights/workflow.json +7 -62
- package/package.json +1 -1
- package/prompts/build-planning.md +1 -1
- package/prompts/compile-agent.md +3 -5
- package/prompts/prereq-builder.md +1 -2
- package/src/imprint/backend-ladder.ts +47 -436
- package/src/imprint/cdp-browser-fetch.ts +6 -176
- package/src/imprint/cdp-jar-cache.ts +10 -105
- package/src/imprint/compile-tools.ts +2 -2
- package/src/imprint/mcp-server.ts +65 -152
- package/src/imprint/probe-backends.ts +10 -41
- package/src/imprint/runtime.ts +12 -24
- package/src/imprint/stealth-fetch.ts +0 -71
- package/src/imprint/stealth-token-cache.ts +1 -38
- package/src/imprint/types.ts +0 -45
|
@@ -28,7 +28,6 @@
|
|
|
28
28
|
* trusted browser session.
|
|
29
29
|
*/
|
|
30
30
|
|
|
31
|
-
import { Buffer } from 'node:buffer';
|
|
32
31
|
import CDP from 'chrome-remote-interface';
|
|
33
32
|
import { launchChromium, proxyUrl } from './chromium.ts';
|
|
34
33
|
import { createLog } from './log.ts';
|
|
@@ -53,23 +52,6 @@ export interface MintedJar {
|
|
|
53
52
|
/** The bootstrap page HTML, so callers can satisfy html_regex captures
|
|
54
53
|
* (e.g. csrf / csp-nonce scraped from the page) without the browser. */
|
|
55
54
|
html: string;
|
|
56
|
-
/** Browser-generated requests observed while the bootstrap page loaded. Lets
|
|
57
|
-
* workflows capture replay headers minted by page JavaScript for later XHRs. */
|
|
58
|
-
observedRequests?: Array<{
|
|
59
|
-
method: string;
|
|
60
|
-
url: string;
|
|
61
|
-
headers: Record<string, string>;
|
|
62
|
-
body?: string;
|
|
63
|
-
resourceType?: string;
|
|
64
|
-
source?: 'browser' | 'replay';
|
|
65
|
-
response?: {
|
|
66
|
-
status: number;
|
|
67
|
-
headers: Record<string, string>;
|
|
68
|
-
body?: string;
|
|
69
|
-
};
|
|
70
|
-
}>;
|
|
71
|
-
/** Exact page URL used to create the page-specific HTML / observed requests. */
|
|
72
|
-
bootstrapUrl?: string;
|
|
73
55
|
/** Date.now() at mint — the jar's validity is bounded (~2h fixed for Akamai). */
|
|
74
56
|
bootstrapEpoch: number;
|
|
75
57
|
/** The final `_abck` status field at capture (`0` = validated, `-1` = pending).
|
|
@@ -91,36 +73,17 @@ export interface MintedJar {
|
|
|
91
73
|
source?: 'mint' | 'recording';
|
|
92
74
|
}
|
|
93
75
|
|
|
94
|
-
/**
|
|
95
|
-
*
|
|
96
|
-
*
|
|
97
|
-
*
|
|
98
|
-
* to those generic bootstrap artifacts. */
|
|
99
|
-
export function jarHasAkamaiValidationSignals(
|
|
100
|
-
cookies: Array<{ name: string; value: string }>,
|
|
101
|
-
): boolean {
|
|
102
|
-
return cookies.some((c) =>
|
|
103
|
-
['_abck', 'bm_sv', 'ak_bmsc', 'bm_sz', 'bm_mi'].includes(c.name.toLowerCase()),
|
|
104
|
-
);
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
/** A session is replay-safe when either it is not an Akamai-style jar, or its
|
|
108
|
-
* `_abck` is validated (`~0~`) / the Akamai validated-session marker `bm_sv` is
|
|
109
|
-
* present. `bm_sv` survives `_abck` rotating back to `~-1~`. Shared by the cdp
|
|
110
|
-
* mint and recording-seed paths so both judge "validated" identically. */
|
|
76
|
+
/** A session is replay-safe when `_abck` is validated (`~0~`) OR the Akamai
|
|
77
|
+
* validated-session marker `bm_sv` is present (it is only set post-validation,
|
|
78
|
+
* and survives `_abck` rotating back to `~-1~`). Shared by the cdp mint and the
|
|
79
|
+
* recording-seed paths so both judge "validated" identically. */
|
|
111
80
|
export function jarCookiesValidated(cookies: Array<{ name: string; value: string }>): boolean {
|
|
112
|
-
if (!jarHasAkamaiValidationSignals(cookies)) return true;
|
|
113
81
|
const abck = cookies.find((c) => c.name === '_abck')?.value;
|
|
114
82
|
if (abck && abck.split('~')[1] === '0') return true;
|
|
115
83
|
return cookies.some((c) => c.name === 'bm_sv');
|
|
116
84
|
}
|
|
117
85
|
|
|
118
86
|
export interface CdpBrowserFetch {
|
|
119
|
-
/** Actual page URL used to bootstrap this browser session. */
|
|
120
|
-
readonly bootstrapUrl: string;
|
|
121
|
-
/** Reuse the same Chrome process for a new bootstrap page. The next
|
|
122
|
-
* ensure/mint call navigates there and refreshes page-observed request state. */
|
|
123
|
-
setBootstrapUrl(bootstrapUrl: string): void;
|
|
124
87
|
/** typeof fetch — executes the request inside the live trusted Chrome page. */
|
|
125
88
|
readonly fetchImpl: typeof fetch;
|
|
126
89
|
/** Force the bootstrap navigation + `_abck` validation now; returns the
|
|
@@ -251,7 +214,7 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
251
214
|
// and never establishes the sensor session). Fall back to the origin root,
|
|
252
215
|
// which loads a real page and runs the Akamai sensor JS.
|
|
253
216
|
const baseLooksLikeApi = /\.act(\?|$)|\/api\//i.test(opts.baseUrl);
|
|
254
|
-
|
|
217
|
+
const navUrl = opts.bootstrapUrl ?? (baseLooksLikeApi ? `${baseOrigin}/` : opts.baseUrl);
|
|
255
218
|
const abckWaitMs = (opts.abckWaitSeconds ?? 25) * 1000;
|
|
256
219
|
const reqTimeoutMs = opts.requestTimeoutMs ?? 60_000;
|
|
257
220
|
const cdpCommandTimeoutMs = opts.cdpCommandTimeoutMs ?? 20_000;
|
|
@@ -261,18 +224,6 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
261
224
|
let client: CdpClient | null = null;
|
|
262
225
|
let bootstrapped = false;
|
|
263
226
|
let appliedUa: string | undefined;
|
|
264
|
-
let forceDocumentReset = false;
|
|
265
|
-
const observedRequests: NonNullable<MintedJar['observedRequests']> = [];
|
|
266
|
-
const observedByRequestId = new Map<string, NonNullable<MintedJar['observedRequests']>[number]>();
|
|
267
|
-
const pendingResponseCaptures = new Set<Promise<void>>();
|
|
268
|
-
let networkObserversAttached = false;
|
|
269
|
-
const replayRequestKeys = new Set<string>();
|
|
270
|
-
|
|
271
|
-
function resetObservedRequests(): void {
|
|
272
|
-
observedRequests.length = 0;
|
|
273
|
-
observedByRequestId.clear();
|
|
274
|
-
pendingResponseCaptures.clear();
|
|
275
|
-
}
|
|
276
227
|
|
|
277
228
|
async function close(): Promise<void> {
|
|
278
229
|
const c = client;
|
|
@@ -323,65 +274,6 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
323
274
|
await withTimeout(Runtime.enable(), 'CDP Runtime.enable', cdpCommandTimeoutMs);
|
|
324
275
|
await withTimeout(Network.enable(), 'CDP Network.enable', cdpCommandTimeoutMs);
|
|
325
276
|
await withTimeout(Page.enable(), 'CDP Page.enable', cdpCommandTimeoutMs);
|
|
326
|
-
if (!networkObserversAttached) {
|
|
327
|
-
networkObserversAttached = true;
|
|
328
|
-
Network.requestWillBeSent((params) => {
|
|
329
|
-
const req = params.request;
|
|
330
|
-
const headers: Record<string, string> = {};
|
|
331
|
-
for (const [k, v] of Object.entries(req.headers ?? {})) {
|
|
332
|
-
if (typeof v === 'string') headers[k] = v;
|
|
333
|
-
}
|
|
334
|
-
const postData = (req as { postData?: unknown }).postData;
|
|
335
|
-
const entry: NonNullable<MintedJar['observedRequests']>[number] = {
|
|
336
|
-
method: req.method,
|
|
337
|
-
url: req.url,
|
|
338
|
-
headers,
|
|
339
|
-
source: replayRequestKeys.delete(observedRequestKey(req.method, req.url, postData))
|
|
340
|
-
? 'replay'
|
|
341
|
-
: 'browser',
|
|
342
|
-
...(typeof postData === 'string' ? { body: postData } : {}),
|
|
343
|
-
...(typeof params.type === 'string' ? { resourceType: params.type } : {}),
|
|
344
|
-
};
|
|
345
|
-
observedRequests.push(entry);
|
|
346
|
-
observedByRequestId.set(params.requestId, entry);
|
|
347
|
-
if (observedRequests.length > 100) observedRequests.shift();
|
|
348
|
-
});
|
|
349
|
-
Network.responseReceived((params) => {
|
|
350
|
-
const entry = observedByRequestId.get(params.requestId);
|
|
351
|
-
if (!entry) return;
|
|
352
|
-
const headers: Record<string, string> = {};
|
|
353
|
-
for (const [k, v] of Object.entries(params.response.headers ?? {})) {
|
|
354
|
-
if (typeof v === 'string') headers[k] = v;
|
|
355
|
-
}
|
|
356
|
-
entry.response = {
|
|
357
|
-
status: params.response.status,
|
|
358
|
-
headers,
|
|
359
|
-
};
|
|
360
|
-
});
|
|
361
|
-
Network.loadingFinished((params) => {
|
|
362
|
-
const entry = observedByRequestId.get(params.requestId);
|
|
363
|
-
if (!entry || !shouldCaptureObservedBody(entry)) return;
|
|
364
|
-
const pending = (async () => {
|
|
365
|
-
try {
|
|
366
|
-
const bodyResult = await withTimeout(
|
|
367
|
-
Network.getResponseBody({ requestId: params.requestId }),
|
|
368
|
-
'CDP Network.getResponseBody',
|
|
369
|
-
shortCdpTimeoutMs,
|
|
370
|
-
);
|
|
371
|
-
if (!entry.response) {
|
|
372
|
-
entry.response = { status: 200, headers: {} };
|
|
373
|
-
}
|
|
374
|
-
entry.response.body = bodyResult.base64Encoded
|
|
375
|
-
? Buffer.from(bodyResult.body, 'base64').toString('utf8')
|
|
376
|
-
: bodyResult.body;
|
|
377
|
-
} catch {
|
|
378
|
-
// best-effort — response reuse simply won't match without a body
|
|
379
|
-
}
|
|
380
|
-
})();
|
|
381
|
-
pendingResponseCaptures.add(pending);
|
|
382
|
-
void pending.finally(() => pendingResponseCaptures.delete(pending));
|
|
383
|
-
});
|
|
384
|
-
}
|
|
385
277
|
// Plant the high-trust seed cookies (the recording's validated Akamai jar)
|
|
386
278
|
// BEFORE navigating, so the first request to the protected origin carries the
|
|
387
279
|
// trusted session. A synthetic mint can reach `_abck~0~` yet still get its
|
|
@@ -443,20 +335,6 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
443
335
|
// loads normally. Bound the CDP command and proceed regardless — _abck
|
|
444
336
|
// polling below tolerates a partial load.
|
|
445
337
|
try {
|
|
446
|
-
if (forceDocumentReset) {
|
|
447
|
-
forceDocumentReset = false;
|
|
448
|
-
await withTimeout(
|
|
449
|
-
Page.navigate({ url: 'about:blank' }),
|
|
450
|
-
'CDP Page.navigate(about:blank)',
|
|
451
|
-
Math.max(1, Math.min(abckWaitMs, 5_000)),
|
|
452
|
-
);
|
|
453
|
-
await withTimeout(
|
|
454
|
-
Page.loadEventFired(),
|
|
455
|
-
'CDP Page.loadEventFired(about:blank)',
|
|
456
|
-
Math.max(1, Math.min(abckWaitMs, 2_000)),
|
|
457
|
-
).catch(() => {});
|
|
458
|
-
await sleep(250);
|
|
459
|
-
}
|
|
460
338
|
await withTimeout(
|
|
461
339
|
Page.navigate({ url: navUrl }),
|
|
462
340
|
'CDP Page.navigate',
|
|
@@ -485,7 +363,6 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
485
363
|
const start = Date.now();
|
|
486
364
|
let i = 0;
|
|
487
365
|
let status = '?';
|
|
488
|
-
let absentAbckChecks = 0;
|
|
489
366
|
let pos = { x: rand(120, 1100), y: rand(120, 600) };
|
|
490
367
|
while (Date.now() - start < abckWaitMs) {
|
|
491
368
|
try {
|
|
@@ -548,16 +425,7 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
548
425
|
}
|
|
549
426
|
await sleep(rand(180, 520)); // non-uniform dwell between interaction bursts
|
|
550
427
|
const abck = await getCookie(client, '_abck');
|
|
551
|
-
|
|
552
|
-
absentAbckChecks++;
|
|
553
|
-
if (absentAbckChecks >= 2) {
|
|
554
|
-
status = 'absent';
|
|
555
|
-
break;
|
|
556
|
-
}
|
|
557
|
-
} else {
|
|
558
|
-
absentAbckChecks = 0;
|
|
559
|
-
status = abck.split('~')[1] ?? '?';
|
|
560
|
-
}
|
|
428
|
+
status = abck?.split('~')[1] ?? '?';
|
|
561
429
|
if (abckIsValidated(abck)) break;
|
|
562
430
|
i++;
|
|
563
431
|
}
|
|
@@ -651,7 +519,6 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
651
519
|
|
|
652
520
|
// Execute the fetch INSIDE the trusted page. credentials:'include' so the
|
|
653
521
|
// browser attaches the validated session cookies.
|
|
654
|
-
replayRequestKeys.add(observedRequestKey(method, fullUrl, body));
|
|
655
522
|
const expr = `(async () => {
|
|
656
523
|
try {
|
|
657
524
|
const ctrl = new AbortController();
|
|
@@ -695,17 +562,6 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
695
562
|
}) as typeof fetch;
|
|
696
563
|
|
|
697
564
|
return {
|
|
698
|
-
get bootstrapUrl() {
|
|
699
|
-
return navUrl;
|
|
700
|
-
},
|
|
701
|
-
setBootstrapUrl(nextBootstrapUrl: string): void {
|
|
702
|
-
if (nextBootstrapUrl === navUrl) return;
|
|
703
|
-
log(`retargeting pooled Chrome bootstrap (${navUrl} → ${nextBootstrapUrl})`);
|
|
704
|
-
navUrl = nextBootstrapUrl;
|
|
705
|
-
bootstrapped = false;
|
|
706
|
-
forceDocumentReset = true;
|
|
707
|
-
resetObservedRequests();
|
|
708
|
-
},
|
|
709
565
|
fetchImpl,
|
|
710
566
|
async ensureBootstrapped() {
|
|
711
567
|
const c = await ensure();
|
|
@@ -725,7 +581,6 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
725
581
|
},
|
|
726
582
|
async mintJar(): Promise<MintedJar> {
|
|
727
583
|
const c = await ensure();
|
|
728
|
-
await settlePendingResponseCaptures(pendingResponseCaptures, shortCdpTimeoutMs);
|
|
729
584
|
const cookies: MintedJar['cookies'] = [];
|
|
730
585
|
try {
|
|
731
586
|
const res = await withTimeout(
|
|
@@ -768,8 +623,6 @@ export function createCdpBrowserFetch(opts: CdpBrowserFetchOptions): CdpBrowserF
|
|
|
768
623
|
cookies,
|
|
769
624
|
ua: appliedUa ?? '',
|
|
770
625
|
html,
|
|
771
|
-
observedRequests: observedRequests.slice(),
|
|
772
|
-
bootstrapUrl: navUrl,
|
|
773
626
|
bootstrapEpoch: Date.now(),
|
|
774
627
|
abckFlag: abck?.split('~')[1] ?? '?',
|
|
775
628
|
validated: jarCookiesValidated(cookies),
|
|
@@ -784,29 +637,6 @@ function sleep(ms: number): Promise<void> {
|
|
|
784
637
|
return new Promise((r) => setTimeout(r, ms));
|
|
785
638
|
}
|
|
786
639
|
|
|
787
|
-
function shouldCaptureObservedBody(
|
|
788
|
-
entry: NonNullable<MintedJar['observedRequests']>[number],
|
|
789
|
-
): boolean {
|
|
790
|
-
const type = entry.resourceType?.toLowerCase();
|
|
791
|
-
if (type && type !== 'xhr' && type !== 'fetch') return false;
|
|
792
|
-
return true;
|
|
793
|
-
}
|
|
794
|
-
|
|
795
|
-
function observedRequestKey(method: string, url: string, body: unknown): string {
|
|
796
|
-
return `${method.toUpperCase()} ${url} ${typeof body === 'string' ? body : ''}`;
|
|
797
|
-
}
|
|
798
|
-
|
|
799
|
-
async function settlePendingResponseCaptures(
|
|
800
|
-
pending: Set<Promise<void>>,
|
|
801
|
-
timeoutMs: number,
|
|
802
|
-
): Promise<void> {
|
|
803
|
-
if (pending.size === 0) return;
|
|
804
|
-
await Promise.race([
|
|
805
|
-
Promise.allSettled([...pending]),
|
|
806
|
-
new Promise<void>((resolve) => setTimeout(resolve, timeoutMs)),
|
|
807
|
-
]);
|
|
808
|
-
}
|
|
809
|
-
|
|
810
640
|
async function withTimeout<T>(promise: Promise<T>, label: string, timeoutMs: number): Promise<T> {
|
|
811
641
|
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) return promise;
|
|
812
642
|
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
@@ -25,11 +25,7 @@ import {
|
|
|
25
25
|
writeFileSync,
|
|
26
26
|
} from 'node:fs';
|
|
27
27
|
import { join as pathJoin } from 'node:path';
|
|
28
|
-
import {
|
|
29
|
-
type MintedJar,
|
|
30
|
-
jarCookiesValidated,
|
|
31
|
-
jarHasAkamaiValidationSignals,
|
|
32
|
-
} from './cdp-browser-fetch.ts';
|
|
28
|
+
import { type MintedJar, jarCookiesValidated } from './cdp-browser-fetch.ts';
|
|
33
29
|
import { createLog } from './log.ts';
|
|
34
30
|
|
|
35
31
|
const log = createLog('cdp-jar');
|
|
@@ -59,32 +55,27 @@ function jarPath(siteDir: string): string {
|
|
|
59
55
|
* The cached `ua` is reused for replay verbatim; a UA drift (Chrome auto-update
|
|
60
56
|
* mid-window) is rare and self-heals reactively on a replay 403, so we do NOT
|
|
61
57
|
* launch Chrome just to gate on UA here. */
|
|
62
|
-
export function loadJar(siteDir: string
|
|
58
|
+
export function loadJar(siteDir: string): MintedJar | null {
|
|
63
59
|
const p = jarPath(siteDir);
|
|
64
60
|
if (!existsSync(p)) return null;
|
|
65
61
|
try {
|
|
66
62
|
const raw = JSON.parse(readFileSync(p, 'utf8')) as Partial<MintedJar>;
|
|
67
63
|
if (!raw || !Array.isArray(raw.cookies) || typeof raw.bootstrapEpoch !== 'number') return null;
|
|
68
|
-
if (bootstrapUrl && raw.bootstrapUrl && raw.bootstrapUrl !== bootstrapUrl) {
|
|
69
|
-
log('cached jar bootstrap URL differs from current workflow page — re-mint');
|
|
70
|
-
return null;
|
|
71
|
-
}
|
|
72
64
|
const ageSeconds = (Date.now() - raw.bootstrapEpoch) / 1000;
|
|
73
65
|
const maxAge = jarMaxAgeSeconds();
|
|
74
66
|
if (ageSeconds >= maxAge) {
|
|
75
67
|
log(`cached jar in ${siteDir} is ${Math.round(ageSeconds)}s old (>= ${maxAge}s) — re-mint`);
|
|
76
68
|
return null;
|
|
77
69
|
}
|
|
78
|
-
//
|
|
79
|
-
//
|
|
80
|
-
//
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
if (!validated && jarHasAkamaiValidationSignals(raw.cookies)) {
|
|
70
|
+
// Validated = `_abck~0~` OR `bm_sv` present (the latter survives `_abck`
|
|
71
|
+
// rotating back to `~-1~`). Fall back to the abckFlag check for caches
|
|
72
|
+
// written before the `validated` field existed.
|
|
73
|
+
const validated = raw.validated ?? raw.abckFlag === '0';
|
|
74
|
+
if (!validated) {
|
|
84
75
|
log(`cached jar not validated (_abck~${raw.abckFlag}~, no bm_sv) — re-mint`);
|
|
85
76
|
return null;
|
|
86
77
|
}
|
|
87
|
-
return
|
|
78
|
+
return raw as MintedJar;
|
|
88
79
|
} catch {
|
|
89
80
|
return null;
|
|
90
81
|
}
|
|
@@ -96,30 +87,13 @@ export function saveJar(siteDir: string, jar: MintedJar): void {
|
|
|
96
87
|
mkdirSync(siteDir, { recursive: true });
|
|
97
88
|
const p = jarPath(siteDir);
|
|
98
89
|
const tmp = `${p}.${process.pid}.tmp`;
|
|
99
|
-
writeFileSync(tmp, `${JSON.stringify(
|
|
90
|
+
writeFileSync(tmp, `${JSON.stringify(jar)}\n`, 'utf8');
|
|
100
91
|
renameSync(tmp, p);
|
|
101
92
|
} catch (err) {
|
|
102
93
|
log(`failed to persist jar to ${siteDir}: ${err instanceof Error ? err.message : String(err)}`);
|
|
103
94
|
}
|
|
104
95
|
}
|
|
105
96
|
|
|
106
|
-
function stripDurableObservedResponseBodies(jar: MintedJar): MintedJar {
|
|
107
|
-
if (!jar.observedRequests) return jar;
|
|
108
|
-
return {
|
|
109
|
-
...jar,
|
|
110
|
-
observedRequests: jar.observedRequests.map((req) => {
|
|
111
|
-
if (!req.response || req.response.body === undefined) return req;
|
|
112
|
-
return {
|
|
113
|
-
...req,
|
|
114
|
-
response: {
|
|
115
|
-
status: req.response.status,
|
|
116
|
-
headers: req.response.headers,
|
|
117
|
-
},
|
|
118
|
-
};
|
|
119
|
-
}),
|
|
120
|
-
};
|
|
121
|
-
}
|
|
122
|
-
|
|
123
97
|
/** Remove a cached jar (best-effort) — call on a replay 401/403/428/429 so the
|
|
124
98
|
* next call re-mints (reactive self-heal), or when a site's teach run ends. */
|
|
125
99
|
export function clearJar(siteDir: string): void {
|
|
@@ -229,7 +203,6 @@ export function seedJarFromRecording(
|
|
|
229
203
|
}));
|
|
230
204
|
const abck = cookies.find((c) => c.name === '_abck')?.value;
|
|
231
205
|
const abckFlag = abck?.split('~')[1] ?? '?';
|
|
232
|
-
const hasAkamaiValidationSignals = jarHasAkamaiValidationSignals(cookies);
|
|
233
206
|
// Validated = `_abck~0~` OR a `bm_sv` cookie (Akamai's validated-session
|
|
234
207
|
// marker). `_abck` rotates back to `~-1~` after clearing a request, so a real
|
|
235
208
|
// working recording often ends with `_abck~-1~` + `bm_sv` — that jar replays
|
|
@@ -268,89 +241,21 @@ export function seedJarFromRecording(
|
|
|
268
241
|
// captured HTML. Without this (the old `html: ''`), any workflow whose
|
|
269
242
|
// requests reference `${state.X}` from an html_regex capture STATE_MISSINGs.
|
|
270
243
|
const html = pickBootstrapHtml(session.requests ?? [], bootstrapUrl);
|
|
271
|
-
const observedRequests = collectObservedRequests(session.requests ?? []);
|
|
272
244
|
saveJar(siteDir, {
|
|
273
245
|
cookies,
|
|
274
246
|
ua,
|
|
275
247
|
html,
|
|
276
|
-
observedRequests,
|
|
277
|
-
...(bootstrapUrl ? { bootstrapUrl } : {}),
|
|
278
248
|
bootstrapEpoch: Math.round(newestMtime),
|
|
279
249
|
abckFlag,
|
|
280
250
|
validated: true, // gated above on jarCookiesValidated
|
|
281
251
|
source: 'recording',
|
|
282
252
|
});
|
|
283
|
-
const validationLabel = hasAkamaiValidationSignals ? 'bm_sv-validated' : 'generic-bootstrap';
|
|
284
253
|
log(
|
|
285
|
-
`seeded jar from recording ${newest} (${cookies.length} cookies, _abck~${abckFlag}~,
|
|
254
|
+
`seeded jar from recording ${newest} (${cookies.length} cookies, _abck~${abckFlag}~, bm_sv-validated, ua=${ua ? `${ua.slice(0, 40)}…` : '(none)'}, html=${html.length}b)`,
|
|
286
255
|
);
|
|
287
256
|
return true;
|
|
288
257
|
}
|
|
289
258
|
|
|
290
|
-
function collectObservedRequests(
|
|
291
|
-
requests: Array<{
|
|
292
|
-
requestHeaders?: unknown;
|
|
293
|
-
headers?: unknown;
|
|
294
|
-
body?: unknown;
|
|
295
|
-
url?: string;
|
|
296
|
-
method?: string;
|
|
297
|
-
resourceType?: string;
|
|
298
|
-
response?: {
|
|
299
|
-
status?: number;
|
|
300
|
-
headers?: Record<string, string>;
|
|
301
|
-
body?: string;
|
|
302
|
-
};
|
|
303
|
-
}>,
|
|
304
|
-
): NonNullable<MintedJar['observedRequests']> {
|
|
305
|
-
const observed: NonNullable<MintedJar['observedRequests']> = [];
|
|
306
|
-
for (const r of requests) {
|
|
307
|
-
if (!r.url) continue;
|
|
308
|
-
const headers = normalizeHeaders(r.requestHeaders ?? r.headers);
|
|
309
|
-
observed.push({
|
|
310
|
-
method: (r.method ?? 'GET').toUpperCase(),
|
|
311
|
-
url: r.url,
|
|
312
|
-
headers,
|
|
313
|
-
source: 'browser',
|
|
314
|
-
...(typeof r.body === 'string' ? { body: r.body } : {}),
|
|
315
|
-
...(typeof r.resourceType === 'string' ? { resourceType: r.resourceType } : {}),
|
|
316
|
-
...(typeof r.response?.status === 'number'
|
|
317
|
-
? {
|
|
318
|
-
response: {
|
|
319
|
-
status: r.response.status,
|
|
320
|
-
headers: r.response.headers ?? {},
|
|
321
|
-
},
|
|
322
|
-
}
|
|
323
|
-
: {}),
|
|
324
|
-
});
|
|
325
|
-
if (observed.length > 100) observed.shift();
|
|
326
|
-
}
|
|
327
|
-
return observed;
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
function normalizeHeaders(raw: unknown): Record<string, string> {
|
|
331
|
-
if (Array.isArray(raw)) {
|
|
332
|
-
const entries = raw
|
|
333
|
-
.map((x) =>
|
|
334
|
-
x &&
|
|
335
|
-
typeof x === 'object' &&
|
|
336
|
-
'name' in x &&
|
|
337
|
-
'value' in x &&
|
|
338
|
-
typeof x.name === 'string' &&
|
|
339
|
-
typeof x.value === 'string'
|
|
340
|
-
? [x.name, x.value]
|
|
341
|
-
: null,
|
|
342
|
-
)
|
|
343
|
-
.filter((x): x is [string, string] => x !== null);
|
|
344
|
-
return Object.fromEntries(entries);
|
|
345
|
-
}
|
|
346
|
-
if (!raw || typeof raw !== 'object') return {};
|
|
347
|
-
const out: Record<string, string> = {};
|
|
348
|
-
for (const [k, v] of Object.entries(raw)) {
|
|
349
|
-
if (typeof v === 'string') out[k] = v;
|
|
350
|
-
}
|
|
351
|
-
return out;
|
|
352
|
-
}
|
|
353
|
-
|
|
354
259
|
/**
|
|
355
260
|
* Choose the recorded HTML to seed into jar.html for html_regex bootstrap
|
|
356
261
|
* captures. Preference order: (1) the recorded response for the exact bootstrap
|
|
@@ -1261,7 +1261,7 @@ export async function typecheckArtifacts(
|
|
|
1261
1261
|
|
|
1262
1262
|
try {
|
|
1263
1263
|
const result = await runCommand(
|
|
1264
|
-
'
|
|
1264
|
+
'bunx tsc --noEmit -p .imprint-typecheck.tsconfig.json',
|
|
1265
1265
|
dir,
|
|
1266
1266
|
120000,
|
|
1267
1267
|
);
|
|
@@ -2937,7 +2937,7 @@ export async function externalVerification(
|
|
|
2937
2937
|
const output = await typecheckArtifacts(toolDir, ['parser.ts', 'request-transform.ts']);
|
|
2938
2938
|
if (output.exitCode !== 0 || output.timedOut) {
|
|
2939
2939
|
failures.push(
|
|
2940
|
-
`generated TypeScript artifacts failed typecheck (
|
|
2940
|
+
`generated TypeScript artifacts failed typecheck (bunx tsc --noEmit -p .imprint-typecheck.tsconfig.json) exited ${output.exitCode}${output.timedOut ? ' after timing out' : ''}\nstdout:\n${output.stdout}\nstderr:\n${output.stderr}`,
|
|
2941
2941
|
);
|
|
2942
2942
|
}
|
|
2943
2943
|
}
|