imprint-mcp 0.4.7 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/README.md +4 -4
  2. package/examples/google-flights/README.md +0 -2
  3. package/examples/google-flights/_shared/flights_request.ts +4 -10
  4. package/examples/google-flights/get_flight_booking_details/index.ts +2 -5
  5. package/examples/google-flights/get_flight_booking_details/parser.ts +0 -8
  6. package/examples/google-flights/get_flight_booking_details/workflow.json +2 -5
  7. package/examples/google-flights/get_flight_calendar_prices/index.ts +2 -5
  8. package/examples/google-flights/get_flight_calendar_prices/parser.ts +11 -15
  9. package/examples/google-flights/get_flight_calendar_prices/workflow.json +2 -5
  10. package/examples/google-flights/lookup_airport/index.ts +0 -3
  11. package/examples/google-flights/lookup_airport/parser.ts +1 -8
  12. package/examples/google-flights/lookup_airport/workflow.json +0 -3
  13. package/examples/google-flights/search_flights/index.ts +7 -62
  14. package/examples/google-flights/search_flights/request-transform.ts +4 -47
  15. package/examples/google-flights/search_flights/workflow.json +7 -62
  16. package/package.json +1 -1
  17. package/prompts/build-planning.md +1 -1
  18. package/prompts/compile-agent.md +3 -5
  19. package/prompts/prereq-builder.md +1 -2
  20. package/src/imprint/backend-ladder.ts +47 -436
  21. package/src/imprint/cdp-browser-fetch.ts +6 -176
  22. package/src/imprint/cdp-jar-cache.ts +10 -105
  23. package/src/imprint/compile-tools.ts +2 -2
  24. package/src/imprint/mcp-server.ts +65 -152
  25. package/src/imprint/probe-backends.ts +10 -41
  26. package/src/imprint/runtime.ts +12 -24
  27. package/src/imprint/stealth-fetch.ts +0 -71
  28. package/src/imprint/stealth-token-cache.ts +1 -38
  29. package/src/imprint/types.ts +0 -45
@@ -10,9 +10,6 @@
10
10
  * cached (~90 min) so one bootstrap serves many searches. Auto mode always
11
11
  * splices this right after `fetch`; it only RUNS when `fetch` escalates, so a
12
12
  * healthy plain-API site never pays for it.
13
- * - `cdp-replay` — live Chrome API replay. Reused by MCP/compile sessions
14
- * when a workflow needs browser-observed request state or sustained protected
15
- * POSTs.
16
13
  * - `stealth-fetch` — Playwright stealth bootstrap + native fetch (token tier).
17
14
  * - `playbook` — DOM-walk LAST RESORT (needs a compiled playbook.yaml).
18
15
  */
@@ -25,7 +22,6 @@ import {
25
22
  type CdpBrowserFetchOptions,
26
23
  type MintedJar,
27
24
  createCdpBrowserFetch,
28
- jarHasAkamaiValidationSignals,
29
25
  } from './cdp-browser-fetch.ts';
30
26
  import {
31
27
  clearJar,
@@ -167,6 +163,10 @@ export function __resetCompileCdpPoolForTest(): void {
167
163
  compileCdpPool.clear();
168
164
  }
169
165
 
166
+ function cdpToolResultImpliesDeadSession(result: ToolResult): boolean {
167
+ return !result.ok && result.error === 'NETWORK';
168
+ }
169
+
170
170
  /** Freshness window for the file-backed compile-time stealth token. Matches
171
171
  * stealth-fetch's in-process `maxTokenAgeSeconds` default so a reused token is
172
172
  * not immediately considered stale by `createStealthFetch`. */
@@ -217,30 +217,6 @@ function withWorkflowDefaults(
217
217
  return paramsWithDefaults;
218
218
  }
219
219
 
220
- async function withWorkflowPreparedParams(
221
- tool: ResolvedTool,
222
- params: Record<string, string | number | boolean>,
223
- ): Promise<Record<string, string | number | boolean>> {
224
- const preparedParams = withWorkflowDefaults(tool.workflow, params);
225
- const modulePath = tool.workflow.requestTransformModule;
226
- if (!modulePath) return preparedParams;
227
- try {
228
- const mod = await import(pathResolve(tool.dir, modulePath));
229
- if (typeof mod.prepareParams !== 'function') return preparedParams;
230
- const extra = await mod.prepareParams(preparedParams);
231
- if (!extra || typeof extra !== 'object') return preparedParams;
232
- for (const [key, value] of Object.entries(extra)) {
233
- if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
234
- preparedParams[key] = value;
235
- }
236
- }
237
- } catch {
238
- // Non-fatal: request transforms are optional, and executeWorkflow will surface
239
- // any still-missing placeholders with its normal STATE_MISSING diagnostics.
240
- }
241
- return preparedParams;
242
- }
243
-
244
220
  /** Await the per-origin min spacing before a compile-path live request. The
245
221
  * first call to an origin never waits (last=0); subsequent ones within the
246
222
  * window are delayed so the suite paces itself under the rate-flag. */
@@ -363,7 +339,7 @@ export async function runWithLadder(
363
339
  result = await runCdpReplay(tool, params, options?.cdpPool);
364
340
  break;
365
341
  case 'stealth-fetch': {
366
- const paramsWithDefaults = await withWorkflowPreparedParams(tool, params);
342
+ const paramsWithDefaults = withWorkflowDefaults(tool.workflow, params);
367
343
  const sf = await ensureStealthFetch(tool, stealthCache, paramsWithDefaults);
368
344
  // When the workflow declares a bootstrap block, mint its declared
369
345
  // session-token state (CSRF cookies etc.) from the SAME stealth
@@ -371,21 +347,17 @@ export async function runWithLadder(
371
347
  // workflow escalating here from fetch-bootstrap loses the
372
348
  // ${state.X} its requests need — the gap that made bootstrap-block
373
349
  // tools on anti-bot sites unverifiable.
374
- const tokens = tool.workflow.bootstrap ? await sf.ensureBootstrapped() : undefined;
375
350
  const initialState = tool.workflow.bootstrap
376
- ? await stealthBootstrapState(sf, tool.workflow.bootstrap, tokens)
351
+ ? await stealthBootstrapState(sf, tool.workflow.bootstrap)
377
352
  : undefined;
378
- result = await tool.toolFn(paramsWithDefaults, {
379
- fetchImpl: tokens ? makeObservedResponseFetch(tokens, sf.fetchImpl) : sf.fetchImpl,
380
- initialState,
381
- });
353
+ result = await tool.toolFn(paramsWithDefaults, { fetchImpl: sf.fetchImpl, initialState });
382
354
  break;
383
355
  }
384
356
  case 'playbook': {
385
357
  // DOM-walk last resort (the anti-bot API path is fetch-bootstrap, above).
386
358
  // Apply workflow.json's declared parameter defaults — runPlaybook
387
359
  // validates and throws on absent values regardless of declared defaults.
388
- const paramsWithDefaults = await withWorkflowPreparedParams(tool, params);
360
+ const paramsWithDefaults = withWorkflowDefaults(tool.workflow, params);
389
361
  result = await runPlaybook({
390
362
  playbook: playbookPath(assetRoot, tool.site, tool.dir),
391
363
  params: paramsWithDefaults,
@@ -554,9 +526,11 @@ export function effectiveAutoLadder(
554
526
  const fbIdx = next.indexOf('fetch-bootstrap');
555
527
  if (fbIdx !== -1) next.splice(fbIdx + 1, 0, 'cdp-replay');
556
528
  }
557
- // For workflows that need live-browser request state, front-load cdp-replay so
558
- // MCP sessions reuse the same Chrome instead of paying the one-shot
559
- // fetch-bootstrap browser mint before every distinct bootstrap URL.
529
+ // For a MULTI-step state-changing anti-bot workflow, plain-fetch rungs are not
530
+ // just doomed — their tarpitted .act attempts BURN the per-IP rate budget
531
+ // before cdp-replay even runs, which can flag the IP and make cdp-replay tarpit
532
+ // too. Front-load cdp-replay for these so the live browser handles every
533
+ // protected POST from a clean slate.
560
534
  if (prefersCdpReplayFirst(workflow)) {
561
535
  const i = next.indexOf('cdp-replay');
562
536
  if (i > 0) {
@@ -567,20 +541,15 @@ export function effectiveAutoLadder(
567
541
  return next;
568
542
  }
569
543
 
570
- /** Prefer CDP first when the workflow needs live-browser request state.
571
- *
572
- * Two generic cases qualify:
573
- * - bootstrap captures read fields from browser-observed requests
574
- * (`request_header` / `request_url_regex` / `request_body_regex`). A one-shot fetch-bootstrap can
575
- * also observe them, but it closes Chrome after minting; CDP can reuse the
576
- * same browser across MCP calls and retarget route/date-specific bootstraps.
577
- * - multi-step state-changing anti-bot flows (≥2 mutating requests plus a
578
- * bootstrap/state signal). Plain-fetch replay can't sustain those protected
579
- * POST sequences and can burn the per-IP budget before CDP runs.
580
- */
544
+ /** A multi-step, state-changing, anti-bot workflow: ≥2 mutating requests AND an
545
+ * anti-bot signal (a bootstrap block, or requests that depend on captured
546
+ * `${state.X}` tokens). Plain-fetch replay can't sustain its sequence of
547
+ * protected POSTs (each self-invalidates `_abck`); only the live-browser
548
+ * cdp-replay rung can — and it should run FIRST so the doomed fetch /
549
+ * fetch-bootstrap attempts don't pre-burn the per-IP .act budget. A plain
550
+ * multi-POST REST API (no bootstrap, no `${state.X}`) is NOT matched, so it
551
+ * keeps the cheap fetch-first order. */
581
552
  export function prefersCdpReplayFirst(workflow: Workflow): boolean {
582
- if (workflow.bootstrap?.captures?.some(isObservedRequestBootstrapCapture)) return true;
583
-
584
553
  const mutating = workflow.requests.filter((r) => {
585
554
  const m = (r.method ?? 'GET').toUpperCase();
586
555
  return r.effect === 'unsafe' || m === 'POST' || m === 'PUT' || m === 'PATCH' || m === 'DELETE';
@@ -595,14 +564,6 @@ export function prefersCdpReplayFirst(workflow: Workflow): boolean {
595
564
  return Boolean(workflow.bootstrap) || hasStateRefs;
596
565
  }
597
566
 
598
- function isObservedRequestBootstrapCapture(capture: BootstrapCapture): boolean {
599
- return (
600
- capture.source === 'request_header' ||
601
- capture.source === 'request_url_regex' ||
602
- capture.source === 'request_body_regex'
603
- );
604
- }
605
-
606
567
  function nextStateMissingBackend(
607
568
  ladder: ConcreteBackend[],
608
569
  backend: ConcreteBackend,
@@ -673,11 +634,10 @@ async function getOrMintCdpJar(
673
634
  bootstrapUrl: string | undefined,
674
635
  siteDir: string,
675
636
  forceFresh: boolean,
676
- workflow?: Workflow,
677
637
  ): Promise<MintedJar | null> {
678
638
  if (cdpJarMinterForTest) return cdpJarMinterForTest(baseUrl, bootstrapUrl);
679
639
  if (!forceFresh) {
680
- let cached = loadJar(siteDir, bootstrapUrl);
640
+ let cached = loadJar(siteDir);
681
641
  // A recording NEWER than the cached jar supersedes it — e.g. the user
682
642
  // re-recorded on a new IP, so the cached (old-IP) jar would tarpit. Drop the
683
643
  // stale cache and re-seed from the fresh recording below.
@@ -688,18 +648,7 @@ async function getOrMintCdpJar(
688
648
  // many sequential .act), strictly better than a synthetic cdp-browser mint
689
649
  // (low-trust → tarpitted even on a fresh IP). "The recording IS the
690
650
  // executable." Reuse the `rec` stat above so we don't re-glob.
691
- if (!cached && seedJarFromRecording(siteDir, rec, bootstrapUrl)) {
692
- cached = loadJar(siteDir, bootstrapUrl);
693
- }
694
- if (cached && workflow?.bootstrap) {
695
- const missing = missingObservedRequestCaptureNames(workflow.bootstrap, cached);
696
- if (missing.length > 0) {
697
- log(
698
- `cached jar is missing required browser-observed capture(s): ${missing.join(', ')} — re-mint`,
699
- );
700
- cached = null;
701
- }
702
- }
651
+ if (!cached && seedJarFromRecording(siteDir, rec, bootstrapUrl)) cached = loadJar(siteDir);
703
652
  if (cached) {
704
653
  const provenance =
705
654
  cached.source === 'recording'
@@ -718,12 +667,10 @@ async function getOrMintCdpJar(
718
667
  }
719
668
  let cf: CdpBrowserFetch | undefined;
720
669
  try {
721
- cf = (cdpBrowserFetchFactoryForTest ?? createCdpBrowserFetch)({ baseUrl, bootstrapUrl });
722
- const jar = await mintJarWithBootstrapWait(cf, workflow);
723
- if (jar.abckFlag !== '0' && jarHasAkamaiValidationSignals(jar.cookies)) {
670
+ cf = createCdpBrowserFetch({ baseUrl, bootstrapUrl });
671
+ const jar = await cf.mintJar();
672
+ if (jar.abckFlag !== '0') {
724
673
  log(`cdp jar minted with _abck~${jar.abckFlag}~ (not validated) — replay may be rejected`);
725
- } else if (!jarHasAkamaiValidationSignals(jar.cookies)) {
726
- log(`cdp jar minted generic bootstrap state (html=${jar.html.length}b)`);
727
674
  }
728
675
  saveJar(siteDir, jar);
729
676
  return jar;
@@ -735,43 +682,6 @@ async function getOrMintCdpJar(
735
682
  }
736
683
  }
737
684
 
738
- async function mintJarWithBootstrapWait(
739
- cf: CdpBrowserFetch,
740
- workflow: Workflow | undefined,
741
- ): Promise<MintedJar> {
742
- let jar = await cf.mintJar();
743
- const bootstrap = workflow?.bootstrap;
744
- if (!bootstrap || requiredObservedRequestCaptures(bootstrap).length === 0) return jar;
745
-
746
- const timeoutMs =
747
- typeof bootstrap.timeoutMs === 'number' && bootstrap.timeoutMs > 0
748
- ? bootstrap.timeoutMs
749
- : 30_000;
750
- const deadline = Date.now() + timeoutMs;
751
- let loggedWait = false;
752
-
753
- while (Date.now() < deadline) {
754
- const missing = missingObservedRequestCaptureNames(bootstrap, jar);
755
- if (missing.length === 0) return jar;
756
- if (!loggedWait) {
757
- log(
758
- `waiting up to ${timeoutMs}ms for browser-observed bootstrap request capture(s): ${missing.join(', ')}`,
759
- );
760
- loggedWait = true;
761
- }
762
- await sleepMs(Math.min(500, Math.max(1, deadline - Date.now())));
763
- jar = await cf.mintJar();
764
- }
765
-
766
- const missing = missingObservedRequestCaptureNames(bootstrap, jar);
767
- if (missing.length > 0) {
768
- log(
769
- `timed out waiting for browser-observed bootstrap request capture(s): ${missing.join(', ')}`,
770
- );
771
- }
772
- return jar;
773
- }
774
-
775
685
  /** Replay transport for the bootstrap-then-fetch path: PLAIN fetch that presents
776
686
  * the jar's exact UA (Akamai drops the jar on a UA mismatch). Cookies are
777
687
  * attached by executeWorkflow's RuntimeCookieJar from bootstrappedCredentials,
@@ -811,81 +721,6 @@ function makeProxyFetch(): typeof fetch | undefined {
811
721
  )) as typeof fetch;
812
722
  }
813
723
 
814
- type ObservedResponseSource = {
815
- observedRequests?: Array<{
816
- method: string;
817
- url: string;
818
- body?: string;
819
- source?: 'browser' | 'replay';
820
- response?: {
821
- status: number;
822
- headers: Record<string, string>;
823
- body?: string;
824
- };
825
- }>;
826
- };
827
-
828
- function makeObservedResponseFetch(
829
- source: ObservedResponseSource,
830
- fallbackFetch: typeof fetch,
831
- ): typeof fetch {
832
- return (async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
833
- const url =
834
- typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
835
- const method = (init?.method ?? 'GET').toUpperCase();
836
- const body = observedRequestBody(init?.body);
837
- const observed = findObservedResponse(source, method, url, body);
838
- if (observed) {
839
- log(`using bootstrap-observed response for ${method} ${redactUrlForLog(url)}`);
840
- return new Response(observed.body ?? '', {
841
- status: observed.status,
842
- headers: new Headers(observed.headers),
843
- });
844
- }
845
- return fallbackFetch(input, init);
846
- }) as typeof fetch;
847
- }
848
-
849
- function findObservedResponse(
850
- source: ObservedResponseSource,
851
- method: string,
852
- url: string,
853
- body: string | undefined,
854
- ): { status: number; headers: Record<string, string>; body?: string } | undefined {
855
- const observed = source.observedRequests ?? [];
856
- for (let i = observed.length - 1; i >= 0; i--) {
857
- const req = observed[i];
858
- if (!req?.response || req.response.body === undefined) continue;
859
- if (req.source === 'replay') continue;
860
- if (req.method.toUpperCase() !== method) continue;
861
- if (req.url !== url) continue;
862
- // Some CDP requestWillBeSent events omit postData even though the matching
863
- // response body is available. If the observed body exists, require an exact
864
- // body match. If CDP omitted it, fall back to exact method+URL; Google-style
865
- // batchexecute URLs carry session/request ids, so this still avoids serving a
866
- // response from a different bootstrap request.
867
- if (req.body !== undefined && req.body !== (body ?? undefined)) continue;
868
- return req.response;
869
- }
870
- return undefined;
871
- }
872
-
873
- function observedRequestBody(body: RequestInit['body'] | undefined): string | undefined {
874
- if (body === undefined || body === null) return undefined;
875
- if (typeof body === 'string') return body;
876
- if (body instanceof URLSearchParams) return body.toString();
877
- return undefined;
878
- }
879
-
880
- function redactUrlForLog(url: string): string {
881
- try {
882
- const u = new URL(url);
883
- return `${u.origin}${u.pathname}`;
884
- } catch {
885
- return url.slice(0, 80);
886
- }
887
- }
888
-
889
724
  /** A replay error that means the JAR is bad (clear it + re-mint), as opposed to a
890
725
  * transient IP rate-flag (NETWORK/RATE_LIMITED — a fresh jar won't help; back off). */
891
726
  function jarLikelyStale(result: ToolResult): boolean {
@@ -925,14 +760,14 @@ async function runFetchBootstrap(
925
760
  values: {},
926
761
  storage: [],
927
762
  };
928
- const paramsWithDefaults = await withWorkflowPreparedParams(tool, params);
763
+ const paramsWithDefaults = withWorkflowDefaults(tool.workflow, params);
929
764
  const bootstrapUrl = tool.workflow.bootstrap
930
765
  ? substituteString(tool.workflow.bootstrap.url, paramsWithDefaults, credentials, [])
931
766
  : undefined;
932
767
  const siteDir = pathResolve(tool.dir, '..');
933
768
 
934
769
  for (let attempt = 0; attempt < 2; attempt++) {
935
- const jar = await getOrMintCdpJar(baseUrl, bootstrapUrl, siteDir, attempt > 0, tool.workflow);
770
+ const jar = await getOrMintCdpJar(baseUrl, bootstrapUrl, siteDir, attempt > 0);
936
771
  if (!jar) {
937
772
  // Couldn't even launch the bootstrap browser → let the ladder escalate.
938
773
  const stateMissing = bootstrapFailureStateMissingResult(
@@ -957,7 +792,7 @@ async function runFetchBootstrap(
957
792
  // recording-seeded or cached jar is validated:true by construction, so the
958
793
  // cheap plain-fetch path is untouched; `=== false` (not falsy) leaves jars
959
794
  // without the field — older caches / test stubs — on the original path.
960
- if (jar.validated === false && jarHasAkamaiValidationSignals(jar.cookies)) {
795
+ if (jar.validated === false) {
961
796
  log(
962
797
  'fetch-bootstrap: minted jar unvalidated (no _abck~0~/bm_sv) — plain-fetch replay doomed; escalating to cdp-replay',
963
798
  );
@@ -1002,7 +837,7 @@ async function runFetchBootstrap(
1002
837
  const result = await tool.toolFn(paramsWithDefaults, {
1003
838
  credentials: bootstrappedCredentials,
1004
839
  initialState: captureResult.state,
1005
- fetchImpl: makeObservedResponseFetch(jar, makeJarUaFetch(jar.ua)),
840
+ fetchImpl: makeJarUaFetch(jar.ua),
1006
841
  });
1007
842
 
1008
843
  if (result.ok) return result;
@@ -1060,7 +895,7 @@ async function runCdpReplay(
1060
895
  values: {},
1061
896
  storage: [],
1062
897
  };
1063
- const paramsWithDefaults = await withWorkflowPreparedParams(tool, params);
898
+ const paramsWithDefaults = withWorkflowDefaults(tool.workflow, params);
1064
899
  const bootstrapUrl = tool.workflow.bootstrap
1065
900
  ? substituteString(tool.workflow.bootstrap.url, paramsWithDefaults, credentials, [])
1066
901
  : undefined;
@@ -1068,12 +903,6 @@ async function runCdpReplay(
1068
903
  const siteDir = pathResolve(tool.dir, '..');
1069
904
  const poolKey = tool.site;
1070
905
  const pooled = cdpPool?.get(poolKey);
1071
- if (pooled && bootstrapUrl && pooled.bootstrapUrl !== bootstrapUrl) {
1072
- log(
1073
- `cdp-replay: reusing pooled Chrome session for new bootstrap (${pooled.bootstrapUrl} → ${bootstrapUrl})`,
1074
- );
1075
- pooled.setBootstrapUrl(bootstrapUrl);
1076
- }
1077
906
  const ownsSession = !pooled;
1078
907
 
1079
908
  let cf: CdpBrowserFetch;
@@ -1099,7 +928,7 @@ async function runCdpReplay(
1099
928
  }
1100
929
 
1101
930
  try {
1102
- const jar = await mintJarWithBootstrapWait(cf, tool.workflow);
931
+ const jar = await cf.mintJar();
1103
932
  const bootstrappedCredentials: CredentialStore = {
1104
933
  ...credentials,
1105
934
  cookies: [
@@ -1131,7 +960,7 @@ async function runCdpReplay(
1131
960
  const result = await tool.toolFn(paramsWithDefaults, {
1132
961
  credentials: bootstrappedCredentials,
1133
962
  initialState: captureResult.state,
1134
- fetchImpl: makeObservedResponseFetch(jar, cf.fetchImpl),
963
+ fetchImpl: cf.fetchImpl,
1135
964
  });
1136
965
 
1137
966
  if (result.ok) {
@@ -1145,13 +974,11 @@ async function runCdpReplay(
1145
974
  if (!cdpPool && ownsSession) await cf.close();
1146
975
  }
1147
976
  } else {
1148
- // A workflow-level failure (BAD_RESPONSE/STATE_MISSING/FORBIDDEN/etc.) is
1149
- // not evidence that the Chrome/CDP session is dead. Keep pooled sessions
1150
- // alive so the next MCP call can reuse/retarget the browser; only the
1151
- // catch path below evicts sessions after an actual CDP exception.
1152
- if (cdpPool && ownsSession) {
1153
- cdpPool.set(poolKey, cf);
1154
- } else if (!cdpPool && ownsSession) {
977
+ if (ownsSession) {
978
+ await cf.close();
979
+ } else if (cdpPool && cdpToolResultImpliesDeadSession(result)) {
980
+ cdpPool.delete(poolKey);
981
+ log('cdp-replay: evicted degraded session from pool');
1155
982
  await cf.close();
1156
983
  }
1157
984
  }
@@ -1223,45 +1050,6 @@ function jarBootstrapCaptureState(
1223
1050
  ),
1224
1051
  };
1225
1052
  }
1226
- } else if (capture.source === 'request_header') {
1227
- const value = captureObservedRequestHeader(jar, capture);
1228
- if (value !== undefined && value !== null && value !== '') state[capture.name] = value;
1229
- else if (capture.required !== false) {
1230
- return {
1231
- ok: false,
1232
- result: bootstrapCaptureMissingResult(
1233
- capture,
1234
- `Required bootstrap capture "${capture.name}" (request_header ${capture.header}) did not match an observed browser request.`,
1235
- 'producer_ran_value_absent',
1236
- ),
1237
- };
1238
- }
1239
- } else if (capture.source === 'request_url_regex') {
1240
- const value = captureObservedRequestUrlRegex(jar, capture);
1241
- if (value !== undefined && value !== null && value !== '') state[capture.name] = value;
1242
- else if (capture.required !== false) {
1243
- return {
1244
- ok: false,
1245
- result: bootstrapCaptureMissingResult(
1246
- capture,
1247
- `Required bootstrap capture "${capture.name}" (request_url_regex ${capture.pattern}) did not match an observed browser request.`,
1248
- 'producer_ran_value_absent',
1249
- ),
1250
- };
1251
- }
1252
- } else if (capture.source === 'request_body_regex') {
1253
- const value = captureObservedRequestBodyRegex(jar, capture);
1254
- if (value !== undefined && value !== null && value !== '') state[capture.name] = value;
1255
- else if (capture.required !== false) {
1256
- return {
1257
- ok: false,
1258
- result: bootstrapCaptureMissingResult(
1259
- capture,
1260
- `Required bootstrap capture "${capture.name}" (request_body_regex ${capture.pattern}) did not match an observed browser request body.`,
1261
- 'producer_ran_value_absent',
1262
- ),
1263
- };
1264
- }
1265
1053
  } else if (capture.required !== false) {
1266
1054
  // response_header / dom_* can't be resolved from a closed browser jar.
1267
1055
  return {
@@ -1277,134 +1065,6 @@ function jarBootstrapCaptureState(
1277
1065
  return { ok: true, state };
1278
1066
  }
1279
1067
 
1280
- function captureObservedRequestHeader(
1281
- jar: MintedJar,
1282
- capture: Extract<BootstrapCapture, { source: 'request_header' }>,
1283
- ): string | string[] | undefined {
1284
- return captureObservedRequestValueFromObserved(jar.observedRequests ?? [], capture, (req) =>
1285
- headerValue(req.headers, capture.header),
1286
- );
1287
- }
1288
-
1289
- function captureObservedRequestUrlRegex(
1290
- jar: MintedJar,
1291
- capture: Extract<BootstrapCapture, { source: 'request_url_regex' }>,
1292
- ): string | string[] | undefined {
1293
- return captureObservedRequestValueFromObserved(jar.observedRequests ?? [], capture, (req) => {
1294
- try {
1295
- return req.url.match(new RegExp(capture.pattern))?.[capture.group ?? 1];
1296
- } catch {
1297
- return undefined;
1298
- }
1299
- });
1300
- }
1301
-
1302
- function captureObservedRequestBodyRegex(
1303
- jar: MintedJar,
1304
- capture: Extract<BootstrapCapture, { source: 'request_body_regex' }>,
1305
- ): string | string[] | undefined {
1306
- return captureObservedRequestValueFromObserved(jar.observedRequests ?? [], capture, (req) => {
1307
- if (typeof req.body !== 'string') return undefined;
1308
- try {
1309
- const match = req.body.match(new RegExp(capture.pattern));
1310
- return match?.[capture.group ?? 1] ?? match?.[0];
1311
- } catch {
1312
- return undefined;
1313
- }
1314
- });
1315
- }
1316
-
1317
- function requiredObservedRequestCaptures(
1318
- bootstrap: NonNullable<Workflow['bootstrap']>,
1319
- ): Array<
1320
- Extract<
1321
- BootstrapCapture,
1322
- { source: 'request_header' | 'request_url_regex' | 'request_body_regex' }
1323
- >
1324
- > {
1325
- return (bootstrap.captures ?? []).filter(
1326
- (
1327
- capture,
1328
- ): capture is Extract<
1329
- BootstrapCapture,
1330
- { source: 'request_header' | 'request_url_regex' | 'request_body_regex' }
1331
- > =>
1332
- capture.required !== false &&
1333
- (capture.source === 'request_header' ||
1334
- capture.source === 'request_url_regex' ||
1335
- capture.source === 'request_body_regex'),
1336
- );
1337
- }
1338
-
1339
- function missingObservedRequestCaptureNames(
1340
- bootstrap: NonNullable<Workflow['bootstrap']>,
1341
- jar: MintedJar,
1342
- ): string[] {
1343
- const missing: string[] = [];
1344
- for (const capture of requiredObservedRequestCaptures(bootstrap)) {
1345
- const value =
1346
- capture.source === 'request_header'
1347
- ? captureObservedRequestHeader(jar, capture)
1348
- : capture.source === 'request_url_regex'
1349
- ? captureObservedRequestUrlRegex(jar, capture)
1350
- : captureObservedRequestBodyRegex(jar, capture);
1351
- if (value === undefined || value === '' || (Array.isArray(value) && value.length === 0)) {
1352
- missing.push(capture.name);
1353
- }
1354
- }
1355
- return missing;
1356
- }
1357
-
1358
- function captureObservedRequestValueFromObserved(
1359
- observed: Array<{
1360
- method: string;
1361
- url: string;
1362
- headers: Record<string, string>;
1363
- body?: string;
1364
- source?: 'browser' | 'replay';
1365
- }>,
1366
- capture: Extract<
1367
- BootstrapCapture,
1368
- { source: 'request_header' | 'request_url_regex' | 'request_body_regex' }
1369
- >,
1370
- pickValue: (req: {
1371
- method: string;
1372
- url: string;
1373
- headers: Record<string, string>;
1374
- body?: string;
1375
- source?: 'browser' | 'replay';
1376
- }) => string | undefined,
1377
- ): string | string[] | undefined {
1378
- let urlRe: RegExp | null = null;
1379
- if (capture.urlPattern) {
1380
- try {
1381
- urlRe = new RegExp(capture.urlPattern);
1382
- } catch {
1383
- return undefined;
1384
- }
1385
- }
1386
- const method = capture.method?.toUpperCase();
1387
- const matches: string[] = [];
1388
- for (const req of observed) {
1389
- if (req.source === 'replay') continue;
1390
- if (method && req.method.toUpperCase() !== method) continue;
1391
- if (urlRe && !urlRe.test(req.url)) continue;
1392
- const value = pickValue(req);
1393
- if (value !== undefined && value !== '') matches.push(value);
1394
- }
1395
- if (capture.mode === 'all') return matches.length ? matches : undefined;
1396
- if (capture.mode === 'first') return matches[0];
1397
- return matches[matches.length - 1];
1398
- }
1399
-
1400
- function headerValue(headers: Record<string, string>, header: string): string | undefined {
1401
- const headerName = header.toLowerCase();
1402
- for (const [name, value] of Object.entries(headers)) {
1403
- if (name.toLowerCase() === headerName) return value;
1404
- }
1405
- return undefined;
1406
- }
1407
-
1408
1068
  function bootstrapFailureStateMissingResult(
1409
1069
  workflow: Workflow,
1410
1070
  message: string,
@@ -1532,12 +1192,6 @@ export async function evaluateBootstrapCapture(
1532
1192
  },
1533
1193
  { origin: capture.origin, key: capture.key },
1534
1194
  );
1535
- case 'request_header':
1536
- return undefined;
1537
- case 'request_url_regex':
1538
- return undefined;
1539
- case 'request_body_regex':
1540
- return undefined;
1541
1195
  case 'cookie':
1542
1196
  return undefined;
1543
1197
  }
@@ -1545,38 +1199,30 @@ export async function evaluateBootstrapCapture(
1545
1199
 
1546
1200
  /** Per-site stealth fetcher; bootstrap pays its ~12s once per process. */
1547
1201
  /** Mint `${state.X}` values from the stealth bootstrap session for a workflow
1548
- * that declares a bootstrap block. Satisfies `cookie`, `html_regex`,
1549
- * `response_header`, and observed request captures from the cookies / HTML /
1550
- * headers / observed browser requests the stealth navigation minted — all one
1551
- * consistent session as the transport cookies, so a token the later API POST
1552
- * checks against the session resolves.
1202
+ * that declares a bootstrap block. Satisfies `cookie`, `html_regex`, and
1203
+ * `response_header` captures from the cookies / HTML / response headers the
1204
+ * stealth navigation minted — all one consistent session as the transport
1205
+ * cookies, so a token the later API POST checks against the session resolves.
1553
1206
  * `dom_*` / storage sources need a live page and are left for the
1554
1207
  * fetch-bootstrap rung (the compile prompt steers replay-safe session tokens
1555
1208
  * to cookie/html_regex, which this covers). */
1556
1209
  async function stealthBootstrapState(
1557
1210
  sf: StealthFetch,
1558
1211
  bootstrap: NonNullable<ResolvedTool['workflow']['bootstrap']>,
1559
- tokens?: TokenCache,
1560
1212
  ): Promise<Record<string, unknown>> {
1561
1213
  const state: Record<string, unknown> = {};
1562
1214
  const captures = bootstrap.captures ?? [];
1563
1215
  const supported = captures.filter(
1564
- (c) =>
1565
- c.source === 'cookie' ||
1566
- c.source === 'html_regex' ||
1567
- c.source === 'response_header' ||
1568
- c.source === 'request_header' ||
1569
- c.source === 'request_url_regex' ||
1570
- c.source === 'request_body_regex',
1216
+ (c) => c.source === 'cookie' || c.source === 'html_regex' || c.source === 'response_header',
1571
1217
  );
1572
1218
  if (supported.length === 0) return state;
1573
- const bootstrapTokens = tokens ?? (await sf.ensureBootstrapped());
1219
+ const tokens = await sf.ensureBootstrapped();
1574
1220
  for (const cap of supported) {
1575
1221
  if (cap.source === 'cookie') {
1576
- const hit = bootstrapTokens.cookies.find((c) => c.name === cap.cookie);
1222
+ const hit = tokens.cookies.find((c) => c.name === cap.cookie);
1577
1223
  if (hit) state[cap.name] = hit.value;
1578
1224
  } else if (cap.source === 'html_regex') {
1579
- const html = bootstrapTokens.bootstrapHtml ?? '';
1225
+ const html = tokens.bootstrapHtml ?? '';
1580
1226
  try {
1581
1227
  const m = html.match(new RegExp(cap.pattern));
1582
1228
  const v = m?.[cap.group ?? 1];
@@ -1585,43 +1231,8 @@ async function stealthBootstrapState(
1585
1231
  // invalid regex — leave unset; substitution will surface STATE_MISSING
1586
1232
  }
1587
1233
  } else if (cap.source === 'response_header') {
1588
- const v = bootstrapTokens.bootstrapResponseHeaders?.[cap.header.toLowerCase()];
1234
+ const v = tokens.bootstrapResponseHeaders?.[cap.header.toLowerCase()];
1589
1235
  if (v !== undefined && v !== '') state[cap.name] = v;
1590
- } else if (cap.source === 'request_header') {
1591
- const v = captureObservedRequestValueFromObserved(
1592
- bootstrapTokens.observedRequests ?? [],
1593
- cap,
1594
- (req) => headerValue(req.headers, cap.header),
1595
- );
1596
- if (v !== undefined && v !== null && v !== '') state[cap.name] = v;
1597
- } else if (cap.source === 'request_url_regex') {
1598
- const v = captureObservedRequestValueFromObserved(
1599
- bootstrapTokens.observedRequests ?? [],
1600
- cap,
1601
- (req) => {
1602
- try {
1603
- return req.url.match(new RegExp(cap.pattern))?.[cap.group ?? 1];
1604
- } catch {
1605
- return undefined;
1606
- }
1607
- },
1608
- );
1609
- if (v !== undefined && v !== null && v !== '') state[cap.name] = v;
1610
- } else if (cap.source === 'request_body_regex') {
1611
- const v = captureObservedRequestValueFromObserved(
1612
- bootstrapTokens.observedRequests ?? [],
1613
- cap,
1614
- (req) => {
1615
- if (typeof req.body !== 'string') return undefined;
1616
- try {
1617
- const match = req.body.match(new RegExp(cap.pattern));
1618
- return match?.[cap.group ?? 1] ?? match?.[0];
1619
- } catch {
1620
- return undefined;
1621
- }
1622
- },
1623
- );
1624
- if (v !== undefined && v !== null && v !== '') state[cap.name] = v;
1625
1236
  }
1626
1237
  }
1627
1238
  return state;