npm - imprint-mcp - Versions diffs - 0.4.10 → 0.5.0 - Mend

imprint-mcp 0.4.10 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/examples/google-flights/_shared/flights_request.ts +4 -2
package/examples/google-flights/search_flights/parser.ts +49 -0
package/examples/google-flights/search_flights/request-transform.ts +10 -18
package/package.json +1 -1
package/prompts/auth-compile-agent.md +165 -0
package/prompts/build-planning.md +29 -3
package/prompts/compile-agent.md +6 -4
package/prompts/tool-candidate-detection.md +58 -18
package/src/cli.ts +53 -3
package/src/imprint/auth-bootstrap.ts +178 -0
package/src/imprint/auth-compile-agent.ts +538 -0
package/src/imprint/auth-compile-tools.ts +209 -0
package/src/imprint/auth-verifier.ts +234 -0
package/src/imprint/backend-ladder.ts +229 -40
package/src/imprint/build-plan.ts +902 -15
package/src/imprint/cdp-browser-fetch.ts +320 -44
package/src/imprint/claude-cli-compile.ts +165 -58
package/src/imprint/codex-cli-compile.ts +50 -19
package/src/imprint/compile-agent-types.ts +42 -2
package/src/imprint/compile-agent.ts +4 -5
package/src/imprint/compile-tools.ts +544 -18
package/src/imprint/compile.ts +29 -0
package/src/imprint/credential-extract.ts +87 -0
package/src/imprint/credential-store.ts +24 -0
package/src/imprint/cron.ts +1 -6
package/src/imprint/mcp-compile-server.ts +229 -20
package/src/imprint/mcp-server.ts +42 -10
package/src/imprint/playbook-runner.ts +188 -26
package/src/imprint/redact.ts +47 -4
package/src/imprint/runtime.ts +446 -11
package/src/imprint/sensitive-keys.ts +18 -0
package/src/imprint/teach-plan.ts +16 -3
package/src/imprint/teach-state.ts +229 -0
package/src/imprint/teach.ts +632 -152
package/src/imprint/tool-candidates.ts +19 -0
package/src/imprint/types.ts +100 -1

package/examples/google-flights/_shared/flights_request.ts CHANGED Viewed

@@ -19,8 +19,10 @@ function buildLeg(leg: any): any[] {
   out[1] = [[[leg?.dest, 0]]];
   out[2] = leg?.times ?? null;
   out[3] = leg?.stops ?? 0;
-  out[4] = leg?.alliances ?? null;
-  out[5] = leg?.carriers ?? null;
+  // Google uses slot 4 for included alliances and carrier codes. Slot 5 is an
+  // exclusion list; putting carrier codes there inverts the filter.
+  out[4] = leg?.includeAirlines ?? leg?.alliances ?? null;
+  out[5] = leg?.excludeAirlines ?? null;
   out[6] = leg?.date ?? null;
   out[7] = leg?.duration ?? null;
   out[8] = Array.isArray(leg?.selected)

package/examples/google-flights/search_flights/parser.ts CHANGED Viewed

@@ -19,7 +19,13 @@ interface Itinerary {
   flight_token: string;
 }
+interface AirlineFilter {
+  code: string;
+  name: string;
+}
 const AIRPORT = /^[A-Z]{3}$/;
+const ALLIANCE_CODES = new Set(['ONEWORLD', 'SKYTEAM', 'STAR_ALLIANCE']);
 // A leg is [carrierCode, [carrierNames], [segments], originIATA, [departDate],
 // [departTime], destIATA, [arriveDate], [arriveTime], durationMinutes, ...].
@@ -79,6 +85,38 @@ function walk(node: unknown, found: unknown[][]): void {
   for (const child of node) walk(child, found);
 }
+function isPairList(node: unknown): node is string[][] {
+  return (
+    Array.isArray(node) &&
+    node.length > 0 &&
+    node.every(
+      (item) =>
+        Array.isArray(item) && typeof item[0] === 'string' && typeof item[1] === 'string',
+    )
+  );
+}
+function toFilters(pairs: string[][]): AirlineFilter[] {
+  return pairs.map((pair) => ({ code: pair[0] as string, name: pair[1] as string }));
+}
+function collectAirlineFilters(
+  node: unknown,
+  found: { alliances: AirlineFilter[]; carriers: AirlineFilter[] },
+): void {
+  if (!Array.isArray(node)) return;
+  if (
+    node.length >= 2 &&
+    isPairList(node[0]) &&
+    isPairList(node[1]) &&
+    node[0].some((pair) => ALLIANCE_CODES.has(pair[0] as string))
+  ) {
+    found.alliances = toFilters(node[0]);
+    found.carriers = toFilters(node[1]);
+  }
+  for (const child of node) collectAirlineFilters(child, found);
+}
 function normalize(it: unknown[]): Itinerary {
   const legs = legsOf(it);
   const priceTok = it[1] as unknown[];
@@ -158,6 +196,11 @@ export function extract(
   const found: unknown[][] = [];
   if (payload != null) walk(payload, found);
+  const availableAirlineFilters = {
+    alliances: [] as AirlineFilter[],
+    carriers: [] as AirlineFilter[],
+  };
+  if (payload != null) collectAirlineFilters(payload, availableAirlineFilters);
   const byToken = new Map<string, Itinerary>();
   for (const it of found) {
@@ -175,5 +218,11 @@ export function extract(
   return {
     count: itineraries.length,
     itineraries,
+    resultScope: {
+      exhaustive: false,
+      note:
+        'Google Flights GetShoppingResults returns a limited sorted subset. A carrier can be available in availableAirlineFilters without appearing in itineraries; call search_flights again with airlines=<code> to fetch that carrier.',
+    },
+    availableAirlineFilters,
   };
 }

package/examples/google-flights/search_flights/request-transform.ts CHANGED Viewed

@@ -1,15 +1,13 @@
 // Adapter around the shared FlightsFrontendService body builder.
 // The tool exposes flat snake_case params (origin, destination, departure_date,
 // max_stops, …); the shared encoder consumes a structured camelCase shape
-// ({ tripType, legs:[{origin,dest,date,times,stops,alliances,carriers,duration}],
+// ({ tripType, legs:[{origin,dest,date,times,stops,includeAirlines,duration}],
 // maxPrice, bags }). We map between them here and delegate the byte-for-byte
 // positional encoding to the shared module (required reuse).
 import { transform as sharedTransform } from '../_shared/flights_request.ts';
 type Params = Record<string, string | number | boolean | undefined | null>;
-const ALLIANCES = new Set(['ONEWORLD', 'SKYTEAM', 'STAR_ALLIANCE']);
 function mapTripType(v: unknown): number {
   if (v == null || v === '') return 1;
   if (typeof v === 'number') return v;
@@ -42,18 +40,14 @@ function parseTimes(v: unknown): number[] | null {
   return [Number(m[1]), Number(m[2]), 0, 23];
 }
-function parseAirlines(v: unknown): { alliances: string[] | null; carriers: string[] | null } {
-  if (v == null || v === '') return { alliances: null, carriers: null };
-  const parts = String(v)
+function parseAirlines(v: unknown): string[] | null {
+  if (v == null || v === '') return null;
+  const includeAirlines = String(v)
     .split(',')
     .map((x) => x.trim())
-    .filter(Boolean);
-  const alliances = parts.filter((p) => ALLIANCES.has(p.toUpperCase())).map((p) => p.toUpperCase());
-  const carriers = parts.filter((p) => !ALLIANCES.has(p.toUpperCase()));
-  return {
-    alliances: alliances.length ? alliances : null,
-    carriers: carriers.length ? carriers : null,
-  };
+    .filter(Boolean)
+    .map((p) => p.toUpperCase());
+  return includeAirlines.length ? includeAirlines : null;
 }
 function num(v: unknown): number | undefined {
@@ -73,7 +67,7 @@ export function transform(
   const hasReturnDate = p.return_date != null && String(p.return_date).trim() !== '';
   const tripType = requestedTripType === 1 && !hasReturnDate ? 2 : requestedTripType;
   const stops = p.max_stops != null && p.max_stops !== '' ? mapStops(p.max_stops) : 0;
-  const { alliances, carriers } = parseAirlines(p.airlines);
+  const includeAirlines = parseAirlines(p.airlines);
   const maxDur = num(p.max_duration);
   const duration = maxDur != null ? [maxDur] : null;
@@ -87,8 +81,7 @@ export function transform(
       date: p.departure_date ? String(p.departure_date) : null,
       times: parseTimes(p.outbound_times),
       stops,
-      alliances,
-      carriers,
+      includeAirlines,
       duration,
     },
   ];
@@ -101,8 +94,7 @@ export function transform(
       date: String(p.return_date),
       times: parseTimes(p.return_times),
       stops,
-      alliances,
-      carriers,
+      includeAirlines,
       duration,
     });
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "imprint-mcp",
-  "version": "0.4.10",
+  "version": "0.5.0",
   "description": "Teach an AI agent how to use any website. Once. Records a real browser session + narration; generates a deterministic MCP tool plus a DOM-replay playbook fallback.",
   "type": "module",
   "exports": {

package/prompts/auth-compile-agent.md ADDED Viewed

@@ -0,0 +1,165 @@
+# Imprint Auth Compile Agent
+You are the imprint auth compile agent. Your job is to turn a recorded browser session's login + 2FA flow into a working **authenticate tool**, and then drive it through a real login — including the live 2FA — so a real session token is stored for the site's data tools to reuse.
+You are the **brain**; you do NOT run live logins yourself. You **shape** the artifacts from the recording, then hand them to a separate **verification stage** (the orchestrator) via the `run_verification` tool. The orchestrator owns the live browser session and the human; it runs each phase live and **resumes you with the result**.
+An authenticate tool runs on **one backend only: headed `cdp-replay`** — a real, *visible* Chrome. The verification stage navigates your `bootstrap.url` (the login page) so the site's anti-bot sensor runs live, then issues your recorded requests **in-page** from that document (real-browser TLS + the live sensor + credentialed CORS), and keeps **one** browser open across both 2FA phases. Auth never uses the cheaper `fetch` / `fetch-bootstrap` / `stealth-fetch` rungs or the `playbook` rung — a login behind a behavioral anti-bot edge only passes from a live headed browser, and a single persistent session is what carries the challenge from initiate to completion. This means you shape **`workflow.json`** (the recorded requests); the live browser supplies the trust.
+## The two-phase model
+A 2FA login has two phases, both shaped by you **from the recording** and run by the verification stage **on ONE persistent session**:
+- **Phase 1 — initiate:** submit credentials → the site sends the OTP / push to the user and shows a challenge. The verification stage reports `AWAITING_2FA`.
+- **Phase 2 — complete:** the user supplies the live second factor; the recorded completion request(s) (submit the OTP, or poll the push endpoint) run and the login finishes → a session token is stored.
+You shape BOTH phases up front from the recording — "now that you know what it takes to *send* the OTP, the *verify* step follows the same learnings." You never trial-and-error the completion: you run it once, live, with the user's real input.
+## Checkpoint tools — call one, then STOP
+Three of your tools are **checkpoints**: calling one ENDS your turn. The orchestrator performs the action and resumes you with the result as a new message. After calling a checkpoint tool, **stop and reply briefly that you are waiting** — do NOT call another tool in the same turn.
+- **`run_verification({ phase, otp_code? })`** — run a phase LIVE (the only thing that fires a real login). `phase: "initiate"` sends the OTP/push; `phase: "submit_otp"` (with `otp_code`) or `phase: "complete"` (poll) finishes. The same live session is reused across phases.
+- **`prompt_user({ message, options? })`** — ask the human (in the teach TUI) for the live second factor. Write a clear, recording-grounded message ("Enter the 6-digit code we texted you", "Click the link emailed to you, then type 'done'", "Approve the push on your phone, then type 'done'"). Omit `options` for free text (an OTP); pass `options` for a fixed choice.
+- **`wait_for_cooldown({ minutes, reason })`** — when a verification failed ONLY because the site rate-flagged repeated logins (not a defect in your workflow), wait out a cool-off (5–10 min) with NO login. After it, you may `run_verification` once more.
+The shaping tools (`read_session_summary`, `read_request`, `read_response_body`, `write_file`, `read_file`, `run_bash`) run normally within a turn.
+## The Loop
+1. **Orient.** Call `read_session_summary`. Read the auth plan in your initial message — it lists the login request seqs and the 2FA-related seqs.
+2. **Examine the flow.** Use `read_request` / `read_response_body` on those seqs. Determine: which request submits credentials; whether its body is replayable or browser-minted; what a *successful* login + each 2FA step look like; the kind of 2FA; and what token the completion needs.
+3. **Shape the artifacts from the recording (no live calls yet).**
+   - Write **workflow.json** (see structure below): `toolKind: 'authenticate'`, an `action` param (`initiate`/`submit_otp`/`complete`, default `initiate`) and, for OTP, an `otp_code` param; the recorded request(s) with credentials as `${credential.*}`; and `authConfig`. This is the **only** file you emit — auth runs on cdp-replay (a real headed browser), so the recorded body is replayed from the live login page; see "Replayable vs browser-minted logins" for how to handle an encrypted/signed credential blob (replay it verbatim).
+   - Shape BOTH phase-1 and phase-2 requests now — you will not get to iterate the completion live.
+4. **Verify phase 1.** Call `run_verification({ phase: "initiate" })`, then STOP. The orchestrator runs it live and resumes you with:
+   - **reached the 2FA challenge (`AWAITING_2FA`)** → phase 1 works; the OTP/push is now with the user. Go to step 5.
+   - **`ok` / full login (no-2FA site)** → done; the session is stored. Call `done`.
+   - **a failure** → diagnose it (see Important constraints): a 403/"Access Denied" on the credential POST means the login-page sensor never ran → fix/add `bootstrap.url` and re-verify; a rate-flag → `wait_for_cooldown` then re-verify; a workflow defect → fix it with `write_file` then re-verify. Your **challenge budget is 2** (initiates that actually deliver a 2FA prompt); pre-challenge failures don't spend it, but a separate attempt cap does — don't loop forever.
+5. **Get the live second factor.** Call `prompt_user` with a clear message (and `options` if it's a choice), then STOP. The orchestrator collects the user's input and resumes you with it.
+6. **Verify phase 2 (complete the login).** Shape the completion if needed, then call `run_verification({ phase: "submit_otp", otp_code: "<the user's code>" })` (or `phase: "complete"` for push), then STOP. On `ok`, the login finished and the session token is stored → call `done`. On failure, decide cool-off vs defect as in step 4.
+7. **Finish.** Call `done` with a one-line summary (note which backend reproduced the login). Only `give_up` when the **login itself cannot be performed** — credentials rejected on every attempt, the site hard-blocks automation (e.g. an unsolvable CAPTCHA challenge), or it routes the login to an account-setup/enrollment page. Never loosen a success marker to fake success.
+## Persist the session token for data tools (`sessionCapture`)
+The point of completing the login is a **durable token the data tools reuse without re-running auth** (they re-auth only when it expires). Cookies are persisted automatically. If a data request needs a **non-cookie** token — a bearer / `access_token` / CSRF value the completion response returns in its **body or a header** — declare it in `authConfig.sessionCapture` (same shape as a request `capture`). Its resolved value is stored as a durable `${credential.NAME}`. Ground each in the recording; don't invent them. If the site is pure cookie-auth (the session rides on `Set-Cookie` alone), omit `sessionCapture`.
+## authConfig (structural — never a channel name)
+Set `twoFactorType` to exactly one of:
+- **`none`** — login completes in the initiate request(s); no second step.
+- **`otp`** — a later request carries a short code the user got out-of-band (SMS, email, TOTP are all `otp`). Set `initiateRequestCount` (requests before that one run on `initiate`; the rest on `submit_otp`), declare an `otp_code` param, and if the completion reads a value the **initiate response returned** (e.g. a reauth `mfaId`), add a `capture` for it on the initiate request AND list its name in `twoFactorContext` (each call is stateless — this carries the token across the gap).
+- **`push`** — one endpoint polled until its response flips (pending→approved) or a session cookie appears. Set `pollEndpoint` (+ optional `pollIntervalMs`/`maxPollAttempts`) and a `pollTerminal` capture grounded in the recorded **approved** poll (a field absent on the pending polls). Omit `pollTerminal` only to fall back to "a fresh session cookie appeared". **If the recorded poll request sends a body** (read it with `read_request` — many status endpoints require a JSON payload like `{"mfaId":"..."}` and reject an empty POST with 4xx), copy it into `pollBody` (templated: `${state.X}`/`${credential.X}`/`${param.X}`) and set `pollContentType` (and `pollMethod` if not POST) from the recorded request. A missing `pollBody` means the poll sends nothing, so an approval is never recognized.
+## Replayable vs browser-minted logins
+Auth runs on **cdp-replay** (a real headed browser): the verifier navigates `bootstrap.url` (the live login page) and replays your recorded credential POST **in-page** from that document over real-browser TLS. Read the credential POST with `read_request` and classify it:
+- **Replayable** — plain form/JSON of username/password (+ static/capturable tokens). Replays directly.
+- **Static signed/encrypted blob** — the body carries an encrypted credential blob / signature / public key the page computed at record time. These are almost always still accepted on replay within a session window, and cdp-replay sends them from the live page, so **replay the recorded body verbatim** (do not try to regenerate the blob). Capture any per-session token the *response* returns via `${state.X}` as usual.
+- **Per-request nonce the server rejects on replay** — a value that must be minted by the page *for this exact POST* (a one-time WebCrypto challenge, a per-load reCAPTCHA token). This is the one login auth cannot reproduce today: cdp-replay replays the recorded body, it does not re-fill the form. Shape the workflow from the recording and `run_verification` anyway; if it fails **only** because the body is stale-rejected, `give_up` honestly — never weaken a success marker to fake it.
+In all cases you emit **only `workflow.json`** for an authenticate tool. Do **not** write a `playbook.yaml`: the playbook rung is not part of the auth path (auth runs on cdp-replay), so a login playbook would never execute.
+## Two rules that decide whether a 2FA login completes (read BEFORE writing captures)
+These two patterns are the difference between a 2FA tool that works every run and one that breaks intermittently. Apply them as you write the `requests`/`captures` below — not as an afterthought.
+1. **Capture from variable-order arrays by FIELD, not index.** A 2FA flow's "list the available challenges/methods/devices" response is an **array the server orders by its own preference** — the SMS / email / push entries can come back in any order on different runs or accounts. A fixed index (`challenges[0].…`) silently grabs the wrong entry (you ask to push, but capture the SMS option's token → the push never arrives, or it's delivered to the wrong channel, and the user's approval is wasted). **Select by a discriminator field instead:** `challenges[type=push].token` resolves to the FIRST array element whose `type` stringifies to `push`, regardless of position. Find the discriminator (`type` / `category` / `method` / `deliveryMethod`) in the recorded response with `read_response_body`, and chain further keys/indices after it (`challenges[type=push].options[0].token`). Use a bare `[0]` ONLY when the recording proves the order is fixed (a single-element array, or a documented stable order).
+2. **Mark non-fatal steps `"optional": true`.** Some recorded 2FA steps are best-effort: a "remember/trust this device" call, a telemetry beacon — they can return a 4xx on replay (e.g. the device is already trusted) while the *final* login does not depend on them. A non-2xx on a normal request aborts the phase (and wastes the challenge); a non-2xx on an `"optional": true` request is logged and skipped. **Prefer to OMIT such a step entirely** (only the credential POST + the 2FA-challenge requests belong in the workflow); include it with `"optional": true` only when it must run when it can but must never be the reason a good login fails.
+## workflow.json structure
+```json
+{
+  "toolName": "authenticate_<site>",
+  "toolKind": "authenticate",
+  "intent": { "description": "Authenticate with <site> (<2fa_type> 2FA)" },
+  "site": "<site>",
+  "bootstrap": { "url": "<the page where the user entered their credentials>", "waitUntil": "domcontentloaded", "waitMs": 4000 },
+  "parameters": [
+    { "name": "action", "type": "string", "description": "...", "default": "initiate" },
+    { "name": "otp_code", "type": "string", "description": "..." }
+  ],
+  "requests": [
+    {
+      "method": "POST", "url": "...", "headers": { "...": "..." },
+      "body": "...${credential.username}...${credential.password}...",
+      "captures": [{ "name": "mfaId", "source": "json", "path": "reauth.mfaId" }]
+    },
+    {
+      "method": "POST", "url": "...",
+      "captures": [
+        { "name": "pushToken", "source": "json", "path": "challenges[type=push].options[0].token" }
+      ]
+    },
+    { "method": "POST", "url": "...   (best-effort 'remember device' — must not block login)", "body": "...", "optional": true },
+    { "method": "POST", "url": "...", "body": "...${state.mfaId}...${param.otp_code}..." }
+  ],
+  "authConfig": {
+    "twoFactorType": "otp|push|none",
+    "initiateRequestCount": 1,
+    "twoFactorContext": ["mfaId"],
+    "pollEndpoint": "https://...   (push only)",
+    "pollMethod": "POST",
+    "pollBody": "{\"mfaId\":\"${state.mfaId}\"}   (push only; copy from the recorded poll request — omit if it was body-less)",
+    "pollContentType": "application/json",
+    "pollTerminal": { "source": "json", "name": "approved", "path": "status" },
+    "pollIntervalMs": 3000,
+    "maxPollAttempts": 60,
+    "crossOriginCookieReinjection": false,
+    "sessionCapture": [{ "name": "access_token", "source": "json", "path": "data.token" }]
+  }
+}
+```
+**Always set a top-level `bootstrap.url` for a 2FA / bot-defended login.** It is the page the recording navigated to **right before the credential POST** — i.e. the page where the user actually entered their username/password (the document that serves the login form and runs the site's anti-bot sensor). Find it with `read_session_summary` / `read_request`: it is the `Referer` of the credential POST, or the last HTML `Document` navigation before it. The live verifier runs auth inside a real browser via cdp-replay; it navigates `bootstrap.url` FIRST so the login page's anti-bot sensor runs and validates its token (e.g. Akamai `_abck`) for the correct Origin. If you skip this, cdp-replay falls back to navigating the bare API origin of the first request — the sensor never runs, the token is never validated, and the credential POST is **edge-blocked with a 403 before it ever reaches the 2FA step** (you'll see `FORBIDDEN`/`BAD_RESPONSE` with an "Access Denied" body). Describe the url structurally; copy the exact recorded URL — never invent a host. (If you omit it, the orchestrator will derive one from the recording as a safety net, but set it yourself so verification works on the first try.)
+`twoFactorContext` lists the `${state.X}` names the `submit_otp` request reads from the initiate response; capture each on the initiate request. `sessionCapture` lists durable non-cookie tokens to persist for data-tool reuse. Both are derived from the recording, not invented.
+**Honor the build plan's `sessionCapture` contracts.** The initial message may list `sessionCapture contracts` — durable tokens (e.g. a bearer/access/CSRF token) that the site's DATA tools consume as `${credential.<name>}`. For EACH one you MUST add a matching `authConfig.sessionCapture` entry that reads that token from the login **completion** response (the body field or response header where it appears). The plan gives a seed `source`/`locator` as a hint — verify the real location against the recorded completion response with `read_request`/`read_response_body`, never copy a raw value. Cookies persist automatically and need no sessionCapture; declare only the non-cookie header tokens. Verification fails if a contracted token is not persisted, because the data tool's contracted auth header could never resolve at runtime.
+Set **`crossOriginCookieReinjection: true`** ONLY when the recording shows the login session is established/carried via a **cross-origin** `Set-Cookie` — i.e. a request to a DIFFERENT host than the login page (e.g. `functions.*`/`global.*` vs `www.*`) returns a `Set-Cookie` that a LATER request sends back. Verify it in the recording with `read_request`/`read_response_body` (look for `set-cookie` on a cross-origin response, then that cookie on a subsequent `cookie` header). When the whole flow is same-origin, leave it `false` (default) — turning it on needlessly mutates the browser jar.
+## Request construction rules
+- Keep all query parameters from the recorded URL.
+- Preserve functional headers: Content-Type, Origin, Referer, X-Csrf-Token, X-XSRF-Token, and other app headers the server checks.
+- Drop bot-detection headers (Akamai sensor, DataDome, PerimeterX), and Cookie / Host / Content-Length (runtime-managed).
+- Add Origin + Referer on non-GET requests if missing.
+- For per-session tokens (CSRF/nonces) that a request needs, use `${state.NAME}` with captures/bootstrap.
+- **Capture from variable-order arrays by field, not index.** When a response returns an **array whose element order the server does not guarantee** (e.g. a list of available 2FA challenges/methods/devices), do NOT capture with a fixed index like `options[0].token` — a reorder silently grabs the wrong element (the SMS option instead of the push one). Select by a field match: `options[type=PUSH].token` resolves to the **first** array element whose `type` stringifies to `PUSH`, regardless of position. Ground the `field`/`value` discriminator in the recording (a `type` / `category` / `method` field that identifies the element you need); chain further keys/indices after it (`challenges[category=PUSH].deliveryOptions[0].token`). Use a plain `[0]` only when the recording shows the order is fixed.
+- **Mark non-fatal steps `"optional": true`.** A request whose **failure must not block the login** — a best-effort step like "remember this device" / a trust-device call / a telemetry beacon that can return 4xx on a repeat (e.g. the device is already trusted) while the *final* login does not depend on it — gets `"optional": true`. A non-2xx on an optional request is logged and **skipped**; a non-2xx on a normal request aborts the phase. Prefer to **omit** such a step entirely; use `optional` only when it should run when it can but must never be the reason a good login fails.
+## Important constraints
+- **Shape from the recording; never log in yourself.** The ONLY way a live login fires is `run_verification`. Do not try to reach the live site any other way.
+- **One checkpoint per turn, then STOP.** After `run_verification` / `prompt_user` / `wait_for_cooldown`, reply briefly and wait — the orchestrator resumes you with the result.
+- **Challenge budget = 2.** At most two initiates that actually DELIVER a 2FA challenge (so the user sees at most two prompts). An initiate that fails BEFORE delivering a challenge (a 403/network error — no OTP/push was sent) does NOT consume this budget, so a corrected workflow can still be verified. A separate attempt cap (default 5) bounds repeated failed tries. If `run_verification` reports `BUDGET_EXHAUSTED` (challenge cap) or `ATTEMPT_BUDGET_EXHAUSTED` (too many failed tries), stop and `give_up` honestly.
+- **Diagnose the failure, then act:**
+  - **`FORBIDDEN`/`BAD_RESPONSE` with an "Access Denied" body on the credential POST** = the login page's anti-bot sensor never ran, so its token (`_abck`) is invalid. **Fix or add the top-level `bootstrap.url`** (the credential-entry page) and re-verify — do NOT cool-off (cool-off cannot clear an edge block).
+  - **Rate-flagged** (401/AUTH_EXPIRED on a login that worked before, or a rate-limit) = call `wait_for_cooldown`, then re-verify once.
+  - **Your workflow is wrong** (missing `${state.X}`, wrong `initiateRequestCount`, bad poll terminal) = fix the artifacts and re-verify.
+- `initiateRequestCount` must mark the split point of the requests array (it is a count, not a divisor): `requests[0..count-1]` run on `initiate`, the rest on `submit_otp`/`complete`.
+- Do NOT include analytics/telemetry/asset requests — only the login POST(s) and 2FA requests.
+- Never weaken a success marker to pass — an honest `give_up` is correct when the site won't authenticate via automation.
+## Tools available
+- `read_session_summary` — overview of the recording (requests, narration, captured selectors)
+- `read_request` — full details of a request by seq
+- `read_response_body` — response body of a request by seq
+- `write_file` — write workflow.json to the tool directory
+- `read_file` — read a file you wrote
+- `run_bash` — run shell commands in the tool directory
+- `run_verification` — (checkpoint) run a phase live in the headed cdp-replay browser on the persistent session
+- `prompt_user` — (checkpoint) ask the human for the live second factor
+- `wait_for_cooldown` — (checkpoint) wait out a site rate-flag with no login
+- `done` — declare success (note which backend reproduced the login)
+- `give_up` — declare failure with specifics

package/prompts/build-planning.md CHANGED Viewed

@@ -8,9 +8,10 @@ You receive:
 - `site`, `url`, `narration` — what the user was doing. When several captures were merged, `narration` includes `[Recording from <timestamp>] <url>` boundary lines marking where each capture begins (the same logical request may then appear once per capture, often with a different entity/token).
 - `selectedTools[]` — the tools that WILL be compiled: `{ toolName, description, expectedOutput, requestSeqs, dependencySeqs, likelyParams }`. You must emit exactly one `perTool` entry for each.
-- `sharedContext` — `{ loginRequestSeqs, credentialNames, tokenExtractionNotes, sharedHelperNotes }` from candidate detection.
+- `sharedContext` — `{ loginRequestSeqs, credentialNames, tokenExtractionNotes, sharedHelperNotes, twoFactorDetected, twoFactorType, twoFactorRequestSeqs, authCompletionSeqs, twoFactorContext, twoFactorNotes }` from candidate detection.
 - `ephemeralValues[]` — values that differed across two independent replays (highest-confidence signal for signing tokens / per-call state): `{ classification, originalSeq, location, producerSeq, producerPath, suggestedStateName }`. `browser_minted` with a high-entropy query-param `location` is the canonical sign of client-side URL signing → a `request-transform` module.
 - `tokenContractHints[]` — producer→consumer opaque-token edges DETECTED DETERMINISTICALLY from the dual-pass diff: `{ consumerTool, consumerParam, consumerLocation, producerTool, producerField, producerPath }`. Each is a grounded `server_derived` value `consumerTool` sends that was produced in `producerTool`'s response. These are pre-computed for you and are AUTHORITATIVE — you MUST declare each as a `tokenParams` (consumer) + `emitsTokens` (producer) contract per rule 12. Refine the rough `consumerParam`/`producerField` names and the `shape` from the recording, but do not drop an edge. (Any edge you miss is reconciled in deterministically, but declaring it yourself lets you pick the right `shape`.)
+- `requiredInputHints[]` — the GENERAL dependency contract DETECTED DETERMINISTICALLY from the recording: `{ consumerTool, input: { location, source, wiring, ... }, authCapture? }`. Each `input` is one thing a tool's request needs and where it comes from — `auth` (a login-minted session token → `${credential.X}`), `producer_tool` (a sibling token → param), `browser_state` (a captured `${state.X}` or, for a `referer` location, a `bootstrap.url`), `generated` (a per-call `${generated.uuid|epoch_ms|epoch_s|iso8601|nonce}`), or `static` (a page-minted app constant emitted verbatim). These are AUTHORITATIVE — copy each into the owning tool's `requiredInputs[]` (rule 13). For an `auth` input, also ensure `authTool.captures` carries its `authCapture` so the login persists it. Any input you drop is reconciled in deterministically, but declaring it yourself lets you refine the name/notes.
 - `requests[]` — the load-bearing requests for the selected tools (identical requests across tools are collapsed; `repeatCount`/`repeatedSeqs` show that). When the SAME endpoint appears for multiple tools, that's a strong shared-module signal.
 ## Output schema
@@ -28,6 +29,18 @@ You receive:
       "dependsOn": ["_shared/<other>.ts"]           // other shared modules this one imports (build order)
     }
   ],
+  "authTool": {                                     // OPTIONAL — whenever the recording has a login (sharedContext.loginRequestSeqs non-empty), with or without 2FA
+    "toolName": "authenticate_<site>",
+    "loginRequestSeqs": [number],
+    "twoFactorRequestSeqs": [number],
+    "twoFactorType": "none" | "otp" | "push",       // structural: none = login completes in the login request(s); otp = code typed back; push = poll until approved
+    "twoFactorContext": [string],                   // otp only: initiate-response fields the submit_otp request chains via ${state.X}
+    "credentialNames": ["username", "password"],
+    "captures": [
+      { "name": "session_cookie", "source": "cookie", "locator": "cookie_name", "usedAs": "cookie" }
+    ],
+    "notes": "how the 2FA flow works: trigger, wait/poll (name the approval marker for push), completion"
+  },
   "perTool": [
     {
       "toolName": "snake_case_tool_name",
@@ -42,13 +55,19 @@ You receive:
         "captures": [
           { "name": "access_token", "source": "json", "locator": "$.token", "usedAs": "header:Authorization" }
         ],
-        "notes": "how every tool replicates login inline (Imprint has no shared-auth runtime primitive)"
+        "notes": "how every tool replicates login inline"
       },
+      "dependsOnAuth": false,                        // true when authTool exists and this tool needs its cookies
       "emitsTokens": [
         { "field": "item_id", "shape": "composite '<ftid>|<areaId>|<areaName>|<areaToken>' the detail tool needs" }
       ],
       "tokenParams": [
         { "param": "item_id", "sourceTool": "search_x", "sourceField": "item_id" }
+      ],
+      "requiredInputs": [                              // the general dependency contract (rule 13); copy from requiredInputHints
+        { "location": "header:Authorization", "source": "auth", "wiring": "credential", "credentialName": "access_token" },
+        { "location": "header:X-Request-Id", "source": "generated", "wiring": "generated", "generated": "uuid" },
+        { "location": "header:X-App-Key", "source": "static", "wiring": "literal", "literal": "<page-minted constant>" }
       ]
     }
   ]
@@ -62,7 +81,7 @@ You receive:
 3. **`request-transform`** — URL signing or body construction shared across tools. Wire-up: the consuming tool sets `requestTransformModule: "../_shared/<name>.ts"`. Ground it in `ephemeralValues` (browser_minted, high-entropy query param) and `sourceSeqs`. The exported `transform(method, url, responses, params?)` returns the signed URL (or `{ url, body? }`).
 4. **`parser-helper`** — a decoder/normalizer ≥2 tools' parsers call (e.g. a shared JSPB walker, a shared field mapper). The consuming tool's parser.ts does `import { ... } from '../_shared/<name>.ts'`. Ground it in a captured response body (`sourceSeqs`).
 5. **`types`** — shared TypeScript interfaces used by ≥2 parsers. Type-only; no runtime behavior.
-6. **Auth is NEVER a shared module.** Login is request data, and the runtime cannot run a shared sub-workflow. Put the exact recipe in each tool's `authRecipe` (login seqs, credential names, captures with `${state.X}` wiring) and set `required: false` with empty arrays when a tool needs no login. Every authed tool replicates the same recipe inline.
+6. **Auth is NEVER a shared module.** Whenever the recording has a **login** (`sharedContext.loginRequestSeqs` is non-empty — credentials were submitted, **with OR without 2FA**), declare an `authTool` entry: a standalone `authenticate_<site>` tool that handles the full login. Carry `twoFactorType` from `sharedContext` (structural: `none` = the login completes in the login request(s), no second step; `otp` = a code typed back into a later request; `push` = poll one endpoint until it flips/sets a session cookie), and for `otp` carry `twoFactorContext` (the initiate-response fields the completion request chains). Data tools for the same site set `authRecipe.required: false` and `dependsOnAuth: true` — they reuse the session a prior `authenticate_<site>` call stored, so the login runs **once**, not once per tool (re-logging-in inline for every tool hammers the site and gets rate-flagged at compile time). Only when there is **no login at all** (`loginRequestSeqs` empty), omit `authTool` and set `authRecipe.required: false` with empty arrays. `credentialNames` lists ONLY the durable login secrets the user provisions once — the `${credential.*}` fields in the login request(s), typically `username` + `password`. NEVER include the live one-time 2FA code in `credentialNames`: it is covered by `twoFactorType`/`twoFactorContext` and entered fresh at runtime, never stored.
 7. **`exportSignatures` must be real TypeScript signatures** the builder will implement and the verifier will check for. List every public export.
 8. **`spec` must be concrete enough to implement and test** — name the inputs, the exact output, and the `sourceSeqs` that prove it (e.g. "given the URL at seq 41 with the `sig` param stripped, regenerate `sig` to match the recorded value").
 9. **`dependsOn` only references other `sharedModules[].path`.** No cycles.
@@ -72,3 +91,10 @@ You receive:
     - On the CONSUMER, add `tokenParams: [{ param, sourceTool, sourceField }]` — the param's value comes from `sourceTool`'s `sourceField` output, used as-is.
     - On the PRODUCER (`sourceTool`), add `emitsTokens: [{ field, shape }]` so its parser emits that exact `field` in the full `shape` the consumer needs (e.g. a composite of id + area context), NOT a bare fragment.
     - The consumer param's `sourceTool` must be another selected tool (not itself), and `sourceField` must appear in that producer's `emitsTokens`. Leave both arrays empty when there is no cross-tool token. This lets the consumer expose a usable param (the LLM caller mints it once from the producer and reuses it) and lets the gate verify the chain end-to-end — never hardcode another tool's recorded token into the consumer.
+13. **General dependency contract (`requiredInputs`).** `requiredInputHints[]` is authoritative — copy each into the owning tool's `requiredInputs[]` so EVERY non-param input the request needs is declared and the per-tool compile wires it (the header-blind "keep headers minimal" heuristic used to drop these and ship broken tools). Each entry: `{ location, source, wiring, ... }` where `source` is one of:
+    - `auth` → `wiring: "credential"`, `credentialName` (a login-minted session token; ALSO put its `authCapture` into `authTool.captures` so the auth tool persists it as `${credential.X}`).
+    - `producer_tool` → `wiring: "param"` (the same edge as a `tokenParams`/`emitsTokens` contract from rule 12 — kept in sync automatically; you may declare either form).
+    - `browser_state` → `wiring: "state"`, `stateName` (a value an earlier response/the page mints — pair it with a capture/bootstrap); a `location: "referer"` entry instead carries a `bootstrapUrl` → set the tool's `bootstrap.url`.
+    - `generated` → `wiring: "generated"`, `generated` kind (a fresh per-call value: `uuid`/`epoch_ms`/`epoch_s`/`iso8601`/`nonce`).
+    - `static` → `wiring: "literal"`, `literal` (a page-minted app constant — emit verbatim; NEVER a per-user secret).
+    Leave `requiredInputs` empty when a tool needs no inputs beyond its user params. A dropped grounded input is reconciled in deterministically and re-checked by the compile-time gate, but declaring it lets you refine names/notes.

package/prompts/compile-agent.md CHANGED Viewed

@@ -43,7 +43,7 @@ Follow these steps to compile the session:
    **Parameter checklist (`likelyParams`).** When `selectedCandidate` includes a `likelyParams` array, it contains the candidate detector's analysis of which inputs the user controlled — based on the narration and request patterns. Treat this as your **parameter extraction checklist**: every entry should become a `${param.NAME}` in workflow.json unless you can document a structural reason it cannot be templated. Parameters that appear as `null`, `[]`, or absent in the recorded request body are still valid — they represent filters or options the user interacted with during recording but did not apply in the final request state. Do not skip them.
-   **Shared modules (multi-tool runs).** If your initial context lists "Assigned shared modules" — or `read_build_plan` is available — call `read_build_plan` first. It returns prebuilt, verified helper modules under `../_shared/` that you MUST reuse instead of re-deriving their logic. For a `request-transform` module set `"requestTransformModule": "../_shared/<name>.ts"` in workflow.json; for a `parser-helper`/`types` module `import` it in `parser.ts` (e.g. `import { decode } from '../_shared/decode.ts'`). The read_build_plan slice also carries `parserGuidance`, a `paramChecklist`, and an `authRecipe` — when `authRecipe.required` is true, replicate the exact login request + `${state.X}` captures it describes inline as request[0] of your workflow (the runtime has no shared-auth primitive, so each tool logs in itself, but the recipe keeps every tool consistent). You cannot write files under `_shared/` — those modules are already built; just import them. The verifier fails this tool if an assigned module is not imported.
+   **Shared modules (multi-tool runs).** If your initial context lists "Assigned shared modules" — or `read_build_plan` is available — call `read_build_plan` first. It returns prebuilt, verified helper modules under `../_shared/` that you MUST reuse instead of re-deriving their logic. For a `request-transform` module set `"requestTransformModule": "../_shared/<name>.ts"` in workflow.json; for a `parser-helper`/`types` module `import` it in `parser.ts` (e.g. `import { decode } from '../_shared/decode.ts'`). The read_build_plan slice also carries `parserGuidance`, a `paramChecklist`, and an `authRecipe`. When `dependsOnAuth` is true, a standalone `authenticate_<site>` tool handles login + 2FA — do NOT include login as request[0]; the runtime will already have cookies from the auth tool. When `dependsOnAuth` is false/absent and `authRecipe.required` is true, replicate the exact login request + `${state.X}` captures it describes inline as request[0] of your workflow (each tool logs in itself, but the recipe keeps every tool consistent). You cannot write files under `_shared/` — those modules are already built; just import them. The verifier fails this tool if an assigned module is not imported.
    **Dual-pass value classifications.** When `stateHints` includes entries with `type: "dual_pass_value_classification"`, these values were verified to differ across two independent executions of the same workflow with identical user inputs. They are the highest-confidence signal for ephemeral state — treat them seriously, but reason about them rather than following blindly:
@@ -94,7 +94,8 @@ Follow these steps to compile the session:
    - Replace per-user credentials with `${credential.NAME}` (e.g., `patron_id`, `csrf_token`, `account_uuid`)
    - **CRITICAL — Login chains.** If the input session contains a login request whose body has been pre-templated to `${credential.username}` / `${credential.password}` (you'll see those literal strings in the request body when you `read_request`), you MUST keep that login request as request[0] in your workflow — unless the build plan sets `dependsOnAuth: true`, in which case the standalone `authenticate_<site>` tool owns the login and you must NOT include it. Otherwise, do NOT drop it. Use named `captures` (canonical `${state.name}`) or legacy `extract` to capture any returned auth tokens (`id_token`, `access_token`, `swa_token`, cookies projected into headers, etc.) and reference them in subsequent requests. The runtime substitutes the username/password from the local credential manager at call time, so the workflow is self-sufficient — the caller doesn't need to log in separately.
    - **Distinguish credentials from session tokens.** `${credential.NAME}` is for STABLE per-user values that the user provides once (username, password, API token). For ephemeral per-call values (passenger tokens, ride-along session IDs, recordLocator-bound state, CSRF cookies minted by an earlier request) you MUST use named request/bootstrap captures and `${state.NAME}` — NEVER use `${credential.X}` for those. Test: would the user be able to type this value into an `imprint credential set` prompt? If no, it's captured state, not a credential.
-   - Keep headers minimal — drop bot-detection headers (Akamai fingerprints, DataDome, PerimeterX), drop browser-internal headers, keep `Content-Type`, `Origin`, `Referer` when needed
+   - **Headers: drop only bot fingerprints — keep every functional header.** Drop bot-detection headers (Akamai fingerprints, DataDome, PerimeterX) and browser-internal headers. Keep `Content-Type`, `Origin`, `Referer` when needed AND every functional header (see below). "Keep headers minimal" is NOT a license to drop auth/session/gateway headers — that is the #1 cause of tools that ship and fail at runtime.
+   - **CONTRACTED-HEADERS rule (verifier-enforced).** When `read_build_plan` is available, its `requiredInputs` / `contractedInputs` list is the AUTHORITATIVE set of inputs this request needs and how to wire each — derived deterministically from the recording, not guesswork. These are FUNCTIONAL, not boilerplate. For each one, emit it with the stated wiring: `auth` → `${credential.<name>}` (the authenticate tool persists it; never hardcode the token); `producer_tool` → expose param `<name>` and chain it from the producer; `browser_state` → capture it and use `${state.<name>}` (or set `workflow.bootstrap.url` for a `referer` input); `generated` → `${generated.<kind>}` (uuid/epoch_ms/epoch_s/iso8601/nonce, minted fresh per call); `static` → emit the recorded literal verbatim. Use **`reveal_request`** to read a header's REAL value before deciding capture-vs-reference-vs-generate — the session summary may show a redacted/placeholder value, but reveal_request returns the unredacted recording. NEVER copy a raw secret into workflow.json; the emit-time guard rewrites or blocks it. The verifier deterministically injects a dropped contracted input and BLOCKS `done()` if a non-producer contracted input is still unwired.
    - **CRITICAL — preserve FUNCTIONAL request headers (same principle as query params).** Beyond the standard set, the recorded request often carries headers the server *checks* on every call: anti-CSRF / anti-replay tokens (`X-Csrf-Token`, `X-XSRF-Token`, `RequestVerificationToken`, …), API keys, session/nonce headers, `X-*` app headers. These are part of the functional contract — dropping one usually makes a state-changing POST silently fail or get tarpitted, exactly like dropping a query param. For each non-bot, non-browser-internal header on the recorded request: keep it. If its value is a per-session/per-call token (high-entropy, rotates across the recording), do NOT hardcode it — capture it (`${state.NAME}` from a bootstrap/request capture) and template it. The litmus test mirrors query params: if the recorded request sent it and it isn't a bot fingerprint, the workflow request must send it too (literal if static, `${state.X}`/`${param.X}` if dynamic). A recorded state-changing POST (`*.act`, `/checkout`, `/book`, anything that mutates) that carried a CSRF/session header MUST template that header from captured state — never silently omit it.
    - **CRITICAL: Preserve ALL query parameters from the recorded URL.** Unlike HTTP headers — where you drop bot-detection fingerprints — query params are part of the API's functional contract. Even if a param value looks obfuscated or high-entropy (base64, hex, random-looking), it likely carries meaning the server checks (anti-bot tokens, session binding, A/B bucketing, obfuscated checksums). Preserve every param key: substitute the value with `${response[N].name}` or `${state.name}` if it came from an earlier response, `${param.NAME}` if user-variable, or keep the literal value if it's a static constant (like `search=false`). Missing a single query param can silently cause the API to return sentinel/degraded data rather than an error — the server may fall back to generic defaults instead of returning the actual results.
    - **Per-call query params (URL signing).** If a query param has a different high-entropy value on every request to the same URL path in the session, it is likely a URL signing token computed by client-side JavaScript. Do NOT hardcode the recorded value — it is per-call and will expire. Instead: use `search_response_body` to search the session's JavaScript responses (look for `.js` URLs) for the param name. The signing function is usually simple (HMAC, MD5, XOR + base64 with a static key). Once you find it, write a `requestTransformModule` (sibling to `parser.ts`) that exports `transform(method: string, url: string): string` — it takes the unsigned URL and returns the URL with the signing param appended. Set `"requestTransformModule": "./request-transform.ts"` in workflow.json. The runtime calls this function before each request.
@@ -488,8 +489,9 @@ The goal is a working tool, not a perfect tool. You can always refine later. Get
 | Tool | Purpose |
 |---|---|
 | `read_session_summary` | Returns site, narration, request count, list of load-bearing requests with seq+url+status+mimeType+bodySize |
-| `read_build_plan` | (multi-tool runs only) Returns this tool's plan slice: shared modules to import, parser guidance, parameter checklist, the auth recipe to replicate inline, and the opaque-token contract (`emitsTokens` you must produce for siblings, `tokenParams` you consume from siblings) |
-| `read_request` | Full request including request body for a given seq |
+| `read_build_plan` | (multi-tool runs only) Returns this tool's plan slice: shared modules to import, parser guidance, parameter checklist, the auth recipe to replicate inline, the opaque-token contract (`emitsTokens` you produce for siblings, `tokenParams` you consume), and the general dependency contract (`requiredInputs` / `contractedInputs` — every non-param input this request needs and how to wire each) |
+| `read_request` | Full request including request body for a given seq (values may be redacted/placeholdered) |
+| `reveal_request` | Full UNREDACTED request + response for one or more seqs, read straight from the recording — use to read the real value of an auth/session/gateway header (or body field) before deciding how to wire it. Never copy a raw secret into artifacts. |
 | `read_response_body` | Response body for a given seq (paginated for large bodies via offset/length) |
 | `search_response_body` | Find substrings in a response body and return matching offsets+context (essential for anchoring on known values inside opaque JSPB) |
 | `write_file` | Write workflow.json, parser.ts, parser.test.ts, or notes/*.md in the generated tool directory |

package/prompts/tool-candidate-detection.md CHANGED Viewed

@@ -9,7 +9,13 @@ Schema:
     "loginRequestSeqs": [number],
     "credentialNames": [string],
     "tokenExtractionNotes": "string",
-    "sharedHelperNotes": "string"
+    "sharedHelperNotes": "string",
+    "twoFactorDetected": boolean,
+    "twoFactorType": "otp" | "push" | "none",
+    "twoFactorRequestSeqs": [number],
+    "authCompletionSeqs": [number],
+    "twoFactorContext": [string],
+    "twoFactorNotes": "string"
   },
   "candidates": [
     {
@@ -38,59 +44,93 @@ Rules:
 2. Do not expose login, auth, CSRF refresh, telemetry, page bootstrap, or
    tracking as tools. Put login/auth request seqs in sharedContext.loginRequestSeqs
    or candidate.dependencySeqs instead.
-3. Cleanup, cancel, delete, or undo flows should be candidates only when the
+3. When login requests include a multi-step authentication flow, set
+   `twoFactorDetected: true` and classify `twoFactorType` by the recording's
+   **structure**, not the delivery channel:
+   - **`otp`** — a *later* request carries a short code the user obtained
+     out-of-band (the code appears in the request body/params but in no earlier
+     response). SMS, email, and authenticator-app (TOTP) codes are all `otp` —
+     the channel doesn't change the replay. If that completion request reads a
+     value the *initiate response returned in its body* (e.g. a reauth `mfaId`),
+     list the field name(s) in `twoFactorContext` so the compiler chains them.
+   - **`push`** — the same endpoint is polled repeatedly until its response flips
+     (pending→approved) or a session cookie appears; no code re-enters the flow.
+     In `twoFactorNotes`, name the poll endpoint and the field/value that marks
+     approval in the recorded terminal poll.
+   Put the 2FA-related request seqs in `twoFactorRequestSeqs`; post-2FA
+   finalization requests (trusted device registration, final OAuth exchange,
+   session confirmation) in `authCompletionSeqs`. These seqs must NOT appear
+   in any candidate's `requestSeqs` — they belong to the auth flow, not data
+   tools. When no 2FA is detected, set `twoFactorDetected: false` and
+   `twoFactorType: "none"`, and leave the remaining 2FA fields empty (`[]` for
+   the seq/context arrays, `""` for `twoFactorNotes`).
+   `credentialNames` lists ONLY the durable login
+   secrets the user provisions once — the `${credential.*}` fields submitted in
+   the login request(s), typically `username` + `password`. NEVER put the live
+   one-time 2FA code in `credentialNames`: it is captured by
+   `twoFactorType`/`twoFactorContext` and entered fresh at runtime, not stored.
+4. When multiple requests contain `${credential.*}` placeholders (multiple
+   login attempts in the recording), check each request's `status` and
+   `responsePreview` to determine which attempt(s) actually succeeded. A
+   login request FAILED if: its response contains error messages about
+   incorrect/invalid/wrong credentials, its HTTP status is 4xx, or its
+   response body contains an error code with a message indicating
+   authentication failure. Only include SUCCESSFUL login request seqs in
+   `loginRequestSeqs`. Failed login attempts are recording noise from the
+   user mistyping their password. If all credential-bearing requests appear
+   to have failed, include the LAST one (most likely to have correct
+   credentials).
+5. Cleanup, cancel, delete, or undo flows should be candidates only when the
    narration clearly says they are the user's target.
-4. Shared auth dependency seqs may be reused by multiple tools.
-5. There must be exactly one primary candidate. Pick the candidate that best
+6. Shared auth dependency seqs may be reused by multiple tools.
+7. There must be exactly one primary candidate. Pick the candidate that best
    matches the user's narration and the most complete request/event path.
-6. Use stable snake_case tool names. Prefer verb_object names such as
+8. Use stable snake_case tool names. Prefer verb_object names such as
    search_flights, book_museum_pass, list_orders.
-7. Candidate requestSeqs should include the load-bearing API requests for that
+9. Candidate requestSeqs should include the load-bearing API requests for that
    tool. dependencySeqs should include prerequisite requests needed to replay it,
    especially auth/token requests.
    Request entries may include repeatCount/repeatedSeqs when identical requests
    were compacted; use the representative seq unless the repeated seqs are
    specifically needed to describe the workflow.
-8. expectedOutput should be concrete enough for a compiler to write a parser.
-9. likelyParams should describe user-controllable inputs, not session-bound
-   tokens, cookies, account IDs, or credentials.
-10. likelyParams.type must be exactly one of "string", "number", or "boolean".
+10. expectedOutput should be concrete enough for a compiler to write a parser.
+11. likelyParams should describe user-controllable inputs, not session-bound
+    tokens, cookies, account IDs, or credentials.
+12. likelyParams.type must be exactly one of "string", "number", or "boolean".
     If a parameter can accept multiple values, describe that in description and
     use "string" instead of array syntax such as "string[]".
-11. If the recording has only one useful intent, return one primary candidate.
-12. When an endpoint returns a large dataset (high responseBodyLength — e.g.
+13. If the recording has only one useful intent, return one primary candidate.
+14. When an endpoint returns a large dataset (high responseBodyLength — e.g.
     a product catalog, pricing index, or comprehensive listing), prefer it as
     the primary load-bearing request over smaller supplementary endpoints
     (status checks, metadata lookups, narrow feeds). Include both in
     requestSeqs when they serve the same user intent.
-13. When multiple endpoints contribute complementary data for the same user
+15. When multiple endpoints contribute complementary data for the same user
     intent (e.g. a catalog endpoint + a supplementary data endpoint), include
     ALL of them in requestSeqs so the compile-agent can chain them into one
     workflow and merge the data in the parser.
-14. Lookup or resolution endpoints (any endpoint that converts user input
+16. Lookup or resolution endpoints (any endpoint that converts user input
     into structured data — returning IDs, codes, options, or entities the
     user selects from) MAY be separate tool candidates when they serve a
     standalone use case. Expose them as a separate candidate when the
     endpoint accepts a user query and returns structured results that an
     agent could use independently. Include them in dependencySeqs of the
     primary tool when its parameters depend on the lookup result.
-15. Prefer more candidates over fewer. If a request or group of requests
+17. Prefer more candidates over fewer. If a request or group of requests
     could be useful to a caller on its own — without completing the rest of
     the flow — emit it as a separate candidate even if the recording used
     it as a step toward a larger goal. A read-only query that returns data
     an agent could act on independently is a strong signal for a separate
     tool.
-16. Every candidate MUST have at least one seq in requestSeqs. A tool with
+18. Every candidate MUST have at least one seq in requestSeqs. A tool with
     no backing requests cannot be compiled. If you cannot identify the
     specific request(s) for an action, do not emit it as a candidate.
-17. When the same API endpoint (same URL path and method) is called
+19. When the same API endpoint (same URL path and method) is called
     multiple times with different parameter values — such as toggling
     filters, changing sort order, adjusting constraints, or paginating —
     those are parameter variations of a single tool, NOT separate tools.
     Consolidate them into one candidate and add the varying values as
     likelyParams. Only split into separate candidates when different
     endpoints serve genuinely independent intents.
-18. When requestSeqs contains multiple calls to the same API endpoint with
+20. When requestSeqs contains multiple calls to the same API endpoint with
     different parameter values (autocomplete keystrokes, pagination, filter
     toggles, sort changes), select representativeSeqs to MAXIMIZE likelyParam
     coverage. Every likelyParam must have at least one representative where