npm - imprint-mcp - Versions diffs - 0.2.1 → 0.3.0 - Mend

imprint-mcp 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (126)

package/README.md +165 -201
package/examples/discoverandgo/README.md +1 -1
package/examples/echo/README.md +1 -1
package/examples/google-flights/README.md +28 -0
package/examples/google-flights/_shared/batchexecute.ts +63 -0
package/examples/google-flights/_shared/flights_request.ts +95 -0
package/examples/google-flights/_shared/package.json +9 -0
package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
package/examples/google-flights/get_flight_booking_details/package.json +9 -0
package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
package/examples/google-flights/get_flight_calendar_prices/workflow.json +76 -0
package/examples/google-flights/lookup_airport/index.ts +101 -0
package/examples/google-flights/lookup_airport/package.json +9 -0
package/examples/google-flights/lookup_airport/parser.ts +66 -0
package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
package/examples/google-flights/lookup_airport/workflow.json +57 -0
package/examples/google-flights/search_flights/index.ts +219 -0
package/examples/google-flights/search_flights/package.json +9 -0
package/examples/google-flights/search_flights/parser.ts +169 -0
package/examples/google-flights/search_flights/playbook.yaml +184 -0
package/examples/google-flights/search_flights/request-transform.ts +119 -0
package/examples/google-flights/search_flights/workflow.json +143 -0
package/examples/google-hotels/README.md +29 -0
package/examples/google-hotels/_shared/batchexecute.ts +73 -0
package/examples/google-hotels/_shared/freq.ts +158 -0
package/examples/google-hotels/_shared/package.json +9 -0
package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
package/examples/google-hotels/search_hotels/index.ts +207 -0
package/examples/google-hotels/search_hotels/package.json +9 -0
package/examples/google-hotels/search_hotels/parser.ts +260 -0
package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
package/examples/google-hotels/search_hotels/workflow.json +127 -0
package/package.json +3 -2
package/prompts/audit-agent.md +71 -0
package/prompts/build-planning.md +74 -0
package/prompts/compile-agent.md +131 -27
package/prompts/prereq-builder.md +64 -0
package/prompts/prereq-planner.md +34 -0
package/prompts/tool-planning.md +39 -0
package/src/cli.ts +109 -2
package/src/imprint/agent.ts +5 -0
package/src/imprint/audit.ts +996 -0
package/src/imprint/backend-ladder.ts +1214 -184
package/src/imprint/build-plan.ts +1051 -0
package/src/imprint/cdp-browser-fetch.ts +589 -0
package/src/imprint/cdp-jar-cache.ts +320 -0
package/src/imprint/chromium.ts +135 -0
package/src/imprint/claude-cli-compile.ts +125 -25
package/src/imprint/codex-cli-compile.ts +26 -23
package/src/imprint/compile-agent-types.ts +38 -0
package/src/imprint/compile-agent.ts +63 -25
package/src/imprint/compile-tools.ts +1656 -64
package/src/imprint/compile.ts +13 -1
package/src/imprint/concurrency.ts +87 -0
package/src/imprint/cron.ts +1 -0
package/src/imprint/doctor.ts +39 -0
package/src/imprint/freeform-redact.ts +5 -4
package/src/imprint/integrations.ts +2 -2
package/src/imprint/llm.ts +56 -8
package/src/imprint/mcp-compile-server.ts +43 -10
package/src/imprint/mcp-maintenance.ts +9 -101
package/src/imprint/mcp-server.ts +73 -7
package/src/imprint/multi-progress.ts +7 -2
package/src/imprint/param-grounding.ts +367 -0
package/src/imprint/paths.ts +29 -0
package/src/imprint/playbook-runner.ts +101 -40
package/src/imprint/prereq-builder.ts +651 -0
package/src/imprint/probe-backends.ts +6 -3
package/src/imprint/record.ts +10 -1
package/src/imprint/redact.ts +30 -2
package/src/imprint/replay-capture.ts +19 -18
package/src/imprint/runtime.ts +19 -10
package/src/imprint/session-diff.ts +79 -2
package/src/imprint/session-merge.ts +9 -5
package/src/imprint/stealth-chromium.ts +81 -0
package/src/imprint/stealth-fetch.ts +309 -29
package/src/imprint/stealth-token-cache.ts +88 -0
package/src/imprint/teach-plan.ts +251 -0
package/src/imprint/teach-state.ts +10 -0
package/src/imprint/teach.ts +456 -142
package/src/imprint/tool-candidates.ts +72 -14
package/src/imprint/tool-plan.ts +313 -0
package/src/imprint/tracing.ts +135 -6
package/src/imprint/types.ts +61 -3
package/examples/google-flights/search_google_flights/index.ts +0 -101
package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
package/examples/google-flights/search_google_flights/parser.ts +0 -189
package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
package/examples/google-flights/search_google_flights/workflow.json +0 -48
package/examples/google-hotels/search_google_hotels/index.ts +0 -194
package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97

package/prompts/prereq-planner.md ADDED Viewed

@@ -0,0 +1,34 @@
+You are the PLANNER for ONE shared TypeScript module that several generated tools (compiled from the same browser recording of one site) will import. A second agent will WRITE the module + its test by following your plan. Your job is to remove the guesswork before any code exists: decode the recorded data, pin down the algorithm, and call out every strict-typing hazard up front. A precise plan is what makes the implementation pass on the first attempt instead of burning verification cycles.
+## Input
+You receive `{ site, url, module, availableDependencies, sources }`:
+- `module` — `{ path, kind, purpose, exportSignatures, spec, dependsOn }`. The implementer must produce exactly these exports.
+- `sources[]` — recorded requests that ground the behavior: `{ seq, method, url, requestHeaders, requestBody, status, mimeType, responseBody }`. These are the ground truth — decode them, do not guess.
+- `availableDependencies[]` — already-built shared modules this one may import.
+## Output
+Return a concise **Markdown** plan — no JSON, and do not wrap the whole response in a code fence. Use exactly these sections:
+### Data shape
+Decode the ACTUAL recorded `sources`. State the precise shape the module operates on and where the target data lives. When the body is an RPC envelope — an anti-XSSI guard (e.g. `)]}'`), length-prefixed frames, `["wrb.fr", "<rpcid>", "<payload>"]` rows, or doubly/triply-encoded JSON strings — give the exact unwrapping steps AND a decoded sample with real indices (e.g. "strip the first line; each frame is `<len>\n<json>`; in the `wrb.fr` row, element [2] is a JSON string → `JSON.parse` it → the hotel name is at `[0][1][3]`, the price at `[0][1][7][0]`"). For a `request-transform`, identify the signing/dynamic param, its position in the recorded URL/body, and the apparent algorithm (HMAC/MD5/CRC32/base64/etc.) inferred from the recording.
+### Algorithm
+Step by step, what each export in `exportSignatures` does to turn the recorded input into the required output. Name exact fields and indices. Ground every step in `sources`.
+### Typing hazards
+The module is typechecked with `tsc` under `strict` + `noUncheckedIndexedAccess`, as a gate SEPARATE from the test (a passing test still fails the build on a type error). Enumerate the specific spots that yield `T | undefined` — indexed access (`arr[i]`), regex captures (`re.exec(s)` → `m[1]`, `s.match(re)` → `m[1]`), and split results (`s.split(d)[n]`) — and the exact guard or assertion to use at each (`const m = re.exec(s); if (!m?.[1]) return …`, or `m[1]!` when the structure guarantees presence). Be exhaustive: this is the single most common reason implementations fail.
+### Test plan
+Which recorded `seq` to load (one of the `seq` values in `sources[]`, as listed in `module.sourceSeqs` when that field is provided) and the concrete recorded values to assert — at least 3 meaningful assertions on real data, no tautologies. For a `request-transform`, name the param to strip and re-sign and the exact expected value from the recording.
+### Risks
+Ambiguities, multiple plausible interpretations, or anything the recording doesn't fully pin down — each with your best-guess resolution so the implementer isn't blocked.
+## Rules
+1. Ground everything in the provided `sources`. Decode real values; never invent fields the recording doesn't show.
+2. No production code — pseudocode, field paths, and exact type-guard snippets only. The implementer writes the module.
+3. Be specific and concise. Skip generic advice; every line should be something the implementer couldn't trivially infer from the signatures alone.

package/prompts/tool-planning.md ADDED Viewed

@@ -0,0 +1,39 @@
+You are the PLANNER for ONE MCP tool that a second agent will COMPILE from a browser recording of a single site. The tool replays the site's API: it takes typed parameters, issues one or more HTTP requests, and parses the response into a structured result. Your job is to remove the guesswork before any code exists — map every parameter to the exact recorded field, fix how each request is constructed and signed, and pin down exactly where the result data lives in the response. A precise plan is what makes the compile pass on the first attempt instead of burning verification cycles.
+## Input
+You receive `{ site, url, tool, sharedContext?, planGuidance?, assignedModules, requests }`:
+- `tool` — `{ toolName, description, expectedOutput, likelyParams, requestSeqs, dependencySeqs }`. The compiled tool must expose these parameters and produce `expectedOutput`. `likelyParams` are the detector's best guess — confirm or correct each against the recorded requests.
+- `planGuidance?` — present when a global build plan ran first: `{ parserGuidance, paramChecklist, authRecipe, loadBearingSeqs }` for THIS tool. Treat it as prior guidance and reconcile it with the recorded data; if the recording contradicts it, prefer the recording and say so.
+- `assignedModules[]` — verified shared modules this tool MUST import instead of re-implementing: `{ path, kind, importPath, exportSignatures, purpose }`. A `request-transform` module reproduces the site's request signing/construction; a `parser-helper` extracts data from the response. Reference each by its exact `importPath`.
+- `requests[]` — the recorded requests in scope for this tool: `{ seq, method, url, headers, body, status, mimeType, responsePreview, ... }`. These are the ground truth — decode them, do not guess. `responsePreview` is truncated; note where the full body must be read.
+## Output
+Return a concise **Markdown** plan — no JSON, and do not wrap the whole response in a code fence. Use exactly these sections:
+### Parameters
+For EACH tool parameter, name the exact recorded field it maps to: the query-string key, JSON body path, header, or path segment in a specific recorded `seq`, with the recorded value as evidence (e.g. "`origin` → query param `from` in seq 12, recorded value `SFO`"). Flag any entry in `likelyParams` that does not appear in the recording (it may be derived, optional, or wrong) and state your resolution. Note defaults where the recording shows one.
+For EACH parameter that should influence the request, also emit a short **verification anchor**: the recorded `seq`(s) that demonstrate that parameter's effect, and the exact request location it controls — the field name, array index, or position the compiler must reproduce (e.g. "anchor: seq 12 query `from=SFO`, seq 19 query `from=LAX` — controls query key `from`"; for positional/array bodies, give the index). For a parameter that selects among request variants, give the anchor seq for each variant so the compiler wires the parameter to drive the variation rather than hardcoding one variant. The anchor is what lets the compiler verify, before finishing, that the constructed request reproduces the parameter's encoding instead of advertising a parameter it never applies.
+**Hard rule: a parameter with no recorded anchor must not be exposed.** If you cannot point to at least one recorded `seq` and exact location demonstrating a parameter's effect, do not list it as a tool parameter — note it under Edge cases as dropped-for-lack-of-evidence and why. A narrower tool that does exactly what it advertises beats one exposing a parameter that nothing in the recording can verify.
+### Requests
+The request(s) the tool issues, in order: method, URL (with which parts are templated from parameters vs constant), body shape, and required headers. If a value is signed or dynamically constructed and an `assignedModules` `request-transform` covers it, say to call that module by its `importPath` rather than re-deriving the algorithm. If there is no assigned module, describe the construction/signing from the recording. Note dependency requests (`dependencySeqs`) that must run first to mint a token or id, and what they produce.
+### Response parsing
+The exact location of the result data: the `seq` whose response carries it, the precise JSON path(s) to the array/object, and the per-item fields to extract for `expectedOutput`. If the body is an RPC envelope (anti-XSSI prefix, length-prefixed frames, doubly-encoded JSON strings), give the exact unwrapping steps. If an `assignedModules` `parser-helper` covers this, say to call it by its `importPath` and what it returns.
+### Shared modules
+The verbatim `importPath` of every module in `assignedModules` the tool must import, each with one line on what it provides. If `assignedModules` is empty, write "none".
+### Edge cases
+Empty results, optional parameters omitted, pagination, error/zero-result responses, and any value the recording doesn't fully pin down — each with your best-guess resolution so the compiler isn't blocked.
+## Rules
+1. Ground every mapping in the provided `requests`. Decode real recorded values; do NOT invent fields absent from the recorded data.
+2. No production code — field paths, exact indices, and the `importPath` to call only. The compiler writes the tool.
+3. Be specific and concise. Skip generic advice; every line should be something the compiler couldn't trivially infer from the parameter names alone.

package/src/cli.ts CHANGED Viewed

@@ -7,7 +7,7 @@ import { parseArgs } from 'node:util';
 import { IS_COMPILED_BINARY } from './imprint/is-compiled.ts';
 import type { ProviderName } from './imprint/llm.ts';
 import { isDebug } from './imprint/log.ts';
-import { shutdownTracing, traced } from './imprint/tracing.ts';
+import { shutdownTracing, tracedWithCostRollup } from './imprint/tracing.ts';
 import { VERSION } from './imprint/version.ts';
 /** Load .env from the project root (next to src/) if present.
@@ -66,6 +66,7 @@ RUN
   mcp-server <site>        Serve one site's tools as MCP (stdio default).
   cron <site>              Polling daemon for ~/.imprint/<site>/<toolName>/cron.json.
   playbook <site>          Run a playbook directly (debugging).
+  audit <site>             Exercise every generated tool and score it (≥95% gate).
 OTHER
   doctor                   Check that the environment is set up correctly.
@@ -352,6 +353,35 @@ export const VERB_HELP: Record<string, VerbHelp> = {
     ],
     example: 'imprint mcp-server southwest',
   },
+  audit: {
+    summary:
+      "Drive a headless agent against a site's MCP tools, exercise each one, and compute a deterministic accuracy score. Verdicts come from the agent; the score is computed by imprint.",
+    usage: [
+      'imprint audit <site> [--min-score <n>] [--out <path>] [--model <name>] [--timeout <duration>] [--json]',
+    ],
+    flags: [
+      {
+        name: '--min-score <n>',
+        description: 'Pass threshold as a percentage of gradeable invocations (default 95).',
+      },
+      {
+        name: '--out <path>',
+        description:
+          'Where to write the JSON report (default ~/.imprint/<site>/.audit-report.json).',
+      },
+      {
+        name: '--model <name>',
+        description: 'Override the auditor model (default Opus via claude-cli).',
+      },
+      {
+        name: '--timeout <duration>',
+        description:
+          'Audit-session wall-clock cap. Accepts 20m, 1h, 300s, or plain ms. Default 45m.',
+      },
+      { name: '--json', description: 'Print the machine-readable report to stdout.' },
+    ],
+    example: 'imprint audit google-flights --min-score 95',
+  },
   mcp: {
     summary:
       'Audit, disable, re-enable, and delete Imprint MCP registrations and stale teach state.',
@@ -916,6 +946,75 @@ async function main(argv: string[]): Promise<number> {
       return 0;
     }
+    case 'audit': {
+      const site = requirePositional(argv, 'audit', 'a <site> argument');
+      if (site === null) return 2;
+      const { values } = parseArgs({
+        args: argv.slice(2),
+        options: {
+          'min-score': { type: 'string' },
+          out: { type: 'string' },
+          model: { type: 'string' },
+          timeout: { type: 'string' },
+          json: { type: 'boolean' },
+        },
+        allowPositionals: false,
+      });
+      let minScore = 95;
+      if (values['min-score'] !== undefined) {
+        const parsed = Number(values['min-score']);
+        if (!Number.isFinite(parsed) || parsed < 0 || parsed > 100) {
+          console.error(
+            `error: invalid --min-score "${values['min-score']}"\n→ use a number between 0 and 100`,
+          );
+          return 2;
+        }
+        minScore = parsed;
+      }
+      let auditTimeoutMs: number | undefined;
+      if (values.timeout) {
+        auditTimeoutMs = parseDuration(values.timeout) ?? undefined;
+        if (auditTimeoutMs === undefined) {
+          console.error(
+            `error: invalid --timeout "${values.timeout}"\n→ use format: 20m, 1h, 300s, or plain milliseconds`,
+          );
+          return 2;
+        }
+      }
+      const { runAudit } = await import('./imprint/audit.ts');
+      const { localAuditReportPath } = await import('./imprint/paths.ts');
+      const outPath = values.out ?? localAuditReportPath(site);
+      const score = await tracedWithCostRollup(
+        'cli.audit',
+        'AGENT',
+        {
+          'imprint.site': site,
+          'imprint.min_score': minScore,
+          'imprint.model': values.model ?? 'auto',
+        },
+        () =>
+          runAudit({
+            site,
+            minScore,
+            outPath,
+            model: values.model,
+            timeoutMs: auditTimeoutMs,
+            json: values.json,
+          }),
+      );
+      // Exit codes distinguish the outcomes: 0 pass, 1 fail (fix the code),
+      // 2 inconclusive (the site blocked us), 3 timeout (audit didn't finish).
+      // (if-chain rather than switch: a `case '<word>':` here would be misread as
+      // a CLI verb by the verb/help drift-guard test.)
+      if (score.verdict === 'pass') return 0;
+      if (score.verdict === 'fail') return 1;
+      if (score.verdict === 'timeout') return 3;
+      return 2;
+    }
     case 'cron': {
       const site = requirePositional(argv, 'cron', 'a <site> argument');
       if (site === null) return 2;
@@ -1138,7 +1237,7 @@ async function main(argv: string[]): Promise<number> {
       try {
         const { teach } = await import('./imprint/teach.ts');
-        await traced(
+        await tracedWithCostRollup(
           'cli.teach',
           'AGENT',
           {
@@ -1195,6 +1294,8 @@ async function main(argv: string[]): Promise<number> {
           'example-dir': { type: 'string' },
           'candidate-json': { type: 'string' },
           'shared-context-json': { type: 'string' },
+          'build-plan-path': { type: 'string' },
+          'shared-modules-json': { type: 'string' },
         },
         allowPositionals: false,
       });
@@ -1209,17 +1310,23 @@ async function main(argv: string[]): Promise<number> {
       const { ToolCandidateSchema, SharedCompileContextSchema } = await import(
         './imprint/tool-candidates.ts'
       );
+      const { SharedModuleManifestSchema } = await import('./imprint/build-plan.ts');
       const candidate = values['candidate-json']
         ? ToolCandidateSchema.parse(JSON.parse(values['candidate-json']))
         : undefined;
       const sharedContext = values['shared-context-json']
         ? SharedCompileContextSchema.parse(JSON.parse(values['shared-context-json']))
         : undefined;
+      const sharedModules = values['shared-modules-json']
+        ? SharedModuleManifestSchema.parse(JSON.parse(values['shared-modules-json']))
+        : undefined;
       await runCompileMcpServer({
         sessionPath: values['session-path'],
         toolDir,
         candidate,
         sharedContext,
+        buildPlanPath: values['build-plan-path'],
+        sharedModules,
       });
       return 0;
     }

package/src/imprint/agent.ts CHANGED Viewed

@@ -77,6 +77,9 @@ interface AgentLoopOptions {
   llm: ToolUseProvider;
   /** called before each LLM call and tool dispatch with structured progress */
   onProgress?: (p: AgentProgress) => void;
+  /** called after each turn with the full conversation log so far, so callers
+   *  can flush incrementally (e.g. write .compile-log.json to disk). */
+  onConversationUpdate?: (log: ConversationLogEntry[]) => void;
   /** called when the wall-clock deadline is reached; return ms to extend or null to time out */
   onDeadlineReached?: OnDeadlineReached;
 }
@@ -461,6 +464,8 @@ export async function runAgentLoop(opts: AgentLoopOptions): Promise<AgentResult>
       },
     );
+    opts.onConversationUpdate?.(conversationLog);
     if (turnOutcome.action === 'return') return turnOutcome.result;
     // Loop continues...