npm - imprint-mcp - Versions diffs - 0.2.1 → 0.3.1 - Mend

imprint-mcp 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (130) hide show

package/README.md +193 -189
package/examples/discoverandgo/README.md +1 -1
package/examples/echo/README.md +1 -1
package/examples/google-flights/README.md +28 -0
package/examples/google-flights/_shared/batchexecute.ts +63 -0
package/examples/google-flights/_shared/flights_request.ts +95 -0
package/examples/google-flights/_shared/package.json +9 -0
package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
package/examples/google-flights/get_flight_booking_details/package.json +9 -0
package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
package/examples/google-flights/get_flight_calendar_prices/workflow.json +78 -0
package/examples/google-flights/lookup_airport/index.ts +101 -0
package/examples/google-flights/lookup_airport/package.json +9 -0
package/examples/google-flights/lookup_airport/parser.ts +66 -0
package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
package/examples/google-flights/lookup_airport/workflow.json +57 -0
package/examples/google-flights/search_flights/index.ts +219 -0
package/examples/google-flights/search_flights/package.json +9 -0
package/examples/google-flights/search_flights/parser.ts +169 -0
package/examples/google-flights/search_flights/playbook.yaml +184 -0
package/examples/google-flights/search_flights/request-transform.ts +119 -0
package/examples/google-flights/search_flights/workflow.json +143 -0
package/examples/google-hotels/README.md +29 -0
package/examples/google-hotels/_shared/batchexecute.ts +73 -0
package/examples/google-hotels/_shared/freq.ts +158 -0
package/examples/google-hotels/_shared/package.json +9 -0
package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
package/examples/google-hotels/search_hotels/index.ts +207 -0
package/examples/google-hotels/search_hotels/package.json +9 -0
package/examples/google-hotels/search_hotels/parser.ts +260 -0
package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
package/examples/google-hotels/search_hotels/workflow.json +127 -0
package/examples/southwest/README.md +3 -2
package/examples/southwest/search_southwest_flights/index.ts +18 -1
package/examples/southwest/search_southwest_flights/workflow.json +18 -1
package/package.json +3 -2
package/prompts/audit-agent.md +71 -0
package/prompts/build-planning.md +74 -0
package/prompts/compile-agent.md +131 -27
package/prompts/prereq-builder.md +64 -0
package/prompts/prereq-planner.md +34 -0
package/prompts/tool-planning.md +39 -0
package/src/cli.ts +116 -3
package/src/imprint/agent.ts +5 -0
package/src/imprint/audit.ts +996 -0
package/src/imprint/backend-ladder.ts +1214 -184
package/src/imprint/build-plan.ts +1051 -0
package/src/imprint/cdp-browser-fetch.ts +592 -0
package/src/imprint/cdp-jar-cache.ts +320 -0
package/src/imprint/chromium.ts +414 -8
package/src/imprint/claude-cli-compile.ts +125 -25
package/src/imprint/codex-cli-compile.ts +26 -23
package/src/imprint/compile-agent-types.ts +38 -0
package/src/imprint/compile-agent.ts +63 -25
package/src/imprint/compile-tools.ts +1666 -66
package/src/imprint/compile.ts +13 -1
package/src/imprint/concurrency.ts +87 -0
package/src/imprint/cron.ts +4 -0
package/src/imprint/doctor.ts +48 -3
package/src/imprint/freeform-redact.ts +5 -4
package/src/imprint/install.ts +79 -4
package/src/imprint/integrations.ts +3 -3
package/src/imprint/llm.ts +56 -8
package/src/imprint/mcp-compile-server.ts +43 -10
package/src/imprint/mcp-maintenance.ts +18 -102
package/src/imprint/mcp-server.ts +73 -7
package/src/imprint/multi-progress.ts +7 -2
package/src/imprint/param-grounding.ts +367 -0
package/src/imprint/paths.ts +29 -0
package/src/imprint/playbook-runner.ts +101 -40
package/src/imprint/prereq-builder.ts +651 -0
package/src/imprint/probe-backends.ts +6 -3
package/src/imprint/record.ts +10 -1
package/src/imprint/redact.ts +30 -2
package/src/imprint/replay-capture.ts +19 -18
package/src/imprint/runtime.ts +19 -10
package/src/imprint/session-diff.ts +79 -2
package/src/imprint/session-merge.ts +9 -5
package/src/imprint/stealth-chromium.ts +79 -0
package/src/imprint/stealth-fetch.ts +309 -29
package/src/imprint/stealth-token-cache.ts +88 -0
package/src/imprint/teach-plan.ts +251 -0
package/src/imprint/teach-state.ts +10 -0
package/src/imprint/teach.ts +456 -142
package/src/imprint/tool-candidates.ts +72 -14
package/src/imprint/tool-plan.ts +313 -0
package/src/imprint/tracing.ts +135 -6
package/src/imprint/types.ts +61 -3
package/examples/google-flights/search_google_flights/index.ts +0 -101
package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
package/examples/google-flights/search_google_flights/parser.ts +0 -189
package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
package/examples/google-flights/search_google_flights/workflow.json +0 -48
package/examples/google-hotels/search_google_hotels/index.ts +0 -194
package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97

package/src/imprint/tool-candidates.ts CHANGED Viewed

@@ -182,24 +182,54 @@ export async function detectToolCandidates(
         `detecting candidate tools from ${payload.events.length} event(s), ${payload.requests.length} request(s)…`,
       );
       const llm = resolveProvider(llmConfig ?? {});
-      const result = await llm.analyze(systemPrompt, payload);
-      const objectText = extractJsonObject(result.text);
-      if (!objectText) {
-        throw new Error(
-          `Candidate detector did not return a JSON object.\nRaw response:\n${result.text.slice(0, 1000)}`,
-        );
-      }
+      const runOnce = async (): Promise<{
+        detection: ToolCandidateDetection;
+        result: Awaited<ReturnType<typeof llm.analyze>>;
+      }> => {
+        const result = await llm.analyze(systemPrompt, payload);
+        const objectText = extractJsonObject(result.text);
+        if (!objectText) {
+          throw new Error(
+            `Candidate detector did not return a JSON object.\nRaw response:\n${result.text.slice(0, 1000)}`,
+          );
+        }
+        let parsed: unknown;
+        try {
+          parsed = JSON.parse(objectText);
+        } catch (err) {
+          throw new Error(
+            `Candidate detector response was not valid JSON: ${err instanceof Error ? err.message : String(err)}\nExtracted:\n${objectText.slice(0, 1000)}`,
+          );
+        }
+        return { detection: validateToolCandidateDetection(parsed), result };
+      };
-      let parsed: unknown;
-      try {
-        parsed = JSON.parse(objectText);
-      } catch (err) {
-        throw new Error(
-          `Candidate detector response was not valid JSON: ${err instanceof Error ? err.message : String(err)}\nExtracted:\n${objectText.slice(0, 1000)}`,
+      let { detection, result } = await runOnce();
+      // Anti-collapse guard: a single candidate from a session that hit multiple
+      // distinct endpoint families is almost always under-segmentation (the
+      // detector folded separate tools — e.g. search vs pricing vs autocomplete —
+      // into one). This is pure LLM variance; re-run once and keep the richer
+      // segmentation. Targeted so genuinely single-tool sites don't pay for it.
+      if (detection.candidates.length === 1 && distinctEndpointFamilies(payload) >= 2) {
+        log(
+          'detector returned 1 candidate but the session spans ≥2 endpoint families — re-running once to guard against under-segmentation…',
         );
+        try {
+          const retry = await runOnce();
+          if (retry.detection.candidates.length > detection.candidates.length) {
+            log(`retry segmented into ${retry.detection.candidates.length} candidates; using it`);
+            ({ detection, result } = retry);
+          } else {
+            log('retry did not segment further; keeping the original detection');
+          }
+        } catch (err) {
+          log(
+            `retry failed (${err instanceof Error ? err.message : String(err)}); keeping original`,
+          );
+        }
       }
-      const detection = validateToolCandidateDetection(parsed);
       setSpanAttributes(span, {
         'imprint.candidate_count': detection.candidates.length,
         'imprint.primary_tool_name': detection.candidates.find((c) => c.primary)?.toolName,
@@ -372,6 +402,33 @@ function candidateRequestGroupKey(request: CandidateRequestPayload): unknown[] {
   ];
 }
+/** Telemetry / beacon endpoints. These fire constantly during any real session
+ *  and are never the load-bearing request behind a user intent. Left in the
+ *  candidate payload they add noise that pushes the detector to under-segment,
+ *  and — worse — the detector can anchor a candidate's `requestSeqs` on one
+ *  (e.g. Google's `/log`), sending compile to reverse-engineer a beacon. Excluded
+ *  entirely.
+ *
+ *  Matching rules: the name must be a whole path segment — it has to follow a
+ *  `/` and be followed by end-of-string, `/` or `?` — and the match is
+ *  case-insensitive and may occur anywhere in the path. The trailing lookahead
+ *  is what keeps `/login` safe; `/catalog` is safe because `log` is not
+ *  directly preceded by `/`.
+ *
+ *  NOTE(review): any segment with one of these names is excluded, so a
+ *  legitimate API route such as `/api/stats` or `/v1/collect` is dropped too. */
+const TELEMETRY_PATH =
+  /\/(log|gen_204|jserror|ping|beacon|csi|batchlog|metrics|stats|collect|analytics|adsct|pagead|ccm)(?=$|[/?])/i;
+/** Count distinct endpoint families (batchexecute rpcid, else METHOD+path) that
+ *  carry a non-trivial number of requests. ≥2 means the session genuinely hit
+ *  multiple backends — a single detected candidate there signals under-
+ *  segmentation.
+ *
+ *  Requests for which `safeUrl` returns nothing are ignored, and a family only
+ *  counts once it has at least 3 requests. The rpcid is percent-decoded for the
+ *  key when possible; a malformed escape (which makes `decodeURIComponent`
+ *  throw a URIError) falls back to the raw value so that one odd captured URL
+ *  cannot abort candidate detection.
+ *
+ *  NOTE(review): every payload entry counts as one request. If the payload is
+ *  compacted upstream (repeated requests folded into a single entry), repeats
+ *  are under-counted here — confirm against the payload builder.
+ *
+ *  @param payload Candidate-detection payload whose `requests` are inspected.
+ *  @returns Number of endpoint families with ≥3 requests. */
+function distinctEndpointFamilies(payload: ToolCandidatePayload): number {
+  const counts = new Map<string, number>();
+  for (const r of payload.requests) {
+    const url = safeUrl(r.url);
+    if (!url) continue;
+    const rpc = /[?&]rpcids?=([^&]+)/.exec(url.search)?.[1];
+    let key = `${r.method} ${url.pathname}`;
+    if (rpc) {
+      let rpcId = rpc;
+      try {
+        rpcId = decodeURIComponent(rpc);
+      } catch {
+        // Malformed percent-escape in the recorded URL: group on the raw value.
+      }
+      key = `rpc:${rpcId}`;
+    }
+    counts.set(key, (counts.get(key) ?? 0) + 1);
+  }
+  let families = 0;
+  for (const c of counts.values()) if (c >= 3) families++;
+  return families;
+}
 function isCandidateRequest(
   request: CapturedRequest,
   startRoot: string | null,
@@ -380,6 +437,7 @@ function isCandidateRequest(
   if (request.resourceType !== 'XHR' && request.resourceType !== 'Fetch') return false;
   const url = safeUrl(request.url);
   if (!url) return false;
+  if (TELEMETRY_PATH.test(url.pathname)) return false;
   if (startRoot && !isSameRegistrableDomain(url.hostname, startRoot)) {
     return appApiHosts.has(url.hostname);
   }

package/src/imprint/tool-plan.ts ADDED Viewed

@@ -0,0 +1,313 @@
+/**
+ * Per-tool planning pass for `imprint teach`.
+ *
+ * After the global shared-module plan + build (teach-plan.ts) runs once, each
+ * tool gets a thin planning stage before its compile (plan THEN execute): one
+ * `llm.analyze` pass that maps each parameter to its recorded field, fixes the
+ * request construction + response parsing, and names the shared modules to
+ * import. The Markdown plan rides the compile agent's initial prompt (via
+ * formatToolPlan), so the compile follows it instead of re-deriving structure.
+ *
+ * Best-effort throughout: a missing prompt, a timeout, or any LLM/IO error
+ * yields `undefined` and the compile proceeds exactly as before. Gated by
+ * IMPRINT_NO_TOOL_PLAN. Modeled on planSharedModule in prereq-builder.ts.
+ */
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { join as pathJoin } from 'node:path';
+import {
+  BuildPlanSchema,
+  type SharedModuleManifestEntry,
+  planSliceForTool,
+  resolveAssignedModules,
+} from './build-plan.ts';
+import { withTimeout } from './concurrency.ts';
+import { type ProviderName, resolveProvider } from './llm.ts';
+import { loadJsonFile } from './load-json.ts';
+import { createLog } from './log.ts';
+import { localToolDir } from './paths.ts';
+import { compactRequestContexts, requestContextDigest } from './request-context.ts';
+import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
+import { setSpanAttributes, traced } from './tracing.ts';
+import { type Session, SessionSchema } from './types.ts';
+// Prompt templates are resolved two directories above this module's own
+// directory (`import.meta.dir`), in a `prompts` folder.
+const PROMPTS_DIR = pathJoin(import.meta.dir, '..', '..', 'prompts');
+// Module-scoped logger tagged 'tool-plan'.
+const log = createLog('tool-plan');
+/** Wall-clock cap on the per-tool planner LLM call. A throttled/hung provider
+ *  must not block the tool's compile; on timeout we degrade to compiling without
+ *  a plan (today's behavior). The shared-module plan is the 10-min one. */
+const TOOL_PLAN_TIMEOUT_MS = 5 * 60_000;
+// Truncation limits (in string length units) applied when building the planner
+// payload: request body, response body preview, and JSON-stringified headers.
+const BODY_LIMIT = 800;
+const RESPONSE_PREVIEW_LIMIT = 500;
+const HEADER_LIMIT = 600;
+/** One recorded request as handed to the per-tool planner. Large fields are
+ *  truncated; digests and lengths describe the untruncated originals. */
+interface ToolPlanRequestPayload {
+  /** Sequence number of the request within the recorded session. */
+  seq: number;
+  method: string;
+  url: string;
+  /** Response status, when a response was recorded. */
+  status?: number;
+  /** Response MIME type, when a response was recorded. */
+  mimeType?: string;
+  /** JSON-stringified request headers, truncated to HEADER_LIMIT. */
+  headers: string;
+  /** Request body, truncated to BODY_LIMIT. */
+  body?: string;
+  /** Digest of the full request body (from `requestContextDigest`). */
+  bodyDigest?: string;
+  /** Length of the full request body. */
+  bodyLength?: number;
+  /** Response body, truncated to RESPONSE_PREVIEW_LIMIT. */
+  responsePreview?: string;
+  /** Digest of the full response body (from `requestContextDigest`). */
+  responseBodyDigest?: string;
+  /** Length of the full response body. */
+  responseBodyLength?: number;
+  // The next three fields are not set by buildToolPlanPayload itself;
+  // presumably compactRequestContexts fills them when it folds repeated
+  // requests together — TODO confirm in request-context.ts.
+  repeatCount?: number;
+  repeatedSeqs?: number[];
+  lastTimestamp?: number;
+  /** Timestamp copied from the recorded request. */
+  timestamp: number;
+}
+/** A shared module assigned to the tool, projected down to the fields the
+ *  planner needs. buildToolPlanPayload only includes modules flagged
+ *  `verified`. */
+interface ToolPlanAssignedModule {
+  path: string;
+  kind: string;
+  /** Import specifier the tool should use for this module. */
+  importPath: string;
+  /** Signatures of the module's exports, as recorded in the module manifest. */
+  exportSignatures: string[];
+  purpose: string;
+}
+/** Complete input for one per-tool planning pass; passed to `llm.analyze`
+ *  alongside the tool-planning system prompt. */
+interface ToolPlanPayload {
+  /** Site identifier copied from the session. */
+  site: string;
+  /** URL copied from the session. */
+  url: string;
+  /** The candidate tool being planned, copied from the detected ToolCandidate. */
+  tool: {
+    toolName: string;
+    description: string;
+    expectedOutput: string;
+    likelyParams: ToolCandidate['likelyParams'];
+    requestSeqs: number[];
+    dependencySeqs: number[];
+  };
+  /** Passed through unchanged from the caller, when provided. */
+  sharedContext?: SharedCompileContext;
+  /** Slice of the global build plan for this tool (when a build plan exists). */
+  planGuidance?: {
+    parserGuidance: string;
+    paramChecklist: string[];
+    authRecipe: unknown;
+    loadBearingSeqs: number[];
+  };
+  /** Verified shared modules assigned to this tool; empty when there is no
+   *  usable build plan. */
+  assignedModules: ToolPlanAssignedModule[];
+  /** Compacted requests limited to the tool's relevant seqs. */
+  requests: ToolPlanRequestPayload[];
+}
+/** Assemble the planner input for one tool. No LLM involved, so it can be unit
+ *  tested directly. When a build plan is supplied and validates against
+ *  BuildPlanSchema, the tool's slice (guidance + load-bearing seqs) and its
+ *  verified assigned modules are included. Requests are restricted to the union
+ *  of candidate seqs, dependency seqs and build-plan loadBearingSeqs, then
+ *  compacted with toolPlanRequestGroupKey (the same way build-plan.ts does). */
+export function buildToolPlanPayload(opts: {
+  session: Session;
+  candidate: ToolCandidate;
+  sharedContext?: SharedCompileContext;
+  buildPlan?: unknown;
+  sharedModules?: SharedModuleManifestEntry[];
+}): ToolPlanPayload {
+  const { session, candidate, sharedContext } = opts;
+
+  // Defaults used when no build plan is given or it fails validation.
+  let planGuidance: ToolPlanPayload['planGuidance'];
+  let assignedModules: ToolPlanAssignedModule[] = [];
+  let loadBearingSeqs: number[] = [];
+
+  const parsedPlan = opts.buildPlan ? BuildPlanSchema.safeParse(opts.buildPlan) : undefined;
+  if (parsedPlan !== undefined && parsedPlan.success) {
+    const plan = parsedPlan.data;
+
+    // Tool-specific slice of the global plan, if the plan mentions this tool.
+    const slice = planSliceForTool(plan, candidate.toolName);
+    if (slice) {
+      const sliceTool = slice.tool;
+      planGuidance = {
+        parserGuidance: sliceTool.parserGuidance,
+        paramChecklist: sliceTool.paramChecklist,
+        authRecipe: sliceTool.authRecipe,
+        loadBearingSeqs: sliceTool.loadBearingSeqs,
+      };
+      loadBearingSeqs = sliceTool.loadBearingSeqs;
+    }
+
+    // Only verified shared modules are offered to the planner.
+    const resolved = resolveAssignedModules(plan, candidate.toolName, opts.sharedModules);
+    assignedModules = resolved
+      .filter((mod) => mod.verified)
+      .map(({ path, kind, importPath, exportSignatures, purpose }) => ({
+        path,
+        kind,
+        importPath,
+        exportSignatures,
+        purpose,
+      }));
+  }
+
+  // Seqs relevant to this tool.
+  const scope = new Set<number>([
+    ...candidate.requestSeqs,
+    ...candidate.dependencySeqs,
+    ...loadBearingSeqs,
+  ]);
+
+  const rows: ToolPlanRequestPayload[] = [];
+  for (const req of session.requests) {
+    if (!scope.has(req.seq)) continue;
+    const requestBody = req.body;
+    const responseBody = req.response?.body;
+    rows.push({
+      seq: req.seq,
+      timestamp: req.timestamp,
+      method: req.method,
+      url: req.url,
+      status: req.response?.status,
+      mimeType: req.response?.mimeType,
+      headers: truncate(JSON.stringify(req.headers), HEADER_LIMIT) ?? '{}',
+      body: truncate(requestBody, BODY_LIMIT),
+      bodyDigest: requestContextDigest(requestBody),
+      bodyLength: requestBody?.length,
+      responsePreview: truncate(responseBody, RESPONSE_PREVIEW_LIMIT),
+      responseBodyDigest: requestContextDigest(responseBody),
+      responseBodyLength: responseBody?.length,
+    });
+  }
+  const requests = compactRequestContexts(rows, toolPlanRequestGroupKey);
+
+  const tool: ToolPlanPayload['tool'] = {
+    toolName: candidate.toolName,
+    description: candidate.description,
+    expectedOutput: candidate.expectedOutput,
+    likelyParams: candidate.likelyParams,
+    requestSeqs: candidate.requestSeqs,
+    dependencySeqs: candidate.dependencySeqs,
+  };
+  return {
+    site: session.site,
+    url: session.url,
+    tool,
+    sharedContext,
+    planGuidance,
+    assignedModules,
+    requests,
+  };
+}
+/** Grouping key used when compacting requests: two entries fold together only
+ *  when method, URL, request-body digest/length, status, MIME type and
+ *  response-body digest/length all agree. */
+function toolPlanRequestGroupKey(request: ToolPlanRequestPayload): unknown[] {
+  const {
+    method,
+    url,
+    bodyDigest,
+    bodyLength,
+    status,
+    mimeType,
+    responseBodyDigest,
+    responseBodyLength,
+  } = request;
+  const requestSide = [method, url, bodyDigest, bodyLength];
+  const responseSide = [status, mimeType, responseBodyDigest, responseBodyLength];
+  return [...requestSide, ...responseSide];
+}
+/** Derive a per-tool implementation plan from the recording. Best-effort: any
+ *  error/timeout (or the IMPRINT_NO_TOOL_PLAN gate / a missing prompt) returns
+ *  undefined so the caller compiles without a plan (today's behavior). Persists
+ *  the plan to `~/.imprint/<site>/<toolName>/.tool-plan.md`.
+ *
+ *  Steps, all inside a `teach.plan_tool` trace span: read the system prompt,
+ *  load and validate the session, optionally load the global build plan, build
+ *  the payload, run one `llm.analyze` call capped at TOOL_PLAN_TIMEOUT_MS, strip
+ *  a wrapping code fence, then write the plan into the tool's local directory.
+ *  An unreadable or invalid build plan is logged and ignored; planning then
+ *  proceeds without the extra guidance.
+ *
+ *  @param opts.sessionPath   Path of the recorded session JSON.
+ *  @param opts.buildPlanPath Optional path of the global build plan JSON.
+ *  @param opts.providerName  LLM provider used for the planning call.
+ *  @param opts.model         Optional model override for that provider.
+ *  @returns The Markdown plan, or undefined when planning was skipped, produced
+ *           an empty plan, or failed. Never rejects for planning errors. */
+export async function planToolCompile(opts: {
+  site: string;
+  toolName: string;
+  candidate: ToolCandidate;
+  sharedContext?: SharedCompileContext;
+  sessionPath: string;
+  buildPlanPath?: string;
+  sharedModules?: SharedModuleManifestEntry[];
+  providerName: ProviderName;
+  model?: string;
+}): Promise<string | undefined> {
+  if (toolPlanDisabled()) return undefined;
+  const promptPath = pathJoin(PROMPTS_DIR, 'tool-planning.md');
+  if (!existsSync(promptPath)) return undefined;
+  return await traced(
+    'teach.plan_tool',
+    'AGENT',
+    {
+      'imprint.site': opts.site,
+      'imprint.tool_name': opts.toolName,
+      'imprint.provider': opts.providerName,
+    },
+    async (span) => {
+      try {
+        const systemPrompt = readFileSync(promptPath, 'utf8');
+        const session = loadJsonFile(
+          opts.sessionPath,
+          SessionSchema,
+          {
+            notFound: 'session not found before tool planning',
+            badSchema: 'session file is malformed',
+          },
+          'session',
+        );
+        // Load the global build plan slice (if one exists) so the per-tool plan
+        // can carry the tool's parserGuidance/paramChecklist/authRecipe and the
+        // shared modules it was assigned.
+        let buildPlan: unknown;
+        if (opts.buildPlanPath && existsSync(opts.buildPlanPath)) {
+          try {
+            buildPlan = loadJsonFile(
+              opts.buildPlanPath,
+              BuildPlanSchema,
+              { notFound: 'build plan not found' },
+              'build plan',
+            );
+          } catch (err) {
+            // Still best-effort: a bad build plan only costs the extra guidance,
+            // but record why it was dropped instead of discarding it silently.
+            log(
+              `build plan unusable for ${opts.toolName} (${err instanceof Error ? err.message : String(err)}) — planning without it`,
+            );
+            buildPlan = undefined;
+          }
+        }
+        const payload = buildToolPlanPayload({
+          session,
+          candidate: opts.candidate,
+          sharedContext: opts.sharedContext,
+          buildPlan,
+          sharedModules: opts.sharedModules,
+        });
+        const llm = resolveProvider({ provider: opts.providerName, model: opts.model });
+        const result = await withTimeout(
+          llm.analyze(systemPrompt, payload),
+          TOOL_PLAN_TIMEOUT_MS,
+          'tool planner',
+        );
+        const plan = stripCodeFences(result.text).trim();
+        if (plan.length === 0) {
+          // An empty response is treated as "no plan", not as an error.
+          setSpanAttributes(span, { 'imprint.tool_plan.skipped': true });
+          return undefined;
+        }
+        const toolDir = localToolDir(opts.site, opts.toolName);
+        mkdirSync(toolDir, { recursive: true });
+        writeFileSync(pathJoin(toolDir, '.tool-plan.md'), plan, 'utf8');
+        setSpanAttributes(span, {
+          'imprint.tool_plan.chars': plan.length,
+          'imprint.tool_plan.skipped': false,
+        });
+        log(`planned ${opts.toolName} (${plan.length} chars)`);
+        return plan;
+      } catch (err) {
+        // Any failure (prompt/session IO, LLM error, timeout, write error)
+        // degrades to compiling without a plan.
+        setSpanAttributes(span, { 'imprint.tool_plan.skipped': true });
+        log(
+          `tool planning failed for ${opts.toolName} (${err instanceof Error ? err.message : String(err)}) — compiling without a plan`,
+        );
+        return undefined;
+      }
+    },
+  );
+}
+/** True when the IMPRINT_NO_TOOL_PLAN environment variable is set to anything
+ *  other than an empty string or one of 0/false/no/off (case-insensitive). */
+function toolPlanDisabled(): boolean {
+  const raw = process.env.IMPRINT_NO_TOOL_PLAN;
+  if (!raw) return false;
+  const lowered = raw.toLowerCase();
+  const meansEnabled = ['0', 'false', 'no', 'off'].includes(lowered);
+  return !meansEnabled;
+}
+/** If the trimmed response begins with a Markdown fence line (optionally
+ *  carrying an alphabetic language tag) and ends with a closing fence, return
+ *  what lies between them; otherwise return the trimmed text as is. Fences that
+ *  sit inside a longer plan are left alone. Mirrors the helper in
+ *  prereq-builder.ts (not exported there). */
+function stripCodeFences(text: string): string {
+  const trimmed = text.trim();
+  const wrapped = trimmed.match(/^```[a-zA-Z]*\n([\s\S]*?)\n```$/);
+  if (wrapped === null) {
+    return trimmed;
+  }
+  const inner = wrapped[1];
+  return inner ?? trimmed;
+}
+/** Cap a string at `limit` length units. Missing or empty input yields
+ *  undefined; input within the limit is returned untouched; anything longer is
+ *  cut and suffixed with a marker that records the original length. */
+function truncate(s: string | undefined, limit: number): string | undefined {
+  if (!s) {
+    return undefined;
+  }
+  const fits = s.length <= limit;
+  return fits ? s : `${s.slice(0, limit)}…(truncated, original length ${s.length})`;
+}

package/src/imprint/tracing.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+import { AsyncLocalStorage } from 'node:async_hooks';
 import {
   MimeType,
   type NodeTracerProvider,
@@ -14,6 +15,26 @@ type TraceKind = OpenInferenceSpanKind | `${OpenInferenceSpanKind}`;
 type TraceAttributes = Record<string, unknown>;
 type TraceLlmMessage = { role?: string; content?: string };
+// ---------------------------------------------------------------------------
+// Cost accumulator — rolls up LLM costs from child spans to a parent span.
+// ---------------------------------------------------------------------------
+/** Running totals collected while a `tracedWithCostRollup` scope is active.
+ *  `llmCostAttributes` adds to these each time it computes costs for an LLM
+ *  call made inside that scope. */
+interface CostAccumulator {
+  // Token counts. `inputTokens` sums the `inputTokens` value each call passes
+  // to llmCostAttributes.
+  inputTokens: number;
+  outputTokens: number;
+  cacheReadTokens: number;
+  cacheWriteTokens: number;
+  // Estimated costs, derived from the per-1M-token USD rates.
+  uncachedInputCost: number;
+  cacheReadCost: number;
+  cacheWriteCost: number;
+  completionCost: number;
+}
+// Async-context storage that makes the current accumulator visible to code
+// running inside the callback passed to `costAccumulatorStorage.run`.
+const costAccumulatorStorage = new AsyncLocalStorage<CostAccumulator>();
+/** Accumulator of the enclosing rollup scope, or undefined outside any scope. */
+function getActiveCostAccumulator(): CostAccumulator | undefined {
+  return costAccumulatorStorage.getStore();
+}
 let provider: NodeTracerProvider | null = null;
 let attemptedInit = false;
 let suppressInit = false;
@@ -136,7 +157,29 @@ export function resolveTraceTokenCount(
   return { source: 'missing' };
 }
+/**
+ * Normalize a provider's token usage into the TOTAL prompt size:
+ * uncached input + cache reads + cache writes.
+ *
+ * Why this exists: providers (Anthropic API and the claude CLI alike) put only
+ * the *uncached* portion in `usage.input_tokens`; the cached bulk is reported
+ * in the separate cache counts. `llmCostAttributes` wants `inputTokens` to be
+ * the total (it subtracts the cache split again to recover the uncached part),
+ * and `llm.token_count.prompt` should describe the whole prompt as well.
+ * Feeding the bare uncached count instead billed the cached bulk at the full
+ * input rate, or mislabeled the token count, so every capture boundary
+ * normalizes through this helper.
+ *
+ * Missing cache counts are treated as 0. Returns null when the uncached count
+ * itself is unknown.
+ */
+export function totalPromptTokens(
+  uncachedInputTokens: number | null | undefined,
+  cacheReadTokens: number | null | undefined,
+  cacheWriteTokens: number | null | undefined,
+): number | null {
+  if (uncachedInputTokens === null || uncachedInputTokens === undefined) {
+    return null;
+  }
+  const reads = cacheReadTokens ?? 0;
+  const writes = cacheWriteTokens ?? 0;
+  const withReads = uncachedInputTokens + reads;
+  return withReads + writes;
+}
 const DEFAULT_MODEL_RATES: Record<string, { inputUsdPer1M: number; outputUsdPer1M: number }> = {
+  'claude-opus-4-8': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
   'claude-opus-4-7': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
   'claude-opus-4-6': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
   'claude-opus-4-5': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
@@ -230,6 +273,59 @@ export async function traced<T>(
   );
 }
+/**
+ * Like `traced`, but accumulates `llm.cost.*` from all descendant LLM spans
+ * and sets the rolled-up totals on the parent span when `fn` completes.
+ * Use on root spans (`cli.teach`, `cli.audit`) so Phoenix shows the full cost.
+ *
+ * A fresh accumulator is installed for the duration of `fn`; cost computation
+ * for LLM calls made inside adds to it. When `fn` settles (success or throw)
+ * the totals are written to this span, unless nothing was recorded. If this
+ * call is itself nested inside another rollup scope, the finished totals are
+ * also folded into the enclosing accumulator — otherwise the inner scope would
+ * shadow the outer one and the outer span would under-report its descendants.
+ *
+ * @param name       Span name.
+ * @param kind       Span kind, passed through to `traced`.
+ * @param attributes Initial span attributes, passed through to `traced`.
+ * @param fn         Work to run inside the span.
+ * @returns Whatever `fn` resolves to; rejections from `fn` propagate.
+ */
+export async function tracedWithCostRollup<T>(
+  name: string,
+  kind: TraceKind,
+  attributes: TraceAttributes | undefined,
+  fn: (span: Span) => Promise<T> | T,
+): Promise<T> {
+  // Captured before `run` replaces the store for the callback below.
+  const enclosing = getActiveCostAccumulator();
+  const acc: CostAccumulator = {
+    inputTokens: 0,
+    outputTokens: 0,
+    cacheReadTokens: 0,
+    cacheWriteTokens: 0,
+    uncachedInputCost: 0,
+    cacheReadCost: 0,
+    cacheWriteCost: 0,
+    completionCost: 0,
+  };
+  const applyCostRollup = (span: Span): void => {
+    const promptCost = acc.uncachedInputCost + acc.cacheReadCost + acc.cacheWriteCost;
+    const totalCost = promptCost + acc.completionCost;
+    // Nothing was recorded in this scope: leave the span untouched.
+    if (totalCost === 0 && acc.inputTokens === 0 && acc.outputTokens === 0) return;
+    setSpanAttributes(span, {
+      [SemanticConventions.LLM_TOKEN_COUNT_PROMPT]: acc.inputTokens,
+      [SemanticConventions.LLM_TOKEN_COUNT_COMPLETION]: acc.outputTokens,
+      [SemanticConventions.LLM_TOKEN_COUNT_TOTAL]: acc.inputTokens + acc.outputTokens,
+      [SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ]: acc.cacheReadTokens,
+      [SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE]: acc.cacheWriteTokens,
+      [SemanticConventions.LLM_COST_PROMPT]: promptCost,
+      [SemanticConventions.LLM_COST_COMPLETION]: acc.completionCost,
+      [SemanticConventions.LLM_COST_TOTAL]: totalCost,
+      [SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_READ]: acc.cacheReadCost,
+      [SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_WRITE]: acc.cacheWriteCost,
+      [SemanticConventions.LLM_COST_INPUT]: acc.uncachedInputCost,
+      'imprint.llm.cost_estimated': true,
+    });
+  };
+  const foldIntoEnclosing = (): void => {
+    if (!enclosing) return;
+    enclosing.inputTokens += acc.inputTokens;
+    enclosing.outputTokens += acc.outputTokens;
+    enclosing.cacheReadTokens += acc.cacheReadTokens;
+    enclosing.cacheWriteTokens += acc.cacheWriteTokens;
+    enclosing.uncachedInputCost += acc.uncachedInputCost;
+    enclosing.cacheReadCost += acc.cacheReadCost;
+    enclosing.cacheWriteCost += acc.cacheWriteCost;
+    enclosing.completionCost += acc.completionCost;
+  };
+  return costAccumulatorStorage.run(acc, () =>
+    traced(name, kind, attributes, async (span) => {
+      try {
+        return await fn(span);
+      } finally {
+        applyCostRollup(span);
+        foldIntoEnclosing();
+      }
+    }),
+  );
+}
 export function startTraceSpan(
   name: string,
   kind: TraceKind,
@@ -444,23 +540,56 @@ function llmCostAttributes(opts: {
       : hasCacheBreakdown
         ? Math.max(0, opts.inputTokens - cacheRead - cacheWrite)
         : opts.inputTokens;
+  let uncachedInputCost: number | undefined;
+  let cacheReadCost = 0;
+  let cacheWriteCost = 0;
+  if (uncachedInput !== undefined) {
+    if (hasCacheBreakdown) {
+      uncachedInputCost = (uncachedInput / 1_000_000) * opts.inputUsdPer1M;
+      cacheReadCost = (cacheRead / 1_000_000) * opts.inputUsdPer1M * CACHE_READ_MULTIPLIER;
+      cacheWriteCost = (cacheWrite / 1_000_000) * opts.inputUsdPer1M * CACHE_WRITE_MULTIPLIER;
+    } else {
+      uncachedInputCost = (uncachedInput / 1_000_000) * opts.inputUsdPer1M;
+    }
+  }
   const prompt =
-    uncachedInput === undefined
+    uncachedInputCost === undefined
       ? undefined
-      : hasCacheBreakdown
-        ? (uncachedInput / 1_000_000) * opts.inputUsdPer1M +
-          (cacheRead / 1_000_000) * opts.inputUsdPer1M * CACHE_READ_MULTIPLIER +
-          (cacheWrite / 1_000_000) * opts.inputUsdPer1M * CACHE_WRITE_MULTIPLIER
-        : (uncachedInput / 1_000_000) * opts.inputUsdPer1M;
+      : uncachedInputCost + cacheReadCost + cacheWriteCost;
   const completion =
     opts.outputTokens === undefined
       ? undefined
       : (opts.outputTokens / 1_000_000) * opts.outputUsdPer1M;
   const total = (prompt ?? 0) + (completion ?? 0);
+  // Roll up into the nearest ancestor tracedWithCostRollup, if any.
+  const acc = getActiveCostAccumulator();
+  if (acc) {
+    acc.inputTokens += opts.inputTokens ?? 0;
+    acc.outputTokens += opts.outputTokens ?? 0;
+    acc.cacheReadTokens += cacheRead;
+    acc.cacheWriteTokens += cacheWrite;
+    acc.uncachedInputCost += uncachedInputCost ?? 0;
+    acc.cacheReadCost += cacheReadCost;
+    acc.cacheWriteCost += cacheWriteCost;
+    acc.completionCost += completion ?? 0;
+  }
   return {
     ...(prompt !== undefined ? { [SemanticConventions.LLM_COST_PROMPT]: prompt } : {}),
     ...(completion !== undefined ? { [SemanticConventions.LLM_COST_COMPLETION]: completion } : {}),
     [SemanticConventions.LLM_COST_TOTAL]: total,
+    ...(hasCacheBreakdown
+      ? {
+          [SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_READ]: cacheReadCost,
+          [SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_WRITE]: cacheWriteCost,
+          [SemanticConventions.LLM_COST_INPUT]: uncachedInputCost,
+          [SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ]: cacheRead,
+          [SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE]: cacheWrite,
+        }
+      : {}),
     'imprint.llm.cost_estimated': true,
   };
 }