imprint-mcp 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/README.md +193 -189
  2. package/examples/discoverandgo/README.md +1 -1
  3. package/examples/echo/README.md +1 -1
  4. package/examples/google-flights/README.md +28 -0
  5. package/examples/google-flights/_shared/batchexecute.ts +63 -0
  6. package/examples/google-flights/_shared/flights_request.ts +95 -0
  7. package/examples/google-flights/_shared/package.json +9 -0
  8. package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
  9. package/examples/google-flights/get_flight_booking_details/package.json +9 -0
  10. package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
  11. package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
  12. package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
  13. package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
  14. package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
  15. package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
  16. package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
  17. package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
  18. package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
  19. package/examples/google-flights/get_flight_calendar_prices/workflow.json +78 -0
  20. package/examples/google-flights/lookup_airport/index.ts +101 -0
  21. package/examples/google-flights/lookup_airport/package.json +9 -0
  22. package/examples/google-flights/lookup_airport/parser.ts +66 -0
  23. package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
  24. package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
  25. package/examples/google-flights/lookup_airport/workflow.json +57 -0
  26. package/examples/google-flights/search_flights/index.ts +219 -0
  27. package/examples/google-flights/search_flights/package.json +9 -0
  28. package/examples/google-flights/search_flights/parser.ts +169 -0
  29. package/examples/google-flights/search_flights/playbook.yaml +184 -0
  30. package/examples/google-flights/search_flights/request-transform.ts +119 -0
  31. package/examples/google-flights/search_flights/workflow.json +143 -0
  32. package/examples/google-hotels/README.md +29 -0
  33. package/examples/google-hotels/_shared/batchexecute.ts +73 -0
  34. package/examples/google-hotels/_shared/freq.ts +158 -0
  35. package/examples/google-hotels/_shared/package.json +9 -0
  36. package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
  37. package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
  38. package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
  39. package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
  40. package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
  41. package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
  42. package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
  43. package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
  44. package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
  45. package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
  46. package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
  47. package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
  48. package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
  49. package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
  50. package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
  51. package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
  52. package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
  53. package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
  54. package/examples/google-hotels/search_hotels/index.ts +207 -0
  55. package/examples/google-hotels/search_hotels/package.json +9 -0
  56. package/examples/google-hotels/search_hotels/parser.ts +260 -0
  57. package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
  58. package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
  59. package/examples/google-hotels/search_hotels/workflow.json +127 -0
  60. package/examples/southwest/README.md +3 -2
  61. package/examples/southwest/search_southwest_flights/index.ts +18 -1
  62. package/examples/southwest/search_southwest_flights/workflow.json +18 -1
  63. package/package.json +3 -2
  64. package/prompts/audit-agent.md +71 -0
  65. package/prompts/build-planning.md +74 -0
  66. package/prompts/compile-agent.md +131 -27
  67. package/prompts/prereq-builder.md +64 -0
  68. package/prompts/prereq-planner.md +34 -0
  69. package/prompts/tool-planning.md +39 -0
  70. package/src/cli.ts +116 -3
  71. package/src/imprint/agent.ts +5 -0
  72. package/src/imprint/audit.ts +996 -0
  73. package/src/imprint/backend-ladder.ts +1214 -184
  74. package/src/imprint/build-plan.ts +1051 -0
  75. package/src/imprint/cdp-browser-fetch.ts +592 -0
  76. package/src/imprint/cdp-jar-cache.ts +320 -0
  77. package/src/imprint/chromium.ts +414 -8
  78. package/src/imprint/claude-cli-compile.ts +125 -25
  79. package/src/imprint/codex-cli-compile.ts +26 -23
  80. package/src/imprint/compile-agent-types.ts +38 -0
  81. package/src/imprint/compile-agent.ts +63 -25
  82. package/src/imprint/compile-tools.ts +1666 -66
  83. package/src/imprint/compile.ts +13 -1
  84. package/src/imprint/concurrency.ts +87 -0
  85. package/src/imprint/cron.ts +4 -0
  86. package/src/imprint/doctor.ts +48 -3
  87. package/src/imprint/freeform-redact.ts +5 -4
  88. package/src/imprint/install.ts +79 -4
  89. package/src/imprint/integrations.ts +3 -3
  90. package/src/imprint/llm.ts +56 -8
  91. package/src/imprint/mcp-compile-server.ts +43 -10
  92. package/src/imprint/mcp-maintenance.ts +18 -102
  93. package/src/imprint/mcp-server.ts +73 -7
  94. package/src/imprint/multi-progress.ts +7 -2
  95. package/src/imprint/param-grounding.ts +367 -0
  96. package/src/imprint/paths.ts +29 -0
  97. package/src/imprint/playbook-runner.ts +101 -40
  98. package/src/imprint/prereq-builder.ts +651 -0
  99. package/src/imprint/probe-backends.ts +6 -3
  100. package/src/imprint/record.ts +10 -1
  101. package/src/imprint/redact.ts +30 -2
  102. package/src/imprint/replay-capture.ts +19 -18
  103. package/src/imprint/runtime.ts +19 -10
  104. package/src/imprint/session-diff.ts +79 -2
  105. package/src/imprint/session-merge.ts +9 -5
  106. package/src/imprint/stealth-chromium.ts +79 -0
  107. package/src/imprint/stealth-fetch.ts +309 -29
  108. package/src/imprint/stealth-token-cache.ts +88 -0
  109. package/src/imprint/teach-plan.ts +251 -0
  110. package/src/imprint/teach-state.ts +10 -0
  111. package/src/imprint/teach.ts +456 -142
  112. package/src/imprint/tool-candidates.ts +72 -14
  113. package/src/imprint/tool-plan.ts +313 -0
  114. package/src/imprint/tracing.ts +135 -6
  115. package/src/imprint/types.ts +61 -3
  116. package/examples/google-flights/search_google_flights/index.ts +0 -101
  117. package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
  118. package/examples/google-flights/search_google_flights/parser.ts +0 -189
  119. package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
  120. package/examples/google-flights/search_google_flights/workflow.json +0 -48
  121. package/examples/google-hotels/search_google_hotels/index.ts +0 -194
  122. package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
  123. package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
  124. package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
  125. package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
  126. package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
  127. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
  128. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
  129. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
  130. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
@@ -182,24 +182,54 @@ export async function detectToolCandidates(
182
182
  `detecting candidate tools from ${payload.events.length} event(s), ${payload.requests.length} request(s)…`,
183
183
  );
184
184
  const llm = resolveProvider(llmConfig ?? {});
185
- const result = await llm.analyze(systemPrompt, payload);
186
- const objectText = extractJsonObject(result.text);
187
- if (!objectText) {
188
- throw new Error(
189
- `Candidate detector did not return a JSON object.\nRaw response:\n${result.text.slice(0, 1000)}`,
190
- );
191
- }
185
+ const runOnce = async (): Promise<{
186
+ detection: ToolCandidateDetection;
187
+ result: Awaited<ReturnType<typeof llm.analyze>>;
188
+ }> => {
189
+ const result = await llm.analyze(systemPrompt, payload);
190
+ const objectText = extractJsonObject(result.text);
191
+ if (!objectText) {
192
+ throw new Error(
193
+ `Candidate detector did not return a JSON object.\nRaw response:\n${result.text.slice(0, 1000)}`,
194
+ );
195
+ }
196
+ let parsed: unknown;
197
+ try {
198
+ parsed = JSON.parse(objectText);
199
+ } catch (err) {
200
+ throw new Error(
201
+ `Candidate detector response was not valid JSON: ${err instanceof Error ? err.message : String(err)}\nExtracted:\n${objectText.slice(0, 1000)}`,
202
+ );
203
+ }
204
+ return { detection: validateToolCandidateDetection(parsed), result };
205
+ };
192
206
 
193
- let parsed: unknown;
194
- try {
195
- parsed = JSON.parse(objectText);
196
- } catch (err) {
197
- throw new Error(
198
- `Candidate detector response was not valid JSON: ${err instanceof Error ? err.message : String(err)}\nExtracted:\n${objectText.slice(0, 1000)}`,
207
+ let { detection, result } = await runOnce();
208
+
209
+ // Anti-collapse guard: a single candidate from a session that hit multiple
210
+ // distinct endpoint families is almost always under-segmentation (the
211
+ // detector folded separate tools — e.g. search vs pricing vs autocomplete —
212
+ // into one). This is pure LLM variance; re-run once and keep the richer
213
+ // segmentation. Targeted so genuinely single-tool sites don't pay for it.
214
+ if (detection.candidates.length === 1 && distinctEndpointFamilies(payload) >= 2) {
215
+ log(
216
+ 'detector returned 1 candidate but the session spans ≥2 endpoint families — re-running once to guard against under-segmentation…',
199
217
  );
218
+ try {
219
+ const retry = await runOnce();
220
+ if (retry.detection.candidates.length > detection.candidates.length) {
221
+ log(`retry segmented into ${retry.detection.candidates.length} candidates; using it`);
222
+ ({ detection, result } = retry);
223
+ } else {
224
+ log('retry did not segment further; keeping the original detection');
225
+ }
226
+ } catch (err) {
227
+ log(
228
+ `retry failed (${err instanceof Error ? err.message : String(err)}); keeping original`,
229
+ );
230
+ }
200
231
  }
201
232
 
202
- const detection = validateToolCandidateDetection(parsed);
203
233
  setSpanAttributes(span, {
204
234
  'imprint.candidate_count': detection.candidates.length,
205
235
  'imprint.primary_tool_name': detection.candidates.find((c) => c.primary)?.toolName,
@@ -372,6 +402,33 @@ function candidateRequestGroupKey(request: CandidateRequestPayload): unknown[] {
372
402
  ];
373
403
  }
374
404
 
405
/**
 * Telemetry / beacon endpoints. These fire constantly during any real session
 * and are never the load-bearing request behind a user intent. Left in the
 * candidate payload they add noise that pushes the detector to under-segment,
 * and — worse — the detector can anchor a candidate's `requestSeqs` on one
 * (e.g. Google's `/log`), sending compile to reverse-engineer a beacon. Excluded
 * entirely.
 *
 * The pattern is case-insensitive and matches a listed name only as a whole
 * path segment: it must follow a `/` and be followed by end-of-string, `/` or
 * `?`. That boundary lookahead keeps `/login`, `/catalog`, etc. safe.
 * `isCandidateRequest` tests it against the URL's pathname.
 */
const TELEMETRY_PATH =
  /\/(log|gen_204|jserror|ping|beacon|csi|batchlog|metrics|stats|collect|analytics|adsct|pagead|ccm)(?=$|[/?])/i;
413
+
414
/**
 * Count distinct endpoint families that carry a non-trivial number of requests.
 *
 * A family is keyed by the batchexecute `rpcid`/`rpcids` query value when the
 * URL has one, otherwise by `METHOD pathname`. Only families seen at least
 * three times are counted. A result of ≥2 means the session genuinely hit
 * multiple backends — a single detected candidate there signals
 * under-segmentation.
 *
 * @param payload Candidate payload whose `requests` are inspected.
 * @returns Number of families with at least three requests.
 */
function distinctEndpointFamilies(payload: ToolCandidatePayload): number {
  // Minimum requests a family needs before it counts as a real backend.
  const MIN_REQUESTS_PER_FAMILY = 3;

  const counts = new Map<string, number>();
  for (const r of payload.requests) {
    const url = safeUrl(r.url);
    if (!url) continue;

    const rawRpc = /[?&]rpcids?=([^&]+)/.exec(url.search)?.[1];
    let key: string;
    if (rawRpc) {
      // Recorded URLs are untrusted: a malformed percent-escape (e.g. `%ZZ`)
      // makes decodeURIComponent throw URIError. This is only a grouping key,
      // so fall back to the raw value instead of aborting detection.
      let rpcId = rawRpc;
      try {
        rpcId = decodeURIComponent(rawRpc);
      } catch {
        rpcId = rawRpc;
      }
      key = `rpc:${rpcId}`;
    } else {
      key = `${r.method} ${url.pathname}`;
    }
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }

  let families = 0;
  for (const c of counts.values()) {
    if (c >= MIN_REQUESTS_PER_FAMILY) families++;
  }
  return families;
}
431
+
375
432
  function isCandidateRequest(
376
433
  request: CapturedRequest,
377
434
  startRoot: string | null,
@@ -380,6 +437,7 @@ function isCandidateRequest(
380
437
  if (request.resourceType !== 'XHR' && request.resourceType !== 'Fetch') return false;
381
438
  const url = safeUrl(request.url);
382
439
  if (!url) return false;
440
+ if (TELEMETRY_PATH.test(url.pathname)) return false;
383
441
  if (startRoot && !isSameRegistrableDomain(url.hostname, startRoot)) {
384
442
  return appApiHosts.has(url.hostname);
385
443
  }
@@ -0,0 +1,313 @@
1
+ /**
2
+ * Per-tool planning pass for `imprint teach`.
3
+ *
4
+ * After the global shared-module plan + build (teach-plan.ts) runs once, each
5
+ * tool gets a thin planning stage before its compile (plan THEN execute): one
6
+ * `llm.analyze` pass that maps each parameter to its recorded field, fixes the
7
+ * request construction + response parsing, and names the shared modules to
8
+ * import. The Markdown plan rides the compile agent's initial prompt (via
9
+ * formatToolPlan), so the compile follows it instead of re-deriving structure.
10
+ *
11
+ * Best-effort throughout: a missing prompt, a timeout, or any LLM/IO error
12
+ * yields `undefined` and the compile proceeds exactly as before. Gated by
13
+ * IMPRINT_NO_TOOL_PLAN. Modeled on planSharedModule in prereq-builder.ts.
14
+ */
15
+
16
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
17
+ import { join as pathJoin } from 'node:path';
18
+ import {
19
+ BuildPlanSchema,
20
+ type SharedModuleManifestEntry,
21
+ planSliceForTool,
22
+ resolveAssignedModules,
23
+ } from './build-plan.ts';
24
+ import { withTimeout } from './concurrency.ts';
25
+ import { type ProviderName, resolveProvider } from './llm.ts';
26
+ import { loadJsonFile } from './load-json.ts';
27
+ import { createLog } from './log.ts';
28
+ import { localToolDir } from './paths.ts';
29
+ import { compactRequestContexts, requestContextDigest } from './request-context.ts';
30
+ import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
31
+ import { setSpanAttributes, traced } from './tracing.ts';
32
+ import { type Session, SessionSchema } from './types.ts';
33
+
34
+ const PROMPTS_DIR = pathJoin(import.meta.dir, '..', '..', 'prompts');
35
+ const log = createLog('tool-plan');
36
+
37
+ /** Wall-clock cap on the per-tool planner LLM call. A throttled/hung provider
38
+ * must not block the tool's compile; on timeout we degrade to compiling without
39
+ * a plan (today's behavior). The shared-module plan is the 10-min one. */
40
+ const TOOL_PLAN_TIMEOUT_MS = 5 * 60_000;
41
+
42
+ const BODY_LIMIT = 800;
43
+ const RESPONSE_PREVIEW_LIMIT = 500;
44
+ const HEADER_LIMIT = 600;
45
+
46
+ interface ToolPlanRequestPayload {
47
+ seq: number;
48
+ method: string;
49
+ url: string;
50
+ status?: number;
51
+ mimeType?: string;
52
+ headers: string;
53
+ body?: string;
54
+ bodyDigest?: string;
55
+ bodyLength?: number;
56
+ responsePreview?: string;
57
+ responseBodyDigest?: string;
58
+ responseBodyLength?: number;
59
+ repeatCount?: number;
60
+ repeatedSeqs?: number[];
61
+ lastTimestamp?: number;
62
+ timestamp: number;
63
+ }
64
+
65
+ interface ToolPlanAssignedModule {
66
+ path: string;
67
+ kind: string;
68
+ importPath: string;
69
+ exportSignatures: string[];
70
+ purpose: string;
71
+ }
72
+
73
+ interface ToolPlanPayload {
74
+ site: string;
75
+ url: string;
76
+ tool: {
77
+ toolName: string;
78
+ description: string;
79
+ expectedOutput: string;
80
+ likelyParams: ToolCandidate['likelyParams'];
81
+ requestSeqs: number[];
82
+ dependencySeqs: number[];
83
+ };
84
+ sharedContext?: SharedCompileContext;
85
+ /** Slice of the global build plan for this tool (when a build plan exists). */
86
+ planGuidance?: {
87
+ parserGuidance: string;
88
+ paramChecklist: string[];
89
+ authRecipe: unknown;
90
+ loadBearingSeqs: number[];
91
+ };
92
+ assignedModules: ToolPlanAssignedModule[];
93
+ requests: ToolPlanRequestPayload[];
94
+ }
95
+
96
/**
 * Assemble the planner payload for one tool. Pure (no LLM, no I/O), so it can
 * be unit-tested directly.
 *
 * Requests are narrowed to the seqs relevant to the tool — the candidate's
 * `requestSeqs`, its `dependencySeqs`, and the build plan's `loadBearingSeqs`
 * when a plan slice exists — then truncated and passed through
 * `compactRequestContexts` keyed by `toolPlanRequestGroupKey`.
 *
 * @param opts.session       Recorded session supplying `site`, `url` and `requests`.
 * @param opts.candidate     The tool candidate being planned.
 * @param opts.sharedContext Optional shared compile context, passed through as-is.
 * @param opts.buildPlan     Optional global build plan; ignored unless it passes `BuildPlanSchema`.
 * @param opts.sharedModules Optional shared-module manifest used to resolve assigned modules.
 * @returns The payload handed to the per-tool planner.
 */
export function buildToolPlanPayload(opts: {
  session: Session;
  candidate: ToolCandidate;
  sharedContext?: SharedCompileContext;
  buildPlan?: unknown;
  sharedModules?: SharedModuleManifestEntry[];
}): ToolPlanPayload {
  const { session, candidate, sharedContext, buildPlan, sharedModules } = opts;

  // Seqs always in scope; load-bearing seqs are added below if a plan slice exists.
  const scope = new Set<number>([...candidate.requestSeqs, ...candidate.dependencySeqs]);

  let planGuidance: ToolPlanPayload['planGuidance'];
  let assignedModules: ToolPlanAssignedModule[] = [];

  // Project the global build plan (when present and valid) onto this tool.
  const parsedPlan = buildPlan ? BuildPlanSchema.safeParse(buildPlan) : undefined;
  if (parsedPlan !== undefined && parsedPlan.success) {
    const plan = parsedPlan.data;

    const slice = planSliceForTool(plan, candidate.toolName);
    if (slice) {
      const { parserGuidance, paramChecklist, authRecipe, loadBearingSeqs } = slice.tool;
      planGuidance = { parserGuidance, paramChecklist, authRecipe, loadBearingSeqs };
      for (const seq of loadBearingSeqs) scope.add(seq);
    }

    // Only modules flagged `verified` are offered to the planner.
    assignedModules = resolveAssignedModules(plan, candidate.toolName, sharedModules)
      .filter((mod) => mod.verified)
      .map(({ path, kind, importPath, exportSignatures, purpose }) => ({
        path,
        kind,
        importPath,
        exportSignatures,
        purpose,
      }));
  }

  const inScope = session.requests.filter((req) => scope.has(req.seq));
  const compacted = inScope.map((req) => {
    const responseBody = req.response?.body;
    return {
      seq: req.seq,
      timestamp: req.timestamp,
      method: req.method,
      url: req.url,
      status: req.response?.status,
      mimeType: req.response?.mimeType,
      headers: truncate(JSON.stringify(req.headers), HEADER_LIMIT) ?? '{}',
      body: truncate(req.body, BODY_LIMIT),
      bodyDigest: requestContextDigest(req.body),
      bodyLength: req.body?.length,
      responsePreview: truncate(responseBody, RESPONSE_PREVIEW_LIMIT),
      responseBodyDigest: requestContextDigest(responseBody),
      responseBodyLength: responseBody?.length,
    };
  });
  const requests = compactRequestContexts(compacted, toolPlanRequestGroupKey);

  const { toolName, description, expectedOutput, likelyParams, requestSeqs, dependencySeqs } =
    candidate;

  return {
    site: session.site,
    url: session.url,
    tool: { toolName, description, expectedOutput, likelyParams, requestSeqs, dependencySeqs },
    sharedContext,
    planGuidance,
    assignedModules,
    requests,
  };
}
182
+
183
/**
 * Grouping key handed to `compactRequestContexts`: the method, URL, request-body
 * digest/length, status, MIME type and response-body digest/length of a request,
 * in that order.
 */
function toolPlanRequestGroupKey(request: ToolPlanRequestPayload): unknown[] {
  const {
    method,
    url,
    bodyDigest,
    bodyLength,
    status,
    mimeType,
    responseBodyDigest,
    responseBodyLength,
  } = request;
  return [
    method,
    url,
    bodyDigest,
    bodyLength,
    status,
    mimeType,
    responseBodyDigest,
    responseBodyLength,
  ];
}
195
+
196
/**
 * Derive a per-tool implementation plan from the recording.
 *
 * Best-effort: the `IMPRINT_NO_TOOL_PLAN` gate, a missing prompt file, an empty
 * LLM response, or any error/timeout returns `undefined` so the caller compiles
 * without a plan. On success the plan is written to `.tool-plan.md` inside the
 * directory returned by `localToolDir(site, toolName)` and also returned.
 *
 * @param opts.site          Site the tool belongs to.
 * @param opts.toolName      Tool being planned; used for the output dir, span attributes and logs.
 * @param opts.candidate     Detected candidate for the tool.
 * @param opts.sharedContext Optional shared compile context forwarded to the payload.
 * @param opts.sessionPath   Path of the recorded session JSON.
 * @param opts.buildPlanPath Optional path of the global build plan JSON.
 * @param opts.sharedModules Optional shared-module manifest.
 * @param opts.providerName  LLM provider to resolve.
 * @param opts.model         Optional model override.
 * @returns The Markdown plan, or `undefined` when planning was skipped or failed.
 */
export async function planToolCompile(opts: {
  site: string;
  toolName: string;
  candidate: ToolCandidate;
  sharedContext?: SharedCompileContext;
  sessionPath: string;
  buildPlanPath?: string;
  sharedModules?: SharedModuleManifestEntry[];
  providerName: ProviderName;
  model?: string;
}): Promise<string | undefined> {
  if (toolPlanDisabled()) return undefined;
  const promptPath = pathJoin(PROMPTS_DIR, 'tool-planning.md');
  if (!existsSync(promptPath)) return undefined;

  return await traced(
    'teach.plan_tool',
    'AGENT',
    {
      'imprint.site': opts.site,
      'imprint.tool_name': opts.toolName,
      'imprint.provider': opts.providerName,
    },
    async (span) => {
      try {
        const systemPrompt = readFileSync(promptPath, 'utf8');

        const session = loadJsonFile(
          opts.sessionPath,
          SessionSchema,
          {
            notFound: 'session not found before tool planning',
            badSchema: 'session file is malformed',
          },
          'session',
        );

        // Load the global build plan slice (if one exists) so the per-tool plan
        // can carry the tool's parserGuidance/paramChecklist/authRecipe and the
        // shared modules it was assigned.
        let buildPlan: unknown;
        if (opts.buildPlanPath && existsSync(opts.buildPlanPath)) {
          try {
            buildPlan = loadJsonFile(
              opts.buildPlanPath,
              BuildPlanSchema,
              { notFound: 'build plan not found' },
              'build plan',
            );
          } catch (err) {
            // Still best-effort, but say why: a malformed plan should not be
            // indistinguishable from "no plan was ever written".
            log(
              `build plan at ${opts.buildPlanPath} could not be loaded (${err instanceof Error ? err.message : String(err)}) — planning ${opts.toolName} without it`,
            );
            buildPlan = undefined;
          }
        }

        const payload = buildToolPlanPayload({
          session,
          candidate: opts.candidate,
          sharedContext: opts.sharedContext,
          buildPlan,
          sharedModules: opts.sharedModules,
        });

        const llm = resolveProvider({ provider: opts.providerName, model: opts.model });
        // Bounded so a throttled or hung provider cannot block the compile.
        const result = await withTimeout(
          llm.analyze(systemPrompt, payload),
          TOOL_PLAN_TIMEOUT_MS,
          'tool planner',
        );
        const plan = stripCodeFences(result.text).trim();
        if (plan.length === 0) {
          setSpanAttributes(span, { 'imprint.tool_plan.skipped': true });
          return undefined;
        }

        const toolDir = localToolDir(opts.site, opts.toolName);
        mkdirSync(toolDir, { recursive: true });
        writeFileSync(pathJoin(toolDir, '.tool-plan.md'), plan, 'utf8');

        setSpanAttributes(span, {
          'imprint.tool_plan.chars': plan.length,
          'imprint.tool_plan.skipped': false,
        });
        log(`planned ${opts.toolName} (${plan.length} chars)`);
        return plan;
      } catch (err) {
        setSpanAttributes(span, { 'imprint.tool_plan.skipped': true });
        log(
          `tool planning failed for ${opts.toolName} (${err instanceof Error ? err.message : String(err)}) — compiling without a plan`,
        );
        return undefined;
      }
    },
  );
}
294
+
295
/**
 * Whether the `IMPRINT_NO_TOOL_PLAN` gate is set. Unset or empty means not
 * disabled; `0`, `false`, `no` and `off` (any case) also mean not disabled; any
 * other value disables tool planning.
 */
function toolPlanDisabled(): boolean {
  const raw = process.env.IMPRINT_NO_TOOL_PLAN;
  if (!raw) return false;
  switch (raw.toLowerCase()) {
    case '0':
    case 'false':
    case 'no':
    case 'off':
      return false;
    default:
      return true;
  }
}
299
+
300
/**
 * If the trimmed text opens with a fence line (three backticks plus an optional
 * alphabetic language tag) and closes with a bare fence line, return what lies
 * between them; otherwise return the trimmed text unchanged, so fences that
 * merely appear inside the plan are left alone. Mirrors the helper in
 * prereq-builder.ts (not exported there).
 */
function stripCodeFences(text: string): string {
  const trimmed = text.trim();
  const fenced = trimmed.match(/^```[a-zA-Z]*\n([\s\S]*?)\n```$/);
  if (fenced === null) return trimmed;
  return fenced[1] ?? trimmed;
}
308
+
309
/**
 * Cap a string at `limit` characters. Missing or empty input yields
 * `undefined`; input within the limit is returned unchanged; longer input is
 * cut and suffixed with a marker recording the original length.
 */
function truncate(s: string | undefined, limit: number): string | undefined {
  if (!s) return undefined;
  const originalLength = s.length;
  return originalLength > limit
    ? `${s.slice(0, limit)}…(truncated, original length ${originalLength})`
    : s;
}
@@ -1,3 +1,4 @@
1
+ import { AsyncLocalStorage } from 'node:async_hooks';
1
2
  import {
2
3
  MimeType,
3
4
  type NodeTracerProvider,
@@ -14,6 +15,26 @@ type TraceKind = OpenInferenceSpanKind | `${OpenInferenceSpanKind}`;
14
15
  type TraceAttributes = Record<string, unknown>;
15
16
  type TraceLlmMessage = { role?: string; content?: string };
16
17
 
18
+ // ---------------------------------------------------------------------------
19
+ // Cost accumulator — rolls up LLM costs from child spans to a parent span.
20
+ // ---------------------------------------------------------------------------
21
+ interface CostAccumulator {
22
+ inputTokens: number;
23
+ outputTokens: number;
24
+ cacheReadTokens: number;
25
+ cacheWriteTokens: number;
26
+ uncachedInputCost: number;
27
+ cacheReadCost: number;
28
+ cacheWriteCost: number;
29
+ completionCost: number;
30
+ }
31
+
32
+ const costAccumulatorStorage = new AsyncLocalStorage<CostAccumulator>();
33
+
34
/**
 * Return the cost accumulator stored in `costAccumulatorStorage` for the
 * current async context, or `undefined` when none has been installed.
 */
function getActiveCostAccumulator(): CostAccumulator | undefined {
  return costAccumulatorStorage.getStore();
}
37
+
17
38
  let provider: NodeTracerProvider | null = null;
18
39
  let attemptedInit = false;
19
40
  let suppressInit = false;
@@ -136,7 +157,29 @@ export function resolveTraceTokenCount(
136
157
  return { source: 'missing' };
137
158
  }
138
159
 
160
/**
 * Total prompt tokens = uncached input + cache reads + cache writes.
 *
 * Providers (Anthropic API and the claude CLI alike) report `usage.input_tokens`
 * as the *uncached* portion only; the cached bulk is reported in separate cache
 * counts. `llmCostAttributes` expects `inputTokens` to be the TOTAL (it
 * re-derives the uncached share by subtracting the cache split), and
 * `llm.token_count.prompt` should likewise cover the whole prompt. Capture
 * boundaries therefore normalize through this helper rather than passing the
 * bare uncached count along.
 *
 * @param uncachedInputTokens Uncached input tokens as reported by the provider.
 * @param cacheReadTokens     Cache-read tokens; nullish is treated as 0.
 * @param cacheWriteTokens    Cache-write tokens; nullish is treated as 0.
 * @returns The summed prompt tokens, or `null` when the uncached count is unknown.
 */
export function totalPromptTokens(
  uncachedInputTokens: number | null | undefined,
  cacheReadTokens: number | null | undefined,
  cacheWriteTokens: number | null | undefined,
): number | null {
  if (uncachedInputTokens === null || uncachedInputTokens === undefined) {
    return null;
  }
  const cacheReads = cacheReadTokens ?? 0;
  const cacheWrites = cacheWriteTokens ?? 0;
  return uncachedInputTokens + cacheReads + cacheWrites;
}
180
+
139
181
  const DEFAULT_MODEL_RATES: Record<string, { inputUsdPer1M: number; outputUsdPer1M: number }> = {
182
+ 'claude-opus-4-8': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
140
183
  'claude-opus-4-7': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
141
184
  'claude-opus-4-6': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
142
185
  'claude-opus-4-5': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
@@ -230,6 +273,59 @@ export async function traced<T>(
230
273
  );
231
274
  }
232
275
 
276
/**
 * Like `traced`, but accumulates `llm.cost.*` from all descendant LLM spans
 * and sets the rolled-up totals on the parent span when `fn` completes.
 * Use on root spans (`cli.teach`, `cli.audit`) so Phoenix shows the full cost.
 *
 * Nesting: `AsyncLocalStorage.run` replaces the store for the inner call, so
 * LLM spans inside a nested `tracedWithCostRollup` only reach the inner
 * accumulator. To keep the "all descendants" contract, the inner totals are
 * folded into the enclosing accumulator (if any) once the inner `fn` settles.
 *
 * @param name       Span name.
 * @param kind       OpenInference span kind.
 * @param attributes Initial span attributes, if any.
 * @param fn         Work to run inside the span; its result or rejection is passed through.
 * @returns Whatever `fn` resolves to.
 */
export async function tracedWithCostRollup<T>(
  name: string,
  kind: TraceKind,
  attributes: TraceAttributes | undefined,
  fn: (span: Span) => Promise<T> | T,
): Promise<T> {
  // Read before `run` below installs this call's own accumulator.
  const enclosingAcc = costAccumulatorStorage.getStore();

  const acc: CostAccumulator = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheWriteTokens: 0,
    uncachedInputCost: 0,
    cacheReadCost: 0,
    cacheWriteCost: 0,
    completionCost: 0,
  };

  const applyCostRollup = (span: Span): void => {
    const promptCost = acc.uncachedInputCost + acc.cacheReadCost + acc.cacheWriteCost;
    const totalCost = promptCost + acc.completionCost;
    // Nothing was recorded underneath: leave the span free of cost attributes.
    if (totalCost === 0 && acc.inputTokens === 0 && acc.outputTokens === 0) return;
    setSpanAttributes(span, {
      [SemanticConventions.LLM_TOKEN_COUNT_PROMPT]: acc.inputTokens,
      [SemanticConventions.LLM_TOKEN_COUNT_COMPLETION]: acc.outputTokens,
      [SemanticConventions.LLM_TOKEN_COUNT_TOTAL]: acc.inputTokens + acc.outputTokens,
      [SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ]: acc.cacheReadTokens,
      [SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE]: acc.cacheWriteTokens,
      [SemanticConventions.LLM_COST_PROMPT]: promptCost,
      [SemanticConventions.LLM_COST_COMPLETION]: acc.completionCost,
      [SemanticConventions.LLM_COST_TOTAL]: totalCost,
      [SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_READ]: acc.cacheReadCost,
      [SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_WRITE]: acc.cacheWriteCost,
      [SemanticConventions.LLM_COST_INPUT]: acc.uncachedInputCost,
      'imprint.llm.cost_estimated': true,
    });
  };

  // Fold this call's totals into the enclosing roll-up so an outer root span
  // still reports the full cost when roll-ups are nested.
  const propagateToEnclosing = (): void => {
    if (!enclosingAcc) return;
    for (const key of Object.keys(acc) as Array<keyof CostAccumulator>) {
      enclosingAcc[key] += acc[key];
    }
  };

  return costAccumulatorStorage.run(acc, () =>
    traced(name, kind, attributes, async (span) => {
      try {
        return await fn(span);
      } finally {
        // Runs on success and failure alike, so partial costs are still reported.
        applyCostRollup(span);
        propagateToEnclosing();
      }
    }),
  );
}
328
+
233
329
  export function startTraceSpan(
234
330
  name: string,
235
331
  kind: TraceKind,
@@ -444,23 +540,56 @@ function llmCostAttributes(opts: {
444
540
  : hasCacheBreakdown
445
541
  ? Math.max(0, opts.inputTokens - cacheRead - cacheWrite)
446
542
  : opts.inputTokens;
543
+
544
+ let uncachedInputCost: number | undefined;
545
+ let cacheReadCost = 0;
546
+ let cacheWriteCost = 0;
547
+ if (uncachedInput !== undefined) {
548
+ if (hasCacheBreakdown) {
549
+ uncachedInputCost = (uncachedInput / 1_000_000) * opts.inputUsdPer1M;
550
+ cacheReadCost = (cacheRead / 1_000_000) * opts.inputUsdPer1M * CACHE_READ_MULTIPLIER;
551
+ cacheWriteCost = (cacheWrite / 1_000_000) * opts.inputUsdPer1M * CACHE_WRITE_MULTIPLIER;
552
+ } else {
553
+ uncachedInputCost = (uncachedInput / 1_000_000) * opts.inputUsdPer1M;
554
+ }
555
+ }
556
+
447
557
  const prompt =
448
- uncachedInput === undefined
558
+ uncachedInputCost === undefined
449
559
  ? undefined
450
- : hasCacheBreakdown
451
- ? (uncachedInput / 1_000_000) * opts.inputUsdPer1M +
452
- (cacheRead / 1_000_000) * opts.inputUsdPer1M * CACHE_READ_MULTIPLIER +
453
- (cacheWrite / 1_000_000) * opts.inputUsdPer1M * CACHE_WRITE_MULTIPLIER
454
- : (uncachedInput / 1_000_000) * opts.inputUsdPer1M;
560
+ : uncachedInputCost + cacheReadCost + cacheWriteCost;
455
561
  const completion =
456
562
  opts.outputTokens === undefined
457
563
  ? undefined
458
564
  : (opts.outputTokens / 1_000_000) * opts.outputUsdPer1M;
459
565
  const total = (prompt ?? 0) + (completion ?? 0);
566
+
567
+ // Roll up into the nearest ancestor tracedWithCostRollup, if any.
568
+ const acc = getActiveCostAccumulator();
569
+ if (acc) {
570
+ acc.inputTokens += opts.inputTokens ?? 0;
571
+ acc.outputTokens += opts.outputTokens ?? 0;
572
+ acc.cacheReadTokens += cacheRead;
573
+ acc.cacheWriteTokens += cacheWrite;
574
+ acc.uncachedInputCost += uncachedInputCost ?? 0;
575
+ acc.cacheReadCost += cacheReadCost;
576
+ acc.cacheWriteCost += cacheWriteCost;
577
+ acc.completionCost += completion ?? 0;
578
+ }
579
+
460
580
  return {
461
581
  ...(prompt !== undefined ? { [SemanticConventions.LLM_COST_PROMPT]: prompt } : {}),
462
582
  ...(completion !== undefined ? { [SemanticConventions.LLM_COST_COMPLETION]: completion } : {}),
463
583
  [SemanticConventions.LLM_COST_TOTAL]: total,
584
+ ...(hasCacheBreakdown
585
+ ? {
586
+ [SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_READ]: cacheReadCost,
587
+ [SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_WRITE]: cacheWriteCost,
588
+ [SemanticConventions.LLM_COST_INPUT]: uncachedInputCost,
589
+ [SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ]: cacheRead,
590
+ [SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE]: cacheWrite,
591
+ }
592
+ : {}),
464
593
  'imprint.llm.cost_estimated': true,
465
594
  };
466
595
  }