imprint-mcp 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +193 -189
- package/examples/discoverandgo/README.md +1 -1
- package/examples/echo/README.md +1 -1
- package/examples/google-flights/README.md +28 -0
- package/examples/google-flights/_shared/batchexecute.ts +63 -0
- package/examples/google-flights/_shared/flights_request.ts +95 -0
- package/examples/google-flights/_shared/package.json +9 -0
- package/examples/google-flights/get_flight_booking_details/index.ts +159 -0
- package/examples/google-flights/get_flight_booking_details/package.json +9 -0
- package/examples/google-flights/get_flight_booking_details/parser.ts +182 -0
- package/examples/google-flights/get_flight_booking_details/playbook.yaml +138 -0
- package/examples/google-flights/get_flight_booking_details/request-transform.ts +86 -0
- package/examples/google-flights/get_flight_booking_details/workflow.json +98 -0
- package/examples/google-flights/get_flight_calendar_prices/index.ts +131 -0
- package/examples/google-flights/get_flight_calendar_prices/package.json +9 -0
- package/examples/google-flights/get_flight_calendar_prices/parser.ts +86 -0
- package/examples/google-flights/get_flight_calendar_prices/playbook.yaml +97 -0
- package/examples/google-flights/get_flight_calendar_prices/request-transform.ts +31 -0
- package/examples/google-flights/get_flight_calendar_prices/workflow.json +78 -0
- package/examples/google-flights/lookup_airport/index.ts +101 -0
- package/examples/google-flights/lookup_airport/package.json +9 -0
- package/examples/google-flights/lookup_airport/parser.ts +66 -0
- package/examples/google-flights/lookup_airport/playbook.yaml +47 -0
- package/examples/google-flights/lookup_airport/request-transform.ts +20 -0
- package/examples/google-flights/lookup_airport/workflow.json +57 -0
- package/examples/google-flights/search_flights/index.ts +219 -0
- package/examples/google-flights/search_flights/package.json +9 -0
- package/examples/google-flights/search_flights/parser.ts +169 -0
- package/examples/google-flights/search_flights/playbook.yaml +184 -0
- package/examples/google-flights/search_flights/request-transform.ts +119 -0
- package/examples/google-flights/search_flights/workflow.json +143 -0
- package/examples/google-hotels/README.md +29 -0
- package/examples/google-hotels/_shared/batchexecute.ts +73 -0
- package/examples/google-hotels/_shared/freq.ts +158 -0
- package/examples/google-hotels/_shared/package.json +9 -0
- package/examples/google-hotels/autocomplete_hotel_location/index.ts +80 -0
- package/examples/google-hotels/autocomplete_hotel_location/package.json +9 -0
- package/examples/google-hotels/autocomplete_hotel_location/parser.ts +71 -0
- package/examples/google-hotels/autocomplete_hotel_location/playbook.yaml +36 -0
- package/examples/google-hotels/autocomplete_hotel_location/request-transform.ts +37 -0
- package/examples/google-hotels/autocomplete_hotel_location/workflow.json +36 -0
- package/examples/google-hotels/get_hotel_booking_options/index.ts +143 -0
- package/examples/google-hotels/get_hotel_booking_options/package.json +9 -0
- package/examples/google-hotels/get_hotel_booking_options/parser.ts +271 -0
- package/examples/google-hotels/get_hotel_booking_options/playbook.yaml +154 -0
- package/examples/google-hotels/get_hotel_booking_options/request-transform.ts +154 -0
- package/examples/google-hotels/get_hotel_booking_options/workflow.json +84 -0
- package/examples/google-hotels/get_hotel_reviews/index.ts +81 -0
- package/examples/google-hotels/get_hotel_reviews/package.json +9 -0
- package/examples/google-hotels/get_hotel_reviews/parser.ts +128 -0
- package/examples/google-hotels/get_hotel_reviews/playbook.yaml +64 -0
- package/examples/google-hotels/get_hotel_reviews/request-transform.ts +42 -0
- package/examples/google-hotels/get_hotel_reviews/workflow.json +37 -0
- package/examples/google-hotels/search_hotels/index.ts +207 -0
- package/examples/google-hotels/search_hotels/package.json +9 -0
- package/examples/google-hotels/search_hotels/parser.ts +260 -0
- package/examples/google-hotels/search_hotels/playbook.yaml +87 -0
- package/examples/google-hotels/search_hotels/request-transform.ts +197 -0
- package/examples/google-hotels/search_hotels/workflow.json +127 -0
- package/examples/southwest/README.md +3 -2
- package/examples/southwest/search_southwest_flights/index.ts +18 -1
- package/examples/southwest/search_southwest_flights/workflow.json +18 -1
- package/package.json +3 -2
- package/prompts/audit-agent.md +71 -0
- package/prompts/build-planning.md +74 -0
- package/prompts/compile-agent.md +131 -27
- package/prompts/prereq-builder.md +64 -0
- package/prompts/prereq-planner.md +34 -0
- package/prompts/tool-planning.md +39 -0
- package/src/cli.ts +116 -3
- package/src/imprint/agent.ts +5 -0
- package/src/imprint/audit.ts +996 -0
- package/src/imprint/backend-ladder.ts +1214 -184
- package/src/imprint/build-plan.ts +1051 -0
- package/src/imprint/cdp-browser-fetch.ts +592 -0
- package/src/imprint/cdp-jar-cache.ts +320 -0
- package/src/imprint/chromium.ts +414 -8
- package/src/imprint/claude-cli-compile.ts +125 -25
- package/src/imprint/codex-cli-compile.ts +26 -23
- package/src/imprint/compile-agent-types.ts +38 -0
- package/src/imprint/compile-agent.ts +63 -25
- package/src/imprint/compile-tools.ts +1666 -66
- package/src/imprint/compile.ts +13 -1
- package/src/imprint/concurrency.ts +87 -0
- package/src/imprint/cron.ts +4 -0
- package/src/imprint/doctor.ts +48 -3
- package/src/imprint/freeform-redact.ts +5 -4
- package/src/imprint/install.ts +79 -4
- package/src/imprint/integrations.ts +3 -3
- package/src/imprint/llm.ts +56 -8
- package/src/imprint/mcp-compile-server.ts +43 -10
- package/src/imprint/mcp-maintenance.ts +18 -102
- package/src/imprint/mcp-server.ts +73 -7
- package/src/imprint/multi-progress.ts +7 -2
- package/src/imprint/param-grounding.ts +367 -0
- package/src/imprint/paths.ts +29 -0
- package/src/imprint/playbook-runner.ts +101 -40
- package/src/imprint/prereq-builder.ts +651 -0
- package/src/imprint/probe-backends.ts +6 -3
- package/src/imprint/record.ts +10 -1
- package/src/imprint/redact.ts +30 -2
- package/src/imprint/replay-capture.ts +19 -18
- package/src/imprint/runtime.ts +19 -10
- package/src/imprint/session-diff.ts +79 -2
- package/src/imprint/session-merge.ts +9 -5
- package/src/imprint/stealth-chromium.ts +79 -0
- package/src/imprint/stealth-fetch.ts +309 -29
- package/src/imprint/stealth-token-cache.ts +88 -0
- package/src/imprint/teach-plan.ts +251 -0
- package/src/imprint/teach-state.ts +10 -0
- package/src/imprint/teach.ts +456 -142
- package/src/imprint/tool-candidates.ts +72 -14
- package/src/imprint/tool-plan.ts +313 -0
- package/src/imprint/tracing.ts +135 -6
- package/src/imprint/types.ts +61 -3
- package/examples/google-flights/search_google_flights/index.ts +0 -101
- package/examples/google-flights/search_google_flights/parser.test.ts +0 -140
- package/examples/google-flights/search_google_flights/parser.ts +0 -189
- package/examples/google-flights/search_google_flights/playbook.yaml +0 -130
- package/examples/google-flights/search_google_flights/workflow.json +0 -48
- package/examples/google-hotels/search_google_hotels/index.ts +0 -194
- package/examples/google-hotels/search_google_hotels/parser.test.ts +0 -168
- package/examples/google-hotels/search_google_hotels/parser.ts +0 -330
- package/examples/google-hotels/search_google_hotels/playbook.yaml +0 -125
- package/examples/google-hotels/search_google_hotels/workflow.json +0 -111
- package/examples/namecheap-domains/search_namecheap_domains/index.ts +0 -144
- package/examples/namecheap-domains/search_namecheap_domains/parser.ts +0 -380
- package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +0 -50
- package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +0 -136
- package/examples/namecheap-domains/search_namecheap_domains/workflow.json +0 -97
|
@@ -182,24 +182,54 @@ export async function detectToolCandidates(
|
|
|
182
182
|
`detecting candidate tools from ${payload.events.length} event(s), ${payload.requests.length} request(s)…`,
|
|
183
183
|
);
|
|
184
184
|
const llm = resolveProvider(llmConfig ?? {});
|
|
185
|
-
const
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
);
|
|
191
|
-
|
|
185
|
+
const runOnce = async (): Promise<{
|
|
186
|
+
detection: ToolCandidateDetection;
|
|
187
|
+
result: Awaited<ReturnType<typeof llm.analyze>>;
|
|
188
|
+
}> => {
|
|
189
|
+
const result = await llm.analyze(systemPrompt, payload);
|
|
190
|
+
const objectText = extractJsonObject(result.text);
|
|
191
|
+
if (!objectText) {
|
|
192
|
+
throw new Error(
|
|
193
|
+
`Candidate detector did not return a JSON object.\nRaw response:\n${result.text.slice(0, 1000)}`,
|
|
194
|
+
);
|
|
195
|
+
}
|
|
196
|
+
let parsed: unknown;
|
|
197
|
+
try {
|
|
198
|
+
parsed = JSON.parse(objectText);
|
|
199
|
+
} catch (err) {
|
|
200
|
+
throw new Error(
|
|
201
|
+
`Candidate detector response was not valid JSON: ${err instanceof Error ? err.message : String(err)}\nExtracted:\n${objectText.slice(0, 1000)}`,
|
|
202
|
+
);
|
|
203
|
+
}
|
|
204
|
+
return { detection: validateToolCandidateDetection(parsed), result };
|
|
205
|
+
};
|
|
192
206
|
|
|
193
|
-
let
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
207
|
+
let { detection, result } = await runOnce();
|
|
208
|
+
|
|
209
|
+
// Anti-collapse guard: a single candidate from a session that hit multiple
|
|
210
|
+
// distinct endpoint families is almost always under-segmentation (the
|
|
211
|
+
// detector folded separate tools — e.g. search vs pricing vs autocomplete —
|
|
212
|
+
// into one). This is pure LLM variance; re-run once and keep the richer
|
|
213
|
+
// segmentation. Targeted so genuinely single-tool sites don't pay for it.
|
|
214
|
+
if (detection.candidates.length === 1 && distinctEndpointFamilies(payload) >= 2) {
|
|
215
|
+
log(
|
|
216
|
+
'detector returned 1 candidate but the session spans ≥2 endpoint families — re-running once to guard against under-segmentation…',
|
|
199
217
|
);
|
|
218
|
+
try {
|
|
219
|
+
const retry = await runOnce();
|
|
220
|
+
if (retry.detection.candidates.length > detection.candidates.length) {
|
|
221
|
+
log(`retry segmented into ${retry.detection.candidates.length} candidates; using it`);
|
|
222
|
+
({ detection, result } = retry);
|
|
223
|
+
} else {
|
|
224
|
+
log('retry did not segment further; keeping the original detection');
|
|
225
|
+
}
|
|
226
|
+
} catch (err) {
|
|
227
|
+
log(
|
|
228
|
+
`retry failed (${err instanceof Error ? err.message : String(err)}); keeping original`,
|
|
229
|
+
);
|
|
230
|
+
}
|
|
200
231
|
}
|
|
201
232
|
|
|
202
|
-
const detection = validateToolCandidateDetection(parsed);
|
|
203
233
|
setSpanAttributes(span, {
|
|
204
234
|
'imprint.candidate_count': detection.candidates.length,
|
|
205
235
|
'imprint.primary_tool_name': detection.candidates.find((c) => c.primary)?.toolName,
|
|
@@ -372,6 +402,33 @@ function candidateRequestGroupKey(request: CandidateRequestPayload): unknown[] {
|
|
|
372
402
|
];
|
|
373
403
|
}
|
|
374
404
|
|
|
405
|
+
/**
 * Matches URL pathnames that contain a telemetry / beacon segment.
 *
 * `isCandidateRequest` rejects any request whose pathname matches, so these
 * endpoints never reach the candidate payload: left in, they add noise that
 * pushes the detector to under-segment, and the detector can anchor a
 * candidate's `requestSeqs` on one (e.g. Google's `/log`), sending compile to
 * reverse-engineer a beacon.
 *
 * A segment only matches as a whole path component: the leading `/` keeps
 * `/catalog` from matching `log`, and the trailing lookahead (end of string,
 * `/` or `?`) keeps `/login` from matching. Matching is case-insensitive.
 *
 * NOTE(review): the match is not anchored to the start of the path, so a
 * first-party path such as `/api/stats/...` is excluded too — confirm that is
 * acceptable for the sites being recorded.
 */
const TELEMETRY_PATH = new RegExp(
  `/(${[
    'log',
    'gen_204',
    'jserror',
    'ping',
    'beacon',
    'csi',
    'batchlog',
    'metrics',
    'stats',
    'collect',
    'analytics',
    'adsct',
    'pagead',
    'ccm',
  ].join('|')})(?=$|[/?])`,
  'i',
);
|
|
413
|
+
|
|
414
|
+
/**
 * Count the distinct endpoint families in `payload.requests` that carry a
 * non-trivial number of requests (at least three each).
 *
 * A family is keyed by the `rpcid` / `rpcids` query parameter when the URL has
 * one (batchexecute-style endpoints), otherwise by `METHOD pathname`. Requests
 * whose URL `safeUrl` cannot parse are ignored.
 *
 * A result of 2 or more means the session genuinely hit multiple backends, so a
 * single detected candidate there signals under-segmentation.
 *
 * @param payload - candidate payload whose `requests` are counted
 * @returns the number of families with at least three requests
 */
function distinctEndpointFamilies(payload: ToolCandidatePayload): number {
  const MIN_REQUESTS_PER_FAMILY = 3;
  const counts = new Map<string, number>();
  for (const r of payload.requests) {
    const url = safeUrl(r.url);
    if (!url) continue;
    const rpc = /[?&]rpcids?=([^&]+)/.exec(url.search)?.[1];
    let key: string;
    if (rpc) {
      // Recorded URLs are untrusted: a malformed percent-escape makes
      // decodeURIComponent throw URIError. This is only a grouping key, so fall
      // back to the raw value instead of aborting candidate detection.
      let rpcId = rpc;
      try {
        rpcId = decodeURIComponent(rpc);
      } catch {
        rpcId = rpc;
      }
      key = `rpc:${rpcId}`;
    } else {
      key = `${r.method} ${url.pathname}`;
    }
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }
  let families = 0;
  for (const c of counts.values()) {
    if (c >= MIN_REQUESTS_PER_FAMILY) families++;
  }
  return families;
}
|
|
431
|
+
|
|
375
432
|
function isCandidateRequest(
|
|
376
433
|
request: CapturedRequest,
|
|
377
434
|
startRoot: string | null,
|
|
@@ -380,6 +437,7 @@ function isCandidateRequest(
|
|
|
380
437
|
if (request.resourceType !== 'XHR' && request.resourceType !== 'Fetch') return false;
|
|
381
438
|
const url = safeUrl(request.url);
|
|
382
439
|
if (!url) return false;
|
|
440
|
+
if (TELEMETRY_PATH.test(url.pathname)) return false;
|
|
383
441
|
if (startRoot && !isSameRegistrableDomain(url.hostname, startRoot)) {
|
|
384
442
|
return appApiHosts.has(url.hostname);
|
|
385
443
|
}
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-tool planning pass for `imprint teach`.
|
|
3
|
+
*
|
|
4
|
+
* After the global shared-module plan + build (teach-plan.ts) runs once, each
|
|
5
|
+
* tool gets a thin planning stage before its compile (plan THEN execute): one
|
|
6
|
+
* `llm.analyze` pass that maps each parameter to its recorded field, fixes the
|
|
7
|
+
* request construction + response parsing, and names the shared modules to
|
|
8
|
+
* import. The Markdown plan rides the compile agent's initial prompt (via
|
|
9
|
+
* formatToolPlan), so the compile follows it instead of re-deriving structure.
|
|
10
|
+
*
|
|
11
|
+
* Best-effort throughout: a missing prompt, a timeout, or any LLM/IO error
|
|
12
|
+
* yields `undefined` and the compile proceeds exactly as before. Gated by
|
|
13
|
+
* IMPRINT_NO_TOOL_PLAN. Modeled on planSharedModule in prereq-builder.ts.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
17
|
+
import { join as pathJoin } from 'node:path';
|
|
18
|
+
import {
|
|
19
|
+
BuildPlanSchema,
|
|
20
|
+
type SharedModuleManifestEntry,
|
|
21
|
+
planSliceForTool,
|
|
22
|
+
resolveAssignedModules,
|
|
23
|
+
} from './build-plan.ts';
|
|
24
|
+
import { withTimeout } from './concurrency.ts';
|
|
25
|
+
import { type ProviderName, resolveProvider } from './llm.ts';
|
|
26
|
+
import { loadJsonFile } from './load-json.ts';
|
|
27
|
+
import { createLog } from './log.ts';
|
|
28
|
+
import { localToolDir } from './paths.ts';
|
|
29
|
+
import { compactRequestContexts, requestContextDigest } from './request-context.ts';
|
|
30
|
+
import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
|
|
31
|
+
import { setSpanAttributes, traced } from './tracing.ts';
|
|
32
|
+
import { type Session, SessionSchema } from './types.ts';
|
|
33
|
+
|
|
34
|
+
const PROMPTS_DIR = pathJoin(import.meta.dir, '..', '..', 'prompts');
|
|
35
|
+
const log = createLog('tool-plan');
|
|
36
|
+
|
|
37
|
+
/** Wall-clock cap on the per-tool planner LLM call. A throttled/hung provider
|
|
38
|
+
* must not block the tool's compile; on timeout we degrade to compiling without
|
|
39
|
+
* a plan (today's behavior). The shared-module plan is the 10-min one. */
|
|
40
|
+
const TOOL_PLAN_TIMEOUT_MS = 5 * 60_000;
|
|
41
|
+
|
|
42
|
+
const BODY_LIMIT = 800;
|
|
43
|
+
const RESPONSE_PREVIEW_LIMIT = 500;
|
|
44
|
+
const HEADER_LIMIT = 600;
|
|
45
|
+
|
|
46
|
+
/** One recorded request, compacted for the per-tool planner prompt. */
interface ToolPlanRequestPayload {
  /** Sequence number of the request within the recorded session. */
  seq: number;
  method: string;
  url: string;
  /** Response status, when a response was recorded. */
  status?: number;
  /** Response MIME type, when a response was recorded. */
  mimeType?: string;
  /** JSON-stringified request headers, truncated to HEADER_LIMIT (`'{}'` when empty). */
  headers: string;
  /** Request body, truncated to BODY_LIMIT. */
  body?: string;
  /** `requestContextDigest` of the full (untruncated) request body. */
  bodyDigest?: string;
  /** Length of the full request body string. */
  bodyLength?: number;
  /** Response body, truncated to RESPONSE_PREVIEW_LIMIT. */
  responsePreview?: string;
  /** `requestContextDigest` of the full response body. */
  responseBodyDigest?: string;
  /** Length of the full response body string. */
  responseBodyLength?: number;
  // The next three are not set by buildToolPlanPayload itself; presumably
  // compactRequestContexts fills them in when it collapses requests that share
  // a group key — TODO confirm against request-context.ts.
  repeatCount?: number;
  repeatedSeqs?: number[];
  lastTimestamp?: number;
  timestamp: number;
}

/** A verified shared module assigned to the tool, as shown to the planner. */
interface ToolPlanAssignedModule {
  path: string;
  kind: string;
  importPath: string;
  exportSignatures: string[];
  purpose: string;
}

/** Full input handed to the per-tool planner LLM call. */
interface ToolPlanPayload {
  site: string;
  url: string;
  /** The detected tool candidate being planned. */
  tool: {
    toolName: string;
    description: string;
    expectedOutput: string;
    likelyParams: ToolCandidate['likelyParams'];
    requestSeqs: number[];
    dependencySeqs: number[];
  };
  sharedContext?: SharedCompileContext;
  /** Slice of the global build plan for this tool (when a build plan exists). */
  planGuidance?: {
    parserGuidance: string;
    paramChecklist: string[];
    authRecipe: unknown;
    loadBearingSeqs: number[];
  };
  /** Empty when there is no valid build plan or no verified module is assigned. */
  assignedModules: ToolPlanAssignedModule[];
  /** Requests in the tool's scope, after compaction. */
  requests: ToolPlanRequestPayload[];
}
|
|
95
|
+
|
|
96
|
+
/**
 * Build the planner payload for one tool. Pure (no LLM, no I/O), so it can be
 * unit-tested directly.
 *
 * When `opts.buildPlan` validates against `BuildPlanSchema`, the tool's slice of
 * it becomes `planGuidance` and the verified shared modules assigned to the tool
 * become `assignedModules`; an absent or invalid build plan leaves both at their
 * empty defaults. Session requests are then narrowed to the tool's scope
 * (candidate seqs ∪ dependency seqs ∪ build-plan loadBearingSeqs), trimmed to
 * the header/body/response limits, and compacted via `compactRequestContexts`
 * keyed by `toolPlanRequestGroupKey`.
 *
 * @param opts.session - recorded session supplying site, url and requests
 * @param opts.candidate - detected tool candidate being planned
 * @param opts.sharedContext - passed through to the payload unchanged
 * @param opts.buildPlan - raw global build plan, validated here
 * @param opts.sharedModules - manifest used to resolve assigned modules
 * @returns the payload for the per-tool planner
 */
export function buildToolPlanPayload(opts: {
  session: Session;
  candidate: ToolCandidate;
  sharedContext?: SharedCompileContext;
  buildPlan?: unknown;
  sharedModules?: SharedModuleManifestEntry[];
}): ToolPlanPayload {
  const { session, candidate, sharedContext, buildPlan, sharedModules } = opts;

  let planGuidance: ToolPlanPayload['planGuidance'];
  let assignedModules: ToolPlanAssignedModule[] = [];
  let loadBearingSeqs: number[] = [];

  // Project the global build plan (if any) down to this tool.
  const validated = buildPlan ? BuildPlanSchema.safeParse(buildPlan) : undefined;
  if (validated !== undefined && validated.success) {
    const plan = validated.data;

    const slice = planSliceForTool(plan, candidate.toolName);
    if (slice) {
      const { parserGuidance, paramChecklist, authRecipe } = slice.tool;
      loadBearingSeqs = slice.tool.loadBearingSeqs;
      planGuidance = { parserGuidance, paramChecklist, authRecipe, loadBearingSeqs };
    }

    // Only modules that passed verification are offered to the planner.
    const verifiedModules = resolveAssignedModules(plan, candidate.toolName, sharedModules).filter(
      (m) => m.verified,
    );
    assignedModules = verifiedModules.map(
      ({ path, kind, importPath, exportSignatures, purpose }) => ({
        path,
        kind,
        importPath,
        exportSignatures,
        purpose,
      }),
    );
  }

  const scope = new Set<number>([
    ...candidate.requestSeqs,
    ...candidate.dependencySeqs,
    ...loadBearingSeqs,
  ]);

  const inScope = session.requests.filter((r) => scope.has(r.seq));
  const requests = compactRequestContexts(
    inScope.map(({ seq, timestamp, method, url, headers, body, response }) => ({
      seq,
      timestamp,
      method,
      url,
      status: response?.status,
      mimeType: response?.mimeType,
      headers: truncate(JSON.stringify(headers), HEADER_LIMIT) ?? '{}',
      body: truncate(body, BODY_LIMIT),
      bodyDigest: requestContextDigest(body),
      bodyLength: body?.length,
      responsePreview: truncate(response?.body, RESPONSE_PREVIEW_LIMIT),
      responseBodyDigest: requestContextDigest(response?.body),
      responseBodyLength: response?.body?.length,
    })),
    toolPlanRequestGroupKey,
  );

  const { toolName, description, expectedOutput, likelyParams, requestSeqs, dependencySeqs } =
    candidate;
  return {
    site: session.site,
    url: session.url,
    tool: { toolName, description, expectedOutput, likelyParams, requestSeqs, dependencySeqs },
    sharedContext,
    planGuidance,
    assignedModules,
    requests,
  };
}
|
|
182
|
+
|
|
183
|
+
/**
 * Grouping key handed to `compactRequestContexts`: two requests share a key
 * when method, URL, request-body digest/length, status, MIME type and
 * response-body digest/length all agree.
 */
function toolPlanRequestGroupKey(request: ToolPlanRequestPayload): unknown[] {
  const {
    method,
    url,
    bodyDigest,
    bodyLength,
    status,
    mimeType,
    responseBodyDigest,
    responseBodyLength,
  } = request;
  return [
    method,
    url,
    bodyDigest,
    bodyLength,
    status,
    mimeType,
    responseBodyDigest,
    responseBodyLength,
  ];
}
|
|
195
|
+
|
|
196
|
+
/**
 * Derive a per-tool implementation plan (Markdown) from the recording.
 *
 * Best-effort: the IMPRINT_NO_TOOL_PLAN gate, a missing prompt file, a timeout,
 * an empty response, or any session/LLM error returns `undefined` so the caller
 * compiles without a plan. An unreadable build plan only drops the build-plan
 * guidance; planning continues without it.
 *
 * The plan is also written to `.tool-plan.md` inside the tool's local directory
 * (`localToolDir(site, toolName)`). That write is best-effort as well: the
 * returned string is what the caller uses, so a failed write is logged and the
 * plan is still returned rather than discarded.
 *
 * @returns the plan text, or `undefined` when planning was skipped or failed
 */
export async function planToolCompile(opts: {
  site: string;
  toolName: string;
  candidate: ToolCandidate;
  sharedContext?: SharedCompileContext;
  sessionPath: string;
  buildPlanPath?: string;
  sharedModules?: SharedModuleManifestEntry[];
  providerName: ProviderName;
  model?: string;
}): Promise<string | undefined> {
  if (toolPlanDisabled()) return undefined;
  const promptPath = pathJoin(PROMPTS_DIR, 'tool-planning.md');
  if (!existsSync(promptPath)) return undefined;

  return await traced(
    'teach.plan_tool',
    'AGENT',
    {
      'imprint.site': opts.site,
      'imprint.tool_name': opts.toolName,
      'imprint.provider': opts.providerName,
    },
    async (span) => {
      try {
        const systemPrompt = readFileSync(promptPath, 'utf8');

        const session = loadJsonFile(
          opts.sessionPath,
          SessionSchema,
          {
            notFound: 'session not found before tool planning',
            badSchema: 'session file is malformed',
          },
          'session',
        );

        // Load the global build plan (if one exists) so the per-tool plan can
        // carry the tool's parserGuidance/paramChecklist/authRecipe and the
        // shared modules it was assigned. A broken build plan is not fatal.
        let buildPlan: unknown;
        if (opts.buildPlanPath && existsSync(opts.buildPlanPath)) {
          try {
            buildPlan = loadJsonFile(
              opts.buildPlanPath,
              BuildPlanSchema,
              { notFound: 'build plan not found' },
              'build plan',
            );
          } catch (err) {
            buildPlan = undefined;
            log(
              `ignoring unreadable build plan for ${opts.toolName} (${err instanceof Error ? err.message : String(err)}) — planning without it`,
            );
          }
        }

        const payload = buildToolPlanPayload({
          session,
          candidate: opts.candidate,
          sharedContext: opts.sharedContext,
          buildPlan,
          sharedModules: opts.sharedModules,
        });

        const llm = resolveProvider({ provider: opts.providerName, model: opts.model });
        const result = await withTimeout(
          llm.analyze(systemPrompt, payload),
          TOOL_PLAN_TIMEOUT_MS,
          'tool planner',
        );
        const plan = stripCodeFences(result.text).trim();
        if (plan.length === 0) {
          setSpanAttributes(span, { 'imprint.tool_plan.skipped': true });
          return undefined;
        }

        // Persisting is a convenience copy; never let a disk error throw away
        // a plan the LLM already produced.
        try {
          const toolDir = localToolDir(opts.site, opts.toolName);
          mkdirSync(toolDir, { recursive: true });
          writeFileSync(pathJoin(toolDir, '.tool-plan.md'), plan, 'utf8');
        } catch (err) {
          log(
            `could not persist tool plan for ${opts.toolName} (${err instanceof Error ? err.message : String(err)}) — using the in-memory plan`,
          );
        }

        setSpanAttributes(span, {
          'imprint.tool_plan.chars': plan.length,
          'imprint.tool_plan.skipped': false,
        });
        log(`planned ${opts.toolName} (${plan.length} chars)`);
        return plan;
      } catch (err) {
        setSpanAttributes(span, { 'imprint.tool_plan.skipped': true });
        log(
          `tool planning failed for ${opts.toolName} (${err instanceof Error ? err.message : String(err)}) — compiling without a plan`,
        );
        return undefined;
      }
    },
  );
}
|
|
294
|
+
|
|
295
|
+
/**
 * Whether the per-tool planning pass is switched off via IMPRINT_NO_TOOL_PLAN.
 *
 * Unset, empty, or one of `0` / `false` / `no` / `off` (case-insensitive) means
 * planning stays enabled; any other value disables it. Surrounding whitespace is
 * ignored so values like `"false "` or `"0\r"` (trailing space / CR picked up
 * from a shell or .env file) are not mistaken for "disable".
 */
function toolPlanDisabled(): boolean {
  const v = process.env.IMPRINT_NO_TOOL_PLAN?.trim().toLowerCase();
  if (!v) return false;
  return !['0', 'false', 'no', 'off'].includes(v);
}
|
|
299
|
+
|
|
300
|
+
/**
 * Unwrap a response whose entire body is a single Markdown code fence; leave
 * fenced snippets that are part of the plan untouched.
 *
 * Originally a mirror of the (non-exported) helper in prereq-builder.ts. This
 * version additionally:
 *  - accepts CRLF line endings and any info string after the opening fence
 *    (e.g. `c++`), not just ASCII letters;
 *  - refuses to unwrap when the text between the first and last fence has
 *    unbalanced fences. That is the signature of a reply that merely starts
 *    and ends with separate snippets; stripping the outer pair there would
 *    corrupt the plan.
 *
 * @param text - raw LLM response
 * @returns the fence's inner content, or the trimmed input when it is not a
 *   single wrapping fence
 */
function stripCodeFences(text: string): string {
  const t = text.trim();
  const m = /^```[^\n`]*\r?\n([\s\S]*?)\r?\n```$/.exec(t);
  const inner = m?.[1];
  if (inner === undefined) return t;

  // Walk the inner fence lines: a fence line opens a block when none is open;
  // only a bare fence (no info string) closes one.
  let insideFence = false;
  for (const line of inner.split(/\r?\n/)) {
    const fence = /^ {0,3}`{3,}(.*)$/.exec(line);
    if (!fence) continue;
    if (!insideFence) {
      insideFence = true;
    } else if ((fence[1] ?? '').trim() === '') {
      insideFence = false;
    }
  }
  return insideFence ? t : inner;
}
|
|
308
|
+
|
|
309
|
+
/**
 * Cap a string at `limit` UTF-16 code units for inclusion in the planner
 * payload.
 *
 * @param s - text to cap; `undefined` or empty yields `undefined`
 * @param limit - maximum number of code units kept from the start
 * @returns `s` unchanged when it fits, otherwise its first `limit` code units
 *   followed by a marker that records the original length
 */
function truncate(s: string | undefined, limit: number): string | undefined {
  if (s) {
    const fits = s.length <= limit;
    return fits ? s : `${s.slice(0, limit)}…(truncated, original length ${s.length})`;
  }
  return undefined;
}
|
package/src/imprint/tracing.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
1
2
|
import {
|
|
2
3
|
MimeType,
|
|
3
4
|
type NodeTracerProvider,
|
|
@@ -14,6 +15,26 @@ type TraceKind = OpenInferenceSpanKind | `${OpenInferenceSpanKind}`;
|
|
|
14
15
|
type TraceAttributes = Record<string, unknown>;
|
|
15
16
|
type TraceLlmMessage = { role?: string; content?: string };
|
|
16
17
|
|
|
18
|
+
/**
 * Cost accumulator — running totals of LLM token counts and estimated costs
 * (as computed by `llmCostAttributes`), rolled up from child spans to the span
 * opened by `tracedWithCostRollup`.
 */
interface CostAccumulator {
  /** Sum of the `inputTokens` passed to `llmCostAttributes`. */
  inputTokens: number;
  /** Sum of completion tokens. */
  outputTokens: number;
  /** Sum of prompt tokens served from cache. */
  cacheReadTokens: number;
  /** Sum of prompt tokens written to cache. */
  cacheWriteTokens: number;
  /** Estimated cost of the uncached part of the prompts. */
  uncachedInputCost: number;
  /** Estimated cost of cache reads. */
  cacheReadCost: number;
  /** Estimated cost of cache writes. */
  cacheWriteCost: number;
  /** Estimated cost of completions. */
  completionCost: number;
}

/** Carries the active accumulator across `await` boundaries of a rollup scope. */
const costAccumulatorStorage = new AsyncLocalStorage<CostAccumulator>();

/**
 * @returns the accumulator of the rollup scope this code is running in, or
 *   `undefined` when called outside of one
 */
function getActiveCostAccumulator(): CostAccumulator | undefined {
  return costAccumulatorStorage.getStore();
}
|
|
37
|
+
|
|
17
38
|
let provider: NodeTracerProvider | null = null;
|
|
18
39
|
let attemptedInit = false;
|
|
19
40
|
let suppressInit = false;
|
|
@@ -136,7 +157,29 @@ export function resolveTraceTokenCount(
|
|
|
136
157
|
return { source: 'missing' };
|
|
137
158
|
}
|
|
138
159
|
|
|
160
|
+
/**
 * Normalize a provider's token usage to the TOTAL prompt size:
 * uncached input + cache reads + cache writes.
 *
 * Providers (Anthropic API and the claude CLI alike) report
 * `usage.input_tokens` as the *uncached* portion only, with the cached bulk in
 * separate cache counts. `llmCostAttributes` expects `inputTokens` to be the
 * total (it re-derives the uncached part by subtracting the cache split), and
 * `llm.token_count.prompt` should reflect the whole prompt too. Every capture
 * boundary therefore normalizes through this function rather than passing the
 * bare uncached count, which either billed the cached bulk at the full input
 * rate or mislabeled the token count.
 *
 * @param uncachedInputTokens - uncached prompt tokens as reported
 * @param cacheReadTokens - tokens read from cache; nullish counts as 0
 * @param cacheWriteTokens - tokens written to cache; nullish counts as 0
 * @returns the total prompt token count, or `null` when the uncached count
 *   itself is unknown
 */
export function totalPromptTokens(
  uncachedInputTokens: number | null | undefined,
  cacheReadTokens: number | null | undefined,
  cacheWriteTokens: number | null | undefined,
): number | null {
  if (uncachedInputTokens === null || uncachedInputTokens === undefined) {
    return null;
  }
  const withCacheReads = uncachedInputTokens + (cacheReadTokens ?? 0);
  return withCacheReads + (cacheWriteTokens ?? 0);
}
|
|
180
|
+
|
|
139
181
|
const DEFAULT_MODEL_RATES: Record<string, { inputUsdPer1M: number; outputUsdPer1M: number }> = {
|
|
182
|
+
'claude-opus-4-8': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
|
|
140
183
|
'claude-opus-4-7': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
|
|
141
184
|
'claude-opus-4-6': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
|
|
142
185
|
'claude-opus-4-5': { inputUsdPer1M: 5, outputUsdPer1M: 25 },
|
|
@@ -230,6 +273,59 @@ export async function traced<T>(
|
|
|
230
273
|
);
|
|
231
274
|
}
|
|
232
275
|
|
|
276
|
+
/**
 * Like `traced`, but accumulates the token counts and estimated `llm.cost.*`
 * reported by every `llmCostAttributes` call made while `fn` runs, and sets the
 * rolled-up totals on this span when `fn` settles (also when it throws).
 * Use on root spans (`cli.teach`, `cli.audit`) so Phoenix shows the full cost.
 *
 * Rollup scopes may nest: `llmCostAttributes` only adds to the nearest scope, so
 * when this scope finishes its totals are folded into the enclosing scope's
 * accumulator (if any). Without that, an inner rollup would hide its
 * descendants' cost from the outer span.
 *
 * Nothing is written to the span when no tokens or cost were recorded.
 *
 * @param name - span name
 * @param kind - OpenInference span kind
 * @param attributes - initial span attributes
 * @param fn - work to run inside the span
 * @returns whatever `fn` returns
 */
export async function tracedWithCostRollup<T>(
  name: string,
  kind: TraceKind,
  attributes: TraceAttributes | undefined,
  fn: (span: Span) => Promise<T> | T,
): Promise<T> {
  // Captured before `run` swaps in our own accumulator.
  const parentAcc = costAccumulatorStorage.getStore();

  const acc: CostAccumulator = {
    inputTokens: 0,
    outputTokens: 0,
    cacheReadTokens: 0,
    cacheWriteTokens: 0,
    uncachedInputCost: 0,
    cacheReadCost: 0,
    cacheWriteCost: 0,
    completionCost: 0,
  };

  const foldIntoParent = (): void => {
    if (!parentAcc) return;
    parentAcc.inputTokens += acc.inputTokens;
    parentAcc.outputTokens += acc.outputTokens;
    parentAcc.cacheReadTokens += acc.cacheReadTokens;
    parentAcc.cacheWriteTokens += acc.cacheWriteTokens;
    parentAcc.uncachedInputCost += acc.uncachedInputCost;
    parentAcc.cacheReadCost += acc.cacheReadCost;
    parentAcc.cacheWriteCost += acc.cacheWriteCost;
    parentAcc.completionCost += acc.completionCost;
  };

  const applyCostRollup = (span: Span): void => {
    const promptCost = acc.uncachedInputCost + acc.cacheReadCost + acc.cacheWriteCost;
    const totalCost = promptCost + acc.completionCost;
    if (totalCost === 0 && acc.inputTokens === 0 && acc.outputTokens === 0) return;
    setSpanAttributes(span, {
      [SemanticConventions.LLM_TOKEN_COUNT_PROMPT]: acc.inputTokens,
      [SemanticConventions.LLM_TOKEN_COUNT_COMPLETION]: acc.outputTokens,
      [SemanticConventions.LLM_TOKEN_COUNT_TOTAL]: acc.inputTokens + acc.outputTokens,
      [SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ]: acc.cacheReadTokens,
      [SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE]: acc.cacheWriteTokens,
      [SemanticConventions.LLM_COST_PROMPT]: promptCost,
      [SemanticConventions.LLM_COST_COMPLETION]: acc.completionCost,
      [SemanticConventions.LLM_COST_TOTAL]: totalCost,
      [SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_READ]: acc.cacheReadCost,
      [SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_WRITE]: acc.cacheWriteCost,
      [SemanticConventions.LLM_COST_INPUT]: acc.uncachedInputCost,
      'imprint.llm.cost_estimated': true,
    });
  };

  return costAccumulatorStorage.run(acc, () =>
    traced(name, kind, attributes, async (span) => {
      try {
        return await fn(span);
      } finally {
        // Fold first so the enclosing scope stays correct even if writing the
        // span attributes fails.
        foldIntoParent();
        applyCostRollup(span);
      }
    }),
  );
}
|
|
328
|
+
|
|
233
329
|
export function startTraceSpan(
|
|
234
330
|
name: string,
|
|
235
331
|
kind: TraceKind,
|
|
@@ -444,23 +540,56 @@ function llmCostAttributes(opts: {
|
|
|
444
540
|
: hasCacheBreakdown
|
|
445
541
|
? Math.max(0, opts.inputTokens - cacheRead - cacheWrite)
|
|
446
542
|
: opts.inputTokens;
|
|
543
|
+
|
|
544
|
+
let uncachedInputCost: number | undefined;
|
|
545
|
+
let cacheReadCost = 0;
|
|
546
|
+
let cacheWriteCost = 0;
|
|
547
|
+
if (uncachedInput !== undefined) {
|
|
548
|
+
if (hasCacheBreakdown) {
|
|
549
|
+
uncachedInputCost = (uncachedInput / 1_000_000) * opts.inputUsdPer1M;
|
|
550
|
+
cacheReadCost = (cacheRead / 1_000_000) * opts.inputUsdPer1M * CACHE_READ_MULTIPLIER;
|
|
551
|
+
cacheWriteCost = (cacheWrite / 1_000_000) * opts.inputUsdPer1M * CACHE_WRITE_MULTIPLIER;
|
|
552
|
+
} else {
|
|
553
|
+
uncachedInputCost = (uncachedInput / 1_000_000) * opts.inputUsdPer1M;
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
|
|
447
557
|
const prompt =
|
|
448
|
-
|
|
558
|
+
uncachedInputCost === undefined
|
|
449
559
|
? undefined
|
|
450
|
-
:
|
|
451
|
-
? (uncachedInput / 1_000_000) * opts.inputUsdPer1M +
|
|
452
|
-
(cacheRead / 1_000_000) * opts.inputUsdPer1M * CACHE_READ_MULTIPLIER +
|
|
453
|
-
(cacheWrite / 1_000_000) * opts.inputUsdPer1M * CACHE_WRITE_MULTIPLIER
|
|
454
|
-
: (uncachedInput / 1_000_000) * opts.inputUsdPer1M;
|
|
560
|
+
: uncachedInputCost + cacheReadCost + cacheWriteCost;
|
|
455
561
|
const completion =
|
|
456
562
|
opts.outputTokens === undefined
|
|
457
563
|
? undefined
|
|
458
564
|
: (opts.outputTokens / 1_000_000) * opts.outputUsdPer1M;
|
|
459
565
|
const total = (prompt ?? 0) + (completion ?? 0);
|
|
566
|
+
|
|
567
|
+
// Roll up into the nearest ancestor tracedWithCostRollup, if any.
|
|
568
|
+
const acc = getActiveCostAccumulator();
|
|
569
|
+
if (acc) {
|
|
570
|
+
acc.inputTokens += opts.inputTokens ?? 0;
|
|
571
|
+
acc.outputTokens += opts.outputTokens ?? 0;
|
|
572
|
+
acc.cacheReadTokens += cacheRead;
|
|
573
|
+
acc.cacheWriteTokens += cacheWrite;
|
|
574
|
+
acc.uncachedInputCost += uncachedInputCost ?? 0;
|
|
575
|
+
acc.cacheReadCost += cacheReadCost;
|
|
576
|
+
acc.cacheWriteCost += cacheWriteCost;
|
|
577
|
+
acc.completionCost += completion ?? 0;
|
|
578
|
+
}
|
|
579
|
+
|
|
460
580
|
return {
|
|
461
581
|
...(prompt !== undefined ? { [SemanticConventions.LLM_COST_PROMPT]: prompt } : {}),
|
|
462
582
|
...(completion !== undefined ? { [SemanticConventions.LLM_COST_COMPLETION]: completion } : {}),
|
|
463
583
|
[SemanticConventions.LLM_COST_TOTAL]: total,
|
|
584
|
+
...(hasCacheBreakdown
|
|
585
|
+
? {
|
|
586
|
+
[SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_READ]: cacheReadCost,
|
|
587
|
+
[SemanticConventions.LLM_COST_PROMPT_DETAILS_CACHE_WRITE]: cacheWriteCost,
|
|
588
|
+
[SemanticConventions.LLM_COST_INPUT]: uncachedInputCost,
|
|
589
|
+
[SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ]: cacheRead,
|
|
590
|
+
[SemanticConventions.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE]: cacheWrite,
|
|
591
|
+
}
|
|
592
|
+
: {}),
|
|
464
593
|
'imprint.llm.cost_estimated': true,
|
|
465
594
|
};
|
|
466
595
|
}
|