imprint-mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/CHANGELOG.md +168 -0
  2. package/LICENSE +21 -0
  3. package/README.md +322 -0
  4. package/examples/discoverandgo/README.md +57 -0
  5. package/examples/discoverandgo/book_discoverandgo_museum_pass/cron.json +8 -0
  6. package/examples/discoverandgo/book_discoverandgo_museum_pass/index.ts +89 -0
  7. package/examples/discoverandgo/book_discoverandgo_museum_pass/workflow.json +39 -0
  8. package/examples/echo/README.md +37 -0
  9. package/examples/echo/echo_test/index.ts +31 -0
  10. package/examples/google-flights/search_google_flights/index.ts +101 -0
  11. package/examples/google-flights/search_google_flights/parser.test.ts +140 -0
  12. package/examples/google-flights/search_google_flights/parser.ts +189 -0
  13. package/examples/google-flights/search_google_flights/playbook.yaml +130 -0
  14. package/examples/google-flights/search_google_flights/workflow.json +48 -0
  15. package/examples/google-hotels/search_google_hotels/index.ts +194 -0
  16. package/examples/google-hotels/search_google_hotels/parser.test.ts +168 -0
  17. package/examples/google-hotels/search_google_hotels/parser.ts +330 -0
  18. package/examples/google-hotels/search_google_hotels/playbook.yaml +125 -0
  19. package/examples/google-hotels/search_google_hotels/workflow.json +111 -0
  20. package/examples/namecheap-domains/search_namecheap_domains/index.ts +144 -0
  21. package/examples/namecheap-domains/search_namecheap_domains/parser.ts +380 -0
  22. package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +50 -0
  23. package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +136 -0
  24. package/examples/namecheap-domains/search_namecheap_domains/workflow.json +97 -0
  25. package/examples/southwest/README.md +81 -0
  26. package/examples/southwest/search_southwest_flights/backends.json +23 -0
  27. package/examples/southwest/search_southwest_flights/cron.json +19 -0
  28. package/examples/southwest/search_southwest_flights/index.ts +110 -0
  29. package/examples/southwest/search_southwest_flights/playbook.yaml +46 -0
  30. package/examples/southwest/search_southwest_flights/workflow.json +54 -0
  31. package/package.json +78 -0
  32. package/prompts/compile-agent.md +580 -0
  33. package/prompts/intent-detection.md +198 -0
  34. package/prompts/playbook-compilation.md +279 -0
  35. package/prompts/request-triage.md +74 -0
  36. package/prompts/tool-candidate-detection.md +104 -0
  37. package/src/cli.ts +1287 -0
  38. package/src/imprint/agent.ts +468 -0
  39. package/src/imprint/app-api-hosts.ts +53 -0
  40. package/src/imprint/backend-ladder.ts +568 -0
  41. package/src/imprint/check.ts +136 -0
  42. package/src/imprint/chromium.ts +211 -0
  43. package/src/imprint/claude-cli-compile.ts +640 -0
  44. package/src/imprint/cli-credential.ts +394 -0
  45. package/src/imprint/codex-cli-compile.ts +712 -0
  46. package/src/imprint/compile-agent-types.ts +40 -0
  47. package/src/imprint/compile-agent.ts +404 -0
  48. package/src/imprint/compile-tools.ts +1389 -0
  49. package/src/imprint/compile.ts +720 -0
  50. package/src/imprint/cookie-jar.ts +246 -0
  51. package/src/imprint/credential-bundle.ts +195 -0
  52. package/src/imprint/credential-extract.ts +290 -0
  53. package/src/imprint/credential-store.ts +707 -0
  54. package/src/imprint/cron.ts +312 -0
  55. package/src/imprint/doctor.ts +223 -0
  56. package/src/imprint/emit.ts +154 -0
  57. package/src/imprint/etld.ts +134 -0
  58. package/src/imprint/freeform-redact.ts +216 -0
  59. package/src/imprint/inject-listener.ts +137 -0
  60. package/src/imprint/install.ts +795 -0
  61. package/src/imprint/integrations.ts +385 -0
  62. package/src/imprint/is-compiled.ts +2 -0
  63. package/src/imprint/json-path.ts +100 -0
  64. package/src/imprint/llm.ts +998 -0
  65. package/src/imprint/load-json.ts +54 -0
  66. package/src/imprint/log.ts +33 -0
  67. package/src/imprint/login.ts +166 -0
  68. package/src/imprint/mcp-compile-server.ts +282 -0
  69. package/src/imprint/mcp-maintenance.ts +1790 -0
  70. package/src/imprint/mcp-server.ts +350 -0
  71. package/src/imprint/multi-progress.ts +69 -0
  72. package/src/imprint/notify.ts +155 -0
  73. package/src/imprint/paths.ts +64 -0
  74. package/src/imprint/playbook-parser.ts +21 -0
  75. package/src/imprint/playbook-runner.ts +465 -0
  76. package/src/imprint/probe-backends.ts +251 -0
  77. package/src/imprint/progress.ts +28 -0
  78. package/src/imprint/record.ts +470 -0
  79. package/src/imprint/redact.ts +550 -0
  80. package/src/imprint/replay-capture.ts +387 -0
  81. package/src/imprint/request-context.ts +66 -0
  82. package/src/imprint/runtime-link.ts +73 -0
  83. package/src/imprint/runtime.ts +942 -0
  84. package/src/imprint/sensitive-keys.ts +156 -0
  85. package/src/imprint/session-diff.ts +409 -0
  86. package/src/imprint/session-merge.ts +198 -0
  87. package/src/imprint/session-writer.ts +149 -0
  88. package/src/imprint/sites.ts +27 -0
  89. package/src/imprint/stealth-fetch.ts +434 -0
  90. package/src/imprint/teach-state.ts +235 -0
  91. package/src/imprint/teach.ts +2120 -0
  92. package/src/imprint/tool-candidates.ts +423 -0
  93. package/src/imprint/tool-loader.ts +186 -0
  94. package/src/imprint/tool-selection.ts +70 -0
  95. package/src/imprint/tracing.ts +508 -0
  96. package/src/imprint/types.ts +472 -0
  97. package/src/imprint/version.ts +21 -0
@@ -0,0 +1,720 @@
1
+ /**
2
+ * One recording compiles to two artifacts: workflow.json (API-replay)
3
+ * and playbook.yaml (DOM-replay). Both share the same skeleton —
4
+ * read session, redact-if-needed, slim, call LLM, parse, validate,
5
+ * write next to the session — so they live in one file with the
6
+ * differences (slim strategy, prompt, parser, schema, output filename)
7
+ * factored into a CompileTask config.
8
+ */
9
+
10
+ import {
11
+ existsSync,
12
+ mkdirSync,
13
+ readFileSync,
14
+ readdirSync,
15
+ renameSync,
16
+ statSync,
17
+ writeFileSync,
18
+ } from 'node:fs';
19
+ import { dirname, join as pathJoin } from 'node:path';
20
+ import type { OnDeadlineReached } from './agent.ts';
21
+ import { inferAppApiHosts } from './app-api-hosts.ts';
22
+ import { type CompileAgentProgress, compileAgent } from './compile-agent.ts';
23
+ import { isSameRegistrableDomain, registrableDomain } from './etld.ts';
24
+ import { type LLMOptions, extractJsonArray, resolveProvider } from './llm.ts';
25
+ import { loadJsonFile } from './load-json.ts';
26
+ import { createLog } from './log.ts';
27
+ import { imprintHomeDir, localSiteDir, localToolDir } from './paths.ts';
28
+ import { parsePlaybook } from './playbook-parser.ts';
29
+ import { redactSession } from './redact.ts';
30
+ import { compactRequestContexts, requestContextDigest } from './request-context.ts';
31
+ import { ensureImprintRuntimeLink } from './runtime-link.ts';
32
+ import type { ClassifiedValue } from './session-diff.ts';
33
+ import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
34
+ import { setSpanAttributes, traced } from './tracing.ts';
35
+ import {
36
+ type Playbook,
37
+ type Session,
38
+ SessionSchema,
39
+ type Workflow,
40
+ WorkflowSchema,
41
+ } from './types.ts';
42
+
43
+ export type { CompileAgentProgress } from './compile-agent.ts';
44
+
45
// Directory holding the prompt markdown files: `prompts/`, two levels above this module's directory.
// NOTE(review): `import.meta.dir` is not standard ESM — presumably the Bun runtime's
// "directory of the current module" property; confirm the supported runtime.
const PROMPTS_DIR = pathJoin(import.meta.dir, '..', '..', 'prompts');
// Logger created with the 'compile' label; used for progress messages in this module.
const log = createLog('compile');
47
+
48
/**
 * Options shared by both compile entry points in this module
 * (`generate` extends them via GenerateOptions; `compilePlaybook` takes them directly).
 */
interface CompileOptions {
  /** Path to session.json or session.redacted.json */
  sessionPath: string;
  /** Where to write the artifact. Defaults to the generated tool directory. */
  outPath?: string;
  /** Override LLM config (region, model, project). */
  llmConfig?: LLMOptions;
  /** If true, send the FULL session to the LLM (don't shrink). Useful for
   * debugging when shrinking might be over-aggressive. Default false. */
  noShrink?: boolean;
  /** Candidate-specific compile scope for multi-tool teach. */
  candidate?: ToolCandidate;
  /** Shared auth/helper guidance generated once for a multi-tool teach run. */
  sharedContext?: SharedCompileContext;
  /** Pre-computed triage result from a shared pass. When set, compilePlaybook
   * skips its own triageRequests() LLM call and merges the shared selectedSeqs
   * with any per-tool preserveSeqs locally. */
  preTriagedSession?: TriageResult;
}
67
+
68
+ // ─── generate (workflow.json) ────────────────────────────────────────────────
69
+
70
/**
 * Options for `generate` (workflow.json compilation). Everything except
 * `outPath`/`outDir` is forwarded unchanged to `compileAgent`.
 */
interface GenerateOptions extends CompileOptions {
  /** Hard wall-clock budget for the agent. Default 30 minutes. */
  maxDurationMs?: number;
  /** Progress callback with verification cycle information. */
  onProgress?: (p: CompileAgentProgress) => void;
  /** Called when wall-clock deadline is reached; return ms to extend or null to time out. */
  onDeadlineReached?: OnDeadlineReached;
  /** Retain parser.test.ts after successful verification. */
  keepTest?: boolean;
  /** Directory where workflow.json/parser.ts/parser.test.ts are written. */
  outDir?: string;
  /** Dual-pass value classifications from replay-and-diff. */
  classifications?: ClassifiedValue[];
  /** Credential values extracted during teach, passed to integration tests via env var. */
  teachCredentials?: { site: string; values: Record<string, string> };
}
86
+
87
/** What `generate` returns after the compile agent produced a schema-valid workflow. */
interface GenerateResult {
  /** The workflow loaded from disk and validated against WorkflowSchema. */
  workflow: Workflow;
  /** Final location of workflow.json (explicit outPath, agent path, or relocated tool dir). */
  workflowPath: string;
  /** Number of requests the LLM saw (after shrinking). */
  requestsSent: number;
  /** Original count before shrinking. */
  requestsOriginal: number;
  /** Token usage as reported by the compile agent. */
  inputTokens: number | null;
  outputTokens: number | null;
  /** Duration reported by the compile agent, in milliseconds. */
  durationMs: number;
}
98
+
99
/**
 * Compile a recorded session into a workflow.json by running the compile agent,
 * then load and schema-validate what the agent wrote.
 *
 * Output location:
 *  - `outDir` (or, failing that, the directory of `outPath`) is handed to the agent.
 *  - With neither `outDir` nor `outPath`, the agent's artifacts are moved into the
 *    tool directory derived from the workflow's own site/toolName.
 *  - With an `outPath` that differs from where the agent wrote, the validated
 *    workflow is additionally serialized to `outPath`.
 *
 * @param opts Session path, LLM config, agent budget/callbacks and output location.
 * @returns The validated workflow, its final path, and the agent's token/duration stats.
 * @throws Error when the agent reports failure (message includes outcome, turn count
 *   and conversation log path), or reports success without a workflow path. Errors
 *   raised by `loadJsonFile` for a missing/invalid workflow.json are not caught here.
 */
export async function generate(opts: GenerateOptions): Promise<GenerateResult> {
  return await traced(
    'compile.generate',
    'AGENT',
    {
      'imprint.session_path': opts.sessionPath,
      'imprint.provider': opts.llmConfig?.provider ?? 'auto',
      'imprint.tool_name': opts.candidate?.toolName,
      'imprint.out_path': opts.outPath,
      'imprint.out_dir': opts.outDir,
    },
    async (span) => {
      ensureImprintRuntimeLink(imprintHomeDir());
      // An explicit outDir wins; otherwise the agent works next to the requested outPath.
      const outDir = opts.outDir ?? (opts.outPath ? dirname(opts.outPath) : undefined);
      const result = await compileAgent({
        sessionPath: opts.sessionPath,
        maxDurationMs: opts.maxDurationMs,
        llmConfig: opts.llmConfig,
        onProgress: opts.onProgress,
        onDeadlineReached: opts.onDeadlineReached,
        keepTest: opts.keepTest,
        outDir,
        candidate: opts.candidate,
        sharedContext: opts.sharedContext,
        classifications: opts.classifications,
        teachCredentials: opts.teachCredentials,
      });

      // Record agent stats on the span before any failure is thrown.
      setSpanAttributes(span, {
        'imprint.compile.outcome': result.outcome,
        'imprint.compile.turns': result.turns,
        'imprint.compile.duration_ms': result.durationMs,
        'imprint.compile.input_tokens': result.inputTokens,
        'imprint.compile.output_tokens': result.outputTokens,
        'imprint.compile.cache_read_input_tokens': result.cacheReadInputTokens,
        'imprint.compile.cache_creation_input_tokens': result.cacheCreationInputTokens,
        'imprint.compile.conversation_log': result.conversationLogPath,
      });

      if (!result.success) {
        const lines = [
          'compile agent did not produce a verified workflow.',
          `outcome: ${result.outcome}`,
          `message: ${result.message}`,
          `turns: ${result.turns}, duration: ${(result.durationMs / 1000).toFixed(1)}s`,
          `conversation log: ${result.conversationLogPath}`,
        ];
        if (result.outcome === 'timeout') {
          lines.push(
            'hint: increase the timeout with --timeout (teach) or --max-duration (generate)',
          );
        }
        throw new Error(lines.join('\n'));
      }

      // Load the agent-written workflow.json from disk and validate.
      if (!result.workflowPath) {
        throw new Error('compile agent reported success but no workflowPath');
      }
      const workflow = loadJsonFile(
        result.workflowPath,
        WorkflowSchema,
        {
          notFound: 'compile agent reported success but workflow.json missing',
          badSchema: 'compile agent wrote an invalid workflow.json',
        },
        'workflow',
      );
      let workflowPath = opts.outPath ?? result.workflowPath;
      if (!opts.outDir && !opts.outPath) {
        // No destination requested: move artifacts to the canonical tool directory.
        workflowPath = relocateGeneratedWorkflow(result.workflowPath, workflow);
      }
      if (opts.outPath && opts.outPath !== result.workflowPath) {
        // outPath's directory is only guaranteed to exist when it doubled as the
        // agent's outDir; with a separate outDir it may not have been created yet.
        mkdirSync(dirname(opts.outPath), { recursive: true });
        writeFileSync(opts.outPath, `${JSON.stringify(workflow, null, 2)}\n`, 'utf8');
      }

      setSpanAttributes(span, {
        'imprint.workflow_path': workflowPath,
        'imprint.workflow_tool_name': workflow.toolName,
      });

      return {
        workflow,
        workflowPath,
        requestsSent: 0, // legacy field — no longer meaningful for agentic compile
        requestsOriginal: 0, // legacy field
        inputTokens: result.inputTokens,
        outputTokens: result.outputTokens,
        durationMs: result.durationMs,
      };
    },
  );
}
192
+
193
/**
 * Move the agent's output files from wherever workflow.json was written into the
 * tool directory computed from the workflow's site and toolName.
 *
 * Only the known artifact file names are moved, and only those that exist.
 * When the workflow already sits in the target directory nothing is touched.
 *
 * @returns Path of workflow.json after relocation (or the input path if no move was needed).
 */
function relocateGeneratedWorkflow(workflowPath: string, workflow: Workflow): string {
  const fromDir = dirname(workflowPath);
  const toDir = localToolDir(workflow.site, workflow.toolName);
  if (fromDir === toDir) {
    return workflowPath;
  }

  mkdirSync(toDir, { recursive: true });

  const artifactNames = [
    'workflow.json',
    'parser.ts',
    'parser.test.ts',
    '.compile-log.json',
    '.compile-done.json',
    '.compile-give-up.json',
  ];
  artifactNames.forEach((name) => {
    const from = pathJoin(fromDir, name);
    if (existsSync(from)) {
      renameSync(from, pathJoin(toDir, name));
    }
  });

  return pathJoin(toDir, 'workflow.json');
}
212
+
213
/**
 * Strip request noise from a session before it is shown to an LLM. A modern SPA
 * page load records hundreds of requests, most of them bundles, trackers and
 * static assets, which would otherwise blow up the prompt size.
 *
 * A request survives only if all of the following hold:
 *  - its URL parses;
 *  - its resource type is not one of the noise types listed below (scripts and
 *    assets inflate the prompt without describing the API surface);
 *  - when the session's start URL yields a registrable domain, the request host
 *    either shares that registrable domain or is one of the hosts returned by
 *    `inferAppApiHosts`. Workflows that legitimately depend on another domain
 *    (e.g. an SSO redirect) should pass `--no-shrink`.
 *
 * Measured effect on Southwest: 813 → 34 requests, 6.5M → 0.3M tokens.
 *
 * @returns A shallow copy of the session with `requests` filtered; the input is not mutated.
 */
export function shrinkSession(session: Session): Session {
  const origin = safeUrl(session.url);
  const rootDomain = origin ? registrableDomain(origin.hostname) : null;
  const apiHosts = inferAppApiHosts(session, rootDomain);

  const noiseTypes = new Set([
    'Image',
    'Font',
    'Stylesheet',
    'Media',
    'Manifest',
    'Other',
    'Script', // JS bundles — huge and never load-bearing for codegen
    'Ping', // beacons — by definition fire-and-forget telemetry
    'Preflight', // CORS preflights — runtime replays them automatically
  ]);

  const kept = session.requests.filter((req) => {
    const parsed = safeUrl(req.url);
    if (!parsed) {
      return false;
    }
    if (noiseTypes.has(req.resourceType)) {
      return false;
    }
    // Without a usable start domain there is nothing to compare hosts against.
    if (!rootDomain) {
      return true;
    }
    return isSameRegistrableDomain(parsed.hostname, rootDomain) || apiHosts.has(parsed.hostname);
  });

  return { ...session, requests: kept };
}
262
+
263
/** Parse `s` as a URL, yielding `null` instead of throwing when it is not parseable. */
function safeUrl(s: string): URL | null {
  let parsed: URL | null = null;
  try {
    parsed = new URL(s);
  } catch {
    // Unparseable input: fall through with null.
  }
  return parsed;
}
270
+
271
+ // ─── triageRequests (LLM-based request filtering) ───────────────────────────
272
+
273
// Resource types that are offered to the triage LLM by default; requests of other
// types are only offered when their seq is explicitly preserved.
const TRIAGE_RESOURCE_TYPES = new Set(['XHR', 'Fetch', 'Document']);
// Maximum length (in characters of the JSON-serialized form) of request headers sent to triage.
const HEADER_TRUNCATE_LIMIT = 200;
// Per-request body cap for triage. Triage only needs enough body to distinguish
// data-bearing POSTs (search/booking) from telemetry; full bodies on a busy
// site can total >1MB and blow the 200K-token cap on `claude-opus-4-7`.
const TRIAGE_BODY_LIMIT = 500;
279
+
280
/** Outcome of one `triageRequests` pass. */
export interface TriageResult {
  /** Copy of the input session whose `requests` contain only the selected seqs. */
  session: Session;
  /** Seq numbers kept: the LLM's picks plus every explicitly preserved seq. */
  selectedSeqs: number[];
  /** How many requests were eligible for triage (before compaction). */
  consideredCount: number;
  /** Token usage reported by the triage LLM call. */
  inputTokens: number | null;
  outputTokens: number | null;
  /** Duration reported by the triage LLM call, in milliseconds. */
  durationMs: number;
}
288
+
289
/**
 * Compact per-request record built for triage. The digest/length fields are used
 * for grouping and are stripped before the payload is sent to the LLM.
 */
interface TriageRequestContext {
  seq: number;
  timestamp: number;
  method: string;
  url: string;
  resourceType: string;
  /** Response status, when a response was recorded. */
  status?: number;
  /** Response MIME type, when a response was recorded. */
  mimeType?: string;
  /** JSON-serialized request headers, truncated to HEADER_TRUNCATE_LIMIT. */
  headers: string;
  /** Request body truncated to TRIAGE_BODY_LIMIT. */
  body?: string;
  /** Digest of the full request body; part of the compaction group key. */
  bodyDigest?: string;
  bodyLength?: number;
  responseBodyDigest?: string;
  responseBodyLength?: number;
  // NOTE(review): the three fields below are never set in this module — presumably
  // filled in by compactRequestContexts when it merges repeated requests; confirm.
  repeatCount?: number;
  repeatedSeqs?: number[];
  lastTimestamp?: number;
}
307
+
308
/**
 * Ask the LLM which recorded requests matter for compilation and return the
 * session narrowed to those requests.
 *
 * Candidates are requests whose resource type is in TRIAGE_RESOURCE_TYPES plus any
 * request whose seq is explicitly preserved (candidate request/dependency seqs and
 * shared login seqs). Candidates are compacted, stripped of digest/length fields,
 * and sent together with the session's site, url and narration. The LLM must answer
 * with a JSON array of seq numbers.
 *
 * Seq numbers in the answer that were never offered as candidates are ignored, so an
 * invented number cannot pull an unrelated request (or a phantom seq) into the result.
 * Preserved seqs are always included in the selection.
 *
 * @param session Session to triage (expected to be redacted already by the caller).
 * @param llmConfig Optional LLM provider overrides.
 * @param context Candidate / shared context whose seqs must survive triage.
 * @returns The narrowed session, the selected seqs, and LLM usage stats.
 * @throws Error if the triage prompt file is missing, or the LLM response is not a
 *   JSON array of numbers.
 */
export async function triageRequests(
  session: Session,
  llmConfig?: LLMOptions,
  context: Pick<CompileOptions, 'candidate' | 'sharedContext'> = {},
): Promise<TriageResult> {
  const preserveSeqs = new Set([
    ...(context.candidate?.requestSeqs ?? []),
    ...(context.candidate?.dependencySeqs ?? []),
    ...(context.sharedContext?.loginRequestSeqs ?? []),
  ]);
  const candidates = session.requests.filter(
    (r) => TRIAGE_RESOURCE_TYPES.has(r.resourceType) || preserveSeqs.has(r.seq),
  );

  return await traced(
    'compile.triage_requests',
    'RETRIEVER',
    {
      'imprint.site': session.site,
      'imprint.requests_total': session.requests.length,
      'imprint.requests_considered': candidates.length,
      'imprint.provider': llmConfig?.provider ?? 'auto',
    },
    async (span) => {
      const compacted = compactRequestContexts(
        candidates.map((r) => ({
          seq: r.seq,
          timestamp: r.timestamp,
          method: r.method,
          url: r.url,
          resourceType: r.resourceType,
          status: r.response?.status,
          mimeType: r.response?.mimeType,
          headers: truncateHeaders(r.headers),
          body: truncate(r.body, TRIAGE_BODY_LIMIT),
          bodyDigest: requestContextDigest(r.body),
          bodyLength: r.body?.length,
          responseBodyDigest: requestContextDigest(r.response?.body),
          responseBodyLength: r.response?.body?.length,
        })),
        triageRequestGroupKey,
        { preserveSeqs },
      );
      // Strip digest/length fields the LLM doesn't use — they served compaction only
      const metadata = compacted.map(
        ({ bodyDigest, responseBodyDigest, bodyLength, responseBodyLength, ...rest }) => rest,
      );

      const triagePayload = {
        site: session.site,
        url: session.url,
        narration: session.narration,
        requests: metadata,
      };

      const promptPath = pathJoin(PROMPTS_DIR, 'request-triage.md');
      if (!existsSync(promptPath)) {
        throw new Error(
          `Triage prompt not found at ${promptPath}\n→ this is an Imprint installation problem.`,
        );
      }
      const systemPrompt = readFileSync(promptPath, 'utf8');

      log(
        `triaging ${metadata.length} compacted requests (from ${candidates.length} candidates / ${session.requests.length} total)…`,
      );
      const llm = resolveProvider(llmConfig ?? {});
      const result = await llm.analyze(systemPrompt, triagePayload);

      const arrayText = extractJsonArray(result.text);
      if (!arrayText) {
        throw new Error(
          `Triage LLM did not return a JSON array.\nRaw response:\n${result.text.slice(0, 1000)}`,
        );
      }

      let seqs: unknown;
      try {
        seqs = JSON.parse(arrayText);
      } catch (err) {
        throw new Error(
          `Triage response was not valid JSON: ${err instanceof Error ? err.message : String(err)}\nExtracted:\n${arrayText.slice(0, 500)}`,
        );
      }

      if (!Array.isArray(seqs) || !seqs.every((s) => typeof s === 'number')) {
        throw new Error(
          `Triage response is not an array of numbers.\nParsed: ${JSON.stringify(seqs).slice(0, 500)}`,
        );
      }

      // Only seqs that were actually offered for triage are trusted; anything else
      // is a number the model made up (or a seq of a request it never saw).
      const candidateSeqs = new Set(candidates.map((r) => r.seq));
      const returnedSeqs = seqs as number[];
      const llmSeqs = returnedSeqs.filter((s) => candidateSeqs.has(s));
      const ignoredCount = new Set(returnedSeqs.filter((s) => !candidateSeqs.has(s))).size;
      if (ignoredCount > 0) {
        log(`triage response named ${ignoredCount} seq(s) that were not offered for triage; ignoring them`);
      }

      const selectedSet = new Set([...llmSeqs, ...preserveSeqs]);
      const triaged: Session = {
        ...session,
        requests: session.requests.filter((r) => selectedSet.has(r.seq)),
      };

      log(`triage selected ${selectedSet.size} requests out of ${candidates.length} candidates`);

      setSpanAttributes(span, {
        'imprint.requests_compacted': metadata.length,
        'imprint.requests_selected': selectedSet.size,
        'imprint.triage.duration_ms': result.durationMs,
        'imprint.triage.input_tokens': result.inputTokens,
        'imprint.triage.output_tokens': result.outputTokens,
      });

      return {
        session: triaged,
        selectedSeqs: [...selectedSet],
        consideredCount: candidates.length,
        inputTokens: result.inputTokens,
        outputTokens: result.outputTokens,
        durationMs: result.durationMs,
      };
    },
  );
}
426
+
427
/**
 * Build the grouping key used when compacting triage requests: two requests with
 * equal keys are considered repeats of each other.
 *
 * The key combines method, host + path, the set of query parameter names,
 * resource type, response status, MIME type and request body digest. Query
 * parameter names are included so that endpoints sharing a pathname but taking
 * different parameters stay separate. URLs with more than 10 distinct parameter
 * names contribute no signature — those are typically analytics/telemetry where
 * small variations in the parameter set should not prevent compaction.
 *
 * If the URL cannot be parsed, the raw URL string is used in place of host + path.
 */
function triageRequestGroupKey(request: TriageRequestContext): unknown[] {
  const parsed = safeUrl(request.url);
  const hostAndPath = parsed ? `${parsed.hostname}${parsed.pathname}` : request.url;

  let querySignature = '';
  if (parsed) {
    const distinctNames = Array.from(new Set(parsed.searchParams.keys())).sort();
    const withinCap = distinctNames.length >= 1 && distinctNames.length <= 10;
    if (withinCap) {
      querySignature = distinctNames.join(',');
    }
  }

  const { method, resourceType, status, mimeType, bodyDigest } = request;
  return [method, hostAndPath, querySignature, resourceType, status, mimeType, bodyDigest];
}
456
+
457
/**
 * Serialize headers to JSON and cap the result at HEADER_TRUNCATE_LIMIT characters,
 * appending an ellipsis when anything was cut off.
 */
function truncateHeaders(headers: Record<string, string>): string {
  const json = JSON.stringify(headers);
  const tooLong = json.length > HEADER_TRUNCATE_LIMIT;
  return tooLong ? `${json.slice(0, HEADER_TRUNCATE_LIMIT)}…` : json;
}
462
+
463
+ // ─── compilePlaybook (playbook.yaml) ─────────────────────────────────────────
464
+
465
/** What `compilePlaybook` returns after a playbook was parsed and written to disk. */
interface CompilePlaybookResult {
  /** The parsed playbook. */
  playbook: Playbook;
  /** Where playbook.yaml was written. */
  playbookPath: string;
  /** Token usage summed over triage and compilation calls (null when none reported any). */
  inputTokens: number | null;
  outputTokens: number | null;
  /** Triage plus compilation LLM durations, in milliseconds. */
  durationMs: number;
}
472
+
473
// Maximum characters of each response body included in the playbook compilation payload.
const RESPONSE_BODY_LIMIT = 4000;
474
+
475
/** Location of `playbook.yaml` inside the local tool directory for `site` / `toolName`. */
export function defaultCompilePlaybookPath(site: string, toolName: string): string {
  const toolDir = localToolDir(site, toolName);
  return pathJoin(toolDir, 'playbook.yaml');
}
478
+
479
/**
 * Decide where a freshly compiled playbook should be written when the caller gave
 * no explicit output path.
 *
 * The default path is used when the site has no generated workflows yet, or when
 * one of them has the same tool name as the playbook. Otherwise the playbook would
 * land next to no workflow at all, so this throws with a hint naming the `--out`
 * path to use.
 *
 * @throws Error when generated workflows exist for the site but none matches `playbookToolName`.
 */
export function resolveDefaultCompilePlaybookPath(site: string, playbookToolName: string): string {
  const workflowTools = existingWorkflowToolNames(site);
  const canUseDefault = workflowTools.length === 0 || workflowTools.includes(playbookToolName);
  if (canUseDefault) {
    return defaultCompilePlaybookPath(site, playbookToolName);
  }

  let messageLines: string[];
  if (workflowTools.length === 1) {
    // Exactly one workflow: we can name the precise path the user most likely wants.
    const onlyTool = workflowTools[0] ?? playbookToolName;
    messageLines = [
      `compiled playbook toolName "${playbookToolName}" does not match the generated workflow "${onlyTool}" for site "${site}".`,
      `→ rerun compile-playbook with --out ${defaultCompilePlaybookPath(site, onlyTool)}`,
    ];
  } else {
    messageLines = [
      `compiled playbook toolName "${playbookToolName}" does not match any generated workflow for site "${site}".`,
      `Generated workflows: ${workflowTools.join(', ')}`,
      `→ rerun compile-playbook with --out ~/.imprint/${site}/<toolName>/playbook.yaml`,
    ];
  }
  throw new Error(messageLines.join('\n'));
}
501
+
502
/**
 * List, in sorted order, the names of the sub-directories of the site's local
 * directory that contain a `workflow.json`. Returns an empty list when the site
 * directory does not exist; entries that cannot be stat'ed are skipped.
 */
function existingWorkflowToolNames(site: string): string[] {
  const siteDir = localSiteDir(site);
  if (!existsSync(siteDir)) {
    return [];
  }

  const isDirectory = (path: string): boolean => {
    try {
      return statSync(path).isDirectory();
    } catch {
      return false;
    }
  };

  return readdirSync(siteDir)
    .filter((entry) => {
      const entryPath = pathJoin(siteDir, entry);
      return isDirectory(entryPath) && existsSync(pathJoin(entryPath, 'workflow.json'));
    })
    .sort();
}
517
+
518
/**
 * Traced entry point for playbook compilation: runs `compilePlaybookImpl` inside a
 * 'compile.playbook' span and records the resulting path, tool name and LLM usage
 * on that span.
 *
 * @returns Whatever `compilePlaybookImpl` produced, unchanged.
 */
export async function compilePlaybook(opts: CompileOptions): Promise<CompilePlaybookResult> {
  return await traced(
    'compile.playbook',
    'CHAIN',
    {
      'imprint.session_path': opts.sessionPath,
      'imprint.provider': opts.llmConfig?.provider ?? 'auto',
      'imprint.tool_name': opts.candidate?.toolName,
      'imprint.out_path': opts.outPath,
      'imprint.no_shrink': opts.noShrink ?? false,
    },
    async (span) => {
      const compiled = await compilePlaybookImpl(opts);
      const { playbook, playbookPath, durationMs, inputTokens, outputTokens } = compiled;
      setSpanAttributes(span, {
        'imprint.playbook_path': playbookPath,
        'imprint.playbook_tool_name': playbook.toolName,
        'imprint.playbook.duration_ms': durationMs,
        'imprint.playbook.input_tokens': inputTokens,
        'imprint.playbook.output_tokens': outputTokens,
      });
      return compiled;
    },
  );
}
542
+
543
/**
 * Compile a recorded session into playbook.yaml.
 *
 * Steps: load and validate the session, redact it unless it already carries
 * redaction markers, narrow the requests (shared triage result, fresh triage call,
 * or nothing when `noShrink` is set), build a slim payload, ask the LLM for the
 * playbook, parse it (with one corrective retry), and write the YAML to disk.
 *
 * @param opts Compile options; see CompileOptions.
 * @returns The parsed playbook, its path, and token/duration totals across triage
 *   and compilation calls.
 * @throws Error when the compilation prompt is missing, the LLM output still fails to
 *   parse after the retry, or the playbook's toolName differs from the selected
 *   candidate's. Errors from session loading, triage, and default-path resolution
 *   are not caught here.
 */
async function compilePlaybookImpl(opts: CompileOptions): Promise<CompilePlaybookResult> {
  // 1. Load session.
  let session: Session = loadJsonFile(
    opts.sessionPath,
    SessionSchema,
    {
      notFound: '→ run `imprint record <site>` to create one.',
      notJson: `→ if it's a partial .jsonl, run \`imprint assemble ${opts.sessionPath}\` first.`,
      badSchema: '→ check the file came from `imprint record`.',
    },
    'session',
  );

  // 2. Auto-redact if needed. A session counts as redacted as soon as any
  //    '[REDACTED:' marker appears anywhere in its serialized form.
  const looksRedacted = JSON.stringify(session).includes('[REDACTED:');
  if (!looksRedacted) {
    const r = redactSession(session);
    session = r.session;
    if (r.stats.totalRedactions > 0) {
      const freeformNote =
        r.stats.freeformRedactions > 0
          ? ` (${r.stats.freeformRedactions} free-form finding(s))`
          : '';
      log(`redacted ${r.stats.totalRedactions} value(s)${freeformNote} before sending to LLM`);
    }
  }

  // 3. Triage: LLM selects which requests matter.
  let triageTokens: { input: number | null; output: number | null; durationMs: number } = {
    input: null,
    output: null,
    durationMs: 0,
  };
  if (opts.preTriagedSession && !opts.noShrink) {
    // Shared triage path: merge pre-computed seqs with candidate-specific preserveSeqs
    const preserveSeqs = new Set([
      ...(opts.candidate?.requestSeqs ?? []),
      ...(opts.candidate?.dependencySeqs ?? []),
      ...(opts.sharedContext?.loginRequestSeqs ?? []),
    ]);
    const finalSeqs = new Set([...opts.preTriagedSession.selectedSeqs, ...preserveSeqs]);
    session = {
      ...session,
      requests: session.requests.filter((r) => finalSeqs.has(r.seq)),
    };
    log('using shared triage result (skipping per-tool triage LLM call)');
    triageTokens = {
      input: opts.preTriagedSession.inputTokens,
      output: opts.preTriagedSession.outputTokens,
      durationMs: opts.preTriagedSession.durationMs,
    };
  } else if (!opts.noShrink) {
    const triage = await triageRequests(session, opts.llmConfig, {
      candidate: opts.candidate,
      sharedContext: opts.sharedContext,
    });
    session = triage.session;
    triageTokens = {
      input: triage.inputTokens,
      output: triage.outputTokens,
      durationMs: triage.durationMs,
    };
  }

  // 4. Build slim payload from triaged requests (with response bodies).
  // NOTE(review): this keeps XHR/Fetch/Document only and truncates response bodies
  // even when noShrink is set, and it drops preserved seqs of other resource types
  // that triage deliberately kept — confirm that is intended.
  const xhrs = session.requests
    .filter(
      (r) =>
        r.resourceType === 'XHR' || r.resourceType === 'Fetch' || r.resourceType === 'Document',
    )
    .map((r) => ({
      seq: r.seq,
      timestamp: r.timestamp,
      method: r.method,
      url: r.url,
      resourceType: r.resourceType,
      status: r.response?.status,
      response_body: truncate(r.response?.body, RESPONSE_BODY_LIMIT),
    }));

  log(
    `compiling playbook from ${session.events.length} events / ${xhrs.length} XHRs / ${session.narration.length} narration lines…`,
  );

  const slimmed = {
    site: session.site,
    url: session.url,
    candidate: opts.candidate,
    sharedContext: opts.sharedContext,
    narration: session.narration,
    events: session.events,
    requests: xhrs,
  };

  // 5. Main compilation LLM call.
  const promptPath = pathJoin(PROMPTS_DIR, 'playbook-compilation.md');
  if (!existsSync(promptPath)) {
    throw new Error(
      `Prompt not found at ${promptPath}\n→ this is an Imprint installation problem.`,
    );
  }
  const systemPrompt = `${readFileSync(promptPath, 'utf8')}${
    opts.candidate
      ? `\n\nCandidate scope:\nCompile only this candidate: ${JSON.stringify(opts.candidate, null, 2)}\nShared context: ${JSON.stringify(opts.sharedContext ?? {}, null, 2)}\nThe playbook toolName and parameters must match the selected candidate/workflow, not any other action in the recording.\n`
      : ''
  }`;

  const llm = resolveProvider(opts.llmConfig ?? {});

  let playbook: Playbook | undefined;
  let lastResult = await llm.analyze(systemPrompt, slimmed);
  let llmInputTokens = lastResult.inputTokens;
  let llmOutputTokens = lastResult.outputTokens;
  let llmDurationMs = lastResult.durationMs;
  // The exact text that is parsed is also the text that gets written to disk.
  let yamlText = stripCodeFences(lastResult.text).trim();
  let lastErr: unknown;
  // At most two parse attempts: the original answer, then one corrected answer.
  for (let attempt = 0; attempt < 2; attempt++) {
    try {
      playbook = parsePlaybook(yamlText);
      lastErr = undefined;
      break;
    } catch (err) {
      lastErr = err;
      if (attempt === 0) {
        log('playbook YAML failed to parse, retrying with error feedback…');
        // Each analyze() call is given only the system prompt and payload, so the
        // rejected output must be quoted back for the model to be able to repair it.
        const fixPrompt = `Your previous output was invalid YAML. The parser error was:\n\n${err instanceof Error ? err.message : String(err)}\n\nYour previous output was:\n\n${yamlText}\n\nFix the YAML and return the corrected playbook. Output ONLY valid YAML, no prose.`;
        lastResult = await llm.analyze(systemPrompt, `${JSON.stringify(slimmed)}\n\n${fixPrompt}`);
        yamlText = stripCodeFences(lastResult.text).trim();
        llmInputTokens = addNullable(llmInputTokens, lastResult.inputTokens);
        llmOutputTokens = addNullable(llmOutputTokens, lastResult.outputTokens);
        llmDurationMs += lastResult.durationMs;
      }
    }
  }
  if (lastErr) {
    throw new Error(
      `Compiled playbook failed to parse: ${lastErr instanceof Error ? lastErr.message : String(lastErr)}\nRaw output:\n${lastResult.text.slice(0, 1500)}`,
    );
  }
  if (!playbook) {
    throw new Error('Playbook was not assigned after compile loop — this should not happen.');
  }

  if (opts.candidate && playbook.toolName !== opts.candidate.toolName) {
    throw new Error(
      `Compiled playbook toolName "${playbook.toolName}" does not match selected candidate "${opts.candidate.toolName}".`,
    );
  }

  const outPath =
    opts.outPath ?? resolveDefaultCompilePlaybookPath(session.site, playbook.toolName);
  mkdirSync(dirname(outPath), { recursive: true });
  writeFileSync(outPath, `${yamlText}\n`);

  return {
    playbook,
    playbookPath: outPath,
    inputTokens: addNullable(triageTokens.input, llmInputTokens),
    outputTokens: addNullable(triageTokens.output, llmOutputTokens),
    durationMs: triageTokens.durationMs + llmDurationMs,
  };
}
703
+
704
/**
 * Sum two optional counters. The result is `null` only when both inputs are `null`;
 * otherwise a missing side counts as zero.
 */
function addNullable(a: number | null, b: number | null): number | null {
  const bothMissing = a === null && b === null;
  return bothMissing ? null : (a ?? 0) + (b ?? 0);
}
708
+
709
/**
 * Cap a string at `limit` characters. Longer strings are cut and suffixed with a
 * marker stating the original length. Missing or empty input yields `undefined`.
 */
function truncate(s: string | undefined, limit: number): string | undefined {
  if (s) {
    const fits = s.length <= limit;
    return fits ? s : `${s.slice(0, limit)}…(truncated, original length ${s.length})`;
  }
  return undefined;
}
714
+
715
/**
 * Remove a single Markdown code fence wrapping the whole text, if there is one.
 *
 * The input is trimmed first. When it starts with a ``` line and ends with a
 * closing ``` line, the content between them is returned; otherwise the trimmed
 * input is returned unchanged. The opening fence may carry any info string
 * (`yaml`, `yaml ` with trailing spaces, `c++`, …) and both LF and CRLF line
 * endings are accepted. An empty fenced body is left as-is (fences included).
 *
 * @param s Raw LLM output.
 * @returns The unfenced content, or the trimmed input when it is not fenced.
 */
function stripCodeFences(s: string): string {
  const trimmed = s.trim();
  // [^\n`]* tolerates arbitrary info strings (and a trailing \r); \r?\n tolerates CRLF.
  const fenced = trimmed.match(/^```[^\n`]*\r?\n([\s\S]*?)\r?\n```$/);
  if (fenced?.[1]) return fenced[1];
  return trimmed;
}