imprint-mcp 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +168 -0
- package/LICENSE +21 -0
- package/README.md +322 -0
- package/examples/discoverandgo/README.md +57 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/cron.json +8 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/index.ts +89 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/workflow.json +39 -0
- package/examples/echo/README.md +37 -0
- package/examples/echo/echo_test/index.ts +31 -0
- package/examples/google-flights/search_google_flights/index.ts +101 -0
- package/examples/google-flights/search_google_flights/parser.test.ts +140 -0
- package/examples/google-flights/search_google_flights/parser.ts +189 -0
- package/examples/google-flights/search_google_flights/playbook.yaml +130 -0
- package/examples/google-flights/search_google_flights/workflow.json +48 -0
- package/examples/google-hotels/search_google_hotels/index.ts +194 -0
- package/examples/google-hotels/search_google_hotels/parser.test.ts +168 -0
- package/examples/google-hotels/search_google_hotels/parser.ts +330 -0
- package/examples/google-hotels/search_google_hotels/playbook.yaml +125 -0
- package/examples/google-hotels/search_google_hotels/workflow.json +111 -0
- package/examples/namecheap-domains/search_namecheap_domains/index.ts +144 -0
- package/examples/namecheap-domains/search_namecheap_domains/parser.ts +380 -0
- package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +50 -0
- package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +136 -0
- package/examples/namecheap-domains/search_namecheap_domains/workflow.json +97 -0
- package/examples/southwest/README.md +81 -0
- package/examples/southwest/search_southwest_flights/backends.json +23 -0
- package/examples/southwest/search_southwest_flights/cron.json +19 -0
- package/examples/southwest/search_southwest_flights/index.ts +110 -0
- package/examples/southwest/search_southwest_flights/playbook.yaml +46 -0
- package/examples/southwest/search_southwest_flights/workflow.json +54 -0
- package/package.json +78 -0
- package/prompts/compile-agent.md +580 -0
- package/prompts/intent-detection.md +198 -0
- package/prompts/playbook-compilation.md +279 -0
- package/prompts/request-triage.md +74 -0
- package/prompts/tool-candidate-detection.md +104 -0
- package/src/cli.ts +1287 -0
- package/src/imprint/agent.ts +468 -0
- package/src/imprint/app-api-hosts.ts +53 -0
- package/src/imprint/backend-ladder.ts +568 -0
- package/src/imprint/check.ts +136 -0
- package/src/imprint/chromium.ts +211 -0
- package/src/imprint/claude-cli-compile.ts +640 -0
- package/src/imprint/cli-credential.ts +394 -0
- package/src/imprint/codex-cli-compile.ts +712 -0
- package/src/imprint/compile-agent-types.ts +40 -0
- package/src/imprint/compile-agent.ts +404 -0
- package/src/imprint/compile-tools.ts +1389 -0
- package/src/imprint/compile.ts +720 -0
- package/src/imprint/cookie-jar.ts +246 -0
- package/src/imprint/credential-bundle.ts +195 -0
- package/src/imprint/credential-extract.ts +290 -0
- package/src/imprint/credential-store.ts +707 -0
- package/src/imprint/cron.ts +312 -0
- package/src/imprint/doctor.ts +223 -0
- package/src/imprint/emit.ts +154 -0
- package/src/imprint/etld.ts +134 -0
- package/src/imprint/freeform-redact.ts +216 -0
- package/src/imprint/inject-listener.ts +137 -0
- package/src/imprint/install.ts +795 -0
- package/src/imprint/integrations.ts +385 -0
- package/src/imprint/is-compiled.ts +2 -0
- package/src/imprint/json-path.ts +100 -0
- package/src/imprint/llm.ts +998 -0
- package/src/imprint/load-json.ts +54 -0
- package/src/imprint/log.ts +33 -0
- package/src/imprint/login.ts +166 -0
- package/src/imprint/mcp-compile-server.ts +282 -0
- package/src/imprint/mcp-maintenance.ts +1790 -0
- package/src/imprint/mcp-server.ts +350 -0
- package/src/imprint/multi-progress.ts +69 -0
- package/src/imprint/notify.ts +155 -0
- package/src/imprint/paths.ts +64 -0
- package/src/imprint/playbook-parser.ts +21 -0
- package/src/imprint/playbook-runner.ts +465 -0
- package/src/imprint/probe-backends.ts +251 -0
- package/src/imprint/progress.ts +28 -0
- package/src/imprint/record.ts +470 -0
- package/src/imprint/redact.ts +550 -0
- package/src/imprint/replay-capture.ts +387 -0
- package/src/imprint/request-context.ts +66 -0
- package/src/imprint/runtime-link.ts +73 -0
- package/src/imprint/runtime.ts +942 -0
- package/src/imprint/sensitive-keys.ts +156 -0
- package/src/imprint/session-diff.ts +409 -0
- package/src/imprint/session-merge.ts +198 -0
- package/src/imprint/session-writer.ts +149 -0
- package/src/imprint/sites.ts +27 -0
- package/src/imprint/stealth-fetch.ts +434 -0
- package/src/imprint/teach-state.ts +235 -0
- package/src/imprint/teach.ts +2120 -0
- package/src/imprint/tool-candidates.ts +423 -0
- package/src/imprint/tool-loader.ts +186 -0
- package/src/imprint/tool-selection.ts +70 -0
- package/src/imprint/tracing.ts +508 -0
- package/src/imprint/types.ts +472 -0
- package/src/imprint/version.ts +21 -0
|
@@ -0,0 +1,720 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* One recording compiles to two artifacts: workflow.json (API-replay)
|
|
3
|
+
* and playbook.yaml (DOM-replay). Both share the same skeleton —
|
|
4
|
+
* read session, redact-if-needed, slim, call LLM, parse, validate,
|
|
5
|
+
* write next to the session — so they live in one file with the
|
|
6
|
+
* differences (slim strategy, prompt, parser, schema, output filename)
|
|
7
|
+
* factored into a CompileTask config.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import {
|
|
11
|
+
existsSync,
|
|
12
|
+
mkdirSync,
|
|
13
|
+
readFileSync,
|
|
14
|
+
readdirSync,
|
|
15
|
+
renameSync,
|
|
16
|
+
statSync,
|
|
17
|
+
writeFileSync,
|
|
18
|
+
} from 'node:fs';
|
|
19
|
+
import { dirname, join as pathJoin } from 'node:path';
|
|
20
|
+
import type { OnDeadlineReached } from './agent.ts';
|
|
21
|
+
import { inferAppApiHosts } from './app-api-hosts.ts';
|
|
22
|
+
import { type CompileAgentProgress, compileAgent } from './compile-agent.ts';
|
|
23
|
+
import { isSameRegistrableDomain, registrableDomain } from './etld.ts';
|
|
24
|
+
import { type LLMOptions, extractJsonArray, resolveProvider } from './llm.ts';
|
|
25
|
+
import { loadJsonFile } from './load-json.ts';
|
|
26
|
+
import { createLog } from './log.ts';
|
|
27
|
+
import { imprintHomeDir, localSiteDir, localToolDir } from './paths.ts';
|
|
28
|
+
import { parsePlaybook } from './playbook-parser.ts';
|
|
29
|
+
import { redactSession } from './redact.ts';
|
|
30
|
+
import { compactRequestContexts, requestContextDigest } from './request-context.ts';
|
|
31
|
+
import { ensureImprintRuntimeLink } from './runtime-link.ts';
|
|
32
|
+
import type { ClassifiedValue } from './session-diff.ts';
|
|
33
|
+
import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
|
|
34
|
+
import { setSpanAttributes, traced } from './tracing.ts';
|
|
35
|
+
import {
|
|
36
|
+
type Playbook,
|
|
37
|
+
type Session,
|
|
38
|
+
SessionSchema,
|
|
39
|
+
type Workflow,
|
|
40
|
+
WorkflowSchema,
|
|
41
|
+
} from './types.ts';
|
|
42
|
+
|
|
43
|
+
export type { CompileAgentProgress } from './compile-agent.ts';
|
|
44
|
+
|
|
45
|
+
// Directory holding the LLM system prompts (request-triage.md,
// playbook-compilation.md), located two levels above this module's directory.
const PROMPTS_DIR = pathJoin(import.meta.dir, '..', '..', 'prompts');
// Module logger, created with the 'compile' label.
const log = createLog('compile');
|
|
47
|
+
|
|
48
|
+
/**
 * Options common to the compile entry points in this file. `compilePlaybook`
 * takes these directly; `GenerateOptions` extends them for the workflow path.
 */
interface CompileOptions {
  /** Path to session.json or session.redacted.json */
  sessionPath: string;
  /** Where to write the artifact. Defaults to the generated tool directory. */
  outPath?: string;
  /** Override LLM config (region, model, project). */
  llmConfig?: LLMOptions;
  /** If true, send the FULL session to the LLM (don't shrink). Useful for
   * debugging when shrinking might be over-aggressive. Default false. */
  noShrink?: boolean;
  /** Candidate-specific compile scope for multi-tool teach. */
  candidate?: ToolCandidate;
  /** Shared auth/helper guidance generated once for a multi-tool teach run. */
  sharedContext?: SharedCompileContext;
  /** Pre-computed triage result from a shared pass. When set, compilePlaybook
   * skips its own triageRequests() LLM call and merges the shared selectedSeqs
   * with any per-tool preserveSeqs locally. Ignored when `noShrink` is true,
   * because no triage filtering is applied at all in that case. */
  preTriagedSession?: TriageResult;
}
|
|
67
|
+
|
|
68
|
+
// ─── generate (workflow.json) ────────────────────────────────────────────────
|
|
69
|
+
|
|
70
|
+
/**
 * Options for `generate()`: everything in `CompileOptions` plus the knobs that
 * are forwarded to the compile agent.
 */
interface GenerateOptions extends CompileOptions {
  /** Hard wall-clock budget for the agent. Default 30 minutes. */
  maxDurationMs?: number;
  /** Progress callback with verification cycle information. */
  onProgress?: (p: CompileAgentProgress) => void;
  /** Called when wall-clock deadline is reached; return ms to extend or null to time out. */
  onDeadlineReached?: OnDeadlineReached;
  /** Retain parser.test.ts after successful verification. */
  keepTest?: boolean;
  /** Directory where workflow.json/parser.ts/parser.test.ts are written.
   * When omitted but `outPath` is set, the directory of `outPath` is used. */
  outDir?: string;
  /** Dual-pass value classifications from replay-and-diff. */
  classifications?: ClassifiedValue[];
  /** Credential values extracted during teach, passed to integration tests via env var. */
  teachCredentials?: { site: string; values: Record<string, string> };
}
|
|
86
|
+
|
|
87
|
+
/** What `generate()` resolves with after the agent produced a valid workflow. */
interface GenerateResult {
  /** The schema-validated workflow loaded from the agent's workflow.json. */
  workflow: Workflow;
  /** Final location of workflow.json (after any relocation or copy to `outPath`). */
  workflowPath: string;
  /** Number of requests the LLM saw (after shrinking).
   * Legacy: `generate()` always reports 0 here. */
  requestsSent: number;
  /** Original count before shrinking.
   * Legacy: `generate()` always reports 0 here. */
  requestsOriginal: number;
  /** Input token count reported by the compile agent, or null. */
  inputTokens: number | null;
  /** Output token count reported by the compile agent, or null. */
  outputTokens: number | null;
  /** Duration reported by the compile agent, in milliseconds. */
  durationMs: number;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Compile a recorded session into a verified workflow.json by running the
 * compile agent, then load and schema-validate the file the agent wrote.
 *
 * Output placement:
 *  - neither `outDir` nor `outPath` given: the agent's artifacts are moved into
 *    the tool directory derived from the workflow's site/toolName;
 *  - `outPath` given and different from the path the agent wrote: the validated
 *    workflow is re-serialized to `outPath`, creating its parent directory
 *    first so a not-yet-existing destination does not fail with ENOENT.
 *
 * The whole run is wrapped in a `compile.generate` trace span that records the
 * agent outcome, token counts and the final workflow path.
 *
 * @param opts Session path, output location and agent settings.
 * @returns The validated workflow, its final path and the agent's usage numbers.
 * @throws Error when the agent does not report success (the message carries
 *   outcome, turns, duration and conversation log path) or reports success
 *   without a workflow path. Errors from loading/validating workflow.json and
 *   from the file-system calls propagate as well.
 */
export async function generate(opts: GenerateOptions): Promise<GenerateResult> {
  return await traced(
    'compile.generate',
    'AGENT',
    {
      'imprint.session_path': opts.sessionPath,
      'imprint.provider': opts.llmConfig?.provider ?? 'auto',
      'imprint.tool_name': opts.candidate?.toolName,
      'imprint.out_path': opts.outPath,
      'imprint.out_dir': opts.outDir,
    },
    async (span) => {
      ensureImprintRuntimeLink(imprintHomeDir());
      // An explicit outDir wins; otherwise the agent writes next to outPath.
      const outDir = opts.outDir ?? (opts.outPath ? dirname(opts.outPath) : undefined);
      const result = await compileAgent({
        sessionPath: opts.sessionPath,
        maxDurationMs: opts.maxDurationMs,
        llmConfig: opts.llmConfig,
        onProgress: opts.onProgress,
        onDeadlineReached: opts.onDeadlineReached,
        keepTest: opts.keepTest,
        outDir,
        candidate: opts.candidate,
        sharedContext: opts.sharedContext,
        classifications: opts.classifications,
        teachCredentials: opts.teachCredentials,
      });

      // Recorded before the success check so failed runs are traced too.
      setSpanAttributes(span, {
        'imprint.compile.outcome': result.outcome,
        'imprint.compile.turns': result.turns,
        'imprint.compile.duration_ms': result.durationMs,
        'imprint.compile.input_tokens': result.inputTokens,
        'imprint.compile.output_tokens': result.outputTokens,
        'imprint.compile.cache_read_input_tokens': result.cacheReadInputTokens,
        'imprint.compile.cache_creation_input_tokens': result.cacheCreationInputTokens,
        'imprint.compile.conversation_log': result.conversationLogPath,
      });

      if (!result.success) {
        const lines = [
          'compile agent did not produce a verified workflow.',
          `outcome: ${result.outcome}`,
          `message: ${result.message}`,
          `turns: ${result.turns}, duration: ${(result.durationMs / 1000).toFixed(1)}s`,
          `conversation log: ${result.conversationLogPath}`,
        ];
        if (result.outcome === 'timeout') {
          lines.push(
            'hint: increase the timeout with --timeout (teach) or --max-duration (generate)',
          );
        }
        throw new Error(lines.join('\n'));
      }

      // Load the agent-written workflow.json from disk and validate.
      if (!result.workflowPath) {
        throw new Error('compile agent reported success but no workflowPath');
      }
      const workflow = loadJsonFile(
        result.workflowPath,
        WorkflowSchema,
        {
          notFound: 'compile agent reported success but workflow.json missing',
          badSchema: 'compile agent wrote an invalid workflow.json',
        },
        'workflow',
      );
      let workflowPath = opts.outPath ?? result.workflowPath;
      if (!opts.outDir && !opts.outPath) {
        // No destination requested: move the artifacts to the default tool dir.
        workflowPath = relocateGeneratedWorkflow(result.workflowPath, workflow);
      }
      if (opts.outPath && opts.outPath !== result.workflowPath) {
        // outDir and outPath may point at different directories, so the
        // destination directory is not guaranteed to exist yet.
        mkdirSync(dirname(opts.outPath), { recursive: true });
        writeFileSync(opts.outPath, `${JSON.stringify(workflow, null, 2)}\n`, 'utf8');
      }

      setSpanAttributes(span, {
        'imprint.workflow_path': workflowPath,
        'imprint.workflow_tool_name': workflow.toolName,
      });

      return {
        workflow,
        workflowPath,
        requestsSent: 0, // legacy field — no longer meaningful for agentic compile
        requestsOriginal: 0, // legacy field
        inputTokens: result.inputTokens,
        outputTokens: result.outputTokens,
        durationMs: result.durationMs,
      };
    },
  );
}
|
|
192
|
+
|
|
193
|
+
/**
 * Move the compile artifacts that sit beside `workflowPath` into the tool
 * directory derived from the workflow's site and toolName.
 *
 * Only the known artifact file names are moved, and only those that exist.
 * When the workflow already lives in the target directory nothing is touched.
 *
 * @returns Path of workflow.json in its final location.
 */
function relocateGeneratedWorkflow(workflowPath: string, workflow: Workflow): string {
  const fromDir = dirname(workflowPath);
  const toDir = localToolDir(workflow.site, workflow.toolName);
  if (fromDir === toDir) {
    return workflowPath;
  }

  mkdirSync(toDir, { recursive: true });

  const artifactNames = [
    'workflow.json',
    'parser.ts',
    'parser.test.ts',
    '.compile-log.json',
    '.compile-done.json',
    '.compile-give-up.json',
  ];
  for (const name of artifactNames) {
    const from = pathJoin(fromDir, name);
    if (existsSync(from)) {
      renameSync(from, pathJoin(toDir, name));
    }
  }

  return pathJoin(toDir, 'workflow.json');
}
|
|
212
|
+
|
|
213
|
+
/**
 * Strip request noise from a session before it goes to the LLM. A modern SPA
 * issues 500-1000 requests per page and roughly 80% are JS bundles, ad
 * pixels, third-party trackers and font/image assets; left in, a redacted
 * session easily exceeds 10M tokens.
 *
 * A request survives only if all of the following hold:
 *   - its URL parses;
 *   - its resource type is not one of the noise types listed below (what
 *     matters for codegen is the API surface — XHR/Fetch/Document — not the
 *     scripts and assets around it);
 *   - its host shares the start URL's registrable domain, or is one of the
 *     hosts returned by `inferAppApiHosts`. Everything else is presumed
 *     third-party noise. Workflows that legitimately leave the domain (e.g. a
 *     login redirect to an SSO provider) should pass `--no-shrink`.
 *
 * When the start URL yields no registrable domain, the host rule is skipped.
 *
 * Net effect on Southwest: 813 → 34 requests, 6.5M → 0.3M tokens.
 *
 * @returns A copy of the session with `requests` filtered; the input is not mutated.
 */
export function shrinkSession(session: Session): Session {
  const origin = safeUrl(session.url);
  const rootDomain = origin ? registrableDomain(origin.hostname) : null;
  const apiHosts = inferAppApiHosts(session, rootDomain);

  const noiseTypes = new Set([
    'Image',
    'Font',
    'Stylesheet',
    'Media',
    'Manifest',
    'Other',
    'Script', // JS bundles — huge and never load-bearing for codegen
    'Ping', // beacons — by definition fire-and-forget telemetry
    'Preflight', // CORS preflights — runtime replays them automatically
  ]);

  const kept = session.requests.filter((request) => {
    const target = safeUrl(request.url);
    if (!target) {
      return false;
    }
    if (noiseTypes.has(request.resourceType)) {
      return false;
    }
    if (!rootDomain) {
      // No domain to compare against: keep every parseable, non-noise request.
      return true;
    }
    return isSameRegistrableDomain(target.hostname, rootDomain) || apiHosts.has(target.hostname);
  });

  return { ...session, requests: kept };
}
|
|
262
|
+
|
|
263
|
+
/**
 * Parse `s` as an absolute URL without throwing.
 *
 * @returns The parsed URL, or null when `s` is not a valid URL.
 */
function safeUrl(s: string): URL | null {
  let parsed: URL | null = null;
  try {
    parsed = new URL(s);
  } catch {
    // Not a parseable URL; fall through with null.
  }
  return parsed;
}
|
|
270
|
+
|
|
271
|
+
// ─── triageRequests (LLM-based request filtering) ───────────────────────────
|
|
272
|
+
|
|
273
|
+
// Resource types offered to the triage LLM. Requests of other types are only
// considered when their seq is explicitly preserved by the caller.
const TRIAGE_RESOURCE_TYPES = new Set(['XHR', 'Fetch', 'Document']);
// Serialized request headers longer than this many characters are cut off and
// suffixed with "…" before being sent to triage.
const HEADER_TRUNCATE_LIMIT = 200;
// Per-request body cap for triage. Triage only needs enough body to distinguish
// data-bearing POSTs (search/booking) from telemetry; full bodies on a busy
// site can total >1MB and blow the 200K-token cap on `claude-opus-4-7`.
const TRIAGE_BODY_LIMIT = 500;
|
|
279
|
+
|
|
280
|
+
/** Outcome of `triageRequests()`. */
export interface TriageResult {
  /** The input session with `requests` reduced to those whose seq was selected. */
  session: Session;
  /** Union of the seqs returned by the triage LLM and the caller's preserved seqs. */
  selectedSeqs: number[];
  /** Number of requests that were candidates for triage (before compaction). */
  consideredCount: number;
  /** Input token count reported by the triage LLM call, or null. */
  inputTokens: number | null;
  /** Output token count reported by the triage LLM call, or null. */
  outputTokens: number | null;
  /** Duration reported by the triage LLM call, in milliseconds. */
  durationMs: number;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Slimmed-down view of one recorded request, as built by `triageRequests()`
 * and fed through `compactRequestContexts`.
 */
interface TriageRequestContext {
  seq: number;
  timestamp: number;
  method: string;
  url: string;
  resourceType: string;
  /** Response status, when a response was recorded. */
  status?: number;
  /** Response MIME type, when a response was recorded. */
  mimeType?: string;
  /** JSON-serialized request headers, truncated by `truncateHeaders`. */
  headers: string;
  /** Request body after `truncate(…, TRIAGE_BODY_LIMIT)`. */
  body?: string;
  /** `requestContextDigest` of the request body; part of the grouping key. */
  bodyDigest?: string;
  /** Length of the untruncated request body. */
  bodyLength?: number;
  /** `requestContextDigest` of the response body. */
  responseBodyDigest?: string;
  /** Length of the response body. */
  responseBodyLength?: number;
  // NOTE(review): the three fields below are never set in this file —
  // presumably populated by compactRequestContexts when it merges repeated
  // requests; confirm against request-context.ts.
  repeatCount?: number;
  repeatedSeqs?: number[];
  lastTimestamp?: number;
}
|
|
307
|
+
|
|
308
|
+
/**
 * Ask the LLM which recorded requests matter and return the session reduced
 * to that selection.
 *
 * Candidates are requests whose resource type is in TRIAGE_RESOURCE_TYPES,
 * plus any request whose seq the caller wants preserved (candidate request /
 * dependency seqs and shared login seqs). Candidates are compacted, stripped
 * of digest/length fields and sent with the request-triage prompt. The LLM is
 * expected to answer with a JSON array of seq numbers; preserved seqs are
 * always added to its selection.
 *
 * @param session Session to triage; not mutated.
 * @param llmConfig Optional provider configuration.
 * @param context Candidate / shared context supplying the seqs to preserve.
 * @throws Error when the prompt file is missing, or when the LLM reply does
 *   not contain a parseable JSON array of numbers.
 */
export async function triageRequests(
  session: Session,
  llmConfig?: LLMOptions,
  context: Pick<CompileOptions, 'candidate' | 'sharedContext'> = {},
): Promise<TriageResult> {
  const { candidate, sharedContext } = context;
  const preserveSeqs = new Set([
    ...(candidate?.requestSeqs ?? []),
    ...(candidate?.dependencySeqs ?? []),
    ...(sharedContext?.loginRequestSeqs ?? []),
  ]);
  const candidates = session.requests.filter((request) => {
    return TRIAGE_RESOURCE_TYPES.has(request.resourceType) || preserveSeqs.has(request.seq);
  });

  const spanAttributes = {
    'imprint.site': session.site,
    'imprint.requests_total': session.requests.length,
    'imprint.requests_considered': candidates.length,
    'imprint.provider': llmConfig?.provider ?? 'auto',
  };

  return await traced('compile.triage_requests', 'RETRIEVER', spanAttributes, async (span) => {
    const contexts = candidates.map((request) => {
      const response = request.response;
      return {
        seq: request.seq,
        timestamp: request.timestamp,
        method: request.method,
        url: request.url,
        resourceType: request.resourceType,
        status: response?.status,
        mimeType: response?.mimeType,
        headers: truncateHeaders(request.headers),
        body: truncate(request.body, TRIAGE_BODY_LIMIT),
        bodyDigest: requestContextDigest(request.body),
        bodyLength: request.body?.length,
        responseBodyDigest: requestContextDigest(response?.body),
        responseBodyLength: response?.body?.length,
      };
    });
    const compacted = compactRequestContexts(contexts, triageRequestGroupKey, { preserveSeqs });

    // Digest and length fields only exist to drive compaction; the LLM never sees them.
    const metadata = compacted.map((entry) => {
      const { bodyDigest, responseBodyDigest, bodyLength, responseBodyLength, ...visible } = entry;
      return visible;
    });

    const triagePayload = {
      site: session.site,
      url: session.url,
      narration: session.narration,
      requests: metadata,
    };

    const promptPath = pathJoin(PROMPTS_DIR, 'request-triage.md');
    if (!existsSync(promptPath)) {
      throw new Error(
        `Triage prompt not found at ${promptPath}\n→ this is an Imprint installation problem.`,
      );
    }
    const systemPrompt = readFileSync(promptPath, 'utf8');

    log(
      `triaging ${metadata.length} compacted requests (from ${candidates.length} candidates / ${session.requests.length} total)…`,
    );
    const provider = resolveProvider(llmConfig ?? {});
    const reply = await provider.analyze(systemPrompt, triagePayload);

    const arrayText = extractJsonArray(reply.text);
    if (!arrayText) {
      throw new Error(
        `Triage LLM did not return a JSON array.\nRaw response:\n${reply.text.slice(0, 1000)}`,
      );
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(arrayText);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(
        `Triage response was not valid JSON: ${reason}\nExtracted:\n${arrayText.slice(0, 500)}`,
      );
    }

    const isNumberArray = (value: unknown): value is number[] =>
      Array.isArray(value) && value.every((item) => typeof item === 'number');
    if (!isNumberArray(parsed)) {
      throw new Error(
        `Triage response is not an array of numbers.\nParsed: ${JSON.stringify(parsed).slice(0, 500)}`,
      );
    }

    // Preserved seqs are kept regardless of what the LLM picked.
    const selected = new Set([...parsed, ...preserveSeqs]);
    const triaged: Session = {
      ...session,
      requests: session.requests.filter((request) => selected.has(request.seq)),
    };

    log(`triage selected ${selected.size} requests out of ${candidates.length} candidates`);

    setSpanAttributes(span, {
      'imprint.requests_compacted': metadata.length,
      'imprint.requests_selected': selected.size,
      'imprint.triage.duration_ms': reply.durationMs,
      'imprint.triage.input_tokens': reply.inputTokens,
      'imprint.triage.output_tokens': reply.outputTokens,
    });

    return {
      session: triaged,
      selectedSeqs: [...selected],
      consideredCount: candidates.length,
      inputTokens: reply.inputTokens,
      outputTokens: reply.outputTokens,
      durationMs: reply.durationMs,
    };
  });
}
|
|
426
|
+
|
|
427
|
+
/**
 * Build the grouping key used to compact similar requests for triage.
 *
 * The key is [method, host+path, query-parameter-name signature, resource
 * type, status, MIME type, body digest]. If the URL does not parse, the raw
 * URL string stands in for host+path and the signature is empty.
 */
function triageRequestGroupKey(request: TriageRequestContext): unknown[] {
  let location: string = request.url;
  let signature = '';

  let parsed: URL | null;
  try {
    parsed = new URL(request.url);
  } catch {
    // keep full url as fallback
    parsed = null;
  }

  if (parsed !== null) {
    location = `${parsed.hostname}${parsed.pathname}`;
    // Sorted, de-duplicated parameter names keep requests with different
    // parameter signatures in separate groups (e.g. a config fetch vs a
    // lookup that shares the pathname but adds a filter/query param).
    // URLs with more than 10 distinct params are typically
    // analytics/telemetry, where small variations in the parameter set
    // should not prevent compaction, so they get no signature.
    const names = Array.from(new Set(parsed.searchParams.keys())).sort();
    const withinCap = names.length >= 1 && names.length <= 10;
    if (withinCap) {
      signature = names.join(',');
    }
  }

  const { method, resourceType, status, mimeType, bodyDigest } = request;
  return [method, location, signature, resourceType, status, mimeType, bodyDigest];
}
|
|
456
|
+
|
|
457
|
+
/**
 * Serialize headers to JSON, capped at HEADER_TRUNCATE_LIMIT characters.
 * A serialization that exceeds the cap is cut at the limit and suffixed with "…".
 */
function truncateHeaders(headers: Record<string, string>): string {
  const json = JSON.stringify(headers);
  return json.length > HEADER_TRUNCATE_LIMIT
    ? `${json.slice(0, HEADER_TRUNCATE_LIMIT)}…`
    : json;
}
|
|
462
|
+
|
|
463
|
+
// ─── compilePlaybook (playbook.yaml) ─────────────────────────────────────────
|
|
464
|
+
|
|
465
|
+
/** What `compilePlaybook()` resolves with. */
interface CompilePlaybookResult {
  /** The compiled playbook. */
  playbook: Playbook;
  /** Path associated with the compiled playbook.yaml. */
  playbookPath: string;
  /** Input token usage, or null when not reported. */
  inputTokens: number | null;
  /** Output token usage, or null when not reported. */
  outputTokens: number | null;
  /** Duration in milliseconds. */
  durationMs: number;
}
|
|
472
|
+
|
|
473
|
+
// Limit passed to truncate() for each response body included in the
// playbook-compilation payload.
const RESPONSE_BODY_LIMIT = 4000;
|
|
474
|
+
|
|
475
|
+
/** Default location of playbook.yaml: inside the local tool directory for `site`/`toolName`. */
export function defaultCompilePlaybookPath(site: string, toolName: string): string {
  const toolDir = localToolDir(site, toolName);
  return pathJoin(toolDir, 'playbook.yaml');
}
|
|
478
|
+
|
|
479
|
+
/**
 * Choose the default playbook.yaml path for a compiled playbook, refusing to
 * place it where it would not line up with an already generated workflow.
 *
 * The default path is returned when the site has no generated workflows yet,
 * or when one of them has the same tool name as the playbook.
 *
 * @throws Error when workflows exist for the site but none is named
 *   `playbookToolName`; the message suggests an explicit `--out` path.
 */
export function resolveDefaultCompilePlaybookPath(site: string, playbookToolName: string): string {
  const known = existingWorkflowToolNames(site);
  const unconstrainedOrMatching = known.length === 0 || known.includes(playbookToolName);
  if (unconstrainedOrMatching) {
    return defaultCompilePlaybookPath(site, playbookToolName);
  }

  let explanation: string[];
  if (known.length === 1) {
    // Exactly one workflow: point the user straight at its directory.
    const only = known[0] ?? playbookToolName;
    explanation = [
      `compiled playbook toolName "${playbookToolName}" does not match the generated workflow "${only}" for site "${site}".`,
      `→ rerun compile-playbook with --out ${defaultCompilePlaybookPath(site, only)}`,
    ];
  } else {
    explanation = [
      `compiled playbook toolName "${playbookToolName}" does not match any generated workflow for site "${site}".`,
      `Generated workflows: ${known.join(', ')}`,
      `→ rerun compile-playbook with --out ~/.imprint/${site}/<toolName>/playbook.yaml`,
    ];
  }
  throw new Error(explanation.join('\n'));
}
|
|
501
|
+
|
|
502
|
+
/**
 * List the tool names under a site's local directory that already have a
 * workflow.json, sorted. A tool name is the name of a sub-directory that
 * contains workflow.json. Entries that cannot be stat'ed or are not
 * directories are skipped. Returns [] when the site directory does not exist.
 */
function existingWorkflowToolNames(site: string): string[] {
  const siteDir = localSiteDir(site);
  if (!existsSync(siteDir)) {
    return [];
  }

  const isDirectory = (path: string): boolean => {
    try {
      return statSync(path).isDirectory();
    } catch {
      return false;
    }
  };

  return readdirSync(siteDir)
    .filter((entry) => {
      const dir = pathJoin(siteDir, entry);
      return isDirectory(dir) && existsSync(pathJoin(dir, 'workflow.json'));
    })
    .sort();
}
|
|
517
|
+
|
|
518
|
+
/**
 * Compile a recorded session into a playbook. This is a tracing wrapper: the
 * work is done by `compilePlaybookImpl`, run inside a `compile.playbook` span
 * that records the request options up front and the resulting path, tool name
 * and usage numbers afterwards.
 */
export async function compilePlaybook(opts: CompileOptions): Promise<CompilePlaybookResult> {
  const spanAttributes = {
    'imprint.session_path': opts.sessionPath,
    'imprint.provider': opts.llmConfig?.provider ?? 'auto',
    'imprint.tool_name': opts.candidate?.toolName,
    'imprint.out_path': opts.outPath,
    'imprint.no_shrink': opts.noShrink ?? false,
  };

  return await traced('compile.playbook', 'CHAIN', spanAttributes, async (span) => {
    const compiled = await compilePlaybookImpl(opts);
    const { playbook, playbookPath, durationMs, inputTokens, outputTokens } = compiled;
    setSpanAttributes(span, {
      'imprint.playbook_path': playbookPath,
      'imprint.playbook_tool_name': playbook.toolName,
      'imprint.playbook.duration_ms': durationMs,
      'imprint.playbook.input_tokens': inputTokens,
      'imprint.playbook.output_tokens': outputTokens,
    });
    return compiled;
  });
}
|
|
542
|
+
|
|
543
|
+
async function compilePlaybookImpl(opts: CompileOptions): Promise<CompilePlaybookResult> {
|
|
544
|
+
// 1. Load session.
|
|
545
|
+
let session: Session = loadJsonFile(
|
|
546
|
+
opts.sessionPath,
|
|
547
|
+
SessionSchema,
|
|
548
|
+
{
|
|
549
|
+
notFound: '→ run `imprint record <site>` to create one.',
|
|
550
|
+
notJson: `→ if it's a partial .jsonl, run \`imprint assemble ${opts.sessionPath}\` first.`,
|
|
551
|
+
badSchema: '→ check the file came from `imprint record`.',
|
|
552
|
+
},
|
|
553
|
+
'session',
|
|
554
|
+
);
|
|
555
|
+
|
|
556
|
+
// 2. Auto-redact if needed.
|
|
557
|
+
const looksRedacted = JSON.stringify(session).includes('[REDACTED:');
|
|
558
|
+
if (!looksRedacted) {
|
|
559
|
+
const r = redactSession(session);
|
|
560
|
+
session = r.session;
|
|
561
|
+
if (r.stats.totalRedactions > 0) {
|
|
562
|
+
const freeformNote =
|
|
563
|
+
r.stats.freeformRedactions > 0
|
|
564
|
+
? ` (${r.stats.freeformRedactions} free-form finding(s))`
|
|
565
|
+
: '';
|
|
566
|
+
log(`redacted ${r.stats.totalRedactions} value(s)${freeformNote} before sending to LLM`);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
// 3. Triage: LLM selects which requests matter.
|
|
571
|
+
let triageTokens: { input: number | null; output: number | null; durationMs: number } = {
|
|
572
|
+
input: null,
|
|
573
|
+
output: null,
|
|
574
|
+
durationMs: 0,
|
|
575
|
+
};
|
|
576
|
+
if (opts.preTriagedSession && !opts.noShrink) {
|
|
577
|
+
// Shared triage path: merge pre-computed seqs with candidate-specific preserveSeqs
|
|
578
|
+
const preserveSeqs = new Set([
|
|
579
|
+
...(opts.candidate?.requestSeqs ?? []),
|
|
580
|
+
...(opts.candidate?.dependencySeqs ?? []),
|
|
581
|
+
...(opts.sharedContext?.loginRequestSeqs ?? []),
|
|
582
|
+
]);
|
|
583
|
+
const finalSeqs = new Set([...opts.preTriagedSession.selectedSeqs, ...preserveSeqs]);
|
|
584
|
+
session = {
|
|
585
|
+
...session,
|
|
586
|
+
requests: session.requests.filter((r) => finalSeqs.has(r.seq)),
|
|
587
|
+
};
|
|
588
|
+
log('using shared triage result (skipping per-tool triage LLM call)');
|
|
589
|
+
triageTokens = {
|
|
590
|
+
input: opts.preTriagedSession.inputTokens,
|
|
591
|
+
output: opts.preTriagedSession.outputTokens,
|
|
592
|
+
durationMs: opts.preTriagedSession.durationMs,
|
|
593
|
+
};
|
|
594
|
+
} else if (!opts.noShrink) {
|
|
595
|
+
const triage = await triageRequests(session, opts.llmConfig, {
|
|
596
|
+
candidate: opts.candidate,
|
|
597
|
+
sharedContext: opts.sharedContext,
|
|
598
|
+
});
|
|
599
|
+
session = triage.session;
|
|
600
|
+
triageTokens = {
|
|
601
|
+
input: triage.inputTokens,
|
|
602
|
+
output: triage.outputTokens,
|
|
603
|
+
durationMs: triage.durationMs,
|
|
604
|
+
};
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
// 4. Build slim payload from triaged requests (with response bodies).
|
|
608
|
+
const xhrs = session.requests
|
|
609
|
+
.filter(
|
|
610
|
+
(r) =>
|
|
611
|
+
r.resourceType === 'XHR' || r.resourceType === 'Fetch' || r.resourceType === 'Document',
|
|
612
|
+
)
|
|
613
|
+
.map((r) => ({
|
|
614
|
+
seq: r.seq,
|
|
615
|
+
timestamp: r.timestamp,
|
|
616
|
+
method: r.method,
|
|
617
|
+
url: r.url,
|
|
618
|
+
resourceType: r.resourceType,
|
|
619
|
+
status: r.response?.status,
|
|
620
|
+
response_body: truncate(r.response?.body, RESPONSE_BODY_LIMIT),
|
|
621
|
+
}));
|
|
622
|
+
|
|
623
|
+
log(
|
|
624
|
+
`compiling playbook from ${session.events.length} events / ${xhrs.length} XHRs / ${session.narration.length} narration lines…`,
|
|
625
|
+
);
|
|
626
|
+
|
|
627
|
+
const slimmed = {
|
|
628
|
+
site: session.site,
|
|
629
|
+
url: session.url,
|
|
630
|
+
candidate: opts.candidate,
|
|
631
|
+
sharedContext: opts.sharedContext,
|
|
632
|
+
narration: session.narration,
|
|
633
|
+
events: session.events,
|
|
634
|
+
requests: xhrs,
|
|
635
|
+
};
|
|
636
|
+
|
|
637
|
+
// 5. Main compilation LLM call.
|
|
638
|
+
const promptPath = pathJoin(PROMPTS_DIR, 'playbook-compilation.md');
|
|
639
|
+
if (!existsSync(promptPath)) {
|
|
640
|
+
throw new Error(
|
|
641
|
+
`Prompt not found at ${promptPath}\n→ this is an Imprint installation problem.`,
|
|
642
|
+
);
|
|
643
|
+
}
|
|
644
|
+
const systemPrompt = `${readFileSync(promptPath, 'utf8')}${
|
|
645
|
+
opts.candidate
|
|
646
|
+
? `\n\nCandidate scope:\nCompile only this candidate: ${JSON.stringify(opts.candidate, null, 2)}\nShared context: ${JSON.stringify(opts.sharedContext ?? {}, null, 2)}\nThe playbook toolName and parameters must match the selected candidate/workflow, not any other action in the recording.\n`
|
|
647
|
+
: ''
|
|
648
|
+
}`;
|
|
649
|
+
|
|
650
|
+
const llm = resolveProvider(opts.llmConfig ?? {});
|
|
651
|
+
|
|
652
|
+
let playbook: Playbook | undefined;
|
|
653
|
+
let lastResult = await llm.analyze(systemPrompt, slimmed);
|
|
654
|
+
let llmInputTokens = lastResult.inputTokens;
|
|
655
|
+
let llmOutputTokens = lastResult.outputTokens;
|
|
656
|
+
let llmDurationMs = lastResult.durationMs;
|
|
657
|
+
let lastErr: unknown;
|
|
658
|
+
for (let attempt = 0; attempt < 2; attempt++) {
|
|
659
|
+
try {
|
|
660
|
+
playbook = parsePlaybook(stripCodeFences(lastResult.text).trim());
|
|
661
|
+
lastErr = undefined;
|
|
662
|
+
break;
|
|
663
|
+
} catch (err) {
|
|
664
|
+
lastErr = err;
|
|
665
|
+
if (attempt === 0) {
|
|
666
|
+
log('playbook YAML failed to parse, retrying with error feedback…');
|
|
667
|
+
const fixPrompt = `Your previous output was invalid YAML. The parser error was:\n\n${err instanceof Error ? err.message : String(err)}\n\nFix the YAML and return the corrected playbook. Output ONLY valid YAML, no prose.`;
|
|
668
|
+
lastResult = await llm.analyze(systemPrompt, `${JSON.stringify(slimmed)}\n\n${fixPrompt}`);
|
|
669
|
+
llmInputTokens = addNullable(llmInputTokens, lastResult.inputTokens);
|
|
670
|
+
llmOutputTokens = addNullable(llmOutputTokens, lastResult.outputTokens);
|
|
671
|
+
llmDurationMs += lastResult.durationMs;
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
if (lastErr) {
|
|
676
|
+
throw new Error(
|
|
677
|
+
`Compiled playbook failed to parse: ${lastErr instanceof Error ? lastErr.message : String(lastErr)}\nRaw output:\n${lastResult.text.slice(0, 1500)}`,
|
|
678
|
+
);
|
|
679
|
+
}
|
|
680
|
+
if (!playbook) {
|
|
681
|
+
throw new Error('Playbook was not assigned after compile loop — this should not happen.');
|
|
682
|
+
}
|
|
683
|
+
|
|
684
|
+
if (opts.candidate && playbook.toolName !== opts.candidate.toolName) {
|
|
685
|
+
throw new Error(
|
|
686
|
+
`Compiled playbook toolName "${playbook.toolName}" does not match selected candidate "${opts.candidate.toolName}".`,
|
|
687
|
+
);
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
const outPath =
|
|
691
|
+
opts.outPath ?? resolveDefaultCompilePlaybookPath(session.site, playbook.toolName);
|
|
692
|
+
mkdirSync(dirname(outPath), { recursive: true });
|
|
693
|
+
writeFileSync(outPath, `${stripCodeFences(lastResult.text).trim()}\n`);
|
|
694
|
+
|
|
695
|
+
return {
|
|
696
|
+
playbook,
|
|
697
|
+
playbookPath: outPath,
|
|
698
|
+
inputTokens: addNullable(triageTokens.input, llmInputTokens),
|
|
699
|
+
outputTokens: addNullable(triageTokens.output, llmOutputTokens),
|
|
700
|
+
durationMs: triageTokens.durationMs + llmDurationMs,
|
|
701
|
+
};
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
/**
 * Adds two counters where `null` stands for "no value reported".
 *
 * @param a - First operand, or `null` when unavailable.
 * @param b - Second operand, or `null` when unavailable.
 * @returns `null` only when both operands are `null`; otherwise the sum, with
 *   a single missing operand counted as zero.
 */
function addNullable(a: number | null, b: number | null): number | null {
  const nothingReported = a === null && b === null;
  if (nothingReported) {
    return null;
  }
  // One side may still be missing; it contributes nothing to the total.
  const left = a ?? 0;
  const right = b ?? 0;
  return left + right;
}
|
|
708
|
+
|
|
709
|
+
/**
 * Caps a string at `limit` UTF-16 code units and appends a marker recording
 * the original length when anything was cut.
 *
 * @param s - Text to cap. `undefined` and the empty string both yield `undefined`.
 * @param limit - Maximum number of code units to keep from the start of `s`.
 * @returns `undefined` for missing/empty input, `s` unchanged when it already
 *   fits, otherwise the kept prefix followed by
 *   `…(truncated, original length N)`.
 */
function truncate(s: string | undefined, limit: number): string | undefined {
  if (!s) return undefined;
  if (s.length <= limit) return s;

  // Never cut between the two halves of a surrogate pair: a lone high
  // surrogate is not well-formed Unicode and can corrupt (or be rejected in)
  // the serialized payload. Drop the dangling half instead.
  let end = limit;
  if (end > 0) {
    const lastKept = s.charCodeAt(end - 1);
    const firstDropped = s.charCodeAt(end);
    const splitsPair =
      lastKept >= 0xd800 &&
      lastKept <= 0xdbff &&
      firstDropped >= 0xdc00 &&
      firstDropped <= 0xdfff;
    if (splitsPair) end -= 1;
  }

  return `${s.slice(0, end)}…(truncated, original length ${s.length})`;
}
|
|
714
|
+
|
|
715
|
+
/**
 * Unwraps text that is entirely enclosed in a single Markdown code fence.
 *
 * The opening fence may carry a word-character info string (e.g. `yaml`) and
 * trailing spaces/tabs; both LF and CRLF line endings are accepted.
 *
 * @param s - Raw text, possibly wrapped in a code fence.
 * @returns The fenced body (which may be empty) when the whole trimmed input
 *   is one fenced block; otherwise the trimmed input unchanged.
 */
function stripCodeFences(s: string): string {
  const trimmed = s.trim();
  // `$` has no `m` flag, so the closing fence must be the very end of the
  // input; the lazy body therefore spans everything between the two fences.
  const fenced = /^```\w*[ \t]*\r?\n([\s\S]*?)\r?\n```$/.exec(trimmed);
  const inner = fenced?.[1];
  // Compare against undefined rather than truthiness so an empty fenced body
  // is unwrapped to '' instead of being returned with its fences intact.
  return inner !== undefined ? inner : trimmed;
}
|