imprint-mcp 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +168 -0
- package/LICENSE +21 -0
- package/README.md +322 -0
- package/examples/discoverandgo/README.md +57 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/cron.json +8 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/index.ts +89 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/workflow.json +39 -0
- package/examples/echo/README.md +37 -0
- package/examples/echo/echo_test/index.ts +31 -0
- package/examples/google-flights/search_google_flights/index.ts +101 -0
- package/examples/google-flights/search_google_flights/parser.test.ts +140 -0
- package/examples/google-flights/search_google_flights/parser.ts +189 -0
- package/examples/google-flights/search_google_flights/playbook.yaml +130 -0
- package/examples/google-flights/search_google_flights/workflow.json +48 -0
- package/examples/google-hotels/search_google_hotels/index.ts +194 -0
- package/examples/google-hotels/search_google_hotels/parser.test.ts +168 -0
- package/examples/google-hotels/search_google_hotels/parser.ts +330 -0
- package/examples/google-hotels/search_google_hotels/playbook.yaml +125 -0
- package/examples/google-hotels/search_google_hotels/workflow.json +111 -0
- package/examples/namecheap-domains/search_namecheap_domains/index.ts +144 -0
- package/examples/namecheap-domains/search_namecheap_domains/parser.ts +380 -0
- package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +50 -0
- package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +136 -0
- package/examples/namecheap-domains/search_namecheap_domains/workflow.json +97 -0
- package/examples/southwest/README.md +81 -0
- package/examples/southwest/search_southwest_flights/backends.json +23 -0
- package/examples/southwest/search_southwest_flights/cron.json +19 -0
- package/examples/southwest/search_southwest_flights/index.ts +110 -0
- package/examples/southwest/search_southwest_flights/playbook.yaml +46 -0
- package/examples/southwest/search_southwest_flights/workflow.json +54 -0
- package/package.json +78 -0
- package/prompts/compile-agent.md +580 -0
- package/prompts/intent-detection.md +198 -0
- package/prompts/playbook-compilation.md +279 -0
- package/prompts/request-triage.md +74 -0
- package/prompts/tool-candidate-detection.md +104 -0
- package/src/cli.ts +1287 -0
- package/src/imprint/agent.ts +468 -0
- package/src/imprint/app-api-hosts.ts +53 -0
- package/src/imprint/backend-ladder.ts +568 -0
- package/src/imprint/check.ts +136 -0
- package/src/imprint/chromium.ts +211 -0
- package/src/imprint/claude-cli-compile.ts +640 -0
- package/src/imprint/cli-credential.ts +394 -0
- package/src/imprint/codex-cli-compile.ts +712 -0
- package/src/imprint/compile-agent-types.ts +40 -0
- package/src/imprint/compile-agent.ts +404 -0
- package/src/imprint/compile-tools.ts +1389 -0
- package/src/imprint/compile.ts +720 -0
- package/src/imprint/cookie-jar.ts +246 -0
- package/src/imprint/credential-bundle.ts +195 -0
- package/src/imprint/credential-extract.ts +290 -0
- package/src/imprint/credential-store.ts +707 -0
- package/src/imprint/cron.ts +312 -0
- package/src/imprint/doctor.ts +223 -0
- package/src/imprint/emit.ts +154 -0
- package/src/imprint/etld.ts +134 -0
- package/src/imprint/freeform-redact.ts +216 -0
- package/src/imprint/inject-listener.ts +137 -0
- package/src/imprint/install.ts +795 -0
- package/src/imprint/integrations.ts +385 -0
- package/src/imprint/is-compiled.ts +2 -0
- package/src/imprint/json-path.ts +100 -0
- package/src/imprint/llm.ts +998 -0
- package/src/imprint/load-json.ts +54 -0
- package/src/imprint/log.ts +33 -0
- package/src/imprint/login.ts +166 -0
- package/src/imprint/mcp-compile-server.ts +282 -0
- package/src/imprint/mcp-maintenance.ts +1790 -0
- package/src/imprint/mcp-server.ts +350 -0
- package/src/imprint/multi-progress.ts +69 -0
- package/src/imprint/notify.ts +155 -0
- package/src/imprint/paths.ts +64 -0
- package/src/imprint/playbook-parser.ts +21 -0
- package/src/imprint/playbook-runner.ts +465 -0
- package/src/imprint/probe-backends.ts +251 -0
- package/src/imprint/progress.ts +28 -0
- package/src/imprint/record.ts +470 -0
- package/src/imprint/redact.ts +550 -0
- package/src/imprint/replay-capture.ts +387 -0
- package/src/imprint/request-context.ts +66 -0
- package/src/imprint/runtime-link.ts +73 -0
- package/src/imprint/runtime.ts +942 -0
- package/src/imprint/sensitive-keys.ts +156 -0
- package/src/imprint/session-diff.ts +409 -0
- package/src/imprint/session-merge.ts +198 -0
- package/src/imprint/session-writer.ts +149 -0
- package/src/imprint/sites.ts +27 -0
- package/src/imprint/stealth-fetch.ts +434 -0
- package/src/imprint/teach-state.ts +235 -0
- package/src/imprint/teach.ts +2120 -0
- package/src/imprint/tool-candidates.ts +423 -0
- package/src/imprint/tool-loader.ts +186 -0
- package/src/imprint/tool-selection.ts +70 -0
- package/src/imprint/tracing.ts +508 -0
- package/src/imprint/types.ts +472 -0
- package/src/imprint/version.ts +21 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared types for the compile-agent surface.
|
|
3
|
+
*
|
|
4
|
+
* Lives in its own file so both compile-agent.ts (the in-process loop driver
|
|
5
|
+
* for anthropic-api) and claude-cli-compile.ts (the claude-cli MCP driver)
|
|
6
|
+
* can reference them without importing each other.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { AgentProgress } from './agent.ts';
|
|
10
|
+
|
|
11
|
+
/**
 * Progress event emitted while a compile run is in flight: everything the
 * underlying agent loop reports (`AgentProgress`), plus where the run
 * currently stands in the external-verification retry sequence.
 */
export interface CompileAgentProgress extends AgentProgress {
  /**
   * Verification cycle currently executing, counted from 1. The first cycle
   * is the initial agent run; each later cycle is a re-run triggered because
   * the agent called done() but external verification rejected the output.
   */
  verificationCycle: number;

  /** Upper bound on the number of verification cycles (typically 5). */
  maxVerificationCycles: number;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Final report of a compile run, returned by every compile driver.
 */
export interface CompileAgentResult {
  /** Set to true only when external verification passed. */
  success: boolean;

  /** Reason the run stopped: done, give_up, timeout, soft_cap, or error. */
  outcome: 'done' | 'give_up' | 'timeout' | 'soft_cap' | 'error';

  /** Location of workflow.json, present only if the file was written. */
  workflowPath?: string;

  /** Location of parser.ts, present only if the file was written. */
  parserPath?: string;

  /** Location of parser.test.ts, present only if the file was written. */
  parserTestPath?: string;

  /** Human-readable summary, error message, or give-up reason. */
  message: string;

  /** File the conversation log was saved to. */
  conversationLogPath: string;

  /** Number of agent turns consumed by the run. */
  turns: number;

  /** Wall-clock duration of the run, in milliseconds. */
  durationMs: number;

  /** Input tokens consumed by the run. */
  inputTokens: number;

  /** Output tokens produced by the run. */
  outputTokens: number;

  /** Cache-read input tokens, as reported by the driver. */
  cacheReadInputTokens: number;

  /** Cache-creation input tokens, as reported by the driver. */
  cacheCreationInputTokens: number;
}
|
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agentic compilation pipeline: session → workflow.json + parser.ts + parser.test.ts.
|
|
3
|
+
*
|
|
4
|
+
* The agent loop inspects the captured session, writes code, tests it, and
|
|
5
|
+
* iterates until external verification passes. See prompts/compile-agent.md
|
|
6
|
+
* for the system prompt.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs';
|
|
10
|
+
import { join as pathJoin } from 'node:path';
|
|
11
|
+
import {
|
|
12
|
+
type AgentProgress,
|
|
13
|
+
type AgentResult,
|
|
14
|
+
type OnDeadlineReached,
|
|
15
|
+
doneTool,
|
|
16
|
+
giveUpTool,
|
|
17
|
+
runAgentLoop,
|
|
18
|
+
} from './agent.ts';
|
|
19
|
+
import { compileViaClaudeCli } from './claude-cli-compile.ts';
|
|
20
|
+
import { compileViaCodexCli } from './codex-cli-compile.ts';
|
|
21
|
+
import type { CompileAgentProgress, CompileAgentResult } from './compile-agent-types.ts';
|
|
22
|
+
import { buildCompileTools, externalVerification } from './compile-tools.ts';
|
|
23
|
+
import { type Replacement, extractCredentials } from './credential-extract.ts';
|
|
24
|
+
import {
|
|
25
|
+
type LLMOptions,
|
|
26
|
+
type ProviderName,
|
|
27
|
+
type ToolUseProvider,
|
|
28
|
+
isToolUseProvider,
|
|
29
|
+
preferredAgentModel,
|
|
30
|
+
resolveProvider,
|
|
31
|
+
} from './llm.ts';
|
|
32
|
+
import { loadJsonFile } from './load-json.ts';
|
|
33
|
+
import { createLog } from './log.ts';
|
|
34
|
+
import { localSiteDir } from './paths.ts';
|
|
35
|
+
import { detectPageMintedHeaders, redactSession } from './redact.ts';
|
|
36
|
+
import type { ClassifiedValue } from './session-diff.ts';
|
|
37
|
+
import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
|
|
38
|
+
import { type Session, SessionSchema } from './types.ts';
|
|
39
|
+
|
|
40
|
+
export type { CompileAgentProgress } from './compile-agent-types.ts';
|
|
41
|
+
|
|
42
|
+
// Logger tagged with this module's name.
const log = createLog('compile-agent');

// Package root: two directory levels above the directory containing this file.
const REPO_ROOT = pathJoin(import.meta.dir, '../..');

// Directory holding the prompt files (e.g. compile-agent.md).
const PROMPTS_DIR = pathJoin(REPO_ROOT, 'prompts');
|
|
46
|
+
|
|
47
|
+
/**
 * Thin wrapper around `preferredAgentModel`, exposed so callers (cli, teach)
 * can display the selected model before kicking off the agent loop.
 *
 * @param provider - Name of the LLM provider the compile run will use.
 * @returns Whatever `preferredAgentModel` returns for that provider.
 */
export function resolveCompileAgentModel(provider: ProviderName): string {
  const model = preferredAgentModel(provider);
  return model;
}
|
|
52
|
+
|
|
53
|
+
/** Inputs accepted by `compileAgent`. Only `sessionPath` is required. */
interface CompileAgentOptions {
  /** Recorded session JSON to compile; may be absolute or relative. */
  sessionPath: string;

  /** Hard wall-clock budget in milliseconds. Defaults to 10 minutes. */
  maxDurationMs?: number;

  /** LLM configuration overrides (region, model, project). */
  llmConfig?: LLMOptions;

  /**
   * Test hook: a pre-configured provider used instead of resolving one from
   * `llmConfig`. Production callers leave this unset.
   */
  llmProvider?: ToolUseProvider;

  /** Receives progress events annotated with verification-cycle info. */
  onProgress?: (p: CompileAgentProgress) => void;

  /**
   * Keep parser.test.ts on disk after a successful verification. It is
   * deleted by default: the test reads the gitignored redacted session at
   * $IMPRINT_SESSION_PATH, so it is not reproducible elsewhere and leaving
   * it around only confuses `bun test`. Set to true (via `--keep-test`) to
   * inspect the agent's test output locally.
   */
  keepTest?: boolean;

  /**
   * Credential placeholders to inject before redaction. `imprint teach`
   * supplies these when its credential-extract pass found a login pair.
   * Direct `imprint generate` callers omit them, in which case extraction
   * runs inline (best-effort, no prompts; values reach the credential
   * manager only through the teach flow).
   */
  replacements?: Replacement[];

  /** Output directory for workflow.json / parser.ts / parser.test.ts. */
  outDir?: string;

  /** Restricts the compile to one candidate during multi-tool teach. */
  candidate?: ToolCandidate;

  /** Auth/helper guidance produced once per multi-tool teach run. */
  sharedContext?: SharedCompileContext;

  /** Value classifications from the dual-pass replay-and-diff step. */
  classifications?: ClassifiedValue[];

  /**
   * Credential values extracted during teach; handed to integration tests
   * through an environment variable.
   */
  teachCredentials?: { site: string; values: Record<string, string> };

  /**
   * Invoked when the wall-clock deadline is hit. Return a number of
   * milliseconds to extend the deadline, or null to time out.
   */
  onDeadlineReached?: OnDeadlineReached;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Compile a recorded session into workflow.json + parser.ts (+ parser.test.ts).
 *
 * Steps: load and validate the session, redact it if it does not already look
 * redacted, prepare the tool directory (package.json + `bun install`), then
 * either delegate to a CLI driver (claude-cli / codex-cli) or run the
 * in-process agent loop. The in-process loop is re-entered, up to
 * MAX_VERIFICATION_CYCLES times, whenever the agent calls done() but external
 * verification reports failures. The conversation log is written to
 * `.compile-log.json` in the tool directory before returning.
 *
 * @param opts - See `CompileAgentOptions`.
 * @returns A `CompileAgentResult`; `success` is true only when the final
 *   outcome is 'done' (i.e. external verification reported no failures).
 * @throws Error if the system prompt file is missing, or if the resolved
 *   provider does not support tool use. Errors thrown by the session loader
 *   and other helpers propagate unchanged.
 */
export async function compileAgent(opts: CompileAgentOptions): Promise<CompileAgentResult> {
  const startTime = Date.now();

  // 1. Load + validate the session
  let session: Session = loadJsonFile(
    opts.sessionPath,
    SessionSchema,
    {
      notFound: '→ run `imprint record <site>` to create one.',
      notJson: `→ if it's a partial .jsonl, run \`imprint assemble ${opts.sessionPath}\` first.`,
      badSchema: '→ check the file came from `imprint record`.',
    },
    'session',
  );

  // 2. Auto-redact if not already redacted (preserves any ${credential.X}
  //    placeholders that teach.ts already injected). When replacements are
  //    passed in via opts (the teach path), we honor them; otherwise we run
  //    extraction inline so direct `imprint generate` callers also get
  //    credential-aware redaction (values are NOT persisted to the keychain
  //    on this path — that requires going through `imprint teach` or
  //    `imprint credential set`).
  const looksRedacted = JSON.stringify(session).includes('[REDACTED:');
  if (!looksRedacted) {
    let replacements = opts.replacements;
    if (!replacements) {
      const auto = extractCredentials(session);
      replacements = auto.replacements;
    }
    const pageMintedHeaders = detectPageMintedHeaders(session);
    const r = redactSession(session, { replacements, keepHeaders: pageMintedHeaders });
    session = r.session;
    if (r.stats.totalRedactions > 0 || r.stats.placeholdersInjected > 0) {
      const freeformNote =
        r.stats.freeformRedactions > 0
          ? ` (${r.stats.freeformRedactions} free-form finding(s))`
          : '';
      log(
        `redacted ${r.stats.totalRedactions} value(s)${freeformNote} and injected ${r.stats.placeholdersInjected} credential placeholder(s) before sending to LLM`,
      );
    }
  }

  // 3. Determine the generated tool directory.
  const absoluteToolDir = opts.outDir ?? localSiteDir(session.site);

  // 3b. Ensure type dependencies exist so the agent doesn't waste turns
  //     discovering and installing @types/bun + @types/node during the loop.
  mkdirSync(absoluteToolDir, { recursive: true });
  const harnessPkgPath = pathJoin(absoluteToolDir, 'package.json');
  if (!existsSync(harnessPkgPath)) {
    writeFileSync(
      harnessPkgPath,
      JSON.stringify(
        {
          name: `imprint-tool-${session.site}`,
          private: true,
          devDependencies: {
            '@types/bun': 'latest',
            '@types/node': 'latest',
            'bun-types': 'latest',
          },
        },
        null,
        2,
      ),
      'utf8',
    );
  }
  const harnessNmPath = pathJoin(absoluteToolDir, 'node_modules');
  if (!existsSync(harnessNmPath)) {
    const install = Bun.spawnSync(['bun', 'install'], { cwd: absoluteToolDir });
    // The install is only an optimization, so a failure is not fatal — but it
    // should be visible rather than silently ignored.
    if (install.exitCode !== 0) {
      const installStderr = install.stderr?.toString().trim();
      log(
        `bun install in ${absoluteToolDir} exited with code ${install.exitCode}; continuing without pre-installed type dependencies${installStderr ? `:\n${installStderr}` : ''}`,
      );
    }
  }

  // 4. Load the system prompt
  const systemPromptPath = pathJoin(PROMPTS_DIR, 'compile-agent.md');
  if (!existsSync(systemPromptPath)) {
    throw new Error(
      `System prompt not found at ${systemPromptPath}\n→ this is an Imprint installation problem; please file an issue at https://github.com/ashaychangwani/imprint/issues with the steps you ran.`,
    );
  }
  const systemPrompt = readFileSync(systemPromptPath, 'utf8');

  // 5. Build the toolset (shared with the MCP server used by the claude-cli path)
  // NOTE(review): startsWith('/') only recognizes POSIX absolute paths, and
  // relative paths are resolved against REPO_ROOT rather than the process
  // cwd — confirm callers always pass a path for which this is correct.
  const sessionPathAbs = opts.sessionPath.startsWith('/')
    ? opts.sessionPath
    : pathJoin(REPO_ROOT, opts.sessionPath);
  const tools = [
    ...buildCompileTools(session, absoluteToolDir, sessionPathAbs, {
      candidate: opts.candidate,
      sharedContext: opts.sharedContext,
      classifications: opts.classifications,
      teachCredentials: opts.teachCredentials,
    }),
    doneTool(),
    giveUpTool(),
  ];

  // 6. Build the initial user message
  const initialUserMessage = `A new compile task is starting.

Session path: ${sessionPathAbs}
Tool directory: ${absoluteToolDir}
You will write artifacts into the tool directory.
${formatCandidateContext(opts.candidate, opts.sharedContext)}

Begin by calling read_session_summary to orient yourself, then proceed per the system prompt.`;

  // 7. Compute deadline
  const deadlineMs = Date.now() + (opts.maxDurationMs ?? 10 * 60 * 1000);

  // 8. Instantiate provider (or use injected one for testing).
  //    CLI providers take a different path: they don't implement Anthropic
  //    messageWithTools, so we shell out with the same toolset registered as a
  //    stdio MCP server. The user's CLI auth drives the agent loop end-to-end.
  let provider: ToolUseProvider;
  if (opts.llmProvider) {
    provider = opts.llmProvider;
  } else {
    const resolvedProvider = resolveProvider(opts.llmConfig);
    if (resolvedProvider.name === 'claude-cli') {
      return await compileViaClaudeCli({
        session,
        absoluteToolDir,
        sessionPath: opts.sessionPath,
        systemPromptPath,
        deadlineMs,
        onProgress: opts.onProgress,
        onDeadlineReached: opts.onDeadlineReached,
        startTime,
        keepTest: opts.keepTest,
        candidate: opts.candidate,
        sharedContext: opts.sharedContext,
      });
    }
    if (resolvedProvider.name === 'codex-cli') {
      // NOTE(review): unlike the claude-cli path, onDeadlineReached is not
      // forwarded here — confirm whether the codex driver supports it.
      return await compileViaCodexCli({
        session,
        absoluteToolDir,
        sessionPath: opts.sessionPath,
        systemPromptPath,
        deadlineMs,
        onProgress: opts.onProgress,
        startTime,
        keepTest: opts.keepTest,
        candidate: opts.candidate,
        sharedContext: opts.sharedContext,
      });
    }
    if (!isToolUseProvider(resolvedProvider)) {
      throw new Error(
        [
          `provider "${resolvedProvider.name}" does not support tool use, which the compile-agent requires.`,
          '→ use one of: claude-cli, codex-cli, anthropic-api (install a supported CLI, or set ANTHROPIC_API_KEY)',
        ].join('\n'),
      );
    }
    provider = resolvedProvider;
  }

  // 9. Run the agent loop with verification sub-loop
  let totalTurns = 0;
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let outcome: AgentResult['outcome'] = 'error';
  let message = '';
  let conversationLog: AgentResult['conversationLog'] = [];

  const MAX_VERIFICATION_CYCLES = 5;
  let verificationCycle = 0;
  let result: AgentResult | null = null;
  let currentInitialMessage = initialUserMessage;

  while (verificationCycle < MAX_VERIFICATION_CYCLES) {
    verificationCycle++;

    // Wrap the user's onProgress callback to inject verification cycle info
    const userOnProgress = opts.onProgress;
    const wrappedOnProgress = userOnProgress
      ? (p: AgentProgress) =>
          userOnProgress({
            ...p,
            verificationCycle,
            maxVerificationCycles: MAX_VERIFICATION_CYCLES,
          })
      : undefined;

    // Run the agent loop
    result = await runAgentLoop({
      systemPrompt,
      initialUserMessage: currentInitialMessage,
      tools,
      deadlineMs,
      llm: provider,
      onProgress: wrappedOnProgress,
      onDeadlineReached: opts.onDeadlineReached,
    });

    // Totals and the conversation log accumulate across verification cycles.
    totalTurns += result.turns;
    totalInputTokens += result.inputTokens;
    totalOutputTokens += result.outputTokens;
    conversationLog = [...conversationLog, ...result.conversationLog];

    outcome = result.outcome;

    // If not done, break out
    if (result.outcome !== 'done') {
      message = buildMessageFromOutcome(result);
      break;
    }

    // Perform external verification
    const { failures, warnings } = await externalVerification(
      absoluteToolDir,
      session,
      sessionPathAbs,
      {
        expectedToolName: opts.candidate?.toolName,
        likelyParams: opts.candidate?.likelyParams,
        candidateRequestSeqs: opts.candidate?.requestSeqs,
      },
    );

    if (warnings.length > 0) {
      log(`verification warnings (non-blocking):\n${warnings.join('\n')}`);
    }

    if (failures.length === 0) {
      // Success (possibly with warnings)
      message = result.doneSummary ?? 'Task completed';
      if (warnings.length > 0) {
        message += `\n\nWarnings:\n${warnings.join('\n')}`;
      }
      if (!opts.keepTest) {
        for (const f of ['parser.test.ts', 'integration.test.ts']) {
          const testPath = pathJoin(absoluteToolDir, f);
          if (existsSync(testPath)) unlinkSync(testPath);
        }
      }
      break;
    }

    // Verification failed — re-enter the loop with a continuation message
    if (verificationCycle >= MAX_VERIFICATION_CYCLES) {
      // Out of cycles: downgrade the agent's 'done' to 'error' so that
      // `success` below is false.
      outcome = 'error';
      message = `Verification failed after ${MAX_VERIFICATION_CYCLES} cycles. Final failures:\n${failures.join('\n')}`;
      break;
    }

    log(`verification failed (cycle ${verificationCycle}), resuming agent loop...`);
    currentInitialMessage = `You called done but verification failed:

${failures.map((f) => `- ${f}`).join('\n')}

Resume your work. Read the files you wrote (workflow.json, parser.ts, parser.test.ts), fix the issues, re-run tests, and call done again when fixed.`;
  }

  // 10. Persist conversation log
  mkdirSync(absoluteToolDir, { recursive: true });
  const conversationLogPath = pathJoin(absoluteToolDir, '.compile-log.json');
  writeFileSync(conversationLogPath, JSON.stringify(conversationLog, null, 2), 'utf8');

  // 11. Return the result
  const workflowPath = pathJoin(absoluteToolDir, 'workflow.json');
  const parserPath = pathJoin(absoluteToolDir, 'parser.ts');
  const parserTestPath = pathJoin(absoluteToolDir, 'parser.test.ts');

  return {
    success: outcome === 'done',
    outcome,
    workflowPath: existsSync(workflowPath) ? workflowPath : undefined,
    parserPath: existsSync(parserPath) ? parserPath : undefined,
    // parserTestPath only set if it survived (--keep-test); otherwise undefined.
    parserTestPath: existsSync(parserTestPath) ? parserTestPath : undefined,
    message,
    conversationLogPath,
    turns: totalTurns,
    durationMs: Date.now() - startTime,
    inputTokens: totalInputTokens,
    outputTokens: totalOutputTokens,
    // Cache token counts are not tracked on this path; reported as 0.
    cacheReadInputTokens: 0,
    cacheCreationInputTokens: 0,
  };
}
|
|
375
|
+
|
|
376
|
+
/**
 * Turn a non-successful agent-loop result into a human-readable message.
 *
 * @param result - The agent loop result; only `outcome`, `giveUpReason`,
 *   `giveUpDetail` and `errorMessage` are read.
 * @returns A one- or two-line message. For 'give_up' the detail line is
 *   appended only when a non-empty detail exists, so the message never ends
 *   in a dangling newline. Any outcome not listed (including 'done') yields
 *   'Unknown outcome'.
 */
function buildMessageFromOutcome(result: AgentResult): string {
  switch (result.outcome) {
    case 'give_up': {
      const headline = `Agent gave up: ${result.giveUpReason ?? 'unknown reason'}`;
      return result.giveUpDetail ? `${headline}\n${result.giveUpDetail}` : headline;
    }
    case 'timeout':
      return 'Agent loop timed out before completion';
    case 'soft_cap':
      return 'Agent loop exceeded soft turn cap (100 turns)';
    case 'error':
      return `Agent loop error: ${result.errorMessage ?? 'unknown error'}`;
    default:
      return 'Unknown outcome';
  }
}
|
|
390
|
+
|
|
391
|
+
/**
 * Render the candidate / shared-context section of the initial user message.
 *
 * @param candidate - Selected tool candidate, if any; serialized as pretty JSON.
 * @param sharedContext - Shared compile context, if any; serialized as pretty JSON.
 * @returns An empty string when neither argument is given. Otherwise a block
 *   that starts with a newline, shows each value (or '(none)' for a missing
 *   one), and ends with the instruction to compile only the selected candidate.
 */
function formatCandidateContext(
  candidate: ToolCandidate | undefined,
  sharedContext: SharedCompileContext | undefined,
): string {
  const nothingToShow = !candidate && !sharedContext;
  if (nothingToShow) {
    return '';
  }

  let candidateText = '(none)';
  if (candidate) {
    candidateText = JSON.stringify(candidate, null, 2);
  }

  let sharedText = '(none)';
  if (sharedContext) {
    sharedText = JSON.stringify(sharedContext, null, 2);
  }

  // The leading '' entry reproduces the newline that opens the section.
  const sections = [
    '',
    'Selected candidate context:',
    candidateText,
    '',
    'Shared compile context:',
    sharedText,
    '',
    'Compile only the selected candidate. Do not create tools for other actions in the recording.',
  ];
  return sections.join('\n');
}
|