martin-loop 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +362 -344
  3. package/dist/bin/martin-loop.js +23 -0
  4. package/dist/index.d.ts +22 -0
  5. package/dist/index.js +31 -0
  6. package/dist/vendor/adapters/claude-cli.d.ts +89 -0
  7. package/dist/vendor/adapters/claude-cli.js +555 -0
  8. package/dist/vendor/adapters/cli-bridge.d.ts +28 -0
  9. package/dist/vendor/adapters/cli-bridge.js +127 -0
  10. package/dist/vendor/adapters/direct-provider.d.ts +10 -0
  11. package/dist/vendor/adapters/direct-provider.js +41 -0
  12. package/dist/vendor/adapters/index.d.ts +5 -0
  13. package/dist/vendor/adapters/index.js +5 -0
  14. package/dist/vendor/adapters/runtime-support.d.ts +14 -0
  15. package/dist/vendor/adapters/runtime-support.js +52 -0
  16. package/dist/vendor/adapters/stub-agent-cli.d.ts +8 -0
  17. package/dist/vendor/adapters/stub-agent-cli.js +41 -0
  18. package/dist/vendor/adapters/stub-direct-provider.d.ts +8 -0
  19. package/dist/vendor/adapters/stub-direct-provider.js +10 -0
  20. package/dist/vendor/cli/bin/martin.d.ts +2 -0
  21. package/dist/vendor/cli/bin/martin.js +19 -0
  22. package/dist/vendor/cli/index.d.ts +39 -0
  23. package/dist/vendor/cli/index.js +634 -0
  24. package/dist/vendor/cli/persistence.d.ts +34 -0
  25. package/dist/vendor/cli/persistence.js +71 -0
  26. package/dist/vendor/contracts/governance.d.ts +21 -0
  27. package/dist/vendor/contracts/governance.js +12 -0
  28. package/dist/vendor/contracts/index.d.ts +330 -0
  29. package/dist/vendor/contracts/index.js +203 -0
  30. package/dist/vendor/core/compiler.d.ts +50 -0
  31. package/dist/vendor/core/compiler.js +47 -0
  32. package/dist/vendor/core/grounding.d.ts +37 -0
  33. package/dist/vendor/core/grounding.js +270 -0
  34. package/dist/vendor/core/index.d.ts +145 -0
  35. package/dist/vendor/core/index.js +1099 -0
  36. package/dist/vendor/core/leash.d.ts +48 -0
  37. package/dist/vendor/core/leash.js +408 -0
  38. package/dist/vendor/core/persistence/compiler.d.ts +18 -0
  39. package/dist/vendor/core/persistence/compiler.js +35 -0
  40. package/dist/vendor/core/persistence/index.d.ts +6 -0
  41. package/dist/vendor/core/persistence/index.js +4 -0
  42. package/dist/vendor/core/persistence/ledger.d.ts +23 -0
  43. package/dist/vendor/core/persistence/ledger.js +10 -0
  44. package/dist/vendor/core/persistence/store.d.ts +77 -0
  45. package/dist/vendor/core/persistence/store.js +84 -0
  46. package/dist/vendor/core/policy.d.ts +126 -0
  47. package/dist/vendor/core/policy.js +625 -0
  48. package/dist/vendor/core/rollback.d.ts +11 -0
  49. package/dist/vendor/core/rollback.js +219 -0
  50. package/docs/oss/EXAMPLES.md +126 -126
  51. package/docs/oss/OSS-BOUNDARY-REPORT.json +113 -113
  52. package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
  53. package/docs/oss/QUICKSTART.md +135 -135
  54. package/docs/oss/README.md +93 -93
  55. package/docs/oss/RELEASE-SURFACE-REPORT.json +45 -45
  56. package/docs/oss/RELEASE-SURFACE-REPORT.md +35 -35
  57. package/package.json +56 -54
@@ -0,0 +1,555 @@
1
+ /**
2
+ * Real agent-CLI adapters.
3
+ *
4
+ * Exports a generic factory (`createAgentCliAdapter`) and two pre-configured
5
+ * factories (`createClaudeCliAdapter`, `createCodexCliAdapter`) that spawn
6
+ * the respective AI coding CLI as a child subprocess.
7
+ *
8
+ * Usage in CLI:
9
+ * createClaudeCliAdapter({ workingDirectory: process.cwd() })
10
+ * createCodexCliAdapter({ workingDirectory: process.cwd() })
11
+ *
12
+ * MCP tools and integration tests use the same factories.
13
+ */
14
+ import { readGitExecutionArtifacts, runSubprocess, runVerification } from "./cli-bridge.js";
15
+ import { createAdapterCapabilities, normalizeStructuredErrors, normalizeUsage } from "./runtime-support.js";
16
+ // ---------------------------------------------------------------------------
17
+ // Cost estimation
18
+ //
19
+ // Token costs are estimated using a blended average across top models:
20
+ // Anthropic Sonnet, OpenAI GPT-4o Mini, Gemini Flash, Meta Llama 3.
21
+ // Override at runtime with --input-cost-per-1k / --output-cost-per-1k CLI
22
+ // flags or martin.config.yaml pricing section.
23
+ // ---------------------------------------------------------------------------
24
+ const BLENDED_INPUT_COST_PER_1K = 0.003; // $/1K input tokens
25
+ const BLENDED_OUTPUT_COST_PER_1K = 0.012; // $/1K output tokens
26
+ // Per-model overrides for common Claude models (fallback: blended average)
27
+ const MODEL_PRICING = {
28
+ "claude-opus-4-6": { inputPer1K: 0.015, outputPer1K: 0.075 },
29
+ "claude-sonnet-4-6": { inputPer1K: 0.003, outputPer1K: 0.015 },
30
+ "claude-haiku-4-5": { inputPer1K: 0.00025, outputPer1K: 0.00125 },
31
+ // Keep legacy names working
32
+ "claude-opus": { inputPer1K: 0.015, outputPer1K: 0.075 },
33
+ "claude-sonnet": { inputPer1K: 0.003, outputPer1K: 0.015 },
34
+ "claude-haiku": { inputPer1K: 0.00025, outputPer1K: 0.00125 }
35
+ };
36
+ function extractUsage(parsed, modelLabel) {
37
+ if (!parsed?.usage) {
38
+ return normalizeUsage({
39
+ actualUsd: 0,
40
+ tokensIn: 0,
41
+ tokensOut: 0,
42
+ provenance: "unavailable"
43
+ });
44
+ }
45
+ const tokensIn = (parsed.usage.inputTokens ?? parsed.usage.input_tokens ?? 0) +
46
+ (parsed.usage.cacheReadInputTokens ?? parsed.usage.cache_read_input_tokens ?? 0) +
47
+ (parsed.usage.cacheCreationInputTokens ?? parsed.usage.cache_creation_input_tokens ?? 0);
48
+ const tokensOut = parsed.usage.outputTokens ?? parsed.usage.output_tokens ?? 0;
49
+ const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
50
+ { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
51
+ const actualUsd = (tokensIn / 1000) * pricing.inputPer1K +
52
+ (tokensOut / 1000) * pricing.outputPer1K;
53
+ return normalizeUsage({
54
+ actualUsd: Number(actualUsd.toFixed(6)),
55
+ tokensIn,
56
+ tokensOut,
57
+ provenance: "actual"
58
+ });
59
+ }
60
+ // ---------------------------------------------------------------------------
61
+ // Structural failure hint detection
62
+ //
63
+ // Provides a classHint to failure-taxonomy based on structural evidence
64
+ // rather than keyword scanning (which suffers from false positives).
65
+ // ---------------------------------------------------------------------------
66
+ function inferStructuralClassHint(agentOutput, verificationSummary, exitCode, objective) {
67
+ // Exit code + stderr "Error:" pattern → syntax error
68
+ if (exitCode !== 0 && /\bError:/i.test(verificationSummary)) {
69
+ return "syntax_error";
70
+ }
71
+ // Agent output grossly longer than objective → scope creep signal
72
+ // (5× ratio heuristic: if the agent wrote 5× more than the objective length, flag it)
73
+ if (agentOutput.length > objective.length * 10 && agentOutput.length > 2000) {
74
+ return "scope_creep";
75
+ }
76
+ // Repeated identical short responses → stalled / hallucination
77
+ const trimmed = agentOutput.trim();
78
+ if (trimmed.length < 100 && trimmed.length > 0) {
79
+ // Very short response on a non-trivial task could be hallucination
80
+ return "hallucination";
81
+ }
82
+ return undefined;
83
+ }
84
+ // ---------------------------------------------------------------------------
85
+ // Generic factory
86
+ // ---------------------------------------------------------------------------
87
+ export function createAgentCliAdapter(options) {
88
+ const workingDirectory = options.workingDirectory ?? process.cwd();
89
+ const timeoutMs = options.timeoutMs ?? 300_000;
90
+ const verifyTimeoutMs = options.verifyTimeoutMs ?? 60_000;
91
+ const adapterId = `agent-cli:${options.adapterIdSuffix ?? options.command}`;
92
+ const supportsJsonOutput = options.supportsJsonOutput === true;
93
+ const adapter = {
94
+ adapterId,
95
+ kind: "agent-cli",
96
+ label: options.label ?? `${options.command} CLI adapter`,
97
+ metadata: {
98
+ providerId: options.command,
99
+ model: options.model ?? options.command,
100
+ transport: "cli",
101
+ capabilities: createAdapterCapabilities({
102
+ preflight: true,
103
+ usageSettlement: supportsJsonOutput,
104
+ diffArtifacts: true,
105
+ structuredErrors: true,
106
+ cachingSignals: supportsJsonOutput
107
+ })
108
+ },
109
+ async execute(request) {
110
+ const prompt = buildPrompt(request);
111
+ const estimatedUsage = estimateUsage(prompt, options.model ?? options.command);
112
+ // Preflight: bail if projected cost exceeds remaining budget
113
+ if (request.context.remainingBudgetUsd > 0) {
114
+ const projected = estimatePromptCost(prompt, options.model ?? "");
115
+ if (projected > request.context.remainingBudgetUsd * 0.95) {
116
+ return {
117
+ status: "failed",
118
+ summary: `Preflight: projected cost $${projected.toFixed(4)} exceeds remaining budget $${request.context.remainingBudgetUsd.toFixed(4)}.`,
119
+ usage: normalizeUsage({
120
+ actualUsd: projected,
121
+ estimatedUsd: projected,
122
+ tokensIn: estimatedUsage.tokensIn,
123
+ tokensOut: estimatedUsage.tokensOut,
124
+ provenance: "estimated"
125
+ }),
126
+ verification: { passed: false, summary: "Stopped before execution: budget preflight failed." },
127
+ failure: { message: "budget_preflight_exceeded", classHint: "budget_pressure" }
128
+ };
129
+ }
130
+ }
131
+ const args = options.argsBuilder(prompt);
132
+ // stdinPrompt: if argsBuilder signals stdin delivery by returning args ending with "--stdin-prompt",
133
+ // remove that sentinel and pass the prompt via stdin instead (avoids Windows shell-escaping issues).
134
+ const useStdin = args.at(-1) === "--stdin-prompt";
135
+ const spawnArgs = useStdin ? args.slice(0, -1) : args;
136
+ const agentResult = await runSubprocess(options.command, spawnArgs, {
137
+ cwd: workingDirectory,
138
+ timeoutMs,
139
+ spawnImpl: options.spawnImpl,
140
+ ...(useStdin ? { stdinData: prompt } : {})
141
+ });
142
+ if (agentResult.timedOut) {
143
+ return {
144
+ status: "failed",
145
+ summary: `${options.command} subprocess timed out before completing.`,
146
+ usage: normalizeUsage({
147
+ actualUsd: estimatedUsage.actualUsd,
148
+ estimatedUsd: estimatedUsage.actualUsd,
149
+ tokensIn: estimatedUsage.tokensIn,
150
+ tokensOut: estimatedUsage.tokensOut,
151
+ provenance: "estimated"
152
+ }),
153
+ verification: { passed: false, summary: "Subprocess timed out." },
154
+ failure: {
155
+ message: `${options.command} did not respond within ${String(timeoutMs)}ms. stalled`
156
+ }
157
+ };
158
+ }
159
+ if (agentResult.exitCode !== 0 && agentResult.stdout.trim().length === 0) {
160
+ return {
161
+ status: "failed",
162
+ summary: `${options.command} subprocess exited with an error.`,
163
+ usage: normalizeUsage({
164
+ actualUsd: 0,
165
+ tokensIn: 0,
166
+ tokensOut: 0,
167
+ provenance: "unavailable"
168
+ }),
169
+ verification: { passed: false, summary: "Subprocess error." },
170
+ failure: {
171
+ message: `${agentResult.stderr.trim() || `Exit code ${String(agentResult.exitCode)}`}. environment_mismatch`
172
+ }
173
+ };
174
+ }
175
+ // Parse JSON output if the CLI supports it (Claude with --output-format json)
176
+ let parsed;
177
+ if (supportsJsonOutput) {
178
+ try {
179
+ parsed = JSON.parse(agentResult.stdout);
180
+ }
181
+ catch {
182
+ // Fall through to plain-text handling
183
+ }
184
+ }
185
+ const agentText = parsed?.result ?? agentResult.stdout.trim();
186
+ const summary = truncate(agentText, 2000);
187
+ const usage = parsed?.usage
188
+ ? extractUsage(parsed, options.model)
189
+ : normalizeUsage({
190
+ actualUsd: estimatedUsage.actualUsd,
191
+ estimatedUsd: estimatedUsage.actualUsd,
192
+ tokensIn: estimatedUsage.tokensIn,
193
+ tokensOut: Math.max(estimatedUsage.tokensOut, Math.ceil(agentText.length / 4)),
194
+ provenance: "estimated"
195
+ });
196
+ const verificationStack = request.context.verificationStack;
197
+ const verification = await runVerification(request.context.verificationPlan, workingDirectory, verifyTimeoutMs, verificationStack, options.spawnImpl);
198
+ // Check for zero-diff (agent ran but made no file changes)
199
+ const repoRoot = request.context.repoRoot;
200
+ let noDiff = false;
201
+ if (repoRoot) {
202
+ noDiff = await checkNoDiff(repoRoot);
203
+ }
204
+ // Extract structured errors from stderr/stdout for better failure context
205
+ const structuredErrors = normalizeStructuredErrors(extractStructuredErrors(agentResult.stderr, agentResult.stdout));
206
+ const executionArtifacts = repoRoot
207
+ ? await readGitExecutionArtifacts(repoRoot, 5000, options.spawnImpl)
208
+ : undefined;
209
+ // Scope contract enforcement: check touched files against allowedPaths/deniedPaths
210
+ let scopeViolations = [];
211
+ const scopeCtx = request.context;
212
+ if (repoRoot && (scopeCtx.allowedPaths?.length || scopeCtx.deniedPaths?.length)) {
213
+ const diffResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], { cwd: repoRoot, timeoutMs: 5000 });
214
+ if (diffResult.exitCode === 0 && diffResult.stdout.trim()) {
215
+ const touchedFiles = diffResult.stdout.trim().split("\n").filter(Boolean);
216
+ const allowed = scopeCtx.allowedPaths ?? [];
217
+ const denied = scopeCtx.deniedPaths ?? [];
218
+ for (const file of touchedFiles) {
219
+ // Check denied patterns (simple glob-like: prefix or exact)
220
+ if (denied.some((d) => file === d || file.startsWith(d.replace(/\*+$/, "")))) {
221
+ scopeViolations.push(file);
222
+ continue;
223
+ }
224
+ // If allowedPaths specified, file must match at least one
225
+ if (allowed.length > 0 && !allowed.some((a) => file === a || file.startsWith(a.replace(/\*+$/, "")))) {
226
+ scopeViolations.push(file);
227
+ }
228
+ }
229
+ }
230
+ }
231
+ // Derive structural classHint from evidence, not keyword scanning
232
+ const structuralHint = inferStructuralClassHint(agentText, verification.summary, agentResult.exitCode, request.context.objective);
233
+ if (verification.passed) {
234
+ return {
235
+ status: "completed",
236
+ summary,
237
+ usage,
238
+ verification: { passed: true, summary: verification.summary },
239
+ ...(executionArtifacts
240
+ ? {
241
+ execution: {
242
+ ...executionArtifacts,
243
+ ...(structuredErrors.length > 0 ? { structuredErrors } : {})
244
+ }
245
+ }
246
+ : structuredErrors.length > 0
247
+ ? { execution: { structuredErrors } }
248
+ : {})
249
+ };
250
+ }
251
+ const classHint = scopeViolations.length > 0
252
+ ? "scope_creep"
253
+ : noDiff
254
+ ? "no_progress"
255
+ : (structuralHint ?? undefined);
256
+ const errorBlock = structuredErrors.length > 0
257
+ ? `\nSTRUCTURED ERRORS:\n${structuredErrors.map(e => ` ${e.file}${e.line !== undefined ? `:${String(e.line)}` : ""} — ${e.code ? `${e.code}: ` : ""}${e.message}`).join("\n")}`
258
+ : "";
259
+ const scopeBlock = scopeViolations.length > 0
260
+ ? `\n Scope violations: ${scopeViolations.join(", ")}`
261
+ : "";
262
+ // Write PROGRESS.md to help the next attempt re-anchor on the original objective
263
+ if (repoRoot) {
264
+ try {
265
+ const { writeFile, readFile, appendFile: appendFs } = await import("node:fs/promises");
266
+ const progressPath = `${repoRoot}/PROGRESS.md`;
267
+ const timestamp = new Date().toISOString();
268
+ const entry = `\n## Attempt ${String(request.previousAttempts.length + 1)} — ${timestamp}\n- Failure class: ${classHint ?? "verification_failure"}\n- Verification: ${verification.summary}${errorBlock}${scopeBlock}\n`;
269
+ let content;
270
+ try {
271
+ content = await readFile(progressPath, "utf8");
272
+ }
273
+ catch {
274
+ content = `# Martin Loop Progress\n\n**Original objective:** ${request.context.objective}\n`;
275
+ }
276
+ await writeFile(progressPath, content + entry, "utf8");
277
+ }
278
+ catch {
279
+ // Non-fatal
280
+ }
281
+ // Reset tracked files to HEAD so next attempt starts from clean state
282
+ try {
283
+ await runSubprocess("git", ["restore", "--staged", "--worktree", "."], { cwd: repoRoot, timeoutMs: 5000 });
284
+ }
285
+ catch {
286
+ // Non-fatal
287
+ }
288
+ }
289
+ return {
290
+ status: "failed",
291
+ summary: (structuredErrors.length > 0 || scopeViolations.length > 0)
292
+ ? `${summary}${errorBlock}${scopeViolations.length > 0 ? `\nScope violations: ${scopeViolations.join(", ")}` : ""}`
293
+ : summary,
294
+ usage,
295
+ verification: { passed: false, summary: verification.summary },
296
+ ...(executionArtifacts
297
+ ? {
298
+ execution: {
299
+ ...executionArtifacts,
300
+ ...(structuredErrors.length > 0 ? { structuredErrors } : {})
301
+ }
302
+ }
303
+ : structuredErrors.length > 0
304
+ ? { execution: { structuredErrors } }
305
+ : {}),
306
+ failure: {
307
+ message: verification.summary,
308
+ ...(classHint ? { classHint } : {})
309
+ }
310
+ };
311
+ },
312
+ /**
313
+ * Return a new adapter instance with a different model.
314
+ * Used by run-martin.ts when a change_model intervention fires.
315
+ * Model escalation order: haiku → sonnet → opus (cheapest-first, escalate on repeated failure).
316
+ */
317
+ withModel(newModel) {
318
+ return createAgentCliAdapter({
319
+ ...options,
320
+ model: newModel,
321
+ adapterIdSuffix: `${options.adapterIdSuffix ?? options.command}:${newModel}`
322
+ });
323
+ }
324
+ };
325
+ return adapter;
326
+ }
327
+ // ---------------------------------------------------------------------------
328
+ // Pre-configured: Claude CLI
329
+ // ---------------------------------------------------------------------------
330
+ /**
331
+ * Spawns `claude --output-format json --print "<prompt>" --dangerously-skip-permissions [extraArgs]`.
332
+ *
333
+ * The --output-format json flag causes Claude CLI to return structured JSON
334
+ * including real token usage counts, enabling accurate cost tracking.
335
+ *
336
+ * Requires the Claude Code CLI to be installed and authenticated:
337
+ * https://docs.anthropic.com/claude-code
338
+ */
339
+ export function createClaudeCliAdapter(options = {}) {
340
+ const modelArgs = options.model ? ["--model", options.model] : [];
341
+ const extraArgs = options.extraArgs ?? [];
342
+ return createAgentCliAdapter({
343
+ command: "claude",
344
+ adapterIdSuffix: "claude",
345
+ model: options.model ?? "claude-sonnet-4-6",
346
+ label: options.label ?? "Claude CLI adapter",
347
+ workingDirectory: options.workingDirectory,
348
+ timeoutMs: options.timeoutMs,
349
+ verifyTimeoutMs: options.verifyTimeoutMs,
350
+ supportsJsonOutput: true,
351
+ spawnImpl: options.spawnImpl,
352
+ argsBuilder: (_prompt) => [
353
+ "--output-format",
354
+ "json",
355
+ "--print",
356
+ "--dangerously-skip-permissions",
357
+ ...modelArgs,
358
+ ...extraArgs,
359
+ "--stdin-prompt" // sentinel: tells execute() to deliver prompt via stdin
360
+ ]
361
+ });
362
+ }
363
+ // ---------------------------------------------------------------------------
364
+ // Pre-configured: OpenAI Codex CLI
365
+ // ---------------------------------------------------------------------------
366
+ /**
367
+ * Spawns `codex [--full-auto] [--model <model>] "<prompt>" [extraArgs]`.
368
+ *
369
+ * Requires the Codex CLI to be installed and authenticated:
370
+ * npm install -g @openai/codex
371
+ */
372
+ export function createCodexCliAdapter(options = {}) {
373
+ const fullAuto = options.fullAuto !== false;
374
+ const modelArgs = options.model ? ["--model", options.model] : [];
375
+ const extraArgs = options.extraArgs ?? [];
376
+ return createAgentCliAdapter({
377
+ command: "codex",
378
+ adapterIdSuffix: "codex",
379
+ model: options.model ?? "codex",
380
+ label: options.label ?? "Codex CLI adapter",
381
+ workingDirectory: options.workingDirectory,
382
+ timeoutMs: options.timeoutMs,
383
+ verifyTimeoutMs: options.verifyTimeoutMs,
384
+ supportsJsonOutput: false,
385
+ spawnImpl: options.spawnImpl,
386
+ argsBuilder: (prompt) => [
387
+ ...(fullAuto ? ["--full-auto"] : []),
388
+ ...modelArgs,
389
+ prompt,
390
+ ...extraArgs
391
+ ]
392
+ });
393
+ }
394
+ // ---------------------------------------------------------------------------
395
+ // Prompt builder
396
+ //
397
+ // Implements Qralph-style context isolation:
398
+ // - Each attempt gets a fresh, distilled prompt — NOT the full conversation history
399
+ // - Prior attempts are summarized (last 3 max, via distillContext in core)
400
+ // - Interventions translate into concrete prompt directives
401
+ // - Context budget info surfaces remaining runway to the agent
402
+ // ---------------------------------------------------------------------------
403
+ function buildPrompt(request) {
404
+ const lines = [];
405
+ lines.push("You are running in autonomous agentic mode.");
406
+ lines.push("MAKE ALL REQUIRED FILE EDITS NOW. Do not ask for confirmation. Do not ask clarifying questions.");
407
+ lines.push("Do not explain what you found without also making the changes. Edit the files and complete the task.");
408
+ lines.push("");
409
+ lines.push("If PROGRESS.md exists in your working directory, read it first for context from prior attempts.");
410
+ lines.push("If it does not exist, proceed with the objective below.");
411
+ lines.push("");
412
+ lines.push("Complete the following coding task. Make all necessary file changes.");
413
+ lines.push("When you are done, the verification commands listed below must pass.");
414
+ lines.push("");
415
+ lines.push("OBJECTIVE:");
416
+ lines.push(sanitizeForPrompt(request.context.objective));
417
+ lines.push("");
418
+ // Acceptance criteria (from task contract)
419
+ if (request.context.acceptanceCriteria?.length) {
420
+ lines.push("ACCEPTANCE CRITERIA (all must be satisfied):");
421
+ for (const criterion of request.context.acceptanceCriteria ?? []) {
422
+ lines.push(` - ${sanitizeForPrompt(criterion)}`);
423
+ }
424
+ lines.push("");
425
+ }
426
+ // Scope contract
427
+ const ctx = request.context;
428
+ if (ctx.allowedPaths?.length || ctx.deniedPaths?.length) {
429
+ lines.push("SCOPE CONTRACT (immutable — do not expand):");
430
+ if (ctx.allowedPaths?.length) {
431
+ lines.push(` Allowed paths: ${ctx.allowedPaths.join(", ")}`);
432
+ }
433
+ if (ctx.deniedPaths?.length) {
434
+ lines.push(` Forbidden paths: ${ctx.deniedPaths.join(", ")}`);
435
+ }
436
+ lines.push("");
437
+ }
438
+ if (request.context.verificationPlan.length > 0) {
439
+ lines.push("VERIFICATION (all commands must exit with code 0):");
440
+ for (const cmd of request.context.verificationPlan) {
441
+ lines.push(` ${cmd}`);
442
+ }
443
+ lines.push("");
444
+ }
445
+ const attemptNumber = request.previousAttempts.length + 1;
446
+ lines.push("CONSTRAINTS:");
447
+ lines.push(` Attempt ${String(attemptNumber)}`);
448
+ lines.push(` Remaining budget: $${String(request.context.remainingBudgetUsd)} USD`);
449
+ lines.push(` Remaining iterations: ${String(request.context.remainingIterations)}`);
450
+ lines.push(" Do not expand scope beyond what is needed to pass verification.");
451
+ lines.push("");
452
+ if (request.previousAttempts.length > 0) {
453
+ lines.push("PRIOR FAILED ATTEMPTS (learn from these — do not repeat the same mistakes):");
454
+ for (const attempt of request.previousAttempts) {
455
+ const failurePart = attempt.failureClass ? ` [${attempt.failureClass}]` : "";
456
+ const interventionPart = attempt.intervention ? ` -> intervention: ${attempt.intervention}` : "";
457
+ lines.push(` Attempt ${String(attempt.index)}${failurePart}: ${sanitizeForPrompt(attempt.summary ?? "")}${interventionPart}`);
458
+ }
459
+ lines.push("");
460
+ }
461
+ // Intervention directives
462
+ const lastIntervention = request.previousAttempts.at(-1)?.intervention;
463
+ if (lastIntervention === "tighten_task") {
464
+ lines.push("SCOPE LOCK (prior attempt expanded scope — do not repeat):");
465
+ lines.push(" Only touch files directly required to make the verification commands pass.");
466
+ lines.push(" Do NOT add features, refactor unrelated code, or modify files outside the objective.");
467
+ lines.push("");
468
+ }
469
+ if (lastIntervention === "compress_context") {
470
+ lines.push("BREVITY MODE (prior attempt was too large — be concise):");
471
+ lines.push(" Keep changes minimal. Only output what changed and why.");
472
+ lines.push("");
473
+ }
474
+ if (lastIntervention === "run_verifier") {
475
+ lines.push("VERIFICATION FOCUS (prior attempt failed verification):");
476
+ lines.push(" Before finalizing, mentally simulate running each verification command.");
477
+ lines.push(" Only mark yourself done when confident all commands will pass.");
478
+ lines.push("");
479
+ }
480
+ if (lastIntervention === "change_model") {
481
+ lines.push("FRESH APPROACH (previous attempts did not converge):");
482
+ lines.push(" Do not repeat prior reasoning. Start from first principles on the objective.");
483
+ lines.push("");
484
+ }
485
+ lines.push(`FOCUS: ${sanitizeForPrompt(request.context.focus)}`);
486
+ return lines.join("\n");
487
+ }
488
+ // ---------------------------------------------------------------------------
489
+ // Utilities
490
+ // ---------------------------------------------------------------------------
491
+ function truncate(text, maxLength) {
492
+ if (text.length <= maxLength) {
493
+ return text;
494
+ }
495
+ return `...${text.slice(-(maxLength - 3))}`;
496
+ }
497
+ const INJECTION_PATTERNS = [
498
+ /\[INST\]/gi,
499
+ /<\/?system>/gi,
500
+ /^(IGNORE|DISREGARD|FORGET|NEW INSTRUCTION|OVERRIDE)\b.+$/gim,
501
+ /<\/?s>/gi
502
+ ];
503
+ function sanitizeForPrompt(input) {
504
+ let out = input;
505
+ for (const pattern of INJECTION_PATTERNS) {
506
+ out = out.replace(pattern, "[FILTERED]");
507
+ }
508
+ return redactSecretsForPrompt(out);
509
+ }
510
+ function estimatePromptCost(promptText, model) {
511
+ const inputTokens = Math.ceil(promptText.length / 3.5);
512
+ const outputTokens = 2000;
513
+ const pricing = MODEL_PRICING[model] ?? { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
514
+ return (inputTokens / 1000) * pricing.inputPer1K + (outputTokens / 1000) * pricing.outputPer1K;
515
+ }
516
+ function estimateUsage(promptText, model) {
517
+ const inputTokens = Math.ceil(promptText.length / 3.5);
518
+ const outputTokens = 2_000;
519
+ return normalizeUsage({
520
+ actualUsd: estimatePromptCost(promptText, model),
521
+ estimatedUsd: estimatePromptCost(promptText, model),
522
+ tokensIn: inputTokens,
523
+ tokensOut: outputTokens,
524
+ provenance: "estimated"
525
+ });
526
+ }
527
+ function redactSecretsForPrompt(input) {
528
+ return input
529
+ .replace(/\bOPENAI_API_KEY\s*=\s*[^\s"'`]+/giu, "OPENAI_API_KEY=[REDACTED_SECRET]")
530
+ .replace(/\bsk-[A-Za-z0-9_-]{8,}\b/gu, "[REDACTED_SECRET]")
531
+ .replace(/\bghp_[A-Za-z0-9_]{8,}\b/gu, "[REDACTED_SECRET]")
532
+ .replace(/\B\.env(?!\.example\b)(?:\.[A-Za-z0-9._-]+)?\b/giu, "[REDACTED_PATH]");
533
+ }
534
+ function extractStructuredErrors(stderr, stdout) {
535
+ const errors = [];
536
+ const combined = `${stderr}\n${stdout}`;
537
+ // TypeScript: file.ts(42,5): error TS2322: message
538
+ for (const m of combined.matchAll(/^(.+\.tsx?)\((\d+),(\d+)\): error (TS\d+): (.+)$/gm)) {
539
+ errors.push({ file: m[1] ?? "", line: Number(m[2]), col: Number(m[3]), code: m[4], message: m[5] ?? "" });
540
+ }
541
+ // ESLint / tsc path-style: ./src/foo.ts:42:5: error message
542
+ for (const m of combined.matchAll(/^(\.?\/[\w./-]+\.tsx?):(\d+):(\d+):\s+error\s+(.+)$/gm)) {
543
+ errors.push({ file: m[1] ?? "", line: Number(m[2]), col: Number(m[3]), message: m[4] ?? "" });
544
+ }
545
+ // Jest FAIL line: FAIL src/foo.test.ts
546
+ for (const m of combined.matchAll(/^FAIL\s+([\w./-]+\.test\.[jt]sx?)$/gm)) {
547
+ errors.push({ file: m[1] ?? "", message: "Test suite failed" });
548
+ }
549
+ return errors.slice(0, 10); // cap at 10 to avoid bloating prompts
550
+ }
551
+ async function checkNoDiff(repoRoot) {
552
+ const result = await runSubprocess("git", ["diff", "--name-only", "HEAD"], { cwd: repoRoot, timeoutMs: 5000 });
553
+ return result.exitCode === 0 && result.stdout.trim().length === 0;
554
+ }
555
+ //# sourceMappingURL=claude-cli.js.map
@@ -0,0 +1,28 @@
1
+ import { type ChildProcess, type SpawnOptions } from "node:child_process";
2
+ import { diffStatsFromNumstat } from "./runtime-support.js";
3
+ export type SpawnLike = (command: string, args?: readonly string[], options?: SpawnOptions) => ChildProcess;
4
+ export interface SubprocessResult {
5
+ exitCode: number;
6
+ stdout: string;
7
+ stderr: string;
8
+ timedOut: boolean;
9
+ }
10
+ export interface VerificationOutcome {
11
+ passed: boolean;
12
+ summary: string;
13
+ }
14
+ export declare function runSubprocess(command: string, args: string[], options: {
15
+ cwd: string;
16
+ timeoutMs: number;
17
+ spawnImpl?: SpawnLike;
18
+ stdinData?: string;
19
+ }): Promise<SubprocessResult>;
20
+ export declare function runVerification(commands: string[], cwd: string, timeoutMs: number, verificationStack?: Array<{
21
+ command: string;
22
+ type: string;
23
+ fastFail?: boolean;
24
+ }>, spawnImpl?: SpawnLike): Promise<VerificationOutcome>;
25
+ export declare function readGitExecutionArtifacts(repoRoot: string, timeoutMs: number, spawnImpl?: SpawnLike): Promise<{
26
+ changedFiles?: string[];
27
+ diffStats?: ReturnType<typeof diffStatsFromNumstat>;
28
+ }>;