martin-loop 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +207 -189
- package/dist/bin/martin-loop.js +23 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.js +31 -0
- package/dist/vendor/adapters/claude-cli.d.ts +89 -0
- package/dist/vendor/adapters/claude-cli.js +555 -0
- package/dist/vendor/adapters/cli-bridge.d.ts +28 -0
- package/dist/vendor/adapters/cli-bridge.js +127 -0
- package/dist/vendor/adapters/direct-provider.d.ts +10 -0
- package/dist/vendor/adapters/direct-provider.js +41 -0
- package/dist/vendor/adapters/index.d.ts +5 -0
- package/dist/vendor/adapters/index.js +5 -0
- package/dist/vendor/adapters/runtime-support.d.ts +14 -0
- package/dist/vendor/adapters/runtime-support.js +52 -0
- package/dist/vendor/adapters/stub-agent-cli.d.ts +8 -0
- package/dist/vendor/adapters/stub-agent-cli.js +41 -0
- package/dist/vendor/adapters/stub-direct-provider.d.ts +8 -0
- package/dist/vendor/adapters/stub-direct-provider.js +10 -0
- package/dist/vendor/cli/bin/martin.d.ts +2 -0
- package/dist/vendor/cli/bin/martin.js +19 -0
- package/dist/vendor/cli/index.d.ts +39 -0
- package/dist/vendor/cli/index.js +634 -0
- package/dist/vendor/cli/persistence.d.ts +34 -0
- package/dist/vendor/cli/persistence.js +71 -0
- package/dist/vendor/contracts/governance.d.ts +21 -0
- package/dist/vendor/contracts/governance.js +12 -0
- package/dist/vendor/contracts/index.d.ts +330 -0
- package/dist/vendor/contracts/index.js +203 -0
- package/dist/vendor/core/compiler.d.ts +50 -0
- package/dist/vendor/core/compiler.js +47 -0
- package/dist/vendor/core/grounding.d.ts +37 -0
- package/dist/vendor/core/grounding.js +270 -0
- package/dist/vendor/core/index.d.ts +145 -0
- package/dist/vendor/core/index.js +1099 -0
- package/dist/vendor/core/leash.d.ts +48 -0
- package/dist/vendor/core/leash.js +408 -0
- package/dist/vendor/core/persistence/compiler.d.ts +18 -0
- package/dist/vendor/core/persistence/compiler.js +35 -0
- package/dist/vendor/core/persistence/index.d.ts +6 -0
- package/dist/vendor/core/persistence/index.js +4 -0
- package/dist/vendor/core/persistence/ledger.d.ts +23 -0
- package/dist/vendor/core/persistence/ledger.js +10 -0
- package/dist/vendor/core/persistence/store.d.ts +77 -0
- package/dist/vendor/core/persistence/store.js +84 -0
- package/dist/vendor/core/policy.d.ts +126 -0
- package/dist/vendor/core/policy.js +625 -0
- package/dist/vendor/core/rollback.d.ts +11 -0
- package/dist/vendor/core/rollback.js +219 -0
- package/docs/oss/OSS-BOUNDARY-REPORT.json +1 -1
- package/docs/oss/OSS-BOUNDARY-REPORT.md +1 -1
- package/docs/oss/RELEASE-SURFACE-REPORT.json +1 -1
- package/docs/oss/RELEASE-SURFACE-REPORT.md +1 -1
- package/package.json +54 -54
|
@@ -0,0 +1,1099 @@
|
|
|
1
|
+
import { spawnSync } from "node:child_process";
|
|
2
|
+
import { appendLoopEvent, createLoopRecord } from "../contracts/index.js";
|
|
3
|
+
import { classifyFailure, computeEvidenceVector, evaluatePatchDecision, evaluateCostGovernor, evaluateBudgetPreflight, inferExit, nextPolicyPhase, policyPhaseToLifecycleState, scorePatchDecision, selectRecoveryRecipe } from "./policy.js";
|
|
4
|
+
import { evaluateChangeApprovalLeash, evaluateFilesystemLeash, evaluateSecretLeash, redactSecretsFromText, resolveExecutionProfile, evaluateVerificationLeash } from "./leash.js";
|
|
5
|
+
import { buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations } from "./grounding.js";
|
|
6
|
+
import { captureRollbackBoundary, restoreRollbackBoundary } from "./rollback.js";
|
|
7
|
+
import { compilePromptPacket } from "./compiler.js";
|
|
8
|
+
import { makeLedgerEvent } from "./persistence/index.js";
|
|
9
|
+
export { classifyFailure, computeEvidenceVector, evaluatePatchDecision, evaluateCostGovernor, evaluateBudgetPreflight, inferExit, nextPolicyPhase, policyPhaseToLifecycleState, scorePatchDecision, selectRecoveryRecipe, evaluateVerificationLeash, evaluateFilesystemLeash, evaluateChangeApprovalLeash, evaluateSecretLeash, resolveExecutionProfile, redactSecretsFromText, buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations, captureRollbackBoundary, restoreRollbackBoundary };
|
|
10
|
+
// ─── Prompt packet compiler ──────────────────────────────────────────────────
|
|
11
|
+
export { compilePromptPacket } from "./compiler.js";
|
|
12
|
+
// ─── Persistence (RunStore, LedgerEvent, FileRunStore) ──────────────────────
|
|
13
|
+
export { createFileRunStore, makeLedgerEvent, resolveRunsRoot } from "./persistence/index.js";
|
|
14
|
+
export { compileAndPersistContext } from "./persistence/index.js";
|
|
15
|
+
/**
 * Admission gate — must pass before any attempt is executed.
 *
 * Gates are evaluated in order and the first one that trips wins:
 *   1. Budget: the projected cost must not exceed the remaining USD budget.
 *   2. Iterations: at least one iteration must remain.
 *   3. Oscillation: the last three recorded failure classes form an A/B/A pattern.
 *   4. Repetition: the last three attempts have near-identical summaries.
 *
 * @param {object} input
 * @param {object} input.request - Attempt request; `request.context` must carry
 *   `remainingBudgetUsd` and `remainingIterations`.
 * @param {Array<object>} [input.request.previousAttempts] - Earlier attempts, oldest
 *   first. Each may carry `failureClass` and `summary`. A missing list is treated as empty.
 * @param {number} input.projectedUsd - Estimated cost of the attempt being admitted.
 * @returns {{ allowed: boolean, reason: string, recommendedIntervention?: string }}
 *   `allowed: true` when every gate passes; otherwise the rejection reason plus
 *   either `"stop_loop"` (budget/iteration gates) or `"escalate_human"`.
 */
export function evaluateAttemptPolicy(input) {
    const { request, projectedUsd } = input;
    // A caller admitting the very first attempt may not supply any history;
    // treat that as "no previous attempts" instead of throwing on `.map`/`.length`.
    const previousAttempts = request.previousAttempts ?? [];
    // Budget gate: reject if projected cost exceeds remaining
    if (projectedUsd > request.context.remainingBudgetUsd) {
        return {
            allowed: false,
            reason: `Projected cost $${projectedUsd} exceeds remaining budget $${request.context.remainingBudgetUsd}.`,
            recommendedIntervention: "stop_loop"
        };
    }
    // Iteration gate
    if (request.context.remainingIterations <= 0) {
        return {
            allowed: false,
            reason: "No remaining iterations in the budget.",
            recommendedIntervention: "stop_loop"
        };
    }
    // Oscillation detection: A/B/A pattern in failure classes.
    // Attempts without a failure class are dropped first, so the three classes
    // inspected are the three most recent *failures*, not necessarily adjacent attempts.
    const failures = previousAttempts
        .map((attempt) => attempt.failureClass)
        .filter((failureClass) => Boolean(failureClass));
    if (failures.length >= 3) {
        const [first, second, third] = failures.slice(-3);
        const isOscillating = first !== second && first === third;
        if (isOscillating) {
            return {
                allowed: false,
                reason: "Oscillating failure pattern detected. Escalating to human.",
                recommendedIntervention: "escalate_human"
            };
        }
    }
    // Materially repetitive detection: same summary content pattern 3x
    if (previousAttempts.length >= 3) {
        const summaries = previousAttempts
            .slice(-3)
            .map((attempt) => attempt.summary?.toLowerCase() ?? "")
            // Very short summaries carry too little signal to compare.
            .filter((summary) => summary.length > 10);
        if (summaries.length === 3) {
            // Compute rough similarity: shared significant tokens (lowercase words of 4+ letters).
            const tokenize = (text) => new Set(text.match(/[a-z]{4,}/g) ?? []);
            // NOTE(review): only the oldest and newest of the three summaries are compared;
            // the middle one merely has to pass the length filter above. Confirm this is intended.
            const oldestTokens = tokenize(summaries[0] ?? "");
            const newestTokens = tokenize(summaries[2] ?? "");
            const shared = [...oldestTokens].filter((token) => newestTokens.has(token));
            // Math.max(..., 1) avoids dividing by zero when the oldest summary has no tokens.
            const similarity = shared.length / Math.max(oldestTokens.size, 1);
            if (similarity > 0.5) {
                return {
                    allowed: false,
                    reason: "Materially repetitive attempts detected. Escalating to human.",
                    recommendedIntervention: "escalate_human"
                };
            }
        }
    }
    return {
        allowed: true,
        reason: "Attempt admitted."
    };
}
|
|
80
|
+
/**
 * Build the trimmed-down context handed to the next attempt: a focus string,
 * the most recent attempts, and the budget headroom that is left.
 *
 * @param {object} loop - Loop record; reads `task.objective`, `attempts`,
 *   `budget.{maxUsd,maxIterations,maxTokens}` and `cost.{actualUsd,tokensIn,tokensOut}`.
 * @param {object} [options]
 * @param {number} [options.maxRecentAttempts=3] - How many of the newest attempts to keep.
 *   Zero or a negative value keeps none.
 * @returns {{ focus: string, recentAttempts: Array<object>, constraints: { remainingBudgetUsd: number, remainingIterations: number, remainingTokens: number } }}
 */
export function distillContext(loop, options = {}) {
    const maxRecentAttempts = options.maxRecentAttempts ?? 3;
    // `slice(-0)` is `slice(0)` and would return *every* attempt, and a negative
    // limit would turn into a positive start index; both must yield an empty list.
    const recentAttempts = maxRecentAttempts > 0
        ? loop.attempts.slice(-maxRecentAttempts)
        : [];
    return {
        focus: `${loop.task.objective} Follow the verification plan and stay inside the configured budget.`,
        recentAttempts,
        constraints: {
            // Unlike the two counters below, the USD figure is not clamped at zero here.
            remainingBudgetUsd: roundUsd(loop.budget.maxUsd - loop.cost.actualUsd),
            remainingIterations: Math.max(loop.budget.maxIterations - loop.attempts.length, 0),
            remainingTokens: Math.max(loop.budget.maxTokens - loop.cost.tokensIn - loop.cost.tokensOut, 0)
        }
    };
}
|
|
93
|
+
export async function runMartin(input) {
|
|
94
|
+
const now = input.now ?? (() => new Date().toISOString());
|
|
95
|
+
const idFactory = input.idFactory;
|
|
96
|
+
let loop = createLoopRecord({
|
|
97
|
+
workspaceId: input.workspaceId,
|
|
98
|
+
projectId: input.projectId,
|
|
99
|
+
task: input.task,
|
|
100
|
+
budget: input.budget,
|
|
101
|
+
...(input.teamId ? { teamId: input.teamId } : {}),
|
|
102
|
+
...(input.metadata ? { metadata: input.metadata } : {})
|
|
103
|
+
}, { now: now(), idFactory });
|
|
104
|
+
loop = appendLoopEvent(loop, {
|
|
105
|
+
type: "run.started",
|
|
106
|
+
lifecycleState: "running",
|
|
107
|
+
payload: {
|
|
108
|
+
adapterId: input.adapter.adapterId,
|
|
109
|
+
providerId: input.adapter.metadata.providerId,
|
|
110
|
+
model: input.adapter.metadata.model,
|
|
111
|
+
transport: getAdapterTransport(input.adapter)
|
|
112
|
+
}
|
|
113
|
+
}, { now: now(), idFactory });
|
|
114
|
+
if (input.store) {
|
|
115
|
+
await input.store.initRun({
|
|
116
|
+
runId: loop.loopId,
|
|
117
|
+
workspaceId: input.workspaceId,
|
|
118
|
+
projectId: input.projectId,
|
|
119
|
+
task: input.task,
|
|
120
|
+
budget: input.budget,
|
|
121
|
+
createdAt: loop.createdAt,
|
|
122
|
+
...(input.metadata ? { metadata: input.metadata } : {})
|
|
123
|
+
});
|
|
124
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
125
|
+
kind: "contract.created",
|
|
126
|
+
runId: loop.loopId,
|
|
127
|
+
payload: { workspaceId: input.workspaceId, projectId: input.projectId }
|
|
128
|
+
}));
|
|
129
|
+
}
|
|
130
|
+
const DEFAULT_FALLBACK_MODELS = [
|
|
131
|
+
"claude-haiku-4-5",
|
|
132
|
+
"claude-sonnet-4-6",
|
|
133
|
+
"claude-opus-4-6"
|
|
134
|
+
];
|
|
135
|
+
const adapterChain = [input.adapter, ...(input.fallbackAdapters ?? [])];
|
|
136
|
+
let currentAdapterIndex = 0;
|
|
137
|
+
let currentAdapter = adapterChain[currentAdapterIndex] ?? input.adapter;
|
|
138
|
+
let useCompressedContext = false;
|
|
139
|
+
const executionProfile = resolveExecutionProfile({
|
|
140
|
+
executionProfile: input.task.executionProfile,
|
|
141
|
+
allowedNetworkDomains: input.task.allowedNetworkDomains
|
|
142
|
+
});
|
|
143
|
+
// Safety leash: block destructive verifier commands before any attempt
|
|
144
|
+
const leashDecision = evaluateVerificationLeash({
|
|
145
|
+
verificationPlan: input.task.verificationPlan,
|
|
146
|
+
verificationStack: input.task.verificationStack,
|
|
147
|
+
executionProfile: input.task.executionProfile,
|
|
148
|
+
allowedNetworkDomains: input.task.allowedNetworkDomains
|
|
149
|
+
});
|
|
150
|
+
if (!leashDecision.allowed) {
|
|
151
|
+
const reason = `${leashDecision.reason ?? "Safety leash blocked verifier commands."} Blocked: ${leashDecision.blockedCommands.join(", ")}`;
|
|
152
|
+
const leashExitDecision = {
|
|
153
|
+
shouldExit: true,
|
|
154
|
+
lifecycleState: "human_escalation",
|
|
155
|
+
status: "exited",
|
|
156
|
+
reason
|
|
157
|
+
};
|
|
158
|
+
if (input.store) {
|
|
159
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
160
|
+
kind: "safety.violations_found",
|
|
161
|
+
runId: loop.loopId,
|
|
162
|
+
payload: {
|
|
163
|
+
surface: leashDecision.surface,
|
|
164
|
+
blocked: true,
|
|
165
|
+
profile: leashDecision.profile ?? executionProfile.name,
|
|
166
|
+
violations: serializeSafetyViolations(leashDecision)
|
|
167
|
+
}
|
|
168
|
+
}));
|
|
169
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
170
|
+
kind: "run.exited",
|
|
171
|
+
runId: loop.loopId,
|
|
172
|
+
payload: {
|
|
173
|
+
lifecycleState: leashExitDecision.lifecycleState,
|
|
174
|
+
status: leashExitDecision.status,
|
|
175
|
+
reason: leashExitDecision.reason
|
|
176
|
+
}
|
|
177
|
+
}));
|
|
178
|
+
}
|
|
179
|
+
return {
|
|
180
|
+
loop: finalizeLoop(loop, leashExitDecision, now(), idFactory),
|
|
181
|
+
decision: leashExitDecision
|
|
182
|
+
};
|
|
183
|
+
}
|
|
184
|
+
const secretDecision = evaluateSecretLeash({
|
|
185
|
+
values: [
|
|
186
|
+
input.task.title,
|
|
187
|
+
input.task.objective,
|
|
188
|
+
...(input.task.acceptanceCriteria ?? [])
|
|
189
|
+
]
|
|
190
|
+
});
|
|
191
|
+
if (!secretDecision.allowed) {
|
|
192
|
+
const secretExitDecision = {
|
|
193
|
+
shouldExit: true,
|
|
194
|
+
lifecycleState: "human_escalation",
|
|
195
|
+
status: "exited",
|
|
196
|
+
reason: secretDecision.reason ?? "Safety leash blocked secret-like values in the runtime context."
|
|
197
|
+
};
|
|
198
|
+
if (input.store) {
|
|
199
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
200
|
+
kind: "safety.violations_found",
|
|
201
|
+
runId: loop.loopId,
|
|
202
|
+
payload: {
|
|
203
|
+
surface: "secret",
|
|
204
|
+
blocked: true,
|
|
205
|
+
violations: secretDecision.violations.map((violation) => violation.match ?? violation.message)
|
|
206
|
+
}
|
|
207
|
+
}));
|
|
208
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
209
|
+
kind: "run.exited",
|
|
210
|
+
runId: loop.loopId,
|
|
211
|
+
payload: {
|
|
212
|
+
lifecycleState: secretExitDecision.lifecycleState,
|
|
213
|
+
status: secretExitDecision.status,
|
|
214
|
+
reason: secretExitDecision.reason
|
|
215
|
+
}
|
|
216
|
+
}));
|
|
217
|
+
}
|
|
218
|
+
return {
|
|
219
|
+
loop: finalizeLoop(loop, secretExitDecision, now(), idFactory),
|
|
220
|
+
decision: secretExitDecision
|
|
221
|
+
};
|
|
222
|
+
}
|
|
223
|
+
// Explicit PolicyPhase state machine — starts at GATHER, advances per attempt
|
|
224
|
+
let currentPhase = "GATHER";
|
|
225
|
+
let phaseRetryCount = 0;
|
|
226
|
+
while (loop.attempts.length < loop.budget.maxIterations) {
|
|
227
|
+
const distilled = distillContext(loop, {
|
|
228
|
+
maxRecentAttempts: useCompressedContext ? 1 : (input.maxRecentAttempts ?? 3)
|
|
229
|
+
});
|
|
230
|
+
useCompressedContext = false;
|
|
231
|
+
const attemptStartedAt = now();
|
|
232
|
+
const attemptId = makeId("att", idFactory);
|
|
233
|
+
const executingAdapter = currentAdapter;
|
|
234
|
+
const budgetPreflight = evaluateBudgetPreflight({
|
|
235
|
+
promptCharCount: distilled.focus.length + loop.task.objective.length * 3,
|
|
236
|
+
attemptCount: loop.attempts.length,
|
|
237
|
+
remainingBudgetUsd: distilled.constraints.remainingBudgetUsd,
|
|
238
|
+
perAttemptCapUsd: loop.budget.maxUsd * 0.25
|
|
239
|
+
});
|
|
240
|
+
if (!budgetPreflight.allowed) {
|
|
241
|
+
const preflightExitDecision = {
|
|
242
|
+
shouldExit: true,
|
|
243
|
+
lifecycleState: "budget_exit",
|
|
244
|
+
status: "exited",
|
|
245
|
+
reason: budgetPreflight.reason
|
|
246
|
+
};
|
|
247
|
+
if (input.store) {
|
|
248
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
249
|
+
kind: "attempt.rejected",
|
|
250
|
+
runId: loop.loopId,
|
|
251
|
+
attemptIndex: loop.attempts.length + 1,
|
|
252
|
+
payload: { reason: budgetPreflight.reason, source: "budget_preflight" }
|
|
253
|
+
}));
|
|
254
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
255
|
+
kind: "run.exited",
|
|
256
|
+
runId: loop.loopId,
|
|
257
|
+
payload: {
|
|
258
|
+
lifecycleState: preflightExitDecision.lifecycleState,
|
|
259
|
+
status: preflightExitDecision.status,
|
|
260
|
+
reason: preflightExitDecision.reason
|
|
261
|
+
}
|
|
262
|
+
}));
|
|
263
|
+
}
|
|
264
|
+
return {
|
|
265
|
+
loop: finalizeLoop(loop, preflightExitDecision, now(), idFactory),
|
|
266
|
+
decision: preflightExitDecision
|
|
267
|
+
};
|
|
268
|
+
}
|
|
269
|
+
// GATHER → ADMIT: run admission control before executing
|
|
270
|
+
currentPhase = "ADMIT";
|
|
271
|
+
const admissionDecision = evaluateAttemptPolicy({
|
|
272
|
+
request: {
|
|
273
|
+
loopId: loop.loopId,
|
|
274
|
+
attemptId,
|
|
275
|
+
context: {
|
|
276
|
+
taskTitle: loop.task.title,
|
|
277
|
+
objective: loop.task.objective,
|
|
278
|
+
verificationPlan: loop.task.verificationPlan,
|
|
279
|
+
...(loop.task.verificationStack ? { verificationStack: loop.task.verificationStack } : {}),
|
|
280
|
+
...(loop.task.repoRoot ? { repoRoot: loop.task.repoRoot } : {}),
|
|
281
|
+
...(loop.task.allowedPaths ? { allowedPaths: loop.task.allowedPaths } : {}),
|
|
282
|
+
...(loop.task.deniedPaths ? { deniedPaths: loop.task.deniedPaths } : {}),
|
|
283
|
+
...(loop.task.acceptanceCriteria ? { acceptanceCriteria: loop.task.acceptanceCriteria } : {}),
|
|
284
|
+
...(loop.task.executionProfile ? { executionProfile: loop.task.executionProfile } : {}),
|
|
285
|
+
...(loop.task.allowedNetworkDomains
|
|
286
|
+
? { allowedNetworkDomains: loop.task.allowedNetworkDomains }
|
|
287
|
+
: {}),
|
|
288
|
+
...(loop.task.approvalPolicy ? { approvalPolicy: loop.task.approvalPolicy } : {}),
|
|
289
|
+
focus: distilled.focus,
|
|
290
|
+
remainingBudgetUsd: distilled.constraints.remainingBudgetUsd,
|
|
291
|
+
remainingIterations: distilled.constraints.remainingIterations,
|
|
292
|
+
remainingTokens: distilled.constraints.remainingTokens
|
|
293
|
+
},
|
|
294
|
+
previousAttempts: loop.attempts
|
|
295
|
+
},
|
|
296
|
+
projectedUsd: budgetPreflight.estimate.estimatedAttemptCostUsd
|
|
297
|
+
});
|
|
298
|
+
if (!admissionDecision.allowed) {
|
|
299
|
+
const exitReason = admissionDecision.reason;
|
|
300
|
+
const exitDecision = {
|
|
301
|
+
shouldExit: true,
|
|
302
|
+
lifecycleState: admissionDecision.recommendedIntervention === "escalate_human"
|
|
303
|
+
? "human_escalation"
|
|
304
|
+
: "budget_exit",
|
|
305
|
+
status: "exited",
|
|
306
|
+
reason: exitReason
|
|
307
|
+
};
|
|
308
|
+
if (input.store) {
|
|
309
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
310
|
+
kind: "attempt.rejected",
|
|
311
|
+
runId: loop.loopId,
|
|
312
|
+
attemptIndex: loop.attempts.length + 1,
|
|
313
|
+
payload: { reason: admissionDecision.reason }
|
|
314
|
+
}));
|
|
315
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
316
|
+
kind: "run.exited",
|
|
317
|
+
runId: loop.loopId,
|
|
318
|
+
payload: {
|
|
319
|
+
lifecycleState: exitDecision.lifecycleState,
|
|
320
|
+
status: exitDecision.status,
|
|
321
|
+
reason: exitDecision.reason
|
|
322
|
+
}
|
|
323
|
+
}));
|
|
324
|
+
}
|
|
325
|
+
return {
|
|
326
|
+
loop: finalizeLoop(loop, exitDecision, now(), idFactory),
|
|
327
|
+
decision: exitDecision
|
|
328
|
+
};
|
|
329
|
+
}
|
|
330
|
+
if (input.store) {
|
|
331
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
332
|
+
kind: "attempt.admitted",
|
|
333
|
+
runId: loop.loopId,
|
|
334
|
+
attemptIndex: loop.attempts.length + 1,
|
|
335
|
+
payload: {
|
|
336
|
+
attemptId,
|
|
337
|
+
adapterId: executingAdapter.adapterId,
|
|
338
|
+
providerId: executingAdapter.metadata.providerId,
|
|
339
|
+
model: executingAdapter.metadata.model,
|
|
340
|
+
transport: getAdapterTransport(executingAdapter)
|
|
341
|
+
}
|
|
342
|
+
}));
|
|
343
|
+
}
|
|
344
|
+
// ADMIT → PATCH
|
|
345
|
+
currentPhase = "PATCH";
|
|
346
|
+
loop = appendLoopEvent(loop, {
|
|
347
|
+
type: "attempt.started",
|
|
348
|
+
lifecycleState: "running",
|
|
349
|
+
payload: {
|
|
350
|
+
attemptId,
|
|
351
|
+
adapterId: executingAdapter.adapterId,
|
|
352
|
+
model: executingAdapter.metadata.model,
|
|
353
|
+
policyPhase: currentPhase
|
|
354
|
+
}
|
|
355
|
+
}, { now: attemptStartedAt, idFactory });
|
|
356
|
+
const request = {
|
|
357
|
+
loopId: loop.loopId,
|
|
358
|
+
attemptId,
|
|
359
|
+
context: {
|
|
360
|
+
taskTitle: loop.task.title,
|
|
361
|
+
objective: loop.task.objective,
|
|
362
|
+
verificationPlan: loop.task.verificationPlan,
|
|
363
|
+
...(loop.task.verificationStack ? { verificationStack: loop.task.verificationStack } : {}),
|
|
364
|
+
...(loop.task.repoRoot ? { repoRoot: loop.task.repoRoot } : {}),
|
|
365
|
+
...(loop.task.allowedPaths ? { allowedPaths: loop.task.allowedPaths } : {}),
|
|
366
|
+
...(loop.task.deniedPaths ? { deniedPaths: loop.task.deniedPaths } : {}),
|
|
367
|
+
...(loop.task.acceptanceCriteria ? { acceptanceCriteria: loop.task.acceptanceCriteria } : {}),
|
|
368
|
+
...(loop.task.executionProfile ? { executionProfile: loop.task.executionProfile } : {}),
|
|
369
|
+
...(loop.task.allowedNetworkDomains
|
|
370
|
+
? { allowedNetworkDomains: loop.task.allowedNetworkDomains }
|
|
371
|
+
: {}),
|
|
372
|
+
...(loop.task.approvalPolicy ? { approvalPolicy: loop.task.approvalPolicy } : {}),
|
|
373
|
+
focus: distilled.focus,
|
|
374
|
+
remainingBudgetUsd: distilled.constraints.remainingBudgetUsd,
|
|
375
|
+
remainingIterations: distilled.constraints.remainingIterations,
|
|
376
|
+
remainingTokens: distilled.constraints.remainingTokens
|
|
377
|
+
},
|
|
378
|
+
previousAttempts: loop.attempts
|
|
379
|
+
};
|
|
380
|
+
const rollbackBoundary = await captureRollbackBoundary({
|
|
381
|
+
repoRoot: request.context.repoRoot,
|
|
382
|
+
capturedAt: attemptStartedAt
|
|
383
|
+
});
|
|
384
|
+
const result = await executingAdapter.execute(request);
|
|
385
|
+
const attemptCompletedAt = now();
|
|
386
|
+
const compiledContext = compilePromptPacket(request);
|
|
387
|
+
// PATCH → VERIFY
|
|
388
|
+
currentPhase = "VERIFY";
|
|
389
|
+
let failure = result.status === "failed"
|
|
390
|
+
? classifyFailure({ attempts: loop.attempts, result })
|
|
391
|
+
: undefined;
|
|
392
|
+
const currentAttemptIndex = loop.attempts.length + 1;
|
|
393
|
+
const attempt = {
|
|
394
|
+
attemptId,
|
|
395
|
+
index: currentAttemptIndex,
|
|
396
|
+
adapterId: executingAdapter.adapterId,
|
|
397
|
+
model: executingAdapter.metadata.model,
|
|
398
|
+
startedAt: attemptStartedAt,
|
|
399
|
+
completedAt: attemptCompletedAt,
|
|
400
|
+
summary: result.summary,
|
|
401
|
+
...(failure?.failureClass ? { failureClass: failure.failureClass } : {}),
|
|
402
|
+
...(failure?.recommendedIntervention
|
|
403
|
+
? { intervention: failure.recommendedIntervention }
|
|
404
|
+
: {})
|
|
405
|
+
};
|
|
406
|
+
loop = {
|
|
407
|
+
...loop,
|
|
408
|
+
attempts: [...loop.attempts, attempt],
|
|
409
|
+
cost: {
|
|
410
|
+
actualUsd: roundUsd(loop.cost.actualUsd + getUsageUsd(result.usage)),
|
|
411
|
+
avoidedUsd: loop.cost.avoidedUsd,
|
|
412
|
+
tokensIn: loop.cost.tokensIn + result.usage.tokensIn,
|
|
413
|
+
tokensOut: loop.cost.tokensOut + result.usage.tokensOut
|
|
414
|
+
},
|
|
415
|
+
updatedAt: attemptCompletedAt
|
|
416
|
+
};
|
|
417
|
+
loop = appendLoopEvent(loop, {
|
|
418
|
+
type: "attempt.completed",
|
|
419
|
+
lifecycleState: "running",
|
|
420
|
+
payload: { attemptId, status: result.status, summary: result.summary }
|
|
421
|
+
}, { now: attemptCompletedAt, idFactory });
|
|
422
|
+
const previousVerifierScore = getLastVerifierScore(loop);
|
|
423
|
+
if (failure) {
|
|
424
|
+
if (failure.recommendedIntervention === "compress_context") {
|
|
425
|
+
useCompressedContext = true;
|
|
426
|
+
}
|
|
427
|
+
let adapterSwitched = false;
|
|
428
|
+
if (failure.recommendedIntervention === "switch_adapter") {
|
|
429
|
+
const nextAdapter = adapterChain[currentAdapterIndex + 1];
|
|
430
|
+
if (nextAdapter) {
|
|
431
|
+
currentAdapterIndex += 1;
|
|
432
|
+
currentAdapter = nextAdapter;
|
|
433
|
+
adapterSwitched = true;
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
if (failure.recommendedIntervention === "change_model" && currentAdapter.withModel) {
|
|
437
|
+
const fallbackModels = input.fallbackModels ?? DEFAULT_FALLBACK_MODELS;
|
|
438
|
+
const nextModel = fallbackModels[loop.attempts.length % fallbackModels.length];
|
|
439
|
+
if (nextModel) {
|
|
440
|
+
currentAdapter = currentAdapter.withModel(nextModel);
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
loop = appendLoopEvent(loop, {
|
|
444
|
+
type: "failure.classified",
|
|
445
|
+
lifecycleState: "running",
|
|
446
|
+
payload: {
|
|
447
|
+
attemptId,
|
|
448
|
+
failureClass: failure.failureClass,
|
|
449
|
+
rationale: failure.rationale
|
|
450
|
+
}
|
|
451
|
+
}, { now: attemptCompletedAt, idFactory });
|
|
452
|
+
loop = appendLoopEvent(loop, {
|
|
453
|
+
type: "intervention.selected",
|
|
454
|
+
lifecycleState: "running",
|
|
455
|
+
payload: { attemptId, intervention: failure.recommendedIntervention }
|
|
456
|
+
}, { now: attemptCompletedAt, idFactory });
|
|
457
|
+
if (adapterSwitched) {
|
|
458
|
+
loop = appendLoopEvent(loop, {
|
|
459
|
+
type: "intervention.selected",
|
|
460
|
+
lifecycleState: "running",
|
|
461
|
+
payload: {
|
|
462
|
+
attemptId,
|
|
463
|
+
intervention: "switch_adapter",
|
|
464
|
+
nextAdapterId: currentAdapter.adapterId,
|
|
465
|
+
transport: getAdapterTransport(currentAdapter)
|
|
466
|
+
}
|
|
467
|
+
}, { now: attemptCompletedAt, idFactory });
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
loop = appendLoopEvent(loop, {
|
|
471
|
+
type: "verification.completed",
|
|
472
|
+
lifecycleState: result.verification.passed ? "completed" : "verifying",
|
|
473
|
+
payload: {
|
|
474
|
+
attemptId,
|
|
475
|
+
passed: result.verification.passed,
|
|
476
|
+
summary: result.verification.summary
|
|
477
|
+
}
|
|
478
|
+
}, { now: attemptCompletedAt, idFactory });
|
|
479
|
+
const costState = evaluateCostGovernor({
|
|
480
|
+
budget: loop.budget,
|
|
481
|
+
cost: loop.cost,
|
|
482
|
+
attemptsUsed: loop.attempts.length
|
|
483
|
+
});
|
|
484
|
+
loop = appendLoopEvent(loop, {
|
|
485
|
+
type: "budget.updated",
|
|
486
|
+
lifecycleState: costState.shouldStop ? "budget_exit" : "running",
|
|
487
|
+
payload: {
|
|
488
|
+
actualUsd: loop.cost.actualUsd,
|
|
489
|
+
remainingBudgetUsd: costState.remainingBudgetUsd,
|
|
490
|
+
pressure: costState.pressure
|
|
491
|
+
}
|
|
492
|
+
}, { now: now(), idFactory });
|
|
493
|
+
if (input.store) {
|
|
494
|
+
const settlement = createBudgetSettlement({
|
|
495
|
+
runId: loop.loopId,
|
|
496
|
+
attemptIndex: currentAttemptIndex,
|
|
497
|
+
usage: result.usage,
|
|
498
|
+
estimate: budgetPreflight.estimate,
|
|
499
|
+
settledAt: attemptCompletedAt
|
|
500
|
+
});
|
|
501
|
+
await input.store.writeAttemptArtifacts(loop.loopId, currentAttemptIndex, {
|
|
502
|
+
compiledContext,
|
|
503
|
+
...(rollbackBoundary ? { rollbackBoundary } : {})
|
|
504
|
+
});
|
|
505
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
506
|
+
kind: "patch.generated",
|
|
507
|
+
runId: loop.loopId,
|
|
508
|
+
attemptIndex: currentAttemptIndex,
|
|
509
|
+
payload: { status: result.status, summary: result.summary }
|
|
510
|
+
}));
|
|
511
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
512
|
+
kind: "verification.completed",
|
|
513
|
+
runId: loop.loopId,
|
|
514
|
+
attemptIndex: currentAttemptIndex,
|
|
515
|
+
payload: { passed: result.verification.passed, summary: result.verification.summary }
|
|
516
|
+
}));
|
|
517
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
518
|
+
kind: "budget.settled",
|
|
519
|
+
runId: loop.loopId,
|
|
520
|
+
attemptIndex: currentAttemptIndex,
|
|
521
|
+
payload: {
|
|
522
|
+
actualUsd: settlement.totalActualUsd,
|
|
523
|
+
estimatedUsd: result.usage.estimatedUsd,
|
|
524
|
+
tokensIn: result.usage.tokensIn,
|
|
525
|
+
tokensOut: result.usage.tokensOut,
|
|
526
|
+
provenance: getUsageProvenance(result.usage),
|
|
527
|
+
transport: getAdapterTransport(executingAdapter),
|
|
528
|
+
providerId: executingAdapter.metadata.providerId,
|
|
529
|
+
model: executingAdapter.metadata.model,
|
|
530
|
+
patchCost: settlement.patchCost,
|
|
531
|
+
verificationCost: settlement.verificationCost,
|
|
532
|
+
varianceUsd: settlement.varianceUsd,
|
|
533
|
+
preflightEstimateUsd: settlement.preflightEstimateUsd
|
|
534
|
+
}
|
|
535
|
+
}));
|
|
536
|
+
}
|
|
537
|
+
const changedFiles = resolveChangedFiles(result, request.context.repoRoot);
|
|
538
|
+
// Evidence is only reliable when the adapter explicitly reported files OR git actually
|
|
539
|
+
// returned a non-empty list. A repoRoot alone is insufficient — git may fail (e.g. not
|
|
540
|
+
// a git repo) and silently return [], which would falsely trigger no_code_change.
|
|
541
|
+
const changedFileEvidenceAvailable = result.execution?.changedFiles !== undefined || changedFiles.length > 0;
|
|
542
|
+
const filesystemDecision = evaluateFilesystemLeash({
|
|
543
|
+
repoRoot: request.context.repoRoot,
|
|
544
|
+
changedFiles,
|
|
545
|
+
allowedPaths: request.context.allowedPaths,
|
|
546
|
+
deniedPaths: request.context.deniedPaths
|
|
547
|
+
});
|
|
548
|
+
if (!filesystemDecision.allowed) {
|
|
549
|
+
const patchDecision = evaluatePatchDecision({
|
|
550
|
+
verificationPassed: result.verification.passed,
|
|
551
|
+
previousVerifierScore,
|
|
552
|
+
verifierScore: result.verification.passed ? 1 : 0,
|
|
553
|
+
scopeViolationCount: filesystemDecision.violations.length,
|
|
554
|
+
changedFileCount: changedFiles.length,
|
|
555
|
+
diffNovelty: changedFiles.length > 0 ? 1 : 0,
|
|
556
|
+
diffStats: result.execution?.diffStats,
|
|
557
|
+
costUsd: getUsageUsd(result.usage),
|
|
558
|
+
summary: result.summary
|
|
559
|
+
});
|
|
560
|
+
const filesystemExitDecision = {
|
|
561
|
+
shouldExit: true,
|
|
562
|
+
lifecycleState: "human_escalation",
|
|
563
|
+
status: "exited",
|
|
564
|
+
reason: filesystemDecision.reason ?? "Safety leash blocked filesystem changes."
|
|
565
|
+
};
|
|
566
|
+
const rollbackOutcome = await restoreRollbackBoundary({
|
|
567
|
+
repoRoot: request.context.repoRoot,
|
|
568
|
+
boundary: rollbackBoundary,
|
|
569
|
+
restoredAt: attemptCompletedAt,
|
|
570
|
+
decision: patchDecision.decision
|
|
571
|
+
});
|
|
572
|
+
if (input.store) {
|
|
573
|
+
await input.store.writeAttemptArtifacts(loop.loopId, currentAttemptIndex, {
|
|
574
|
+
compiledContext,
|
|
575
|
+
leash: createLeashArtifact(filesystemDecision, currentAttemptIndex),
|
|
576
|
+
patchScore: patchDecision.score,
|
|
577
|
+
patchDecision: toPatchDecisionArtifact(patchDecision),
|
|
578
|
+
...(rollbackBoundary ? { rollbackBoundary } : {}),
|
|
579
|
+
...(rollbackOutcome ? { rollbackOutcome } : {})
|
|
580
|
+
});
|
|
581
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
582
|
+
kind: "safety.violations_found",
|
|
583
|
+
runId: loop.loopId,
|
|
584
|
+
attemptIndex: currentAttemptIndex,
|
|
585
|
+
payload: {
|
|
586
|
+
surface: "filesystem",
|
|
587
|
+
blocked: true,
|
|
588
|
+
attemptIndex: currentAttemptIndex,
|
|
589
|
+
violations: filesystemDecision.violations
|
|
590
|
+
}
|
|
591
|
+
}));
|
|
592
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
593
|
+
kind: "attempt.discarded",
|
|
594
|
+
runId: loop.loopId,
|
|
595
|
+
attemptIndex: currentAttemptIndex,
|
|
596
|
+
payload: {
|
|
597
|
+
decision: patchDecision.decision,
|
|
598
|
+
reason: patchDecision.summary,
|
|
599
|
+
reasonCodes: patchDecision.reasonCodes,
|
|
600
|
+
score: patchDecision.score.score
|
|
601
|
+
}
|
|
602
|
+
}));
|
|
603
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
604
|
+
kind: "run.exited",
|
|
605
|
+
runId: loop.loopId,
|
|
606
|
+
payload: {
|
|
607
|
+
lifecycleState: filesystemExitDecision.lifecycleState,
|
|
608
|
+
status: filesystemExitDecision.status,
|
|
609
|
+
reason: filesystemExitDecision.reason
|
|
610
|
+
}
|
|
611
|
+
}));
|
|
612
|
+
}
|
|
613
|
+
return {
|
|
614
|
+
loop: finalizeLoop(loop, filesystemExitDecision, now(), idFactory),
|
|
615
|
+
decision: filesystemExitDecision
|
|
616
|
+
};
|
|
617
|
+
}
|
|
618
|
+
const changeApprovalDecision = evaluateChangeApprovalLeash({
|
|
619
|
+
changedFiles,
|
|
620
|
+
executionProfile: request.context.executionProfile,
|
|
621
|
+
approvalPolicy: request.context.approvalPolicy
|
|
622
|
+
});
|
|
623
|
+
if (!changeApprovalDecision.allowed) {
|
|
624
|
+
const patchDecision = evaluatePatchDecision({
|
|
625
|
+
verificationPassed: result.verification.passed,
|
|
626
|
+
previousVerifierScore,
|
|
627
|
+
verifierScore: result.verification.passed ? 1 : 0,
|
|
628
|
+
safetyViolationCount: changeApprovalDecision.violations.length,
|
|
629
|
+
changedFileCount: changedFiles.length,
|
|
630
|
+
diffNovelty: changedFiles.length > 0 ? 1 : 0,
|
|
631
|
+
diffStats: result.execution?.diffStats,
|
|
632
|
+
costUsd: getUsageUsd(result.usage),
|
|
633
|
+
humanApprovalRequired: true,
|
|
634
|
+
summary: result.summary
|
|
635
|
+
});
|
|
636
|
+
const approvalExitDecision = {
|
|
637
|
+
shouldExit: true,
|
|
638
|
+
lifecycleState: "human_escalation",
|
|
639
|
+
status: "exited",
|
|
640
|
+
reason: changeApprovalDecision.reason ??
|
|
641
|
+
"Safety leash blocked dependency or migration changes that require approval."
|
|
642
|
+
};
|
|
643
|
+
const rollbackOutcome = await restoreRollbackBoundary({
|
|
644
|
+
repoRoot: request.context.repoRoot,
|
|
645
|
+
boundary: rollbackBoundary,
|
|
646
|
+
restoredAt: attemptCompletedAt,
|
|
647
|
+
decision: patchDecision.decision
|
|
648
|
+
});
|
|
649
|
+
if (input.store) {
|
|
650
|
+
await input.store.writeAttemptArtifacts(loop.loopId, currentAttemptIndex, {
|
|
651
|
+
compiledContext,
|
|
652
|
+
leash: createLeashArtifact(changeApprovalDecision, currentAttemptIndex),
|
|
653
|
+
patchScore: patchDecision.score,
|
|
654
|
+
patchDecision: toPatchDecisionArtifact(patchDecision),
|
|
655
|
+
...(rollbackBoundary ? { rollbackBoundary } : {}),
|
|
656
|
+
...(rollbackOutcome ? { rollbackOutcome } : {})
|
|
657
|
+
});
|
|
658
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
659
|
+
kind: "safety.violations_found",
|
|
660
|
+
runId: loop.loopId,
|
|
661
|
+
attemptIndex: currentAttemptIndex,
|
|
662
|
+
payload: {
|
|
663
|
+
surface: "dependency",
|
|
664
|
+
blocked: true,
|
|
665
|
+
profile: changeApprovalDecision.profile ?? executionProfile.name,
|
|
666
|
+
attemptIndex: currentAttemptIndex,
|
|
667
|
+
violations: changeApprovalDecision.violations
|
|
668
|
+
}
|
|
669
|
+
}));
|
|
670
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
671
|
+
kind: "attempt.discarded",
|
|
672
|
+
runId: loop.loopId,
|
|
673
|
+
attemptIndex: currentAttemptIndex,
|
|
674
|
+
payload: {
|
|
675
|
+
decision: patchDecision.decision,
|
|
676
|
+
reason: patchDecision.summary,
|
|
677
|
+
reasonCodes: patchDecision.reasonCodes,
|
|
678
|
+
score: patchDecision.score.score
|
|
679
|
+
}
|
|
680
|
+
}));
|
|
681
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
682
|
+
kind: "run.exited",
|
|
683
|
+
runId: loop.loopId,
|
|
684
|
+
payload: {
|
|
685
|
+
lifecycleState: approvalExitDecision.lifecycleState,
|
|
686
|
+
status: approvalExitDecision.status,
|
|
687
|
+
reason: approvalExitDecision.reason
|
|
688
|
+
}
|
|
689
|
+
}));
|
|
690
|
+
}
|
|
691
|
+
return {
|
|
692
|
+
loop: finalizeLoop(loop, approvalExitDecision, now(), idFactory),
|
|
693
|
+
decision: approvalExitDecision
|
|
694
|
+
};
|
|
695
|
+
}
|
|
696
|
+
// VERIFY: Run grounding scan on patch diff if available
|
|
697
|
+
// Uses the task's repoRoot to build/load the grounding index, then scans any diff
|
|
698
|
+
let groundingScanResult;
|
|
699
|
+
const patchDiff = buildPatchDiff(result, changedFiles);
|
|
700
|
+
if (patchDiff && input.task.repoRoot) {
|
|
701
|
+
try {
|
|
702
|
+
const groundingIndex = await loadOrBuildRepoGroundingIndex(input.task.repoRoot);
|
|
703
|
+
groundingScanResult = scanPatchForGroundingViolations(patchDiff, groundingIndex, {
|
|
704
|
+
allowedPaths: input.task.allowedPaths
|
|
705
|
+
});
|
|
706
|
+
if (input.store && groundingScanResult.violations.length > 0) {
|
|
707
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
708
|
+
kind: "grounding.violations_found",
|
|
709
|
+
runId: loop.loopId,
|
|
710
|
+
attemptIndex: currentAttemptIndex,
|
|
711
|
+
payload: {
|
|
712
|
+
violationCount: groundingScanResult.violations.length,
|
|
713
|
+
resolvedFiles: groundingScanResult.resolvedFiles,
|
|
714
|
+
contentOnly: groundingScanResult.contentOnly,
|
|
715
|
+
violations: groundingScanResult.violations.slice(0, 10)
|
|
716
|
+
}
|
|
717
|
+
}));
|
|
718
|
+
}
|
|
719
|
+
}
|
|
720
|
+
catch {
|
|
721
|
+
// Grounding scan is best-effort — never fail the loop because of a scan error
|
|
722
|
+
}
|
|
723
|
+
}
|
|
724
|
+
let patchDecision;
|
|
725
|
+
if (result.status === "completed") {
|
|
726
|
+
patchDecision = evaluatePatchDecision({
|
|
727
|
+
verificationPassed: result.verification.passed,
|
|
728
|
+
previousVerifierScore,
|
|
729
|
+
verifierScore: result.verification.passed ? 1 : 0,
|
|
730
|
+
groundingViolationCount: groundingScanResult?.violations.length ?? 0,
|
|
731
|
+
changedFileCount: changedFileEvidenceAvailable ? changedFiles.length : undefined,
|
|
732
|
+
diffNovelty: changedFileEvidenceAvailable ? (changedFiles.length > 0 ? 1 : 0) : undefined,
|
|
733
|
+
diffStats: result.execution?.diffStats,
|
|
734
|
+
costUsd: getUsageUsd(result.usage),
|
|
735
|
+
summary: result.summary
|
|
736
|
+
});
|
|
737
|
+
}
|
|
738
|
+
let rollbackOutcome;
|
|
739
|
+
if (patchDecision && patchDecision.decision !== "KEEP") {
|
|
740
|
+
rollbackOutcome = await restoreRollbackBoundary({
|
|
741
|
+
repoRoot: request.context.repoRoot,
|
|
742
|
+
boundary: rollbackBoundary,
|
|
743
|
+
restoredAt: attemptCompletedAt,
|
|
744
|
+
decision: patchDecision.decision
|
|
745
|
+
});
|
|
746
|
+
}
|
|
747
|
+
else if (result.status === "failed") {
|
|
748
|
+
rollbackOutcome = await restoreRollbackBoundary({
|
|
749
|
+
repoRoot: request.context.repoRoot,
|
|
750
|
+
boundary: rollbackBoundary,
|
|
751
|
+
restoredAt: attemptCompletedAt,
|
|
752
|
+
decision: "DISCARD"
|
|
753
|
+
});
|
|
754
|
+
}
|
|
755
|
+
if (input.store) {
|
|
756
|
+
await input.store.writeAttemptArtifacts(loop.loopId, currentAttemptIndex, {
|
|
757
|
+
compiledContext,
|
|
758
|
+
...(patchDiff ? { diff: patchDiff } : {}),
|
|
759
|
+
...(groundingScanResult ? { groundingScan: groundingScanResult } : {}),
|
|
760
|
+
...(patchDecision ? { patchScore: patchDecision.score } : {}),
|
|
761
|
+
...(patchDecision ? { patchDecision: toPatchDecisionArtifact(patchDecision) } : {}),
|
|
762
|
+
...(rollbackBoundary ? { rollbackBoundary } : {}),
|
|
763
|
+
...(rollbackOutcome ? { rollbackOutcome } : {})
|
|
764
|
+
});
|
|
765
|
+
}
|
|
766
|
+
if (input.store) {
|
|
767
|
+
if (patchDecision) {
|
|
768
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
769
|
+
kind: patchDecision.decision === "KEEP" ? "attempt.kept" : "attempt.discarded",
|
|
770
|
+
runId: loop.loopId,
|
|
771
|
+
attemptIndex: currentAttemptIndex,
|
|
772
|
+
payload: {
|
|
773
|
+
decision: patchDecision.decision,
|
|
774
|
+
reason: patchDecision.summary,
|
|
775
|
+
reasonCodes: patchDecision.reasonCodes,
|
|
776
|
+
score: patchDecision.score.score
|
|
777
|
+
}
|
|
778
|
+
}));
|
|
779
|
+
}
|
|
780
|
+
else {
|
|
781
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
782
|
+
kind: result.verification.passed ? "attempt.kept" : "attempt.discarded",
|
|
783
|
+
runId: loop.loopId,
|
|
784
|
+
attemptIndex: currentAttemptIndex,
|
|
785
|
+
payload: { reason: result.verification.summary }
|
|
786
|
+
}));
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
if (patchDecision && patchDecision.decision !== "KEEP" && !failure) {
|
|
790
|
+
failure = classifyPatchDecisionFailure(patchDecision);
|
|
791
|
+
loop = applyPatchFailureToLoop(loop, {
|
|
792
|
+
attemptId,
|
|
793
|
+
summary: patchDecision.summary,
|
|
794
|
+
failure
|
|
795
|
+
});
|
|
796
|
+
if (failure.recommendedIntervention === "compress_context") {
|
|
797
|
+
useCompressedContext = true;
|
|
798
|
+
}
|
|
799
|
+
loop = appendLoopEvent(loop, {
|
|
800
|
+
type: "failure.classified",
|
|
801
|
+
lifecycleState: "running",
|
|
802
|
+
payload: {
|
|
803
|
+
attemptId,
|
|
804
|
+
failureClass: failure.failureClass,
|
|
805
|
+
rationale: failure.rationale
|
|
806
|
+
}
|
|
807
|
+
}, { now: attemptCompletedAt, idFactory });
|
|
808
|
+
loop = appendLoopEvent(loop, {
|
|
809
|
+
type: "intervention.selected",
|
|
810
|
+
lifecycleState: "running",
|
|
811
|
+
payload: { attemptId, intervention: failure.recommendedIntervention }
|
|
812
|
+
}, { now: attemptCompletedAt, idFactory });
|
|
813
|
+
}
|
|
814
|
+
if (patchDecision?.decision === "ESCALATE" || patchDecision?.decision === "HANDOFF") {
|
|
815
|
+
const patchExitDecision = {
|
|
816
|
+
shouldExit: true,
|
|
817
|
+
lifecycleState: "human_escalation",
|
|
818
|
+
status: "exited",
|
|
819
|
+
reason: patchDecision.summary
|
|
820
|
+
};
|
|
821
|
+
if (input.store) {
|
|
822
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
823
|
+
kind: "run.exited",
|
|
824
|
+
runId: loop.loopId,
|
|
825
|
+
payload: {
|
|
826
|
+
lifecycleState: patchExitDecision.lifecycleState,
|
|
827
|
+
status: patchExitDecision.status,
|
|
828
|
+
reason: patchExitDecision.reason
|
|
829
|
+
}
|
|
830
|
+
}));
|
|
831
|
+
}
|
|
832
|
+
return {
|
|
833
|
+
loop: finalizeLoop(loop, patchExitDecision, now(), idFactory),
|
|
834
|
+
decision: patchExitDecision
|
|
835
|
+
};
|
|
836
|
+
}
|
|
837
|
+
const effectiveResult = patchDecision && patchDecision.decision !== "KEEP"
|
|
838
|
+
? {
|
|
839
|
+
...result,
|
|
840
|
+
status: "failed",
|
|
841
|
+
summary: patchDecision.summary,
|
|
842
|
+
verification: {
|
|
843
|
+
...result.verification,
|
|
844
|
+
passed: false,
|
|
845
|
+
summary: patchDecision.summary
|
|
846
|
+
},
|
|
847
|
+
failure: {
|
|
848
|
+
message: patchDecision.summary,
|
|
849
|
+
...(failure?.failureClass ? { classHint: failure.failureClass } : {})
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
: result;
|
|
853
|
+
const decision = inferExit({
|
|
854
|
+
loop,
|
|
855
|
+
lastResult: effectiveResult,
|
|
856
|
+
lastFailure: failure,
|
|
857
|
+
costState,
|
|
858
|
+
canSwitchAdapter: failure?.recommendedIntervention === "switch_adapter" &&
|
|
859
|
+
adapterChain[currentAdapterIndex] !== undefined &&
|
|
860
|
+
currentAdapter.adapterId !== executingAdapter.adapterId
|
|
861
|
+
});
|
|
862
|
+
// Advance phase based on result
|
|
863
|
+
currentPhase = nextPolicyPhase(currentPhase, effectiveResult, costState, phaseRetryCount);
|
|
864
|
+
if (failure)
|
|
865
|
+
phaseRetryCount++;
|
|
866
|
+
else
|
|
867
|
+
phaseRetryCount = 0;
|
|
868
|
+
if (decision.shouldExit) {
|
|
869
|
+
if (input.store) {
|
|
870
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
871
|
+
kind: "run.exited",
|
|
872
|
+
runId: loop.loopId,
|
|
873
|
+
payload: {
|
|
874
|
+
lifecycleState: decision.lifecycleState,
|
|
875
|
+
status: decision.status,
|
|
876
|
+
reason: decision.reason
|
|
877
|
+
}
|
|
878
|
+
}));
|
|
879
|
+
}
|
|
880
|
+
return {
|
|
881
|
+
loop: finalizeLoop(loop, decision, now(), idFactory),
|
|
882
|
+
decision
|
|
883
|
+
};
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
const decision = {
|
|
887
|
+
shouldExit: true,
|
|
888
|
+
lifecycleState: "budget_exit",
|
|
889
|
+
status: "exited",
|
|
890
|
+
reason: "Martin exited because the loop exhausted its configured iteration budget."
|
|
891
|
+
};
|
|
892
|
+
if (input.store) {
|
|
893
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
894
|
+
kind: "run.exited",
|
|
895
|
+
runId: loop.loopId,
|
|
896
|
+
payload: {
|
|
897
|
+
lifecycleState: decision.lifecycleState,
|
|
898
|
+
status: decision.status,
|
|
899
|
+
reason: decision.reason
|
|
900
|
+
}
|
|
901
|
+
}));
|
|
902
|
+
}
|
|
903
|
+
return {
|
|
904
|
+
loop: finalizeLoop(loop, decision, now(), idFactory),
|
|
905
|
+
decision
|
|
906
|
+
};
|
|
907
|
+
}
|
|
908
|
+
/**
 * Close out a loop record with a terminal exit decision.
 *
 * Appends a "run.completed" event (through appendLoopEvent) whose payload carries
 * the decision's status and reason, then returns a copy of that result with
 * `status`, `lifecycleState` and `updatedAt` overwritten.
 *
 * @param loop - Loop record handed to appendLoopEvent.
 * @param decision - Exit decision; `lifecycleState`, `status` and `reason` are read.
 * @param timestamp - Used as the event's `now` option and as the new `updatedAt`.
 * @param idFactory - Forwarded to appendLoopEvent in its options argument.
 * @returns The finalized loop object.
 */
function finalizeLoop(loop, decision, timestamp, idFactory) {
    const completionEvent = {
        type: "run.completed",
        lifecycleState: decision.lifecycleState,
        payload: { status: decision.status, reason: decision.reason }
    };
    const eventOptions = { now: timestamp, idFactory };
    const withCompletion = appendLoopEvent(loop, completionEvent, eventOptions);
    // Top-level fields are written after the spread so they win over whatever
    // appendLoopEvent returned.
    return {
        ...withCompletion,
        status: decision.status,
        lifecycleState: decision.lifecycleState,
        updatedAt: timestamp
    };
}
|
|
921
|
+
/**
 * Resolve the transport label for an adapter.
 *
 * An explicit `adapter.metadata.transport` wins whenever it is neither `null`
 * nor `undefined`; otherwise the label is derived from `adapter.kind`:
 * "cli" for "agent-cli" adapters and "http" for everything else.
 *
 * @param adapter - Object exposing `metadata` and `kind`.
 * @returns The declared transport, or the "cli" / "http" fallback.
 */
function getAdapterTransport(adapter) {
    const declaredTransport = adapter.metadata.transport;
    if (declaredTransport !== undefined && declaredTransport !== null) {
        return declaredTransport;
    }
    if (adapter.kind === "agent-cli") {
        return "cli";
    }
    return "http";
}
|
|
924
|
+
/**
 * Return the usage record's `actualUsd` amount after passing it through roundUsd.
 *
 * @param usage - Usage record; only `actualUsd` is read.
 * @returns Whatever roundUsd returns for that amount.
 */
function getUsageUsd(usage) {
    const { actualUsd } = usage;
    return roundUsd(actualUsd);
}
|
|
927
|
+
/**
 * Determine the provenance label for a usage record.
 *
 * A truthy `usage.provenance` is returned unchanged. Without one, the record is
 * labelled "estimated" when `estimatedUsd` is anything other than `undefined`,
 * and "actual" otherwise.
 *
 * @param usage - Usage record; `provenance` and `estimatedUsd` are read.
 * @returns The provenance label.
 */
function getUsageProvenance(usage) {
    const inferred = () => (usage.estimatedUsd === undefined ? "actual" : "estimated");
    return usage.provenance || inferred();
}
|
|
936
|
+
/**
 * Work out which files an attempt changed.
 *
 * A non-empty `result.execution.changedFiles` list is trusted and returned
 * as-is. Otherwise, when a repo root is given, the list is read from
 * `git diff --name-only -z HEAD` run inside that directory.
 *
 * The `-z` flag makes git emit NUL-terminated, verbatim path names. Without it
 * git C-quotes and escapes paths containing non-ASCII or special characters
 * (see `core.quotePath`), so the returned names would not match the real paths
 * on disk. Entries are not trimmed for the same reason: with `-z` every byte
 * is part of the file name.
 *
 * This is best-effort: a missing repo root, a non-zero git exit status,
 * non-string output, or a thrown error all yield an empty list.
 *
 * NOTE(review): `git diff HEAD` does not list untracked files, so files newly
 * created by an attempt are not reported by this fallback — confirm whether
 * callers need them.
 *
 * @param result - Attempt result; `execution.changedFiles` is read if present.
 * @param repoRoot - Directory to run git in; falsy disables the git fallback.
 * @returns Array of changed file paths (possibly empty).
 */
function resolveChangedFiles(result, repoRoot) {
    const reportedFiles = result.execution?.changedFiles;
    if (reportedFiles?.length) {
        return reportedFiles;
    }
    if (!repoRoot) {
        return [];
    }
    try {
        const diff = spawnSync("git", ["diff", "--name-only", "-z", "HEAD"], {
            cwd: repoRoot,
            encoding: "utf8"
        });
        if (diff.status !== 0 || typeof diff.stdout !== "string") {
            return [];
        }
        // NUL-delimited output ends with a trailing NUL; drop the empty tail entry.
        return diff.stdout.split("\0").filter(Boolean);
    }
    catch {
        // Deliberate best-effort: an unusable git environment means "no evidence".
        return [];
    }
}
|
|
960
|
+
/**
 * Build a synthetic, header-only unified diff from a list of changed files.
 *
 * No real patch content is involved: each file contributes a `---`/`+++`
 * header pair followed by a single hunk containing one empty added line.
 * The adapter-reported `result.execution.changedFiles` list is preferred when
 * it is non-empty; otherwise the `changedFiles` argument is used.
 *
 * @param result - Attempt result; `execution.changedFiles` is read if present.
 * @param changedFiles - Fallback list of changed file paths.
 * @returns The joined synthetic diff, or `undefined` when no files are known.
 */
function buildPatchDiff(result, changedFiles) {
    const renderHeaders = (files) => files
        .map((file) => `--- a/${file}\n+++ b/${file}\n@@ -0,0 +1 @@\n+`)
        .join("\n");
    const reportedFiles = result.execution?.changedFiles;
    if (reportedFiles?.length) {
        return renderHeaders(reportedFiles);
    }
    if (changedFiles.length > 0) {
        return renderHeaders(changedFiles);
    }
    return undefined;
}
|
|
975
|
+
/**
 * Assemble the budget settlement record for one attempt.
 *
 * The whole spend (from getUsageUsd) is attributed to `patchCost`;
 * `verificationCost` is always reported as 0 USD with provenance "unavailable".
 * `varianceUsd` is the spend minus the preflight estimate, passed through roundUsd.
 *
 * @param input - Object with `runId`, `attemptIndex`, `usage`, `estimate`
 *   (its `estimatedAttemptCostUsd` is read) and `settledAt`.
 * @returns The settlement record.
 */
function createBudgetSettlement(input) {
    const { usage } = input;
    const spentUsd = getUsageUsd(usage);
    const patchCost = {
        usd: spentUsd,
        tokensIn: usage.tokensIn,
        tokensOut: usage.tokensOut,
        provenance: getUsageProvenance(usage)
    };
    const verificationCost = { usd: 0, provenance: "unavailable" };
    const preflightEstimateUsd = input.estimate.estimatedAttemptCostUsd;
    const varianceUsd = roundUsd(spentUsd - input.estimate.estimatedAttemptCostUsd);
    return {
        runId: input.runId,
        attemptIndex: input.attemptIndex,
        patchCost,
        verificationCost,
        totalActualUsd: spentUsd,
        preflightEstimateUsd,
        varianceUsd,
        settledAt: input.settledAt
    };
}
|
|
996
|
+
/**
 * Round a USD amount to whole cents.
 *
 * The scaled value is first normalized to 15 significant digits so that binary
 * floating-point representation error cannot push a decimal half-cent value
 * just below the rounding threshold. For example `1.005 * 100` evaluates to
 * `100.49999999999999`, which a bare `Math.round` turns into 1.00 instead of 1.01.
 *
 * Exact ties still follow `Math.round` (toward positive infinity), and NaN or
 * infinite inputs pass through unchanged.
 *
 * @param value - Amount in USD.
 * @returns The amount rounded to two decimal places.
 */
function roundUsd(value) {
    const scaledCents = value * 100;
    // 15 significant digits is within double precision, so this only strips
    // representation noise and never discards real input digits.
    const normalizedCents = Number(scaledCents.toPrecision(15));
    return Math.round(normalizedCents) / 100;
}
|
|
999
|
+
/**
 * Produce an identifier for the given prefix.
 *
 * When an `idFactory` is supplied it is called with the prefix and its result
 * is returned untouched. Otherwise the id is `<prefix>_<suffix>`, where the
 * suffix is up to eight base-36 characters taken from `Math.random()`
 * (not cryptographically secure).
 *
 * @param prefix - Leading part of the generated id.
 * @param idFactory - Optional function `(prefix) => id`.
 * @returns The identifier.
 */
function makeId(prefix, idFactory) {
    if (!idFactory) {
        const randomSuffix = Math.random().toString(36).slice(2, 10);
        return `${prefix}_${randomSuffix}`;
    }
    return idFactory(prefix);
}
|
|
1004
|
+
/**
 * Pick the violation list to report for a safety decision.
 *
 * Decisions on the "command" surface expose `blockedCommands`; every other
 * surface exposes `violations`.
 *
 * @param decision - Safety decision with a `surface` field.
 * @returns `decision.blockedCommands` or `decision.violations`.
 */
function serializeSafetyViolations(decision) {
    const isCommandSurface = decision.surface === "command";
    return isCommandSurface ? decision.blockedCommands : decision.violations;
}
|
|
1010
|
+
/**
 * Build the leash artifact stored for a blocked attempt.
 *
 * `blocked` is always `true`. `profile` and `reason` are copied from the
 * decision only when truthy; `violations` is always copied.
 *
 * @param decision - Leash decision; `surface`, `profile`, `reason` and
 *   `violations` are read.
 * @param attemptIndex - Index of the attempt the artifact belongs to.
 * @returns The artifact object.
 */
function createLeashArtifact(decision, attemptIndex) {
    const artifact = {
        attemptIndex,
        surface: decision.surface,
        blocked: true
    };
    if (decision.profile) {
        artifact.profile = decision.profile;
    }
    if (decision.reason) {
        artifact.reason = decision.reason;
    }
    // Assigned last so the key order matches: optional fields, then violations.
    artifact.violations = decision.violations;
    return artifact;
}
|
|
1020
|
+
/**
 * Score the most recent verification recorded on a loop.
 *
 * Scans `loop.events` from newest to oldest for the first event of type
 * "verification.completed" and returns 1 when its payload's `passed` is
 * strictly `true`, otherwise 0. Returns 0 when no such event exists.
 *
 * @param loop - Loop record with an `events` array.
 * @returns 1 or 0.
 */
function getLastVerifierScore(loop) {
    let cursor = loop.events.length;
    while (cursor-- > 0) {
        const candidate = loop.events[cursor];
        // Optional chaining tolerates holes / undefined entries in the list.
        if (candidate?.type === "verification.completed") {
            return candidate.payload["passed"] === true ? 1 : 0;
        }
    }
    return 0;
}
|
|
1030
|
+
/**
 * Reduce a patch decision to the fields stored in the attempt artifact.
 *
 * @param decision - Patch decision; `decision`, `summary` and `reasonCodes` are read.
 * @returns A new object containing only those three fields.
 */
function toPatchDecisionArtifact(decision) {
    const { decision: verdict, summary, reasonCodes } = decision;
    return { decision: verdict, summary, reasonCodes };
}
|
|
1037
|
+
/**
 * Map a non-KEEP patch decision onto a failure classification.
 *
 * The decision's `reasonCodes` are checked against an ordered rule list and the
 * first rule with a matching code wins. When no rule matches, the attempt is
 * classified as "no_progress" with a "compress_context" intervention.
 *
 * @param decision - Patch decision; only `reasonCodes` is read.
 * @returns A new object with `failureClass`, `rationale`, `retryable` and
 *   `recommendedIntervention`.
 */
function classifyPatchDecisionFailure(decision) {
    // Order matters: earlier rules take priority over later ones.
    const rules = [
        {
            codes: ["grounding_failure"],
            failureClass: "repo_grounding_failure",
            rationale: "Patch truth discarded the attempt because grounding evidence contradicted the patch.",
            retryable: true,
            recommendedIntervention: "run_verifier"
        },
        {
            codes: ["scope_violation"],
            failureClass: "scope_creep",
            rationale: "Patch truth discarded the attempt because it changed files outside the task scope.",
            retryable: true,
            recommendedIntervention: "tighten_task"
        },
        {
            codes: ["verifier_regressed", "large_diff_no_improvement"],
            failureClass: "test_regression",
            rationale: "Patch truth discarded the attempt because the verifier regressed or stopped improving.",
            retryable: true,
            recommendedIntervention: "run_verifier"
        },
        {
            codes: ["human_approval_required"],
            failureClass: "scope_creep",
            rationale: "Patch truth escalated the attempt because it requires explicit human approval.",
            retryable: false,
            recommendedIntervention: "escalate_human"
        },
        {
            codes: ["safety_violation"],
            failureClass: "scope_creep",
            rationale: "Patch truth escalated the attempt because safety evidence blocked it.",
            retryable: false,
            recommendedIntervention: "escalate_human"
        }
    ];
    const fallback = {
        failureClass: "no_progress",
        rationale: "Patch truth discarded the attempt because it did not produce a trustworthy code change.",
        retryable: true,
        recommendedIntervention: "compress_context"
    };
    const matched = rules.find((rule) => rule.codes.some((code) => decision.reasonCodes.includes(code)));
    const { failureClass, rationale, retryable, recommendedIntervention } = matched ?? fallback;
    return { failureClass, rationale, retryable, recommendedIntervention };
}
|
|
1086
|
+
/**
 * Return a copy of the loop in which the matching attempt carries the patch failure.
 *
 * The attempt whose `attemptId` equals `input.attemptId` is replaced by a copy
 * with `summary`, `failureClass` and `intervention` overwritten; all other
 * attempts are reused as-is. Neither the loop nor its attempts are mutated.
 *
 * @param loop - Loop record with an `attempts` array.
 * @param input - Object with `attemptId`, `summary` and `failure`
 *   (`failureClass` and `recommendedIntervention` are read).
 * @returns The updated loop copy.
 */
function applyPatchFailureToLoop(loop, input) {
    const annotate = (attempt) => {
        if (attempt.attemptId !== input.attemptId) {
            return attempt;
        }
        return {
            ...attempt,
            summary: input.summary,
            failureClass: input.failure.failureClass,
            intervention: input.failure.recommendedIntervention
        };
    };
    const attempts = loop.attempts.map(annotate);
    return { ...loop, attempts };
}
|
|
1099
|
+
//# sourceMappingURL=index.js.map
|