martin-loop 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/LICENSE +21 -0
  2. package/README.md +331 -58
  3. package/dist/bin/martin-loop.js +12 -8
  4. package/dist/index.d.ts +21 -8
  5. package/dist/index.js +31 -9
  6. package/dist/vendor/adapters/claude-cli.d.ts +89 -0
  7. package/dist/vendor/adapters/claude-cli.js +555 -0
  8. package/dist/vendor/adapters/cli-bridge.d.ts +28 -0
  9. package/dist/vendor/adapters/cli-bridge.js +127 -0
  10. package/dist/vendor/adapters/direct-provider.d.ts +10 -0
  11. package/dist/vendor/adapters/direct-provider.js +41 -0
  12. package/dist/vendor/adapters/index.d.ts +5 -0
  13. package/dist/vendor/adapters/index.js +5 -0
  14. package/dist/vendor/adapters/runtime-support.d.ts +14 -0
  15. package/dist/vendor/adapters/runtime-support.js +52 -0
  16. package/dist/vendor/adapters/stub-agent-cli.d.ts +8 -0
  17. package/dist/vendor/adapters/stub-agent-cli.js +41 -0
  18. package/dist/vendor/adapters/stub-direct-provider.d.ts +8 -0
  19. package/dist/vendor/adapters/stub-direct-provider.js +10 -0
  20. package/dist/vendor/cli/bin/martin.js +19 -0
  21. package/dist/vendor/cli/index.d.ts +39 -0
  22. package/dist/vendor/cli/index.js +634 -0
  23. package/dist/vendor/cli/persistence.d.ts +34 -0
  24. package/dist/vendor/cli/persistence.js +71 -0
  25. package/dist/vendor/contracts/governance.d.ts +21 -0
  26. package/dist/vendor/contracts/governance.js +12 -0
  27. package/dist/vendor/contracts/index.d.ts +330 -0
  28. package/dist/vendor/contracts/index.js +203 -0
  29. package/dist/vendor/core/compiler.d.ts +50 -0
  30. package/dist/vendor/core/compiler.js +47 -0
  31. package/dist/vendor/core/grounding.d.ts +37 -0
  32. package/dist/vendor/core/grounding.js +270 -0
  33. package/dist/vendor/core/index.d.ts +145 -0
  34. package/dist/vendor/core/index.js +1099 -0
  35. package/dist/vendor/core/leash.d.ts +48 -0
  36. package/dist/vendor/core/leash.js +408 -0
  37. package/dist/vendor/core/persistence/compiler.d.ts +18 -0
  38. package/dist/vendor/core/persistence/compiler.js +35 -0
  39. package/dist/vendor/core/persistence/index.d.ts +6 -0
  40. package/dist/vendor/core/persistence/index.js +4 -0
  41. package/dist/vendor/core/persistence/ledger.d.ts +23 -0
  42. package/dist/vendor/core/persistence/ledger.js +10 -0
  43. package/dist/vendor/core/persistence/store.d.ts +77 -0
  44. package/dist/vendor/core/persistence/store.js +84 -0
  45. package/dist/vendor/core/policy.d.ts +126 -0
  46. package/dist/vendor/core/policy.js +625 -0
  47. package/dist/vendor/core/rollback.d.ts +11 -0
  48. package/dist/vendor/core/rollback.js +219 -0
  49. package/docs/oss/EXAMPLES.md +126 -0
  50. package/docs/oss/OSS-BOUNDARY-REPORT.json +113 -0
  51. package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -0
  52. package/docs/oss/QUICKSTART.md +135 -0
  53. package/docs/{README.md → oss/README.md} +17 -13
  54. package/docs/oss/RELEASE-SURFACE-REPORT.json +45 -0
  55. package/docs/oss/RELEASE-SURFACE-REPORT.md +35 -0
  56. package/package.json +27 -35
  57. package/dist/bin/martin-loop.js.map +0 -1
  58. package/dist/index.js.map +0 -1
  59. package/docs/EXAMPLES.md +0 -96
  60. package/docs/QUICKSTART.md +0 -127
  61. package/docs/release/CLAIM-TO-CAPABILITY.md +0 -19
  62. /package/dist/{bin/martin-loop.d.ts → vendor/cli/bin/martin.d.ts} +0 -0
@@ -0,0 +1,625 @@
/**
 * Translates a PolicyPhase into the LoopLifecycleState recorded in events and persistence.
 * FailureClass stays a thin output label — policy reads the EvidenceVector, not the label.
 *
 * @param phase - Policy phase name ("GATHER", "ADMIT", "PATCH", "VERIFY", "RECOVER",
 *   "ESCALATE", "HANDOFF" or "ABORT").
 * @returns "verifying", "human_escalation", "stuck_exit", or "running" for every other
 *   phase (including unrecognised values).
 */
export function policyPhaseToLifecycleState(phase) {
    if (phase === "VERIFY") {
        return "verifying";
    }
    if (phase === "ESCALATE" || phase === "HANDOFF") {
        return "human_escalation";
    }
    if (phase === "ABORT") {
        return "stuck_exit";
    }
    // GATHER, ADMIT, PATCH, RECOVER and anything unknown all count as active work.
    return "running";
}
/**
 * Determines the next PolicyPhase given the current phase and the last attempt result.
 *
 * Precedence:
 *  1. Terminal phases (ABORT, HANDOFF) never transition — they are returned unchanged
 *     regardless of the result or budget state.
 *  2. A completed result whose verification passed moves to HANDOFF.
 *  3. A cost state with `shouldStop` set moves to ABORT.
 *  4. Otherwise the phase table below applies; an unrecognised phase resumes at PATCH.
 *
 * @param current - The phase the loop is currently in.
 * @param result - Last attempt result; `status` and `verification.passed` are read.
 * @param costState - Budget governor output; `shouldStop` is read.
 * @param retryCount - Retries so far; VERIFY escalates at >= 2, RECOVER at >= 3.
 * @returns The next phase name.
 */
export function nextPolicyPhase(current, result, costState, retryCount) {
    // Terminal phases must be checked first: previously the success/budget shortcuts
    // below could move an already-aborted loop to HANDOFF (or a handed-off loop to ABORT).
    if (current === "ABORT" || current === "HANDOFF") {
        return current;
    }
    // Successful verification → complete via HANDOFF
    if (result.status === "completed" && result.verification.passed) {
        return "HANDOFF";
    }
    // Hard budget limit → ABORT
    if (costState.shouldStop) {
        return "ABORT";
    }
    switch (current) {
        case "GATHER":
            return "ADMIT";
        case "ADMIT":
            return "PATCH";
        case "PATCH":
            return "VERIFY";
        case "VERIFY":
            return retryCount >= 2 ? "ESCALATE" : "RECOVER";
        case "RECOVER":
            return retryCount >= 3 ? "ESCALATE" : "PATCH";
        case "ESCALATE":
            return "ABORT";
        default:
            // Unknown phase: fall back to attempting a patch.
            return "PATCH";
    }
}
/**
 * Builds a failure assessment ({ failureClass, rationale, retryable,
 * recommendedIntervention }) for an attempt result.
 *
 * Resolution order:
 *  1. If the result carries `failure.classHint`, the hint is mapped directly via
 *     mapClassHintToAssessment.
 *  2. Otherwise the result summary, verification summary and failure message are joined,
 *     lower-cased, and scanned against keyword groups; the first matching group wins.
 *  3. With no keyword match: a failed verification, then an oscillating attempt history,
 *     then a generic logic-error fallback.
 *
 * @param input - `{ result, attempts }`; `attempts` entries are read for `failureClass`.
 * @returns The failure assessment object.
 */
export function classifyFailure(input) {
    const { result, attempts } = input;
    const hint = result.failure?.classHint;
    if (hint) {
        return mapClassHintToAssessment(hint, attempts);
    }
    const haystack = [result.summary, result.verification.summary, result.failure?.message]
        .filter((part) => Boolean(part))
        .join(" ")
        .toLowerCase();
    const repeated = detectRepeatedFailure(attempts);
    // A class that already repeated gets a tighter task instead of another verifier run.
    const verifierOrTighten = (failureClass) => (repeated === failureClass ? "tighten_task" : "run_verifier");
    // Ordered: the first rule whose keywords appear (un-negated) in the text wins.
    const keywordRules = [
        {
            needles: ["enoent", "command not found", "missing from path", "not in path", "could not find"],
            assessment: {
                failureClass: "environment_mismatch",
                rationale: "The adapter could not access the required local runtime or CLI tooling.",
                retryable: false,
                recommendedIntervention: "switch_adapter"
            }
        },
        {
            needles: ["syntax error", "parse error", "typescript error"],
            assessment: {
                failureClass: "syntax_error",
                rationale: "The attempt failed on a parser or compiler-style issue.",
                retryable: true,
                recommendedIntervention: "compress_context"
            }
        },
        {
            needles: [
                "cannot find module",
                "module not found",
                "cannot resolve module",
                "cannot find name",
                "does not exist on type",
                "no such file or directory",
                "unknown import",
                "unknown symbol",
                "undefined reference"
            ],
            assessment: {
                failureClass: "repo_grounding_failure",
                rationale: "The attempt referenced modules, symbols, or files that are not grounded in the repo or approved docs.",
                retryable: true,
                recommendedIntervention: verifierOrTighten("repo_grounding_failure")
            }
        },
        {
            needles: ["verification failed", "test failed", "regression", "failing test", "assertionerror"],
            assessment: {
                failureClass: "verification_failure",
                rationale: "The proposed fix did not satisfy the verification gate.",
                retryable: true,
                recommendedIntervention: verifierOrTighten("verification_failure")
            }
        },
        {
            needles: ["scope creep", "too broad", "unrelated files", "outside allowed paths", "forbidden file"],
            assessment: {
                failureClass: "scope_creep",
                rationale: "The attempt drifted away from the original task boundary.",
                retryable: true,
                recommendedIntervention: "tighten_task"
            }
        }
    ];
    const matchedRule = keywordRules.find((rule) => containsPositive(haystack, rule.needles));
    if (matchedRule) {
        return matchedRule.assessment;
    }
    if (!result.verification.passed) {
        return {
            failureClass: "verification_failure",
            rationale: "Verification did not pass and no specific failure pattern was detected.",
            retryable: true,
            recommendedIntervention: verifierOrTighten("verification_failure")
        };
    }
    if (detectOscillation(attempts)) {
        return {
            failureClass: "logic_error",
            rationale: "Oscillating failure pattern detected — switching strategy to break the cycle.",
            retryable: true,
            recommendedIntervention: "change_model"
        };
    }
    return {
        failureClass: repeated ?? "logic_error",
        rationale: "The loop is still failing to produce a correct implementation.",
        retryable: true,
        recommendedIntervention: repeated === "logic_error" ? "change_model" : "compress_context"
    };
}
/**
 * Compares spend, attempts and token usage against the budget and reports the
 * resulting pressure level.
 *
 * @param input - `{ budget, cost, attemptsUsed }`. Reads `budget.maxUsd`,
 *   `budget.softLimitUsd`, `budget.maxIterations`, `budget.maxTokens`, `cost.actualUsd`,
 *   `cost.tokensIn` and `cost.tokensOut`.
 * @returns `{ pressure, shouldStop, remainingBudgetUsd, remainingIterations,
 *   remainingTokens }`, plus `recommendedIntervention` when a soft or hard limit is hit.
 *   Remaining iterations/tokens are floored at zero; the remaining USD is only rounded.
 */
export function evaluateCostGovernor(input) {
    const { budget, cost, attemptsUsed } = input;
    const remaining = {
        remainingBudgetUsd: roundUsd(budget.maxUsd - cost.actualUsd),
        remainingIterations: Math.max(budget.maxIterations - attemptsUsed, 0),
        remainingTokens: Math.max(budget.maxTokens - cost.tokensIn - cost.tokensOut, 0)
    };
    // Any single exhausted dimension (money, attempts, tokens) is a hard stop.
    const hardLimitReached = cost.actualUsd >= budget.maxUsd ||
        attemptsUsed >= budget.maxIterations ||
        remaining.remainingTokens <= 0;
    if (hardLimitReached) {
        return {
            pressure: "hard_limit",
            shouldStop: true,
            ...remaining,
            recommendedIntervention: "stop_loop"
        };
    }
    if (cost.actualUsd >= budget.softLimitUsd) {
        return {
            pressure: "soft_limit",
            shouldStop: false,
            ...remaining,
            recommendedIntervention: "compress_context"
        };
    }
    return {
        pressure: "healthy",
        shouldStop: false,
        ...remaining
    };
}
/**
 * Decides whether the loop should stop after the latest attempt.
 *
 * Checks, in order: verified completion, hard budget stop, oscillating failure classes,
 * the same failure class on the last two attempts (matching `lastFailure`), and a
 * non-retryable environment mismatch with no adapter to switch to. If none apply the
 * loop keeps running.
 *
 * @param input - `{ lastResult, costState, loop, lastFailure, canSwitchAdapter }`.
 * @returns `{ shouldExit, lifecycleState, status, reason }`.
 */
export function inferExit(input) {
    const { lastResult, costState, lastFailure } = input;
    // Every non-success exit shares the same shape apart from state and reason.
    const exitWith = (lifecycleState, reason) => ({
        shouldExit: true,
        lifecycleState,
        status: "exited",
        reason
    });
    if (lastResult.status === "completed" && lastResult.verification.passed) {
        return {
            shouldExit: true,
            lifecycleState: "completed",
            status: "completed",
            reason: "Martin verified the fix and can hard-complete the loop."
        };
    }
    if (costState.shouldStop) {
        return exitWith("budget_exit", "Martin exited because the budget governor hit a hard limit.");
    }
    // The attempt history is only consulted once the cheap exits above are ruled out.
    const attempts = input.loop.attempts;
    if (detectOscillation(attempts)) {
        return exitWith("diminishing_returns", "Oscillating failure pattern detected — loop is cycling without measurable progress.");
    }
    const recent = attempts.slice(-2);
    const sameClassTwice = recent.length === 2 &&
        recent.every((attempt) => attempt.failureClass &&
            attempt.failureClass === recent[0]?.failureClass &&
            attempt.failureClass === lastFailure?.failureClass);
    if (sameClassTwice && lastFailure) {
        return exitWith("diminishing_returns", `Martin exited because ${lastFailure.failureClass} repeated across consecutive attempts.`);
    }
    const environmentDeadEnd = lastFailure?.failureClass === "environment_mismatch" &&
        !lastFailure.retryable &&
        !input.canSwitchAdapter;
    if (environmentDeadEnd) {
        return exitWith("stuck_exit", "Martin exited because the runtime environment could not support the requested adapter.");
    }
    return {
        shouldExit: false,
        lifecycleState: "running",
        status: "running",
        reason: "Martin should continue with another attempt."
    };
}
/**
 * Reports whether any needle appears in `haystack` in a non-negated position.
 *
 * An occurrence counts as negated when the (up to 30) characters before it end with a
 * negating/clearing word such as "no", "not", "fixed", "passed" or "0". Every occurrence
 * of each needle is examined, so a negated first mention ("0 test failed ...") no longer
 * hides a later genuine one ("... 3 test failed").
 *
 * @param haystack - Text to search; callers pass it already lower-cased.
 * @param needles - Substrings to look for.
 * @returns true when at least one needle has at least one un-negated occurrence.
 */
function containsPositive(haystack, needles) {
    const negatedPrefix = /\b(no|not|without|zero|pass|passes|passing|passed|clear|cleared|fix|fixed|resolve|resolved|0)\s*$/u;
    return needles.some((needle) => {
        // Advance by at least one character so an empty needle cannot loop forever.
        const step = Math.max(needle.length, 1);
        let idx = haystack.indexOf(needle);
        while (idx !== -1) {
            const before = haystack.slice(Math.max(0, idx - 30), idx);
            if (!negatedPrefix.test(before)) {
                return true;
            }
            idx = haystack.indexOf(needle, idx + step);
        }
        return false;
    });
}
/**
 * Detects an A-B-A alternation of failure classes in the recent attempt history.
 *
 * Takes the last four attempts, drops those without a failureClass, and — when at least
 * three classes remain — compares the first three of that window: the first must differ
 * from the second and equal the third. (An A-B-A-B check over four entries is implied by
 * this condition, so it needs no separate branch.)
 *
 * @param attempts - Attempt records; only `failureClass` is read.
 * @returns true when the windowed classes start with an A-B-A pattern.
 */
function detectOscillation(attempts) {
    const recentClasses = [];
    for (const attempt of attempts.slice(-4)) {
        if (attempt.failureClass) {
            recentClasses.push(attempt.failureClass);
        }
    }
    if (recentClasses.length < 3) {
        return false;
    }
    const [first, second, third] = recentClasses;
    return first !== second && first === third;
}
/**
 * Converts an adapter-supplied failure class hint into a full assessment.
 *
 * Every assessment produced here is retryable and echoes the hint as `failureClass`;
 * only the rationale and recommended intervention vary. Some interventions change when
 * the same class was already seen on the previous two attempts.
 *
 * @param classHint - Failure class reported by the adapter.
 * @param attempts - Attempt history, used to detect a repeated failure class.
 * @returns `{ failureClass, rationale, retryable, recommendedIntervention }`.
 */
function mapClassHintToAssessment(classHint, attempts) {
    const repeated = detectRepeatedFailure(attempts);
    const assess = (rationale, recommendedIntervention) => ({
        failureClass: classHint,
        rationale,
        retryable: true,
        recommendedIntervention
    });
    if (classHint === "syntax_error") {
        return assess("Structural evidence: non-zero exit code with compiler-like error output.", "compress_context");
    }
    if (classHint === "repo_grounding_failure") {
        return assess("Structural evidence: missing repo module, symbol, or file reference.", repeated === "repo_grounding_failure" ? "tighten_task" : "run_verifier");
    }
    if (classHint === "scope_creep") {
        return assess("Structural evidence: agent output exceeded the task contract or touched forbidden files.", "tighten_task");
    }
    if (classHint === "hallucination") {
        return assess("Structural evidence: suspiciously short or trivial agent response.", repeated === "hallucination" ? "change_model" : "run_verifier");
    }
    if (classHint === "verification_failure" || classHint === "test_regression") {
        return assess("Structural evidence: verification commands did not pass.", repeated === classHint ? "tighten_task" : "run_verifier");
    }
    // Any other hint is passed through; a repeat of any class triggers a model change.
    return assess("Structural hint provided by adapter.", repeated ? "change_model" : "compress_context");
}
/**
 * Returns the failure class shared by the two most recent attempts, if any.
 *
 * @param attempts - Attempt records; only `failureClass` is read.
 * @returns The repeated failure class, or undefined when there are fewer than two
 *   attempts, the older of the two has no failure class, or the classes differ.
 */
function detectRepeatedFailure(attempts) {
    const tail = attempts.slice(-2);
    if (tail.length !== 2) {
        return undefined;
    }
    const [previous, latest] = tail;
    const previousClass = previous?.failureClass;
    if (!previousClass) {
        return undefined;
    }
    return previousClass === latest?.failureClass ? previousClass : undefined;
}
/**
 * Rounds a dollar amount to two decimal places.
 *
 * @param value - Amount in USD.
 * @returns The amount rounded to the nearest cent (via Math.round on the cent value).
 */
function roundUsd(value) {
    const cents = Math.round(value * 100);
    return cents / 100;
}
/**
 * Estimates the cost of the next attempt and decides whether the budget allows it.
 *
 * The estimate assumes ~4 characters per token with a 20% margin for the prompt,
 * 800 tokens of tool overhead plus 200 per prior attempt, a 4,000-token output ceiling,
 * and a flat $0.01 verifier cost. The attempt is rejected when the estimate exceeds the
 * remaining budget or the per-attempt cap (default: 20% of the remaining budget, with a
 * $0.05 floor).
 *
 * @param input - `{ promptCharCount, attemptCount, remainingBudgetUsd,
 *   pricePerMTokenUsd?, perAttemptCapUsd? }`. The price defaults to $3.00 per million tokens.
 * @returns `{ allowed, reason, estimate }` where `estimate` carries the token and cost
 *   figures with `provenance: "estimated"`.
 */
export function evaluateBudgetPreflight(input) {
    const pricePerMToken = input.pricePerMTokenUsd ?? 3.0;
    const rawPromptTokens = Math.ceil(input.promptCharCount / 4);
    const estimatedPromptTokens = Math.ceil(rawPromptTokens * 1.2);
    const estimatedToolOverheadTokens = 800 + input.attemptCount * 200;
    const estimatedOutputTokensMax = 4_000;
    const estimatedVerifierCostUsd = 0.01;
    const estimatedTokenCostUsd = roundUsd(((estimatedPromptTokens + estimatedToolOverheadTokens + estimatedOutputTokensMax) /
        1_000_000) *
        pricePerMToken);
    // Re-round after adding the verifier cost: a bare float add leaves noise such as
    // 0.05 + 0.01 = 0.060000000000000005, which leaked into the reason text and made
    // the `>` checks below fail when the estimate exactly equalled the budget or cap.
    const estimatedAttemptCostUsd = roundUsd(estimatedTokenCostUsd + estimatedVerifierCostUsd);
    const provenance = "estimated";
    const estimate = {
        estimatedPromptTokens,
        estimatedToolOverheadTokens,
        estimatedOutputTokensMax,
        estimatedVerifierCostUsd,
        estimatedAttemptCostUsd,
        provenance
    };
    if (estimatedAttemptCostUsd > input.remainingBudgetUsd) {
        return {
            allowed: false,
            reason: `Preflight: estimated attempt cost $${estimatedAttemptCostUsd} exceeds remaining budget $${input.remainingBudgetUsd}.`,
            estimate
        };
    }
    // Without an explicit cap, one attempt may use at most a fifth of what is left,
    // but never less than five cents.
    const perAttemptCap = input.perAttemptCapUsd ?? Math.max(input.remainingBudgetUsd * 0.2, 0.05);
    if (estimatedAttemptCostUsd > perAttemptCap) {
        return {
            allowed: false,
            reason: `Preflight: estimated attempt cost $${estimatedAttemptCostUsd} exceeds per-attempt cap $${roundUsd(perAttemptCap)}.`,
            estimate
        };
    }
    return {
        allowed: true,
        reason: "Preflight passed.",
        estimate
    };
}
/**
 * Derives the numeric evidence vector for an attempt from raw compiler/test output,
 * diffs, verifier scores and violation lists.
 *
 * - `typeErrors` counts "error TS<digits>" occurrences; `compileErrors` adds "SyntaxError".
 * - `failingTests` counts "FAIL"/"failed" (case-insensitive) and ✗/× marks in test output.
 * - `diffNovelty` is 1 minus the identifier-token overlap with the previous diff
 *   (1 when either diff is missing or the previous diff has no tokens).
 * - `costPerProgressUnit` is actual USD per unit of verifier-score gain, 0 without gain.
 * - `safetyRiskScore` is 0.3 per forbidden file plus 0.1 per missing symbol, capped at 1.
 *
 * @param input - Raw attempt observations; every field is optional.
 * @returns The evidence vector object.
 */
export function computeEvidenceVector(input) {
    // Empty or missing text contributes zero matches.
    const countMatches = (text, pattern) => (text ? (text.match(pattern) ?? []).length : 0);
    const { compilerOutput, testOutput } = input;
    const typeErrors = countMatches(compilerOutput, /\berror TS\d+/g);
    const compileErrors = typeErrors + countMatches(compilerOutput, /\bSyntaxError\b/g);
    const failingTests = countMatches(testOutput, /\bFAIL\b/gi) +
        countMatches(testOutput, /\bfailed\b/gi) +
        countMatches(testOutput, /[✗×]/g);
    const verifierScore = input.verifierScore ?? 0;
    let diffNovelty = 1;
    if (input.diff && input.previousDiff) {
        const current = tokenizeDiff(input.diff);
        const previous = tokenizeDiff(input.previousDiff);
        if (previous.size > 0) {
            let shared = 0;
            for (const token of current) {
                if (previous.has(token)) {
                    shared += 1;
                }
            }
            const similarity = shared / Math.max(current.size, previous.size, 1);
            diffNovelty = Math.max(0, 1 - similarity);
        }
    }
    const forbiddenTouchedFileCount = input.forbiddenTouchedFiles?.length ?? 0;
    const missingSymbolCount = input.missingSymbols?.length ?? 0;
    const bothScoresKnown = input.verifierScore !== undefined && input.previousVerifierScore !== undefined;
    const progressDelta = bothScoresKnown ? Math.max(0, input.verifierScore - input.previousVerifierScore) : 0;
    let costPerProgressUnit = 0;
    if (progressDelta > 0 && input.actualUsd !== undefined) {
        costPerProgressUnit = roundUsd(input.actualUsd / progressDelta);
    }
    const safetyRiskScore = Math.min(1, forbiddenTouchedFileCount * 0.3 + missingSymbolCount * 0.1);
    return {
        compileErrors,
        typeErrors,
        failingTests,
        verifierScore,
        diffNovelty,
        forbiddenTouchedFileCount,
        missingSymbolCount,
        costPerProgressUnit,
        retryCountForSurface: input.retryCountForSurface ?? 0,
        safetyRiskScore
    };
}
/**
 * Picks a recovery recipe from an evidence vector. Rules are evaluated top to bottom and
 * the first match wins: safety abort, missing symbols, forbidden files, compile/type
 * errors, failing tests, low diff novelty, poor cost efficiency / exhausted retries, and
 * finally human escalation.
 *
 * @param evidence - Evidence vector (as produced by computeEvidenceVector).
 * @returns `{ recipe, rationale, intervention }`.
 */
export function selectRecoveryRecipe(evidence) {
    const plan = (recipe, rationale, intervention) => ({ recipe, rationale, intervention });
    const { safetyRiskScore, forbiddenTouchedFileCount, missingSymbolCount, retryCountForSurface: retries } = evidence;
    if (safetyRiskScore >= 0.7 || forbiddenTouchedFileCount > 2) {
        return plan("abort_safety_violation", "High safety risk score or multiple forbidden file touches. Abort required.", "escalate_human");
    }
    if (missingSymbolCount > 0 && retries <= 1) {
        return plan("force_repo_anatomy_slices", "Missing symbols detected. Force repo grounding context into the next prompt.", "run_verifier");
    }
    if (forbiddenTouchedFileCount > 0) {
        return plan("tighten_allowlist_reduce_patch", "Patch touched forbidden files. Tighten scope and reduce patch budget.", "tighten_task");
    }
    if (evidence.compileErrors > 0 || evidence.typeErrors > 0) {
        return plan("narrow_prompt_targeted_files", "Compile or type errors detected. Narrow the next prompt to targeted files.", "compress_context");
    }
    if (evidence.failingTests > 0 && retries <= 2) {
        return plan("failing_tests_only", "Test failures remain. Focus on the failing tests and touched files only.", "run_verifier");
    }
    if (evidence.diffNovelty < 0.2) {
        // Repetition after several retries warrants a different strategy, not just a narrower prompt.
        return retries >= 2
            ? plan("strategy_swap", "Very low diff novelty across repeated retries. Swap strategy.", "change_model")
            : plan("narrow_prompt_targeted_files", "Low diff novelty suggests repetition. Compress context and narrow focus.", "compress_context");
    }
    if (evidence.costPerProgressUnit > 5 || retries >= 3) {
        return plan("downgrade_model", "Cost efficiency is degrading or retries are exhausted. Downgrade the model.", "change_model");
    }
    return plan("escalate_human", "No specific recovery pattern matched. Escalate for human review.", "escalate_human");
}
/**
 * Scores a patch, decides its outcome and assembles a human-readable summary.
 *
 * @param input - Patch observations; passed to scorePatchDecision and decidePatchOutcome.
 *   `input.summary`, when present, is appended to the summary text.
 * @returns `{ decision, summary, reasonCodes, score }`, where `reasonCodes` is a copy of
 *   the codes on `score`.
 */
export function evaluatePatchDecision(input) {
    const score = scorePatchDecision(input);
    // Copy so the returned list is independent of the one held by the score object.
    const reasonCodes = Array.from(score.reasonCodes);
    const decision = decidePatchOutcome(input, reasonCodes);
    const summary = buildPatchDecisionSummary(decision, reasonCodes, input.summary);
    return { decision, summary, reasonCodes, score };
}
/**
 * Computes a patch score in [-1, 1] together with the reason codes that explain it.
 *
 * A passing verification contributes +0.55 and positive verifier movement up to +0.2 per
 * unit; grounding, scope and safety violations, required human approval, an empty change
 * set, stalled low-novelty or large diffs, diff risk and cost all subtract from the score.
 *
 * @param input - Patch observations. Missing counts default to 0; a missing
 *   `verifierScore` defaults to 1 or 0 depending on `verificationPassed`; a missing
 *   `diffNovelty` defaults to 1 when files changed and 0 otherwise.
 * @returns The score breakdown, including `score`, the normalised inputs and `reasonCodes`.
 */
export function scorePatchDecision(input) {
    const passed = input.verificationPassed;
    const verifierScore = input.verifierScore ?? (passed ? 1 : 0);
    const verifierDelta = roundScore(verifierScore - (input.previousVerifierScore ?? 0));
    const groundingViolationCount = input.groundingViolationCount ?? 0;
    const scopeViolationCount = input.scopeViolationCount ?? 0;
    const safetyViolationCount = input.safetyViolationCount ?? 0;
    const changedFileCount = input.changedFileCount ?? 0;
    const noveltyScore = input.diffNovelty ?? (changedFileCount > 0 ? 1 : 0);
    const diffRiskScore = computeDiffRiskScore(input.diffStats);
    const costUsd = roundUsd(input.costUsd ?? 0);
    // "No code change" only counts when the caller actually reported a file count.
    const nothingChanged = input.changedFileCount !== undefined && changedFileCount === 0;
    const lowNoveltyStall = !passed && noveltyScore < 0.2 && verifierDelta <= 0;
    const largeDiffStall = !passed && diffRiskScore >= 0.7 && verifierDelta <= 0;
    // Ordered candidate list: the order here is the order codes appear in the output.
    const reasonCodes = [
        [passed, "verifier_passed"],
        [groundingViolationCount > 0, "grounding_failure"],
        [scopeViolationCount > 0, "scope_violation"],
        [nothingChanged, "no_code_change"],
        [input.humanApprovalRequired, "human_approval_required"],
        [safetyViolationCount > 0, "safety_violation"],
        [verifierDelta < 0, "verifier_regressed"],
        [lowNoveltyStall, "low_novelty_no_progress"],
        [largeDiffStall, "large_diff_no_improvement"]
    ]
        .filter(([applies]) => applies)
        .map(([, code]) => code);
    // A failed verification always carries at least one reason.
    if (!passed && reasonCodes.length === 0) {
        reasonCodes.push("verifier_not_improved");
    }
    let total = passed ? 0.55 : 0;
    total += Math.max(verifierDelta, 0) * 0.2;
    total -= groundingViolationCount * 0.45;
    total -= scopeViolationCount * 0.35;
    total -= safetyViolationCount * 0.45;
    total -= input.humanApprovalRequired ? 0.25 : 0;
    total -= nothingChanged ? 0.35 : 0;
    total -= lowNoveltyStall ? 0.2 : 0;
    total -= largeDiffStall ? 0.25 : 0;
    total -= diffRiskScore * 0.1;
    total -= Math.min(costUsd / 10, 0.1);
    return {
        score: roundScore(Math.max(-1, Math.min(1, total))),
        verifierScore: roundScore(verifierScore),
        verifierDelta,
        groundingViolationCount,
        scopeViolationCount,
        safetyViolationCount,
        changedFileCount,
        diffRiskScore,
        noveltyScore: roundScore(noveltyScore),
        costUsd,
        reasonCodes
    };
}
/**
 * Collects the distinct identifier-like tokens found on added/removed lines of a diff.
 *
 * Only lines starting with "+" or "-" are considered, excluding the "+++" / "---" file
 * header lines. A token is a letter or underscore followed by at least two more
 * letters, digits or underscores.
 *
 * @param diff - Unified diff text.
 * @returns A Set of tokens.
 */
function tokenizeDiff(diff) {
    const identifierPattern = /[A-Za-z_][A-Za-z0-9_]{2,}/g;
    const isChangeLine = (line) => (line.startsWith("+") || line.startsWith("-")) &&
        !line.startsWith("+++") &&
        !line.startsWith("---");
    const tokens = new Set();
    diff.split("\n")
        .filter(isChangeLine)
        .forEach((line) => {
        // Drop the leading +/- marker before extracting identifiers.
        const found = line.slice(1).match(identifierPattern) ?? [];
        found.forEach((token) => tokens.add(token));
    });
    return tokens;
}
/**
 * Maps the reason codes of a scored patch to a final decision.
 *
 * Escalation (human approval required or a safety violation) takes priority, then any
 * disqualifying reason code forces a discard; otherwise the patch is kept only when
 * verification passed.
 *
 * @param input - Patch observations; `humanApprovalRequired` and `verificationPassed` are read.
 * @param reasonCodes - Reason codes produced for the patch.
 * @returns "ESCALATE", "DISCARD" or "KEEP".
 */
function decidePatchOutcome(input, reasonCodes) {
    const hasReason = (code) => reasonCodes.includes(code);
    if (input.humanApprovalRequired || hasReason("safety_violation")) {
        return "ESCALATE";
    }
    const disqualifyingCodes = [
        "grounding_failure",
        "scope_violation",
        "no_code_change",
        "verifier_regressed",
        "large_diff_no_improvement",
        "low_novelty_no_progress",
        "verifier_not_improved"
    ];
    if (disqualifyingCodes.some((code) => hasReason(code))) {
        return "DISCARD";
    }
    return input.verificationPassed ? "KEEP" : "DISCARD";
}
/**
 * Formats a one-line summary of a patch decision.
 *
 * @param decision - "KEEP", "DISCARD", "ESCALATE" or "HANDOFF".
 * @param reasonCodes - Reason codes; joined with ", " or rendered as "none" when empty.
 * @param summary - Optional attempt summary appended to the end.
 * @returns The summary sentence.
 */
function buildPatchDecisionSummary(decision, reasonCodes, summary) {
    const headlines = {
        KEEP: "Patch kept.",
        DISCARD: "Patch discarded.",
        ESCALATE: "Patch requires escalation.",
        HANDOFF: "Patch requires handoff."
    };
    const reasonText = reasonCodes.join(", ") || "none";
    const base = `${headlines[decision]} Reasons: ${reasonText}.`;
    return summary ? `${base} Attempt summary: ${summary}` : base;
}
/**
 * Scores how risky a diff is from its size, on a 0–1 scale.
 *
 * File risk saturates at 8 changed files and line risk at 200 added-plus-deleted lines;
 * the larger of the two is the score.
 *
 * @param input - Optional `{ filesChanged, addedLines, deletedLines }`.
 * @returns 0 when no stats are given, otherwise the rounded risk score.
 */
function computeDiffRiskScore(input) {
    if (!input) {
        return 0;
    }
    const { filesChanged, addedLines, deletedLines } = input;
    const churn = addedLines + deletedLines;
    return roundScore(Math.max(Math.min(filesChanged / 8, 1), Math.min(churn / 200, 1)));
}
622
+ function roundScore(value) {
623
+ return Math.round(value * 100) / 100;
624
+ }
625
+ //# sourceMappingURL=policy.js.map
@@ -0,0 +1,11 @@
1
+ import type { PatchDecision, RollbackBoundaryArtifact, RollbackOutcomeArtifact } from "../contracts/index.js";
/**
 * Captures a rollback boundary.
 *
 * NOTE(review): only this declaration is visible here — what exactly is captured, and
 * when `undefined` is returned instead of an artifact, is defined by the implementation
 * module; confirm there.
 *
 * @param input.repoRoot - Optional repository root (presumably the directory whose state
 *   is captured — TODO confirm).
 * @param input.capturedAt - Capture timestamp as a string (format not shown here;
 *   presumably ISO-8601 — verify against callers).
 * @returns A promise resolving to a RollbackBoundaryArtifact, or undefined.
 */
export declare function captureRollbackBoundary(input: {
    repoRoot?: string;
    capturedAt: string;
}): Promise<RollbackBoundaryArtifact | undefined>;
/**
 * Restores a previously captured rollback boundary.
 *
 * NOTE(review): only this declaration is visible here — how `decision` influences the
 * restore, and when `undefined` is returned, is defined by the implementation module;
 * confirm there.
 *
 * @param input.repoRoot - Optional repository root.
 * @param input.boundary - Optional boundary artifact (presumably one produced by
 *   captureRollbackBoundary — TODO confirm).
 * @param input.restoredAt - Restore timestamp as a string (format not shown here).
 * @param input.decision - The patch decision associated with this restore.
 * @returns A promise resolving to a RollbackOutcomeArtifact, or undefined.
 */
export declare function restoreRollbackBoundary(input: {
    repoRoot?: string;
    boundary?: RollbackBoundaryArtifact;
    restoredAt: string;
    decision: PatchDecision;
}): Promise<RollbackOutcomeArtifact | undefined>;