@martinloop/mcp 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +181 -41
- package/dist/server-validation.d.ts +10 -0
- package/dist/server-validation.js +234 -0
- package/dist/server.js +59 -15
- package/dist/tools/get-status.d.ts +10 -2
- package/dist/tools/get-status.js +11 -4
- package/dist/tools/inspect-loop.d.ts +4 -2
- package/dist/tools/inspect-loop.js +4 -7
- package/dist/tools/run-loop.d.ts +2 -0
- package/dist/tools/run-loop.js +10 -3
- package/dist/tools/run-store.d.ts +20 -0
- package/dist/tools/run-store.js +109 -0
- package/dist/vendor/adapters/claude-cli.d.ts +19 -4
- package/dist/vendor/adapters/claude-cli.js +55 -24
- package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
- package/dist/vendor/adapters/cli-bridge.js +154 -28
- package/dist/vendor/adapters/index.d.ts +1 -0
- package/dist/vendor/adapters/index.js +1 -0
- package/dist/vendor/adapters/verifier-only.d.ts +7 -0
- package/dist/vendor/adapters/verifier-only.js +57 -0
- package/dist/vendor/contracts/index.d.ts +3 -1
- package/dist/vendor/core/compiler.d.ts +2 -0
- package/dist/vendor/core/compiler.js +10 -4
- package/dist/vendor/core/context-integrity.d.ts +26 -0
- package/dist/vendor/core/context-integrity.js +56 -0
- package/dist/vendor/core/index.d.ts +7 -4
- package/dist/vendor/core/index.js +222 -64
- package/dist/vendor/core/persistence/index.d.ts +2 -0
- package/dist/vendor/core/persistence/index.js +1 -0
- package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
- package/dist/vendor/core/persistence/runs-reader.js +84 -0
- package/dist/vendor/core/persistence/store.d.ts +6 -1
- package/dist/vendor/core/persistence/store.js +5 -0
- package/dist/vendor/core/policy.d.ts +6 -0
- package/package.json +17 -12
- package/server.json +21 -0
|
@@ -5,12 +5,15 @@ import { evaluateChangeApprovalLeash, evaluateFilesystemLeash, evaluateSecretLea
|
|
|
5
5
|
import { buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations } from "./grounding.js";
|
|
6
6
|
import { captureRollbackBoundary, restoreRollbackBoundary } from "./rollback.js";
|
|
7
7
|
import { compilePromptPacket } from "./compiler.js";
|
|
8
|
-
import { makeLedgerEvent } from "./persistence/index.js";
|
|
8
|
+
import { makeLedgerEvent, resolveRunsRoot, runDir } from "./persistence/index.js";
|
|
9
|
+
import { runContextIntegrityPrecheck } from "./context-integrity.js";
|
|
9
10
|
export { classifyFailure, computeEvidenceVector, evaluatePatchDecision, evaluateCostGovernor, evaluateBudgetPreflight, inferExit, nextPolicyPhase, policyPhaseToLifecycleState, scorePatchDecision, selectRecoveryRecipe, evaluateVerificationLeash, evaluateFilesystemLeash, evaluateChangeApprovalLeash, evaluateSecretLeash, resolveExecutionProfile, redactSecretsFromText, buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations, captureRollbackBoundary, restoreRollbackBoundary };
|
|
11
|
+
// ─── Context Integrity Pre-gate ──────────────────────────────────────────────
|
|
12
|
+
export { runContextIntegrityPrecheck } from "./context-integrity.js";
|
|
10
13
|
// ─── Prompt packet compiler ──────────────────────────────────────────────────
|
|
11
14
|
export { compilePromptPacket } from "./compiler.js";
|
|
12
15
|
// ─── Persistence (RunStore, LedgerEvent, FileRunStore) ──────────────────────
|
|
13
|
-
export { createFileRunStore, makeLedgerEvent, resolveRunsRoot } from "./persistence/index.js";
|
|
16
|
+
export { createFileRunStore, makeLedgerEvent, readAllLoopRecords, readLatestLoopRecord, readLatestLoopRecordFromFile, readLoopRecordsFromFile, resolveRunsRoot } from "./persistence/index.js";
|
|
14
17
|
export { compileAndPersistContext } from "./persistence/index.js";
|
|
15
18
|
/**
|
|
16
19
|
* Admission gate — must pass before any attempt is executed.
|
|
@@ -126,6 +129,7 @@ export async function runMartin(input) {
|
|
|
126
129
|
runId: loop.loopId,
|
|
127
130
|
payload: { workspaceId: input.workspaceId, projectId: input.projectId }
|
|
128
131
|
}));
|
|
132
|
+
await persistLoopRecordIfSupported(input.store, loop);
|
|
129
133
|
}
|
|
130
134
|
const DEFAULT_FALLBACK_MODELS = [
|
|
131
135
|
"claude-haiku-4-5",
|
|
@@ -136,6 +140,7 @@ export async function runMartin(input) {
|
|
|
136
140
|
let currentAdapterIndex = 0;
|
|
137
141
|
let currentAdapter = adapterChain[currentAdapterIndex] ?? input.adapter;
|
|
138
142
|
let useCompressedContext = false;
|
|
143
|
+
const isVerifyOnly = input.task.mutationMode === "verify_only";
|
|
139
144
|
const executionProfile = resolveExecutionProfile({
|
|
140
145
|
executionProfile: input.task.executionProfile,
|
|
141
146
|
allowedNetworkDomains: input.task.allowedNetworkDomains
|
|
@@ -153,7 +158,8 @@ export async function runMartin(input) {
|
|
|
153
158
|
shouldExit: true,
|
|
154
159
|
lifecycleState: "human_escalation",
|
|
155
160
|
status: "exited",
|
|
156
|
-
reason
|
|
161
|
+
reason,
|
|
162
|
+
...classifySafetyLeashExit(leashDecision, "verifier")
|
|
157
163
|
};
|
|
158
164
|
if (input.store) {
|
|
159
165
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
@@ -169,15 +175,13 @@ export async function runMartin(input) {
|
|
|
169
175
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
170
176
|
kind: "run.exited",
|
|
171
177
|
runId: loop.loopId,
|
|
172
|
-
payload:
|
|
173
|
-
lifecycleState: leashExitDecision.lifecycleState,
|
|
174
|
-
status: leashExitDecision.status,
|
|
175
|
-
reason: leashExitDecision.reason
|
|
176
|
-
}
|
|
178
|
+
payload: createRunExitPayload(leashExitDecision)
|
|
177
179
|
}));
|
|
178
180
|
}
|
|
181
|
+
const finalizedLoop = finalizeLoop(loop, leashExitDecision, now(), idFactory);
|
|
182
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
179
183
|
return {
|
|
180
|
-
loop:
|
|
184
|
+
loop: finalizedLoop,
|
|
181
185
|
decision: leashExitDecision
|
|
182
186
|
};
|
|
183
187
|
}
|
|
@@ -193,7 +197,8 @@ export async function runMartin(input) {
|
|
|
193
197
|
shouldExit: true,
|
|
194
198
|
lifecycleState: "human_escalation",
|
|
195
199
|
status: "exited",
|
|
196
|
-
reason: secretDecision.reason ?? "Safety leash blocked secret-like values in the runtime context."
|
|
200
|
+
reason: secretDecision.reason ?? "Safety leash blocked secret-like values in the runtime context.",
|
|
201
|
+
...classifySafetyLeashExit(secretDecision, "secret")
|
|
197
202
|
};
|
|
198
203
|
if (input.store) {
|
|
199
204
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
@@ -208,15 +213,13 @@ export async function runMartin(input) {
|
|
|
208
213
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
209
214
|
kind: "run.exited",
|
|
210
215
|
runId: loop.loopId,
|
|
211
|
-
payload:
|
|
212
|
-
lifecycleState: secretExitDecision.lifecycleState,
|
|
213
|
-
status: secretExitDecision.status,
|
|
214
|
-
reason: secretExitDecision.reason
|
|
215
|
-
}
|
|
216
|
+
payload: createRunExitPayload(secretExitDecision)
|
|
216
217
|
}));
|
|
217
218
|
}
|
|
219
|
+
const finalizedLoop = finalizeLoop(loop, secretExitDecision, now(), idFactory);
|
|
220
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
218
221
|
return {
|
|
219
|
-
loop:
|
|
222
|
+
loop: finalizedLoop,
|
|
220
223
|
decision: secretExitDecision
|
|
221
224
|
};
|
|
222
225
|
}
|
|
@@ -254,20 +257,52 @@ export async function runMartin(input) {
|
|
|
254
257
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
255
258
|
kind: "run.exited",
|
|
256
259
|
runId: loop.loopId,
|
|
257
|
-
payload:
|
|
258
|
-
lifecycleState: preflightExitDecision.lifecycleState,
|
|
259
|
-
status: preflightExitDecision.status,
|
|
260
|
-
reason: preflightExitDecision.reason
|
|
261
|
-
}
|
|
260
|
+
payload: createRunExitPayload(preflightExitDecision)
|
|
262
261
|
}));
|
|
263
262
|
}
|
|
263
|
+
const finalizedLoop = finalizeLoop(loop, preflightExitDecision, now(), idFactory);
|
|
264
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
264
265
|
return {
|
|
265
|
-
loop:
|
|
266
|
+
loop: finalizedLoop,
|
|
266
267
|
decision: preflightExitDecision
|
|
267
268
|
};
|
|
268
269
|
}
|
|
269
270
|
// GATHER → ADMIT: run admission control before executing
|
|
270
271
|
currentPhase = "ADMIT";
|
|
272
|
+
// T05: Context Integrity Pre-gate — blocks authority inversion / injection before reasoning
|
|
273
|
+
const contextPrecheck = await runContextIntegrityPrecheck(loop.loopId, loop.attempts.length + 1, runDir(resolveRunsRoot(), loop.loopId), {
|
|
274
|
+
userPrompt: distilled.focus,
|
|
275
|
+
history: loop.attempts.map(a => a.summary).join("\n")
|
|
276
|
+
});
|
|
277
|
+
if (contextPrecheck.verdict === "context_poisoning_block") {
|
|
278
|
+
currentPhase = "ABORT";
|
|
279
|
+
const poisoningExitDecision = {
|
|
280
|
+
shouldExit: true,
|
|
281
|
+
lifecycleState: "human_escalation",
|
|
282
|
+
status: "exited",
|
|
283
|
+
reason: "Context Integrity Pre-gate: context poisoning attempt detected.",
|
|
284
|
+
failureClass: "safety_leash_blocked",
|
|
285
|
+
safetySurface: "context_integrity",
|
|
286
|
+
reasonCode: "context_poisoning_blocked"
|
|
287
|
+
};
|
|
288
|
+
if (input.store) {
|
|
289
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
290
|
+
kind: "safety.violations_found",
|
|
291
|
+
runId: loop.loopId,
|
|
292
|
+
payload: {
|
|
293
|
+
verdict: contextPrecheck.verdict,
|
|
294
|
+
signals: contextPrecheck.detectedSignals,
|
|
295
|
+
source: "context_integrity_pregate"
|
|
296
|
+
}
|
|
297
|
+
}));
|
|
298
|
+
}
|
|
299
|
+
const finalizedLoop = finalizeLoop(loop, poisoningExitDecision, now(), idFactory);
|
|
300
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
301
|
+
return {
|
|
302
|
+
loop: finalizedLoop,
|
|
303
|
+
decision: poisoningExitDecision
|
|
304
|
+
};
|
|
305
|
+
}
|
|
271
306
|
const admissionDecision = evaluateAttemptPolicy({
|
|
272
307
|
request: {
|
|
273
308
|
loopId: loop.loopId,
|
|
@@ -315,15 +350,13 @@ export async function runMartin(input) {
|
|
|
315
350
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
316
351
|
kind: "run.exited",
|
|
317
352
|
runId: loop.loopId,
|
|
318
|
-
payload:
|
|
319
|
-
lifecycleState: exitDecision.lifecycleState,
|
|
320
|
-
status: exitDecision.status,
|
|
321
|
-
reason: exitDecision.reason
|
|
322
|
-
}
|
|
353
|
+
payload: createRunExitPayload(exitDecision)
|
|
323
354
|
}));
|
|
324
355
|
}
|
|
356
|
+
const finalizedLoop = finalizeLoop(loop, exitDecision, now(), idFactory);
|
|
357
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
325
358
|
return {
|
|
326
|
-
loop:
|
|
359
|
+
loop: finalizedLoop,
|
|
327
360
|
decision: exitDecision
|
|
328
361
|
};
|
|
329
362
|
}
|
|
@@ -361,6 +394,7 @@ export async function runMartin(input) {
|
|
|
361
394
|
objective: loop.task.objective,
|
|
362
395
|
verificationPlan: loop.task.verificationPlan,
|
|
363
396
|
...(loop.task.verificationStack ? { verificationStack: loop.task.verificationStack } : {}),
|
|
397
|
+
...(loop.task.mutationMode ? { mutationMode: loop.task.mutationMode } : {}),
|
|
364
398
|
...(loop.task.repoRoot ? { repoRoot: loop.task.repoRoot } : {}),
|
|
365
399
|
...(loop.task.allowedPaths ? { allowedPaths: loop.task.allowedPaths } : {}),
|
|
366
400
|
...(loop.task.deniedPaths ? { deniedPaths: loop.task.deniedPaths } : {}),
|
|
@@ -539,6 +573,92 @@ export async function runMartin(input) {
|
|
|
539
573
|
// returned a non-empty list. A repoRoot alone is insufficient — git may fail (e.g. not
|
|
540
574
|
// a git repo) and silently return [], which would falsely trigger no_code_change.
|
|
541
575
|
const changedFileEvidenceAvailable = result.execution?.changedFiles !== undefined || changedFiles.length > 0;
|
|
576
|
+
if (isVerifyOnly && changedFiles.length > 0) {
|
|
577
|
+
const patchDecision = evaluatePatchDecision({
|
|
578
|
+
verificationPassed: result.verification.passed,
|
|
579
|
+
previousVerifierScore,
|
|
580
|
+
verifierScore: result.verification.passed ? 1 : 0,
|
|
581
|
+
scopeViolationCount: changedFiles.length,
|
|
582
|
+
changedFileCount: changedFiles.length,
|
|
583
|
+
diffNovelty: 1,
|
|
584
|
+
diffStats: result.execution?.diffStats,
|
|
585
|
+
costUsd: getUsageUsd(result.usage),
|
|
586
|
+
summary: result.summary
|
|
587
|
+
});
|
|
588
|
+
const verifyOnlyExitDecision = {
|
|
589
|
+
shouldExit: true,
|
|
590
|
+
lifecycleState: "human_escalation",
|
|
591
|
+
status: "exited",
|
|
592
|
+
reason: "Verify-only mode forbids file changes.",
|
|
593
|
+
failureClass: "safety_leash_blocked",
|
|
594
|
+
safetySurface: "filesystem",
|
|
595
|
+
reasonCode: "verify_only_write_attempt"
|
|
596
|
+
};
|
|
597
|
+
const rollbackOutcome = await restoreRollbackBoundary({
|
|
598
|
+
repoRoot: request.context.repoRoot,
|
|
599
|
+
boundary: rollbackBoundary,
|
|
600
|
+
restoredAt: attemptCompletedAt,
|
|
601
|
+
decision: patchDecision.decision
|
|
602
|
+
});
|
|
603
|
+
if (input.store) {
|
|
604
|
+
const verifyOnlyViolation = {
|
|
605
|
+
kind: "path_not_allowed",
|
|
606
|
+
message: `Verify-only mode forbids changed files: ${changedFiles.join(", ")}`,
|
|
607
|
+
file: changedFiles[0]
|
|
608
|
+
};
|
|
609
|
+
await input.store.writeAttemptArtifacts(loop.loopId, currentAttemptIndex, {
|
|
610
|
+
compiledContext,
|
|
611
|
+
leash: createLeashArtifact({
|
|
612
|
+
surface: "filesystem",
|
|
613
|
+
reason: verifyOnlyExitDecision.reason,
|
|
614
|
+
violations: [verifyOnlyViolation]
|
|
615
|
+
}, currentAttemptIndex),
|
|
616
|
+
patchScore: patchDecision.score,
|
|
617
|
+
patchDecision: toPatchDecisionArtifact(patchDecision),
|
|
618
|
+
...(rollbackBoundary ? { rollbackBoundary } : {}),
|
|
619
|
+
...(rollbackOutcome ? { rollbackOutcome } : {})
|
|
620
|
+
});
|
|
621
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
622
|
+
kind: "safety.violations_found",
|
|
623
|
+
runId: loop.loopId,
|
|
624
|
+
attemptIndex: currentAttemptIndex,
|
|
625
|
+
payload: {
|
|
626
|
+
surface: "filesystem",
|
|
627
|
+
blocked: true,
|
|
628
|
+
attemptIndex: currentAttemptIndex,
|
|
629
|
+
violations: [
|
|
630
|
+
{
|
|
631
|
+
kind: "path_not_allowed",
|
|
632
|
+
message: verifyOnlyExitDecision.reason,
|
|
633
|
+
files: changedFiles
|
|
634
|
+
}
|
|
635
|
+
]
|
|
636
|
+
}
|
|
637
|
+
}));
|
|
638
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
639
|
+
kind: "attempt.discarded",
|
|
640
|
+
runId: loop.loopId,
|
|
641
|
+
attemptIndex: currentAttemptIndex,
|
|
642
|
+
payload: {
|
|
643
|
+
decision: patchDecision.decision,
|
|
644
|
+
reason: patchDecision.summary,
|
|
645
|
+
reasonCodes: patchDecision.reasonCodes,
|
|
646
|
+
score: patchDecision.score.score
|
|
647
|
+
}
|
|
648
|
+
}));
|
|
649
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
650
|
+
kind: "run.exited",
|
|
651
|
+
runId: loop.loopId,
|
|
652
|
+
payload: createRunExitPayload(verifyOnlyExitDecision)
|
|
653
|
+
}));
|
|
654
|
+
}
|
|
655
|
+
const finalizedLoop = finalizeLoop(loop, verifyOnlyExitDecision, now(), idFactory);
|
|
656
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
657
|
+
return {
|
|
658
|
+
loop: finalizedLoop,
|
|
659
|
+
decision: verifyOnlyExitDecision
|
|
660
|
+
};
|
|
661
|
+
}
|
|
542
662
|
const filesystemDecision = evaluateFilesystemLeash({
|
|
543
663
|
repoRoot: request.context.repoRoot,
|
|
544
664
|
changedFiles,
|
|
@@ -561,7 +681,8 @@ export async function runMartin(input) {
|
|
|
561
681
|
shouldExit: true,
|
|
562
682
|
lifecycleState: "human_escalation",
|
|
563
683
|
status: "exited",
|
|
564
|
-
reason: filesystemDecision.reason ?? "Safety leash blocked filesystem changes."
|
|
684
|
+
reason: filesystemDecision.reason ?? "Safety leash blocked filesystem changes.",
|
|
685
|
+
...classifySafetyLeashExit(filesystemDecision, "filesystem")
|
|
565
686
|
};
|
|
566
687
|
const rollbackOutcome = await restoreRollbackBoundary({
|
|
567
688
|
repoRoot: request.context.repoRoot,
|
|
@@ -603,15 +724,13 @@ export async function runMartin(input) {
|
|
|
603
724
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
604
725
|
kind: "run.exited",
|
|
605
726
|
runId: loop.loopId,
|
|
606
|
-
payload:
|
|
607
|
-
lifecycleState: filesystemExitDecision.lifecycleState,
|
|
608
|
-
status: filesystemExitDecision.status,
|
|
609
|
-
reason: filesystemExitDecision.reason
|
|
610
|
-
}
|
|
727
|
+
payload: createRunExitPayload(filesystemExitDecision)
|
|
611
728
|
}));
|
|
612
729
|
}
|
|
730
|
+
const finalizedLoop = finalizeLoop(loop, filesystemExitDecision, now(), idFactory);
|
|
731
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
613
732
|
return {
|
|
614
|
-
loop:
|
|
733
|
+
loop: finalizedLoop,
|
|
615
734
|
decision: filesystemExitDecision
|
|
616
735
|
};
|
|
617
736
|
}
|
|
@@ -638,7 +757,8 @@ export async function runMartin(input) {
|
|
|
638
757
|
lifecycleState: "human_escalation",
|
|
639
758
|
status: "exited",
|
|
640
759
|
reason: changeApprovalDecision.reason ??
|
|
641
|
-
"Safety leash blocked dependency or migration changes that require approval."
|
|
760
|
+
"Safety leash blocked dependency or migration changes that require approval.",
|
|
761
|
+
...classifySafetyLeashExit(changeApprovalDecision, "dependency")
|
|
642
762
|
};
|
|
643
763
|
const rollbackOutcome = await restoreRollbackBoundary({
|
|
644
764
|
repoRoot: request.context.repoRoot,
|
|
@@ -681,15 +801,13 @@ export async function runMartin(input) {
|
|
|
681
801
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
682
802
|
kind: "run.exited",
|
|
683
803
|
runId: loop.loopId,
|
|
684
|
-
payload:
|
|
685
|
-
lifecycleState: approvalExitDecision.lifecycleState,
|
|
686
|
-
status: approvalExitDecision.status,
|
|
687
|
-
reason: approvalExitDecision.reason
|
|
688
|
-
}
|
|
804
|
+
payload: createRunExitPayload(approvalExitDecision)
|
|
689
805
|
}));
|
|
690
806
|
}
|
|
807
|
+
const finalizedLoop = finalizeLoop(loop, approvalExitDecision, now(), idFactory);
|
|
808
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
691
809
|
return {
|
|
692
|
-
loop:
|
|
810
|
+
loop: finalizedLoop,
|
|
693
811
|
decision: approvalExitDecision
|
|
694
812
|
};
|
|
695
813
|
}
|
|
@@ -728,8 +846,8 @@ export async function runMartin(input) {
|
|
|
728
846
|
previousVerifierScore,
|
|
729
847
|
verifierScore: result.verification.passed ? 1 : 0,
|
|
730
848
|
groundingViolationCount: groundingScanResult?.violations.length ?? 0,
|
|
731
|
-
changedFileCount: changedFileEvidenceAvailable ? changedFiles.length : undefined,
|
|
732
|
-
diffNovelty: changedFileEvidenceAvailable ? (changedFiles.length > 0 ? 1 : 0) : undefined,
|
|
849
|
+
changedFileCount: !isVerifyOnly && changedFileEvidenceAvailable ? changedFiles.length : undefined,
|
|
850
|
+
diffNovelty: !isVerifyOnly && changedFileEvidenceAvailable ? (changedFiles.length > 0 ? 1 : 0) : undefined,
|
|
733
851
|
diffStats: result.execution?.diffStats,
|
|
734
852
|
costUsd: getUsageUsd(result.usage),
|
|
735
853
|
summary: result.summary
|
|
@@ -822,15 +940,13 @@ export async function runMartin(input) {
|
|
|
822
940
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
823
941
|
kind: "run.exited",
|
|
824
942
|
runId: loop.loopId,
|
|
825
|
-
payload:
|
|
826
|
-
lifecycleState: patchExitDecision.lifecycleState,
|
|
827
|
-
status: patchExitDecision.status,
|
|
828
|
-
reason: patchExitDecision.reason
|
|
829
|
-
}
|
|
943
|
+
payload: createRunExitPayload(patchExitDecision)
|
|
830
944
|
}));
|
|
831
945
|
}
|
|
946
|
+
const finalizedLoop = finalizeLoop(loop, patchExitDecision, now(), idFactory);
|
|
947
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
832
948
|
return {
|
|
833
|
-
loop:
|
|
949
|
+
loop: finalizedLoop,
|
|
834
950
|
decision: patchExitDecision
|
|
835
951
|
};
|
|
836
952
|
}
|
|
@@ -870,15 +986,13 @@ export async function runMartin(input) {
|
|
|
870
986
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
871
987
|
kind: "run.exited",
|
|
872
988
|
runId: loop.loopId,
|
|
873
|
-
payload:
|
|
874
|
-
lifecycleState: decision.lifecycleState,
|
|
875
|
-
status: decision.status,
|
|
876
|
-
reason: decision.reason
|
|
877
|
-
}
|
|
989
|
+
payload: createRunExitPayload(decision)
|
|
878
990
|
}));
|
|
879
991
|
}
|
|
992
|
+
const finalizedLoop = finalizeLoop(loop, decision, now(), idFactory);
|
|
993
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
880
994
|
return {
|
|
881
|
-
loop:
|
|
995
|
+
loop: finalizedLoop,
|
|
882
996
|
decision
|
|
883
997
|
};
|
|
884
998
|
}
|
|
@@ -893,23 +1007,64 @@ export async function runMartin(input) {
|
|
|
893
1007
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
894
1008
|
kind: "run.exited",
|
|
895
1009
|
runId: loop.loopId,
|
|
896
|
-
payload:
|
|
897
|
-
lifecycleState: decision.lifecycleState,
|
|
898
|
-
status: decision.status,
|
|
899
|
-
reason: decision.reason
|
|
900
|
-
}
|
|
1010
|
+
payload: createRunExitPayload(decision)
|
|
901
1011
|
}));
|
|
902
1012
|
}
|
|
1013
|
+
const finalizedLoop = finalizeLoop(loop, decision, now(), idFactory);
|
|
1014
|
+
await persistLoopRecordIfSupported(input.store, finalizedLoop);
|
|
903
1015
|
return {
|
|
904
|
-
loop:
|
|
1016
|
+
loop: finalizedLoop,
|
|
905
1017
|
decision
|
|
906
1018
|
};
|
|
907
1019
|
}
|
|
1020
|
+
/**
 * Builds the payload object describing why a run stopped.
 *
 * The result always carries `lifecycleState`, `status` and `reason` copied
 * from the exit decision. The machine-readable classifiers `failureClass`,
 * `safetySurface` and `reasonCode` are included only when they are truthy on
 * the decision, so decisions without them produce the three-field payload.
 *
 * @param decision Exit decision to summarise.
 * @returns A fresh plain object; the decision itself is not mutated.
 */
function createRunExitPayload(decision) {
    const { lifecycleState, status, reason, failureClass, safetySurface, reasonCode } = decision;
    const payload = { lifecycleState, status, reason };
    // Optional classifiers are appended in a fixed order so key order stays stable.
    if (failureClass) {
        Object.assign(payload, { failureClass });
    }
    if (safetySurface) {
        Object.assign(payload, { safetySurface });
    }
    if (reasonCode) {
        Object.assign(payload, { reasonCode });
    }
    return payload;
}
|
|
1030
|
+
/**
 * Produces the machine-readable classification fields for an exit caused by
 * a safety leash.
 *
 * @param decision Leash decision that blocked the run.
 * @param safetySurface Surface label to report; defaults to `decision.surface`.
 * @returns An object with `failureClass` fixed to "safety_leash_blocked", the
 *   given `safetySurface`, and the `reasonCode` computed by
 *   `safetyLeashReasonCode` for that decision and surface.
 */
function classifySafetyLeashExit(decision, safetySurface = decision.surface) {
    const reasonCode = safetyLeashReasonCode(decision, safetySurface);
    return { failureClass: "safety_leash_blocked", safetySurface, reasonCode };
}
|
|
1037
|
+
/**
 * Maps a safety-leash decision to a stable reason code.
 *
 * Only the kind of the FIRST violation is considered. Command and network
 * blocks get a verifier-specific code when `safetySurface` is "verifier".
 * Any other, or missing, violation kind falls back to
 * `<safetySurface>_safety_block`.
 *
 * @param decision Leash decision; `violations` may be empty or absent.
 * @param safetySurface Surface label used for surface-specific codes and for
 *   the fallback code.
 * @returns The reason code string.
 */
function safetyLeashReasonCode(decision, safetySurface) {
    // Tolerate decisions without a violations array instead of throwing a
    // TypeError while an exit is being classified.
    const kind = decision?.violations?.[0]?.kind;
    switch (kind) {
        case "command_blocked":
            return safetySurface === "verifier" ? "destructive_verifier_command" : "command_blocked";
        case "network_blocked":
            return safetySurface === "verifier" ? "verifier_network_blocked" : "network_access_blocked";
        case "secret_value":
            return "secret_context_value";
        case "path_denied":
        case "protected_path":
            return "protected_surface_write";
        case "path_not_allowed":
            return "surface_write_not_allowed";
        case "path_outside_repo":
            return "outside_repo_write";
        case "dependency_approval_required":
            return "dependency_approval_required";
        case "migration_approval_required":
            return "migration_approval_required";
        case "config_change_approval_required":
            return "config_change_approval_required";
        default:
            // Avoid emitting the literal "undefined_safety_block" when no surface is known.
            return `${safetySurface ?? "unknown"}_safety_block`;
    }
}
|
|
908
1063
|
function finalizeLoop(loop, decision, timestamp, idFactory) {
|
|
909
1064
|
const finalized = appendLoopEvent(loop, {
|
|
910
1065
|
type: "run.completed",
|
|
911
1066
|
lifecycleState: decision.lifecycleState,
|
|
912
|
-
payload:
|
|
1067
|
+
payload: createRunExitPayload(decision)
|
|
913
1068
|
}, { now: timestamp, idFactory });
|
|
914
1069
|
return {
|
|
915
1070
|
...finalized,
|
|
@@ -918,6 +1073,9 @@ function finalizeLoop(loop, decision, timestamp, idFactory) {
|
|
|
918
1073
|
updatedAt: timestamp
|
|
919
1074
|
};
|
|
920
1075
|
}
|
|
1076
|
+
/**
 * Writes the loop record snapshot through the store when the store offers a
 * `writeLoopRecord` method; does nothing when there is no store or the
 * method is absent.
 *
 * @param store Run store, possibly undefined.
 * @param loop Loop record to persist; its `loopId` is used as the run id.
 */
async function persistLoopRecordIfSupported(store, loop) {
    const writeLoopRecord = store?.writeLoopRecord;
    if (writeLoopRecord === undefined || writeLoopRecord === null) {
        return;
    }
    // Invoke with the store as receiver, exactly like a method call would.
    await writeLoopRecord.call(store, loop.loopId, loop);
}
|
|
921
1079
|
/**
 * Resolves the transport label for an adapter.
 *
 * An explicit `adapter.metadata.transport` wins whenever it is neither
 * `null` nor `undefined`. Otherwise adapters of kind "agent-cli" report
 * "cli" and every other kind reports "http".
 *
 * @param adapter Adapter exposing `kind` and `metadata`.
 * @returns The transport label.
 */
function getAdapterTransport(adapter) {
    const declared = adapter.metadata.transport;
    if (declared !== undefined && declared !== null) {
        return declared;
    }
    if (adapter.kind === "agent-cli") {
        return "cli";
    }
    return "http";
}
|
|
@@ -934,7 +1092,7 @@ function getUsageProvenance(usage) {
|
|
|
934
1092
|
return "actual";
|
|
935
1093
|
}
|
|
936
1094
|
function resolveChangedFiles(result, repoRoot) {
|
|
937
|
-
if (result.execution?.changedFiles
|
|
1095
|
+
if (result.execution?.changedFiles !== undefined) {
|
|
938
1096
|
return result.execution.changedFiles;
|
|
939
1097
|
}
|
|
940
1098
|
if (!repoRoot) {
|
|
@@ -2,5 +2,7 @@ export { makeLedgerEvent } from "./ledger.js";
|
|
|
2
2
|
export type { LedgerEvent, LedgerEventDraft, LedgerEventKind } from "./ledger.js";
|
|
3
3
|
export { artifactDir, createFileRunStore, resolveRunsRoot, runDir } from "./store.js";
|
|
4
4
|
export type { AttemptArtifacts, RunContract, RunStore } from "./store.js";
|
|
5
|
+
export { readAllLoopRecords, readLatestLoopRecord, readLatestLoopRecordFromFile, readLoopRecordsFromFile } from "./runs-reader.js";
|
|
6
|
+
export type { LoopAttemptRecord, LoopRunRecord } from "./runs-reader.js";
|
|
5
7
|
export { compileAndPersistContext } from "./compiler.js";
|
|
6
8
|
export type { CompileResult } from "./compiler.js";
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export { makeLedgerEvent } from "./ledger.js";
|
|
2
2
|
export { artifactDir, createFileRunStore, resolveRunsRoot, runDir } from "./store.js";
|
|
3
|
+
export { readAllLoopRecords, readLatestLoopRecord, readLatestLoopRecordFromFile, readLoopRecordsFromFile } from "./runs-reader.js";
|
|
3
4
|
export { compileAndPersistContext } from "./compiler.js";
|
|
4
5
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reads completed loop records from ~/.martin/runs/ for analysis.
|
|
3
|
+
* Used by the Trust Calibration Engine and other offline analytics.
|
|
4
|
+
*
|
|
5
|
+
* Supports both storage layouts:
|
|
6
|
+
* - legacy root JSONL files: <runsRoot>/*.jsonl
|
|
7
|
+
* - canonical run trees: <runsRoot>/<loopId>/loop-record.json
|
|
8
|
+
*/
|
|
9
|
+
/**
 * One attempt entry inside a persisted loop record.
 *
 * Only `index` is required; every other field may be missing from a stored
 * record.
 */
export interface LoopAttemptRecord {
    /** Attempt number — NOTE(review): base (0 or 1) is not visible here; confirm against the writer. */
    index: number;
    /** Identifier of the adapter used for the attempt, when recorded. */
    adapterId?: string;
    /** Model name used for the attempt, when recorded. */
    model?: string;
    /** Failure classification for the attempt, when recorded. */
    failureClass?: string;
    /** Intervention label for the attempt, when recorded. */
    intervention?: string;
    /** Start time — presumably an ISO-8601 string; TODO confirm. */
    startedAt?: string;
    /** Completion time — presumably an ISO-8601 string; TODO confirm. */
    completedAt?: string;
}
|
|
18
|
+
/**
 * Shape of a loop record as returned by the runs reader.
 *
 * NOTE(review): the reader returns parsed JSON without validating it, so
 * this interface describes the expected shape rather than a checked one.
 */
export interface LoopRunRecord {
    /** Identifier of the loop / run. */
    loopId: string;
    /** Lifecycle state of the loop at the time the record was written. */
    lifecycleState: string;
    /** Status of the loop at the time the record was written. */
    status: string;
    /** Creation time; the reader parses it with `new Date(...)`. */
    createdAt: string;
    /** Last-update time; preferred over `createdAt` when picking the latest record. */
    updatedAt: string;
    /** Task description the loop was run for. */
    task: {
        title: string;
        objective: string;
    };
    /** Limits configured for the loop — units presumably USD / iterations / tokens as the names suggest; confirm. */
    budget: {
        maxUsd: number;
        softLimitUsd: number;
        maxIterations: number;
        maxTokens: number;
    };
    /** Usage recorded for the loop — presumably USD and token counts; confirm. */
    cost: {
        actualUsd: number;
        tokensIn: number;
        tokensOut: number;
        avoidedUsd?: number;
    };
    /** Attempts recorded for the loop. */
    attempts: Array<LoopAttemptRecord>;
}
|
|
42
|
+
export declare function readLoopRecordsFromFile(file: string): Promise<LoopRunRecord[]>;
|
|
43
|
+
export declare function readLatestLoopRecordFromFile(file: string): Promise<LoopRunRecord | null>;
|
|
44
|
+
/**
|
|
45
|
+
* Reads all loop records from the given directory (default: ~/.martin/runs/).
|
|
46
|
+
* Returns an empty array if the directory doesn't exist or has no records.
|
|
47
|
+
*/
|
|
48
|
+
export declare function readAllLoopRecords(runsDir?: string): Promise<LoopRunRecord[]>;
|
|
49
|
+
/**
|
|
50
|
+
* Returns the most recently updated loop record, or null if none exist.
|
|
51
|
+
*/
|
|
52
|
+
export declare function readLatestLoopRecord(runsDir?: string): Promise<LoopRunRecord | null>;
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reads completed loop records from ~/.martin/runs/ for analysis.
|
|
3
|
+
* Used by the Trust Calibration Engine and other offline analytics.
|
|
4
|
+
*
|
|
5
|
+
* Supports both storage layouts:
|
|
6
|
+
* - legacy root JSONL files: <runsRoot>/*.jsonl
|
|
7
|
+
* - canonical run trees: <runsRoot>/<loopId>/loop-record.json
|
|
8
|
+
*/
|
|
9
|
+
import { readFile, readdir } from "node:fs/promises";
|
|
10
|
+
import { homedir } from "node:os";
|
|
11
|
+
import { extname, join } from "node:path";
|
|
12
|
+
/**
 * Parses every loop record stored in a single file.
 *
 * Files whose extension is `.jsonl` (case-insensitive) are read as one JSON
 * document per non-blank line. Any other file is parsed as a single JSON
 * document; a top-level array is returned as-is and any other value is
 * wrapped in a one-element array.
 *
 * Records are returned exactly as parsed; their shape is not validated.
 *
 * @param file Path of the file to read.
 * @returns The parsed records, in file order.
 * @throws If the file cannot be read or any document in it is not valid JSON.
 */
export async function readLoopRecordsFromFile(file) {
    const text = await readFile(file, "utf8");
    if (extname(file).toLowerCase() === ".jsonl") {
        return text
            .split(/\r?\n/u)
            .map((line) => line.trim())
            .filter(Boolean)
            .map((line) => JSON.parse(line));
    }
    const parsed = JSON.parse(text);
    return Array.isArray(parsed) ? parsed : [parsed];
}
/**
 * Returns the most recently updated record of a single file, or null when
 * the file holds no records.
 *
 * @param file Path of the file to read.
 * @throws Under the same conditions as `readLoopRecordsFromFile`.
 */
export async function readLatestLoopRecordFromFile(file) {
    return pickLatestLoopRecord(await readLoopRecordsFromFile(file));
}
/**
 * Reads all loop records from the given directory (default: ~/.martin/runs/).
 * Returns an empty array if the directory doesn't exist or has no records.
 *
 * Root-level `*.jsonl` files are read first, followed by the
 * `loop-record.json` of every sub-directory.
 *
 * @param runsDir Directory to scan; defaults to `<home>/.martin/runs`.
 * @returns All records that could be read.
 */
export async function readAllLoopRecords(runsDir) {
    const dir = runsDir ?? join(homedir(), ".martin", "runs");
    let entries;
    try {
        entries = await readdir(dir, { withFileTypes: true });
    }
    catch {
        // Missing or unreadable directory: treated as "no records".
        return [];
    }
    const records = [];
    const jsonlFiles = entries
        .filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl"))
        .map((entry) => entry.name);
    for (const file of jsonlFiles) {
        try {
            records.push(...(await readLoopRecordsFromFile(join(dir, file))));
        }
        catch {
            // Best effort: an unreadable file, or a file containing even one
            // malformed line, is skipped as a whole.
        }
    }
    const runDirectories = entries.filter((entry) => entry.isDirectory());
    for (const entry of runDirectories) {
        try {
            records.push(...(await readLoopRecordsFromFile(join(dir, entry.name, "loop-record.json"))));
        }
        catch {
            // skip missing or malformed canonical records
        }
    }
    return records;
}
/**
 * Returns the most recently updated loop record, or null if none exist.
 *
 * @param runsDir Directory to scan; defaults to `<home>/.martin/runs`.
 */
export async function readLatestLoopRecord(runsDir) {
    return pickLatestLoopRecord(await readAllLoopRecords(runsDir));
}
/**
 * Sort key for a record: the first of `updatedAt` / `createdAt` that parses
 * to a valid date, in epoch milliseconds. Records without a usable timestamp
 * rank below every dated record (-Infinity) instead of producing NaN, which
 * would make every comparison against them false.
 */
function loopRecordTimestamp(record) {
    for (const value of [record?.updatedAt, record?.createdAt]) {
        if (value === undefined || value === null) {
            continue;
        }
        const millis = new Date(value).getTime();
        if (!Number.isNaN(millis)) {
            return millis;
        }
    }
    return Number.NEGATIVE_INFINITY;
}
/**
 * Picks the record with the greatest timestamp. On ties the earliest record
 * in the list wins; an empty list yields null.
 */
function pickLatestLoopRecord(records) {
    if (records.length === 0)
        return null;
    let latest = records[0];
    let latestTimestamp = loopRecordTimestamp(latest);
    for (const record of records) {
        const timestamp = loopRecordTimestamp(record);
        if (timestamp > latestTimestamp) {
            latest = record;
            latestTimestamp = timestamp;
        }
    }
    return latest;
}
|
|
84
|
+
//# sourceMappingURL=runs-reader.js.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { LoopBudget, LoopTask, MachineState } from "../../contracts/index.js";
|
|
1
|
+
import type { LoopBudget, LoopRecord, LoopTask, MachineState } from "../../contracts/index.js";
|
|
2
2
|
import { type LedgerEvent } from "./ledger.js";
|
|
3
3
|
export interface RunContract {
|
|
4
4
|
runId: string;
|
|
@@ -53,6 +53,11 @@ export interface RunStore {
|
|
|
53
53
|
* Write artifacts for a completed attempt to artifacts/attempt-<n>/.
|
|
54
54
|
*/
|
|
55
55
|
writeAttemptArtifacts(runId: string, attemptIndex: number, artifacts: AttemptArtifacts): Promise<void>;
|
|
56
|
+
/**
|
|
57
|
+
* Persist the latest canonical loop record snapshot when the caller has one.
|
|
58
|
+
* Optional to avoid breaking custom RunStore implementations.
|
|
59
|
+
*/
|
|
60
|
+
writeLoopRecord?(runId: string, loop: LoopRecord): Promise<void>;
|
|
56
61
|
}
|
|
57
62
|
export declare function resolveRunsRoot(env?: NodeJS.ProcessEnv): string;
|
|
58
63
|
export declare function runDir(runsRoot: string, runId: string): string;
|
|
@@ -74,6 +74,11 @@ export function createFileRunStore(options = {}) {
|
|
|
74
74
|
if (artifacts.rollbackOutcome !== undefined) {
|
|
75
75
|
await writeJsonFile(join(dir, "rollback-outcome.json"), artifacts.rollbackOutcome);
|
|
76
76
|
}
|
|
77
|
+
},
|
|
78
|
+
async writeLoopRecord(runId, loop) {
|
|
79
|
+
const dir = runDir(runsRoot, runId);
|
|
80
|
+
await mkdir(dir, { recursive: true });
|
|
81
|
+
await writeJsonFile(join(dir, "loop-record.json"), loop);
|
|
77
82
|
}
|
|
78
83
|
};
|
|
79
84
|
}
|
|
@@ -18,6 +18,12 @@ export interface ExitDecision {
|
|
|
18
18
|
lifecycleState: LoopLifecycleState;
|
|
19
19
|
status: LoopStatus;
|
|
20
20
|
reason: string;
|
|
21
|
+
/** Machine-readable stop classifier for non-attempt exits such as preflight safety blocks. */
|
|
22
|
+
failureClass?: FailureClass;
|
|
23
|
+
/** Machine-readable safety surface, when the stop came from a safety leash. */
|
|
24
|
+
safetySurface?: string;
|
|
25
|
+
/** Stable reason code for dashboards, MCP, and downstream automation. */
|
|
26
|
+
reasonCode?: string;
|
|
21
27
|
}
|
|
22
28
|
export interface MartinAdapterResultLike {
|
|
23
29
|
status: "completed" | "failed";
|