martin-loop 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -16
- package/demo/seeded-workspace/README.md +35 -0
- package/demo/seeded-workspace/TASKS.md +29 -0
- package/demo/seeded-workspace/martin.config.yaml +11 -0
- package/demo/seeded-workspace/package.json +8 -0
- package/demo/seeded-workspace/src/invoice-summary.js +11 -0
- package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
- package/dist/vendor/adapters/claude-cli.d.ts +19 -4
- package/dist/vendor/adapters/claude-cli.js +55 -24
- package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
- package/dist/vendor/adapters/cli-bridge.js +154 -28
- package/dist/vendor/adapters/index.d.ts +1 -0
- package/dist/vendor/adapters/index.js +1 -0
- package/dist/vendor/adapters/verifier-only.d.ts +7 -0
- package/dist/vendor/adapters/verifier-only.js +57 -0
- package/dist/vendor/cli/index.d.ts +6 -1
- package/dist/vendor/cli/index.js +124 -7
- package/dist/vendor/contracts/index.d.ts +3 -1
- package/dist/vendor/core/compiler.d.ts +2 -0
- package/dist/vendor/core/compiler.js +10 -4
- package/dist/vendor/core/context-integrity.d.ts +26 -0
- package/dist/vendor/core/context-integrity.js +56 -0
- package/dist/vendor/core/index.d.ts +5 -2
- package/dist/vendor/core/index.js +186 -54
- package/dist/vendor/core/policy.d.ts +6 -0
- package/docs/distribution/DIRECTORY-SUBMISSIONS.md +89 -0
- package/docs/distribution/INTEGRATION-OUTREACH.md +61 -0
- package/docs/distribution/UNDER-3-CHALLENGE.md +65 -0
- package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
- package/docs/oss/EXAMPLES.md +9 -1
- package/docs/oss/OSS-BOUNDARY-REPORT.json +3 -7
- package/docs/oss/OSS-BOUNDARY-REPORT.md +2 -2
- package/docs/oss/QUICKSTART.md +33 -3
- package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
- package/docs/oss/README.md +6 -3
- package/docs/oss/RELEASE-SURFACE-REPORT.json +1 -1
- package/docs/oss/RELEASE-SURFACE-REPORT.md +1 -1
- package/package.json +8 -2
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
/**
 * Case-insensitive patterns treated as evidence of instruction re-injection
 * or authority inversion. None of them carries the `g` or `y` flag, so
 * `RegExp.prototype.test` keeps no `lastIndex` state between calls and the
 * literals can be shared safely across invocations.
 */
const POISON_PATTERNS = [
    /ignore\s+(?:all\s+)?previous\s+instructions/i,
    /you\s+are\s+now\s+a\s+(?!Martin\s+Loop)/i,
    /new\s+rule:/i,
    /disregard\s+(?:safety|policy|guardrails)/i,
    /override\s+system\s+authority/i,
    /hidden\s+instruction:/i,
    /\[system_override\]/i,
    /\[authority_inversion\]/i
];
/**
 * T05: Context Poisoning Pre-gate.
 * Scans untrusted input channels for authority inversion or instruction re-injection.
 * Runs BEFORE admission control and core reasoning.
 *
 * @param {string} runId - Identifier copied verbatim into the returned record.
 * @param {number} attemptIndex - Attempt number copied verbatim into the returned record.
 * @param {string} artifactsDir - Directory that receives `context-integrity-precheck.json`;
 *   it is created recursively when missing.
 * @param {{ userPrompt?: string, toolOutput?: string, retrievedContext?: string, history?: string }} inputs
 *   Untrusted text channels. Falsy channels are skipped.
 * @returns {Promise<object>} The precheck record: `runId`, `attemptIndex`, `verdict`
 *   (`"clean"` or `"context_poisoning_block"`), `reason` (undefined when clean),
 *   `detectedSignals`, `analyzedChannels`, and an ISO `timestamp`.
 *   Artifact write failures are swallowed, so the record is returned regardless.
 */
export async function runContextIntegrityPrecheck(runId, attemptIndex, artifactsDir, inputs) {
    const signals = [];
    // `system` is hard-coded to true; the remaining flags record whether the
    // caller supplied that channel. `retrievedContext` is scanned below but has
    // no flag of its own in this record.
    const analyzedChannels = {
        system: true,
        user: Boolean(inputs.userPrompt),
        tools: Boolean(inputs.toolOutput),
        history: Boolean(inputs.history)
    };
    // Every channel reported as analyzed must actually be scanned. `history`
    // was previously flagged above but left out of this buffer, so poisoned
    // attempt summaries passed through undetected.
    const untrustedBuffer = [inputs.userPrompt, inputs.toolOutput, inputs.retrievedContext, inputs.history]
        .filter(Boolean)
        .join("\n---\n");
    for (const pattern of POISON_PATTERNS) {
        if (pattern.test(untrustedBuffer)) {
            signals.push(`Detected poison pattern: ${pattern.toString()}`);
        }
    }
    // Separate heuristic for "I am ..." / "You are ..." statements that are not
    // immediately followed by "Martin Loop" or "an AI".
    if (/\b(?:I am|You are)\s+(?!Martin\s+Loop|an\s+AI)\b/i.test(untrustedBuffer)) {
        signals.push("Identity redefinition attempt detected.");
    }
    const verdict = signals.length > 0 ? "context_poisoning_block" : "clean";
    const precheck = {
        runId,
        attemptIndex,
        verdict,
        reason: signals.length > 0 ? `Detected ${signals.length} poisoning signal(s).` : undefined,
        detectedSignals: signals,
        analyzedChannels,
        timestamp: new Date().toISOString()
    };
    try {
        await mkdir(artifactsDir, { recursive: true });
        await writeFile(join(artifactsDir, "context-integrity-precheck.json"), JSON.stringify(precheck, null, 2), "utf8");
    }
    catch {
        // non-fatal — artifact persistence is best-effort
    }
    return precheck;
}
|
|
56
|
+
//# sourceMappingURL=context-integrity.js.map
|
|
@@ -1,14 +1,16 @@
|
|
|
1
|
-
import { type ApprovalPolicy, type CostProvenance, type ExecutionProfile, type FailureClass, type InterventionType, type LoopArtifact, type LoopAttempt, type LoopBudget, type LoopRecord, type LoopTask } from "../contracts/index.js";
|
|
1
|
+
import { type ApprovalPolicy, type CostProvenance, type ExecutionProfile, type FailureClass, type InterventionType, type LoopArtifact, type LoopAttempt, type LoopBudget, type MutationMode, type LoopRecord, type LoopTask } from "../contracts/index.js";
|
|
2
2
|
import { classifyFailure, computeEvidenceVector, evaluatePatchDecision, evaluateCostGovernor, evaluateBudgetPreflight, inferExit, nextPolicyPhase, policyPhaseToLifecycleState, scorePatchDecision, selectRecoveryRecipe, type ExitDecision } from "./policy.js";
|
|
3
3
|
import { evaluateChangeApprovalLeash, evaluateFilesystemLeash, evaluateSecretLeash, redactSecretsFromText, resolveExecutionProfile, evaluateVerificationLeash } from "./leash.js";
|
|
4
4
|
import { buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations } from "./grounding.js";
|
|
5
5
|
import { captureRollbackBoundary, restoreRollbackBoundary } from "./rollback.js";
|
|
6
6
|
import { type RunStore } from "./persistence/index.js";
|
|
7
|
-
export type { ApprovalPolicy, BudgetPreflightEstimate, BudgetSettlement, CostProvenance, EvidenceVector, ExecutionProfile, FailureClass, InterventionType, PatchDecision, PatchDecisionArtifact, PatchDecisionReasonCode, PatchScore, RollbackBoundaryArtifact, RollbackBoundaryStrategy, RollbackFileSnapshot, RollbackOutcomeArtifact, RollbackOutcomeStatus, PolicyPhase } from "../contracts/index.js";
|
|
7
|
+
export type { ApprovalPolicy, BudgetPreflightEstimate, BudgetSettlement, CostProvenance, EvidenceVector, ExecutionProfile, FailureClass, InterventionType, PatchDecision, PatchDecisionArtifact, PatchDecisionReasonCode, PatchScore, MutationMode, RollbackBoundaryArtifact, RollbackBoundaryStrategy, RollbackFileSnapshot, RollbackOutcomeArtifact, RollbackOutcomeStatus, PolicyPhase } from "../contracts/index.js";
|
|
8
8
|
export { classifyFailure, computeEvidenceVector, evaluatePatchDecision, evaluateCostGovernor, evaluateBudgetPreflight, inferExit, nextPolicyPhase, policyPhaseToLifecycleState, scorePatchDecision, selectRecoveryRecipe, evaluateVerificationLeash, evaluateFilesystemLeash, evaluateChangeApprovalLeash, evaluateSecretLeash, resolveExecutionProfile, redactSecretsFromText, buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations, captureRollbackBoundary, restoreRollbackBoundary };
|
|
9
9
|
export type { BudgetPreflightDecision, BudgetPreflightInput, CostGovernorState, EvidenceVectorInput, EvaluatedPatchDecision, ExitDecision, FailureAssessment, PatchDecisionInput, RecoveryDecision, RecoveryRecipe } from "./policy.js";
|
|
10
10
|
export type { ResolvedExecutionProfile, SafetyLeashDecision, SafetyViolation } from "./leash.js";
|
|
11
11
|
export type { GroundingScanResult, GroundingViolation, GroundingViolationKind, RepoGroundingHit, RepoGroundingIndex } from "./grounding.js";
|
|
12
|
+
export { runContextIntegrityPrecheck } from "./context-integrity.js";
|
|
13
|
+
export type { ContextIntegrityPrecheck, ContextIntegrityVerdict } from "./context-integrity.js";
|
|
12
14
|
export { compilePromptPacket } from "./compiler.js";
|
|
13
15
|
export type { PromptPacket, CompilerAdapterRequest } from "./compiler.js";
|
|
14
16
|
export { createFileRunStore, makeLedgerEvent, resolveRunsRoot } from "./persistence/index.js";
|
|
@@ -23,6 +25,7 @@ export interface MartinAdapterRequest {
|
|
|
23
25
|
objective: string;
|
|
24
26
|
verificationPlan: string[];
|
|
25
27
|
verificationStack?: LoopTask["verificationStack"];
|
|
28
|
+
mutationMode?: MutationMode;
|
|
26
29
|
/** Absolute path to the repository root. */
|
|
27
30
|
repoRoot?: string;
|
|
28
31
|
/** Glob patterns for files the agent may modify. Empty = no restriction. */
|
|
@@ -5,8 +5,11 @@ import { evaluateChangeApprovalLeash, evaluateFilesystemLeash, evaluateSecretLea
|
|
|
5
5
|
import { buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations } from "./grounding.js";
|
|
6
6
|
import { captureRollbackBoundary, restoreRollbackBoundary } from "./rollback.js";
|
|
7
7
|
import { compilePromptPacket } from "./compiler.js";
|
|
8
|
-
import { makeLedgerEvent } from "./persistence/index.js";
|
|
8
|
+
import { makeLedgerEvent, resolveRunsRoot, runDir } from "./persistence/index.js";
|
|
9
|
+
import { runContextIntegrityPrecheck } from "./context-integrity.js";
|
|
9
10
|
export { classifyFailure, computeEvidenceVector, evaluatePatchDecision, evaluateCostGovernor, evaluateBudgetPreflight, inferExit, nextPolicyPhase, policyPhaseToLifecycleState, scorePatchDecision, selectRecoveryRecipe, evaluateVerificationLeash, evaluateFilesystemLeash, evaluateChangeApprovalLeash, evaluateSecretLeash, resolveExecutionProfile, redactSecretsFromText, buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations, captureRollbackBoundary, restoreRollbackBoundary };
|
|
11
|
+
// ─── Context Integrity Pre-gate ──────────────────────────────────────────────
|
|
12
|
+
export { runContextIntegrityPrecheck } from "./context-integrity.js";
|
|
10
13
|
// ─── Prompt packet compiler ──────────────────────────────────────────────────
|
|
11
14
|
export { compilePromptPacket } from "./compiler.js";
|
|
12
15
|
// ─── Persistence (RunStore, LedgerEvent, FileRunStore) ──────────────────────
|
|
@@ -136,6 +139,7 @@ export async function runMartin(input) {
|
|
|
136
139
|
let currentAdapterIndex = 0;
|
|
137
140
|
let currentAdapter = adapterChain[currentAdapterIndex] ?? input.adapter;
|
|
138
141
|
let useCompressedContext = false;
|
|
142
|
+
const isVerifyOnly = input.task.mutationMode === "verify_only";
|
|
139
143
|
const executionProfile = resolveExecutionProfile({
|
|
140
144
|
executionProfile: input.task.executionProfile,
|
|
141
145
|
allowedNetworkDomains: input.task.allowedNetworkDomains
|
|
@@ -153,7 +157,8 @@ export async function runMartin(input) {
|
|
|
153
157
|
shouldExit: true,
|
|
154
158
|
lifecycleState: "human_escalation",
|
|
155
159
|
status: "exited",
|
|
156
|
-
reason
|
|
160
|
+
reason,
|
|
161
|
+
...classifySafetyLeashExit(leashDecision, "verifier")
|
|
157
162
|
};
|
|
158
163
|
if (input.store) {
|
|
159
164
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
@@ -169,11 +174,7 @@ export async function runMartin(input) {
|
|
|
169
174
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
170
175
|
kind: "run.exited",
|
|
171
176
|
runId: loop.loopId,
|
|
172
|
-
payload:
|
|
173
|
-
lifecycleState: leashExitDecision.lifecycleState,
|
|
174
|
-
status: leashExitDecision.status,
|
|
175
|
-
reason: leashExitDecision.reason
|
|
176
|
-
}
|
|
177
|
+
payload: createRunExitPayload(leashExitDecision)
|
|
177
178
|
}));
|
|
178
179
|
}
|
|
179
180
|
return {
|
|
@@ -193,7 +194,8 @@ export async function runMartin(input) {
|
|
|
193
194
|
shouldExit: true,
|
|
194
195
|
lifecycleState: "human_escalation",
|
|
195
196
|
status: "exited",
|
|
196
|
-
reason: secretDecision.reason ?? "Safety leash blocked secret-like values in the runtime context."
|
|
197
|
+
reason: secretDecision.reason ?? "Safety leash blocked secret-like values in the runtime context.",
|
|
198
|
+
...classifySafetyLeashExit(secretDecision, "secret")
|
|
197
199
|
};
|
|
198
200
|
if (input.store) {
|
|
199
201
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
@@ -208,11 +210,7 @@ export async function runMartin(input) {
|
|
|
208
210
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
209
211
|
kind: "run.exited",
|
|
210
212
|
runId: loop.loopId,
|
|
211
|
-
payload:
|
|
212
|
-
lifecycleState: secretExitDecision.lifecycleState,
|
|
213
|
-
status: secretExitDecision.status,
|
|
214
|
-
reason: secretExitDecision.reason
|
|
215
|
-
}
|
|
213
|
+
payload: createRunExitPayload(secretExitDecision)
|
|
216
214
|
}));
|
|
217
215
|
}
|
|
218
216
|
return {
|
|
@@ -254,11 +252,7 @@ export async function runMartin(input) {
|
|
|
254
252
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
255
253
|
kind: "run.exited",
|
|
256
254
|
runId: loop.loopId,
|
|
257
|
-
payload:
|
|
258
|
-
lifecycleState: preflightExitDecision.lifecycleState,
|
|
259
|
-
status: preflightExitDecision.status,
|
|
260
|
-
reason: preflightExitDecision.reason
|
|
261
|
-
}
|
|
255
|
+
payload: createRunExitPayload(preflightExitDecision)
|
|
262
256
|
}));
|
|
263
257
|
}
|
|
264
258
|
return {
|
|
@@ -268,6 +262,38 @@ export async function runMartin(input) {
|
|
|
268
262
|
}
|
|
269
263
|
// GATHER → ADMIT: run admission control before executing
|
|
270
264
|
currentPhase = "ADMIT";
|
|
265
|
+
// T05: Context Integrity Pre-gate — blocks authority inversion / injection before reasoning
|
|
266
|
+
const contextPrecheck = await runContextIntegrityPrecheck(loop.loopId, loop.attempts.length + 1, runDir(resolveRunsRoot(), loop.loopId), {
|
|
267
|
+
userPrompt: distilled.focus,
|
|
268
|
+
history: loop.attempts.map(a => a.summary).join("\n")
|
|
269
|
+
});
|
|
270
|
+
if (contextPrecheck.verdict === "context_poisoning_block") {
|
|
271
|
+
currentPhase = "ABORT";
|
|
272
|
+
const poisoningExitDecision = {
|
|
273
|
+
shouldExit: true,
|
|
274
|
+
lifecycleState: "human_escalation",
|
|
275
|
+
status: "exited",
|
|
276
|
+
reason: "Context Integrity Pre-gate: context poisoning attempt detected.",
|
|
277
|
+
failureClass: "safety_leash_blocked",
|
|
278
|
+
safetySurface: "context_integrity",
|
|
279
|
+
reasonCode: "context_poisoning_blocked"
|
|
280
|
+
};
|
|
281
|
+
if (input.store) {
|
|
282
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
283
|
+
kind: "safety.violations_found",
|
|
284
|
+
runId: loop.loopId,
|
|
285
|
+
payload: {
|
|
286
|
+
verdict: contextPrecheck.verdict,
|
|
287
|
+
signals: contextPrecheck.detectedSignals,
|
|
288
|
+
source: "context_integrity_pregate"
|
|
289
|
+
}
|
|
290
|
+
}));
|
|
291
|
+
}
|
|
292
|
+
return {
|
|
293
|
+
loop: finalizeLoop(loop, poisoningExitDecision, now(), idFactory),
|
|
294
|
+
decision: poisoningExitDecision
|
|
295
|
+
};
|
|
296
|
+
}
|
|
271
297
|
const admissionDecision = evaluateAttemptPolicy({
|
|
272
298
|
request: {
|
|
273
299
|
loopId: loop.loopId,
|
|
@@ -315,11 +341,7 @@ export async function runMartin(input) {
|
|
|
315
341
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
316
342
|
kind: "run.exited",
|
|
317
343
|
runId: loop.loopId,
|
|
318
|
-
payload:
|
|
319
|
-
lifecycleState: exitDecision.lifecycleState,
|
|
320
|
-
status: exitDecision.status,
|
|
321
|
-
reason: exitDecision.reason
|
|
322
|
-
}
|
|
344
|
+
payload: createRunExitPayload(exitDecision)
|
|
323
345
|
}));
|
|
324
346
|
}
|
|
325
347
|
return {
|
|
@@ -361,6 +383,7 @@ export async function runMartin(input) {
|
|
|
361
383
|
objective: loop.task.objective,
|
|
362
384
|
verificationPlan: loop.task.verificationPlan,
|
|
363
385
|
...(loop.task.verificationStack ? { verificationStack: loop.task.verificationStack } : {}),
|
|
386
|
+
...(loop.task.mutationMode ? { mutationMode: loop.task.mutationMode } : {}),
|
|
364
387
|
...(loop.task.repoRoot ? { repoRoot: loop.task.repoRoot } : {}),
|
|
365
388
|
...(loop.task.allowedPaths ? { allowedPaths: loop.task.allowedPaths } : {}),
|
|
366
389
|
...(loop.task.deniedPaths ? { deniedPaths: loop.task.deniedPaths } : {}),
|
|
@@ -539,6 +562,90 @@ export async function runMartin(input) {
|
|
|
539
562
|
// returned a non-empty list. A repoRoot alone is insufficient — git may fail (e.g. not
|
|
540
563
|
// a git repo) and silently return [], which would falsely trigger no_code_change.
|
|
541
564
|
const changedFileEvidenceAvailable = result.execution?.changedFiles !== undefined || changedFiles.length > 0;
|
|
565
|
+
if (isVerifyOnly && changedFiles.length > 0) {
|
|
566
|
+
const patchDecision = evaluatePatchDecision({
|
|
567
|
+
verificationPassed: result.verification.passed,
|
|
568
|
+
previousVerifierScore,
|
|
569
|
+
verifierScore: result.verification.passed ? 1 : 0,
|
|
570
|
+
scopeViolationCount: changedFiles.length,
|
|
571
|
+
changedFileCount: changedFiles.length,
|
|
572
|
+
diffNovelty: 1,
|
|
573
|
+
diffStats: result.execution?.diffStats,
|
|
574
|
+
costUsd: getUsageUsd(result.usage),
|
|
575
|
+
summary: result.summary
|
|
576
|
+
});
|
|
577
|
+
const verifyOnlyExitDecision = {
|
|
578
|
+
shouldExit: true,
|
|
579
|
+
lifecycleState: "human_escalation",
|
|
580
|
+
status: "exited",
|
|
581
|
+
reason: "Verify-only mode forbids file changes.",
|
|
582
|
+
failureClass: "safety_leash_blocked",
|
|
583
|
+
safetySurface: "filesystem",
|
|
584
|
+
reasonCode: "verify_only_write_attempt"
|
|
585
|
+
};
|
|
586
|
+
const rollbackOutcome = await restoreRollbackBoundary({
|
|
587
|
+
repoRoot: request.context.repoRoot,
|
|
588
|
+
boundary: rollbackBoundary,
|
|
589
|
+
restoredAt: attemptCompletedAt,
|
|
590
|
+
decision: patchDecision.decision
|
|
591
|
+
});
|
|
592
|
+
if (input.store) {
|
|
593
|
+
const verifyOnlyViolation = {
|
|
594
|
+
kind: "path_not_allowed",
|
|
595
|
+
message: `Verify-only mode forbids changed files: ${changedFiles.join(", ")}`,
|
|
596
|
+
file: changedFiles[0]
|
|
597
|
+
};
|
|
598
|
+
await input.store.writeAttemptArtifacts(loop.loopId, currentAttemptIndex, {
|
|
599
|
+
compiledContext,
|
|
600
|
+
leash: createLeashArtifact({
|
|
601
|
+
surface: "filesystem",
|
|
602
|
+
reason: verifyOnlyExitDecision.reason,
|
|
603
|
+
violations: [verifyOnlyViolation]
|
|
604
|
+
}, currentAttemptIndex),
|
|
605
|
+
patchScore: patchDecision.score,
|
|
606
|
+
patchDecision: toPatchDecisionArtifact(patchDecision),
|
|
607
|
+
...(rollbackBoundary ? { rollbackBoundary } : {}),
|
|
608
|
+
...(rollbackOutcome ? { rollbackOutcome } : {})
|
|
609
|
+
});
|
|
610
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
611
|
+
kind: "safety.violations_found",
|
|
612
|
+
runId: loop.loopId,
|
|
613
|
+
attemptIndex: currentAttemptIndex,
|
|
614
|
+
payload: {
|
|
615
|
+
surface: "filesystem",
|
|
616
|
+
blocked: true,
|
|
617
|
+
attemptIndex: currentAttemptIndex,
|
|
618
|
+
violations: [
|
|
619
|
+
{
|
|
620
|
+
kind: "path_not_allowed",
|
|
621
|
+
message: verifyOnlyExitDecision.reason,
|
|
622
|
+
files: changedFiles
|
|
623
|
+
}
|
|
624
|
+
]
|
|
625
|
+
}
|
|
626
|
+
}));
|
|
627
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
628
|
+
kind: "attempt.discarded",
|
|
629
|
+
runId: loop.loopId,
|
|
630
|
+
attemptIndex: currentAttemptIndex,
|
|
631
|
+
payload: {
|
|
632
|
+
decision: patchDecision.decision,
|
|
633
|
+
reason: patchDecision.summary,
|
|
634
|
+
reasonCodes: patchDecision.reasonCodes,
|
|
635
|
+
score: patchDecision.score.score
|
|
636
|
+
}
|
|
637
|
+
}));
|
|
638
|
+
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
639
|
+
kind: "run.exited",
|
|
640
|
+
runId: loop.loopId,
|
|
641
|
+
payload: createRunExitPayload(verifyOnlyExitDecision)
|
|
642
|
+
}));
|
|
643
|
+
}
|
|
644
|
+
return {
|
|
645
|
+
loop: finalizeLoop(loop, verifyOnlyExitDecision, now(), idFactory),
|
|
646
|
+
decision: verifyOnlyExitDecision
|
|
647
|
+
};
|
|
648
|
+
}
|
|
542
649
|
const filesystemDecision = evaluateFilesystemLeash({
|
|
543
650
|
repoRoot: request.context.repoRoot,
|
|
544
651
|
changedFiles,
|
|
@@ -561,7 +668,8 @@ export async function runMartin(input) {
|
|
|
561
668
|
shouldExit: true,
|
|
562
669
|
lifecycleState: "human_escalation",
|
|
563
670
|
status: "exited",
|
|
564
|
-
reason: filesystemDecision.reason ?? "Safety leash blocked filesystem changes."
|
|
671
|
+
reason: filesystemDecision.reason ?? "Safety leash blocked filesystem changes.",
|
|
672
|
+
...classifySafetyLeashExit(filesystemDecision, "filesystem")
|
|
565
673
|
};
|
|
566
674
|
const rollbackOutcome = await restoreRollbackBoundary({
|
|
567
675
|
repoRoot: request.context.repoRoot,
|
|
@@ -603,11 +711,7 @@ export async function runMartin(input) {
|
|
|
603
711
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
604
712
|
kind: "run.exited",
|
|
605
713
|
runId: loop.loopId,
|
|
606
|
-
payload:
|
|
607
|
-
lifecycleState: filesystemExitDecision.lifecycleState,
|
|
608
|
-
status: filesystemExitDecision.status,
|
|
609
|
-
reason: filesystemExitDecision.reason
|
|
610
|
-
}
|
|
714
|
+
payload: createRunExitPayload(filesystemExitDecision)
|
|
611
715
|
}));
|
|
612
716
|
}
|
|
613
717
|
return {
|
|
@@ -638,7 +742,8 @@ export async function runMartin(input) {
|
|
|
638
742
|
lifecycleState: "human_escalation",
|
|
639
743
|
status: "exited",
|
|
640
744
|
reason: changeApprovalDecision.reason ??
|
|
641
|
-
"Safety leash blocked dependency or migration changes that require approval."
|
|
745
|
+
"Safety leash blocked dependency or migration changes that require approval.",
|
|
746
|
+
...classifySafetyLeashExit(changeApprovalDecision, "dependency")
|
|
642
747
|
};
|
|
643
748
|
const rollbackOutcome = await restoreRollbackBoundary({
|
|
644
749
|
repoRoot: request.context.repoRoot,
|
|
@@ -681,11 +786,7 @@ export async function runMartin(input) {
|
|
|
681
786
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
682
787
|
kind: "run.exited",
|
|
683
788
|
runId: loop.loopId,
|
|
684
|
-
payload:
|
|
685
|
-
lifecycleState: approvalExitDecision.lifecycleState,
|
|
686
|
-
status: approvalExitDecision.status,
|
|
687
|
-
reason: approvalExitDecision.reason
|
|
688
|
-
}
|
|
789
|
+
payload: createRunExitPayload(approvalExitDecision)
|
|
689
790
|
}));
|
|
690
791
|
}
|
|
691
792
|
return {
|
|
@@ -728,8 +829,8 @@ export async function runMartin(input) {
|
|
|
728
829
|
previousVerifierScore,
|
|
729
830
|
verifierScore: result.verification.passed ? 1 : 0,
|
|
730
831
|
groundingViolationCount: groundingScanResult?.violations.length ?? 0,
|
|
731
|
-
changedFileCount: changedFileEvidenceAvailable ? changedFiles.length : undefined,
|
|
732
|
-
diffNovelty: changedFileEvidenceAvailable ? (changedFiles.length > 0 ? 1 : 0) : undefined,
|
|
832
|
+
changedFileCount: !isVerifyOnly && changedFileEvidenceAvailable ? changedFiles.length : undefined,
|
|
833
|
+
diffNovelty: !isVerifyOnly && changedFileEvidenceAvailable ? (changedFiles.length > 0 ? 1 : 0) : undefined,
|
|
733
834
|
diffStats: result.execution?.diffStats,
|
|
734
835
|
costUsd: getUsageUsd(result.usage),
|
|
735
836
|
summary: result.summary
|
|
@@ -822,11 +923,7 @@ export async function runMartin(input) {
|
|
|
822
923
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
823
924
|
kind: "run.exited",
|
|
824
925
|
runId: loop.loopId,
|
|
825
|
-
payload:
|
|
826
|
-
lifecycleState: patchExitDecision.lifecycleState,
|
|
827
|
-
status: patchExitDecision.status,
|
|
828
|
-
reason: patchExitDecision.reason
|
|
829
|
-
}
|
|
926
|
+
payload: createRunExitPayload(patchExitDecision)
|
|
830
927
|
}));
|
|
831
928
|
}
|
|
832
929
|
return {
|
|
@@ -870,11 +967,7 @@ export async function runMartin(input) {
|
|
|
870
967
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
871
968
|
kind: "run.exited",
|
|
872
969
|
runId: loop.loopId,
|
|
873
|
-
payload:
|
|
874
|
-
lifecycleState: decision.lifecycleState,
|
|
875
|
-
status: decision.status,
|
|
876
|
-
reason: decision.reason
|
|
877
|
-
}
|
|
970
|
+
payload: createRunExitPayload(decision)
|
|
878
971
|
}));
|
|
879
972
|
}
|
|
880
973
|
return {
|
|
@@ -893,11 +986,7 @@ export async function runMartin(input) {
|
|
|
893
986
|
await input.store.appendLedger(loop.loopId, makeLedgerEvent({
|
|
894
987
|
kind: "run.exited",
|
|
895
988
|
runId: loop.loopId,
|
|
896
|
-
payload:
|
|
897
|
-
lifecycleState: decision.lifecycleState,
|
|
898
|
-
status: decision.status,
|
|
899
|
-
reason: decision.reason
|
|
900
|
-
}
|
|
989
|
+
payload: createRunExitPayload(decision)
|
|
901
990
|
}));
|
|
902
991
|
}
|
|
903
992
|
return {
|
|
@@ -905,11 +994,54 @@ export async function runMartin(input) {
|
|
|
905
994
|
decision
|
|
906
995
|
};
|
|
907
996
|
}
|
|
997
|
+
/**
 * Builds the ledger/event payload for a run exit from an exit decision.
 *
 * `lifecycleState`, `status` and `reason` are always copied. The
 * machine-readable classifiers (`failureClass`, `safetySurface`,
 * `reasonCode`) are attached only when the decision carries a truthy value,
 * so exits without a classifier produce no extra keys.
 *
 * @param {object} decision - Exit decision to project into a payload.
 * @returns {object} A new payload object; `decision` is not mutated.
 */
function createRunExitPayload(decision) {
    const { lifecycleState, status, reason } = decision;
    const payload = { lifecycleState, status, reason };
    if (decision.failureClass) {
        payload.failureClass = decision.failureClass;
    }
    if (decision.safetySurface) {
        payload.safetySurface = decision.safetySurface;
    }
    if (decision.reasonCode) {
        payload.reasonCode = decision.reasonCode;
    }
    return payload;
}
|
|
1007
|
+
/**
 * Produces the machine-readable classifier fields for an exit caused by a
 * safety leash: a fixed `failureClass`, the safety surface, and the reason
 * code derived for that decision and surface.
 *
 * @param {object} decision - Leash decision that triggered the exit.
 * @param {string} [safetySurface=decision.surface] - Surface to report;
 *   falls back to the decision's own `surface` when omitted.
 * @returns {{ failureClass: string, safetySurface: string, reasonCode: string }}
 */
function classifySafetyLeashExit(decision, safetySurface = decision.surface) {
    const reasonCode = safetyLeashReasonCode(decision, safetySurface);
    return { failureClass: "safety_leash_blocked", safetySurface, reasonCode };
}
|
|
1014
|
+
/**
 * Maps the first violation of a leash decision to a stable reason code.
 *
 * Command and network blocks get a verifier-specific code when the surface is
 * `"verifier"`. Every other known violation kind maps to a fixed code. Unknown
 * or missing kinds fall back to `<safetySurface>_safety_block`.
 *
 * @param {object} decision - Leash decision; only `violations[0].kind` is read.
 * @param {string} safetySurface - Surface the decision is being reported under.
 * @returns {string} The reason code.
 */
function safetyLeashReasonCode(decision, safetySurface) {
    const firstViolationKind = decision.violations[0]?.kind;
    const onVerifierSurface = safetySurface === "verifier";
    if (firstViolationKind === "command_blocked") {
        return onVerifierSurface ? "destructive_verifier_command" : "command_blocked";
    }
    if (firstViolationKind === "network_blocked") {
        return onVerifierSurface ? "verifier_network_blocked" : "network_access_blocked";
    }
    // Kinds whose reason code does not depend on the surface.
    const surfaceIndependentCodes = new Map([
        ["secret_value", "secret_context_value"],
        ["path_denied", "protected_surface_write"],
        ["protected_path", "protected_surface_write"],
        ["path_not_allowed", "surface_write_not_allowed"],
        ["path_outside_repo", "outside_repo_write"],
        ["dependency_approval_required", "dependency_approval_required"],
        ["migration_approval_required", "migration_approval_required"],
        ["config_change_approval_required", "config_change_approval_required"]
    ]);
    return surfaceIndependentCodes.get(firstViolationKind) ?? `${safetySurface}_safety_block`;
}
|
|
908
1040
|
function finalizeLoop(loop, decision, timestamp, idFactory) {
|
|
909
1041
|
const finalized = appendLoopEvent(loop, {
|
|
910
1042
|
type: "run.completed",
|
|
911
1043
|
lifecycleState: decision.lifecycleState,
|
|
912
|
-
payload:
|
|
1044
|
+
payload: createRunExitPayload(decision)
|
|
913
1045
|
}, { now: timestamp, idFactory });
|
|
914
1046
|
return {
|
|
915
1047
|
...finalized,
|
|
@@ -934,7 +1066,7 @@ function getUsageProvenance(usage) {
|
|
|
934
1066
|
return "actual";
|
|
935
1067
|
}
|
|
936
1068
|
function resolveChangedFiles(result, repoRoot) {
|
|
937
|
-
if (result.execution?.changedFiles
|
|
1069
|
+
if (result.execution?.changedFiles !== undefined) {
|
|
938
1070
|
return result.execution.changedFiles;
|
|
939
1071
|
}
|
|
940
1072
|
if (!repoRoot) {
|
|
@@ -18,6 +18,12 @@ export interface ExitDecision {
|
|
|
18
18
|
lifecycleState: LoopLifecycleState;
|
|
19
19
|
status: LoopStatus;
|
|
20
20
|
reason: string;
|
|
21
|
+
/** Machine-readable stop classifier for non-attempt exits such as preflight safety blocks. */
|
|
22
|
+
failureClass?: FailureClass;
|
|
23
|
+
/** Machine-readable safety surface, when the stop came from a safety leash. */
|
|
24
|
+
safetySurface?: string;
|
|
25
|
+
/** Stable reason code for dashboards, MCP, and downstream automation. */
|
|
26
|
+
reasonCode?: string;
|
|
21
27
|
}
|
|
22
28
|
export interface MartinAdapterResultLike {
|
|
23
29
|
status: "completed" | "failed";
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Directory Submission Pack
|
|
2
|
+
|
|
3
|
+
Use this file as the single source of truth for public directory submissions.
|
|
4
|
+
|
|
5
|
+
## Short tagline
|
|
6
|
+
|
|
7
|
+
Open-source control plane for AI coding agents.
|
|
8
|
+
|
|
9
|
+
## Long description
|
|
10
|
+
|
|
11
|
+
MartinLoop is an open-source governed runtime for AI coding agents. It wraps autonomous coding loops with budget caps, verifier gates, rollback evidence, JSONL run records, failure classification, and MCP/Claude/Codex integration so agent work can be inspected, halted, and trusted.
|
|
12
|
+
|
|
13
|
+
## Primary links
|
|
14
|
+
|
|
15
|
+
- GitHub repo: [github.com/Keesan12/martin-loop](https://github.com/Keesan12/martin-loop)
|
|
16
|
+
- Website: [martinloop.com](https://martinloop.com)
|
|
17
|
+
- npm package: [npmjs.com/package/martin-loop](https://www.npmjs.com/package/martin-loop)
|
|
18
|
+
- Benchmark challenge: [UNDER-3-CHALLENGE.md](./UNDER-3-CHALLENGE.md)
|
|
19
|
+
|
|
20
|
+
## Submission checklist
|
|
21
|
+
|
|
22
|
+
### OpenAlternative
|
|
23
|
+
|
|
24
|
+
- status: pending
|
|
25
|
+
- surface: OSS alternative listing
|
|
26
|
+
- copy to use: short tagline + long description
|
|
27
|
+
- include: GitHub, website, npm
|
|
28
|
+
|
|
29
|
+
### DevHunt
|
|
30
|
+
|
|
31
|
+
- status: pending
|
|
32
|
+
- surface: Product Hunt–style dev tools directory
|
|
33
|
+
- copy to use: short tagline + long description
|
|
34
|
+
- include: benchmark challenge and demo command
|
|
35
|
+
|
|
36
|
+
### Uneed
|
|
37
|
+
|
|
38
|
+
- status: pending
|
|
39
|
+
- surface: startup/tool discovery
|
|
40
|
+
- copy to use: short tagline + long description
|
|
41
|
+
- include: GitHub, website, npm
|
|
42
|
+
|
|
43
|
+
### BetaList
|
|
44
|
+
|
|
45
|
+
- status: pending
|
|
46
|
+
- surface: early product discovery
|
|
47
|
+
- copy to use: short tagline + long description
|
|
48
|
+
- include: why governed agent runs matter
|
|
49
|
+
|
|
50
|
+
### Microlaunch
|
|
51
|
+
|
|
52
|
+
- status: pending
|
|
53
|
+
- surface: lightweight launch directory
|
|
54
|
+
- copy to use: short tagline + long description
|
|
55
|
+
- include: demo command and benchmark challenge
|
|
56
|
+
|
|
57
|
+
### AlternativeTo
|
|
58
|
+
|
|
59
|
+
- status: pending
|
|
60
|
+
- surface: alternative comparison listing
|
|
61
|
+
- copy to use: short tagline + long description
|
|
62
|
+
- include: comparable tools and differentiators
|
|
63
|
+
|
|
64
|
+
### Futurepedia
|
|
65
|
+
|
|
66
|
+
- status: pending
|
|
67
|
+
- surface: AI tools directory
|
|
68
|
+
- copy to use: short tagline + long description
|
|
69
|
+
- include: Claude, Codex, and MCP integration
|
|
70
|
+
|
|
71
|
+
### Toolify
|
|
72
|
+
|
|
73
|
+
- status: pending
|
|
74
|
+
- surface: AI tool directory
|
|
75
|
+
- copy to use: short tagline + long description
|
|
76
|
+
- include: benchmark challenge link
|
|
77
|
+
|
|
78
|
+
### There’s An AI For That
|
|
79
|
+
|
|
80
|
+
- status: pending
|
|
81
|
+
- surface: AI tool catalog
|
|
82
|
+
- copy to use: short tagline + long description
|
|
83
|
+
- include: GitHub, website, npm
|
|
84
|
+
|
|
85
|
+
## Notes
|
|
86
|
+
|
|
87
|
+
- Prefer submissions that link directly to the repo, website, and npm package together.
|
|
88
|
+
- Reuse the benchmark challenge and `martin-loop demo` as the fastest trust-building assets.
|
|
89
|
+
- If a directory wants screenshots, use the current public repo README visuals instead of inventing a separate pitch deck.
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Integration Outreach Pack
|
|
2
|
+
|
|
3
|
+
Use this file for direct outreach to projects and communities building around AI coding agents.
|
|
4
|
+
|
|
5
|
+
## Core message
|
|
6
|
+
|
|
7
|
+
Hey [Name] — I’m building MartinLoop, an OSS governed runtime for AI coding agents.
|
|
8
|
+
|
|
9
|
+
The repo already supports budget caps, verifier gates, JSONL run records, rollback evidence, Claude/Codex adapters, and an MCP package.
|
|
10
|
+
|
|
11
|
+
I’m trying to understand where a control layer like this should integrate best with projects like [their project]: CLI wrapper, MCP boundary, CI, or runtime adapter.
|
|
12
|
+
|
|
13
|
+
Would value your blunt take — useful direction or wrong abstraction?
|
|
14
|
+
|
|
15
|
+
## Target projects
|
|
16
|
+
|
|
17
|
+
- Claude Code
|
|
18
|
+
- Codex CLI
|
|
19
|
+
- MCP servers
|
|
20
|
+
- Aider
|
|
21
|
+
- Cline
|
|
22
|
+
- Continue
|
|
23
|
+
- OpenHands
|
|
24
|
+
- SWE-agent
|
|
25
|
+
- Goose
|
|
26
|
+
- DevContainers
|
|
27
|
+
- GitHub Actions
|
|
28
|
+
|
|
29
|
+
## Outreach notes by target
|
|
30
|
+
|
|
31
|
+
### Claude Code
|
|
32
|
+
|
|
33
|
+
- emphasize governed repo runs and MCP install path
|
|
34
|
+
- ask whether the best control point is local CLI wrapper or MCP boundary
|
|
35
|
+
|
|
36
|
+
### Codex CLI
|
|
37
|
+
|
|
38
|
+
- emphasize budget caps, verifier gates, and auditable run records
|
|
39
|
+
- ask whether wrapper, runtime adapter, or CI integration is most useful
|
|
40
|
+
|
|
41
|
+
### MCP projects
|
|
42
|
+
|
|
43
|
+
- emphasize the packaged `@martinloop/mcp` server surface
|
|
44
|
+
- ask whether the trust layer belongs at tool boundary or runtime boundary
|
|
45
|
+
|
|
46
|
+
### Aider, Cline, Continue, OpenHands, SWE-agent, Goose
|
|
47
|
+
|
|
48
|
+
- emphasize adapter-normalized receipts and halt reasons
|
|
49
|
+
- ask how much control should live in the agent runtime versus CI or wrapper
|
|
50
|
+
|
|
51
|
+
### DevContainers and GitHub Actions
|
|
52
|
+
|
|
53
|
+
- emphasize safe default automation, budget visibility, and verifier gates in shared team workflows
|
|
54
|
+
- ask where platform teams want policy to live
|
|
55
|
+
|
|
56
|
+
## Supporting assets
|
|
57
|
+
|
|
58
|
+
- challenge page: [UNDER-3-CHALLENGE.md](./UNDER-3-CHALLENGE.md)
|
|
59
|
+
- directory copy: [DIRECTORY-SUBMISSIONS.md](./DIRECTORY-SUBMISSIONS.md)
|
|
60
|
+
- repo: [github.com/Keesan12/martin-loop](https://github.com/Keesan12/martin-loop)
|
|
61
|
+
- npm: [npmjs.com/package/martin-loop](https://www.npmjs.com/package/martin-loop)
|