npm - martin-loop - Versions diffs - 0.1.4 → 0.1.5 - Mend

martin-loop 0.1.4 → 0.1.5

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

package/LICENSE +21 -21
package/README.md +398 -362
package/demo/seeded-workspace/README.md +35 -0
package/demo/seeded-workspace/TASKS.md +29 -0
package/demo/seeded-workspace/martin.config.yaml +11 -0
package/demo/seeded-workspace/package.json +8 -0
package/demo/seeded-workspace/src/invoice-summary.js +11 -0
package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
package/dist/vendor/adapters/claude-cli.d.ts +19 -4
package/dist/vendor/adapters/claude-cli.js +55 -24
package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
package/dist/vendor/adapters/cli-bridge.js +154 -28
package/dist/vendor/adapters/index.d.ts +1 -0
package/dist/vendor/adapters/index.js +1 -0
package/dist/vendor/adapters/verifier-only.d.ts +7 -0
package/dist/vendor/adapters/verifier-only.js +57 -0
package/dist/vendor/cli/index.d.ts +6 -1
package/dist/vendor/cli/index.js +124 -7
package/dist/vendor/contracts/index.d.ts +3 -1
package/dist/vendor/core/compiler.d.ts +2 -0
package/dist/vendor/core/compiler.js +10 -4
package/dist/vendor/core/context-integrity.d.ts +26 -0
package/dist/vendor/core/context-integrity.js +56 -0
package/dist/vendor/core/index.d.ts +5 -2
package/dist/vendor/core/index.js +186 -54
package/dist/vendor/core/policy.d.ts +6 -0
package/docs/distribution/DIRECTORY-SUBMISSIONS.md +89 -0
package/docs/distribution/INTEGRATION-OUTREACH.md +61 -0
package/docs/distribution/UNDER-3-CHALLENGE.md +65 -0
package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
package/docs/oss/EXAMPLES.md +134 -126
package/docs/oss/OSS-BOUNDARY-REPORT.json +109 -113
package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
package/docs/oss/QUICKSTART.md +165 -135
package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
package/docs/oss/README.md +96 -93
package/docs/oss/RELEASE-SURFACE-REPORT.json +45 -45
package/docs/oss/RELEASE-SURFACE-REPORT.md +35 -35
package/package.json +19 -11

package/dist/vendor/core/index.js CHANGED Viewed

@@ -5,8 +5,11 @@ import { evaluateChangeApprovalLeash, evaluateFilesystemLeash, evaluateSecretLea
 import { buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations } from "./grounding.js";
 import { captureRollbackBoundary, restoreRollbackBoundary } from "./rollback.js";
 import { compilePromptPacket } from "./compiler.js";
-import { makeLedgerEvent } from "./persistence/index.js";
+import { makeLedgerEvent, resolveRunsRoot, runDir } from "./persistence/index.js";
+import { runContextIntegrityPrecheck } from "./context-integrity.js";
 export { classifyFailure, computeEvidenceVector, evaluatePatchDecision, evaluateCostGovernor, evaluateBudgetPreflight, inferExit, nextPolicyPhase, policyPhaseToLifecycleState, scorePatchDecision, selectRecoveryRecipe, evaluateVerificationLeash, evaluateFilesystemLeash, evaluateChangeApprovalLeash, evaluateSecretLeash, resolveExecutionProfile, redactSecretsFromText, buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations, captureRollbackBoundary, restoreRollbackBoundary };
+// ─── Context Integrity Pre-gate ──────────────────────────────────────────────
+export { runContextIntegrityPrecheck } from "./context-integrity.js";
 // ─── Prompt packet compiler ──────────────────────────────────────────────────
 export { compilePromptPacket } from "./compiler.js";
 // ─── Persistence (RunStore, LedgerEvent, FileRunStore) ──────────────────────
@@ -136,6 +139,7 @@ export async function runMartin(input) {
     let currentAdapterIndex = 0;
     let currentAdapter = adapterChain[currentAdapterIndex] ?? input.adapter;
     let useCompressedContext = false;
+    const isVerifyOnly = input.task.mutationMode === "verify_only";
     const executionProfile = resolveExecutionProfile({
         executionProfile: input.task.executionProfile,
         allowedNetworkDomains: input.task.allowedNetworkDomains
@@ -153,7 +157,8 @@ export async function runMartin(input) {
             shouldExit: true,
             lifecycleState: "human_escalation",
             status: "exited",
-            reason
+            reason,
+            ...classifySafetyLeashExit(leashDecision, "verifier")
         };
         if (input.store) {
             await input.store.appendLedger(loop.loopId, makeLedgerEvent({
@@ -169,11 +174,7 @@ export async function runMartin(input) {
             await input.store.appendLedger(loop.loopId, makeLedgerEvent({
                 kind: "run.exited",
                 runId: loop.loopId,
-                payload: {
-                    lifecycleState: leashExitDecision.lifecycleState,
-                    status: leashExitDecision.status,
-                    reason: leashExitDecision.reason
-                }
+                payload: createRunExitPayload(leashExitDecision)
             }));
         }
         return {
@@ -193,7 +194,8 @@ export async function runMartin(input) {
             shouldExit: true,
             lifecycleState: "human_escalation",
             status: "exited",
-            reason: secretDecision.reason ?? "Safety leash blocked secret-like values in the runtime context."
+            reason: secretDecision.reason ?? "Safety leash blocked secret-like values in the runtime context.",
+            ...classifySafetyLeashExit(secretDecision, "secret")
         };
         if (input.store) {
             await input.store.appendLedger(loop.loopId, makeLedgerEvent({
@@ -208,11 +210,7 @@ export async function runMartin(input) {
             await input.store.appendLedger(loop.loopId, makeLedgerEvent({
                 kind: "run.exited",
                 runId: loop.loopId,
-                payload: {
-                    lifecycleState: secretExitDecision.lifecycleState,
-                    status: secretExitDecision.status,
-                    reason: secretExitDecision.reason
-                }
+                payload: createRunExitPayload(secretExitDecision)
             }));
         }
         return {
@@ -254,11 +252,7 @@ export async function runMartin(input) {
                 await input.store.appendLedger(loop.loopId, makeLedgerEvent({
                     kind: "run.exited",
                     runId: loop.loopId,
-                    payload: {
-                        lifecycleState: preflightExitDecision.lifecycleState,
-                        status: preflightExitDecision.status,
-                        reason: preflightExitDecision.reason
-                    }
+                    payload: createRunExitPayload(preflightExitDecision)
                 }));
             }
             return {
@@ -268,6 +262,38 @@ export async function runMartin(input) {
         }
         // GATHER → ADMIT: run admission control before executing
         currentPhase = "ADMIT";
+        // T05: Context Integrity Pre-gate — blocks authority inversion / injection before reasoning
+        const contextPrecheck = await runContextIntegrityPrecheck(loop.loopId, loop.attempts.length + 1, runDir(resolveRunsRoot(), loop.loopId), {
+            userPrompt: distilled.focus,
+            history: loop.attempts.map(a => a.summary).join("\n")
+        });
+        if (contextPrecheck.verdict === "context_poisoning_block") {
+            currentPhase = "ABORT";
+            const poisoningExitDecision = {
+                shouldExit: true,
+                lifecycleState: "human_escalation",
+                status: "exited",
+                reason: "Context Integrity Pre-gate: context poisoning attempt detected.",
+                failureClass: "safety_leash_blocked",
+                safetySurface: "context_integrity",
+                reasonCode: "context_poisoning_blocked"
+            };
+            if (input.store) {
+                await input.store.appendLedger(loop.loopId, makeLedgerEvent({
+                    kind: "safety.violations_found",
+                    runId: loop.loopId,
+                    payload: {
+                        verdict: contextPrecheck.verdict,
+                        signals: contextPrecheck.detectedSignals,
+                        source: "context_integrity_pregate"
+                    }
+                }));
+            }
+            return {
+                loop: finalizeLoop(loop, poisoningExitDecision, now(), idFactory),
+                decision: poisoningExitDecision
+            };
+        }
         const admissionDecision = evaluateAttemptPolicy({
             request: {
                 loopId: loop.loopId,
@@ -315,11 +341,7 @@ export async function runMartin(input) {
                 await input.store.appendLedger(loop.loopId, makeLedgerEvent({
                     kind: "run.exited",
                     runId: loop.loopId,
-                    payload: {
-                        lifecycleState: exitDecision.lifecycleState,
-                        status: exitDecision.status,
-                        reason: exitDecision.reason
-                    }
+                    payload: createRunExitPayload(exitDecision)
                 }));
             }
             return {
@@ -361,6 +383,7 @@ export async function runMartin(input) {
                 objective: loop.task.objective,
                 verificationPlan: loop.task.verificationPlan,
                 ...(loop.task.verificationStack ? { verificationStack: loop.task.verificationStack } : {}),
+                ...(loop.task.mutationMode ? { mutationMode: loop.task.mutationMode } : {}),
                 ...(loop.task.repoRoot ? { repoRoot: loop.task.repoRoot } : {}),
                 ...(loop.task.allowedPaths ? { allowedPaths: loop.task.allowedPaths } : {}),
                 ...(loop.task.deniedPaths ? { deniedPaths: loop.task.deniedPaths } : {}),
@@ -539,6 +562,90 @@ export async function runMartin(input) {
         // returned a non-empty list. A repoRoot alone is insufficient — git may fail (e.g. not
         // a git repo) and silently return [], which would falsely trigger no_code_change.
         const changedFileEvidenceAvailable = result.execution?.changedFiles !== undefined || changedFiles.length > 0;
+        if (isVerifyOnly && changedFiles.length > 0) {
+            const patchDecision = evaluatePatchDecision({
+                verificationPassed: result.verification.passed,
+                previousVerifierScore,
+                verifierScore: result.verification.passed ? 1 : 0,
+                scopeViolationCount: changedFiles.length,
+                changedFileCount: changedFiles.length,
+                diffNovelty: 1,
+                diffStats: result.execution?.diffStats,
+                costUsd: getUsageUsd(result.usage),
+                summary: result.summary
+            });
+            const verifyOnlyExitDecision = {
+                shouldExit: true,
+                lifecycleState: "human_escalation",
+                status: "exited",
+                reason: "Verify-only mode forbids file changes.",
+                failureClass: "safety_leash_blocked",
+                safetySurface: "filesystem",
+                reasonCode: "verify_only_write_attempt"
+            };
+            const rollbackOutcome = await restoreRollbackBoundary({
+                repoRoot: request.context.repoRoot,
+                boundary: rollbackBoundary,
+                restoredAt: attemptCompletedAt,
+                decision: patchDecision.decision
+            });
+            if (input.store) {
+                const verifyOnlyViolation = {
+                    kind: "path_not_allowed",
+                    message: `Verify-only mode forbids changed files: ${changedFiles.join(", ")}`,
+                    file: changedFiles[0]
+                };
+                await input.store.writeAttemptArtifacts(loop.loopId, currentAttemptIndex, {
+                    compiledContext,
+                    leash: createLeashArtifact({
+                        surface: "filesystem",
+                        reason: verifyOnlyExitDecision.reason,
+                        violations: [verifyOnlyViolation]
+                    }, currentAttemptIndex),
+                    patchScore: patchDecision.score,
+                    patchDecision: toPatchDecisionArtifact(patchDecision),
+                    ...(rollbackBoundary ? { rollbackBoundary } : {}),
+                    ...(rollbackOutcome ? { rollbackOutcome } : {})
+                });
+                await input.store.appendLedger(loop.loopId, makeLedgerEvent({
+                    kind: "safety.violations_found",
+                    runId: loop.loopId,
+                    attemptIndex: currentAttemptIndex,
+                    payload: {
+                        surface: "filesystem",
+                        blocked: true,
+                        attemptIndex: currentAttemptIndex,
+                        violations: [
+                            {
+                                kind: "path_not_allowed",
+                                message: verifyOnlyExitDecision.reason,
+                                files: changedFiles
+                            }
+                        ]
+                    }
+                }));
+                await input.store.appendLedger(loop.loopId, makeLedgerEvent({
+                    kind: "attempt.discarded",
+                    runId: loop.loopId,
+                    attemptIndex: currentAttemptIndex,
+                    payload: {
+                        decision: patchDecision.decision,
+                        reason: patchDecision.summary,
+                        reasonCodes: patchDecision.reasonCodes,
+                        score: patchDecision.score.score
+                    }
+                }));
+                await input.store.appendLedger(loop.loopId, makeLedgerEvent({
+                    kind: "run.exited",
+                    runId: loop.loopId,
+                    payload: createRunExitPayload(verifyOnlyExitDecision)
+                }));
+            }
+            return {
+                loop: finalizeLoop(loop, verifyOnlyExitDecision, now(), idFactory),
+                decision: verifyOnlyExitDecision
+            };
+        }
         const filesystemDecision = evaluateFilesystemLeash({
             repoRoot: request.context.repoRoot,
             changedFiles,
@@ -561,7 +668,8 @@ export async function runMartin(input) {
                 shouldExit: true,
                 lifecycleState: "human_escalation",
                 status: "exited",
-                reason: filesystemDecision.reason ?? "Safety leash blocked filesystem changes."
+                reason: filesystemDecision.reason ?? "Safety leash blocked filesystem changes.",
+                ...classifySafetyLeashExit(filesystemDecision, "filesystem")
             };
             const rollbackOutcome = await restoreRollbackBoundary({
                 repoRoot: request.context.repoRoot,
@@ -603,11 +711,7 @@ export async function runMartin(input) {
                 await input.store.appendLedger(loop.loopId, makeLedgerEvent({
                     kind: "run.exited",
                     runId: loop.loopId,
-                    payload: {
-                        lifecycleState: filesystemExitDecision.lifecycleState,
-                        status: filesystemExitDecision.status,
-                        reason: filesystemExitDecision.reason
-                    }
+                    payload: createRunExitPayload(filesystemExitDecision)
                 }));
             }
             return {
@@ -638,7 +742,8 @@ export async function runMartin(input) {
                 lifecycleState: "human_escalation",
                 status: "exited",
                 reason: changeApprovalDecision.reason ??
-                    "Safety leash blocked dependency or migration changes that require approval."
+                    "Safety leash blocked dependency or migration changes that require approval.",
+                ...classifySafetyLeashExit(changeApprovalDecision, "dependency")
             };
             const rollbackOutcome = await restoreRollbackBoundary({
                 repoRoot: request.context.repoRoot,
@@ -681,11 +786,7 @@ export async function runMartin(input) {
                 await input.store.appendLedger(loop.loopId, makeLedgerEvent({
                     kind: "run.exited",
                     runId: loop.loopId,
-                    payload: {
-                        lifecycleState: approvalExitDecision.lifecycleState,
-                        status: approvalExitDecision.status,
-                        reason: approvalExitDecision.reason
-                    }
+                    payload: createRunExitPayload(approvalExitDecision)
                 }));
             }
             return {
@@ -728,8 +829,8 @@ export async function runMartin(input) {
                 previousVerifierScore,
                 verifierScore: result.verification.passed ? 1 : 0,
                 groundingViolationCount: groundingScanResult?.violations.length ?? 0,
-                changedFileCount: changedFileEvidenceAvailable ? changedFiles.length : undefined,
-                diffNovelty: changedFileEvidenceAvailable ? (changedFiles.length > 0 ? 1 : 0) : undefined,
+                changedFileCount: !isVerifyOnly && changedFileEvidenceAvailable ? changedFiles.length : undefined,
+                diffNovelty: !isVerifyOnly && changedFileEvidenceAvailable ? (changedFiles.length > 0 ? 1 : 0) : undefined,
                 diffStats: result.execution?.diffStats,
                 costUsd: getUsageUsd(result.usage),
                 summary: result.summary
@@ -822,11 +923,7 @@ export async function runMartin(input) {
                 await input.store.appendLedger(loop.loopId, makeLedgerEvent({
                     kind: "run.exited",
                     runId: loop.loopId,
-                    payload: {
-                        lifecycleState: patchExitDecision.lifecycleState,
-                        status: patchExitDecision.status,
-                        reason: patchExitDecision.reason
-                    }
+                    payload: createRunExitPayload(patchExitDecision)
                 }));
             }
             return {
@@ -870,11 +967,7 @@ export async function runMartin(input) {
                 await input.store.appendLedger(loop.loopId, makeLedgerEvent({
                     kind: "run.exited",
                     runId: loop.loopId,
-                    payload: {
-                        lifecycleState: decision.lifecycleState,
-                        status: decision.status,
-                        reason: decision.reason
-                    }
+                    payload: createRunExitPayload(decision)
                 }));
             }
             return {
@@ -893,11 +986,7 @@ export async function runMartin(input) {
         await input.store.appendLedger(loop.loopId, makeLedgerEvent({
             kind: "run.exited",
             runId: loop.loopId,
-            payload: {
-                lifecycleState: decision.lifecycleState,
-                status: decision.status,
-                reason: decision.reason
-            }
+            payload: createRunExitPayload(decision)
         }));
     }
     return {
@@ -905,11 +994,54 @@ export async function runMartin(input) {
         decision
     };
 }
+function createRunExitPayload(decision) { // Builds the payload object shared by the run.exited ledger events and the run.completed loop event.
+    return {
+        lifecycleState: decision.lifecycleState,
+        status: decision.status,
+        reason: decision.reason,
+        ...(decision.failureClass ? { failureClass: decision.failureClass } : {}), // each classifier field below is spread in only when it is truthy on the decision
+        ...(decision.safetySurface ? { safetySurface: decision.safetySurface } : {}),
+        ...(decision.reasonCode ? { reasonCode: decision.reasonCode } : {})
+    };
+}
+function classifySafetyLeashExit(decision, safetySurface = decision.surface) { // Returns the failureClass, safetySurface and reasonCode fields that callers spread into a leash exit decision.
+    return {
+        failureClass: "safety_leash_blocked",
+        safetySurface, // falls back to decision.surface when the caller passes no explicit surface
+        reasonCode: safetyLeashReasonCode(decision, safetySurface)
+    };
+}
+function safetyLeashReasonCode(decision, safetySurface) { // Maps the kind of the first violation on a leash decision to a reason-code string.
+    const kind = decision.violations[0]?.kind; // NOTE(review): only element [0] is null-guarded; this presumes decision.violations is always an array - confirm against the leash evaluators
+    switch (kind) {
+        case "command_blocked":
+            return safetySurface === "verifier" ? "destructive_verifier_command" : "command_blocked"; // the verifier surface gets its own code
+        case "network_blocked":
+            return safetySurface === "verifier" ? "verifier_network_blocked" : "network_access_blocked"; // the verifier surface gets its own code
+        case "secret_value":
+            return "secret_context_value";
+        case "path_denied": // shares the return value of protected_path below
+        case "protected_path":
+            return "protected_surface_write";
+        case "path_not_allowed":
+            return "surface_write_not_allowed";
+        case "path_outside_repo":
+            return "outside_repo_write";
+        case "dependency_approval_required":
+            return "dependency_approval_required";
+        case "migration_approval_required":
+            return "migration_approval_required";
+        case "config_change_approval_required":
+            return "config_change_approval_required";
+        default:
+            return `${safetySurface}_safety_block`; // unrecognized or missing kind: fall back to a code derived from the surface name
+    }
+}
 function finalizeLoop(loop, decision, timestamp, idFactory) {
     const finalized = appendLoopEvent(loop, {
         type: "run.completed",
         lifecycleState: decision.lifecycleState,
-        payload: { status: decision.status, reason: decision.reason }
+        payload: createRunExitPayload(decision)
     }, { now: timestamp, idFactory });
     return {
         ...finalized,
@@ -934,7 +1066,7 @@ function getUsageProvenance(usage) {
     return "actual";
 }
 function resolveChangedFiles(result, repoRoot) {
-    if (result.execution?.changedFiles?.length) {
+    if (result.execution?.changedFiles !== undefined) {
         return result.execution.changedFiles;
     }
     if (!repoRoot) {

package/dist/vendor/core/policy.d.ts CHANGED Viewed

@@ -18,6 +18,12 @@ export interface ExitDecision {
     lifecycleState: LoopLifecycleState;
     status: LoopStatus;
     reason: string;
+    /** Machine-readable stop classifier for non-attempt exits such as preflight safety blocks. */
+    failureClass?: FailureClass;
+    /** Machine-readable safety surface, when the stop came from a safety leash. */
+    safetySurface?: string;
+    /** Stable reason code for dashboards, MCP, and downstream automation. */
+    reasonCode?: string;
 }
 export interface MartinAdapterResultLike {
     status: "completed" | "failed";

package/docs/distribution/DIRECTORY-SUBMISSIONS.md ADDED Viewed

@@ -0,0 +1,89 @@
+# Directory Submission Pack
+Use this file as the single source of truth for public directory submissions.
+## Short tagline
+Open-source control plane for AI coding agents.
+## Long description
+MartinLoop is an open-source governed runtime for AI coding agents. It wraps autonomous coding loops with budget caps, verifier gates, rollback evidence, JSONL run records, failure classification, and MCP/Claude/Codex integration so agent work can be inspected, halted, and trusted.
+## Primary links
+- GitHub repo: [github.com/Keesan12/martin-loop](https://github.com/Keesan12/martin-loop)
+- Website: [martinloop.com](https://martinloop.com)
+- npm package: [npmjs.com/package/martin-loop](https://www.npmjs.com/package/martin-loop)
+- Benchmark challenge: [UNDER-3-CHALLENGE.md](./UNDER-3-CHALLENGE.md)
+## Submission checklist
+### OpenAlternative
+- status: pending
+- surface: OSS alternative listing
+- copy to use: short tagline + long description
+- include: GitHub, website, npm
+### DevHunt
+- status: pending
+- surface: Product Hunt-style dev tools directory
+- copy to use: short tagline + long description
+- include: benchmark challenge and demo command
+### Uneed
+- status: pending
+- surface: startup/tool discovery
+- copy to use: short tagline + long description
+- include: GitHub, website, npm
+### BetaList
+- status: pending
+- surface: early product discovery
+- copy to use: short tagline + long description
+- include: why governed agent runs matter
+### Microlaunch
+- status: pending
+- surface: lightweight launch directory
+- copy to use: short tagline + long description
+- include: demo command and benchmark challenge
+### AlternativeTo
+- status: pending
+- surface: alternative comparison listing
+- copy to use: short tagline + long description
+- include: comparable tools and differentiators
+### Futurepedia
+- status: pending
+- surface: AI tools directory
+- copy to use: short tagline + long description
+- include: Claude, Codex, and MCP integration
+### Toolify
+- status: pending
+- surface: AI tool directory
+- copy to use: short tagline + long description
+- include: benchmark challenge link
+### There’s An AI For That
+- status: pending
+- surface: AI tool catalog
+- copy to use: short tagline + long description
+- include: GitHub, website, npm
+## Notes
+- Prefer submissions that link directly to the repo, website, and npm package together.
+- Reuse the benchmark challenge and `martin-loop demo` as the fastest trust-building assets.
+- If a directory wants screenshots, use the current public repo README visuals instead of inventing a separate pitch deck.

package/docs/distribution/INTEGRATION-OUTREACH.md ADDED Viewed

@@ -0,0 +1,61 @@
+# Integration Outreach Pack
+Use this file for direct outreach to projects and communities building around AI coding agents.
+## Core message
+Hey [Name] — I’m building MartinLoop, an OSS governed runtime for AI coding agents.
+The repo already supports budget caps, verifier gates, JSONL run records, rollback evidence, Claude/Codex adapters, and an MCP package.
+I’m trying to understand where a control layer like this would integrate best with projects like [their project]: as a CLI wrapper, at the MCP boundary, in CI, or as a runtime adapter.
+Would value your blunt take — useful direction or wrong abstraction?
+## Target projects
+- Claude Code
+- Codex CLI
+- MCP servers
+- Aider
+- Cline
+- Continue
+- OpenHands
+- SWE-agent
+- Goose
+- DevContainers
+- GitHub Actions
+## Outreach notes by target
+### Claude Code
+- emphasize governed repo runs and MCP install path
+- ask whether the best control point is local CLI wrapper or MCP boundary
+### Codex CLI
+- emphasize budget caps, verifier gates, and auditable run records
+- ask whether wrapper, runtime adapter, or CI integration is most useful
+### MCP projects
+- emphasize the packaged `@martinloop/mcp` server surface
+- ask whether the trust layer belongs at tool boundary or runtime boundary
+### Aider, Cline, Continue, OpenHands, SWE-agent, Goose
+- emphasize adapter-normalized receipts and halt reasons
+- ask how much control should live in the agent runtime versus CI or wrapper
+### DevContainers and GitHub Actions
+- emphasize safe default automation, budget visibility, and verifier gates in shared team workflows
+- ask where platform teams want policy to live
+## Supporting assets
+- challenge page: [UNDER-3-CHALLENGE.md](./UNDER-3-CHALLENGE.md)
+- directory copy: [DIRECTORY-SUBMISSIONS.md](./DIRECTORY-SUBMISSIONS.md)
+- repo: [github.com/Keesan12/martin-loop](https://github.com/Keesan12/martin-loop)
+- npm: [npmjs.com/package/martin-loop](https://www.npmjs.com/package/martin-loop)

package/docs/distribution/UNDER-3-CHALLENGE.md ADDED Viewed

@@ -0,0 +1,65 @@
+# Can your AI coding agent finish this task under $3?
+MartinLoop is testing a simple question:
+Can an AI coding agent complete a task under a fixed budget, with verifier-passed completion and an inspectable run record?
+## Current repo-backed comparison
+Same task, same starting state:
+- governed MartinLoop run: `$2.30`
+- uncontrolled retry loop: `$5.20`
+- governed outcome: `completed` and verifier-passed with an inspectable record
+- uncontrolled outcome: failed after repeated retries with no comparable audit trail
+These numbers match the current public benchmark story shown in the repo README and visualized in [`docs/assets/side-by-side.svg`](../assets/side-by-side.svg).
+## Why this matters
+The claim is not that every governed run is always cheaper. The claim is that the run becomes inspectable and enforceable:
+- budget policy is explicit
+- verifier success is explicit
+- stop reasons are explicit
+- artifacts are inspectable after the run
+That makes a coding-agent result easier to trust, replay, compare, and audit.
+## Reproduce it
+From the repo root:
+```bash
+pnpm --filter @martin/benchmarks test
+pnpm --filter @martin/benchmarks eval
+pnpm --filter @martin/benchmarks eval:phase12
+```
+## What to share back
+If you run a similar challenge with Claude Code, Codex CLI, Cursor, Aider, Cline, Continue, OpenHands, SWE-agent, Goose, or an internal coding agent, share:
+- total budget used
+- number of attempts
+- verifier result
+- whether the final run was auditable
+- whether rollback evidence was available
+## Try MartinLoop without risking your repo
+You can copy the public demo sandbox first:
+```bash
+npx martin-loop demo
+```
+Then follow the next steps the command prints to run the sandbox locally.
+## Claim boundary
+This page intentionally stays inside the current public evidence boundary:
+- the `$2.30` and `$5.20` figures are the current repo-backed benchmark story used in the public README
+- the reproduction commands above are real commands from this repository
+- the benchmark harness remains a workspace-level surface, so challenge claims should stay tied to repo-backed outputs rather than generic marketing numbers