martin-loop 0.1.5 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +32 -0
- package/LICENSE +21 -21
- package/README.md +307 -398
- package/demo/seeded-workspace/README.md +35 -35
- package/demo/seeded-workspace/TASKS.md +29 -29
- package/demo/seeded-workspace/martin.config.yaml +11 -11
- package/demo/seeded-workspace/package.json +8 -8
- package/demo/seeded-workspace/src/invoice-summary.js +11 -11
- package/demo/seeded-workspace/test/invoice-summary.test.js +20 -20
- package/dist/bin/martin-loop.js +0 -0
- package/dist/vendor/adapters/counter.d.ts +1 -0
- package/dist/vendor/adapters/counter.js +4 -0
- package/dist/vendor/adapters/git-baseline.d.ts +50 -0
- package/dist/vendor/adapters/git-baseline.js +233 -0
- package/dist/vendor/adapters/openrouter-adapter.d.ts +15 -0
- package/dist/vendor/adapters/openrouter-adapter.js +302 -0
- package/dist/vendor/adapters/usage.d.ts +48 -0
- package/dist/vendor/adapters/usage.js +66 -0
- package/dist/vendor/cli/bin/exit.d.ts +12 -0
- package/dist/vendor/cli/bin/exit.js +28 -0
- package/dist/vendor/cli/commands/analyze.d.ts +5 -0
- package/dist/vendor/cli/commands/analyze.js +58 -0
- package/dist/vendor/cli/commands/audit-log-verify.d.ts +34 -0
- package/dist/vendor/cli/commands/audit-log-verify.js +99 -0
- package/dist/vendor/cli/commands/audit.d.ts +8 -0
- package/dist/vendor/cli/commands/audit.js +199 -0
- package/dist/vendor/cli/commands/corpus.d.ts +5 -0
- package/dist/vendor/cli/commands/corpus.js +60 -0
- package/dist/vendor/cli/commands/doctor.d.ts +8 -0
- package/dist/vendor/cli/commands/doctor.js +219 -0
- package/dist/vendor/cli/commands/explain.d.ts +17 -0
- package/dist/vendor/cli/commands/explain.js +176 -0
- package/dist/vendor/cli/commands/export.d.ts +5 -0
- package/dist/vendor/cli/commands/export.js +60 -0
- package/dist/vendor/cli/commands/governance.d.ts +8 -0
- package/dist/vendor/cli/commands/governance.js +95 -0
- package/dist/vendor/cli/commands/improve.d.ts +18 -0
- package/dist/vendor/cli/commands/improve.js +396 -0
- package/dist/vendor/cli/commands/init.d.ts +8 -0
- package/dist/vendor/cli/commands/init.js +281 -0
- package/dist/vendor/cli/commands/migration.d.ts +8 -0
- package/dist/vendor/cli/commands/migration.js +67 -0
- package/dist/vendor/cli/commands/prior.d.ts +23 -0
- package/dist/vendor/cli/commands/prior.js +145 -0
- package/dist/vendor/cli/commands/resume.d.ts +21 -0
- package/dist/vendor/cli/commands/resume.js +73 -0
- package/dist/vendor/cli/commands/verify.d.ts +6 -0
- package/dist/vendor/cli/commands/verify.js +43 -0
- package/dist/vendor/cli/research/public-corpus.d.ts +43 -0
- package/dist/vendor/cli/research/public-corpus.js +151 -0
- package/dist/vendor/cli/ui/error-card.d.ts +38 -0
- package/dist/vendor/cli/ui/error-card.js +103 -0
- package/dist/vendor/cli/ui/mission-brief.d.ts +41 -0
- package/dist/vendor/cli/ui/mission-brief.js +173 -0
- package/dist/vendor/cli/ui/summary-card.d.ts +34 -0
- package/dist/vendor/cli/ui/summary-card.js +102 -0
- package/dist/vendor/contracts/audit.d.ts +46 -0
- package/dist/vendor/contracts/audit.js +360 -0
- package/dist/vendor/contracts/post-phase15.d.ts +240 -0
- package/dist/vendor/contracts/post-phase15.js +166 -0
- package/dist/vendor/core/agent/mandates.d.ts +46 -0
- package/dist/vendor/core/agent/mandates.js +178 -0
- package/dist/vendor/core/agent/receipts.d.ts +38 -0
- package/dist/vendor/core/agent/receipts.js +131 -0
- package/dist/vendor/core/agent/signing.d.ts +17 -0
- package/dist/vendor/core/agent/signing.js +91 -0
- package/dist/vendor/core/attestation/sign.d.ts +25 -0
- package/dist/vendor/core/attestation/sign.js +216 -0
- package/dist/vendor/core/autonomy/autonomous-promotion.d.ts +120 -0
- package/dist/vendor/core/autonomy/autonomous-promotion.js +346 -0
- package/dist/vendor/core/autonomy/envelope-v2.d.ts +29 -0
- package/dist/vendor/core/autonomy/envelope-v2.js +60 -0
- package/dist/vendor/core/autonomy/envelope.d.ts +17 -0
- package/dist/vendor/core/autonomy/envelope.js +27 -0
- package/dist/vendor/core/autonomy/escalation-ledger.d.ts +20 -0
- package/dist/vendor/core/autonomy/escalation-ledger.js +18 -0
- package/dist/vendor/core/autonomy/resume.d.ts +15 -0
- package/dist/vendor/core/autonomy/resume.js +23 -0
- package/dist/vendor/core/circuit/circuit-breaker.d.ts +60 -0
- package/dist/vendor/core/circuit/circuit-breaker.js +143 -0
- package/dist/vendor/core/context-distillation.d.ts +3 -0
- package/dist/vendor/core/context-distillation.js +44 -0
- package/dist/vendor/core/context-flow/compile-context.d.ts +8 -0
- package/dist/vendor/core/context-flow/compile-context.js +111 -0
- package/dist/vendor/core/context-flow/entities.d.ts +2 -0
- package/dist/vendor/core/context-flow/entities.js +44 -0
- package/dist/vendor/core/context-flow/evaluate-policy.d.ts +2 -0
- package/dist/vendor/core/context-flow/evaluate-policy.js +42 -0
- package/dist/vendor/core/context-flow/index.d.ts +11 -0
- package/dist/vendor/core/context-flow/index.js +24 -0
- package/dist/vendor/core/context-flow/labels.d.ts +3 -0
- package/dist/vendor/core/context-flow/labels.js +17 -0
- package/dist/vendor/core/context-flow/normalizer.d.ts +9 -0
- package/dist/vendor/core/context-flow/normalizer.js +69 -0
- package/dist/vendor/core/context-flow/profiles.d.ts +33 -0
- package/dist/vendor/core/context-flow/profiles.js +36 -0
- package/dist/vendor/core/context-flow/redaction.d.ts +1 -0
- package/dist/vendor/core/context-flow/redaction.js +6 -0
- package/dist/vendor/core/context-flow/sensitivity.d.ts +2 -0
- package/dist/vendor/core/context-flow/sensitivity.js +27 -0
- package/dist/vendor/core/context-flow/sync-preview.d.ts +2 -0
- package/dist/vendor/core/context-flow/sync-preview.js +22 -0
- package/dist/vendor/core/context-flow/token-estimator.d.ts +3 -0
- package/dist/vendor/core/context-flow/token-estimator.js +13 -0
- package/dist/vendor/core/context-flow/types.d.ts +91 -0
- package/dist/vendor/core/context-flow/types.js +2 -0
- package/dist/vendor/core/context-utility.d.ts +47 -0
- package/dist/vendor/core/context-utility.js +405 -0
- package/dist/vendor/core/cost/pipeline.d.ts +92 -0
- package/dist/vendor/core/cost/pipeline.js +141 -0
- package/dist/vendor/core/cost/tagged-cost.d.ts +27 -0
- package/dist/vendor/core/cost/tagged-cost.js +55 -0
- package/dist/vendor/core/cost-governor.d.ts +2 -0
- package/dist/vendor/core/cost-governor.js +50 -0
- package/dist/vendor/core/cve/cve-check.d.ts +80 -0
- package/dist/vendor/core/cve/cve-check.js +172 -0
- package/dist/vendor/core/digital-twin/index.d.ts +27 -0
- package/dist/vendor/core/digital-twin/index.js +90 -0
- package/dist/vendor/core/drift/drift-graph.d.ts +47 -0
- package/dist/vendor/core/drift/drift-graph.js +100 -0
- package/dist/vendor/core/drift/objective-lock.d.ts +69 -0
- package/dist/vendor/core/drift/objective-lock.js +88 -0
- package/dist/vendor/core/drift/scope.d.ts +46 -0
- package/dist/vendor/core/drift/scope.js +102 -0
- package/dist/vendor/core/drift/signature-lock.d.ts +48 -0
- package/dist/vendor/core/drift/signature-lock.js +202 -0
- package/dist/vendor/core/drift/stale-proof-gate.d.ts +21 -0
- package/dist/vendor/core/drift/stale-proof-gate.js +19 -0
- package/dist/vendor/core/eval/known-bad-world-runner.d.ts +24 -0
- package/dist/vendor/core/eval/known-bad-world-runner.js +256 -0
- package/dist/vendor/core/evidence/claim-audit.d.ts +18 -0
- package/dist/vendor/core/evidence/claim-audit.js +89 -0
- package/dist/vendor/core/exit-intelligence.d.ts +2 -0
- package/dist/vendor/core/exit-intelligence.js +58 -0
- package/dist/vendor/core/explain/formatter.d.ts +42 -0
- package/dist/vendor/core/explain/formatter.js +171 -0
- package/dist/vendor/core/explain/timeline.d.ts +29 -0
- package/dist/vendor/core/explain/timeline.js +213 -0
- package/dist/vendor/core/failure-taxonomy.d.ts +2 -0
- package/dist/vendor/core/failure-taxonomy.js +76 -0
- package/dist/vendor/core/gateway/index.d.ts +10 -0
- package/dist/vendor/core/gateway/index.js +12 -0
- package/dist/vendor/core/gateway/registry.d.ts +40 -0
- package/dist/vendor/core/gateway/registry.js +97 -0
- package/dist/vendor/core/gateway/transport.d.ts +31 -0
- package/dist/vendor/core/gateway/transport.js +82 -0
- package/dist/vendor/core/gateway/vault.d.ts +19 -0
- package/dist/vendor/core/gateway/vault.js +29 -0
- package/dist/vendor/core/graph/adapters.d.ts +43 -0
- package/dist/vendor/core/graph/adapters.js +91 -0
- package/dist/vendor/core/graph/hotspots.d.ts +22 -0
- package/dist/vendor/core/graph/hotspots.js +30 -0
- package/dist/vendor/core/graph/index.d.ts +1 -0
- package/dist/vendor/core/graph/index.js +2 -0
- package/dist/vendor/core/honey/honey-tokens.d.ts +32 -0
- package/dist/vendor/core/honey/honey-tokens.js +44 -0
- package/dist/vendor/core/index.d.ts +2 -2
- package/dist/vendor/core/index.js +38 -12
- package/dist/vendor/core/learning/bayesian-update.d.ts +31 -0
- package/dist/vendor/core/learning/bayesian-update.js +60 -0
- package/dist/vendor/core/learning/prior-sets.d.ts +42 -0
- package/dist/vendor/core/learning/prior-sets.js +111 -0
- package/dist/vendor/core/learning/promotion-gate.d.ts +17 -0
- package/dist/vendor/core/learning/promotion-gate.js +23 -0
- package/dist/vendor/core/leash/blast-radius.d.ts +42 -0
- package/dist/vendor/core/leash/blast-radius.js +156 -0
- package/dist/vendor/core/leash/policy-leash.d.ts +31 -0
- package/dist/vendor/core/leash/policy-leash.js +117 -0
- package/dist/vendor/core/memo/memo.d.ts +63 -0
- package/dist/vendor/core/memo/memo.js +97 -0
- package/dist/vendor/core/memory/learning-pipeline.d.ts +154 -0
- package/dist/vendor/core/memory/learning-pipeline.js +391 -0
- package/dist/vendor/core/memory/palace.d.ts +84 -0
- package/dist/vendor/core/memory/palace.js +379 -0
- package/dist/vendor/core/merge/ast-merge.d.ts +22 -0
- package/dist/vendor/core/merge/ast-merge.js +350 -0
- package/dist/vendor/core/merge/text-merge.d.ts +12 -0
- package/dist/vendor/core/merge/text-merge.js +182 -0
- package/dist/vendor/core/otel/tracer.d.ts +45 -0
- package/dist/vendor/core/otel/tracer.js +116 -0
- package/dist/vendor/core/parallel/parallel-attempts.d.ts +28 -0
- package/dist/vendor/core/parallel/parallel-attempts.js +41 -0
- package/dist/vendor/core/parallel/scorer.d.ts +24 -0
- package/dist/vendor/core/parallel/scorer.js +65 -0
- package/dist/vendor/core/pattern-detection.d.ts +64 -0
- package/dist/vendor/core/pattern-detection.js +108 -0
- package/dist/vendor/core/persistence/checkpoint.d.ts +44 -0
- package/dist/vendor/core/persistence/checkpoint.js +156 -0
- package/dist/vendor/core/persistence/cleanup.d.ts +22 -0
- package/dist/vendor/core/persistence/cleanup.js +131 -0
- package/dist/vendor/core/persistence/index.d.ts +2 -0
- package/dist/vendor/core/persistence/index.js +1 -0
- package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
- package/dist/vendor/core/persistence/runs-reader.js +84 -0
- package/dist/vendor/core/persistence/store.d.ts +6 -1
- package/dist/vendor/core/persistence/store.js +5 -0
- package/dist/vendor/core/policy/file-touch-quota.d.ts +60 -0
- package/dist/vendor/core/policy/file-touch-quota.js +105 -0
- package/dist/vendor/core/policy/policy-loader.d.ts +30 -0
- package/dist/vendor/core/policy/policy-loader.js +170 -0
- package/dist/vendor/core/policy/policy-schema.d.ts +55 -0
- package/dist/vendor/core/policy/policy-schema.js +78 -0
- package/dist/vendor/core/probe/probe.d.ts +49 -0
- package/dist/vendor/core/probe/probe.js +115 -0
- package/dist/vendor/core/proof/patch-proof.d.ts +58 -0
- package/dist/vendor/core/proof/patch-proof.js +84 -0
- package/dist/vendor/core/proof/semantic-probe.d.ts +25 -0
- package/dist/vendor/core/proof/semantic-probe.js +82 -0
- package/dist/vendor/core/recovery/failure-mode-runner.d.ts +29 -0
- package/dist/vendor/core/recovery/failure-mode-runner.js +39 -0
- package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
- package/dist/vendor/core/red-blue/red-phase.js +141 -0
- package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
- package/dist/vendor/core/red-blue/risk-tiers.js +33 -0
- package/dist/vendor/core/replay/replay.d.ts +85 -0
- package/dist/vendor/core/replay/replay.js +109 -0
- package/dist/vendor/core/router/engine.d.ts +54 -0
- package/dist/vendor/core/router/engine.js +131 -0
- package/dist/vendor/core/router/index.d.ts +1 -0
- package/dist/vendor/core/router/index.js +2 -0
- package/dist/vendor/core/router/trust-calibration.d.ts +57 -0
- package/dist/vendor/core/router/trust-calibration.js +127 -0
- package/dist/vendor/core/run-martin.d.ts +2 -0
- package/dist/vendor/core/run-martin.js +287 -0
- package/dist/vendor/core/security/cve-scanner.d.ts +62 -0
- package/dist/vendor/core/security/cve-scanner.js +178 -0
- package/dist/vendor/core/sentinel/efficiency-sentinel.d.ts +29 -0
- package/dist/vendor/core/sentinel/efficiency-sentinel.js +30 -0
- package/dist/vendor/core/sentinel/progress-guard.d.ts +35 -0
- package/dist/vendor/core/sentinel/progress-guard.js +46 -0
- package/dist/vendor/core/siem/siem-emitter.d.ts +49 -0
- package/dist/vendor/core/siem/siem-emitter.js +157 -0
- package/dist/vendor/core/strategy/attempt-brief.d.ts +22 -0
- package/dist/vendor/core/strategy/attempt-brief.js +89 -0
- package/dist/vendor/core/summarize/diff-summary.d.ts +35 -0
- package/dist/vendor/core/summarize/diff-summary.js +204 -0
- package/dist/vendor/core/surface-signals.d.ts +21 -0
- package/dist/vendor/core/surface-signals.js +139 -0
- package/dist/vendor/core/truth/truth-wall.d.ts +51 -0
- package/dist/vendor/core/truth/truth-wall.js +69 -0
- package/dist/vendor/core/truth-spine.d.ts +26 -0
- package/dist/vendor/core/truth-spine.js +62 -0
- package/dist/vendor/core/types.d.ts +115 -0
- package/dist/vendor/core/types.js +2 -0
- package/dist/vendor/core/verification/tiered-verify.d.ts +17 -0
- package/dist/vendor/core/verification/tiered-verify.js +29 -0
- package/dist/vendor/core/verifier-pyramid.d.ts +32 -0
- package/dist/vendor/core/verifier-pyramid.js +111 -0
- package/dist/vendor/core/workflow-artifacts.d.ts +99 -0
- package/dist/vendor/core/workflow-artifacts.js +668 -0
- package/dist/vendor/core/wrap/supervised-run.d.ts +96 -0
- package/dist/vendor/core/wrap/supervised-run.js +178 -0
- package/docs/assets/cli-animated.svg +139 -0
- package/docs/assets/cli-static.svg +34 -0
- package/docs/assets/github-hero-v2.svg +23 -0
- package/docs/assets/martin-raplph.png.jpg +0 -0
- package/docs/assets/martinloop-logo.png +0 -0
- package/docs/assets/nvidia-inception-program-light.png +0 -0
- package/docs/assets/nvidia-inception-program.png +0 -0
- package/docs/assets/phase3c-sidesidebyside-demo.html +228 -0
- package/docs/assets/side-by-side.svg +134 -0
- package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -142
- package/docs/oss/EXAMPLES.md +134 -134
- package/docs/oss/OSS-BOUNDARY-REPORT.json +1 -1
- package/docs/oss/OSS-BOUNDARY-REPORT.md +1 -1
- package/docs/oss/QUICKSTART.md +170 -165
- package/docs/oss/RALPH-LOOP-SAFETY.md +113 -113
- package/docs/oss/README.md +96 -96
- package/docs/oss/RELEASE-SURFACE-REPORT.json +2 -1
- package/docs/oss/RELEASE-SURFACE-REPORT.md +2 -1
- package/package.json +130 -58
- package/docs/distribution/DIRECTORY-SUBMISSIONS.md +0 -89
- package/docs/distribution/INTEGRATION-OUTREACH.md +0 -61
- package/docs/distribution/UNDER-3-CHALLENGE.md +0 -65
|
@@ -1,113 +1,113 @@
|
|
|
1
|
-
# Ralph-Style Loop Safety Guide
|
|
2
|
-
|
|
3
|
-
Ralph-style loops are useful because they keep trying until a coding task reaches a stopping condition. MartinLoop is not a replacement for that pattern. It is the governance layer that makes the pattern safer to run unattended.
|
|
4
|
-
|
|
5
|
-
For install and first-run steps, start with the repo quickstart: [README.md#quick-start](../../README.md#quick-start)
|
|
6
|
-
|
|
7
|
-
## 1. What Ralph-style loops do well
|
|
8
|
-
|
|
9
|
-
Ralph-style loops are good at persistence:
|
|
10
|
-
|
|
11
|
-
- they retry after a failed attempt
|
|
12
|
-
- they keep working toward a concrete objective
|
|
13
|
-
- they help teams automate long-running coding tasks that would otherwise need constant supervision
|
|
14
|
-
|
|
15
|
-
That persistence is the reason teams use them. The problem is not the existence of the loop. The problem is what happens when the loop keeps running without a clear governance contract.
|
|
16
|
-
|
|
17
|
-
## 2. Where unattended loops fail
|
|
18
|
-
|
|
19
|
-
An unattended coding loop can fail in ways that are expensive even when no single attempt looks dramatic on its own:
|
|
20
|
-
|
|
21
|
-
- spend keeps accumulating across retries
|
|
22
|
-
- verifier failures repeat without a meaningful strategy change
|
|
23
|
-
- file edits drift outside the intended task boundary
|
|
24
|
-
- the final outcome is hard to audit because the reasoning trail is incomplete
|
|
25
|
-
- operators know that the loop stopped, but not whether it stopped for success, safety, or exhaustion
|
|
26
|
-
|
|
27
|
-
Those are governance failures, not only model failures.
|
|
28
|
-
|
|
29
|
-
## 3. Why max iterations alone are not enough
|
|
30
|
-
|
|
31
|
-
A max-iteration limit is helpful, but it only answers one question: "How many times may this loop try?"
|
|
32
|
-
|
|
33
|
-
It does not answer:
|
|
34
|
-
|
|
35
|
-
- how much budget can be spent before the next attempt is rejected
|
|
36
|
-
- whether the verifier command is safe to run
|
|
37
|
-
- whether the patch stayed inside the approved file scope
|
|
38
|
-
- whether a failed run left rollback evidence behind
|
|
39
|
-
- whether the recorded outcome is trustworthy enough to resume or inspect later
|
|
40
|
-
|
|
41
|
-
Iteration caps are one guardrail. They are not a full control layer.
|
|
42
|
-
|
|
43
|
-
## 4. What MartinLoop adds
|
|
44
|
-
|
|
45
|
-
MartinLoop governs the loop before, during, and after execution:
|
|
46
|
-
|
|
47
|
-
- **Budget governance** rejects work that would exceed the configured spend, token, or iteration envelope
|
|
48
|
-
- **Verifier gates** only allow a run to finish as `completed` when the agent result and verification state both pass
|
|
49
|
-
- **Safety leash checks** evaluate verifier commands, file boundaries, and approval-sensitive actions before work is accepted
|
|
50
|
-
- **Stop reasons** make the final lifecycle state explicit, such as `completed`, `budget_exit`, or `human_escalation`
|
|
51
|
-
- **Run records** append JSONL evidence under `~/.martin/runs/` so operators can inspect what happened later
|
|
52
|
-
- **Rollback evidence** preserves the recovery boundary for repo-backed runs when persistence is configured
|
|
53
|
-
|
|
54
|
-
That is why MartinLoop should be thought of as a companion governance layer around a Ralph-style loop, not an argument against using one.
|
|
55
|
-
|
|
56
|
-
## 5. Example governed run
|
|
57
|
-
|
|
58
|
-
```bash
|
|
59
|
-
martin run "fix the auth regression" \
|
|
60
|
-
--budget 3.00 \
|
|
61
|
-
--soft-limit-usd 2.00 \
|
|
62
|
-
--max-iterations 2 \
|
|
63
|
-
--verify "pnpm test"
|
|
64
|
-
```
|
|
65
|
-
|
|
66
|
-
This changes the operator contract in a few important ways:
|
|
67
|
-
|
|
68
|
-
- the next attempt can be rejected before overspend happens
|
|
69
|
-
- the run still has to satisfy the verifier
|
|
70
|
-
- the final state is inspectable instead of being inferred from logs alone
|
|
71
|
-
|
|
72
|
-
## 6. Example stop reason
|
|
73
|
-
|
|
74
|
-
MartinLoop returns an explicit lifecycle state and reason when a run stops:
|
|
75
|
-
|
|
76
|
-
```json
|
|
77
|
-
{
|
|
78
|
-
"decision": {
|
|
79
|
-
"shouldExit": true,
|
|
80
|
-
"lifecycleState": "budget_exit",
|
|
81
|
-
"status": "exited",
|
|
82
|
-
"reason": "Martin exited because the budget governor hit a hard limit."
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
```
|
|
86
|
-
|
|
87
|
-
That answer is more useful than "the loop stopped" because it tells the operator whether the run ended for success, safety, or exhaustion.
|
|
88
|
-
|
|
89
|
-
## 7. Example JSONL run record
|
|
90
|
-
|
|
91
|
-
Each run appends a JSONL record shaped like:
|
|
92
|
-
|
|
93
|
-
```json
|
|
94
|
-
{
|
|
95
|
-
"loopId": "loop_example123",
|
|
96
|
-
"workspaceId": "ws_demo",
|
|
97
|
-
"projectId": "proj_demo",
|
|
98
|
-
"status": "exited",
|
|
99
|
-
"lifecycleState": "budget_exit",
|
|
100
|
-
"budget": {
|
|
101
|
-
"maxUsd": 3,
|
|
102
|
-
"softLimitUsd": 2,
|
|
103
|
-
"maxIterations": 2,
|
|
104
|
-
"maxTokens": 20000
|
|
105
|
-
},
|
|
106
|
-
"metadata": {
|
|
107
|
-
"policyProfile": "balanced",
|
|
108
|
-
"telemetryDestination": "local-only"
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
The full record can also include attempts, events, verifier outcomes, and persisted artifact references. That is the evidence trail MartinLoop adds around a retrying coding loop.
|
|
1
|
+
# Ralph-Style Loop Safety Guide
|
|
2
|
+
|
|
3
|
+
Ralph-style loops are useful because they keep trying until a coding task reaches a stopping condition. MartinLoop is not a replacement for that pattern. It is the governance layer that makes the pattern safer to run unattended.
|
|
4
|
+
|
|
5
|
+
For install and first-run steps, start with the repo quickstart: [README.md#quick-start](../../README.md#quick-start)
|
|
6
|
+
|
|
7
|
+
## 1. What Ralph-style loops do well
|
|
8
|
+
|
|
9
|
+
Ralph-style loops are good at persistence:
|
|
10
|
+
|
|
11
|
+
- they retry after a failed attempt
|
|
12
|
+
- they keep working toward a concrete objective
|
|
13
|
+
- they help teams automate long-running coding tasks that would otherwise need constant supervision
|
|
14
|
+
|
|
15
|
+
That persistence is the reason teams use them. The problem is not the existence of the loop. The problem is what happens when the loop keeps running without a clear governance contract.
|
|
16
|
+
|
|
17
|
+
## 2. Where unattended loops fail
|
|
18
|
+
|
|
19
|
+
An unattended coding loop can fail in ways that are expensive even when no single attempt looks dramatic on its own:
|
|
20
|
+
|
|
21
|
+
- spend keeps accumulating across retries
|
|
22
|
+
- verifier failures repeat without a meaningful strategy change
|
|
23
|
+
- file edits drift outside the intended task boundary
|
|
24
|
+
- the final outcome is hard to audit because the reasoning trail is incomplete
|
|
25
|
+
- operators know that the loop stopped, but not whether it stopped for success, safety, or exhaustion
|
|
26
|
+
|
|
27
|
+
Those are governance failures, not only model failures.
|
|
28
|
+
|
|
29
|
+
## 3. Why max iterations alone are not enough
|
|
30
|
+
|
|
31
|
+
A max-iteration limit is helpful, but it only answers one question: "How many times may this loop try?"
|
|
32
|
+
|
|
33
|
+
It does not answer:
|
|
34
|
+
|
|
35
|
+
- how much budget can be spent before the next attempt is rejected
|
|
36
|
+
- whether the verifier command is safe to run
|
|
37
|
+
- whether the patch stayed inside the approved file scope
|
|
38
|
+
- whether a failed run left rollback evidence behind
|
|
39
|
+
- whether the recorded outcome is trustworthy enough to resume or inspect later
|
|
40
|
+
|
|
41
|
+
Iteration caps are one guardrail. They are not a full control layer.
|
|
42
|
+
|
|
43
|
+
## 4. What MartinLoop adds
|
|
44
|
+
|
|
45
|
+
MartinLoop governs the loop before, during, and after execution:
|
|
46
|
+
|
|
47
|
+
- **Budget governance** rejects work that would exceed the configured spend, token, or iteration envelope
|
|
48
|
+
- **Verifier gates** only allow a run to finish as `completed` when the agent result and verification state both pass
|
|
49
|
+
- **Safety leash checks** evaluate verifier commands, file boundaries, and approval-sensitive actions before work is accepted
|
|
50
|
+
- **Stop reasons** make the final lifecycle state explicit, such as `completed`, `budget_exit`, or `human_escalation`
|
|
51
|
+
- **Run records** append JSONL evidence under `~/.martin/runs/` so operators can inspect what happened later
|
|
52
|
+
- **Rollback evidence** preserves the recovery boundary for repo-backed runs when persistence is configured
|
|
53
|
+
|
|
54
|
+
That is why MartinLoop should be thought of as a companion governance layer around a Ralph-style loop, not an argument against using one.
|
|
55
|
+
|
|
56
|
+
## 5. Example governed run
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
martin run "fix the auth regression" \
|
|
60
|
+
--budget 3.00 \
|
|
61
|
+
--soft-limit-usd 2.00 \
|
|
62
|
+
--max-iterations 2 \
|
|
63
|
+
--verify "pnpm test"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
This changes the operator contract in a few important ways:
|
|
67
|
+
|
|
68
|
+
- the next attempt can be rejected before overspend happens
|
|
69
|
+
- the run still has to satisfy the verifier
|
|
70
|
+
- the final state is inspectable instead of being inferred from logs alone
|
|
71
|
+
|
|
72
|
+
## 6. Example stop reason
|
|
73
|
+
|
|
74
|
+
MartinLoop returns an explicit lifecycle state and reason when a run stops:
|
|
75
|
+
|
|
76
|
+
```json
|
|
77
|
+
{
|
|
78
|
+
"decision": {
|
|
79
|
+
"shouldExit": true,
|
|
80
|
+
"lifecycleState": "budget_exit",
|
|
81
|
+
"status": "exited",
|
|
82
|
+
"reason": "Martin exited because the budget governor hit a hard limit."
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
That answer is more useful than "the loop stopped" because it tells the operator whether the run ended for success, safety, or exhaustion.
|
|
88
|
+
|
|
89
|
+
## 7. Example JSONL run record
|
|
90
|
+
|
|
91
|
+
Each run appends a JSONL record shaped like:
|
|
92
|
+
|
|
93
|
+
```json
|
|
94
|
+
{
|
|
95
|
+
"loopId": "loop_example123",
|
|
96
|
+
"workspaceId": "ws_demo",
|
|
97
|
+
"projectId": "proj_demo",
|
|
98
|
+
"status": "exited",
|
|
99
|
+
"lifecycleState": "budget_exit",
|
|
100
|
+
"budget": {
|
|
101
|
+
"maxUsd": 3,
|
|
102
|
+
"softLimitUsd": 2,
|
|
103
|
+
"maxIterations": 2,
|
|
104
|
+
"maxTokens": 20000
|
|
105
|
+
},
|
|
106
|
+
"metadata": {
|
|
107
|
+
"policyProfile": "balanced",
|
|
108
|
+
"telemetryDestination": "local-only"
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
The full record can also include attempts, events, verifier outcomes, and persisted artifact references. That is the evidence trail MartinLoop adds around a retrying coding loop.
|
package/docs/oss/README.md
CHANGED
|
@@ -1,96 +1,96 @@
|
|
|
1
|
-
# Martin OSS Core
|
|
2
|
-
|
|
3
|
-
Martin Loop is a governed AI coding-loop runtime. The core runtime is real and verified through the Phase 12 certification gate; the repo is now in Phase
|
|
4
|
-
|
|
5
|
-
## What the OSS core includes today
|
|
6
|
-
|
|
7
|
-
- `@martin/contracts`: shared loop, policy, grounding, leash, budget, and rollback types
|
|
8
|
-
- `@martin/core`: the runtime controller, persistence layer, grounding scanner, leash engine, patch-truth scoring, and rollback restoration logic
|
|
9
|
-
- `@martin/adapters`: normalized Claude CLI, Codex CLI, and direct-provider or stub adapter surfaces
|
|
10
|
-
- `@martin/cli`: the local operator CLI for `run`, `inspect`, and `resume`
|
|
11
|
-
- `@martinloop/mcp`: the MCP server surface for `martin_run`, `martin_inspect`, and `martin_status`
|
|
12
|
-
|
|
13
|
-
## What is still outside the initial OSS promise
|
|
14
|
-
|
|
15
|
-
- The root workspace now exposes the `martin-loop` public package facade, and `@martinloop/mcp` now has a standalone tarball shape validated via `pnpm --filter @martinloop/mcp smoke:pack`, but registry publication is still a separate release step.
|
|
16
|
-
- `@martin/contracts`, `@martin/core`, and `@martin/adapters` are still marked `private` in their package manifests.
|
|
17
|
-
- The hosted control-plane and local dashboard remain in the repo, but they are not yet the finalized public OSS boundary.
|
|
18
|
-
- The benchmark harness remains a workspace-only RC surface under `benchmarks/` and is not part of the publishable CLI boundary yet.
|
|
19
|
-
- Final licensing, public package publishing, and managed-product packaging are still gated behind later Phase 13 to Phase 15 work.
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
-
|
|
27
|
-
-
|
|
28
|
-
-
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
- `
|
|
38
|
-
- `
|
|
39
|
-
- `
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
- `
|
|
47
|
-
- `
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
-
|
|
57
|
-
-
|
|
58
|
-
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
pnpm
|
|
69
|
-
pnpm
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
- `pnpm
|
|
80
|
-
- `pnpm
|
|
81
|
-
- `pnpm
|
|
82
|
-
- `pnpm
|
|
83
|
-
- `pnpm
|
|
84
|
-
|
|
85
|
-
`pnpm
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
- [`docs/oss/
|
|
92
|
-
- [`docs/oss/
|
|
93
|
-
- [`docs/oss/OSS-BOUNDARY-REPORT.md`](./OSS-BOUNDARY-REPORT.md) for the current machine-checked OSS boundary and public-surface status
|
|
94
|
-
- [`docs/oss/RELEASE-SURFACE-REPORT.md`](./RELEASE-SURFACE-REPORT.md) for the current machine-checked release-surface audit
|
|
95
|
-
- [`docs/pilot/README.md`](../pilot/README.md) for the pilot-prep package that remains explicitly gated behind Phase 13 completion
|
|
96
|
-
- [`../../README.md`](../../README.md) for the repo-level RC status and workspace map
|
|
1
|
+
# Martin OSS Core
|
|
2
|
+
|
|
3
|
+
Martin Loop is a governed AI coding-loop runtime. The core runtime is real and verified through the Phase 12 certification gate; the repo is now in the Phase 15 public-release lane, which means the focus is release truth, packaging, and final-gate evidence rather than new feature invention.
|
|
4
|
+
|
|
5
|
+
## What the OSS core includes today
|
|
6
|
+
|
|
7
|
+
- `@martin/contracts`: shared loop, policy, grounding, leash, budget, and rollback types
|
|
8
|
+
- `@martin/core`: the runtime controller, persistence layer, grounding scanner, leash engine, patch-truth scoring, and rollback restoration logic
|
|
9
|
+
- `@martin/adapters`: normalized Claude CLI, Codex CLI, and direct-provider or stub adapter surfaces
|
|
10
|
+
- `@martin/cli`: the local operator CLI for `run`, `inspect`, and `resume`
|
|
11
|
+
- `@martinloop/mcp`: the MCP server surface for `martin_run`, `martin_inspect`, and `martin_status`
|
|
12
|
+
|
|
13
|
+
## What is still outside the initial OSS promise
|
|
14
|
+
|
|
15
|
+
- The root workspace now exposes the `martin-loop` public package facade, and `@martinloop/mcp` now has a standalone tarball shape (validated via `pnpm --filter @martinloop/mcp smoke:pack`) plus a published-package smoke test (`pnpm --filter @martinloop/mcp smoke:published`), but registry publication is still a separate release step.
|
|
16
|
+
- `@martin/contracts`, `@martin/core`, and `@martin/adapters` are still marked `private` in their package manifests.
|
|
17
|
+
- The hosted control-plane and local dashboard remain in the repo, but they are not yet the finalized public OSS boundary.
|
|
18
|
+
- The benchmark harness remains a workspace-only RC surface under `benchmarks/` and is not part of the publishable CLI boundary yet.
|
|
19
|
+
- Final licensing, public package publishing, and managed-product packaging are still gated behind later Phase 13 to Phase 15 work.
|
|
20
|
+
- Internal workspace packages remain non-public release internals unless the release lane explicitly widens that surface.
|
|
21
|
+
|
|
22
|
+
That means this repo is ready for grounded engineering review and RC validation, but it is not yet claiming a finished public OSS release.
|
|
23
|
+
|
|
24
|
+
## Runtime truth the current core enforces
|
|
25
|
+
|
|
26
|
+
- Explicit policy phases: `GATHER`, `ADMIT`, `PATCH`, `VERIFY`, `RECOVER`, `ESCALATE`, `ABORT`, `HANDOFF`
|
|
27
|
+
- Grounding scans against repo anatomy before success is accepted
|
|
28
|
+
- Blocking leash behavior for unsafe verifier commands, file-scope violations, approval-boundary changes, and secret handling
|
|
29
|
+
- Provenance-aware accounting using `actual`, `estimated`, and `unavailable`
|
|
30
|
+
- Persisted attempt artifacts under `~/.martin/runs/<runId>/artifacts/attempt-XXX/`
|
|
31
|
+
- Patch-truth scoring plus rollback boundary and restore outcome artifacts for discarded or blocked repo-backed attempts
|
|
32
|
+
|
|
33
|
+
## Trust profiles
|
|
34
|
+
|
|
35
|
+
Martin currently exposes these execution profiles:
|
|
36
|
+
|
|
37
|
+
- `strict_local`: safest default for local repo work
|
|
38
|
+
- `ci_safe`: tighter CI-oriented behavior
|
|
39
|
+
- `staging_controlled`: controlled outbound or network allowances with approvals
|
|
40
|
+
- `research_untrusted`: looser network posture for research-oriented runs while still enforcing approval boundaries
|
|
41
|
+
|
|
42
|
+
## Accounting labels
|
|
43
|
+
|
|
44
|
+
Martin keeps cost provenance explicit:
|
|
45
|
+
|
|
46
|
+
- `actual`: reported directly by the provider or adapter settlement
|
|
47
|
+
- `estimated`: derived from pricing logic or modeled usage
|
|
48
|
+
- `unavailable`: the adapter could not produce a trustworthy number
|
|
49
|
+
|
|
50
|
+
Do not collapse those labels when building dashboards, docs, or public claims.
|
|
51
|
+
|
|
52
|
+
## Frozen public launch target
|
|
53
|
+
|
|
54
|
+
The current engineering memo freezes these public-launch targets for release planning:
|
|
55
|
+
|
|
56
|
+
- install target: `npm install martin-loop`
|
|
57
|
+
- CLI target: `npx martin-loop ...`
|
|
58
|
+
- SDK target: `import { MartinLoop } from "martin-loop"`
|
|
59
|
+
- MCP target (publish-ready): `npx @martinloop/mcp`
|
|
60
|
+
|
|
61
|
+
Those runtime targets are implemented in the root package facade and verified through a clean-install smoke test. The MCP target is packaged and verified through a tarball launch smoke test. During the current RC phase, the honest operator path still includes the repo-local workflow documented below and in the quickstart, because public registry publication and broader release packaging remain separate release steps.
|
|
62
|
+
|
|
63
|
+
## Reproducibility
|
|
64
|
+
|
|
65
|
+
From the repo root:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pnpm install
|
|
69
|
+
pnpm build
|
|
70
|
+
pnpm rc:validate
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
`pnpm rc:validate` runs the current RC matrix in an isolated temp home so fresh-home behavior is checked instead of depending on warmed `~/.martin` state. Use `pnpm rc:validate:install` when you also want the RC run to perform a clean `pnpm install --frozen-lockfile` first.
|
|
74
|
+
|
|
75
|
+
## RC gate commands
|
|
76
|
+
|
|
77
|
+
The current release-candidate gate is:
|
|
78
|
+
|
|
79
|
+
- `pnpm oss:validate`
|
|
80
|
+
- `pnpm public:smoke`
|
|
81
|
+
- `pnpm mcp:published:smoke`
|
|
82
|
+
- `pnpm repo:smoke`
|
|
83
|
+
- `pnpm rc:validate`
|
|
84
|
+
- `pnpm pilot:prep:validate`
|
|
85
|
+
- `pnpm release:matrix:local`
|
|
86
|
+
|
|
87
|
+
`pnpm rc:validate` now includes the machine-checked release-surface audit in addition to the existing build, test, benchmark, provider-path, OSS-boundary, and control-plane checks.
|
|
88
|
+
|
|
89
|
+
## Where to go next
|
|
90
|
+
|
|
91
|
+
- [`docs/oss/QUICKSTART.md`](./QUICKSTART.md) for clone-to-first-run setup
|
|
92
|
+
- [`docs/oss/EXAMPLES.md`](./EXAMPLES.md) for grounded CLI and MCP examples
|
|
93
|
+
- [`docs/oss/OSS-BOUNDARY-REPORT.md`](./OSS-BOUNDARY-REPORT.md) for the current machine-checked OSS boundary and public-surface status
|
|
94
|
+
- [`docs/oss/RELEASE-SURFACE-REPORT.md`](./RELEASE-SURFACE-REPORT.md) for the current machine-checked release-surface audit
|
|
95
|
+
- [`docs/pilot/README.md`](../pilot/README.md) for the pilot-prep package that remains explicitly gated behind Phase 13 completion
|
|
96
|
+
- [`../../README.md`](../../README.md) for the repo-level RC status and workspace map
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"generatedAt": "2026-05-
|
|
2
|
+
"generatedAt": "2026-05-12T17:46:28.520Z",
|
|
3
3
|
"publicSurface": {
|
|
4
4
|
"packageName": "martin-loop",
|
|
5
5
|
"installCommand": "npm install martin-loop",
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
"rcGateCommands": [
|
|
10
10
|
"pnpm oss:validate",
|
|
11
11
|
"pnpm public:smoke",
|
|
12
|
+
"pnpm mcp:published:smoke",
|
|
12
13
|
"pnpm repo:smoke",
|
|
13
14
|
"pnpm rc:validate",
|
|
14
15
|
"pnpm pilot:prep:validate",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Martin Loop Phase 13 Release Surface Audit
|
|
2
2
|
|
|
3
|
-
Generated: 2026-05-11T21:47:37.407Z
|
|
3
|
+
Generated: 2026-05-12T17:46:28.520Z
|
|
4
4
|
|
|
5
5
|
## Verdict
|
|
6
6
|
**GO**
|
|
@@ -14,6 +14,7 @@ Generated: 2026-05-11T21:47:37.407Z
|
|
|
14
14
|
## RC Gate Commands
|
|
15
15
|
- `pnpm oss:validate`
|
|
16
16
|
- `pnpm public:smoke`
|
|
17
|
+
- `pnpm mcp:published:smoke`
|
|
17
18
|
- `pnpm repo:smoke`
|
|
18
19
|
- `pnpm rc:validate`
|
|
19
20
|
- `pnpm pilot:prep:validate`
|