martin-loop 0.1.5 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. package/CODE_OF_CONDUCT.md +32 -0
  2. package/LICENSE +21 -21
  3. package/README.md +307 -398
  4. package/demo/seeded-workspace/README.md +35 -35
  5. package/demo/seeded-workspace/TASKS.md +29 -29
  6. package/demo/seeded-workspace/martin.config.yaml +11 -11
  7. package/demo/seeded-workspace/package.json +8 -8
  8. package/demo/seeded-workspace/src/invoice-summary.js +11 -11
  9. package/demo/seeded-workspace/test/invoice-summary.test.js +20 -20
  10. package/dist/bin/martin-loop.js +0 -0
  11. package/dist/vendor/adapters/counter.d.ts +1 -0
  12. package/dist/vendor/adapters/counter.js +4 -0
  13. package/dist/vendor/adapters/git-baseline.d.ts +50 -0
  14. package/dist/vendor/adapters/git-baseline.js +233 -0
  15. package/dist/vendor/adapters/openrouter-adapter.d.ts +15 -0
  16. package/dist/vendor/adapters/openrouter-adapter.js +302 -0
  17. package/dist/vendor/adapters/usage.d.ts +48 -0
  18. package/dist/vendor/adapters/usage.js +66 -0
  19. package/dist/vendor/cli/bin/exit.d.ts +12 -0
  20. package/dist/vendor/cli/bin/exit.js +28 -0
  21. package/dist/vendor/cli/commands/analyze.d.ts +5 -0
  22. package/dist/vendor/cli/commands/analyze.js +58 -0
  23. package/dist/vendor/cli/commands/audit-log-verify.d.ts +34 -0
  24. package/dist/vendor/cli/commands/audit-log-verify.js +99 -0
  25. package/dist/vendor/cli/commands/audit.d.ts +8 -0
  26. package/dist/vendor/cli/commands/audit.js +199 -0
  27. package/dist/vendor/cli/commands/corpus.d.ts +5 -0
  28. package/dist/vendor/cli/commands/corpus.js +60 -0
  29. package/dist/vendor/cli/commands/doctor.d.ts +8 -0
  30. package/dist/vendor/cli/commands/doctor.js +219 -0
  31. package/dist/vendor/cli/commands/explain.d.ts +17 -0
  32. package/dist/vendor/cli/commands/explain.js +176 -0
  33. package/dist/vendor/cli/commands/export.d.ts +5 -0
  34. package/dist/vendor/cli/commands/export.js +60 -0
  35. package/dist/vendor/cli/commands/governance.d.ts +8 -0
  36. package/dist/vendor/cli/commands/governance.js +95 -0
  37. package/dist/vendor/cli/commands/improve.d.ts +18 -0
  38. package/dist/vendor/cli/commands/improve.js +396 -0
  39. package/dist/vendor/cli/commands/init.d.ts +8 -0
  40. package/dist/vendor/cli/commands/init.js +281 -0
  41. package/dist/vendor/cli/commands/migration.d.ts +8 -0
  42. package/dist/vendor/cli/commands/migration.js +67 -0
  43. package/dist/vendor/cli/commands/prior.d.ts +23 -0
  44. package/dist/vendor/cli/commands/prior.js +145 -0
  45. package/dist/vendor/cli/commands/resume.d.ts +21 -0
  46. package/dist/vendor/cli/commands/resume.js +73 -0
  47. package/dist/vendor/cli/commands/verify.d.ts +6 -0
  48. package/dist/vendor/cli/commands/verify.js +43 -0
  49. package/dist/vendor/cli/research/public-corpus.d.ts +43 -0
  50. package/dist/vendor/cli/research/public-corpus.js +151 -0
  51. package/dist/vendor/cli/ui/error-card.d.ts +38 -0
  52. package/dist/vendor/cli/ui/error-card.js +103 -0
  53. package/dist/vendor/cli/ui/mission-brief.d.ts +41 -0
  54. package/dist/vendor/cli/ui/mission-brief.js +173 -0
  55. package/dist/vendor/cli/ui/summary-card.d.ts +34 -0
  56. package/dist/vendor/cli/ui/summary-card.js +102 -0
  57. package/dist/vendor/contracts/audit.d.ts +46 -0
  58. package/dist/vendor/contracts/audit.js +360 -0
  59. package/dist/vendor/contracts/post-phase15.d.ts +240 -0
  60. package/dist/vendor/contracts/post-phase15.js +166 -0
  61. package/dist/vendor/core/agent/mandates.d.ts +46 -0
  62. package/dist/vendor/core/agent/mandates.js +178 -0
  63. package/dist/vendor/core/agent/receipts.d.ts +38 -0
  64. package/dist/vendor/core/agent/receipts.js +131 -0
  65. package/dist/vendor/core/agent/signing.d.ts +17 -0
  66. package/dist/vendor/core/agent/signing.js +91 -0
  67. package/dist/vendor/core/attestation/sign.d.ts +25 -0
  68. package/dist/vendor/core/attestation/sign.js +216 -0
  69. package/dist/vendor/core/autonomy/autonomous-promotion.d.ts +120 -0
  70. package/dist/vendor/core/autonomy/autonomous-promotion.js +346 -0
  71. package/dist/vendor/core/autonomy/envelope-v2.d.ts +29 -0
  72. package/dist/vendor/core/autonomy/envelope-v2.js +60 -0
  73. package/dist/vendor/core/autonomy/envelope.d.ts +17 -0
  74. package/dist/vendor/core/autonomy/envelope.js +27 -0
  75. package/dist/vendor/core/autonomy/escalation-ledger.d.ts +20 -0
  76. package/dist/vendor/core/autonomy/escalation-ledger.js +18 -0
  77. package/dist/vendor/core/autonomy/resume.d.ts +15 -0
  78. package/dist/vendor/core/autonomy/resume.js +23 -0
  79. package/dist/vendor/core/circuit/circuit-breaker.d.ts +60 -0
  80. package/dist/vendor/core/circuit/circuit-breaker.js +143 -0
  81. package/dist/vendor/core/context-distillation.d.ts +3 -0
  82. package/dist/vendor/core/context-distillation.js +44 -0
  83. package/dist/vendor/core/context-flow/compile-context.d.ts +8 -0
  84. package/dist/vendor/core/context-flow/compile-context.js +111 -0
  85. package/dist/vendor/core/context-flow/entities.d.ts +2 -0
  86. package/dist/vendor/core/context-flow/entities.js +44 -0
  87. package/dist/vendor/core/context-flow/evaluate-policy.d.ts +2 -0
  88. package/dist/vendor/core/context-flow/evaluate-policy.js +42 -0
  89. package/dist/vendor/core/context-flow/index.d.ts +11 -0
  90. package/dist/vendor/core/context-flow/index.js +24 -0
  91. package/dist/vendor/core/context-flow/labels.d.ts +3 -0
  92. package/dist/vendor/core/context-flow/labels.js +17 -0
  93. package/dist/vendor/core/context-flow/normalizer.d.ts +9 -0
  94. package/dist/vendor/core/context-flow/normalizer.js +69 -0
  95. package/dist/vendor/core/context-flow/profiles.d.ts +33 -0
  96. package/dist/vendor/core/context-flow/profiles.js +36 -0
  97. package/dist/vendor/core/context-flow/redaction.d.ts +1 -0
  98. package/dist/vendor/core/context-flow/redaction.js +6 -0
  99. package/dist/vendor/core/context-flow/sensitivity.d.ts +2 -0
  100. package/dist/vendor/core/context-flow/sensitivity.js +27 -0
  101. package/dist/vendor/core/context-flow/sync-preview.d.ts +2 -0
  102. package/dist/vendor/core/context-flow/sync-preview.js +22 -0
  103. package/dist/vendor/core/context-flow/token-estimator.d.ts +3 -0
  104. package/dist/vendor/core/context-flow/token-estimator.js +13 -0
  105. package/dist/vendor/core/context-flow/types.d.ts +91 -0
  106. package/dist/vendor/core/context-flow/types.js +2 -0
  107. package/dist/vendor/core/context-utility.d.ts +47 -0
  108. package/dist/vendor/core/context-utility.js +405 -0
  109. package/dist/vendor/core/cost/pipeline.d.ts +92 -0
  110. package/dist/vendor/core/cost/pipeline.js +141 -0
  111. package/dist/vendor/core/cost/tagged-cost.d.ts +27 -0
  112. package/dist/vendor/core/cost/tagged-cost.js +55 -0
  113. package/dist/vendor/core/cost-governor.d.ts +2 -0
  114. package/dist/vendor/core/cost-governor.js +50 -0
  115. package/dist/vendor/core/cve/cve-check.d.ts +80 -0
  116. package/dist/vendor/core/cve/cve-check.js +172 -0
  117. package/dist/vendor/core/digital-twin/index.d.ts +27 -0
  118. package/dist/vendor/core/digital-twin/index.js +90 -0
  119. package/dist/vendor/core/drift/drift-graph.d.ts +47 -0
  120. package/dist/vendor/core/drift/drift-graph.js +100 -0
  121. package/dist/vendor/core/drift/objective-lock.d.ts +69 -0
  122. package/dist/vendor/core/drift/objective-lock.js +88 -0
  123. package/dist/vendor/core/drift/scope.d.ts +46 -0
  124. package/dist/vendor/core/drift/scope.js +102 -0
  125. package/dist/vendor/core/drift/signature-lock.d.ts +48 -0
  126. package/dist/vendor/core/drift/signature-lock.js +202 -0
  127. package/dist/vendor/core/drift/stale-proof-gate.d.ts +21 -0
  128. package/dist/vendor/core/drift/stale-proof-gate.js +19 -0
  129. package/dist/vendor/core/eval/known-bad-world-runner.d.ts +24 -0
  130. package/dist/vendor/core/eval/known-bad-world-runner.js +256 -0
  131. package/dist/vendor/core/evidence/claim-audit.d.ts +18 -0
  132. package/dist/vendor/core/evidence/claim-audit.js +89 -0
  133. package/dist/vendor/core/exit-intelligence.d.ts +2 -0
  134. package/dist/vendor/core/exit-intelligence.js +58 -0
  135. package/dist/vendor/core/explain/formatter.d.ts +42 -0
  136. package/dist/vendor/core/explain/formatter.js +171 -0
  137. package/dist/vendor/core/explain/timeline.d.ts +29 -0
  138. package/dist/vendor/core/explain/timeline.js +213 -0
  139. package/dist/vendor/core/failure-taxonomy.d.ts +2 -0
  140. package/dist/vendor/core/failure-taxonomy.js +76 -0
  141. package/dist/vendor/core/gateway/index.d.ts +10 -0
  142. package/dist/vendor/core/gateway/index.js +12 -0
  143. package/dist/vendor/core/gateway/registry.d.ts +40 -0
  144. package/dist/vendor/core/gateway/registry.js +97 -0
  145. package/dist/vendor/core/gateway/transport.d.ts +31 -0
  146. package/dist/vendor/core/gateway/transport.js +82 -0
  147. package/dist/vendor/core/gateway/vault.d.ts +19 -0
  148. package/dist/vendor/core/gateway/vault.js +29 -0
  149. package/dist/vendor/core/graph/adapters.d.ts +43 -0
  150. package/dist/vendor/core/graph/adapters.js +91 -0
  151. package/dist/vendor/core/graph/hotspots.d.ts +22 -0
  152. package/dist/vendor/core/graph/hotspots.js +30 -0
  153. package/dist/vendor/core/graph/index.d.ts +1 -0
  154. package/dist/vendor/core/graph/index.js +2 -0
  155. package/dist/vendor/core/honey/honey-tokens.d.ts +32 -0
  156. package/dist/vendor/core/honey/honey-tokens.js +44 -0
  157. package/dist/vendor/core/index.d.ts +2 -2
  158. package/dist/vendor/core/index.js +38 -12
  159. package/dist/vendor/core/learning/bayesian-update.d.ts +31 -0
  160. package/dist/vendor/core/learning/bayesian-update.js +60 -0
  161. package/dist/vendor/core/learning/prior-sets.d.ts +42 -0
  162. package/dist/vendor/core/learning/prior-sets.js +111 -0
  163. package/dist/vendor/core/learning/promotion-gate.d.ts +17 -0
  164. package/dist/vendor/core/learning/promotion-gate.js +23 -0
  165. package/dist/vendor/core/leash/blast-radius.d.ts +42 -0
  166. package/dist/vendor/core/leash/blast-radius.js +156 -0
  167. package/dist/vendor/core/leash/policy-leash.d.ts +31 -0
  168. package/dist/vendor/core/leash/policy-leash.js +117 -0
  169. package/dist/vendor/core/memo/memo.d.ts +63 -0
  170. package/dist/vendor/core/memo/memo.js +97 -0
  171. package/dist/vendor/core/memory/learning-pipeline.d.ts +154 -0
  172. package/dist/vendor/core/memory/learning-pipeline.js +391 -0
  173. package/dist/vendor/core/memory/palace.d.ts +84 -0
  174. package/dist/vendor/core/memory/palace.js +379 -0
  175. package/dist/vendor/core/merge/ast-merge.d.ts +22 -0
  176. package/dist/vendor/core/merge/ast-merge.js +350 -0
  177. package/dist/vendor/core/merge/text-merge.d.ts +12 -0
  178. package/dist/vendor/core/merge/text-merge.js +182 -0
  179. package/dist/vendor/core/otel/tracer.d.ts +45 -0
  180. package/dist/vendor/core/otel/tracer.js +116 -0
  181. package/dist/vendor/core/parallel/parallel-attempts.d.ts +28 -0
  182. package/dist/vendor/core/parallel/parallel-attempts.js +41 -0
  183. package/dist/vendor/core/parallel/scorer.d.ts +24 -0
  184. package/dist/vendor/core/parallel/scorer.js +65 -0
  185. package/dist/vendor/core/pattern-detection.d.ts +64 -0
  186. package/dist/vendor/core/pattern-detection.js +108 -0
  187. package/dist/vendor/core/persistence/checkpoint.d.ts +44 -0
  188. package/dist/vendor/core/persistence/checkpoint.js +156 -0
  189. package/dist/vendor/core/persistence/cleanup.d.ts +22 -0
  190. package/dist/vendor/core/persistence/cleanup.js +131 -0
  191. package/dist/vendor/core/persistence/index.d.ts +2 -0
  192. package/dist/vendor/core/persistence/index.js +1 -0
  193. package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
  194. package/dist/vendor/core/persistence/runs-reader.js +84 -0
  195. package/dist/vendor/core/persistence/store.d.ts +6 -1
  196. package/dist/vendor/core/persistence/store.js +5 -0
  197. package/dist/vendor/core/policy/file-touch-quota.d.ts +60 -0
  198. package/dist/vendor/core/policy/file-touch-quota.js +105 -0
  199. package/dist/vendor/core/policy/policy-loader.d.ts +30 -0
  200. package/dist/vendor/core/policy/policy-loader.js +170 -0
  201. package/dist/vendor/core/policy/policy-schema.d.ts +55 -0
  202. package/dist/vendor/core/policy/policy-schema.js +78 -0
  203. package/dist/vendor/core/probe/probe.d.ts +49 -0
  204. package/dist/vendor/core/probe/probe.js +115 -0
  205. package/dist/vendor/core/proof/patch-proof.d.ts +58 -0
  206. package/dist/vendor/core/proof/patch-proof.js +84 -0
  207. package/dist/vendor/core/proof/semantic-probe.d.ts +25 -0
  208. package/dist/vendor/core/proof/semantic-probe.js +82 -0
  209. package/dist/vendor/core/recovery/failure-mode-runner.d.ts +29 -0
  210. package/dist/vendor/core/recovery/failure-mode-runner.js +39 -0
  211. package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
  212. package/dist/vendor/core/red-blue/red-phase.js +141 -0
  213. package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
  214. package/dist/vendor/core/red-blue/risk-tiers.js +33 -0
  215. package/dist/vendor/core/replay/replay.d.ts +85 -0
  216. package/dist/vendor/core/replay/replay.js +109 -0
  217. package/dist/vendor/core/router/engine.d.ts +54 -0
  218. package/dist/vendor/core/router/engine.js +131 -0
  219. package/dist/vendor/core/router/index.d.ts +1 -0
  220. package/dist/vendor/core/router/index.js +2 -0
  221. package/dist/vendor/core/router/trust-calibration.d.ts +57 -0
  222. package/dist/vendor/core/router/trust-calibration.js +127 -0
  223. package/dist/vendor/core/run-martin.d.ts +2 -0
  224. package/dist/vendor/core/run-martin.js +287 -0
  225. package/dist/vendor/core/security/cve-scanner.d.ts +62 -0
  226. package/dist/vendor/core/security/cve-scanner.js +178 -0
  227. package/dist/vendor/core/sentinel/efficiency-sentinel.d.ts +29 -0
  228. package/dist/vendor/core/sentinel/efficiency-sentinel.js +30 -0
  229. package/dist/vendor/core/sentinel/progress-guard.d.ts +35 -0
  230. package/dist/vendor/core/sentinel/progress-guard.js +46 -0
  231. package/dist/vendor/core/siem/siem-emitter.d.ts +49 -0
  232. package/dist/vendor/core/siem/siem-emitter.js +157 -0
  233. package/dist/vendor/core/strategy/attempt-brief.d.ts +22 -0
  234. package/dist/vendor/core/strategy/attempt-brief.js +89 -0
  235. package/dist/vendor/core/summarize/diff-summary.d.ts +35 -0
  236. package/dist/vendor/core/summarize/diff-summary.js +204 -0
  237. package/dist/vendor/core/surface-signals.d.ts +21 -0
  238. package/dist/vendor/core/surface-signals.js +139 -0
  239. package/dist/vendor/core/truth/truth-wall.d.ts +51 -0
  240. package/dist/vendor/core/truth/truth-wall.js +69 -0
  241. package/dist/vendor/core/truth-spine.d.ts +26 -0
  242. package/dist/vendor/core/truth-spine.js +62 -0
  243. package/dist/vendor/core/types.d.ts +115 -0
  244. package/dist/vendor/core/types.js +2 -0
  245. package/dist/vendor/core/verification/tiered-verify.d.ts +17 -0
  246. package/dist/vendor/core/verification/tiered-verify.js +29 -0
  247. package/dist/vendor/core/verifier-pyramid.d.ts +32 -0
  248. package/dist/vendor/core/verifier-pyramid.js +111 -0
  249. package/dist/vendor/core/workflow-artifacts.d.ts +99 -0
  250. package/dist/vendor/core/workflow-artifacts.js +668 -0
  251. package/dist/vendor/core/wrap/supervised-run.d.ts +96 -0
  252. package/dist/vendor/core/wrap/supervised-run.js +178 -0
  253. package/docs/assets/cli-animated.svg +139 -0
  254. package/docs/assets/cli-static.svg +34 -0
  255. package/docs/assets/github-hero-v2.svg +23 -0
  256. package/docs/assets/martin-raplph.png.jpg +0 -0
  257. package/docs/assets/martinloop-logo.png +0 -0
  258. package/docs/assets/nvidia-inception-program-light.png +0 -0
  259. package/docs/assets/nvidia-inception-program.png +0 -0
  260. package/docs/assets/phase3c-sidesidebyside-demo.html +228 -0
  261. package/docs/assets/side-by-side.svg +134 -0
  262. package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -142
  263. package/docs/oss/EXAMPLES.md +134 -134
  264. package/docs/oss/OSS-BOUNDARY-REPORT.json +1 -1
  265. package/docs/oss/OSS-BOUNDARY-REPORT.md +1 -1
  266. package/docs/oss/QUICKSTART.md +170 -165
  267. package/docs/oss/RALPH-LOOP-SAFETY.md +113 -113
  268. package/docs/oss/README.md +96 -96
  269. package/docs/oss/RELEASE-SURFACE-REPORT.json +2 -1
  270. package/docs/oss/RELEASE-SURFACE-REPORT.md +2 -1
  271. package/package.json +130 -58
  272. package/docs/distribution/DIRECTORY-SUBMISSIONS.md +0 -89
  273. package/docs/distribution/INTEGRATION-OUTREACH.md +0 -61
  274. package/docs/distribution/UNDER-3-CHALLENGE.md +0 -65
@@ -1,113 +1,113 @@
1
- # Ralph-Style Loop Safety Guide
2
-
3
- Ralph-style loops are useful because they keep trying until a coding task reaches a stopping condition. MartinLoop is not a replacement for that pattern. It is the governance layer that makes the pattern safer to run unattended.
4
-
5
- For install and first-run steps, start with the repo quickstart: [README.md#quick-start](../../README.md#quick-start)
6
-
7
- ## 1. What Ralph-style loops do well
8
-
9
- Ralph-style loops are good at persistence:
10
-
11
- - they retry after a failed attempt
12
- - they keep working toward a concrete objective
13
- - they help teams automate long-running coding tasks that would otherwise need constant supervision
14
-
15
- That persistence is the reason teams use them. The problem is not the existence of the loop. The problem is what happens when the loop keeps running without a clear governance contract.
16
-
17
- ## 2. Where unattended loops fail
18
-
19
- An unattended coding loop can fail in ways that are expensive even when no single attempt looks dramatic on its own:
20
-
21
- - spend keeps accumulating across retries
22
- - verifier failures repeat without a meaningful strategy change
23
- - file edits drift outside the intended task boundary
24
- - the final outcome is hard to audit because the reasoning trail is incomplete
25
- - operators know that the loop stopped, but not whether it stopped for success, safety, or exhaustion
26
-
27
- Those are governance failures, not only model failures.
28
-
29
- ## 3. Why max iterations alone are not enough
30
-
31
- A max-iteration limit is helpful, but it only answers one question: "How many times may this loop try?"
32
-
33
- It does not answer:
34
-
35
- - how much budget can be spent before the next attempt is rejected
36
- - whether the verifier command is safe to run
37
- - whether the patch stayed inside the approved file scope
38
- - whether a failed run left rollback evidence behind
39
- - whether the recorded outcome is trustworthy enough to resume or inspect later
40
-
41
- Iteration caps are one guardrail. They are not a full control layer.
42
-
43
- ## 4. What MartinLoop adds
44
-
45
- MartinLoop governs the loop before, during, and after execution:
46
-
47
- - **Budget governance** rejects work that would exceed the configured spend, token, or iteration envelope
48
- - **Verifier gates** only allow a run to finish as `completed` when the agent result and verification state both pass
49
- - **Safety leash checks** evaluate verifier commands, file boundaries, and approval-sensitive actions before work is accepted
50
- - **Stop reasons** make the final lifecycle state explicit, such as `completed`, `budget_exit`, or `human_escalation`
51
- - **Run records** append JSONL evidence under `~/.martin/runs/` so operators can inspect what happened later
52
- - **Rollback evidence** preserves the recovery boundary for repo-backed runs when persistence is configured
53
-
54
- That is why MartinLoop should be thought of as a companion governance layer around a Ralph-style loop, not an argument against using one.
55
-
56
- ## 5. Example governed run
57
-
58
- ```bash
59
- martin run "fix the auth regression" \
60
- --budget 3.00 \
61
- --soft-limit-usd 2.00 \
62
- --max-iterations 2 \
63
- --verify "pnpm test"
64
- ```
65
-
66
- This changes the operator contract in a few important ways:
67
-
68
- - the next attempt can be rejected before overspend happens
69
- - the run still has to satisfy the verifier
70
- - the final state is inspectable instead of being inferred from logs alone
71
-
72
- ## 6. Example stop reason
73
-
74
- MartinLoop returns an explicit lifecycle state and reason when a run stops:
75
-
76
- ```json
77
- {
78
- "decision": {
79
- "shouldExit": true,
80
- "lifecycleState": "budget_exit",
81
- "status": "exited",
82
- "reason": "Martin exited because the budget governor hit a hard limit."
83
- }
84
- }
85
- ```
86
-
87
- That answer is more useful than "the loop stopped" because it tells the operator whether the run ended for success, safety, or exhaustion.
88
-
89
- ## 7. Example JSONL run record
90
-
91
- Each run appends a JSONL record shaped like:
92
-
93
- ```json
94
- {
95
- "loopId": "loop_example123",
96
- "workspaceId": "ws_demo",
97
- "projectId": "proj_demo",
98
- "status": "exited",
99
- "lifecycleState": "budget_exit",
100
- "budget": {
101
- "maxUsd": 3,
102
- "softLimitUsd": 2,
103
- "maxIterations": 2,
104
- "maxTokens": 20000
105
- },
106
- "metadata": {
107
- "policyProfile": "balanced",
108
- "telemetryDestination": "local-only"
109
- }
110
- }
111
- ```
112
-
113
- The full record can also include attempts, events, verifier outcomes, and persisted artifact references. That is the evidence trail MartinLoop adds around a retrying coding loop.
1
+ # Ralph-Style Loop Safety Guide
2
+
3
+ Ralph-style loops are useful because they keep trying until a coding task reaches a stopping condition. MartinLoop is not a replacement for that pattern. It is the governance layer that makes the pattern safer to run unattended.
4
+
5
+ For install and first-run steps, start with the repo quickstart: [README.md#quick-start](../../README.md#quick-start)
6
+
7
+ ## 1. What Ralph-style loops do well
8
+
9
+ Ralph-style loops are good at persistence:
10
+
11
+ - they retry after a failed attempt
12
+ - they keep working toward a concrete objective
13
+ - they help teams automate long-running coding tasks that would otherwise need constant supervision
14
+
15
+ That persistence is the reason teams use them. The problem is not the existence of the loop. The problem is what happens when the loop keeps running without a clear governance contract.
16
+
17
+ ## 2. Where unattended loops fail
18
+
19
+ An unattended coding loop can fail in ways that are expensive even when no single attempt looks dramatic on its own:
20
+
21
+ - spend keeps accumulating across retries
22
+ - verifier failures repeat without a meaningful strategy change
23
+ - file edits drift outside the intended task boundary
24
+ - the final outcome is hard to audit because the reasoning trail is incomplete
25
+ - operators know that the loop stopped, but not whether it stopped for success, safety, or exhaustion
26
+
27
+ Those are governance failures, not only model failures.
28
+
29
+ ## 3. Why max iterations alone are not enough
30
+
31
+ A max-iteration limit is helpful, but it only answers one question: "How many times may this loop try?"
32
+
33
+ It does not answer:
34
+
35
+ - how much budget can be spent before the next attempt is rejected
36
+ - whether the verifier command is safe to run
37
+ - whether the patch stayed inside the approved file scope
38
+ - whether a failed run left rollback evidence behind
39
+ - whether the recorded outcome is trustworthy enough to resume or inspect later
40
+
41
+ Iteration caps are one guardrail. They are not a full control layer.
42
+
43
+ ## 4. What MartinLoop adds
44
+
45
+ MartinLoop governs the loop before, during, and after execution:
46
+
47
+ - **Budget governance** rejects work that would exceed the configured spend, token, or iteration envelope
48
+ - **Verifier gates** only allow a run to finish as `completed` when the agent result and verification state both pass
49
+ - **Safety leash checks** evaluate verifier commands, file boundaries, and approval-sensitive actions before work is accepted
50
+ - **Stop reasons** make the final lifecycle state explicit, such as `completed`, `budget_exit`, or `human_escalation`
51
+ - **Run records** append JSONL evidence under `~/.martin/runs/` so operators can inspect what happened later
52
+ - **Rollback evidence** preserves the recovery boundary for repo-backed runs when persistence is configured
53
+
54
+ That is why MartinLoop should be thought of as a companion governance layer around a Ralph-style loop, not an argument against using one.
55
+
56
+ ## 5. Example governed run
57
+
58
+ ```bash
59
+ martin run "fix the auth regression" \
60
+ --budget 3.00 \
61
+ --soft-limit-usd 2.00 \
62
+ --max-iterations 2 \
63
+ --verify "pnpm test"
64
+ ```
65
+
66
+ This changes the operator contract in a few important ways:
67
+
68
+ - the next attempt can be rejected before overspend happens
69
+ - the run still has to satisfy the verifier
70
+ - the final state is inspectable instead of being inferred from logs alone
71
+
72
+ ## 6. Example stop reason
73
+
74
+ MartinLoop returns an explicit lifecycle state and reason when a run stops:
75
+
76
+ ```json
77
+ {
78
+ "decision": {
79
+ "shouldExit": true,
80
+ "lifecycleState": "budget_exit",
81
+ "status": "exited",
82
+ "reason": "Martin exited because the budget governor hit a hard limit."
83
+ }
84
+ }
85
+ ```
86
+
87
+ That answer is more useful than "the loop stopped" because it tells the operator whether the run ended for success, safety, or exhaustion.
88
+
89
+ ## 7. Example JSONL run record
90
+
91
+ Each run appends a JSONL record shaped like:
92
+
93
+ ```json
94
+ {
95
+ "loopId": "loop_example123",
96
+ "workspaceId": "ws_demo",
97
+ "projectId": "proj_demo",
98
+ "status": "exited",
99
+ "lifecycleState": "budget_exit",
100
+ "budget": {
101
+ "maxUsd": 3,
102
+ "softLimitUsd": 2,
103
+ "maxIterations": 2,
104
+ "maxTokens": 20000
105
+ },
106
+ "metadata": {
107
+ "policyProfile": "balanced",
108
+ "telemetryDestination": "local-only"
109
+ }
110
+ }
111
+ ```
112
+
113
+ The full record can also include attempts, events, verifier outcomes, and persisted artifact references. That is the evidence trail MartinLoop adds around a retrying coding loop.
@@ -1,96 +1,96 @@
1
- # Martin OSS Core
2
-
3
- Martin Loop is a governed AI coding-loop runtime. The core runtime is real and verified through the Phase 12 certification gate; the repo is now in Phase 13 release-candidate engineering, which means the focus is reproducibility, OSS boundary cleanup, and pilot readiness rather than new feature invention.
4
-
5
- ## What the OSS core includes today
6
-
7
- - `@martin/contracts`: shared loop, policy, grounding, leash, budget, and rollback types
8
- - `@martin/core`: the runtime controller, persistence layer, grounding scanner, leash engine, patch-truth scoring, and rollback restoration logic
9
- - `@martin/adapters`: normalized Claude CLI, Codex CLI, and direct-provider or stub adapter surfaces
10
- - `@martin/cli`: the local operator CLI for `run`, `inspect`, and `resume`
11
- - `@martinloop/mcp`: the MCP server surface for `martin_run`, `martin_inspect`, and `martin_status`
12
-
13
- ## What is still outside the initial OSS promise
14
-
15
- - The root workspace now exposes the `martin-loop` public package facade, and `@martinloop/mcp` now has a standalone tarball shape validated via `pnpm --filter @martinloop/mcp smoke:pack`, but registry publication is still a separate release step.
16
- - `@martin/contracts`, `@martin/core`, and `@martin/adapters` are still marked `private` in their package manifests.
17
- - The hosted control-plane and local dashboard remain in the repo, but they are not yet the finalized public OSS boundary.
18
- - The benchmark harness remains a workspace-only RC surface under `benchmarks/` and is not part of the publishable CLI boundary yet.
19
- - Final licensing, public package publishing, and managed-product packaging are still gated behind later Phase 13 to Phase 15 work.
20
-
21
- That means this repo is ready for grounded engineering review and RC validation, but it is not yet claiming a finished public OSS release.
22
-
23
- ## Runtime truth the current core enforces
24
-
25
- - Explicit policy phases: `GATHER`, `ADMIT`, `PATCH`, `VERIFY`, `RECOVER`, `ESCALATE`, `ABORT`, `HANDOFF`
26
- - Grounding scans against repo anatomy before success is accepted
27
- - Blocking leash behavior for unsafe verifier commands, file-scope violations, approval-boundary changes, and secret handling
28
- - Provenance-aware accounting using `actual`, `estimated`, and `unavailable`
29
- - Persisted attempt artifacts under `~/.martin/runs/<runId>/artifacts/attempt-XXX/`
30
- - Patch-truth scoring plus rollback boundary and restore outcome artifacts for discarded or blocked repo-backed attempts
31
-
32
- ## Trust profiles
33
-
34
- Martin currently exposes these execution profiles:
35
-
36
- - `strict_local`: safest default for local repo work
37
- - `ci_safe`: tighter CI-oriented behavior
38
- - `staging_controlled`: controlled outbound or network allowances with approvals
39
- - `research_untrusted`: looser network posture for research-oriented runs while still enforcing approval boundaries
40
-
41
- ## Accounting labels
42
-
43
- Martin keeps cost provenance explicit:
44
-
45
- - `actual`: reported directly by the provider or adapter settlement
46
- - `estimated`: derived from pricing logic or modeled usage
47
- - `unavailable`: the adapter could not produce a trustworthy number
48
-
49
- Do not collapse those labels when building dashboards, docs, or public claims.
50
-
51
- ## Frozen public launch target
52
-
53
- The current engineering memo freezes these public-launch targets for release planning:
54
-
55
- - install target: `npm install martin-loop`
56
- - CLI target: `npx martin-loop ...`
57
- - SDK target: `import { MartinLoop } from "martin-loop"`
58
- - MCP target (publish-ready): `npx @martinloop/mcp`
59
-
60
- Those runtime targets are implemented in the root package facade and verified through a clean-install smoke test. The MCP target is packaged and verified through a tarball launch smoke test. During the current RC phase, the honest operator path still includes the repo-local workflow documented below and in the quickstart, because public registry publication and broader release packaging remain separate release steps.
61
-
62
- ## Reproducibility
63
-
64
- From the repo root:
65
-
66
- ```bash
67
- pnpm install
68
- pnpm build
69
- pnpm rc:validate
70
- ```
71
-
72
- `pnpm rc:validate` runs the current RC matrix in an isolated temp home so fresh-home behavior is checked instead of depending on warmed `~/.martin` state. Use `pnpm rc:validate:install` when you also want the RC run to perform a clean `pnpm install --frozen-lockfile` first.
73
-
74
- ## RC gate commands
75
-
76
- The current release-candidate gate is:
77
-
78
- - `pnpm oss:validate`
79
- - `pnpm public:smoke`
80
- - `pnpm repo:smoke`
81
- - `pnpm rc:validate`
82
- - `pnpm pilot:prep:validate`
83
- - `pnpm release:matrix:local`
84
-
85
- `pnpm rc:validate` now includes the machine-checked release-surface audit in addition to the existing build, test, benchmark, provider-path, OSS-boundary, and control-plane checks.
86
-
87
- ## Where to go next
88
-
89
- - [`docs/oss/QUICKSTART.md`](./QUICKSTART.md) for clone-to-first-run setup
90
- - [`docs/oss/EXAMPLES.md`](./EXAMPLES.md) for grounded CLI and MCP examples
91
- - [`docs/oss/CLAUDE-CODE-WALKTHROUGH.md`](./CLAUDE-CODE-WALKTHROUGH.md) for a Claude Code-specific governed-run walkthrough
92
- - [`docs/oss/RALPH-LOOP-SAFETY.md`](./RALPH-LOOP-SAFETY.md) for a technical guide to governing Ralph-style loops safely
93
- - [`docs/oss/OSS-BOUNDARY-REPORT.md`](./OSS-BOUNDARY-REPORT.md) for the current machine-checked OSS boundary and public-surface status
94
- - [`docs/oss/RELEASE-SURFACE-REPORT.md`](./RELEASE-SURFACE-REPORT.md) for the current machine-checked release-surface audit
95
- - [`docs/pilot/README.md`](../pilot/README.md) for the pilot-prep package that remains explicitly gated behind Phase 13 completion
96
- - [`../../README.md`](../../README.md) for the repo-level RC status and workspace map
1
+ # Martin OSS Core
2
+
3
+ Martin Loop is a governed AI coding-loop runtime. The core runtime is real and verified through the Phase 12 certification gate; the repo is now in the Phase 15 public-release lane, which means the focus is release truth, packaging, and final-gate evidence rather than new feature invention.
4
+
5
+ ## What the OSS core includes today
6
+
7
+ - `@martin/contracts`: shared loop, policy, grounding, leash, budget, and rollback types
8
+ - `@martin/core`: the runtime controller, persistence layer, grounding scanner, leash engine, patch-truth scoring, and rollback restoration logic
9
+ - `@martin/adapters`: normalized Claude CLI, Codex CLI, and direct-provider or stub adapter surfaces
10
+ - `@martin/cli`: the local operator CLI for `run`, `inspect`, and `resume`
11
+ - `@martinloop/mcp`: the MCP server surface for `martin_run`, `martin_inspect`, and `martin_status`
12
+
13
+ ## What is still outside the initial OSS promise
14
+
15
+ - The root workspace now exposes the `martin-loop` public package facade, and `@martinloop/mcp` now has a standalone tarball shape validated via `pnpm --filter @martinloop/mcp smoke:pack` plus a published-package smoke test run via `pnpm --filter @martinloop/mcp smoke:published`, but registry publication is still a separate release step.
16
+ - `@martin/contracts`, `@martin/core`, and `@martin/adapters` are still marked `private` in their package manifests.
17
+ - The hosted control-plane and local dashboard remain in the repo, but they are not yet the finalized public OSS boundary.
18
+ - The benchmark harness remains a workspace-only RC surface under `benchmarks/` and is not part of the publishable CLI boundary yet.
19
+ - Final licensing, public package publishing, and managed-product packaging are still gated behind the remaining Phase 15 release-lane work.
20
+ - Internal workspace packages remain non-public release internals unless the release lane explicitly widens that surface.
21
+
22
+ That means this repo is ready for grounded engineering review and RC validation, but it is not yet claiming a finished public OSS release.
23
+
24
+ ## Runtime truth the current core enforces
25
+
26
+ - Explicit policy phases: `GATHER`, `ADMIT`, `PATCH`, `VERIFY`, `RECOVER`, `ESCALATE`, `ABORT`, `HANDOFF`
27
+ - Grounding scans against repo anatomy before success is accepted
28
+ - Blocking leash behavior for unsafe verifier commands, file-scope violations, approval-boundary changes, and secret handling
29
+ - Provenance-aware accounting using `actual`, `estimated`, and `unavailable`
30
+ - Persisted attempt artifacts under `~/.martin/runs/<runId>/artifacts/attempt-XXX/`
31
+ - Patch-truth scoring plus rollback boundary and restore outcome artifacts for discarded or blocked repo-backed attempts
32
+
33
+ ## Trust profiles
34
+
35
+ Martin currently exposes these execution profiles:
36
+
37
+ - `strict_local`: safest default for local repo work
38
+ - `ci_safe`: tighter CI-oriented behavior
39
+ - `staging_controlled`: controlled outbound or network allowances with approvals
40
+ - `research_untrusted`: looser network posture for research-oriented runs while still enforcing approval boundaries
41
+
42
+ ## Accounting labels
43
+
44
+ Martin keeps cost provenance explicit:
45
+
46
+ - `actual`: reported directly by the provider or adapter settlement
47
+ - `estimated`: derived from pricing logic or modeled usage
48
+ - `unavailable`: the adapter could not produce a trustworthy number
49
+
50
+ Do not collapse those labels when building dashboards, docs, or public claims.
51
+
52
+ ## Frozen public launch target
53
+
54
+ The current engineering memo freezes these public-launch targets for release planning:
55
+
56
+ - install target: `npm install martin-loop`
57
+ - CLI target: `npx martin-loop ...`
58
+ - SDK target: `import { MartinLoop } from "martin-loop"`
59
+ - MCP target (publish-ready): `npx @martinloop/mcp`
60
+
61
+ Those runtime targets are implemented in the root package facade and verified through a clean-install smoke test. The MCP target is packaged and verified through a tarball launch smoke test. During the current RC phase, the honest operator path still includes the repo-local workflow documented below and in the quickstart, because public registry publication and broader release packaging remain separate release steps.
62
+
63
+ ## Reproducibility
64
+
65
+ From the repo root:
66
+
67
+ ```bash
68
+ pnpm install
69
+ pnpm build
70
+ pnpm rc:validate
71
+ ```
72
+
73
+ `pnpm rc:validate` runs the current RC matrix in an isolated temp home so fresh-home behavior is checked instead of depending on warmed `~/.martin` state. Use `pnpm rc:validate:install` when you also want the RC run to perform a clean `pnpm install --frozen-lockfile` first.
74
+
75
+ ## RC gate commands
76
+
77
+ The current release-candidate gate is:
78
+
79
+ - `pnpm oss:validate`
80
+ - `pnpm public:smoke`
81
+ - `pnpm mcp:published:smoke`
82
+ - `pnpm repo:smoke`
83
+ - `pnpm rc:validate`
84
+ - `pnpm pilot:prep:validate`
85
+ - `pnpm release:matrix:local`
86
+
87
+ `pnpm rc:validate` now includes the machine-checked release-surface audit in addition to the existing build, test, benchmark, provider-path, OSS-boundary, and control-plane checks.
88
+
89
+ ## Where to go next
90
+
91
+ - [`docs/oss/QUICKSTART.md`](./QUICKSTART.md) for clone-to-first-run setup
92
+ - [`docs/oss/EXAMPLES.md`](./EXAMPLES.md) for grounded CLI and MCP examples
93
+ - [`docs/oss/OSS-BOUNDARY-REPORT.md`](./OSS-BOUNDARY-REPORT.md) for the current machine-checked OSS boundary and public-surface status
94
+ - [`docs/oss/RELEASE-SURFACE-REPORT.md`](./RELEASE-SURFACE-REPORT.md) for the current machine-checked release-surface audit
95
+ - [`docs/pilot/README.md`](../pilot/README.md) for the pilot-prep package that remains explicitly gated behind Phase 13 completion
96
+ - [`../../README.md`](../../README.md) for the repo-level RC status and workspace map
@@ -1,5 +1,5 @@
1
1
  {
2
- "generatedAt": "2026-05-11T21:47:37.407Z",
2
+ "generatedAt": "2026-05-12T17:46:28.520Z",
3
3
  "publicSurface": {
4
4
  "packageName": "martin-loop",
5
5
  "installCommand": "npm install martin-loop",
@@ -9,6 +9,7 @@
9
9
  "rcGateCommands": [
10
10
  "pnpm oss:validate",
11
11
  "pnpm public:smoke",
12
+ "pnpm mcp:published:smoke",
12
13
  "pnpm repo:smoke",
13
14
  "pnpm rc:validate",
14
15
  "pnpm pilot:prep:validate",
@@ -1,6 +1,6 @@
1
1
  # Martin Loop Phase 13 Release Surface Audit
2
2
 
3
- Generated: 2026-05-11T21:47:37.407Z
3
+ Generated: 2026-05-12T17:46:28.520Z
4
4
 
5
5
  ## Verdict
6
6
  **GO**
@@ -14,6 +14,7 @@ Generated: 2026-05-11T21:47:37.407Z
14
14
  ## RC Gate Commands
15
15
  - `pnpm oss:validate`
16
16
  - `pnpm public:smoke`
17
+ - `pnpm mcp:published:smoke`
17
18
  - `pnpm repo:smoke`
18
19
  - `pnpm rc:validate`
19
20
  - `pnpm pilot:prep:validate`