martin-loop 0.1.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286) hide show
  1. package/CODE_OF_CONDUCT.md +32 -0
  2. package/README.md +172 -227
  3. package/demo/seeded-workspace/README.md +35 -0
  4. package/demo/seeded-workspace/TASKS.md +29 -0
  5. package/demo/seeded-workspace/martin.config.yaml +11 -0
  6. package/demo/seeded-workspace/package.json +8 -0
  7. package/demo/seeded-workspace/src/invoice-summary.js +11 -0
  8. package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
  9. package/dist/bin/martin-loop.js +0 -0
  10. package/dist/vendor/adapters/claude-cli.d.ts +19 -4
  11. package/dist/vendor/adapters/claude-cli.js +55 -24
  12. package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
  13. package/dist/vendor/adapters/cli-bridge.js +154 -28
  14. package/dist/vendor/adapters/counter.d.ts +1 -0
  15. package/dist/vendor/adapters/counter.js +4 -0
  16. package/dist/vendor/adapters/git-baseline.d.ts +50 -0
  17. package/dist/vendor/adapters/git-baseline.js +233 -0
  18. package/dist/vendor/adapters/index.d.ts +1 -0
  19. package/dist/vendor/adapters/index.js +1 -0
  20. package/dist/vendor/adapters/openrouter-adapter.d.ts +15 -0
  21. package/dist/vendor/adapters/openrouter-adapter.js +302 -0
  22. package/dist/vendor/adapters/usage.d.ts +48 -0
  23. package/dist/vendor/adapters/usage.js +66 -0
  24. package/dist/vendor/adapters/verifier-only.d.ts +7 -0
  25. package/dist/vendor/adapters/verifier-only.js +57 -0
  26. package/dist/vendor/cli/bin/exit.d.ts +12 -0
  27. package/dist/vendor/cli/bin/exit.js +28 -0
  28. package/dist/vendor/cli/commands/analyze.d.ts +5 -0
  29. package/dist/vendor/cli/commands/analyze.js +58 -0
  30. package/dist/vendor/cli/commands/audit-log-verify.d.ts +34 -0
  31. package/dist/vendor/cli/commands/audit-log-verify.js +99 -0
  32. package/dist/vendor/cli/commands/audit.d.ts +8 -0
  33. package/dist/vendor/cli/commands/audit.js +199 -0
  34. package/dist/vendor/cli/commands/corpus.d.ts +5 -0
  35. package/dist/vendor/cli/commands/corpus.js +60 -0
  36. package/dist/vendor/cli/commands/doctor.d.ts +8 -0
  37. package/dist/vendor/cli/commands/doctor.js +219 -0
  38. package/dist/vendor/cli/commands/explain.d.ts +17 -0
  39. package/dist/vendor/cli/commands/explain.js +176 -0
  40. package/dist/vendor/cli/commands/export.d.ts +5 -0
  41. package/dist/vendor/cli/commands/export.js +60 -0
  42. package/dist/vendor/cli/commands/governance.d.ts +8 -0
  43. package/dist/vendor/cli/commands/governance.js +95 -0
  44. package/dist/vendor/cli/commands/improve.d.ts +18 -0
  45. package/dist/vendor/cli/commands/improve.js +396 -0
  46. package/dist/vendor/cli/commands/init.d.ts +8 -0
  47. package/dist/vendor/cli/commands/init.js +281 -0
  48. package/dist/vendor/cli/commands/migration.d.ts +8 -0
  49. package/dist/vendor/cli/commands/migration.js +67 -0
  50. package/dist/vendor/cli/commands/prior.d.ts +23 -0
  51. package/dist/vendor/cli/commands/prior.js +145 -0
  52. package/dist/vendor/cli/commands/resume.d.ts +21 -0
  53. package/dist/vendor/cli/commands/resume.js +73 -0
  54. package/dist/vendor/cli/commands/verify.d.ts +6 -0
  55. package/dist/vendor/cli/commands/verify.js +43 -0
  56. package/dist/vendor/cli/index.d.ts +6 -1
  57. package/dist/vendor/cli/index.js +124 -7
  58. package/dist/vendor/cli/research/public-corpus.d.ts +43 -0
  59. package/dist/vendor/cli/research/public-corpus.js +151 -0
  60. package/dist/vendor/cli/ui/error-card.d.ts +38 -0
  61. package/dist/vendor/cli/ui/error-card.js +103 -0
  62. package/dist/vendor/cli/ui/mission-brief.d.ts +41 -0
  63. package/dist/vendor/cli/ui/mission-brief.js +173 -0
  64. package/dist/vendor/cli/ui/summary-card.d.ts +34 -0
  65. package/dist/vendor/cli/ui/summary-card.js +102 -0
  66. package/dist/vendor/contracts/audit.d.ts +46 -0
  67. package/dist/vendor/contracts/audit.js +360 -0
  68. package/dist/vendor/contracts/index.d.ts +3 -1
  69. package/dist/vendor/contracts/post-phase15.d.ts +240 -0
  70. package/dist/vendor/contracts/post-phase15.js +166 -0
  71. package/dist/vendor/core/agent/mandates.d.ts +46 -0
  72. package/dist/vendor/core/agent/mandates.js +178 -0
  73. package/dist/vendor/core/agent/receipts.d.ts +38 -0
  74. package/dist/vendor/core/agent/receipts.js +131 -0
  75. package/dist/vendor/core/agent/signing.d.ts +17 -0
  76. package/dist/vendor/core/agent/signing.js +91 -0
  77. package/dist/vendor/core/attestation/sign.d.ts +25 -0
  78. package/dist/vendor/core/attestation/sign.js +216 -0
  79. package/dist/vendor/core/autonomy/autonomous-promotion.d.ts +120 -0
  80. package/dist/vendor/core/autonomy/autonomous-promotion.js +346 -0
  81. package/dist/vendor/core/autonomy/envelope-v2.d.ts +29 -0
  82. package/dist/vendor/core/autonomy/envelope-v2.js +60 -0
  83. package/dist/vendor/core/autonomy/envelope.d.ts +17 -0
  84. package/dist/vendor/core/autonomy/envelope.js +27 -0
  85. package/dist/vendor/core/autonomy/escalation-ledger.d.ts +20 -0
  86. package/dist/vendor/core/autonomy/escalation-ledger.js +18 -0
  87. package/dist/vendor/core/autonomy/resume.d.ts +15 -0
  88. package/dist/vendor/core/autonomy/resume.js +23 -0
  89. package/dist/vendor/core/circuit/circuit-breaker.d.ts +60 -0
  90. package/dist/vendor/core/circuit/circuit-breaker.js +143 -0
  91. package/dist/vendor/core/compiler.d.ts +2 -0
  92. package/dist/vendor/core/compiler.js +10 -4
  93. package/dist/vendor/core/context-distillation.d.ts +3 -0
  94. package/dist/vendor/core/context-distillation.js +44 -0
  95. package/dist/vendor/core/context-flow/compile-context.d.ts +8 -0
  96. package/dist/vendor/core/context-flow/compile-context.js +111 -0
  97. package/dist/vendor/core/context-flow/entities.d.ts +2 -0
  98. package/dist/vendor/core/context-flow/entities.js +44 -0
  99. package/dist/vendor/core/context-flow/evaluate-policy.d.ts +2 -0
  100. package/dist/vendor/core/context-flow/evaluate-policy.js +42 -0
  101. package/dist/vendor/core/context-flow/index.d.ts +11 -0
  102. package/dist/vendor/core/context-flow/index.js +24 -0
  103. package/dist/vendor/core/context-flow/labels.d.ts +3 -0
  104. package/dist/vendor/core/context-flow/labels.js +17 -0
  105. package/dist/vendor/core/context-flow/normalizer.d.ts +9 -0
  106. package/dist/vendor/core/context-flow/normalizer.js +69 -0
  107. package/dist/vendor/core/context-flow/profiles.d.ts +33 -0
  108. package/dist/vendor/core/context-flow/profiles.js +36 -0
  109. package/dist/vendor/core/context-flow/redaction.d.ts +1 -0
  110. package/dist/vendor/core/context-flow/redaction.js +6 -0
  111. package/dist/vendor/core/context-flow/sensitivity.d.ts +2 -0
  112. package/dist/vendor/core/context-flow/sensitivity.js +27 -0
  113. package/dist/vendor/core/context-flow/sync-preview.d.ts +2 -0
  114. package/dist/vendor/core/context-flow/sync-preview.js +22 -0
  115. package/dist/vendor/core/context-flow/token-estimator.d.ts +3 -0
  116. package/dist/vendor/core/context-flow/token-estimator.js +13 -0
  117. package/dist/vendor/core/context-flow/types.d.ts +91 -0
  118. package/dist/vendor/core/context-flow/types.js +2 -0
  119. package/dist/vendor/core/context-integrity.d.ts +26 -0
  120. package/dist/vendor/core/context-integrity.js +56 -0
  121. package/dist/vendor/core/context-utility.d.ts +47 -0
  122. package/dist/vendor/core/context-utility.js +405 -0
  123. package/dist/vendor/core/cost/pipeline.d.ts +92 -0
  124. package/dist/vendor/core/cost/pipeline.js +141 -0
  125. package/dist/vendor/core/cost/tagged-cost.d.ts +27 -0
  126. package/dist/vendor/core/cost/tagged-cost.js +55 -0
  127. package/dist/vendor/core/cost-governor.d.ts +2 -0
  128. package/dist/vendor/core/cost-governor.js +50 -0
  129. package/dist/vendor/core/cve/cve-check.d.ts +80 -0
  130. package/dist/vendor/core/cve/cve-check.js +172 -0
  131. package/dist/vendor/core/digital-twin/index.d.ts +27 -0
  132. package/dist/vendor/core/digital-twin/index.js +90 -0
  133. package/dist/vendor/core/drift/drift-graph.d.ts +47 -0
  134. package/dist/vendor/core/drift/drift-graph.js +100 -0
  135. package/dist/vendor/core/drift/objective-lock.d.ts +69 -0
  136. package/dist/vendor/core/drift/objective-lock.js +88 -0
  137. package/dist/vendor/core/drift/scope.d.ts +46 -0
  138. package/dist/vendor/core/drift/scope.js +102 -0
  139. package/dist/vendor/core/drift/signature-lock.d.ts +48 -0
  140. package/dist/vendor/core/drift/signature-lock.js +202 -0
  141. package/dist/vendor/core/drift/stale-proof-gate.d.ts +21 -0
  142. package/dist/vendor/core/drift/stale-proof-gate.js +19 -0
  143. package/dist/vendor/core/eval/known-bad-world-runner.d.ts +24 -0
  144. package/dist/vendor/core/eval/known-bad-world-runner.js +256 -0
  145. package/dist/vendor/core/evidence/claim-audit.d.ts +18 -0
  146. package/dist/vendor/core/evidence/claim-audit.js +89 -0
  147. package/dist/vendor/core/exit-intelligence.d.ts +2 -0
  148. package/dist/vendor/core/exit-intelligence.js +58 -0
  149. package/dist/vendor/core/explain/formatter.d.ts +42 -0
  150. package/dist/vendor/core/explain/formatter.js +171 -0
  151. package/dist/vendor/core/explain/timeline.d.ts +29 -0
  152. package/dist/vendor/core/explain/timeline.js +213 -0
  153. package/dist/vendor/core/failure-taxonomy.d.ts +2 -0
  154. package/dist/vendor/core/failure-taxonomy.js +76 -0
  155. package/dist/vendor/core/gateway/index.d.ts +10 -0
  156. package/dist/vendor/core/gateway/index.js +12 -0
  157. package/dist/vendor/core/gateway/registry.d.ts +40 -0
  158. package/dist/vendor/core/gateway/registry.js +97 -0
  159. package/dist/vendor/core/gateway/transport.d.ts +31 -0
  160. package/dist/vendor/core/gateway/transport.js +82 -0
  161. package/dist/vendor/core/gateway/vault.d.ts +19 -0
  162. package/dist/vendor/core/gateway/vault.js +29 -0
  163. package/dist/vendor/core/graph/adapters.d.ts +43 -0
  164. package/dist/vendor/core/graph/adapters.js +91 -0
  165. package/dist/vendor/core/graph/hotspots.d.ts +22 -0
  166. package/dist/vendor/core/graph/hotspots.js +30 -0
  167. package/dist/vendor/core/graph/index.d.ts +1 -0
  168. package/dist/vendor/core/graph/index.js +2 -0
  169. package/dist/vendor/core/honey/honey-tokens.d.ts +32 -0
  170. package/dist/vendor/core/honey/honey-tokens.js +44 -0
  171. package/dist/vendor/core/index.d.ts +7 -4
  172. package/dist/vendor/core/index.js +222 -64
  173. package/dist/vendor/core/learning/bayesian-update.d.ts +31 -0
  174. package/dist/vendor/core/learning/bayesian-update.js +60 -0
  175. package/dist/vendor/core/learning/prior-sets.d.ts +42 -0
  176. package/dist/vendor/core/learning/prior-sets.js +111 -0
  177. package/dist/vendor/core/learning/promotion-gate.d.ts +17 -0
  178. package/dist/vendor/core/learning/promotion-gate.js +23 -0
  179. package/dist/vendor/core/leash/blast-radius.d.ts +42 -0
  180. package/dist/vendor/core/leash/blast-radius.js +156 -0
  181. package/dist/vendor/core/leash/policy-leash.d.ts +31 -0
  182. package/dist/vendor/core/leash/policy-leash.js +117 -0
  183. package/dist/vendor/core/memo/memo.d.ts +63 -0
  184. package/dist/vendor/core/memo/memo.js +97 -0
  185. package/dist/vendor/core/memory/learning-pipeline.d.ts +154 -0
  186. package/dist/vendor/core/memory/learning-pipeline.js +391 -0
  187. package/dist/vendor/core/memory/palace.d.ts +84 -0
  188. package/dist/vendor/core/memory/palace.js +379 -0
  189. package/dist/vendor/core/merge/ast-merge.d.ts +22 -0
  190. package/dist/vendor/core/merge/ast-merge.js +350 -0
  191. package/dist/vendor/core/merge/text-merge.d.ts +12 -0
  192. package/dist/vendor/core/merge/text-merge.js +182 -0
  193. package/dist/vendor/core/otel/tracer.d.ts +45 -0
  194. package/dist/vendor/core/otel/tracer.js +116 -0
  195. package/dist/vendor/core/parallel/parallel-attempts.d.ts +28 -0
  196. package/dist/vendor/core/parallel/parallel-attempts.js +41 -0
  197. package/dist/vendor/core/parallel/scorer.d.ts +24 -0
  198. package/dist/vendor/core/parallel/scorer.js +65 -0
  199. package/dist/vendor/core/pattern-detection.d.ts +64 -0
  200. package/dist/vendor/core/pattern-detection.js +108 -0
  201. package/dist/vendor/core/persistence/checkpoint.d.ts +44 -0
  202. package/dist/vendor/core/persistence/checkpoint.js +156 -0
  203. package/dist/vendor/core/persistence/cleanup.d.ts +22 -0
  204. package/dist/vendor/core/persistence/cleanup.js +131 -0
  205. package/dist/vendor/core/persistence/index.d.ts +2 -0
  206. package/dist/vendor/core/persistence/index.js +1 -0
  207. package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
  208. package/dist/vendor/core/persistence/runs-reader.js +84 -0
  209. package/dist/vendor/core/persistence/store.d.ts +6 -1
  210. package/dist/vendor/core/persistence/store.js +5 -0
  211. package/dist/vendor/core/policy/file-touch-quota.d.ts +60 -0
  212. package/dist/vendor/core/policy/file-touch-quota.js +105 -0
  213. package/dist/vendor/core/policy/policy-loader.d.ts +30 -0
  214. package/dist/vendor/core/policy/policy-loader.js +170 -0
  215. package/dist/vendor/core/policy/policy-schema.d.ts +55 -0
  216. package/dist/vendor/core/policy/policy-schema.js +78 -0
  217. package/dist/vendor/core/policy.d.ts +6 -0
  218. package/dist/vendor/core/probe/probe.d.ts +49 -0
  219. package/dist/vendor/core/probe/probe.js +115 -0
  220. package/dist/vendor/core/proof/patch-proof.d.ts +58 -0
  221. package/dist/vendor/core/proof/patch-proof.js +84 -0
  222. package/dist/vendor/core/proof/semantic-probe.d.ts +25 -0
  223. package/dist/vendor/core/proof/semantic-probe.js +82 -0
  224. package/dist/vendor/core/recovery/failure-mode-runner.d.ts +29 -0
  225. package/dist/vendor/core/recovery/failure-mode-runner.js +39 -0
  226. package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
  227. package/dist/vendor/core/red-blue/red-phase.js +141 -0
  228. package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
  229. package/dist/vendor/core/red-blue/risk-tiers.js +33 -0
  230. package/dist/vendor/core/replay/replay.d.ts +85 -0
  231. package/dist/vendor/core/replay/replay.js +109 -0
  232. package/dist/vendor/core/router/engine.d.ts +54 -0
  233. package/dist/vendor/core/router/engine.js +131 -0
  234. package/dist/vendor/core/router/index.d.ts +1 -0
  235. package/dist/vendor/core/router/index.js +2 -0
  236. package/dist/vendor/core/router/trust-calibration.d.ts +57 -0
  237. package/dist/vendor/core/router/trust-calibration.js +127 -0
  238. package/dist/vendor/core/run-martin.d.ts +2 -0
  239. package/dist/vendor/core/run-martin.js +287 -0
  240. package/dist/vendor/core/security/cve-scanner.d.ts +62 -0
  241. package/dist/vendor/core/security/cve-scanner.js +178 -0
  242. package/dist/vendor/core/sentinel/efficiency-sentinel.d.ts +29 -0
  243. package/dist/vendor/core/sentinel/efficiency-sentinel.js +30 -0
  244. package/dist/vendor/core/sentinel/progress-guard.d.ts +35 -0
  245. package/dist/vendor/core/sentinel/progress-guard.js +46 -0
  246. package/dist/vendor/core/siem/siem-emitter.d.ts +49 -0
  247. package/dist/vendor/core/siem/siem-emitter.js +157 -0
  248. package/dist/vendor/core/strategy/attempt-brief.d.ts +22 -0
  249. package/dist/vendor/core/strategy/attempt-brief.js +89 -0
  250. package/dist/vendor/core/summarize/diff-summary.d.ts +35 -0
  251. package/dist/vendor/core/summarize/diff-summary.js +204 -0
  252. package/dist/vendor/core/surface-signals.d.ts +21 -0
  253. package/dist/vendor/core/surface-signals.js +139 -0
  254. package/dist/vendor/core/truth/truth-wall.d.ts +51 -0
  255. package/dist/vendor/core/truth/truth-wall.js +69 -0
  256. package/dist/vendor/core/truth-spine.d.ts +26 -0
  257. package/dist/vendor/core/truth-spine.js +62 -0
  258. package/dist/vendor/core/types.d.ts +115 -0
  259. package/dist/vendor/core/types.js +2 -0
  260. package/dist/vendor/core/verification/tiered-verify.d.ts +17 -0
  261. package/dist/vendor/core/verification/tiered-verify.js +29 -0
  262. package/dist/vendor/core/verifier-pyramid.d.ts +32 -0
  263. package/dist/vendor/core/verifier-pyramid.js +111 -0
  264. package/dist/vendor/core/workflow-artifacts.d.ts +99 -0
  265. package/dist/vendor/core/workflow-artifacts.js +668 -0
  266. package/dist/vendor/core/wrap/supervised-run.d.ts +96 -0
  267. package/dist/vendor/core/wrap/supervised-run.js +178 -0
  268. package/docs/assets/cli-animated.svg +139 -0
  269. package/docs/assets/cli-static.svg +34 -0
  270. package/docs/assets/github-hero-v2.svg +23 -0
  271. package/docs/assets/martin-raplph.png.jpg +0 -0
  272. package/docs/assets/martinloop-logo.png +0 -0
  273. package/docs/assets/nvidia-inception-program-light.png +0 -0
  274. package/docs/assets/nvidia-inception-program.png +0 -0
  275. package/docs/assets/phase3c-sidesidebyside-demo.html +228 -0
  276. package/docs/assets/side-by-side.svg +134 -0
  277. package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
  278. package/docs/oss/EXAMPLES.md +9 -1
  279. package/docs/oss/OSS-BOUNDARY-REPORT.json +109 -113
  280. package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
  281. package/docs/oss/QUICKSTART.md +39 -4
  282. package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
  283. package/docs/oss/README.md +7 -4
  284. package/docs/oss/RELEASE-SURFACE-REPORT.json +46 -45
  285. package/docs/oss/RELEASE-SURFACE-REPORT.md +36 -35
  286. package/package.json +129 -49
@@ -0,0 +1,32 @@
1
+ # Code of Conduct
2
+
3
+ MartinLoop is an open-source project for developers building safer autonomous AI coding workflows.
4
+
5
+ We expect contributors and maintainers to keep discussion respectful, technical, and useful.
6
+
7
+ ## Expected behavior
8
+
9
+ - Be respectful and direct
10
+ - Assume good intent
11
+ - Give constructive technical feedback
12
+ - Stay focused on improving the project
13
+ - Help keep autonomous AI coding safer, cheaper, and more inspectable
14
+
15
+ ## Unacceptable behavior
16
+
17
+ - Harassment or abusive comments
18
+ - Personal attacks
19
+ - Publishing private information
20
+ - Spam or fake engagement
21
+ - Submitting secrets, credentials, or unsafe examples
22
+ - Encouraging unsafe agent behavior that can spend money or mutate files without clear warnings
23
+
24
+ ## Enforcement
25
+
26
+ Maintainers may remove comments, close issues, block users, or reject contributions that violate this code of conduct.
27
+
28
+ ## Reporting
29
+
30
+ Report concerns privately to:
31
+
32
+ keesan@martinloop.com
package/README.md CHANGED
@@ -1,237 +1,143 @@
1
- <div align="center">
1
+ # Martin Loop
2
2
 
3
- <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/martinloop-logo.png" alt="MartinLoop" width="260">
3
+ Governed runtime for AI coding agents with budgets, policy gates, rollback evidence, and auditable run records.
4
4
 
5
- ### A governed runtime for autonomous AI coding agents. ⭐⭐⭐
5
+ ![Martin Loop CLI release surface](./docs/assets/marketing/github-readme-cli.svg)
6
6
 
7
- [![License: MIT](https://img.shields.io/badge/license-MIT-7c3aed?style=flat-square)](./LICENSE)
8
- [![TypeScript](https://img.shields.io/badge/TypeScript-strict-3178c6?style=flat-square&logo=typescript&logoColor=white)](./tsconfig.base.json)
9
- [![Node](https://img.shields.io/badge/node-%3E%3D20-3c873a?style=flat-square&logo=nodedotjs&logoColor=white)](#quick-start)
10
- [![npm](https://img.shields.io/badge/npm-martin--loop-cc3534?style=flat-square&logo=npm&logoColor=white)](https://www.npmjs.com/package/martin-loop)
7
+ <picture>
8
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/nvidia-inception-program.png" />
9
+ <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/nvidia-inception-program-light.png" alt="NVIDIA Inception Program logo" width="280" />
10
+ </picture>
11
11
 
12
- <br>
12
+ Martin Loop has been accepted into the NVIDIA Inception program.
13
13
 
14
- **Your overnight AI pipeline estimated $2.40.**
15
- **You woke up to a $65 bill.**
16
- <br> 47 retries. No hard stop. No rollback. No audit trail. Nothing merged.
17
- MartinLoop exists so that never happens again.✅ <br> <br>
18
- If you think autonomous AI coding agents need budgets, brakes, and receipts, ⭐ the repo so more builders can find it.
19
- <br>
14
+ [![License](https://img.shields.io/badge/license-MIT-blue)](./LICENSE)
15
+ [![TypeScript](https://img.shields.io/badge/language-TypeScript-3178c6)](./tsconfig.base.json)
16
+ [![Node](https://img.shields.io/badge/node-%3E%3D20-green)](./package.json)
20
17
 
21
- > AI coding agents are useful. Unbounded retry loops are not.
22
- >
23
- > MartinLoop wraps agent runs with budgets, policy checks, verifier gates, rollback evidence, and inspectable run records.
24
- <br>
25
- <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/cli-animated.svg" alt="MartinLoop CLI — governed agent run" width="720">
18
+ AI coding agents can write files, run commands, spend provider budget, and leave behind hard-to-review state. Martin Loop wraps those actions in a runtime that answers five questions before you trust the result:
26
19
 
27
- </div>
20
+ - What is the task allowed to touch?
21
+ - What is the hard budget?
22
+ - Which safety and policy gates ran?
23
+ - Did the verifier actually execute?
24
+ - Where is the evidence if the result needs review, rollback, or resume?
28
25
 
29
- ---
26
+ Current public wording should describe Martin Loop as a **bounded governed AI-coding runtime**, not as a hands-off unrestricted production developer or broadly self-updating system. Trace-autonomy certification exists as a gate, but public trace-autonomy wording remains blocked until the signed evidence pack and external audit signoff pass.
30
27
 
31
- ## The Problem
28
+ ## Release State
32
29
 
33
- A typical autonomous coding loop keeps attempting work until tests pass. Without a governance layer, that loop can keep spending, mutate files outside the intended scope, lose track of why it failed, and leave teams without a clean audit trail.
30
+ - Phase 14 staged pilot: closed
31
+ - Phase 15 public release: active
32
+ - Phase 15 adds release-specific truth, packaging, and final-gate checks on top of this baseline.
34
33
 
35
- Ralph-style loops are powerful but they attempt ➡️ check ➡️ retry ➡️ repeat, with no strong answer to:
34
+ ## Why It Exists
36
35
 
37
- - What changed?
38
- - What did it cost?
39
- - Why was it allowed?
40
- - Why did it stop?
41
- - Can we inspect or resume it later?
42
-
43
- MartinLoop governs the failure mode.
44
-
45
- ---
46
-
47
- ## The Solution
48
-
49
- ✅ Martin Loop wraps AI coding loops with a governance layer.
50
-
51
- It does not try to replace the agent pattern. It makes that pattern safe to run.
52
-
53
- ### What MartinLoop Does Today
54
-
55
- | Capability | Current behavior |
36
+ | Risk in agentic coding loops | Martin Loop control |
56
37
  |---|---|
57
- | Budget governance | Enforces `maxUsd`, `softLimitUsd`, `maxIterations`, and `maxTokens`; rejects attempts projected to exceed remaining budget and exits on budget or iteration exhaustion. Hard USD budget caps stop work before the next attempt breaches policy. |
58
- | Verifier gate | A run only reaches `completed` when the adapter result and verifier state pass. Unsafe verifier commands are blocked before agent execution. |
59
- | Failure taxonomy | Classifies failures across 11 current classes, including hallucination, test regression, scope creep, repo grounding failure, environment mismatch, and budget pressure, distinguishing real success from unsafe, invalid, or terminal behavior. |
60
- | Safety leash | Evaluates verifier commands, file scope, dependency or migration changes that require approval, and secret-like values in task text. **Policy-as-code**. |
61
- | Rollback evidence | Captures rollback boundaries and restore outcomes for repo-backed attempts when a persistence store is configured. |
62
- | Context distillation | Carries a distilled summary of recent attempts and remaining constraints into subsequent attempts. |
63
- | Run records | The CLI appends JSONL loop records under `~/.martin/runs/<workspaceId>.jsonl`; lower-level stores can also persist contracts, ledgers, and attempt artifacts. |
64
-
65
-
66
- ⭐The result is a runtime that can complete good work, refuse unsafe work, stop uneconomical work, and leave evidence behind.✅
67
- ---
68
-
69
- ## The Ralph Loop, explained
70
-
71
- **"Everybody has gotten infatuated with what we call these Ralph Wiggum loops, just like send the thing off and it'll just go figure something out... A, it never figures anything out. And B, you just get this ginormous bill..."** — Chamath Palihapitiya, All-In Podcast #263, March 2026
72
-
73
- ⛔ The **Ralph Loop** is the failure mode where an AI coding agent keeps trying without knowing when it should stop.
74
-
75
- The pattern is simple: attempt the task, run checks, retry on failure, repeat. The problem is not that the loop exists. The problem is that most implementations have no hard budget cap, no signed evidence layer, and no pre-execution control system. They know how to keep trying. They do **not** know when continuing is unsafe, uneconomical, or impossible.
76
-
77
- ✅ Martin Loop solves the Ralph Loop problem by enforcing rules **before** damage happens:
78
-
79
- - it stops the next attempt before budget overspend
80
- - it classifies unsafe or invalid actions before execution
81
- - it appends a structured JSONL audit record for every attempt
82
- - it rolls back failed runs instead of leaving broken state behind
83
- - it reduces runaway token growth with context distillation
38
+ | Retry loops can spend without a hard stop | Budget admission checks, soft limits, hard USD caps, iteration caps, and token caps |
39
+ | Agents can mutate the wrong surface | Task contracts, allowed paths, denied paths, policy leashes, and rollback evidence |
40
+ | "Verifier passed" can hide that no verifier ran | Verifier lifecycle state, failure taxonomy, and machine-readable exit reasons |
41
+ | Failure evidence is easy to lose | Canonical run records, ledger events, receipts, and handoff artifacts |
42
+ | Operators need proof before widening claims | Claim linting, release gates, audit packs, and signed autonomy certification artifacts |
84
43
 
85
- If Ralph ever burned $165.70 on your dime, you're in the right place. Martin stopped him at $4.97 with a full audit trail. LFG! 🚀 Finally a Martin Prince leash for Ralph Wiggum! :)
44
+ Martin Loop does not replace Claude, Codex, or other coding agents. It gives those agents an operating envelope.
86
45
 
87
- <div align="center">
88
- <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/martin-raplph.png.jpg" alt="Martin vs Ralph — governed vs ungoverned agent loop" width="240">
89
- </div>
46
+ ![Martin Loop architecture](./docs/assets/architecture.svg)
90
47
 
91
- ### How It Works — Five Layers
48
+ ## What It Does Today
92
49
 
93
- | Layer | What it does |
50
+ | Capability | Current behavior |
94
51
  |---|---|
95
- | **1. Task Contract** | Objective, verifier plan, repo root, allowed/denied paths, acceptance criteria, workspace, project, and budget. |
96
- | **2. Policy & Budget** | Defaults from `martin.config.yaml`; CLI flags override. Budget preflight rejects attempts before execution. |
97
- | **3. Agent Adapters** | Claude CLI, Codex CLI, direct-provider, and stub adapters normalize execution results into the core runtime contract. |
98
- | **4. Safety & Verification** | Verifier commands, file scope, approval-boundary changes, secret-like values, and grounding determine whether work is kept. |
99
- | **5. Persistence** | CLI writes JSONL records under `~/.martin/runs/`. Repo-backed runs can also persist contracts, ledgers, diffs, and rollback artifacts. |
100
-
101
- ---
102
-
103
- ## See It In Action
104
-
105
- Same task, same starting state. MartinLoop completes in one verified attempt at `$2.30`. The uncontrolled loop retries four times, spends `$5.20`, and fails with no audit trail.
106
-
107
- Martin Loop matters because it turns AI coding from an opaque experiment into something that can be governed, replayed, verified, and trusted.
108
-
109
- <div align="center">
110
- <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/side-by-side.svg" alt="Martin vs Ralph — governed vs ungoverned agent loop side-by-side benchmark comparison" width="720" height="1080">
111
- </div>
52
+ | Budget governance | Enforces `maxUsd`, `softLimitUsd`, `maxIterations`, and `maxTokens`; rejects attempts projected to exceed remaining budget. |
53
+ | Safety leash | Blocks unsafe verifier commands, out-of-scope writes, secret-like task text, and policy-restricted surfaces before accepting work. |
54
+ | Adapter execution | Normalizes Claude CLI, Codex CLI, direct-provider, and stub execution into one runtime result contract. |
55
+ | Verification gate | A run reaches `completed` only when adapter execution and verifier state both support the result. |
56
+ | Rollback evidence | Captures rollback boundaries and restore outcomes for repo-backed attempts when persistence is configured. |
57
+ | Trace intelligence | Reads run ledgers and reports loop patterns such as recovery exhaustion, verifier blind spots, oscillation, and budget pressure. |
58
+ | Guarded improvement | Emits improvement evidence and promotion artifacts; trusted-surface promotion remains policy-gated and claim-safe. |
59
+ | MCP package | Provides a standalone `@martinloop/mcp` server with `martin_run`, `martin_inspect`, and `martin_status`. |
112
60
 
61
+ ## Quick Start
113
62
 
114
- Reproducible locally:
63
+ Install the public package:
115
64
 
116
- ```sh
117
- pnpm --filter @martin/benchmarks test
118
- pnpm --filter @martin/benchmarks eval
119
- pnpm --filter @martin/benchmarks eval:phase12
65
+ ```bash
66
+ npm install martin-loop
67
+ npx martin-loop --help
120
68
  ```
121
69
 
122
- ---
70
+ Run a provider-free stub loop first:
123
71
 
124
- ## Quick Start
125
-
126
- ```sh
127
- npm install -g martin-loop
72
+ ```bash
73
+ MARTIN_LIVE=false MARTIN_NO_BRIEF=1 npx martin-loop run --objective "Summarize this repository" --yes
128
74
  ```
129
75
 
130
- This installs both the `martin-loop` package and the `martin` command alias. The package is currently published on npm as version `0.1.2`.
131
-
132
- ### Public Package Surface
133
-
134
- The frozen public package surface for this release candidate is:
76
+ Run a governed task with budget and verifier controls:
135
77
 
136
- - Install target: `npm install martin-loop`
137
- - CLI target: `npx martin-loop`
138
- - SDK target: `import { MartinLoop } from "martin-loop"`
139
-
140
- The `martin` command alias is installed for local operator convenience, but the public CLI surface is `npx martin-loop`.
141
-
142
- ### Run a governed task
143
-
144
- ```sh
145
- martin run "fix the auth regression" \
78
+ ```bash
79
+ npx martin-loop run \
80
+ --objective "Fix the auth regression" \
146
81
  --budget 3.00 \
147
- --verify "pnpm test"
82
+ --verify "pnpm test" \
83
+ --allow-path "src/**" \
84
+ --deny-path ".env*"
148
85
  ```
149
86
 
150
- You can also pass the objective explicitly:
87
+ Inspect a persisted run record:
151
88
 
152
- ```sh
153
- martin run --objective "fix the auth regression" --budget 3.00 --verify "pnpm test"
89
+ ```bash
90
+ npx martin-loop inspect --file ~/.martin/runs/<workspaceId>.jsonl
154
91
  ```
155
92
 
156
- For a no-spend repo-local dry run, use the stub adapter:
93
+ ## MCP Server
157
94
 
158
- ```powershell
159
- $env:MARTIN_LIVE='false'
160
- pnpm run:cli -- run --objective "Summarize the current runtime state" --verify "pnpm --filter @martin/core test"
161
- Remove-Item Env:MARTIN_LIVE
162
- ```
163
-
164
- ### Inspect or resume runs
95
+ Use the standalone MCP package when an MCP host needs to invoke Martin Loop:
165
96
 
166
- ```sh
167
- martin inspect --file ~/.martin/runs/<workspaceId>.jsonl
168
- martin resume <loopId>
97
+ ```bash
98
+ npx @martinloop/mcp
169
99
  ```
170
100
 
171
- `inspect` prints a portfolio summary for records in the file. `resume` looks up a persisted loop record by ID under `~/.martin/runs/`.
101
+ Claude Code examples:
172
102
 
173
- ---
174
-
175
- ## CLI
103
+ ```bash
104
+ claude mcp add --scope user martin-loop -- npx @martinloop/mcp
105
+ ```
176
106
 
177
- ```text
178
- martin run <objective> [options]
107
+ Windows PowerShell or cmd:
179
108
 
180
- --objective <text> The task to accomplish, or pass it as the first positional arg
181
- --budget <n> Hard cost cap in USD
182
- --budget-usd <n> Alias for --budget
183
- --soft-limit-usd <n> Soft budget threshold in USD
184
- --verify <cmd> Verifier command after each attempt
185
- --max-iterations <n> Maximum number of attempts
186
- --max-tokens <n> Maximum total token budget
187
- --engine <name> Adapter to use: claude (default) or codex
188
- --model <name> Override the adapter model
189
- --cwd <path> Repo root for the run
190
- --allow-path <glob> Restrict agent writes to this path pattern; repeatable
191
- --deny-path <glob> Block this path pattern; repeatable
192
- --accept <criterion> Add an acceptance criterion; repeatable
193
- --config <path> Path to a martin.config.yaml file
194
- --workspace <id> Workspace ID for the run record
195
- --project <id> Project ID for the run record
196
- --metadata <key=value> Attach metadata to the run record; repeatable
109
+ ```powershell
110
+ claude mcp add --scope user martin-loop cmd /c "npx @martinloop/mcp"
197
111
  ```
198
112
 
199
- The public CLI also includes `inspect`, `resume`, and a `bench` redirect that points reviewers to the workspace benchmark harness.
113
+ The standalone MCP package is intentionally narrow. It exposes:
200
114
 
201
- <div align="center">
202
- <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/cli-static.svg" alt="MartinLoop CLI terminal output" width="720">
203
- </div>
115
+ - `martin_run`
116
+ - `martin_inspect`
117
+ - `martin_status`
204
118
 
205
- ---
119
+ Official registry publication is a guarded release step. The local package gate is:
206
120
 
207
- ## Policy File
121
+ ```bash
122
+ pnpm --filter @martinloop/mcp test
123
+ pnpm --filter @martinloop/mcp build
124
+ pnpm --filter @martinloop/mcp smoke:pack
125
+ pnpm --filter @martinloop/mcp smoke:published
126
+ ```
208
127
 
209
- Drop a `martin.config.yaml` in your repo root to set governance defaults:
128
+ ## Public Package Surface
210
129
 
211
- ```yaml
212
- budget:
213
- maxUsd: 5.00
214
- softLimitUsd: 3.75
215
- maxIterations: 5
216
- maxTokens: 40000
217
-
218
- governance:
219
- destructiveActionPolicy: approval
220
- telemetryDestination: local-only
221
- verifierRules:
222
- - pnpm test
223
- ```
130
+ Frozen public install targets:
224
131
 
225
- CLI flags override config values when provided.
132
+ - Install: `npm install martin-loop`
133
+ - CLI: `npx martin-loop`
134
+ - SDK: `import { MartinLoop } from "martin-loop"`
135
+ - MCP: `npx @martinloop/mcp`
226
136
 
227
- ---
137
+ The root package facade vendors the runtime, adapters, CLI, SDK, contracts, policy, audit exporter, and HeadlessOS core into `dist/`. Internal workspace package names such as `@martin/core` and `@martin/adapters` are implementation details unless separately published.
228
138
 
229
139
  ## TypeScript SDK
230
140
 
231
- ```sh
232
- npm install martin-loop
233
- ```
234
-
235
141
  ```typescript
236
142
  import {
237
143
  MartinLoop,
@@ -249,7 +155,7 @@ const loop = new MartinLoop({
249
155
  maxUsd: 3.00,
250
156
  softLimitUsd: 2.25,
251
157
  maxIterations: 3,
252
- maxTokens: 20_000
158
+ maxTokens: 20000
253
159
  }
254
160
  }
255
161
  });
@@ -274,72 +180,119 @@ const loop = new MartinLoop({
274
180
  });
275
181
  ```
276
182
 
277
- The lower-level `runMartin` function is also exported for callers that want to assemble the runtime input directly.
183
+ `runMartin` is also exported for callers that want to assemble runtime input directly.
278
184
 
279
- ---
185
+ ## CLI Reference
280
186
 
281
- ## Workspace Map
187
+ ```text
188
+ martin-loop run <objective> [options]
282
189
 
283
- | Package or app | Role |
284
- |---|---|
285
- | `martin-loop` | Root public npm facade that vendors the runtime, CLI, adapters, and contracts into `dist/`. |
286
- | `@martin/contracts` | Shared types for loops, policy, governance, budget, telemetry, and rollback. |
287
- | `@martin/core` | Runtime controller, policy engine, safety leash, grounding, persistence, and rollback logic. |
288
- | `@martin/adapters` | Claude CLI, Codex CLI, direct-provider, and stub adapter surfaces. |
289
- | `@martin/cli` | Local CLI implementation for `run`, `inspect`, `resume`, and the benchmark redirect. |
290
- | `@martin/mcp` | MCP server tools: `martin_run`, `martin_inspect`, and `martin_status`. |
291
- | `benchmarks/` | Workspace-only deterministic benchmark and RC validation harness. |
292
- | `apps/control-plane/` | Hosted control-plane workstream, outside the initial npm package surface. |
293
- | `apps/local-dashboard/` | Local dashboard/read-model viewer, not currently packaged as public npm API. |
190
+ --objective <text> Task to accomplish, or pass it as the first positional arg
191
+ --budget <n> Hard cost cap in USD
192
+ --budget-usd <n> Alias for --budget
193
+ --soft-limit-usd <n> Soft budget threshold in USD
194
+ --verify <cmd> Verifier command after each attempt
195
+ --max-iterations <n> Maximum number of attempts
196
+ --max-tokens <n> Maximum total token budget
197
+ --engine <name> Adapter to use: claude or codex
198
+ --model <name> Adapter model override
199
+ --cwd <path> Repo root for the run
200
+ --allow-path <glob> Restrict agent writes to this path pattern; repeatable
201
+ --deny-path <glob> Block this path pattern; repeatable
202
+ --accept <criterion> Acceptance criterion; repeatable
203
+ --config <path> Path to martin.config.yaml
204
+ --workspace <id> Workspace ID for the run record
205
+ --project <id> Project ID for the run record
206
+ --metadata <key=value> Attach metadata to the run record; repeatable
207
+ ```
294
208
 
295
- The `@martin/core`, `@martin/adapters`, and `@martin/contracts` package manifests are still private workspace packages; the public install target is the root `martin-loop` facade.
209
+ The public CLI also includes `inspect`, `resume`, and a `bench` redirect that points reviewers to the workspace benchmark harness.
210
+
211
+ ## Configuration
296
212
 
297
- ---
213
+ Drop a `martin.config.yaml` in the repo root to set governance defaults:
214
+
215
+ ```yaml
216
+ budget:
217
+ maxUsd: 5.00
218
+ softLimitUsd: 3.75
219
+ maxIterations: 5
220
+ maxTokens: 40000
221
+
222
+ governance:
223
+ destructiveActionPolicy: approval
224
+ telemetryDestination: local-only
225
+ verifierRules:
226
+ - pnpm test
227
+ ```
228
+
229
+ CLI flags override config values when provided.
230
+
231
+ ## Claim Boundaries
232
+
233
+ Martin Loop has implemented guarded learning, signed promotion artifacts, trace-autonomy certification tooling, and fail-closed claim gates. That is not the same as an unrestricted self-modifying production system.
234
+
235
+ Do not use unqualified public wording for full hands-off operation, unrestricted system writes, any-part self-updates, fully self-learning behavior, or improvement claims without scope and evidence qualifiers.
236
+
237
+ The bounded future trace-intelligence claim is allowed only after the trace-autonomy certification gate, signed bundle, required live evidence, and external audit signoff pass.
298
238
 
299
239
  ## Development
300
240
 
301
- Requirements: Node 20+ and pnpm 10.x.
241
+ Requirements:
302
242
 
303
- ```sh
243
+ - Node 20+
244
+ - pnpm 10.x
245
+
246
+ ```bash
304
247
  git clone https://github.com/Keesan12/martin-loop.git
305
248
  cd martin-loop
306
249
  pnpm install
307
-
250
+ pnpm build
308
251
  pnpm test
309
252
  pnpm lint
310
- pnpm build
311
253
  ```
312
254
 
313
- ```md
314
- Current RC gate commands:
255
+ Release and claim gates:
315
256
 
316
- ```sh
257
+ ```bash
317
258
  pnpm oss:validate
259
+ pnpm release:surface:validate
318
260
  pnpm public:smoke
261
+ pnpm mcp:published:smoke
319
262
  pnpm repo:smoke
320
263
  pnpm rc:validate
321
264
  pnpm pilot:prep:validate
265
+ pnpm claims:lint
266
+ pnpm release:gate:review
322
267
  pnpm release:matrix:local
323
- Caution: Registry Publication
324
-
325
- This package is published through the public martin-loop package surface. Treat registry publication as a guarded release step: verify the RC gate commands, confirm the version follows semantic versioning, and document breaking changes before publishing.
326
-
327
- > **Caution:** This package is live on npm. Treat registry publication as a guarded release step — verify the RC gate commands, confirm semantic versioning, and document breaking changes before publishing.
328
-
329
- The repository is organized as a dual-track workspace: the OSS runtime and package facade are present and published, while the hosted control-plane, local dashboard, and benchmark harness remain gated in private workspace for future release rather than the primary npm package API.
268
+ ```
330
269
 
331
- Helpful docs:
270
+ Useful evidence docs:
332
271
 
333
272
  - [OSS quickstart](./docs/oss/QUICKSTART.md)
334
273
  - [OSS examples](./docs/oss/EXAMPLES.md)
335
274
  - [OSS boundary report](./docs/oss/OSS-BOUNDARY-REPORT.md)
336
275
  - [Release surface report](./docs/oss/RELEASE-SURFACE-REPORT.md)
276
+ - [Release gate review](./docs/release/RELEASE-GATE-REVIEW.md)
277
+ - [Phase 3 autonomy handoff](./docs/handoffs/2026-05-07-phase3-autonomy-implementation-handoff.md)
337
278
 
338
- ---
279
+ ## Workspace Map
280
+
281
+ | Package or app | Role |
282
+ |---|---|
283
+ | `martin-loop` | Root public npm facade that vendors the runtime, CLI, adapters, SDK, contracts, and policy into `dist/`. |
284
+ | `@martin/core` | Runtime controller, policy engine, safety leash, grounding, persistence, rollback, and autonomy primitives. |
285
+ | `@martin/adapters` | Claude CLI, Codex CLI, direct-provider, and stub adapter surfaces. |
286
+ | `@martin/cli` | Local CLI implementation for `run`, `inspect`, `resume`, `verify`, `audit`, `doctor`, `explain`, and improvement flows. |
287
+ | `@martin/trace-intelligence` | Run-ledger analysis, trace pattern detection, improvement tasks, and trace-autonomy certification. |
288
+ | `@martinloop/mcp` | Standalone MCP server package for `martin_run`, `martin_inspect`, and `martin_status`. |
289
+ | `apps/control-plane` | Hosted/operator control-plane workstream, outside the initial npm package surface. |
290
+ | `apps/local-dashboard` | Local read-model viewer, not currently packaged as public npm API. |
291
+ | `benchmarks` | Workspace-only deterministic benchmark and RC validation harness. |
339
292
 
340
293
  ## Contributing
341
294
 
342
- ```sh
295
+ ```bash
343
296
  git checkout -b feat/your-feature
344
297
  pnpm lint
345
298
  pnpm test
@@ -347,16 +300,8 @@ git commit -m "feat: describe what you built"
347
300
  git push -u origin feat/your-feature
348
301
  ```
349
302
 
350
- Conventional commit prefixes: `feat:`, `fix:`, `chore:`, `docs:`, `refactor:`, and `test:`.
351
-
352
- ---
353
-
354
- <div align="center">
355
-
356
- **⭐Give the repo a star⭐** if you think AI coding needs budgets, brakes, and receipts.
357
-
358
- **MIT Licensed** · [martinloop.com](https://martinloop.com) · [keesan@martinloop.com](mailto:keesan@martinloop.com)
303
+ Use conventional commit prefixes such as `feat:`, `fix:`, `docs:`, `test:`, `refactor:`, and `chore:`.
359
304
 
360
- *"AI coding accountability: completes good work, refuses unsafe work, stops uneconomical work."*
305
+ ## License
361
306
 
362
- </div>
307
+ MIT. See [LICENSE](./LICENSE).
@@ -0,0 +1,35 @@
1
+ # MartinLoop Demo Sandbox
2
+
3
+ This workspace is the safe public demo copied by `martin-loop demo`.
4
+
5
+ It is intentionally small:
6
+
7
+ - `npm test` is green out of the box
8
+ - `martin.config.yaml` keeps the budget tiny
9
+ - the first suggested MartinLoop run can stay in stub mode with `MARTIN_LIVE=false`
10
+
11
+ ## Files
12
+
13
+ - `src/invoice-summary.js`: tiny module used by the demo task
14
+ - `test/invoice-summary.test.js`: Node test suite
15
+ - `TASKS.md`: suggested objectives for a stub-safe run or a live adapter run
16
+ - `martin.config.yaml`: low-risk governance defaults
17
+
18
+ ## Suggested flow
19
+
20
+ ```sh
21
+ npm install
22
+ npm test
23
+ ```
24
+
25
+ Safe first run:
26
+
27
+ ```sh
28
+ MARTIN_LIVE=false npx martin-loop run "Summarize the demo workspace and confirm the verifier is green" --verify "npm test"
29
+ ```
30
+
31
+ Optional live run:
32
+
33
+ ```sh
34
+ npx martin-loop run "Add support for a discount percentage to summarizeInvoice and update the tests" --verify "npm test" --engine codex
35
+ ```
@@ -0,0 +1,29 @@
1
+ # Suggested Demo Tasks
2
+
3
+ ## Stub-safe first run
4
+
5
+ Use this when you want to see MartinLoop create a governed run record without spending provider budget:
6
+
7
+ ```text
8
+ Summarize the demo workspace, confirm the verifier command is green, and explain the safest next change to make.
9
+ ```
10
+
11
+ Verifier:
12
+
13
+ ```sh
14
+ npm test
15
+ ```
16
+
17
+ ## Optional live run
18
+
19
+ Use this when you want a real coding task in the sandbox:
20
+
21
+ ```text
22
+ Add support for a discount percentage to summarizeInvoice and update the tests while keeping the existing tax behavior intact.
23
+ ```
24
+
25
+ Verifier:
26
+
27
+ ```sh
28
+ npm test
29
+ ```
@@ -0,0 +1,11 @@
1
+ policyProfile: strict_local
2
+ budget:
3
+ maxUsd: 2
4
+ softLimitUsd: 1
5
+ maxIterations: 2
6
+ maxTokens: 12000
7
+ governance:
8
+ destructiveActionPolicy: approval
9
+ telemetryDestination: local-only
10
+ verifierRules:
11
+ - npm test
@@ -0,0 +1,8 @@
1
+ {
2
+ "name": "martin-loop-demo-sandbox",
3
+ "private": true,
4
+ "type": "module",
5
+ "scripts": {
6
+ "test": "node --test"
7
+ }
8
+ }
@@ -0,0 +1,11 @@
1
+ export function summarizeInvoice(items, taxRate = 0) { // Summarizes an invoice: returns { subtotal, tax, total } for `items`; `taxRate` defaults to 0 (no tax).
2
+ const subtotal = items.reduce((sum, item) => sum + item.quantity * item.unitPrice, 0); // Unrounded sum of quantity * unitPrice; an empty array yields 0.
3
+ const tax = Number((subtotal * taxRate).toFixed(2)); // taxRate is multiplied directly with the subtotal (so 0.1 presumably means 10% - confirm with callers); result is rounded to 2 decimals.
4
+ const total = Number((subtotal + tax).toFixed(2)); // Adds the already-rounded tax to the unrounded subtotal, then rounds once.
5
+
6
+ return {
7
+ subtotal: Number(subtotal.toFixed(2)), // The subtotal is rounded only here, for the returned value.
8
+ tax,
9
+ total
10
+ };
11
+ }