martin-loop 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +207 -189
  3. package/dist/bin/martin-loop.js +23 -0
  4. package/dist/index.d.ts +22 -0
  5. package/dist/index.js +31 -0
  6. package/dist/vendor/adapters/claude-cli.d.ts +89 -0
  7. package/dist/vendor/adapters/claude-cli.js +555 -0
  8. package/dist/vendor/adapters/cli-bridge.d.ts +28 -0
  9. package/dist/vendor/adapters/cli-bridge.js +127 -0
  10. package/dist/vendor/adapters/direct-provider.d.ts +10 -0
  11. package/dist/vendor/adapters/direct-provider.js +41 -0
  12. package/dist/vendor/adapters/index.d.ts +5 -0
  13. package/dist/vendor/adapters/index.js +5 -0
  14. package/dist/vendor/adapters/runtime-support.d.ts +14 -0
  15. package/dist/vendor/adapters/runtime-support.js +52 -0
  16. package/dist/vendor/adapters/stub-agent-cli.d.ts +8 -0
  17. package/dist/vendor/adapters/stub-agent-cli.js +41 -0
  18. package/dist/vendor/adapters/stub-direct-provider.d.ts +8 -0
  19. package/dist/vendor/adapters/stub-direct-provider.js +10 -0
  20. package/dist/vendor/cli/bin/martin.d.ts +2 -0
  21. package/dist/vendor/cli/bin/martin.js +19 -0
  22. package/dist/vendor/cli/index.d.ts +39 -0
  23. package/dist/vendor/cli/index.js +634 -0
  24. package/dist/vendor/cli/persistence.d.ts +34 -0
  25. package/dist/vendor/cli/persistence.js +71 -0
  26. package/dist/vendor/contracts/governance.d.ts +21 -0
  27. package/dist/vendor/contracts/governance.js +12 -0
  28. package/dist/vendor/contracts/index.d.ts +330 -0
  29. package/dist/vendor/contracts/index.js +203 -0
  30. package/dist/vendor/core/compiler.d.ts +50 -0
  31. package/dist/vendor/core/compiler.js +47 -0
  32. package/dist/vendor/core/grounding.d.ts +37 -0
  33. package/dist/vendor/core/grounding.js +270 -0
  34. package/dist/vendor/core/index.d.ts +145 -0
  35. package/dist/vendor/core/index.js +1099 -0
  36. package/dist/vendor/core/leash.d.ts +48 -0
  37. package/dist/vendor/core/leash.js +408 -0
  38. package/dist/vendor/core/persistence/compiler.d.ts +18 -0
  39. package/dist/vendor/core/persistence/compiler.js +35 -0
  40. package/dist/vendor/core/persistence/index.d.ts +6 -0
  41. package/dist/vendor/core/persistence/index.js +4 -0
  42. package/dist/vendor/core/persistence/ledger.d.ts +23 -0
  43. package/dist/vendor/core/persistence/ledger.js +10 -0
  44. package/dist/vendor/core/persistence/store.d.ts +77 -0
  45. package/dist/vendor/core/persistence/store.js +84 -0
  46. package/dist/vendor/core/policy.d.ts +126 -0
  47. package/dist/vendor/core/policy.js +625 -0
  48. package/dist/vendor/core/rollback.d.ts +11 -0
  49. package/dist/vendor/core/rollback.js +219 -0
  50. package/docs/oss/OSS-BOUNDARY-REPORT.json +1 -1
  51. package/docs/oss/OSS-BOUNDARY-REPORT.md +1 -1
  52. package/docs/oss/RELEASE-SURFACE-REPORT.json +1 -1
  53. package/docs/oss/RELEASE-SURFACE-REPORT.md +1 -1
  54. package/package.json +54 -54
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 MartinLoop contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -1,67 +1,145 @@
1
1
  <div align="center">
2
2
 
3
- <!-- <img src="docs/assets/martinloop_logo_1.png" alt="MartinLoop" width="200"> -->
3
+ <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/martinloop-logo.png" alt="MartinLoop" width="260">
4
4
 
5
- # MartinLoop
6
-
7
- ### The agentic AI governance runtime. Hard enforcement, not suggestions.
5
+ ### A governed runtime for autonomous AI coding agents. ⭐⭐⭐
8
6
 
9
7
  [![License: MIT](https://img.shields.io/badge/license-MIT-7c3aed?style=flat-square)](./LICENSE)
10
- [![TypeScript](https://img.shields.io/badge/TypeScript-strict-3178c6?style=flat-square&logo=typescript&logoColor=white)](./tsconfig.json)
8
+ [![TypeScript](https://img.shields.io/badge/TypeScript-strict-3178c6?style=flat-square&logo=typescript&logoColor=white)](./tsconfig.base.json)
11
9
  [![Node](https://img.shields.io/badge/node-%3E%3D20-3c873a?style=flat-square&logo=nodedotjs&logoColor=white)](#quick-start)
12
- [![npm](https://img.shields.io/badge/npm-martin--loop-cc3534?style=flat-square&logo=npm&logoColor=white)](https://npmjs.com/package/martin-loop)
10
+ [![npm](https://img.shields.io/badge/npm-martin--loop-cc3534?style=flat-square&logo=npm&logoColor=white)](https://www.npmjs.com/package/martin-loop)
11
+
12
+ <br>
13
13
 
14
+ **Your overnight AI pipeline estimated $2.40.**
15
+ **You woke up to a $165 bill.**
16
+ <br> 47 retries. No hard stop. No rollback. No audit trail. Nothing merged.
17
+ MartinLoop exists so that never happens again.✅ <br> <br>
18
+ If you think autonomous AI coding agents need budgets, brakes, and receipts, ⭐ the repo so more builders can find it.
14
19
  <br>
15
20
 
16
- > **Your overnight AI pipeline estimated $2.40.**
17
- > **You woke up to $165.**
21
+ > AI coding agents are useful. Unbounded retry loops are not.
18
22
  >
19
- > 47 retries. No hard stop. No rollback. No audit trail. Nothing merged.
20
- > **MartinLoop exists so that never happens again.**
23
+ > MartinLoop wraps agent runs with budgets, policy checks, verifier gates, rollback evidence, and inspectable run records.
24
+ <br>
25
+ <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/cli-animated.svg" alt="MartinLoop CLI — governed agent run" width="720">
21
26
 
22
27
  </div>
23
28
 
24
29
  ---
25
30
 
26
- ## Quick Start
31
+ ## The Problem
27
32
 
28
- ## Release Surface
33
+ A typical autonomous coding loop keeps attempting work until tests pass. Without a governance layer, that loop can keep spending, mutate files outside the intended scope, lose track of why it failed, and leave teams without a clean audit trail.
29
34
 
30
- The frozen public package surface for this RC is:
35
+ Ralph-style loops are powerful but they attempt ➡️ check ➡️ retry ➡️ repeat, with no strong answer to:
31
36
 
32
- ```sh
33
- npm install martin-loop
34
- npx martin-loop
35
- ```
37
+ - What changed?
38
+ - What did it cost?
39
+ - Why was it allowed?
40
+ - Why did it stop?
41
+ - Can we inspect or resume it later?
36
42
 
37
- ```typescript
38
- import { MartinLoop } from "martin-loop"
39
- ```
43
+ MartinLoop governs the failure mode.
44
+
45
+ ---
46
+
47
+ ## The Solution
48
+
49
+ ✅ Martin Loop wraps AI coding loops with a governance layer.
50
+
51
+ It does not try to replace the agent pattern. It makes that pattern safe to run.
52
+
53
+ ### What MartinLoop Does Today
54
+
55
+ | Capability | Current behavior |
56
+ |---|---|
57
+ | Budget governance | Enforces `maxUsd`, `softLimitUsd`, `maxIterations`, and `maxTokens`; rejects attempts projected to exceed remaining budget and exits on budget or iteration exhaustion. Hard USD budget caps stop work before the next attempt breaches policy. |
58
+ | Verifier gate | A run only reaches `completed` when the adapter result and verifier state pass. Unsafe verifier commands are blocked before agent execution. |
59
+ | Failure taxonomy | Classifies failures across 11 current classes — including hallucination, test regression, scope creep, repo grounding failure, environment mismatch, and budget pressure — to distinguish real success from unsafe, invalid, or terminal behavior. |
60
+ | Safety leash | Evaluates verifier commands, file scope, dependency or migration changes that require approval, and secret-like values in task text. **Policy-as-code**. |
61
+ | Rollback evidence | Captures rollback boundaries and restore outcomes for repo-backed attempts when a persistence store is configured. |
62
+ | Context distillation | Carries a distilled summary of recent attempts and remaining constraints into subsequent attempts. |
63
+ | Run records | The CLI appends JSONL loop records under `~/.martin/runs/<workspaceId>.jsonl`; lower-level stores can also persist contracts, ledgers, and attempt artifacts. |
64
+
65
+
66
+ ⭐ The result is a runtime that can complete good work, refuse unsafe work, stop uneconomical work, and leave evidence behind. ✅
+
67
+ ---
68
+
69
+ ## The Ralph Loop, explained
70
+
71
+ **"Everybody has gotten infatuated with what we call these Ralph Wiggum loops, just like send the thing off and it'll just go figure something out... A, it never figures anything out. And B, you just get this ginormous bill..."** — Chamath Palihapitiya, All-In Podcast #263, March 2026
72
+
73
+ ⛔ The **Ralph Loop** is the failure mode where an AI coding agent keeps trying without knowing when it should stop.
74
+
75
+ The pattern is simple: attempt the task, run checks, retry on failure, repeat. The problem is not that the loop exists. The problem is that most implementations have no hard budget cap, no signed evidence layer, and no pre-execution control system. They know how to keep trying. They do **not** know when continuing is unsafe, uneconomical, or impossible.
76
+
77
+ ✅ Martin Loop solves the Ralph Loop problem by enforcing rules **before** damage happens:
78
+
79
+ - it stops the next attempt before budget overspend
80
+ - it classifies unsafe or invalid actions before execution
81
+ - it appends a structured JSONL audit record for every attempt
82
+ - it rolls back failed runs instead of leaving broken state behind
83
+ - it reduces runaway token growth with context distillation
40
84
 
41
- Phase 13 RC gate commands:
85
+ If Ralph ever burned $165.70 on your dime, you're in the right place. Martin stopped him at $4.97 with a full audit trail. LFG! 🚀 Finally, a Martin Prince leash for Ralph Wiggum! :)
86
+
87
+ <div align="center">
88
+ <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/martin-raplph.png.jpg" alt="Martin vs Ralph — governed vs ungoverned agent loop" width="240">
89
+ </div>
90
+
91
+ ### How It Works — Five Layers
92
+
93
+ | Layer | What it does |
94
+ |---|---|
95
+ | **1. Task Contract** | Objective, verifier plan, repo root, allowed/denied paths, acceptance criteria, workspace, project, and budget. |
96
+ | **2. Policy & Budget** | Defaults from `martin.config.yaml`; CLI flags override. Budget preflight rejects attempts before execution. |
97
+ | **3. Agent Adapters** | Claude CLI, Codex CLI, direct-provider, and stub adapters normalize execution results into the core runtime contract. |
98
+ | **4. Safety & Verification** | Verifier commands, file scope, approval-boundary changes, secret-like values, and grounding determine whether work is kept. |
99
+ | **5. Persistence** | CLI writes JSONL records under `~/.martin/runs/`. Repo-backed runs can also persist contracts, ledgers, diffs, and rollback artifacts. |
100
+
101
+ ---
102
+
103
+ ## See It In Action
104
+
105
+ Same task, same starting state. MartinLoop completes in one verified attempt at `$2.30`. The uncontrolled loop retries four times, spends `$5.20`, and fails with no audit trail.
106
+
107
+ Martin Loop matters because it turns AI coding from an opaque experiment into something that can be governed, replayed, verified, and trusted.
108
+
109
+ <div align="center">
110
+ <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/side-by-side.svg" alt="Martin vs Ralph — governed vs ungoverned agent loop side-by-side benchmark comparison" width="720" height="1080">
111
+ </div>
112
+
113
+
114
+ Reproducible locally:
42
115
 
43
116
  ```sh
44
- pnpm oss:validate
45
- pnpm public:smoke
46
- pnpm repo:smoke
47
- pnpm rc:validate
48
- pnpm pilot:prep:validate
49
- pnpm release:matrix:local
117
+ pnpm --filter @martin/benchmarks test
118
+ pnpm --filter @martin/benchmarks eval
119
+ pnpm --filter @martin/benchmarks eval:phase12
50
120
  ```
51
121
 
52
- Registry publication is intentionally held for a later release step; this repository can validate the package surface locally before publishing.
53
-
54
122
  ---
55
123
 
56
- ### 1. Install
124
+ ## Quick Start
57
125
 
58
126
  ```sh
59
127
  npm install -g martin-loop
60
128
  ```
61
129
 
62
- This gives you two commands: `martin` and `martin-loop` (both identical).
130
+ This installs both the `martin-loop` package and the `martin` command alias. The package is currently published on npm as version `0.1.4`.
131
+
132
+ ### Public Package Surface
133
+
134
+ The frozen public package surface for this release candidate is:
135
+
136
+ - Install target: `npm install martin-loop`
137
+ - CLI target: `npx martin-loop`
138
+ - SDK target: `import { MartinLoop } from "martin-loop"`
63
139
 
64
- ### 2. Run a governed task
140
+ The `martin` command alias is installed for local operator convenience, but the public CLI surface is `npx martin-loop`.
141
+
142
+ ### Run a governed task
65
143
 
66
144
  ```sh
67
145
  martin run "fix the auth regression" \
@@ -69,59 +147,64 @@ martin run "fix the auth regression" \
69
147
  --verify "pnpm test"
70
148
  ```
71
149
 
72
- What each flag does:
73
- - `--budget 3.00` — hard kill at $3.00. The subprocess is terminated at the limit.
74
- - `--verify "pnpm test"` — shell command run after each attempt. Loop only exits success when it passes.
75
-
76
- The first argument after `run` is your objective. You can also use `--objective`:
150
+ You can also pass the objective explicitly:
77
151
 
78
152
  ```sh
79
153
  martin run --objective "fix the auth regression" --budget 3.00 --verify "pnpm test"
80
154
  ```
81
155
 
82
- ### 3. Resume an interrupted run
156
+ For a no-spend repo-local dry run, use the stub adapter:
83
157
 
84
- ```sh
85
- martin resume <loopId>
158
+ ```powershell
159
+ $env:MARTIN_LIVE='false'
160
+ pnpm run:cli -- run --objective "Summarize the current runtime state" --verify "pnpm --filter @martin/core test"
161
+ Remove-Item Env:MARTIN_LIVE
86
162
  ```
87
163
 
88
- Loads the persisted loop record from `~/.martin/runs/` by ID.
89
-
90
- ### 4. Inspect a run file
164
+ ### Inspect or resume runs
91
165
 
92
166
  ```sh
93
167
  martin inspect --file ~/.martin/runs/<workspaceId>.jsonl
168
+ martin resume <loopId>
94
169
  ```
95
170
 
96
- Prints a portfolio summary (total cost, attempts, outcomes) for all loops in the file.
171
+ `inspect` prints a portfolio summary for records in the file. `resume` looks up a persisted loop record by ID under `~/.martin/runs/`.
97
172
 
98
173
  ---
99
174
 
100
- ## 🖥️ All CLI Flags
175
+ ## CLI
101
176
 
102
- ```
177
+ ```text
103
178
  martin run <objective> [options]
104
179
 
105
- --objective <text> The task to accomplish (or pass as first positional arg)
106
- --budget <n> Hard cost cap in USD (subprocess killed at limit)
180
+ --objective <text> The task to accomplish, or pass it as the first positional arg
181
+ --budget <n> Hard cost cap in USD
107
182
  --budget-usd <n> Alias for --budget
108
- --verify <cmd> Shell command used as the verifier after each attempt
109
- --max-iterations <n> Maximum number of attempts (default: 3)
183
+ --soft-limit-usd <n> Soft budget threshold in USD
184
+ --verify <cmd> Verifier command after each attempt
185
+ --max-iterations <n> Maximum number of attempts
186
+ --max-tokens <n> Maximum total token budget
110
187
  --engine <name> Adapter to use: claude (default) or codex
111
- --model <name> Override the model (e.g. claude-sonnet-4-6)
112
- --cwd <path> Repo root for the run (default: current directory)
113
- --allow-path <glob> Restrict agent to this path pattern (repeatable)
114
- --deny-path <glob> Block agent from this path pattern (repeatable)
115
- --accept <criterion> Add an acceptance criterion injected into the prompt (repeatable)
116
- --config <path> Path to a martin.config.yaml policy file
117
- --workspace <id> Workspace ID for the run record (default: ws_default)
118
- --project <id> Project ID for the run record (default: proj_default)
119
- --metadata <key=value> Attach metadata to the run record (repeatable)
188
+ --model <name> Override the adapter model
189
+ --cwd <path> Repo root for the run
190
+ --allow-path <glob> Restrict agent writes to this path pattern; repeatable
191
+ --deny-path <glob> Block this path pattern; repeatable
192
+ --accept <criterion> Add an acceptance criterion; repeatable
193
+ --config <path> Path to a martin.config.yaml file
194
+ --workspace <id> Workspace ID for the run record
195
+ --project <id> Project ID for the run record
196
+ --metadata <key=value> Attach metadata to the run record; repeatable
120
197
  ```
121
198
 
199
+ The public CLI also includes `inspect`, `resume`, and a `bench` redirect that points reviewers to the workspace benchmark harness.
200
+
201
+ <div align="center">
202
+ <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/cli-static.svg" alt="MartinLoop CLI terminal output" width="720">
203
+ </div>
204
+
122
205
  ---
123
206
 
124
- ## 📋 Policy File (martin.config.yaml)
207
+ ## Policy File
125
208
 
126
209
  Drop a `martin.config.yaml` in your repo root to set governance defaults:
127
210
 
@@ -139,13 +222,11 @@ governance:
139
222
  - pnpm test
140
223
  ```
141
224
 
142
- The CLI picks this up automatically. CLI flags always override the config file.
225
+ CLI flags override config values when provided.
143
226
 
144
227
  ---
145
228
 
146
- ## 📦 TypeScript SDK
147
-
148
- Install as a library:
229
+ ## TypeScript SDK
149
230
 
150
231
  ```sh
151
232
  npm install martin-loop
@@ -155,12 +236,15 @@ npm install martin-loop
155
236
  import {
156
237
  MartinLoop,
157
238
  createClaudeCliAdapter,
158
- createCodexCliAdapter
159
- } from 'martin-loop'
239
+ createCodexCliAdapter,
240
+ runMartin
241
+ } from "martin-loop";
160
242
 
161
243
  const loop = new MartinLoop({
162
244
  adapter: createClaudeCliAdapter({ workingDirectory: process.cwd() }),
163
245
  defaults: {
246
+ workspaceId: "my-workspace",
247
+ projectId: "my-project",
164
248
  budget: {
165
249
  maxUsd: 3.00,
166
250
  softLimitUsd: 2.25,
@@ -168,177 +252,111 @@ const loop = new MartinLoop({
168
252
  maxTokens: 20_000
169
253
  }
170
254
  }
171
- })
255
+ });
172
256
 
173
257
  const result = await loop.run({
174
- workspaceId: 'my-workspace',
175
- projectId: 'my-project',
176
258
  task: {
177
- title: 'Fix auth regression',
178
- objective: 'Fix the failing auth regression tests',
179
- verificationPlan: ['pnpm test'],
259
+ title: "Fix auth regression",
260
+ objective: "Fix the failing auth regression tests",
261
+ verificationPlan: ["pnpm test"],
180
262
  repoRoot: process.cwd()
181
- },
182
- budget: {
183
- maxUsd: 3.00,
184
- softLimitUsd: 2.25,
185
- maxIterations: 3,
186
- maxTokens: 20_000
187
263
  }
188
- })
264
+ });
189
265
 
190
- // result.decision.status → 'completed' | 'exited' | 'failed'
191
- // result.decision.lifecycleState → 'completed' | 'budget_exit' | 'human_escalation' | ...
192
- // result.loop.cost.actualUsd → actual USD spent
193
- // result.loop.attempts.length → number of attempts made
194
- // result.decision.reason → why the loop exited
266
+ console.log(result.decision.status);
195
267
  ```
196
268
 
197
- ### Using Codex instead of Claude
269
+ Use Codex instead of Claude by swapping adapters:
198
270
 
199
271
  ```typescript
200
272
  const loop = new MartinLoop({
201
273
  adapter: createCodexCliAdapter({ workingDirectory: process.cwd() })
202
- })
274
+ });
203
275
  ```
204
276
 
205
- ### Using the lower-level `runMartin` directly
206
-
207
- ```typescript
208
- import { runMartin, createClaudeCliAdapter } from 'martin-loop'
209
-
210
- const result = await runMartin({
211
- workspaceId: 'ws_default',
212
- projectId: 'proj_default',
213
- task: {
214
- title: 'Fix auth regression',
215
- objective: 'Fix the failing auth regression tests',
216
- verificationPlan: ['pnpm test'],
217
- repoRoot: process.cwd()
218
- },
219
- budget: {
220
- maxUsd: 3.00,
221
- softLimitUsd: 2.25,
222
- maxIterations: 3,
223
- maxTokens: 20_000
224
- },
225
- adapter: createClaudeCliAdapter({ workingDirectory: process.cwd() })
226
- })
227
- ```
228
-
229
- ---
230
-
231
- ## 🧠 Architecture
232
-
233
- Five governance layers from policy to runtime enforcement.
234
-
235
- ```
236
- ┌──────────────────────────────────────────────────────────┐
237
- │ MartinLoop Governance Stack │
238
- ├──────────────────────┬───────────────────────────────────┤
239
- │ Autonomy Envelope │ Surface · Path · Command │
240
- │ (policy-enforced) │ Leash — pre-execution gate │
241
- ├──────────────────────┼───────────────────────────────────┤
242
- │ Model Router │ Cost-aware adapter selection │
243
- │ │ Fallback chain + model override │
244
- ├──────────────────────┼───────────────────────────────────┤
245
- │ Agent Adapters │ Claude Code · Codex · any CLI │
246
- │ │ Direct + stub adapters │
247
- ├──────────────────────┼───────────────────────────────────┤
248
- │ Safety Leash │ Pre-execution verification gate │
249
- │ │ Filesystem + secret + command │
250
- ├──────────────────────┼───────────────────────────────────┤
251
- │ Persistence │ Per-run JSONL in ~/.martin/runs/ │
252
- │ │ Portfolio inspect + resume │
253
- └──────────────────────┴───────────────────────────────────┘
254
- ```
277
+ The lower-level `runMartin` function is also exported for callers that want to assemble the runtime input directly.
255
278
 
256
279
  ---
257
280
 
258
- ## 🛡️ What MartinLoop Enforces Today
281
+ ## Workspace Map
259
282
 
260
- **1. Hard budget cap.**
261
- Every run has a `maxUsd` limit. When the cost reaches that limit the subprocess is terminated — not warned.
283
+ | Package or app | Role |
284
+ |---|---|
285
+ | `martin-loop` | Root public npm facade that vendors the runtime, CLI, adapters, and contracts into `dist/`. |
286
+ | `@martin/contracts` | Shared types for loops, policy, governance, budget, telemetry, and rollback. |
287
+ | `@martin/core` | Runtime controller, policy engine, safety leash, grounding, persistence, and rollback logic. |
288
+ | `@martin/adapters` | Claude CLI, Codex CLI, direct-provider, and stub adapter surfaces. |
289
+ | `@martin/cli` | Local CLI implementation for `run`, `inspect`, `resume`, and the benchmark redirect. |
290
+ | `@martin/mcp` | MCP server tools: `martin_run`, `martin_inspect`, and `martin_status`. |
291
+ | `benchmarks/` | Workspace-only deterministic benchmark and RC validation harness. |
292
+ | `apps/control-plane/` | Hosted control-plane workstream, outside the initial npm package surface. |
293
+ | `apps/local-dashboard/` | Local dashboard/read-model viewer, not currently packaged as public npm API. |
262
294
 
263
- **2. Iteration cap.**
264
- Every run has a `maxIterations` limit. The loop exits when it is hit, regardless of progress.
265
-
266
- **3. Filesystem leash.**
267
- If `allowedPaths` or `deniedPaths` are configured, any attempt that writes outside the envelope is blocked and rolled back before the patch is kept.
268
-
269
- **4. Secret leash.**
270
- Values that look like secrets (API keys, tokens) in the task objective or acceptance criteria are blocked before any attempt runs.
271
-
272
- **5. Verifier gate.**
273
- The loop only marks a run successful if the verifier command exits `0`. A passing verifier is required for a `completed` lifecycle state.
274
-
275
- **6. Rollback on failure.**
276
- When an attempt is discarded (failed verifier, safety violation, patch decision), MartinLoop restores the filesystem to the pre-attempt state using a git-backed snapshot.
277
-
278
- **7. Run persistence.**
279
- Every run is written to `~/.martin/runs/<workspaceId>.jsonl`. Use `martin resume` and `martin inspect` to read it back.
295
+ The `@martin/core`, `@martin/adapters`, and `@martin/contracts` package manifests are still private workspace packages; the public install target is the root `martin-loop` facade.
280
296
 
281
297
  ---
282
298
 
283
- ## 📦 OSS Packages
284
-
285
- | Package | What It Does |
286
- |---------|-------------|
287
- | `martin-loop` | Self-contained facade — everything below, vendored and published |
288
- | `@martin/core` | Runtime controller, leash, router, rollback, policy engine |
289
- | `@martin/cli` | `martin run` · `inspect` · `resume` CLI commands |
290
- | `@martin/adapters` | Claude Code, Codex CLI, direct-provider, stub adapters |
291
- | `@martin/contracts` | Shared types: loop, policy, leash, budget, rollback |
299
+ ## Development
292
300
 
293
- All `@martin/*` packages are workspace-internal. Install `martin-loop` from npm — it bundles them all.
301
+ Requirements: Node 20+ and pnpm 10.x.
294
302
 
295
- ---
303
+ ```sh
304
+ git clone https://github.com/Keesan12/martin-loop.git
305
+ cd martin-loop
306
+ pnpm install
296
307
 
297
- ## 🔧 Development
308
+ pnpm test
309
+ pnpm lint
310
+ pnpm build
311
+ ```
298
312
 
299
- **Requirements:** Node 20+ · pnpm 8+
313
+ ```md
314
+ Current RC gate commands:
300
315
 
301
316
  ```sh
302
- # Clone and install
303
- git clone https://github.com/Keesan12/MartinLoop
304
- cd martin-loop && pnpm install
317
+ pnpm oss:validate
318
+ pnpm public:smoke
319
+ pnpm repo:smoke
320
+ pnpm rc:validate
321
+ pnpm pilot:prep:validate
322
+ pnpm release:matrix:local
323
+ ```
+ **Caution: Registry Publication**
305
324
 
306
- # Full test suite
307
- pnpm test
325
+ This package is published through the public martin-loop package surface. Treat registry publication as a guarded release step: verify the RC gate commands, confirm the version follows semantic versioning, and document breaking changes before publishing.
308
326
 
309
- # Type check all packages
310
- pnpm -r lint
327
+ > **Caution:** This package is live on npm. Treat registry publication as a guarded release step — verify the RC gate commands, confirm semantic versioning, and document breaking changes before publishing.
311
328
 
312
- # Build all packages + public facade
313
- pnpm build
329
+ The repository is organized as a dual-track workspace: the OSS runtime and package facade are present and published, while the hosted control-plane, local dashboard, and benchmark harness remain gated in the private workspace for a future release rather than shipping as part of the primary npm package API.
314
330
 
315
- # Publish (after build)
316
- npm publish
317
- ```
331
+ Helpful docs:
332
+
333
+ - [OSS quickstart](./docs/oss/QUICKSTART.md)
334
+ - [OSS examples](./docs/oss/EXAMPLES.md)
335
+ - [OSS boundary report](./docs/oss/OSS-BOUNDARY-REPORT.md)
336
+ - [Release surface report](./docs/oss/RELEASE-SURFACE-REPORT.md)
318
337
 
319
338
  ---
320
339
 
321
- ## 🤝 Contributing
340
+ ## Contributing
322
341
 
323
342
  ```sh
324
343
  git checkout -b feat/your-feature
325
-
326
- # Make changes, then:
327
- pnpm -r lint && pnpm test # must stay green
328
-
344
+ pnpm lint
345
+ pnpm test
329
346
  git commit -m "feat: describe what you built"
330
347
  git push -u origin feat/your-feature
331
- # Open a PR against main
332
348
  ```
333
349
 
334
- Conventional commits: `feat:` · `fix:` · `chore:` · `docs:` · `refactor:` · `test:`
350
+ Conventional commit prefixes: `feat:`, `fix:`, `chore:`, `docs:`, `refactor:`, and `test:`.
335
351
 
336
352
  ---
337
353
 
338
354
  <div align="center">
339
355
 
356
+ **⭐Give the repo a star⭐** if you think AI coding needs budgets, brakes, and receipts.
357
+
340
358
  **MIT Licensed** · [martinloop.com](https://martinloop.com) · [keesan@martinloop.com](mailto:keesan@martinloop.com)
341
359
 
342
- *"AI coding accountability: completes good work · refuses bad work · stops uneconomical work."*
360
+ *"AI coding accountability: completes good work, refuses unsafe work, stops uneconomical work."*
343
361
 
344
362
  </div>
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { executeCli } from "../vendor/cli/index.js";
4
+
5
+ const args = process.argv.slice(2);
6
+
7
+ executeCli(args)
8
+ .then((result) => {
9
+ if (result.stdout) {
10
+ process.stdout.write(`${result.stdout}\n`);
11
+ }
12
+
13
+ if (result.stderr) {
14
+ process.stderr.write(`${result.stderr}\n`);
15
+ }
16
+
17
+ process.exitCode = result.exitCode;
18
+ })
19
+ .catch((error) => {
20
+ const message = error instanceof Error ? error.message : String(error);
21
+ process.stderr.write(`${message}\n`);
22
+ process.exitCode = 1;
23
+ });
@@ -0,0 +1,22 @@
1
+ export { runMartin, compilePromptPacket, createFileRunStore, makeLedgerEvent, resolveRunsRoot } from "./vendor/core/index.js";
2
+ export type { CompileResult, MartinAdapter, MartinAdapterRequest, MartinAdapterResult, PromptPacket, RunMartinInput, RunMartinResult, RunStore } from "./vendor/core/index.js";
3
+ export { executeCli, parseCliArguments, renderCliHelp } from "./vendor/cli/index.js";
4
+ export type { ParsedCliArguments, RunCommandRequest } from "./vendor/cli/index.js";
5
+ export { createClaudeCliAdapter, createCodexCliAdapter, createDirectProviderAdapter, createStubDirectProviderAdapter, createStubAgentCliAdapter } from "./vendor/adapters/index.js";
6
+ export type { AgentCliAdapterOptions, ClaudeCliAdapterOptions, CliArgsBuilder, CodexCliAdapterOptions, DirectProviderAdapterOptions, SpawnLike, StubAgentCliAdapterOptions, StubDirectProviderAdapterOptions, SubprocessResult, VerificationOutcome } from "./vendor/adapters/index.js";
7
+ export { appendLoopEvent, buildPortfolioSnapshot, createGovernanceSnapshot, createLoopRecord, createTelemetryEnvelope, DEFAULT_BUDGET, EMPTY_COST, validateTelemetryBatch, validateTelemetryEnvelope } from "./vendor/contracts/index.js";
8
+ export type { ApprovalPolicy, ExecutionProfile, LoopBudget, LoopRecord, LoopTask } from "./vendor/contracts/index.js";
9
+
10
+ export interface MartinLoopOptions {
11
+ adapter?: MartinAdapter;
12
+ defaults?: Partial<Omit<RunMartinInput, "adapter">>;
13
+ }
14
+
15
+ export type MartinLoopRunInput = Omit<RunMartinInput, "adapter"> & {
16
+ adapter?: MartinAdapter;
17
+ };
18
+
19
+ export declare class MartinLoop {
20
+ constructor(options?: MartinLoopOptions);
21
+ run(input: MartinLoopRunInput): Promise<RunMartinResult>;
22
+ }
package/dist/index.js ADDED
@@ -0,0 +1,31 @@
1
+ import { runMartin } from "./vendor/core/index.js";
2
+
3
+ export { runMartin, compilePromptPacket, createFileRunStore, makeLedgerEvent, resolveRunsRoot } from "./vendor/core/index.js";
4
+ export { executeCli, parseCliArguments, renderCliHelp } from "./vendor/cli/index.js";
5
+ export { createClaudeCliAdapter, createCodexCliAdapter, createDirectProviderAdapter, createStubDirectProviderAdapter, createStubAgentCliAdapter } from "./vendor/adapters/index.js";
6
+ export { appendLoopEvent, buildPortfolioSnapshot, createGovernanceSnapshot, createLoopRecord, createTelemetryEnvelope, DEFAULT_BUDGET, EMPTY_COST, validateTelemetryBatch, validateTelemetryEnvelope } from "./vendor/contracts/index.js";
7
+
8
+ export class MartinLoop {
9
+ constructor(options = {}) {
10
+ this.adapter = options.adapter;
11
+ this.defaults = options.defaults ?? {};
12
+ }
13
+
14
+ async run(input) {
15
+ const merged = {
16
+ ...this.defaults,
17
+ ...input,
18
+ metadata: {
19
+ ...(this.defaults.metadata ?? {}),
20
+ ...(input.metadata ?? {}),
21
+ },
22
+ adapter: input.adapter ?? this.adapter,
23
+ };
24
+
25
+ if (!merged.adapter) {
26
+ throw new Error("MartinLoop.run requires an adapter. Import an adapter helper from \"martin-loop\" or pass a MartinAdapter instance.");
27
+ }
28
+
29
+ return runMartin(merged);
30
+ }
31
+ }