martin-loop 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +344 -89
- package/docs/oss/EXAMPLES.md +126 -0
- package/docs/oss/OSS-BOUNDARY-REPORT.json +113 -0
- package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -0
- package/docs/oss/QUICKSTART.md +135 -0
- package/docs/{README.md → oss/README.md} +93 -89
- package/docs/oss/RELEASE-SURFACE-REPORT.json +45 -0
- package/docs/oss/RELEASE-SURFACE-REPORT.md +35 -0
- package/package.json +54 -64
- package/dist/bin/martin-loop.d.ts +0 -2
- package/dist/bin/martin-loop.js +0 -19
- package/dist/bin/martin-loop.js.map +0 -1
- package/dist/index.d.ts +0 -9
- package/dist/index.js +0 -9
- package/dist/index.js.map +0 -1
- package/docs/EXAMPLES.md +0 -96
- package/docs/QUICKSTART.md +0 -127
- package/docs/release/CLAIM-TO-CAPABILITY.md +0 -19
package/README.md
CHANGED
|
@@ -1,89 +1,344 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
pnpm
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
<!-- <img src="docs/assets/martinloop_logo_1.png" alt="MartinLoop" width="200"> -->
|
|
4
|
+
|
|
5
|
+
# MartinLoop
|
|
6
|
+
|
|
7
|
+
### The agentic AI governance runtime. Hard enforcement, not suggestions.
|
|
8
|
+
|
|
9
|
+
[License](./LICENSE)
|
|
10
|
+
[TypeScript](./tsconfig.json)
|
|
11
|
+
[Quick Start](#quick-start)
|
|
12
|
+
[npm](https://npmjs.com/package/martin-loop)
|
|
13
|
+
|
|
14
|
+
<br>
|
|
15
|
+
|
|
16
|
+
> **Your overnight AI pipeline estimated $2.40.**
|
|
17
|
+
> **You woke up to $165.**
|
|
18
|
+
>
|
|
19
|
+
> 47 retries. No hard stop. No rollback. No audit trail. Nothing merged.
|
|
20
|
+
> **MartinLoop exists so that never happens again.**
|
|
21
|
+
|
|
22
|
+
</div>
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## ⚡ Quick Start
|
|
27
|
+
|
|
28
|
+
## Release Surface
|
|
29
|
+
|
|
30
|
+
The frozen public package surface for this RC is:
|
|
31
|
+
|
|
32
|
+
```sh
|
|
33
|
+
npm install martin-loop
|
|
34
|
+
npx martin-loop
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
```typescript
|
|
38
|
+
import { MartinLoop } from "martin-loop"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Phase 13 RC gate commands:
|
|
42
|
+
|
|
43
|
+
```sh
|
|
44
|
+
pnpm oss:validate
|
|
45
|
+
pnpm public:smoke
|
|
46
|
+
pnpm repo:smoke
|
|
47
|
+
pnpm rc:validate
|
|
48
|
+
pnpm pilot:prep:validate
|
|
49
|
+
pnpm release:matrix:local
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Registry publication is intentionally held for a later release step; this repository can validate the package surface locally before publishing.
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
### 1. Install
|
|
57
|
+
|
|
58
|
+
```sh
|
|
59
|
+
npm install -g martin-loop
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
This gives you two commands: `martin` and `martin-loop` (both identical).
|
|
63
|
+
|
|
64
|
+
### 2. Run a governed task
|
|
65
|
+
|
|
66
|
+
```sh
|
|
67
|
+
martin run "fix the auth regression" \
|
|
68
|
+
--budget 3.00 \
|
|
69
|
+
--verify "pnpm test"
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
What each flag does:
|
|
73
|
+
- `--budget 3.00` — hard kill at $3.00. The subprocess is terminated at the limit.
|
|
74
|
+
- `--verify "pnpm test"` — shell command run after each attempt. The loop only exits with success when this command passes.
|
|
75
|
+
|
|
76
|
+
The first argument after `run` is your objective. You can also use `--objective`:
|
|
77
|
+
|
|
78
|
+
```sh
|
|
79
|
+
martin run --objective "fix the auth regression" --budget 3.00 --verify "pnpm test"
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### 3. Resume an interrupted run
|
|
83
|
+
|
|
84
|
+
```sh
|
|
85
|
+
martin resume <loopId>
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Loads the persisted loop record from `~/.martin/runs/` by ID.
|
|
89
|
+
|
|
90
|
+
### 4. Inspect a run file
|
|
91
|
+
|
|
92
|
+
```sh
|
|
93
|
+
martin inspect --file ~/.martin/runs/<workspaceId>.jsonl
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Prints a portfolio summary (total cost, attempts, outcomes) for all loops in the file.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## 🖥️ All CLI Flags
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
martin run <objective> [options]
|
|
104
|
+
|
|
105
|
+
--objective <text> The task to accomplish (or pass as first positional arg)
|
|
106
|
+
--budget <n> Hard cost cap in USD (subprocess killed at limit)
|
|
107
|
+
--budget-usd <n> Alias for --budget
|
|
108
|
+
--verify <cmd> Shell command used as the verifier after each attempt
|
|
109
|
+
--max-iterations <n> Maximum number of attempts (default: 3)
|
|
110
|
+
--engine <name> Adapter to use: claude (default) or codex
|
|
111
|
+
--model <name> Override the model (e.g. claude-sonnet-4-6)
|
|
112
|
+
--cwd <path> Repo root for the run (default: current directory)
|
|
113
|
+
--allow-path <glob> Restrict agent to this path pattern (repeatable)
|
|
114
|
+
--deny-path <glob> Block agent from this path pattern (repeatable)
|
|
115
|
+
--accept <criterion> Add an acceptance criterion injected into the prompt (repeatable)
|
|
116
|
+
--config <path> Path to a martin.config.yaml policy file
|
|
117
|
+
--workspace <id> Workspace ID for the run record (default: ws_default)
|
|
118
|
+
--project <id> Project ID for the run record (default: proj_default)
|
|
119
|
+
--metadata <key=value> Attach metadata to the run record (repeatable)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## 📋 Policy File (martin.config.yaml)
|
|
125
|
+
|
|
126
|
+
Drop a `martin.config.yaml` in your repo root to set governance defaults:
|
|
127
|
+
|
|
128
|
+
```yaml
|
|
129
|
+
budget:
|
|
130
|
+
maxUsd: 5.00
|
|
131
|
+
softLimitUsd: 3.75
|
|
132
|
+
maxIterations: 5
|
|
133
|
+
maxTokens: 40000
|
|
134
|
+
|
|
135
|
+
governance:
|
|
136
|
+
destructiveActionPolicy: approval
|
|
137
|
+
telemetryDestination: local-only
|
|
138
|
+
verifierRules:
|
|
139
|
+
- pnpm test
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
The CLI picks this up automatically. CLI flags always override the config file.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## 📦 TypeScript SDK
|
|
147
|
+
|
|
148
|
+
Install as a library:
|
|
149
|
+
|
|
150
|
+
```sh
|
|
151
|
+
npm install martin-loop
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
```typescript
|
|
155
|
+
import {
|
|
156
|
+
MartinLoop,
|
|
157
|
+
createClaudeCliAdapter,
|
|
158
|
+
createCodexCliAdapter
|
|
159
|
+
} from 'martin-loop'
|
|
160
|
+
|
|
161
|
+
const loop = new MartinLoop({
|
|
162
|
+
adapter: createClaudeCliAdapter({ workingDirectory: process.cwd() }),
|
|
163
|
+
defaults: {
|
|
164
|
+
budget: {
|
|
165
|
+
maxUsd: 3.00,
|
|
166
|
+
softLimitUsd: 2.25,
|
|
167
|
+
maxIterations: 3,
|
|
168
|
+
maxTokens: 20_000
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
})
|
|
172
|
+
|
|
173
|
+
const result = await loop.run({
|
|
174
|
+
workspaceId: 'my-workspace',
|
|
175
|
+
projectId: 'my-project',
|
|
176
|
+
task: {
|
|
177
|
+
title: 'Fix auth regression',
|
|
178
|
+
objective: 'Fix the failing auth regression tests',
|
|
179
|
+
verificationPlan: ['pnpm test'],
|
|
180
|
+
repoRoot: process.cwd()
|
|
181
|
+
},
|
|
182
|
+
budget: {
|
|
183
|
+
maxUsd: 3.00,
|
|
184
|
+
softLimitUsd: 2.25,
|
|
185
|
+
maxIterations: 3,
|
|
186
|
+
maxTokens: 20_000
|
|
187
|
+
}
|
|
188
|
+
})
|
|
189
|
+
|
|
190
|
+
// result.decision.status → 'completed' | 'exited' | 'failed'
|
|
191
|
+
// result.decision.lifecycleState → 'completed' | 'budget_exit' | 'human_escalation' | ...
|
|
192
|
+
// result.loop.cost.actualUsd → actual USD spent
|
|
193
|
+
// result.loop.attempts.length → number of attempts made
|
|
194
|
+
// result.decision.reason → why the loop exited
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Using Codex instead of Claude
|
|
198
|
+
|
|
199
|
+
```typescript
|
|
200
|
+
const loop = new MartinLoop({
|
|
201
|
+
adapter: createCodexCliAdapter({ workingDirectory: process.cwd() })
|
|
202
|
+
})
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Using the lower-level `runMartin` directly
|
|
206
|
+
|
|
207
|
+
```typescript
|
|
208
|
+
import { runMartin, createClaudeCliAdapter } from 'martin-loop'
|
|
209
|
+
|
|
210
|
+
const result = await runMartin({
|
|
211
|
+
workspaceId: 'ws_default',
|
|
212
|
+
projectId: 'proj_default',
|
|
213
|
+
task: {
|
|
214
|
+
title: 'Fix auth regression',
|
|
215
|
+
objective: 'Fix the failing auth regression tests',
|
|
216
|
+
verificationPlan: ['pnpm test'],
|
|
217
|
+
repoRoot: process.cwd()
|
|
218
|
+
},
|
|
219
|
+
budget: {
|
|
220
|
+
maxUsd: 3.00,
|
|
221
|
+
softLimitUsd: 2.25,
|
|
222
|
+
maxIterations: 3,
|
|
223
|
+
maxTokens: 20_000
|
|
224
|
+
},
|
|
225
|
+
adapter: createClaudeCliAdapter({ workingDirectory: process.cwd() })
|
|
226
|
+
})
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## 🧠 Architecture
|
|
232
|
+
|
|
233
|
+
Five governance layers from policy to runtime enforcement.
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
┌──────────────────────────────────────────────────────────┐
|
|
237
|
+
│ MartinLoop Governance Stack │
|
|
238
|
+
├──────────────────────┬───────────────────────────────────┤
|
|
239
|
+
│ Autonomy Envelope │ Surface · Path · Command │
|
|
240
|
+
│ (policy-enforced) │ Leash — pre-execution gate │
|
|
241
|
+
├──────────────────────┼───────────────────────────────────┤
|
|
242
|
+
│ Model Router │ Cost-aware adapter selection │
|
|
243
|
+
│ │ Fallback chain + model override │
|
|
244
|
+
├──────────────────────┼───────────────────────────────────┤
|
|
245
|
+
│ Agent Adapters │ Claude Code · Codex · any CLI │
|
|
246
|
+
│ │ Direct + stub adapters │
|
|
247
|
+
├──────────────────────┼───────────────────────────────────┤
|
|
248
|
+
│ Safety Leash │ Pre-execution verification gate │
|
|
249
|
+
│ │ Filesystem + secret + command │
|
|
250
|
+
├──────────────────────┼───────────────────────────────────┤
|
|
251
|
+
│ Persistence │ Per-run JSONL in ~/.martin/runs/ │
|
|
252
|
+
│ │ Portfolio inspect + resume │
|
|
253
|
+
└──────────────────────┴───────────────────────────────────┘
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## 🛡️ What MartinLoop Enforces Today
|
|
259
|
+
|
|
260
|
+
**1. Hard budget cap.**
|
|
261
|
+
Every run has a `maxUsd` limit. When the cost reaches that limit the subprocess is terminated — not warned.
|
|
262
|
+
|
|
263
|
+
**2. Iteration cap.**
|
|
264
|
+
Every run has a `maxIterations` limit. The loop exits when it is hit, regardless of progress.
|
|
265
|
+
|
|
266
|
+
**3. Filesystem leash.**
|
|
267
|
+
If `allowedPaths` or `deniedPaths` are configured, any attempt that writes outside the envelope is blocked and rolled back before the patch is kept.
|
|
268
|
+
|
|
269
|
+
**4. Secret leash.**
|
|
270
|
+
Values that look like secrets (API keys, tokens) in the task objective or acceptance criteria are blocked before any attempt runs.
|
|
271
|
+
|
|
272
|
+
**5. Verifier gate.**
|
|
273
|
+
The loop only marks a run successful if the verifier command exits `0`. A passing verifier is required for a `completed` lifecycle state.
|
|
274
|
+
|
|
275
|
+
**6. Rollback on failure.**
|
|
276
|
+
When an attempt is discarded (failed verifier, safety violation, patch decision), MartinLoop restores the filesystem to the pre-attempt state using a git-backed snapshot.
|
|
277
|
+
|
|
278
|
+
**7. Run persistence.**
|
|
279
|
+
Every run is written to `~/.martin/runs/<workspaceId>.jsonl`. Use `martin resume` and `martin inspect` to read it back.
|
|
280
|
+
|
|
281
|
+
---
|
|
282
|
+
|
|
283
|
+
## 📦 OSS Packages
|
|
284
|
+
|
|
285
|
+
| Package | What It Does |
|
|
286
|
+
|---------|-------------|
|
|
287
|
+
| `martin-loop` | Self-contained facade — everything below, vendored and published |
|
|
288
|
+
| `@martin/core` | Runtime controller, leash, router, rollback, policy engine |
|
|
289
|
+
| `@martin/cli` | `martin run` · `inspect` · `resume` CLI commands |
|
|
290
|
+
| `@martin/adapters` | Claude Code, Codex CLI, direct-provider, stub adapters |
|
|
291
|
+
| `@martin/contracts` | Shared types: loop, policy, leash, budget, rollback |
|
|
292
|
+
|
|
293
|
+
All `@martin/*` packages are workspace-internal. Install `martin-loop` from npm — it bundles them all.
|
|
294
|
+
|
|
295
|
+
---
|
|
296
|
+
|
|
297
|
+
## 🔧 Development
|
|
298
|
+
|
|
299
|
+
**Requirements:** Node 20+ · pnpm 8+
|
|
300
|
+
|
|
301
|
+
```sh
|
|
302
|
+
# Clone and install
|
|
303
|
+
git clone https://github.com/Keesan12/MartinLoop
|
|
304
|
+
cd MartinLoop && pnpm install
|
|
305
|
+
|
|
306
|
+
# Full test suite
|
|
307
|
+
pnpm test
|
|
308
|
+
|
|
309
|
+
# Type check all packages
|
|
310
|
+
pnpm -r lint
|
|
311
|
+
|
|
312
|
+
# Build all packages + public facade
|
|
313
|
+
pnpm build
|
|
314
|
+
|
|
315
|
+
# Publish (after build)
|
|
316
|
+
npm publish
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
---
|
|
320
|
+
|
|
321
|
+
## 🤝 Contributing
|
|
322
|
+
|
|
323
|
+
```sh
|
|
324
|
+
git checkout -b feat/your-feature
|
|
325
|
+
|
|
326
|
+
# Make changes, then:
|
|
327
|
+
pnpm -r lint && pnpm test # must stay green
|
|
328
|
+
|
|
329
|
+
git commit -m "feat: describe what you built"
|
|
330
|
+
git push -u origin feat/your-feature
|
|
331
|
+
# Open a PR against main
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
Conventional commits: `feat:` · `fix:` · `chore:` · `docs:` · `refactor:` · `test:`
|
|
335
|
+
|
|
336
|
+
---
|
|
337
|
+
|
|
338
|
+
<div align="center">
|
|
339
|
+
|
|
340
|
+
**MIT Licensed** · [martinloop.com](https://martinloop.com) · [keesan@martinloop.com](mailto:keesan@martinloop.com)
|
|
341
|
+
|
|
342
|
+
*"AI coding accountability: completes good work · refuses bad work · stops uneconomical work."*
|
|
343
|
+
|
|
344
|
+
</div>
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# Examples
|
|
2
|
+
|
|
3
|
+
These examples are grounded in the current CLI and MCP surfaces in this repo. Where an example depends on a real provider path, it is labeled that way explicitly.
|
|
4
|
+
|
|
5
|
+
These are still primarily repo-local RC examples. The root `martin-loop` package facade is now real and smoke-validated, but registry publication remains a later release step.
|
|
6
|
+
|
|
7
|
+
## 1. Stub-backed hello world
|
|
8
|
+
|
|
9
|
+
Use this when you want a safe first pass through the loop without real model spend.
|
|
10
|
+
|
|
11
|
+
### PowerShell
|
|
12
|
+
|
|
13
|
+
```powershell
|
|
14
|
+
$env:MARTIN_LIVE='false'
|
|
15
|
+
pnpm run:cli -- run `
|
|
16
|
+
--workspace ws_demo `
|
|
17
|
+
--project proj_demo `
|
|
18
|
+
--objective "Describe the current Martin run lifecycle in one paragraph" `
|
|
19
|
+
--verify "pnpm --filter @martin/core test"
|
|
20
|
+
Remove-Item Env:MARTIN_LIVE
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Why this is useful:
|
|
24
|
+
|
|
25
|
+
- exercises `runMartin`
|
|
26
|
+
- writes a real loop record and artifacts
|
|
27
|
+
- avoids external provider dependencies
|
|
28
|
+
|
|
29
|
+
## 2. Repo-backed task with explicit scope
|
|
30
|
+
|
|
31
|
+
Use allow and deny paths so the task contract is narrow and reviewable.
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pnpm run:cli -- run \
|
|
35
|
+
--cwd . \
|
|
36
|
+
--objective "Tighten README wording for the OSS quickstart" \
|
|
37
|
+
--verify "pnpm --filter @martin/core test" \
|
|
38
|
+
--allow-path README.md \
|
|
39
|
+
--allow-path docs/oss/** \
|
|
40
|
+
--deny-path apps/control-plane/** \
|
|
41
|
+
--accept "Only update documentation files" \
|
|
42
|
+
--accept "Do not modify runtime code"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
What this demonstrates:
|
|
46
|
+
|
|
47
|
+
- repo root selection with `--cwd`
|
|
48
|
+
- scoped file-edit boundaries
|
|
49
|
+
- acceptance criteria injection into the task contract
|
|
50
|
+
|
|
51
|
+
## 3. Safety-block example
|
|
52
|
+
|
|
53
|
+
This example is expected to block before execution because the verifier command is unsafe.
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pnpm run:cli -- run \
|
|
57
|
+
--objective "Try to run an unsafe verifier" \
|
|
58
|
+
--verify "rm -rf ."
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Expected behavior:
|
|
62
|
+
|
|
63
|
+
- the leash blocks the verifier command before adapter execution
|
|
64
|
+
- the run exits through a safety-oriented path rather than pretending the command was acceptable
|
|
65
|
+
- the attempt artifact set includes a persisted leash artifact when applicable
|
|
66
|
+
|
|
67
|
+
The point of this example is not that `rm` exists on every machine. The point is that the raw verifier text is evaluated before the process would be allowed to run.
|
|
68
|
+
|
|
69
|
+
## 4. Budget-constrained live run
|
|
70
|
+
|
|
71
|
+
This is a live-provider example. Only use it when you have the relevant CLI and credentials configured.
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pnpm run:cli -- run \
|
|
75
|
+
--engine codex \
|
|
76
|
+
--model o3 \
|
|
77
|
+
--objective "Refactor the CLI argument parser for clarity" \
|
|
78
|
+
--verify "pnpm --filter @martin/cli test" \
|
|
79
|
+
--budget-usd 2 \
|
|
80
|
+
--soft-limit-usd 1 \
|
|
81
|
+
--max-iterations 2
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
What to review afterward:
|
|
85
|
+
|
|
86
|
+
- admission and settlement events in `ledger.jsonl`
|
|
87
|
+
- cost provenance labels in the run artifacts
|
|
88
|
+
- whether the loop stopped for completion, budget pressure, or lack of progress
|
|
89
|
+
|
|
90
|
+
## 5. MCP invocation shape
|
|
91
|
+
|
|
92
|
+
The MCP server exposes `martin_run`, `martin_inspect`, and `martin_status`.
|
|
93
|
+
|
|
94
|
+
Example `martin_run` payload:
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
{
|
|
98
|
+
"objective": "Tighten the local dashboard copy",
|
|
99
|
+
"workingDirectory": ".",
|
|
100
|
+
"engine": "claude",
|
|
101
|
+
"verificationPlan": ["pnpm --filter @martin/control-plane test"],
|
|
102
|
+
"maxUsd": 5,
|
|
103
|
+
"maxIterations": 2,
|
|
104
|
+
"maxTokens": 20000,
|
|
105
|
+
"workspaceId": "ws_mcp",
|
|
106
|
+
"projectId": "proj_mcp"
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## 6. What to inspect in artifacts
|
|
111
|
+
|
|
112
|
+
For a repo-backed attempt, look at:
|
|
113
|
+
|
|
114
|
+
- `contract.json`
|
|
115
|
+
- `state.json`
|
|
116
|
+
- `ledger.jsonl`
|
|
117
|
+
- `artifacts/attempt-XXX/compiled-context.json`
|
|
118
|
+
- `artifacts/attempt-XXX/diff.patch`
|
|
119
|
+
- `artifacts/attempt-XXX/grounding-scan.json`
|
|
120
|
+
- `artifacts/attempt-XXX/leash.json`
|
|
121
|
+
- `artifacts/attempt-XXX/patch-score.json`
|
|
122
|
+
- `artifacts/attempt-XXX/patch-decision.json`
|
|
123
|
+
- `artifacts/attempt-XXX/rollback-boundary.json`
|
|
124
|
+
- `artifacts/attempt-XXX/rollback-outcome.json`
|
|
125
|
+
|
|
126
|
+
Those files are the evidence trail that backs the runtime’s claims.
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generatedAt": "2026-04-21T09:43:03.509Z",
|
|
3
|
+
"verdict": "go",
|
|
4
|
+
"publicSurface": {
|
|
5
|
+
"packageName": "martin-loop",
|
|
6
|
+
"canonicalPackageManager": "npm",
|
|
7
|
+
"installCommand": "npm install martin-loop",
|
|
8
|
+
"npxCommand": "npx martin-loop",
|
|
9
|
+
"sdkImportPath": "martin-loop",
|
|
10
|
+
"supportsNpxCommand": true,
|
|
11
|
+
"supportsSdkImport": true
|
|
12
|
+
},
|
|
13
|
+
"ossCorePackages": [
|
|
14
|
+
{
|
|
15
|
+
"name": "@martin/contracts",
|
|
16
|
+
"path": "packages/contracts",
|
|
17
|
+
"private": true,
|
|
18
|
+
"publishAccess": null,
|
|
19
|
+
"workspaceDependencies": [],
|
|
20
|
+
"classification": "oss_core",
|
|
21
|
+
"classificationReason": "Intended Phase 13 OSS core surface."
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "@martin/core",
|
|
25
|
+
"path": "packages/core",
|
|
26
|
+
"private": true,
|
|
27
|
+
"publishAccess": null,
|
|
28
|
+
"workspaceDependencies": [
|
|
29
|
+
"@martin/contracts"
|
|
30
|
+
],
|
|
31
|
+
"classification": "oss_core",
|
|
32
|
+
"classificationReason": "Intended Phase 13 OSS core surface."
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"name": "@martin/adapters",
|
|
36
|
+
"path": "packages/adapters",
|
|
37
|
+
"private": true,
|
|
38
|
+
"publishAccess": null,
|
|
39
|
+
"workspaceDependencies": [
|
|
40
|
+
"@martin/core"
|
|
41
|
+
],
|
|
42
|
+
"classification": "oss_core",
|
|
43
|
+
"classificationReason": "Intended Phase 13 OSS core surface."
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"name": "@martin/cli",
|
|
47
|
+
"path": "packages/cli",
|
|
48
|
+
"private": false,
|
|
49
|
+
"publishAccess": "public",
|
|
50
|
+
"workspaceDependencies": [
|
|
51
|
+
"@martin/adapters",
|
|
52
|
+
"@martin/contracts",
|
|
53
|
+
"@martin/core"
|
|
54
|
+
],
|
|
55
|
+
"classification": "oss_core",
|
|
56
|
+
"classificationReason": "Intended Phase 13 OSS core surface."
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"name": "@martin/mcp",
|
|
60
|
+
"path": "packages/mcp",
|
|
61
|
+
"private": false,
|
|
62
|
+
"publishAccess": "public",
|
|
63
|
+
"workspaceDependencies": [
|
|
64
|
+
"@martin/adapters",
|
|
65
|
+
"@martin/contracts",
|
|
66
|
+
"@martin/core"
|
|
67
|
+
],
|
|
68
|
+
"classification": "oss_core",
|
|
69
|
+
"classificationReason": "Intended Phase 13 OSS core surface."
|
|
70
|
+
}
|
|
71
|
+
],
|
|
72
|
+
"nonOssWorkspacePackages": [
|
|
73
|
+
{
|
|
74
|
+
"name": "@martin/control-plane",
|
|
75
|
+
"path": "apps/control-plane",
|
|
76
|
+
"private": true,
|
|
77
|
+
"publishAccess": null,
|
|
78
|
+
"workspaceDependencies": [
|
|
79
|
+
"@martin/contracts"
|
|
80
|
+
],
|
|
81
|
+
"classification": "non_oss_workspace",
|
|
82
|
+
"classificationReason": "Managed or RC-only workspace surface that stays out of the initial OSS boundary."
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
"name": "@martin/benchmarks",
|
|
86
|
+
"path": "benchmarks",
|
|
87
|
+
"private": true,
|
|
88
|
+
"publishAccess": null,
|
|
89
|
+
"workspaceDependencies": [
|
|
90
|
+
"@martin/adapters",
|
|
91
|
+
"@martin/contracts",
|
|
92
|
+
"@martin/core"
|
|
93
|
+
],
|
|
94
|
+
"classification": "non_oss_workspace",
|
|
95
|
+
"classificationReason": "Managed or RC-only workspace surface that stays out of the initial OSS boundary."
|
|
96
|
+
}
|
|
97
|
+
],
|
|
98
|
+
"localOnlySurfaces": [
|
|
99
|
+
{
|
|
100
|
+
"path": "apps/local-dashboard",
|
|
101
|
+
"reason": "Local read-model viewer that is not yet packaged as a publishable OSS workspace."
|
|
102
|
+
}
|
|
103
|
+
],
|
|
104
|
+
"dependencyLeaks": [],
|
|
105
|
+
"summary": {
|
|
106
|
+
"ossCoreCount": 5,
|
|
107
|
+
"nonOssWorkspaceCount": 2,
|
|
108
|
+
"localOnlySurfaceCount": 1,
|
|
109
|
+
"dependencyLeakCount": 0,
|
|
110
|
+
"privateOssCoreCount": 3,
|
|
111
|
+
"publishReadyOssCoreCount": 2
|
|
112
|
+
}
|
|
113
|
+
}
|