martin-loop 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +362 -344
- package/dist/bin/martin-loop.js +23 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.js +31 -0
- package/dist/vendor/adapters/claude-cli.d.ts +89 -0
- package/dist/vendor/adapters/claude-cli.js +555 -0
- package/dist/vendor/adapters/cli-bridge.d.ts +28 -0
- package/dist/vendor/adapters/cli-bridge.js +127 -0
- package/dist/vendor/adapters/direct-provider.d.ts +10 -0
- package/dist/vendor/adapters/direct-provider.js +41 -0
- package/dist/vendor/adapters/index.d.ts +5 -0
- package/dist/vendor/adapters/index.js +5 -0
- package/dist/vendor/adapters/runtime-support.d.ts +14 -0
- package/dist/vendor/adapters/runtime-support.js +52 -0
- package/dist/vendor/adapters/stub-agent-cli.d.ts +8 -0
- package/dist/vendor/adapters/stub-agent-cli.js +41 -0
- package/dist/vendor/adapters/stub-direct-provider.d.ts +8 -0
- package/dist/vendor/adapters/stub-direct-provider.js +10 -0
- package/dist/vendor/cli/bin/martin.d.ts +2 -0
- package/dist/vendor/cli/bin/martin.js +19 -0
- package/dist/vendor/cli/index.d.ts +39 -0
- package/dist/vendor/cli/index.js +634 -0
- package/dist/vendor/cli/persistence.d.ts +34 -0
- package/dist/vendor/cli/persistence.js +71 -0
- package/dist/vendor/contracts/governance.d.ts +21 -0
- package/dist/vendor/contracts/governance.js +12 -0
- package/dist/vendor/contracts/index.d.ts +330 -0
- package/dist/vendor/contracts/index.js +203 -0
- package/dist/vendor/core/compiler.d.ts +50 -0
- package/dist/vendor/core/compiler.js +47 -0
- package/dist/vendor/core/grounding.d.ts +37 -0
- package/dist/vendor/core/grounding.js +270 -0
- package/dist/vendor/core/index.d.ts +145 -0
- package/dist/vendor/core/index.js +1099 -0
- package/dist/vendor/core/leash.d.ts +48 -0
- package/dist/vendor/core/leash.js +408 -0
- package/dist/vendor/core/persistence/compiler.d.ts +18 -0
- package/dist/vendor/core/persistence/compiler.js +35 -0
- package/dist/vendor/core/persistence/index.d.ts +6 -0
- package/dist/vendor/core/persistence/index.js +4 -0
- package/dist/vendor/core/persistence/ledger.d.ts +23 -0
- package/dist/vendor/core/persistence/ledger.js +10 -0
- package/dist/vendor/core/persistence/store.d.ts +77 -0
- package/dist/vendor/core/persistence/store.js +84 -0
- package/dist/vendor/core/policy.d.ts +126 -0
- package/dist/vendor/core/policy.js +625 -0
- package/dist/vendor/core/rollback.d.ts +11 -0
- package/dist/vendor/core/rollback.js +219 -0
- package/docs/oss/EXAMPLES.md +126 -126
- package/docs/oss/OSS-BOUNDARY-REPORT.json +113 -113
- package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
- package/docs/oss/QUICKSTART.md +135 -135
- package/docs/oss/README.md +93 -93
- package/docs/oss/RELEASE-SURFACE-REPORT.json +45 -45
- package/docs/oss/RELEASE-SURFACE-REPORT.md +35 -35
- package/package.json +56 -54
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
import { spawnSync } from "node:child_process";
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
import { dirname, isAbsolute, relative, resolve, sep } from "node:path";
|
|
4
|
+
/**
 * Captures the repository state that a later rollback should return to.
 *
 * Every path reported as tracked-dirty or untracked is snapshotted, and the
 * current HEAD commit is recorded when git can resolve it.
 *
 * @param {{ repoRoot?: string, capturedAt?: unknown }} input - `repoRoot` is the
 *   repository directory; `capturedAt` is copied into the result unchanged.
 * @returns {Promise<object | undefined>} The rollback boundary, or `undefined`
 *   when `input.repoRoot` is falsy.
 */
export async function captureRollbackBoundary(input) {
    if (!input.repoRoot) {
        return undefined;
    }
    const repoState = readRepoState(input.repoRoot);
    const snapshotPaths = uniqueSorted([
        ...repoState.trackedDirtyFiles,
        ...repoState.untrackedFiles
    ]);
    const snapshots = [];
    for (const filePath of snapshotPaths) {
        snapshots.push(await readRollbackSnapshot(input.repoRoot, filePath));
    }
    // Resolve HEAD exactly once: spawning git twice is wasteful and the two
    // calls could disagree if HEAD moves between them.
    const headRef = readGitScalar(input.repoRoot, ["rev-parse", "HEAD"]);
    return {
        strategy: "git_head_plus_snapshot",
        capturedAt: input.capturedAt,
        // The key is omitted entirely when HEAD cannot be resolved.
        ...(headRef ? { headRef } : {}),
        trackedDirtyFiles: repoState.trackedDirtyFiles,
        untrackedFiles: repoState.untrackedFiles,
        snapshots
    };
}
|
|
28
|
+
/**
 * Returns the repository to a previously captured rollback boundary.
 *
 * Steps, in order:
 *  1. tracked files that are dirty now but were not dirty at capture time are
 *     restored from HEAD;
 *  2. untracked files that were not present at capture time are removed;
 *  3. every snapshot in the boundary is written back (or removed).
 *
 * @param {{ repoRoot?: string, boundary?: object, restoredAt?: unknown, decision?: unknown }} input
 * @returns {Promise<object | undefined>} An outcome record whose `status` is
 *   `"unavailable"`, `"not_required"`, `"restored"` or `"failed"`; `undefined`
 *   when `input.repoRoot` is falsy. Failures are reported through the record's
 *   `error` field rather than thrown.
 */
export async function restoreRollbackBoundary(input) {
    if (!input.repoRoot) {
        return undefined;
    }
    if (!input.boundary) {
        return {
            attempted: false,
            status: "unavailable",
            restoredAt: input.restoredAt,
            decision: input.decision,
            before: emptyRepoState(),
            after: emptyRepoState(),
            restoredFiles: [],
            deletedFiles: [],
            error: "Rollback boundary was unavailable for this attempt."
        };
    }
    const before = readRepoState(input.repoRoot);
    // Matching file-name lists are not sufficient to skip the restore: a file
    // that was already dirty/untracked at capture time may have been edited
    // since, so the snapshot contents are compared as well.
    const alreadyAtBoundary = repoStateMatchesBoundary(before, input.boundary) &&
        (await rollbackSnapshotsMatchWorktree(input.repoRoot, input.boundary.snapshots));
    if (alreadyAtBoundary) {
        return {
            attempted: false,
            status: "not_required",
            restoredAt: input.restoredAt,
            decision: input.decision,
            before,
            after: before,
            restoredFiles: [],
            deletedFiles: []
        };
    }
    const restoredFiles = new Set();
    const deletedFiles = new Set();
    try {
        const baselineTracked = new Set(input.boundary.trackedDirtyFiles);
        const baselineUntracked = new Set(input.boundary.untrackedFiles);
        for (const filePath of before.trackedDirtyFiles) {
            if (!baselineTracked.has(filePath)) {
                restoreTrackedFileFromHead(input.repoRoot, filePath);
                restoredFiles.add(filePath);
            }
        }
        for (const filePath of before.untrackedFiles) {
            if (!baselineUntracked.has(filePath)) {
                await removeRepoPath(input.repoRoot, filePath);
                deletedFiles.add(filePath);
            }
        }
        for (const snapshot of input.boundary.snapshots) {
            await restoreRollbackSnapshot(input.repoRoot, snapshot);
            if (snapshot.existed) {
                restoredFiles.add(snapshot.path);
            }
            else {
                deletedFiles.add(snapshot.path);
            }
        }
        const after = readRepoState(input.repoRoot);
        const restored = repoStateMatchesBoundary(after, input.boundary);
        return {
            attempted: true,
            status: restored ? "restored" : "failed",
            restoredAt: input.restoredAt,
            decision: input.decision,
            before,
            after,
            restoredFiles: [...restoredFiles].sort(),
            deletedFiles: [...deletedFiles].sort(),
            ...(restored
                ? {}
                : { error: "Repo state still diverged from the recorded rollback boundary after restore." })
        };
    }
    catch (error) {
        // Report whatever was restored or deleted before the failure.
        const after = readRepoState(input.repoRoot);
        return {
            attempted: true,
            status: "failed",
            restoredAt: input.restoredAt,
            decision: input.decision,
            before,
            after,
            restoredFiles: [...restoredFiles].sort(),
            deletedFiles: [...deletedFiles].sort(),
            error: toErrorMessage(error)
        };
    }
}

/**
 * Checks whether every snapshot already matches what is on disk.
 *
 * Returns `false` (never throws) whenever the answer is uncertain, so the
 * caller falls through to the real restore path, which reports errors.
 */
async function rollbackSnapshotsMatchWorktree(repoRoot, snapshots) {
    if (!Array.isArray(snapshots)) {
        return false;
    }
    for (const snapshot of snapshots) {
        let absolutePath;
        try {
            absolutePath = resolveRepoPath(repoRoot, snapshot.path);
        }
        catch {
            return false;
        }
        let current;
        try {
            current = await readFile(absolutePath);
        }
        catch {
            // An unreadable path counts as absent, the same way
            // readRollbackSnapshot records it at capture time.
            current = undefined;
        }
        const expectsFile = Boolean(snapshot.existed) && typeof snapshot.contentBase64 === "string";
        if (!expectsFile) {
            if (current !== undefined) {
                return false;
            }
            continue;
        }
        if (current === undefined || !Buffer.from(snapshot.contentBase64, "base64").equals(current)) {
            return false;
        }
    }
    return true;
}
|
|
115
|
+
/**
 * Lists the paths git reports as changed relative to HEAD and the paths git
 * reports as untracked (honouring the standard exclude rules).
 *
 * @param {string} repoRoot - Directory in which the git commands are run.
 * @returns {{ trackedDirtyFiles: string[], untrackedFiles: string[] }}
 */
function readRepoState(repoRoot) {
    const trackedDirtyFiles = readGitLines(repoRoot, ["diff", "--name-only", "HEAD"]);
    const untrackedFiles = readGitLines(repoRoot, ["ls-files", "--others", "--exclude-standard"]);
    return { trackedDirtyFiles, untrackedFiles };
}
|
|
121
|
+
/**
 * Runs `git <args>` in `repoRoot` and returns its stdout as a de-duplicated,
 * sorted list of normalized, non-empty lines.
 *
 * @param {string} repoRoot - Working directory for the git process.
 * @param {string[]} args - Arguments passed to git.
 * @returns {string[]} The lines, or an empty array when git exits non-zero
 *   or produces no string output.
 */
function readGitLines(repoRoot, args) {
    const { status, stdout } = spawnSync("git", args, { cwd: repoRoot, encoding: "utf8" });
    const succeeded = status === 0 && typeof stdout === "string";
    if (!succeeded) {
        return [];
    }
    const lines = [];
    for (const rawLine of stdout.split(/\r?\n/u)) {
        const normalized = normalizeRepoPath(rawLine);
        if (normalized) {
            lines.push(normalized);
        }
    }
    return uniqueSorted(lines);
}
|
|
134
|
+
/**
 * Runs `git <args>` in `repoRoot` and returns its trimmed stdout as one value.
 *
 * @param {string} repoRoot - Working directory for the git process.
 * @param {string[]} args - Arguments passed to git.
 * @returns {string | undefined} The trimmed output, or `undefined` when git
 *   exits non-zero, produces no string output, or prints only whitespace.
 */
function readGitScalar(repoRoot, args) {
    const { status, stdout } = spawnSync("git", args, { cwd: repoRoot, encoding: "utf8" });
    if (status === 0 && typeof stdout === "string") {
        const trimmed = stdout.trim();
        if (trimmed.length > 0) {
            return trimmed;
        }
    }
    return undefined;
}
|
|
145
|
+
/**
 * Reads one repo-relative file into a base64 snapshot record.
 *
 * Any failure to read the file (not only "file missing") is recorded as
 * `existed: false` with no `contentBase64`.
 *
 * @param {string} repoRoot - Repository directory.
 * @param {string} filePath - Path relative to `repoRoot`.
 * @returns {Promise<{ path: string, existed: boolean, encoding: "base64", contentBase64?: string }>}
 */
async function readRollbackSnapshot(repoRoot, filePath) {
    const absolutePath = resolveRepoPath(repoRoot, filePath);
    const absentSnapshot = {
        path: normalizeRepoPath(filePath),
        existed: false,
        encoding: "base64"
    };
    let contents;
    try {
        contents = await readFile(absolutePath);
    }
    catch {
        return absentSnapshot;
    }
    return {
        ...absentSnapshot,
        existed: true,
        contentBase64: contents.toString("base64")
    };
}
|
|
164
|
+
/**
 * Puts one snapshotted path back to its captured state.
 *
 * A snapshot marked `existed: false`, or one carrying no content string, has
 * its path removed. Otherwise the file is rewritten from the base64 contents,
 * creating parent directories as needed.
 *
 * @param {string} repoRoot - Repository directory.
 * @param {{ path: string, existed: boolean, contentBase64?: string }} snapshot
 * @returns {Promise<void>}
 */
async function restoreRollbackSnapshot(repoRoot, snapshot) {
    const absolutePath = resolveRepoPath(repoRoot, snapshot.path);
    // An empty file snapshots as contentBase64 === "", which is falsy. Test for
    // a string so empty files are rewritten instead of being deleted.
    const hasContents = typeof snapshot.contentBase64 === "string";
    if (!snapshot.existed || !hasContents) {
        await rm(absolutePath, { recursive: true, force: true });
        return;
    }
    await mkdir(dirname(absolutePath), { recursive: true });
    await writeFile(absolutePath, Buffer.from(snapshot.contentBase64, "base64"));
}
|
|
173
|
+
/**
 * Resets one path in both the index and the working tree to its HEAD version
 * by running `git restore --staged --worktree --source=HEAD -- <filePath>`.
 *
 * @param {string} repoRoot - Working directory for the git process.
 * @param {string} filePath - Path handed to git after the `--` separator.
 * @throws {Error} When git does not exit with status 0; the message is git's
 *   trimmed stderr when available, otherwise a generic message naming the path.
 */
function restoreTrackedFileFromHead(repoRoot, filePath) {
    const gitArgs = ["restore", "--staged", "--worktree", "--source=HEAD", "--", filePath];
    const { status, stderr } = spawnSync("git", gitArgs, { cwd: repoRoot, encoding: "utf8" });
    if (status === 0) {
        return;
    }
    const detail = stderr?.trim();
    throw new Error(detail ? detail : `git restore failed for ${filePath}`);
}
|
|
182
|
+
/**
 * Deletes a repo-relative path, recursively and without failing when the path
 * is already gone.
 *
 * @param {string} repoRoot - Repository directory.
 * @param {string} filePath - Path relative to `repoRoot`.
 * @returns {Promise<void>}
 */
async function removeRepoPath(repoRoot, filePath) {
    const target = resolveRepoPath(repoRoot, filePath);
    const removalOptions = { recursive: true, force: true };
    await rm(target, removalOptions);
}
|
|
185
|
+
/**
 * Resolves a repo-relative path to an absolute path and refuses anything that
 * is not strictly inside the repository root.
 *
 * @param {string} repoRoot - Repository directory.
 * @param {string} filePath - Path to resolve against `repoRoot`.
 * @returns {string} The absolute path.
 * @throws {Error} When the path resolves to the root itself or to a location
 *   outside the root.
 */
function resolveRepoPath(repoRoot, filePath) {
    const resolvedRoot = resolve(repoRoot);
    const resolvedPath = resolve(resolvedRoot, filePath);
    const relativePath = relative(resolvedRoot, resolvedPath);
    // Compare whole path segments: a plain startsWith("..") would also reject
    // legitimate in-repo names such as "..config" or "...hidden".
    const climbsOutOfRoot = relativePath === ".." || relativePath.startsWith(`..${sep}`);
    // relative() can return an absolute path when no relative route exists
    // (e.g. a different drive on Windows); that is outside the root as well.
    const escapesRoot = relativePath === "" || climbsOutOfRoot || isAbsolute(relativePath);
    if (escapesRoot) {
        throw new Error(`Refusing to access a rollback path outside repo root: ${filePath}`);
    }
    return resolvedPath;
}
|
|
194
|
+
/**
 * Tells whether a repo state lists exactly the same tracked-dirty and
 * untracked paths, in the same order, as a rollback boundary.
 * Only path lists are compared; file contents are not inspected.
 *
 * @param {{ trackedDirtyFiles: string[], untrackedFiles: string[] }} state
 * @param {{ trackedDirtyFiles: string[], untrackedFiles: string[] }} boundary
 * @returns {boolean}
 */
function repoStateMatchesBoundary(state, boundary) {
    if (!arraysEqual(state.trackedDirtyFiles, boundary.trackedDirtyFiles)) {
        return false;
    }
    return arraysEqual(state.untrackedFiles, boundary.untrackedFiles);
}
|
|
198
|
+
/**
 * Element-wise strict equality of two arrays, order included.
 *
 * @param {unknown[]} left
 * @param {unknown[]} right
 * @returns {boolean} `true` when both have the same length and every element
 *   of `left` is `===` to the element at the same index of `right`.
 */
function arraysEqual(left, right) {
    const sameLength = left.length === right.length;
    return sameLength && !left.some((value, index) => value !== right[index]);
}
|
|
204
|
+
/**
 * Normalizes every path, drops empty results, removes duplicates and returns
 * the remainder sorted with the default string ordering.
 *
 * @param {string[]} values - Raw paths.
 * @returns {string[]} A new sorted array of distinct normalized paths.
 */
function uniqueSorted(values) {
    const normalized = values.map((value) => normalizeRepoPath(value));
    const distinct = new Set(normalized.filter(Boolean));
    return Array.from(distinct).sort();
}
|
|
207
|
+
/**
 * Trims surrounding whitespace from a path and converts every backslash to a
 * forward slash.
 *
 * @param {string} value - Raw path text.
 * @returns {string} The normalized path.
 */
function normalizeRepoPath(value) {
    const trimmed = value.trim();
    return trimmed.split("\\").join("/");
}
|
|
210
|
+
/**
 * Builds a repo state with no tracked-dirty and no untracked files.
 *
 * @returns {{ trackedDirtyFiles: string[], untrackedFiles: string[] }} A fresh
 *   object with fresh empty arrays on every call.
 */
function emptyRepoState() {
    const trackedDirtyFiles = [];
    const untrackedFiles = [];
    return { trackedDirtyFiles, untrackedFiles };
}
|
|
216
|
+
/**
 * Converts a caught value into a message string.
 *
 * @param {unknown} error - Anything that was thrown.
 * @returns {string} `error.message` for `Error` instances, otherwise
 *   `String(error)`.
 */
function toErrorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
|
|
219
|
+
//# sourceMappingURL=rollback.js.map
|
package/docs/oss/EXAMPLES.md
CHANGED
|
@@ -1,126 +1,126 @@
|
|
|
1
|
-
# Examples
|
|
2
|
-
|
|
3
|
-
These examples are grounded in the current CLI and MCP surfaces in this repo. Where an example depends on a real provider path, it is labeled that way explicitly.
|
|
4
|
-
|
|
5
|
-
These are still primarily repo-local RC examples. The root `martin-loop` package facade is now real and smoke-validated, but registry publication remains a later release step.
|
|
6
|
-
|
|
7
|
-
## 1. Stub-backed hello world
|
|
8
|
-
|
|
9
|
-
Use this when you want a safe first pass through the loop without real model spend.
|
|
10
|
-
|
|
11
|
-
### PowerShell
|
|
12
|
-
|
|
13
|
-
```powershell
|
|
14
|
-
$env:MARTIN_LIVE='false'
|
|
15
|
-
pnpm run:cli -- run `
|
|
16
|
-
--workspace ws_demo `
|
|
17
|
-
--project proj_demo `
|
|
18
|
-
--objective "Describe the current Martin run lifecycle in one paragraph" `
|
|
19
|
-
--verify "pnpm --filter @martin/core test"
|
|
20
|
-
Remove-Item Env:MARTIN_LIVE
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
Why this is useful:
|
|
24
|
-
|
|
25
|
-
- exercises `runMartin`
|
|
26
|
-
- writes a real loop record and artifacts
|
|
27
|
-
- avoids external provider dependencies
|
|
28
|
-
|
|
29
|
-
## 2. Repo-backed task with explicit scope
|
|
30
|
-
|
|
31
|
-
Use allow and deny paths so the task contract is narrow and reviewable.
|
|
32
|
-
|
|
33
|
-
```bash
|
|
34
|
-
pnpm run:cli -- run \
|
|
35
|
-
--cwd . \
|
|
36
|
-
--objective "Tighten README wording for the OSS quickstart" \
|
|
37
|
-
--verify "pnpm --filter @martin/core test" \
|
|
38
|
-
--allow-path README.md \
|
|
39
|
-
--allow-path docs/oss/** \
|
|
40
|
-
--deny-path apps/control-plane/** \
|
|
41
|
-
--accept "Only update documentation files" \
|
|
42
|
-
--accept "Do not modify runtime code"
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
What this demonstrates:
|
|
46
|
-
|
|
47
|
-
- repo root selection with `--cwd`
|
|
48
|
-
- scoped file-edit boundaries
|
|
49
|
-
- acceptance criteria injection into the task contract
|
|
50
|
-
|
|
51
|
-
## 3. Safety-block example
|
|
52
|
-
|
|
53
|
-
This example is expected to block before execution because the verifier command is unsafe.
|
|
54
|
-
|
|
55
|
-
```bash
|
|
56
|
-
pnpm run:cli -- run \
|
|
57
|
-
--objective "Try to run an unsafe verifier" \
|
|
58
|
-
--verify "rm -rf ."
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
Expected behavior:
|
|
62
|
-
|
|
63
|
-
- the leash blocks the verifier command before adapter execution
|
|
64
|
-
- the run exits through a safety-oriented path rather than pretending the command was acceptable
|
|
65
|
-
- the attempt artifact set includes a persisted leash artifact when applicable
|
|
66
|
-
|
|
67
|
-
The point of this example is not that `rm` exists on every machine. The point is that the raw verifier text is evaluated before the process would be allowed to run.
|
|
68
|
-
|
|
69
|
-
## 4. Budget-constrained live run
|
|
70
|
-
|
|
71
|
-
This is a live-provider example. Only use it when you have the relevant CLI and credentials configured.
|
|
72
|
-
|
|
73
|
-
```bash
|
|
74
|
-
pnpm run:cli -- run \
|
|
75
|
-
--engine codex \
|
|
76
|
-
--model o3 \
|
|
77
|
-
--objective "Refactor the CLI argument parser for clarity" \
|
|
78
|
-
--verify "pnpm --filter @martin/cli test" \
|
|
79
|
-
--budget-usd 2 \
|
|
80
|
-
--soft-limit-usd 1 \
|
|
81
|
-
--max-iterations 2
|
|
82
|
-
```
|
|
83
|
-
|
|
84
|
-
What to review afterward:
|
|
85
|
-
|
|
86
|
-
- admission and settlement events in `ledger.jsonl`
|
|
87
|
-
- cost provenance labels in the run artifacts
|
|
88
|
-
- whether the loop stopped for completion, budget pressure, or lack of progress
|
|
89
|
-
|
|
90
|
-
## 5. MCP invocation shape
|
|
91
|
-
|
|
92
|
-
The MCP server exposes `martin_run`, `martin_inspect`, and `martin_status`.
|
|
93
|
-
|
|
94
|
-
Example `martin_run` payload:
|
|
95
|
-
|
|
96
|
-
```json
|
|
97
|
-
{
|
|
98
|
-
"objective": "Tighten the local dashboard copy",
|
|
99
|
-
"workingDirectory": ".",
|
|
100
|
-
"engine": "claude",
|
|
101
|
-
"verificationPlan": ["pnpm --filter @martin/control-plane test"],
|
|
102
|
-
"maxUsd": 5,
|
|
103
|
-
"maxIterations": 2,
|
|
104
|
-
"maxTokens": 20000,
|
|
105
|
-
"workspaceId": "ws_mcp",
|
|
106
|
-
"projectId": "proj_mcp"
|
|
107
|
-
}
|
|
108
|
-
```
|
|
109
|
-
|
|
110
|
-
## 6. What to inspect in artifacts
|
|
111
|
-
|
|
112
|
-
For a repo-backed attempt, look at:
|
|
113
|
-
|
|
114
|
-
- `contract.json`
|
|
115
|
-
- `state.json`
|
|
116
|
-
- `ledger.jsonl`
|
|
117
|
-
- `artifacts/attempt-XXX/compiled-context.json`
|
|
118
|
-
- `artifacts/attempt-XXX/diff.patch`
|
|
119
|
-
- `artifacts/attempt-XXX/grounding-scan.json`
|
|
120
|
-
- `artifacts/attempt-XXX/leash.json`
|
|
121
|
-
- `artifacts/attempt-XXX/patch-score.json`
|
|
122
|
-
- `artifacts/attempt-XXX/patch-decision.json`
|
|
123
|
-
- `artifacts/attempt-XXX/rollback-boundary.json`
|
|
124
|
-
- `artifacts/attempt-XXX/rollback-outcome.json`
|
|
125
|
-
|
|
126
|
-
Those files are the evidence trail that backs the runtime’s claims.
|
|
1
|
+
# Examples
|
|
2
|
+
|
|
3
|
+
These examples are grounded in the current CLI and MCP surfaces in this repo. Where an example depends on a real provider path, it is labeled that way explicitly.
|
|
4
|
+
|
|
5
|
+
These are still primarily repo-local RC examples. The root `martin-loop` package facade is now real and smoke-validated, but registry publication remains a later release step.
|
|
6
|
+
|
|
7
|
+
## 1. Stub-backed hello world
|
|
8
|
+
|
|
9
|
+
Use this when you want a safe first pass through the loop without real model spend.
|
|
10
|
+
|
|
11
|
+
### PowerShell
|
|
12
|
+
|
|
13
|
+
```powershell
|
|
14
|
+
$env:MARTIN_LIVE='false'
|
|
15
|
+
pnpm run:cli -- run `
|
|
16
|
+
--workspace ws_demo `
|
|
17
|
+
--project proj_demo `
|
|
18
|
+
--objective "Describe the current Martin run lifecycle in one paragraph" `
|
|
19
|
+
--verify "pnpm --filter @martin/core test"
|
|
20
|
+
Remove-Item Env:MARTIN_LIVE
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Why this is useful:
|
|
24
|
+
|
|
25
|
+
- exercises `runMartin`
|
|
26
|
+
- writes a real loop record and artifacts
|
|
27
|
+
- avoids external provider dependencies
|
|
28
|
+
|
|
29
|
+
## 2. Repo-backed task with explicit scope
|
|
30
|
+
|
|
31
|
+
Use allow and deny paths so the task contract is narrow and reviewable.
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pnpm run:cli -- run \
|
|
35
|
+
--cwd . \
|
|
36
|
+
--objective "Tighten README wording for the OSS quickstart" \
|
|
37
|
+
--verify "pnpm --filter @martin/core test" \
|
|
38
|
+
--allow-path README.md \
|
|
39
|
+
--allow-path docs/oss/** \
|
|
40
|
+
--deny-path apps/control-plane/** \
|
|
41
|
+
--accept "Only update documentation files" \
|
|
42
|
+
--accept "Do not modify runtime code"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
What this demonstrates:
|
|
46
|
+
|
|
47
|
+
- repo root selection with `--cwd`
|
|
48
|
+
- scoped file-edit boundaries
|
|
49
|
+
- acceptance criteria injection into the task contract
|
|
50
|
+
|
|
51
|
+
## 3. Safety-block example
|
|
52
|
+
|
|
53
|
+
This example is expected to block before execution because the verifier command is unsafe.
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pnpm run:cli -- run \
|
|
57
|
+
--objective "Try to run an unsafe verifier" \
|
|
58
|
+
--verify "rm -rf ."
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Expected behavior:
|
|
62
|
+
|
|
63
|
+
- the leash blocks the verifier command before adapter execution
|
|
64
|
+
- the run exits through a safety-oriented path rather than pretending the command was acceptable
|
|
65
|
+
- the attempt artifact set includes a persisted leash artifact when applicable
|
|
66
|
+
|
|
67
|
+
The point of this example is not that `rm` exists on every machine. The point is that the raw verifier text is evaluated before the process would be allowed to run.
|
|
68
|
+
|
|
69
|
+
## 4. Budget-constrained live run
|
|
70
|
+
|
|
71
|
+
This is a live-provider example. Only use it when you have the relevant CLI and credentials configured.
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pnpm run:cli -- run \
|
|
75
|
+
--engine codex \
|
|
76
|
+
--model o3 \
|
|
77
|
+
--objective "Refactor the CLI argument parser for clarity" \
|
|
78
|
+
--verify "pnpm --filter @martin/cli test" \
|
|
79
|
+
--budget-usd 2 \
|
|
80
|
+
--soft-limit-usd 1 \
|
|
81
|
+
--max-iterations 2
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
What to review afterward:
|
|
85
|
+
|
|
86
|
+
- admission and settlement events in `ledger.jsonl`
|
|
87
|
+
- cost provenance labels in the run artifacts
|
|
88
|
+
- whether the loop stopped for completion, budget pressure, or lack of progress
|
|
89
|
+
|
|
90
|
+
## 5. MCP invocation shape
|
|
91
|
+
|
|
92
|
+
The MCP server exposes `martin_run`, `martin_inspect`, and `martin_status`.
|
|
93
|
+
|
|
94
|
+
Example `martin_run` payload:
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
{
|
|
98
|
+
"objective": "Tighten the local dashboard copy",
|
|
99
|
+
"workingDirectory": ".",
|
|
100
|
+
"engine": "claude",
|
|
101
|
+
"verificationPlan": ["pnpm --filter @martin/control-plane test"],
|
|
102
|
+
"maxUsd": 5,
|
|
103
|
+
"maxIterations": 2,
|
|
104
|
+
"maxTokens": 20000,
|
|
105
|
+
"workspaceId": "ws_mcp",
|
|
106
|
+
"projectId": "proj_mcp"
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## 6. What to inspect in artifacts
|
|
111
|
+
|
|
112
|
+
For a repo-backed attempt, look at:
|
|
113
|
+
|
|
114
|
+
- `contract.json`
|
|
115
|
+
- `state.json`
|
|
116
|
+
- `ledger.jsonl`
|
|
117
|
+
- `artifacts/attempt-XXX/compiled-context.json`
|
|
118
|
+
- `artifacts/attempt-XXX/diff.patch`
|
|
119
|
+
- `artifacts/attempt-XXX/grounding-scan.json`
|
|
120
|
+
- `artifacts/attempt-XXX/leash.json`
|
|
121
|
+
- `artifacts/attempt-XXX/patch-score.json`
|
|
122
|
+
- `artifacts/attempt-XXX/patch-decision.json`
|
|
123
|
+
- `artifacts/attempt-XXX/rollback-boundary.json`
|
|
124
|
+
- `artifacts/attempt-XXX/rollback-outcome.json`
|
|
125
|
+
|
|
126
|
+
Those files are the evidence trail that backs the runtime’s claims.
|