martin-loop 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +362 -344
- package/dist/bin/martin-loop.js +23 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.js +31 -0
- package/dist/vendor/adapters/claude-cli.d.ts +89 -0
- package/dist/vendor/adapters/claude-cli.js +555 -0
- package/dist/vendor/adapters/cli-bridge.d.ts +28 -0
- package/dist/vendor/adapters/cli-bridge.js +127 -0
- package/dist/vendor/adapters/direct-provider.d.ts +10 -0
- package/dist/vendor/adapters/direct-provider.js +41 -0
- package/dist/vendor/adapters/index.d.ts +5 -0
- package/dist/vendor/adapters/index.js +5 -0
- package/dist/vendor/adapters/runtime-support.d.ts +14 -0
- package/dist/vendor/adapters/runtime-support.js +52 -0
- package/dist/vendor/adapters/stub-agent-cli.d.ts +8 -0
- package/dist/vendor/adapters/stub-agent-cli.js +41 -0
- package/dist/vendor/adapters/stub-direct-provider.d.ts +8 -0
- package/dist/vendor/adapters/stub-direct-provider.js +10 -0
- package/dist/vendor/cli/bin/martin.d.ts +2 -0
- package/dist/vendor/cli/bin/martin.js +19 -0
- package/dist/vendor/cli/index.d.ts +39 -0
- package/dist/vendor/cli/index.js +634 -0
- package/dist/vendor/cli/persistence.d.ts +34 -0
- package/dist/vendor/cli/persistence.js +71 -0
- package/dist/vendor/contracts/governance.d.ts +21 -0
- package/dist/vendor/contracts/governance.js +12 -0
- package/dist/vendor/contracts/index.d.ts +330 -0
- package/dist/vendor/contracts/index.js +203 -0
- package/dist/vendor/core/compiler.d.ts +50 -0
- package/dist/vendor/core/compiler.js +47 -0
- package/dist/vendor/core/grounding.d.ts +37 -0
- package/dist/vendor/core/grounding.js +270 -0
- package/dist/vendor/core/index.d.ts +145 -0
- package/dist/vendor/core/index.js +1099 -0
- package/dist/vendor/core/leash.d.ts +48 -0
- package/dist/vendor/core/leash.js +408 -0
- package/dist/vendor/core/persistence/compiler.d.ts +18 -0
- package/dist/vendor/core/persistence/compiler.js +35 -0
- package/dist/vendor/core/persistence/index.d.ts +6 -0
- package/dist/vendor/core/persistence/index.js +4 -0
- package/dist/vendor/core/persistence/ledger.d.ts +23 -0
- package/dist/vendor/core/persistence/ledger.js +10 -0
- package/dist/vendor/core/persistence/store.d.ts +77 -0
- package/dist/vendor/core/persistence/store.js +84 -0
- package/dist/vendor/core/policy.d.ts +126 -0
- package/dist/vendor/core/policy.js +625 -0
- package/dist/vendor/core/rollback.d.ts +11 -0
- package/dist/vendor/core/rollback.js +219 -0
- package/docs/oss/EXAMPLES.md +126 -126
- package/docs/oss/OSS-BOUNDARY-REPORT.json +113 -113
- package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
- package/docs/oss/QUICKSTART.md +135 -135
- package/docs/oss/README.md +93 -93
- package/docs/oss/RELEASE-SURFACE-REPORT.json +45 -45
- package/docs/oss/RELEASE-SURFACE-REPORT.md +35 -35
- package/package.json +56 -54
|
@@ -0,0 +1,634 @@
|
|
|
1
|
+
import { appendFile, mkdir, readFile } from "node:fs/promises";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { isAbsolute, join, resolve } from "node:path";
|
|
4
|
+
import { createClaudeCliAdapter, createCodexCliAdapter, createStubDirectProviderAdapter } from "../adapters/index.js";
|
|
5
|
+
import { runMartin } from "../core/index.js";
|
|
6
|
+
import { buildPortfolioSnapshot, createLoopRecord } from "../contracts/index.js";
|
|
7
|
+
/**
 * Runs one Martin CLI invocation and reports the outcome as data instead of printing it.
 *
 * @param args Raw CLI tokens with the command first (for example `["run", "fix tests", "--budget", "5"]`).
 * @returns `{ exitCode, stdout, stderr }`. `stdout` carries pretty-printed JSON for `run`,
 *   `inspect` and `resume`, and the help text for `help`.
 * @throws Whatever `resolveGuardrails` throws for `run` (for example an explicit `--config`
 *   file that does not exist); that failure is not converted into an exit code here.
 */
export async function executeCli(args) {
    const parsed = parseCliArguments(args);
    switch (parsed.command) {
        case "help":
            return { exitCode: 0, stdout: renderCliHelp(), stderr: "" };
        case "run":
            return executeRunCommand(parsed.request, args);
        case "bench":
            return {
                exitCode: 1,
                stdout: "",
                stderr: "The benchmark harness remains a workspace-only RC surface and is not part of the publishable @martin/cli boundary yet. Use pnpm --filter @martin/benchmarks test or pnpm --filter @martin/benchmarks eval:phase12 from the repo root instead."
            };
        case "inspect":
            return executeInspectCommand(parsed.file);
        case "resume":
            return executeResumeCommand(parsed.loopId);
    }
}
/** Handles `run`: resolves guardrails, drives the loop through the selected adapter, persists the record and renders the JSON report. */
async function executeRunCommand(request, args) {
    const resolvedGuardrails = await resolveGuardrails(request, args);
    // Verifier commands given on the command line win over the resolved rules.
    const verificationPlan = request.verificationPlan.length > 0
        ? request.verificationPlan
        : resolvedGuardrails.verifierRules;
    const resolvedRequest = {
        ...request,
        budget: resolvedGuardrails.budget,
        verificationPlan,
        metadata: {
            ...request.metadata,
            policyProfile: resolvedGuardrails.policyProfile,
            telemetryDestination: resolvedGuardrails.telemetryDestination
        }
    };
    const workingDirectory = request.cwd ?? readOption(args, "--cwd") ?? process.cwd();
    const adapter = selectAdapter(args, workingDirectory, request.model, request.engine);
    // Task fields shared by the real run and by the fallback record.
    const baseTask = {
        title: resolvedRequest.title,
        objective: resolvedRequest.objective,
        verificationPlan: resolvedRequest.verificationPlan,
        repoRoot: workingDirectory
    };
    let result;
    let runFailure;
    try {
        result = await runMartin({
            workspaceId: resolvedRequest.workspaceId,
            projectId: resolvedRequest.projectId,
            task: {
                ...baseTask,
                ...(resolvedRequest.allowedPaths?.length ? { allowedPaths: resolvedRequest.allowedPaths } : {}),
                ...(resolvedRequest.deniedPaths?.length ? { deniedPaths: resolvedRequest.deniedPaths } : {}),
                ...(resolvedRequest.acceptanceCriteria?.length ? { acceptanceCriteria: resolvedRequest.acceptanceCriteria } : {})
            },
            budget: resolvedRequest.budget,
            metadata: resolvedRequest.metadata,
            adapter
        });
    }
    catch (error) {
        // Keep the cause: the fallback decision below only says "adapter-unavailable",
        // which would otherwise hide the actual reason the run failed.
        runFailure = error instanceof Error ? error.message : String(error);
        const fallbackLoop = createLoopRecord({
            workspaceId: resolvedRequest.workspaceId,
            projectId: resolvedRequest.projectId,
            task: baseTask,
            budget: resolvedRequest.budget,
            metadata: resolvedRequest.metadata,
            status: "exited",
            lifecycleState: "human_escalation"
        });
        result = {
            loop: fallbackLoop,
            decision: {
                shouldExit: true,
                status: "exited",
                lifecycleState: "human_escalation",
                reason: "adapter-unavailable"
            }
        };
    }
    // Append the loop record to ~/.martin/runs/<workspaceId>.jsonl.
    try {
        const runsDir = join(homedir(), ".martin", "runs");
        await mkdir(runsDir, { recursive: true });
        const outFile = join(runsDir, `${resolvedRequest.workspaceId}.jsonl`);
        await appendFile(outFile, JSON.stringify(result.loop) + "\n", "utf8");
    }
    catch {
        // Deliberately best-effort: a persistence failure must not hide the run output.
    }
    return {
        exitCode: 0,
        stdout: JSON.stringify({
            command: "run",
            decision: result.decision,
            loop: result.loop,
            effectivePolicy: {
                configPath: resolvedGuardrails.configPath,
                policyProfile: resolvedGuardrails.policyProfile,
                destructiveActionPolicy: resolvedGuardrails.destructiveActionPolicy,
                verifierRules: resolvedGuardrails.verifierRules,
                budget: resolvedGuardrails.budget,
                maxUsd: resolvedGuardrails.budget.maxUsd,
                softLimitUsd: resolvedGuardrails.budget.softLimitUsd,
                maxIterations: resolvedGuardrails.budget.maxIterations,
                maxTokens: resolvedGuardrails.budget.maxTokens,
                telemetryDestination: resolvedGuardrails.telemetryDestination
            }
        }, null, 2),
        stderr: runFailure === undefined
            ? ""
            : `Warning: run did not complete and was recorded as human_escalation: ${runFailure}`
    };
}
/** Handles `inspect`: reads a JSON or JSONL file of loop records and summarizes it with `buildPortfolioSnapshot`. */
async function executeInspectCommand(file) {
    if (!file) {
        // Without this guard readFile("") fails with an ENOENT message that never mentions --file.
        return {
            exitCode: 1,
            stdout: "",
            stderr: "Error: inspect requires a file. Usage: martin inspect --file <path>"
        };
    }
    try {
        const contents = await readFile(file, "utf8");
        const loops = parseLoopRecords(contents);
        return {
            exitCode: 0,
            stdout: JSON.stringify({
                command: "inspect",
                source: file,
                summary: buildPortfolioSnapshot(loops)
            }, null, 2),
            stderr: ""
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return { exitCode: 1, stdout: "", stderr: `Error: ${message}` };
    }
}
/** Handles `resume`: scans every `*.jsonl` file in ~/.martin/runs/ for a record whose `loopId` matches. */
async function executeResumeCommand(loopId) {
    if (!loopId) {
        return {
            exitCode: 1,
            stdout: "",
            stderr: "Error: resume requires a loop ID. Usage: martin resume <loopId>"
        };
    }
    try {
        const runsDir = join(homedir(), ".martin", "runs");
        const { readdir } = await import("node:fs/promises");
        // A missing runs directory is treated as "no records".
        const files = await readdir(runsDir).catch(() => []);
        let found = null;
        for (const file of files.filter((f) => f.endsWith(".jsonl"))) {
            const contents = await readFile(join(runsDir, file), "utf8");
            for (const line of contents.split("\n").filter(Boolean)) {
                try {
                    const record = JSON.parse(line);
                    // No early exit: when several records share the id, the last one read wins.
                    if (record.loopId === loopId) {
                        found = record;
                    }
                }
                catch { /* skip malformed */ }
            }
        }
        if (!found) {
            return {
                exitCode: 1,
                stdout: "",
                stderr: `Error: loop ${loopId} not found in ~/.martin/runs/`
            };
        }
        return {
            exitCode: 0,
            stdout: JSON.stringify({ command: "resume", loop: found }, null, 2),
            stderr: ""
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return { exitCode: 1, stdout: "", stderr: `Error: ${message}` };
    }
}
|
|
187
|
+
/**
 * Turns raw CLI tokens into a command descriptor.
 *
 * @param args Tokens with the command name first.
 * @returns One of `{ command: "help" }`, `{ command: "run", request }`,
 *   `{ command: "bench", suiteId }`, `{ command: "inspect", file }` or
 *   `{ command: "resume", loopId }`. A missing or unrecognized command maps to `help`.
 */
export function parseCliArguments(args) {
    const [command, ...rest] = args;
    if (command === "run") {
        return { command: "run", request: parseRunRequest(rest) };
    }
    if (command === "bench") {
        return {
            command: "bench",
            suiteId: readOption(rest, "--suite") ?? "ralphy-smoke"
        };
    }
    if (command === "inspect") {
        return {
            command: "inspect",
            file: readOption(rest, "--file") ?? ""
        };
    }
    if (command === "resume") {
        // Only a non-flag first token is a positional loop ID; otherwise
        // `resume --loop-id abc` would resolve to the literal string "--loop-id".
        const positional = rest[0] !== undefined && !rest[0].startsWith("--") ? rest[0] : undefined;
        return { command: "resume", loopId: positional ?? readOption(rest, "--loop-id") ?? "" };
    }
    // Covers "help", "--help", "-h", no command at all, and anything unrecognized.
    return { command: "help" };
}
/** Builds the `run` request from the tokens that follow the command name. */
function parseRunRequest(rest) {
    const verificationPlan = [];
    const metadata = {};
    const allowedPaths = [];
    const deniedPaths = [];
    const acceptanceCriteria = [];
    const budget = {
        maxUsd: 10,
        softLimitUsd: 7,
        maxIterations: 3,
        maxTokens: 20_000
    };
    let workspaceId;
    let projectId;
    let title;
    let objective;
    let configPath;
    let cwd;
    let model;
    let engine;
    // Missing, blank or non-numeric values yield undefined so the current limit is kept;
    // a NaN limit would make every later budget comparison false.
    const readNumber = (raw) => {
        if (typeof raw !== "string" || raw.trim().length === 0) {
            return undefined;
        }
        const parsed = Number(raw);
        return Number.isFinite(parsed) ? parsed : undefined;
    };
    // First positional arg (not a flag) is treated as the objective and the default title.
    if (rest[0] && !rest[0].startsWith("--")) {
        objective = rest[0];
        title = rest[0];
    }
    for (let index = 0; index < rest.length; index += 1) {
        const token = rest[index];
        const next = rest[index + 1];
        switch (token) {
            case "--workspace":
                workspaceId = next;
                break;
            case "--project":
                projectId = next;
                break;
            case "--title":
                title = next;
                break;
            case "--objective":
                objective = next;
                title ??= next;
                break;
            case "--verify":
                if (next) {
                    verificationPlan.push(next);
                }
                break;
            case "--metadata":
                if (next) {
                    // Split on the first "=" only, so values that themselves contain "="
                    // (URLs, base64, nested key=value pairs) are kept whole.
                    const separator = next.indexOf("=");
                    const key = separator >= 0 ? next.slice(0, separator) : next;
                    const value = separator >= 0 ? next.slice(separator + 1) : "";
                    if (key && value) {
                        metadata[key] = value;
                    }
                }
                break;
            case "--budget":
            case "--budget-usd":
                budget.maxUsd = readNumber(next) ?? budget.maxUsd;
                break;
            case "--soft-limit-usd":
                budget.softLimitUsd = readNumber(next) ?? budget.softLimitUsd;
                break;
            case "--max-iterations":
                budget.maxIterations = readNumber(next) ?? budget.maxIterations;
                break;
            case "--max-tokens":
                budget.maxTokens = readNumber(next) ?? budget.maxTokens;
                break;
            case "--policy":
                if (next) {
                    metadata.policyProfile = next;
                }
                break;
            case "--telemetry":
                if (next) {
                    metadata.telemetryDestination = next;
                }
                break;
            case "--config":
                configPath = next;
                break;
            case "--cwd":
                cwd = next;
                break;
            case "--allow-path":
                if (next) {
                    allowedPaths.push(next);
                }
                break;
            case "--deny-path":
                if (next) {
                    deniedPaths.push(next);
                }
                break;
            case "--accept":
                if (next) {
                    acceptanceCriteria.push(next);
                }
                break;
            case "--model":
                model = next;
                break;
            case "--engine":
                engine = next;
                break;
            default:
                // Not a recognized flag: leave the following token to be examined on its own.
                continue;
        }
        // Every recognized flag consumes the token after it as its value.
        index += 1;
    }
    return {
        workspaceId: workspaceId ?? "ws_default",
        projectId: projectId ?? "proj_default",
        title: title ?? objective ?? "Martin Loop Task",
        objective: objective ?? title ?? "Martin Loop Task",
        verificationPlan,
        metadata,
        budget,
        ...(configPath ? { configPath } : {}),
        ...(cwd ? { cwd } : {}),
        ...(model ? { model } : {}),
        ...(engine ? { engine } : {}),
        ...(allowedPaths.length ? { allowedPaths } : {}),
        ...(deniedPaths.length ? { deniedPaths } : {}),
        ...(acceptanceCriteria.length ? { acceptanceCriteria } : {})
    };
}
|
|
368
|
+
/**
 * Builds the help text shown for `help`, `--help`, `-h` and unrecognized commands.
 *
 * @returns The title plus the Usage, Commands and Common options sections,
 *   each separated from the previous one by a single blank line.
 */
export function renderCliHelp() {
    const usageSection = [
        "Usage:",
        " martin-loop run <objective> [options]",
        " martin run <objective> [options] (alias)",
        " martin-loop run --objective <text> [options]",
        " martin-loop inspect --file <path>",
        " martin-loop resume <loopId>",
        " martin-loop bench --suite <suiteId>"
    ];
    const commandSection = [
        "Commands:",
        " run Execute a bounded Martin loop against the current repository.",
        " inspect Read a persisted loop record and summarize its portfolio metrics.",
        " resume Load a persisted loop record by loop ID from ~/.martin/runs/.",
        " bench Redirect to the workspace-only RC benchmark harness."
    ];
    const optionSection = [
        "Common options:",
        " --help Show this message.",
        " --engine <name> Adapter to use: claude (default) or codex.",
        " --model <name> Override the model (e.g. claude-sonnet-4-6).",
        " --cwd <path> Set the repo root used for repo-backed runs.",
        " --budget <n> Set the hard cost cap in USD (subprocess killed at limit).",
        " --budget-usd <n> Alias for --budget.",
        " --verify <cmd> Shell command to run as the verifier after each attempt.",
        " --max-iterations <n> Set the maximum number of attempts.",
        " --allow-path <glob> Restrict agent writes to this path pattern (repeatable).",
        " --deny-path <glob> Block agent from this path pattern (repeatable).",
        " --accept <criterion> Add an acceptance criterion to the prompt (repeatable).",
        " --config <path> Path to martin.config.yaml."
    ];
    // Lines within a section are newline-joined; sections are separated by one empty line.
    return [["Martin Loop CLI"], usageSection, commandSection, optionSection]
        .map((section) => section.join("\n"))
        .join("\n\n");
}
|
|
401
|
+
/**
 * Looks up the value that follows a flag.
 *
 * @param tokens CLI tokens to search.
 * @param flag Flag to look for, e.g. `"--cwd"`.
 * @returns The token right after the first occurrence of `flag`, or `undefined`
 *   when the flag is absent or is the last token.
 */
function readOption(tokens, flag) {
    const position = tokens.indexOf(flag);
    if (position < 0) {
        return undefined;
    }
    return tokens[position + 1];
}
|
|
405
|
+
/**
 * Reports whether a flag appears anywhere in the token list.
 *
 * @param tokens CLI tokens to search.
 * @param flag Exact flag text, e.g. `"--policy"`.
 * @returns `true` when some token equals `flag`.
 */
function hasFlag(tokens, flag) {
    return tokens.some((token) => token === flag);
}
|
|
408
|
+
/**
 * Parses loop records from either a single JSON document or JSON Lines text.
 *
 * @param contents File contents.
 * @returns The parsed array, a one-element array wrapping a single parsed value,
 *   or one entry per non-blank line when the text is not valid JSON as a whole.
 * @throws The whole-document parse error when there are no non-blank lines,
 *   or the per-line parse error when a line is not valid JSON.
 */
function parseLoopRecords(contents) {
    let document;
    try {
        document = JSON.parse(contents);
    }
    catch (jsonError) {
        // Not one JSON document: fall back to parsing each non-blank line separately.
        const records = [];
        for (const row of contents.split(/\r?\n/u)) {
            if (row.trim().length > 0) {
                records.push(JSON.parse(row));
            }
        }
        if (records.length === 0) {
            throw jsonError;
        }
        return records;
    }
    return Array.isArray(document) ? document : [document];
}
|
|
421
|
+
/**
 * Merges the parsed `run` request with the guardrails config file into the effective policy.
 *
 * Precedence per budget field: a flag present on the command line, then the config file,
 * then the value already on `request.budget`. `policyProfile` and `telemetryDestination`
 * follow the same order with the defaults `"balanced"` and `"local-only"`.
 *
 * @param request Parsed run request (`budget`, `metadata`, `verificationPlan`, optional `configPath`).
 * @param rawArgs Original CLI tokens including the command name; used to detect which flags were given.
 * @returns `{ configPath, policyProfile, telemetryDestination, destructiveActionPolicy, verifierRules, budget }`.
 * @throws Whatever `loadGuardrailsConfig` throws.
 */
async function resolveGuardrails(request, rawArgs) {
    const tokens = rawArgs.slice(1);
    const { config, configPath } = await loadGuardrailsConfig(request.configPath);
    const budget = {
        maxUsd: config?.budget?.maxUsd ?? request.budget.maxUsd,
        softLimitUsd: config?.budget?.softLimitUsd ?? request.budget.softLimitUsd,
        maxIterations: config?.budget?.maxIterations ?? request.budget.maxIterations,
        maxTokens: config?.budget?.maxTokens ?? request.budget.maxTokens
    };
    // Flags given explicitly beat the config file. `--budget` is listed next to its alias
    // `--budget-usd`; checking only the alias let the config file override `--budget`.
    const cliBudgetFlags = [
        [["--budget", "--budget-usd"], "maxUsd"],
        [["--soft-limit-usd"], "softLimitUsd"],
        [["--max-iterations"], "maxIterations"],
        [["--max-tokens"], "maxTokens"]
    ];
    for (const [flags, field] of cliBudgetFlags) {
        if (flags.some((flag) => hasFlag(tokens, flag))) {
            budget[field] = request.budget[field];
        }
    }
    // Keep the soft limit below the hard cap (e.g. `--budget 5` against the default
    // soft limit of 7): fall back to 75% of the cap, rounded to cents.
    if (budget.softLimitUsd >= budget.maxUsd) {
        budget.softLimitUsd = Math.round(budget.maxUsd * 0.75 * 100) / 100;
    }
    let policyProfile = config?.policyProfile ?? "balanced";
    if (hasFlag(tokens, "--policy")) {
        policyProfile = request.metadata.policyProfile ?? policyProfile;
    }
    let telemetryDestination = config?.governance?.telemetryDestination ?? "local-only";
    if (hasFlag(tokens, "--telemetry")) {
        telemetryDestination = request.metadata.telemetryDestination ?? telemetryDestination;
    }
    const destructiveActionPolicy = config?.governance?.destructiveActionPolicy ?? "approval";
    // Verifier commands: the request's plan, then the config list (even if empty), then "pnpm test".
    let verifierRules;
    if (request.verificationPlan.length > 0) {
        verifierRules = request.verificationPlan;
    }
    else if (config?.governance?.verifierRules !== undefined) {
        verifierRules = config.governance.verifierRules;
    }
    else {
        verifierRules = ["pnpm test"];
    }
    return {
        configPath,
        policyProfile,
        telemetryDestination,
        destructiveActionPolicy,
        verifierRules,
        budget
    };
}
|
|
469
|
+
/**
 * Reads and parses the guardrails YAML file.
 *
 * @param configPath Optional path from `--config`; when falsy, `martin.config.yaml`
 *   in the invocation root is used.
 * @returns `{ config, configPath }`, where `config` is `undefined` if the default
 *   file does not exist.
 * @throws `Error("Config file not found: …")` when an explicitly given file is missing;
 *   any other read or parse error is rethrown unchanged.
 */
async function loadGuardrailsConfig(configPath) {
    const configIsExplicit = typeof configPath === "string" && configPath.trim().length > 0;
    let resolvedPath;
    if (configPath) {
        resolvedPath = resolveConfigPath(configPath);
    }
    else {
        resolvedPath = join(getInvocationRoot(), "martin.config.yaml");
    }
    try {
        const yamlText = await readFile(resolvedPath, "utf8");
        return { config: parseGuardrailsYaml(yamlText), configPath: resolvedPath };
    }
    catch (error) {
        if (!isNodeErrorWithCode(error, "ENOENT")) {
            throw error;
        }
        // Missing file: fatal only when the caller asked for that file by name.
        if (configIsExplicit) {
            throw new Error(`Config file not found: ${resolvedPath}`);
        }
        return { config: undefined, configPath: resolvedPath };
    }
}
|
|
494
|
+
/**
 * Converts a `--config` value into an absolute path.
 *
 * @param configPath Path as typed by the user.
 * @returns The path itself when absolute, otherwise the path resolved against the
 *   invocation root. Off Windows, backslashes are first rewritten to forward slashes.
 */
function resolveConfigPath(configPath) {
    let candidate = configPath;
    if (process.platform !== "win32") {
        candidate = candidate.replace(/\\/g, "/");
    }
    return isAbsolute(candidate) ? candidate : resolve(getInvocationRoot(), candidate);
}
|
|
501
|
+
/**
 * Returns the directory that relative config paths are resolved against.
 *
 * @returns `process.env.INIT_CWD` when it is a non-blank string, otherwise `process.cwd()`.
 */
function getInvocationRoot() {
    const { INIT_CWD: initCwd } = process.env;
    if (typeof initCwd !== "string" || initCwd.trim().length === 0) {
        return process.cwd();
    }
    return initCwd;
}
|
|
505
|
+
/**
 * Parses the small YAML subset used by `martin.config.yaml`.
 *
 * Recognized layout (indentation must be exactly 0, 2 and 4 spaces):
 *   policyProfile: <scalar>
 *   budget:
 *     maxUsd | softLimitUsd | maxIterations | maxTokens: <number>
 *   governance:
 *     destructiveActionPolicy | telemetryDestination: <scalar>
 *     verifierRules:
 *       - <scalar>
 * Unknown keys and lines that do not fit this layout are ignored.
 *
 * @param contents Raw file text.
 * @returns A config object containing only the keys that were present.
 */
function parseGuardrailsYaml(contents) {
    const config = {};
    let section;
    let governanceList;
    const keyValuePattern = /^([A-Za-z][\w-]*):(?:\s*(.*))?$/u;
    for (const rawLine of contents.split(/\r?\n/u)) {
        // Strip comments, including ones starting in column 0. Requiring whitespace before
        // "#" left such lines in place, where they were handled as an unparseable top-level
        // line and ended any verifierRules list in progress.
        // NOTE(review): a "#" preceded by whitespace inside a quoted value is still cut off.
        const noComment = rawLine.replace(/(?:^|\s+)#.*$/u, "");
        if (noComment.trim().length === 0) {
            continue;
        }
        const indent = noComment.match(/^\s*/u)?.[0].length ?? 0;
        const line = noComment.trim();
        if (indent === 0) {
            // Any top-level line ends a list that was being collected.
            governanceList = undefined;
            const topMatch = line.match(keyValuePattern);
            if (!topMatch) {
                continue;
            }
            const [, key, rawValue = ""] = topMatch;
            if (key === "budget") {
                section = "budget";
                config.budget ??= {};
                continue;
            }
            if (key === "governance") {
                section = "governance";
                config.governance ??= {};
                continue;
            }
            section = undefined;
            if (key === "policyProfile" && rawValue.length > 0) {
                config.policyProfile = parseYamlScalar(rawValue);
            }
            continue;
        }
        if (indent === 2 && section) {
            const nestedMatch = line.match(keyValuePattern);
            if (!nestedMatch) {
                continue;
            }
            const [, key, rawValue = ""] = nestedMatch;
            if (section === "governance" && key === "verifierRules" && rawValue.length === 0) {
                // A bare `verifierRules:` opens a list; an explicitly empty list is kept as [].
                governanceList = "verifierRules";
                config.governance ??= {};
                config.governance.verifierRules = [];
                continue;
            }
            governanceList = undefined;
            const scalar = parseYamlScalar(rawValue);
            if (section === "budget") {
                config.budget ??= {};
                if (key === "maxUsd") {
                    config.budget.maxUsd = toFiniteNumber(scalar);
                }
                else if (key === "softLimitUsd") {
                    config.budget.softLimitUsd = toFiniteNumber(scalar);
                }
                else if (key === "maxIterations") {
                    config.budget.maxIterations = toFiniteNumber(scalar);
                }
                else if (key === "maxTokens") {
                    config.budget.maxTokens = toFiniteNumber(scalar);
                }
            }
            if (section === "governance") {
                config.governance ??= {};
                if (key === "destructiveActionPolicy") {
                    config.governance.destructiveActionPolicy = scalar;
                }
                else if (key === "telemetryDestination") {
                    config.governance.telemetryDestination = scalar;
                }
            }
            continue;
        }
        if (indent === 4 && section === "governance" && governanceList === "verifierRules") {
            const itemMatch = line.match(/^-\s*(.+)$/u);
            const itemValue = itemMatch?.[1];
            if (!itemValue) {
                continue;
            }
            config.governance ??= {};
            config.governance.verifierRules ??= [];
            config.governance.verifierRules.push(parseYamlScalar(itemValue));
        }
    }
    return config;
}
|
|
592
|
+
/**
 * Normalizes a raw YAML scalar: trims it and removes one pair of matching
 * surrounding quotes (double or single).
 *
 * @param value Raw text after the `key:` or `- ` prefix.
 * @returns The trimmed text without its enclosing quotes, if it had any.
 */
function parseYamlScalar(value) {
    const text = value.trim();
    for (const quote of ['"', "'"]) {
        if (text.startsWith(quote) && text.endsWith(quote)) {
            return text.slice(1, -1);
        }
    }
    return text;
}
|
|
600
|
+
/**
 * Converts a config scalar to a finite number.
 *
 * Blank strings are rejected explicitly: `Number("")` is `0`, so a key written
 * without a value (e.g. `maxUsd:`) would otherwise become a zero limit instead of "not set".
 *
 * @param value Scalar text (or a number).
 * @returns The finite number, or `undefined` for blank, non-numeric, non-finite
 *   or non-string/non-number input.
 */
function toFiniteNumber(value) {
    if (typeof value === "number") {
        return Number.isFinite(value) ? value : undefined;
    }
    if (typeof value !== "string" || value.trim().length === 0) {
        return undefined;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : undefined;
}
|
|
604
|
+
/**
 * Checks whether a caught value is an object carrying the given string `code`.
 *
 * @param error Any caught value.
 * @param code Code to match, e.g. `"ENOENT"`.
 * @returns `true` only for a non-null object whose `code` property is a string equal to `code`.
 */
function isNodeErrorWithCode(error, code) {
    if (error === null || typeof error !== "object") {
        return false;
    }
    if (!("code" in error)) {
        return false;
    }
    const actual = error.code;
    return typeof actual === "string" && actual === code;
}
|
|
611
|
+
/**
 * Picks the adapter for a run from the environment and the CLI flags.
 *
 *   MARTIN_LIVE=false         stub adapter; checked first, ignores engine and model
 *   --engine codex            Codex CLI adapter
 *   any other / no --engine   Claude CLI adapter (engine defaults to "claude")
 *
 * The model comes from `modelOverride`, else from `--model`; it is passed to the
 * adapter factory only when set.
 */
function selectAdapter(rawArgs, workingDirectory, modelOverride, engineOverride) {
    if (process.env.MARTIN_LIVE === "false") {
        return createStubDirectProviderAdapter({
            label: "Stub adapter (MARTIN_LIVE=false)",
            providerId: "stub",
            model: "stub"
        });
    }
    const engine = engineOverride ?? readOption(rawArgs, "--engine") ?? "claude";
    const model = modelOverride ?? readOption(rawArgs, "--model");
    const adapterOptions = model ? { workingDirectory, model } : { workingDirectory };
    if (engine === "codex") {
        return createCodexCliAdapter(adapterOptions);
    }
    return createClaudeCliAdapter(adapterOptions);
}
|
|
634
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { LoopRecord } from "../contracts/index.js";
|
|
2
|
+
/**
 * Summary shape for a persisted loop: identifiers, timestamps as strings, the task
 * headline, and status/budget/cost fields typed directly from `LoopRecord`.
 *
 * NOTE(review): presumably the shape of the `state.json` artifact written by
 * `persistLoopArtifacts` — confirm against persistence.js.
 */
export type PersistedLoopState = {
    loopId: string;
    workspaceId: string;
    projectId: string;
    status: LoopRecord["status"];
    lifecycleState: LoopRecord["lifecycleState"];
    createdAt: string;
    updatedAt: string;
    task: {
        title: string;
        objective: string;
        repoRoot?: string;
    };
    budget: LoopRecord["budget"];
    cost: LoopRecord["cost"];
    /** Three numeric counters. */
    metrics: Record<"attemptCount" | "eventCount" | "failureCount", number>;
};
|
|
23
|
+
/**
 * Returns the runs root as a string.
 *
 * @param env Optional environment map.
 *   NOTE(review): presumably defaults to `process.env` and may honor an override
 *   variable — confirm in persistence.js.
 */
export declare function resolveRunsRoot(env?: NodeJS.ProcessEnv): string;
|
|
24
|
+
/**
 * Persists every artifact of a finished loop to disk.
 *
 * Layout (Phase 3 flat path), under `~/.martin/runs/<loopId>/`:
 *   - `contract.json`  task + budget, immutable
 *   - `state.json`     status, cost, metrics summary
 *   - `ledger.jsonl`   all events, one JSON document per line
 *   - `attempts/`      one JSON file per attempt
 *
 * @param loop The loop record to write out.
 * @param options Optional settings; `runsRoot` is an optional string.
 *   NOTE(review): presumably overrides the default runs directory — confirm in persistence.js.
 * @returns A promise that resolves with no value.
 */
export declare function persistLoopArtifacts(loop: LoopRecord, options?: { runsRoot?: string }): Promise<void>;
|