@martinloop/mcp 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -132
- package/dist/discovery-metadata.d.ts +10 -5
- package/dist/discovery-metadata.js +95 -5
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/prompts.d.ts +1 -1
- package/dist/prompts.js +93 -1
- package/dist/resources.d.ts +9 -1
- package/dist/resources.js +247 -16
- package/dist/server-validation.d.ts +2 -1
- package/dist/server-validation.js +124 -0
- package/dist/server.js +379 -5
- package/dist/tools/doctor.d.ts +14 -1
- package/dist/tools/doctor.js +43 -8
- package/dist/tools/eval.d.ts +24 -0
- package/dist/tools/eval.js +66 -0
- package/dist/tools/get-run.d.ts +2 -0
- package/dist/tools/get-run.js +2 -1
- package/dist/tools/get-status.d.ts +8 -0
- package/dist/tools/get-status.js +18 -0
- package/dist/tools/get-verification-results.d.ts +2 -0
- package/dist/tools/get-verification-results.js +2 -1
- package/dist/tools/logs.d.ts +25 -0
- package/dist/tools/logs.js +49 -0
- package/dist/tools/plan.d.ts +20 -0
- package/dist/tools/plan.js +10 -0
- package/dist/tools/pr-tools.d.ts +31 -0
- package/dist/tools/pr-tools.js +112 -0
- package/dist/tools/preflight.d.ts +24 -1
- package/dist/tools/preflight.js +47 -7
- package/dist/tools/run-controls.d.ts +36 -0
- package/dist/tools/run-controls.js +88 -0
- package/dist/tools/run-dossier.d.ts +16 -0
- package/dist/tools/run-dossier.js +64 -2
- package/dist/tools/run-loop.d.ts +3 -2
- package/dist/tools/run-loop.js +52 -13
- package/dist/tools/tool-errors.d.ts +1 -1
- package/dist/tools/tool-errors.js +1 -1
- package/dist/tools/tool-support.d.ts +6 -3
- package/dist/tools/tool-support.js +37 -3
- package/dist/tools/workflow-governance.d.ts +133 -0
- package/dist/tools/workflow-governance.js +581 -0
- package/dist/vendor/adapters/claude-cli.d.ts +25 -0
- package/dist/vendor/adapters/claude-cli.js +279 -19
- package/dist/vendor/adapters/cli-bridge.d.ts +6 -0
- package/dist/vendor/adapters/cli-bridge.js +58 -9
- package/dist/vendor/adapters/codex-launcher.d.ts +44 -0
- package/dist/vendor/adapters/codex-launcher.js +247 -0
- package/dist/vendor/adapters/index.d.ts +4 -2
- package/dist/vendor/adapters/index.js +4 -1
- package/dist/vendor/adapters/openai-compatible.d.ts +62 -0
- package/dist/vendor/adapters/openai-compatible.js +267 -0
- package/dist/vendor/adapters/runtime-support.d.ts +3 -0
- package/dist/vendor/adapters/runtime-support.js +8 -1
- package/dist/vendor/adapters/verifier-only.js +4 -3
- package/dist/vendor/contracts/index.d.ts +39 -0
- package/dist/vendor/contracts/index.js +2 -0
- package/dist/vendor/core/index.d.ts +23 -3
- package/dist/vendor/core/index.js +88 -15
- package/dist/vendor/core/persistence/index.d.ts +2 -0
- package/dist/vendor/core/persistence/index.js +1 -0
- package/dist/vendor/core/persistence/integrity.d.ts +38 -0
- package/dist/vendor/core/persistence/integrity.js +239 -0
- package/dist/vendor/core/persistence/store.d.ts +7 -0
- package/dist/vendor/core/persistence/store.js +25 -1
- package/dist/vendor/core/policy.d.ts +9 -0
- package/dist/workflow-state.d.ts +25 -0
- package/dist/workflow-state.js +102 -0
- package/package.json +3 -3
- package/server.json +2 -2
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { appendFile, readFile } from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { invalidArgumentsError, unsupportedOperationError } from "./tool-errors.js";
|
|
4
|
+
import { loadDetailedLoopRecord } from "./run-store.js";
|
|
5
|
+
/**
 * Records a run-control request by appending one JSON line to the run's
 * `controls.jsonl` file, then reports the control state read back from it.
 *
 * @param action Requested control action. "cancel" and "pause" get their own
 *   summary text; any other value is summarised as a continue request.
 * @param input Run selector forwarded to `loadDetailedLoopRecord`; its optional
 *   `reason` and `requestedBy` fields are copied onto the receipt when truthy.
 * @returns An object with `ok`, a human-readable `summary`, the `loopId`, the
 *   `requestedAction`, and the `state` produced by `readRunControlState`.
 * @throws The error built by `unsupportedOperationError` when the loaded run
 *   has no canonical run directory to write into.
 */
export async function createRunControlReceipt(action, input) {
    const detail = await loadDetailedLoopRecord(input);
    const runDirectory = detail.canonicalRunDirectory;
    if (!runDirectory) {
        throw unsupportedOperationError("Run control receipts require a canonical run directory.", "Use a canonical loopId-backed Martin run before writing control receipts.");
    }
    // Identifier is a base-36 timestamp plus a short base-36 random suffix.
    const timestampPart = Date.now().toString(36);
    const randomPart = Math.random().toString(36).slice(2, 8);
    const optionalReason = input.reason ? { reason: input.reason } : {};
    const optionalRequester = input.requestedBy ? { requestedBy: input.requestedBy } : {};
    const receipt = {
        controlId: `ctl_${timestampPart}_${randomPart}`,
        loopId: detail.loop.loopId,
        action,
        requestedAt: new Date().toISOString(),
        ...optionalReason,
        ...optionalRequester
    };
    const serialized = `${JSON.stringify(receipt)}\n`;
    await appendFile(path.join(runDirectory, "controls.jsonl"), serialized, "utf8");
    // Re-read the file so the returned state reflects what is actually on disk.
    const state = await readRunControlState(detail);
    const loopId = detail.loop.loopId;
    let summary;
    if (action === "cancel") {
        summary = `Cancellation request recorded for ${loopId}.`;
    }
    else if (action === "pause") {
        summary = `Pause request recorded for ${loopId}.`;
    }
    else {
        summary = `Continue request recorded for ${loopId}.`;
    }
    return {
        ok: true,
        summary,
        loopId,
        requestedAction: action,
        state
    };
}
|
|
33
|
+
/**
 * Derives the current control state of a run from its `controls.jsonl` file.
 *
 * @param detailOrInput Either an already-loaded detailed loop record (as
 *   recognised by `isDetailedLoopSource`) or a selector that is passed to
 *   `loadDetailedLoopRecord`.
 * @returns An object with `requestedState` ("paused", "cancellation_requested"
 *   or "active", decided by the last receipt's action), `approvalState`, the
 *   full `receipts` list, and — when present — `latestReceipt` and
 *   `receiptPath`.
 */
export async function readRunControlState(detailOrInput) {
    let detail = detailOrInput;
    if (!isDetailedLoopSource(detailOrInput)) {
        detail = await loadDetailedLoopRecord(detailOrInput);
    }
    // Without a canonical run directory there is no receipt file to consult.
    let receiptPath;
    let receipts = [];
    if (detail.canonicalRunDirectory) {
        receiptPath = path.join(detail.canonicalRunDirectory, "controls.jsonl");
        receipts = await readControlReceipts(receiptPath);
    }
    // Only the most recent receipt determines the requested state.
    const latestReceipt = receipts.at(-1);
    const latestAction = latestReceipt?.action;
    let requestedState = "active";
    if (latestAction === "pause") {
        requestedState = "paused";
    }
    else if (latestAction === "cancel") {
        requestedState = "cancellation_requested";
    }
    const approvalState = latestAction === "pause" ? "resume_requested" : "not_required";
    return {
        requestedState,
        ...(latestReceipt ? { latestReceipt } : {}),
        approvalState,
        receipts,
        ...(receiptPath ? { receiptPath } : {})
    };
}
|
|
54
|
+
/**
 * Reads the control receipts stored as JSON Lines at `receiptPath`.
 *
 * Each non-blank line is parsed on its own. A line that is not valid JSON, or
 * that does not pass `isRunControlReceipt`, is skipped without affecting the
 * other lines, so one truncated or corrupted entry cannot hide earlier or later
 * pause/cancel requests.
 *
 * @param receiptPath Path of the `controls.jsonl` file to read.
 * @returns The valid receipts in file order; an empty array when the file
 *   cannot be read (best-effort: read failures are not propagated).
 */
export async function readControlReceipts(receiptPath) {
    let contents;
    try {
        contents = await readFile(receiptPath, "utf8");
    }
    catch {
        // Best-effort read: a missing or unreadable file means "no receipts".
        return [];
    }
    const receipts = [];
    for (const rawLine of contents.split(/\r?\n/u)) {
        const line = rawLine.trim();
        if (line.length === 0) {
            continue;
        }
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            // Skip only the malformed line; keep every other receipt.
            continue;
        }
        if (isRunControlReceipt(parsed)) {
            receipts.push(parsed);
        }
    }
    return receipts;
}
|
|
68
|
+
/**
 * Tells an already-loaded detailed loop record apart from a plain selector:
 * true only for non-null objects that have both a `loop` and a `runsRoot` key.
 */
function isDetailedLoopSource(value) {
    if (typeof value !== "object" || value === null) {
        return false;
    }
    return "loop" in value && "runsRoot" in value;
}
|
|
71
|
+
/**
 * Shape check for a parsed receipt line: a non-null object whose `controlId`,
 * `loopId`, `action` and `requestedAt` properties are all strings. The value of
 * `action` itself is not restricted here.
 */
function isRunControlReceipt(value) {
    if (value === null || typeof value !== "object") {
        return false;
    }
    const requiredStringFields = ["controlId", "loopId", "action", "requestedAt"];
    return requiredStringFields.every((field) => typeof value[field] === "string");
}
|
|
79
|
+
/**
 * Normalises the optional free-text reason attached to a control request.
 *
 * @param value The reason supplied by the caller, or `undefined` when omitted.
 * @returns `undefined` when no reason was supplied, otherwise the reason with
 *   surrounding whitespace removed.
 * @throws The error built by `invalidArgumentsError("Invalid reason.")` when
 *   the value is not a string or is empty after trimming.
 */
export function validateControlReason(value) {
    if (value === undefined) {
        return undefined;
    }
    // Guard the type first: calling .trim() on null or a number would surface
    // as a raw TypeError instead of an invalid-arguments tool error.
    if (typeof value !== "string") {
        throw invalidArgumentsError("Invalid reason.");
    }
    const trimmed = value.trim();
    if (trimmed.length === 0) {
        throw invalidArgumentsError("Invalid reason.");
    }
    return trimmed;
}
|
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
import { buildArtifactSummary, buildBudgetSnapshot, buildCostSnapshot, buildEventSummaries, buildLoopPreview, buildVerificationSummary } from "./tool-support.js";
|
|
2
|
+
import { readRunControlState } from "./run-controls.js";
|
|
3
|
+
import { martinEvalTool } from "./eval.js";
|
|
4
|
+
import { assessRunRisk } from "./workflow-governance.js";
|
|
5
|
+
import type { ReceiptIntegritySummary } from "../vendor/contracts/index.js";
|
|
2
6
|
export interface MartinRunDossierInput {
|
|
3
7
|
file?: string;
|
|
4
8
|
loopId?: string;
|
|
5
9
|
runsDir?: string;
|
|
6
10
|
latest?: boolean;
|
|
11
|
+
format?: "json" | "md" | "github-pr";
|
|
7
12
|
}
|
|
8
13
|
export interface MartinRunDossierOutput {
|
|
9
14
|
source: string;
|
|
@@ -11,6 +16,7 @@ export interface MartinRunDossierOutput {
|
|
|
11
16
|
loop: ReturnType<typeof buildLoopPreview>;
|
|
12
17
|
budget: ReturnType<typeof buildBudgetSnapshot>;
|
|
13
18
|
cost: ReturnType<typeof buildCostSnapshot>;
|
|
19
|
+
receiptIntegrity: ReceiptIntegritySummary;
|
|
14
20
|
attempts: Array<{
|
|
15
21
|
index: number;
|
|
16
22
|
attemptId?: string;
|
|
@@ -30,6 +36,16 @@ export interface MartinRunDossierOutput {
|
|
|
30
36
|
resources: string[];
|
|
31
37
|
prompts: string[];
|
|
32
38
|
};
|
|
39
|
+
review: {
|
|
40
|
+
diffSummary: string;
|
|
41
|
+
risk: ReturnType<typeof assessRunRisk>;
|
|
42
|
+
outcome: "passed" | "failed" | "needs_review";
|
|
43
|
+
nextAction: string;
|
|
44
|
+
};
|
|
45
|
+
evaluation: Awaited<ReturnType<typeof martinEvalTool>>;
|
|
46
|
+
control: Awaited<ReturnType<typeof readRunControlState>>;
|
|
47
|
+
format: "json" | "md" | "github-pr";
|
|
48
|
+
rendered?: string;
|
|
33
49
|
inspection: {
|
|
34
50
|
runsRoot: string;
|
|
35
51
|
canonicalRunDirectory?: string;
|
|
@@ -1,9 +1,23 @@
|
|
|
1
|
-
import { buildArtifactSummary, buildBudgetSnapshot, buildCostSnapshot, buildEventSummaries, buildLoopPreview, buildSuggestedPromptNames, buildSuggestedResourceUris, buildVerificationSummary } from "./tool-support.js";
|
|
1
|
+
import { buildArtifactSummary, buildBudgetSnapshot, buildCostSnapshot, buildEventSummaries, buildLoopPreview, resolveReceiptIntegrity, buildSuggestedPromptNames, buildSuggestedResourceUris, buildVerificationSummary } from "./tool-support.js";
|
|
2
|
+
import { resolveTrustedLoopRepoRoot } from "../server-validation.js";
|
|
2
3
|
import { loadDetailedLoopRecord, readAttemptArtifactFiles, readLedgerEvents } from "./run-store.js";
|
|
4
|
+
import { readRunControlState } from "./run-controls.js";
|
|
5
|
+
import { martinEvalTool } from "./eval.js";
|
|
6
|
+
import { assessRunRisk, inspectRepoSignals } from "./workflow-governance.js";
|
|
3
7
|
export async function martinRunDossierTool(input) {
|
|
4
8
|
const detail = await loadDetailedLoopRecord(input);
|
|
5
9
|
const ledgerEvents = await readLedgerEvents(detail);
|
|
6
10
|
const verification = buildVerificationSummary(detail.loop, ledgerEvents);
|
|
11
|
+
const control = await readRunControlState(detail);
|
|
12
|
+
const evaluation = await martinEvalTool(input);
|
|
13
|
+
const repoRoot = resolveTrustedLoopRepoRoot(detail.loop.task?.repoRoot);
|
|
14
|
+
const risk = assessRunRisk({
|
|
15
|
+
objective: detail.loop.task?.objective ?? detail.loop.loopId,
|
|
16
|
+
allowedPaths: detail.loop.task?.allowedPaths ?? [],
|
|
17
|
+
blockedPaths: detail.loop.task?.deniedPaths ?? [],
|
|
18
|
+
verifiers: detail.loop.task?.verificationPlan ?? [],
|
|
19
|
+
signals: inspectRepoSignals(repoRoot)
|
|
20
|
+
});
|
|
7
21
|
const attempts = await Promise.all(detail.loop.attempts.map(async (attempt) => ({
|
|
8
22
|
index: attempt.index,
|
|
9
23
|
...(attempt.attemptId ? { attemptId: attempt.attemptId } : {}),
|
|
@@ -16,12 +30,29 @@ export async function martinRunDossierTool(input) {
|
|
|
16
30
|
...(attempt.summary ? { summary: attempt.summary } : {}),
|
|
17
31
|
artifactFiles: await readAttemptArtifactFiles(detail, attempt.index)
|
|
18
32
|
})));
|
|
19
|
-
|
|
33
|
+
const review = {
|
|
34
|
+
diffSummary: attempts.length > 0
|
|
35
|
+
? `Run touched ${attempts.length} attempt(s); latest summary: ${attempts.at(-1)?.summary ?? "No attempt summary recorded."}`
|
|
36
|
+
: "No attempts were recorded for this run.",
|
|
37
|
+
risk,
|
|
38
|
+
outcome: verification.status === "passed"
|
|
39
|
+
? "passed"
|
|
40
|
+
: verification.status === "failed"
|
|
41
|
+
? "failed"
|
|
42
|
+
: "needs_review",
|
|
43
|
+
nextAction: verification.status === "passed"
|
|
44
|
+
? "Review the dossier and evaluation, then decide whether to merge or promote."
|
|
45
|
+
: verification.status === "failed"
|
|
46
|
+
? "Investigate the latest verifier failure before retrying or promoting."
|
|
47
|
+
: "Collect more evidence before claiming completion."
|
|
48
|
+
};
|
|
49
|
+
const output = {
|
|
20
50
|
source: detail.source,
|
|
21
51
|
sourceKind: detail.sourceKind,
|
|
22
52
|
loop: buildLoopPreview(detail.loop),
|
|
23
53
|
budget: buildBudgetSnapshot(detail.loop.budget),
|
|
24
54
|
cost: buildCostSnapshot(detail.loop.cost),
|
|
55
|
+
receiptIntegrity: resolveReceiptIntegrity(detail.loop),
|
|
25
56
|
attempts,
|
|
26
57
|
verification,
|
|
27
58
|
artifacts: buildArtifactSummary(detail.loop),
|
|
@@ -30,6 +61,10 @@ export async function martinRunDossierTool(input) {
|
|
|
30
61
|
resources: buildSuggestedResourceUris(detail.loop.loopId),
|
|
31
62
|
prompts: buildSuggestedPromptNames()
|
|
32
63
|
},
|
|
64
|
+
review,
|
|
65
|
+
evaluation,
|
|
66
|
+
control,
|
|
67
|
+
format: input.format ?? "json",
|
|
33
68
|
inspection: {
|
|
34
69
|
runsRoot: detail.runsRoot,
|
|
35
70
|
...(detail.canonicalRunDirectory ? { canonicalRunDirectory: detail.canonicalRunDirectory } : {}),
|
|
@@ -38,4 +73,31 @@ export async function martinRunDossierTool(input) {
|
|
|
38
73
|
},
|
|
39
74
|
warnings: [...detail.warnings, ...verification.warnings]
|
|
40
75
|
};
|
|
76
|
+
if (output.format !== "json") {
|
|
77
|
+
output.rendered = renderDossier(output);
|
|
78
|
+
}
|
|
79
|
+
return output;
|
|
80
|
+
}
|
|
81
|
+
/**
 * Renders a dossier output object as plain text / Markdown.
 *
 * The "github-pr" format uses a level-2 heading and appends an evaluation
 * line; any other format uses a level-1 heading and omits it.
 *
 * @param output Dossier output; reads `format`, `loop`, `attempts`,
 *   `verification`, `review` and (for "github-pr") `evaluation`.
 * @returns The rendered lines joined with "\n".
 */
function renderDossier(output) {
    const isGithubPr = output.format === "github-pr";
    const riskReasons = output.review.risk.reasons;
    const lines = [
        isGithubPr ? "## MartinLoop Run Dossier" : "# MartinLoop Run Dossier",
        "",
        `Objective: ${output.loop.objective}`,
        `Run: ${output.loop.loopId}`,
        `Status: ${output.loop.status} / ${output.loop.lifecycleState}`,
        `Attempts: ${output.attempts.length}`,
        `Verifiers: ${output.verification.status}`,
        `Risk: ${output.review.risk.level} (${output.review.risk.score})`,
        // This line lists the risk assessment's reasons, so it is labelled as
        // such (it was previously mislabelled "Allowed paths").
        `Risk reasons: ${riskReasons.length > 0 ? riskReasons.join("; ") : "No major risk reasons recorded."}`,
        "",
        "Review Summary:",
        output.review.diffSummary,
        "",
        "Next Action:",
        output.review.nextAction
    ];
    if (isGithubPr) {
        lines.push("", `Evaluation: ${output.evaluation.grade} (${output.evaluation.score})`);
    }
    return lines.join("\n");
}
|
package/dist/tools/run-loop.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { type LoopBudget } from "../vendor/contracts/index.js";
|
|
1
|
+
import { type LoopBudget, type ReceiptScope } from "../vendor/contracts/index.js";
|
|
2
2
|
import { buildArtifactSummary, buildVerificationSummary, buildLoopPreview, type MartinEngine } from "./tool-support.js";
|
|
3
3
|
export interface RunLoopInput {
|
|
4
4
|
objective: string;
|
|
5
5
|
workingDirectory?: string;
|
|
6
|
-
engine?: "claude" | "codex";
|
|
6
|
+
engine?: "claude" | "codex" | "gemini";
|
|
7
7
|
model?: string;
|
|
8
8
|
maxUsd?: number;
|
|
9
9
|
maxIterations?: number;
|
|
@@ -35,6 +35,7 @@ export interface RunLoopOutput {
|
|
|
35
35
|
runDirectory: string;
|
|
36
36
|
loopRecordPath: string;
|
|
37
37
|
ledgerPath: string;
|
|
38
|
+
receiptScope: ReceiptScope;
|
|
38
39
|
loop: ReturnType<typeof buildLoopPreview>;
|
|
39
40
|
verification: ReturnType<typeof buildVerificationSummary>;
|
|
40
41
|
artifacts: ReturnType<typeof buildArtifactSummary>;
|
package/dist/tools/run-loop.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { createClaudeCliAdapter, createCodexCliAdapter,
|
|
1
|
+
import { createClaudeCliAdapter, createCodexCliAdapter, createGeminiCliAdapter, probeCodexLaunch, resolveCliCommandAvailability, createVerifierOnlyAdapter } from "../vendor/adapters/index.js";
|
|
2
2
|
import { createFileRunStore, evaluateCostGovernor, resolveRunsRoot, runMartin } from "../vendor/core/index.js";
|
|
3
3
|
import { DEFAULT_BUDGET } from "../vendor/contracts/index.js";
|
|
4
4
|
import { normalizeSafePathPatterns, resolveSafeRepoRoot } from "../server-validation.js";
|
|
@@ -11,19 +11,57 @@ export async function runLoopTool(input) {
|
|
|
11
11
|
const allowedPaths = normalizeSafePathPatterns(input.allowedPaths, "allowedPaths");
|
|
12
12
|
const deniedPaths = normalizeSafePathPatterns(input.deniedPaths, "deniedPaths");
|
|
13
13
|
const executionMode = resolveExecutionMode();
|
|
14
|
-
const
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
14
|
+
const workspaceRoot = resolveSafeRepoRoot();
|
|
15
|
+
const runsRoot = resolveRunsRoot(process.env);
|
|
16
|
+
const receiptScope = {
|
|
17
|
+
invocationRoot: workspaceRoot,
|
|
18
|
+
workingDirectory,
|
|
19
|
+
repoRoot: workingDirectory,
|
|
20
|
+
runsRoot
|
|
21
|
+
};
|
|
22
|
+
if (executionMode.liveMode) {
|
|
23
|
+
if (engine === "codex") {
|
|
24
|
+
const engineAvailability = resolveCliCommandAvailability("codex");
|
|
25
|
+
if (!engineAvailability.available) {
|
|
26
|
+
throw new MartinToolError("engine_unavailable", `Engine '${engine}' is not available on PATH.`, {
|
|
27
|
+
category: "environment",
|
|
28
|
+
suggestion: "Install the requested CLI or set MARTIN_LIVE=false for a no-spend proof run.",
|
|
29
|
+
retryable: false
|
|
30
|
+
});
|
|
31
|
+
}
|
|
32
|
+
const codexProbe = probeCodexLaunch({
|
|
33
|
+
workingDirectory,
|
|
34
|
+
availability: engineAvailability
|
|
35
|
+
});
|
|
36
|
+
if (!codexProbe.ok) {
|
|
37
|
+
throw new MartinToolError("engine_unavailable", codexProbe.summary, {
|
|
38
|
+
category: "environment",
|
|
39
|
+
suggestion: "Run martin_doctor or martin_preflight with engine='codex' before retrying live governed work.",
|
|
40
|
+
retryable: false
|
|
41
|
+
});
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
else {
|
|
45
|
+
const engineAvailability = getEngineAvailability(engine);
|
|
46
|
+
if (!engineAvailability.available) {
|
|
47
|
+
throw new MartinToolError("engine_unavailable", `Engine '${engine}' is not available on PATH.`, {
|
|
48
|
+
category: "environment",
|
|
49
|
+
suggestion: "Install the requested CLI or set MARTIN_LIVE=false for a no-spend proof run.",
|
|
50
|
+
retryable: false
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
}
|
|
21
54
|
}
|
|
22
|
-
const adapter =
|
|
23
|
-
?
|
|
55
|
+
const adapter = !executionMode.liveMode
|
|
56
|
+
? createVerifierOnlyAdapter({
|
|
57
|
+
workingDirectory,
|
|
58
|
+
label: "Proof mode adapter (MARTIN_LIVE=false)"
|
|
59
|
+
})
|
|
24
60
|
: engine === "codex"
|
|
25
61
|
? createCodexCliAdapter({ workingDirectory, ...(model ? { model } : {}) })
|
|
26
|
-
:
|
|
62
|
+
: engine === "gemini"
|
|
63
|
+
? createGeminiCliAdapter({ workingDirectory, ...(model ? { model } : {}) })
|
|
64
|
+
: createClaudeCliAdapter({ workingDirectory, ...(model ? { model } : {}) });
|
|
27
65
|
const partialBudget = {};
|
|
28
66
|
if (input.maxUsd !== undefined) {
|
|
29
67
|
partialBudget.maxUsd = input.maxUsd;
|
|
@@ -41,7 +79,8 @@ export async function runLoopTool(input) {
|
|
|
41
79
|
const result = await runMartin({
|
|
42
80
|
workspaceId: input.workspaceId ?? "ws_mcp",
|
|
43
81
|
projectId: input.projectId ?? "proj_mcp",
|
|
44
|
-
store: createFileRunStore({ runsRoot
|
|
82
|
+
store: createFileRunStore({ runsRoot }),
|
|
83
|
+
receiptScope,
|
|
45
84
|
task: {
|
|
46
85
|
title: input.objective.slice(0, 100),
|
|
47
86
|
objective: input.objective,
|
|
@@ -65,7 +104,6 @@ export async function runLoopTool(input) {
|
|
|
65
104
|
},
|
|
66
105
|
attemptsUsed: result.loop.attempts.length
|
|
67
106
|
});
|
|
68
|
-
const runsRoot = resolveRunsRoot(process.env);
|
|
69
107
|
const recordPaths = buildRunRecordPaths(runsRoot, result.loop.loopId);
|
|
70
108
|
const verification = buildVerificationSummary(result.loop);
|
|
71
109
|
const artifacts = buildArtifactSummary(result.loop);
|
|
@@ -87,6 +125,7 @@ export async function runLoopTool(input) {
|
|
|
87
125
|
budget,
|
|
88
126
|
inspection: {
|
|
89
127
|
...recordPaths,
|
|
128
|
+
receiptScope: result.loop.receiptScope ?? receiptScope,
|
|
90
129
|
loop: buildLoopPreview(result.loop),
|
|
91
130
|
verification,
|
|
92
131
|
artifacts
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { MartinErrorCategory } from "../vendor/contracts/index.js";
|
|
2
|
-
export type ToolFailureCode = "attempt_not_found" | "engine_unavailable" | "invalid_arguments" | "invalid_json" | "invalid_path" | "invalid_selector" | "no_loop_records" | "store_unreadable" | "tool_execution_failed" | "unknown_tool" | "unsupported_operation";
|
|
2
|
+
export type ToolFailureCode = "attempt_not_found" | "engine_unavailable" | "invalid_arguments" | "invalid_json" | "invalid_path" | "invalid_selector" | "no_loop_records" | "policy_blocked" | "store_unreadable" | "tool_execution_failed" | "unknown_tool" | "unsupported_operation";
|
|
3
3
|
export type ToolFailureCategory = MartinErrorCategory;
|
|
4
4
|
export interface ToolFailure {
|
|
5
5
|
code: ToolFailureCode;
|
|
@@ -92,7 +92,7 @@ export function toToolFailure(error) {
|
|
|
92
92
|
code: "engine_unavailable",
|
|
93
93
|
category: "environment",
|
|
94
94
|
message,
|
|
95
|
-
suggestion: "Install the requested CLI or set MARTIN_LIVE=false for
|
|
95
|
+
suggestion: "Install the requested CLI or set MARTIN_LIVE=false for a no-spend proof run.",
|
|
96
96
|
retryable: false
|
|
97
97
|
};
|
|
98
98
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type { LoopArtifact, LoopBudget, LoopCost, LoopEvent, LoopTask } from "../vendor/contracts/index.js";
|
|
1
|
+
import type { LoopArtifact, LoopBudget, LoopCost, LoopEvent, LoopTask, ReceiptIntegritySummary, ReceiptScope } from "../vendor/contracts/index.js";
|
|
2
2
|
import { type LedgerEvent, type LoopAttemptRecord, type LoopRunRecord } from "../vendor/core/index.js";
|
|
3
|
-
export type MartinEngine = "claude" | "codex";
|
|
3
|
+
export type MartinEngine = "claude" | "codex" | "gemini";
|
|
4
4
|
export interface InspectableLoopAttempt extends LoopAttemptRecord {
|
|
5
5
|
attemptId?: string;
|
|
6
6
|
summary?: string;
|
|
@@ -11,6 +11,8 @@ export interface InspectableLoopRecord extends Omit<LoopRunRecord, "attempts" |
|
|
|
11
11
|
artifacts?: LoopArtifact[];
|
|
12
12
|
events?: LoopEvent[];
|
|
13
13
|
metadata?: Record<string, string>;
|
|
14
|
+
receiptIntegrity?: ReceiptIntegritySummary;
|
|
15
|
+
receiptScope?: ReceiptScope;
|
|
14
16
|
}
|
|
15
17
|
export interface LoopPreview {
|
|
16
18
|
loopId: string;
|
|
@@ -90,7 +92,7 @@ export interface CliAvailability {
|
|
|
90
92
|
}
|
|
91
93
|
export interface ExecutionMode {
|
|
92
94
|
liveMode: boolean;
|
|
93
|
-
mode: "live" | "
|
|
95
|
+
mode: "live" | "proof";
|
|
94
96
|
detail: string;
|
|
95
97
|
}
|
|
96
98
|
export interface RunStoreInspection extends LoopCollectionSummary {
|
|
@@ -113,6 +115,7 @@ export declare function buildLoopPreview(loop: InspectableLoopRecord): LoopPrevi
|
|
|
113
115
|
export declare function buildAttemptSummary(attempt: InspectableLoopAttempt, artifacts?: AttemptArtifactFiles): AttemptSummary;
|
|
114
116
|
export declare function buildArtifactSummary(loop: InspectableLoopRecord): ArtifactSummary;
|
|
115
117
|
export declare function buildVerificationSummary(loop: InspectableLoopRecord, ledgerEvents?: LedgerEvent[]): VerificationSummary;
|
|
118
|
+
export declare function resolveReceiptIntegrity(loop: InspectableLoopRecord): ReceiptIntegritySummary;
|
|
116
119
|
export declare function buildEventSummaries(loop: InspectableLoopRecord, limit?: number): EventSummary[];
|
|
117
120
|
export declare function buildLoopCollectionSummary(loops: Array<LoopRunRecord | InspectableLoopRecord>): LoopCollectionSummary;
|
|
118
121
|
export declare function inspectRunsRoot(runsRoot?: string): Promise<RunStoreInspection>;
|
|
@@ -11,10 +11,10 @@ export function resolveExecutionMode() {
|
|
|
11
11
|
const liveMode = process.env.MARTIN_LIVE !== "false";
|
|
12
12
|
return {
|
|
13
13
|
liveMode,
|
|
14
|
-
mode: liveMode ? "live" : "
|
|
14
|
+
mode: liveMode ? "live" : "proof",
|
|
15
15
|
detail: liveMode
|
|
16
16
|
? "Live CLI execution is enabled."
|
|
17
|
-
: "
|
|
17
|
+
: "Proof mode is active because MARTIN_LIVE=false."
|
|
18
18
|
};
|
|
19
19
|
}
|
|
20
20
|
export function detectCliAvailability(command) {
|
|
@@ -126,7 +126,11 @@ export function buildVerificationSummary(loop, ledgerEvents = []) {
|
|
|
126
126
|
const verificationEvents = (loop.events ?? []).filter((event) => event.type === "verification.completed");
|
|
127
127
|
const verificationLedgerEvents = ledgerEvents.filter((event) => event.kind === "verification.completed");
|
|
128
128
|
const warnings = [];
|
|
129
|
+
const integrity = resolveReceiptIntegrity(loop);
|
|
129
130
|
const ledgerWarnings = getLedgerWarnings(ledgerEvents);
|
|
131
|
+
if (integrity.state !== "verified") {
|
|
132
|
+
warnings.push(`Receipt integrity is ${integrity.state}; persisted verifier evidence is not trustworthy yet.`);
|
|
133
|
+
}
|
|
130
134
|
warnings.push(...ledgerWarnings);
|
|
131
135
|
if (verificationEvents.length === 0) {
|
|
132
136
|
warnings.push(verificationLedgerEvents.length > 0
|
|
@@ -163,6 +167,12 @@ export function buildVerificationSummary(loop, ledgerEvents = []) {
|
|
|
163
167
|
warnings
|
|
164
168
|
};
|
|
165
169
|
}
|
|
170
|
+
/**
 * Returns the receipt-integrity summary stored on the loop record, or an
 * "unsigned" placeholder when the record carries none (null or undefined).
 */
export function resolveReceiptIntegrity(loop) {
    const recorded = loop.receiptIntegrity;
    if (recorded !== undefined && recorded !== null) {
        return recorded;
    }
    return {
        state: "unsigned",
        reason: "Receipt integrity metadata was not available on the loop record."
    };
}
|
|
166
176
|
export function buildEventSummaries(loop, limit = 5) {
|
|
167
177
|
return (loop.events ?? [])
|
|
168
178
|
.slice(-limit)
|
|
@@ -272,18 +282,42 @@ export function buildSuggestedResourceUris(loopId) {
|
|
|
272
282
|
"martin://server/health",
|
|
273
283
|
"martin://runs/recent",
|
|
274
284
|
"martin://runs/triage",
|
|
285
|
+
"martin://runs/latest",
|
|
286
|
+
"martin://runs/latest/summary",
|
|
287
|
+
"martin://runs/latest/proof-card",
|
|
288
|
+
"martin://runs/latest/budget-status",
|
|
289
|
+
"martin://runs/latest/verifier-evidence",
|
|
290
|
+
"martin://runs/latest/rollback-evidence",
|
|
291
|
+
"martin://policies/current",
|
|
292
|
+
"martin://repo/risk-map",
|
|
293
|
+
"martin://verifiers/results",
|
|
294
|
+
"martin://agent/next-step",
|
|
275
295
|
`martin://runs/${loopId}`,
|
|
296
|
+
`martin://runs/${loopId}/dossier`,
|
|
276
297
|
`martin://runs/${loopId}/verification`,
|
|
277
298
|
"martin://guides/mcp-usage",
|
|
299
|
+
"martin://guides/agent-start",
|
|
278
300
|
"martin://guides/publish-readiness"
|
|
279
301
|
];
|
|
280
302
|
}
|
|
281
303
|
export function buildSuggestedPromptNames() {
|
|
282
304
|
return [
|
|
305
|
+
"martin_start",
|
|
306
|
+
"martin_preflight",
|
|
307
|
+
"martin_triage",
|
|
308
|
+
"martin_resume",
|
|
309
|
+
"martin_prove",
|
|
310
|
+
"martin_release_check",
|
|
283
311
|
"martin_governed_coding_kickoff",
|
|
284
312
|
"martin_debug_failed_run",
|
|
285
313
|
"martin_publish_readiness_review",
|
|
286
|
-
"martin_triage_run_store"
|
|
314
|
+
"martin_triage_run_store",
|
|
315
|
+
"safe_bug_fix",
|
|
316
|
+
"write_tests_first",
|
|
317
|
+
"small_refactor",
|
|
318
|
+
"security_review",
|
|
319
|
+
"pr_review",
|
|
320
|
+
"release_check"
|
|
287
321
|
];
|
|
288
322
|
}
|
|
289
323
|
function countBy(values, key) {
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { type RunStoreInspection } from "./tool-support.js";
|
|
2
|
+
export type MartinPolicyPack = "solo-founder" | "startup-team" | "enterprise-strict" | "oss-maintainer" | "security-sensitive";
|
|
3
|
+
export interface RepoGitState {
|
|
4
|
+
available: boolean;
|
|
5
|
+
isRepo: boolean;
|
|
6
|
+
clean: boolean;
|
|
7
|
+
branch?: string;
|
|
8
|
+
upstream?: string;
|
|
9
|
+
ahead?: number;
|
|
10
|
+
behind?: number;
|
|
11
|
+
}
|
|
12
|
+
export interface RepoSignals {
|
|
13
|
+
workingDirectory: string;
|
|
14
|
+
packageManager: "pnpm" | "npm" | "yarn" | "bun" | "unknown";
|
|
15
|
+
languages: string[];
|
|
16
|
+
frameworks: string[];
|
|
17
|
+
verifiers: {
|
|
18
|
+
test: string[];
|
|
19
|
+
lint: string[];
|
|
20
|
+
build: string[];
|
|
21
|
+
defaultPlan: string[];
|
|
22
|
+
};
|
|
23
|
+
packageScripts: Record<string, string>;
|
|
24
|
+
git: RepoGitState;
|
|
25
|
+
sensitivePaths: string[];
|
|
26
|
+
availableHosts: Record<"claude" | "codex" | "cursor" | "gemini", {
|
|
27
|
+
available: boolean;
|
|
28
|
+
detail: string;
|
|
29
|
+
resolvedPath?: string;
|
|
30
|
+
}>;
|
|
31
|
+
}
|
|
32
|
+
export interface MartinRiskAssessment {
|
|
33
|
+
score: number;
|
|
34
|
+
level: "low" | "medium" | "high";
|
|
35
|
+
reasons: string[];
|
|
36
|
+
recommendedAction: "proceed" | "review" | "require_human_approval";
|
|
37
|
+
}
|
|
38
|
+
export interface MartinPolicyPackDefinition {
|
|
39
|
+
name: MartinPolicyPack;
|
|
40
|
+
summary: string;
|
|
41
|
+
defaultVerifiers: string[];
|
|
42
|
+
defaultAllowedPaths: string[];
|
|
43
|
+
defaultBlockedPaths: string[];
|
|
44
|
+
dossierExpectations: string[];
|
|
45
|
+
requireApprovalAtOrAbove: MartinRiskAssessment["level"];
|
|
46
|
+
}
|
|
47
|
+
export interface MartinPlanBudget {
|
|
48
|
+
maxUsd: number;
|
|
49
|
+
softLimitUsd: number;
|
|
50
|
+
maxIterations: number;
|
|
51
|
+
maxTokens: number;
|
|
52
|
+
maxMinutes: number;
|
|
53
|
+
maxFilesChanged: number;
|
|
54
|
+
maxCommands: number;
|
|
55
|
+
}
|
|
56
|
+
export interface MartinRunContract {
|
|
57
|
+
objective: string;
|
|
58
|
+
context?: string;
|
|
59
|
+
allowedPaths: string[];
|
|
60
|
+
blockedPaths: string[];
|
|
61
|
+
budget: MartinPlanBudget;
|
|
62
|
+
verifiers: string[];
|
|
63
|
+
risk: MartinRiskAssessment;
|
|
64
|
+
policyPack: MartinPolicyPack;
|
|
65
|
+
requiresApproval: boolean;
|
|
66
|
+
}
|
|
67
|
+
export interface MartinPlanProposal {
|
|
68
|
+
objective: string;
|
|
69
|
+
implementationSummary: string;
|
|
70
|
+
proposedFileScope: {
|
|
71
|
+
allowedPaths: string[];
|
|
72
|
+
blockedPaths: string[];
|
|
73
|
+
};
|
|
74
|
+
proposedVerifiers: string[];
|
|
75
|
+
estimatedBudget: MartinPlanBudget;
|
|
76
|
+
risk: MartinRiskAssessment;
|
|
77
|
+
approvalRecommendation: "not_required" | "recommended" | "required";
|
|
78
|
+
policyPack: MartinPolicyPackDefinition;
|
|
79
|
+
nextSteps: string[];
|
|
80
|
+
}
|
|
81
|
+
export interface MartinReadinessReport {
|
|
82
|
+
score: number;
|
|
83
|
+
level: "low" | "medium" | "high";
|
|
84
|
+
missingSafeguards: string[];
|
|
85
|
+
repo: {
|
|
86
|
+
git: RepoGitState;
|
|
87
|
+
packageManager: RepoSignals["packageManager"];
|
|
88
|
+
languages: string[];
|
|
89
|
+
frameworks: string[];
|
|
90
|
+
};
|
|
91
|
+
safeguards: {
|
|
92
|
+
verifierDetected: boolean;
|
|
93
|
+
repoScoped: boolean;
|
|
94
|
+
branchSafe: boolean;
|
|
95
|
+
runStoreHealthy: boolean;
|
|
96
|
+
};
|
|
97
|
+
availableHosts: RepoSignals["availableHosts"];
|
|
98
|
+
}
|
|
99
|
+
interface ContractOverrides {
|
|
100
|
+
objective: string;
|
|
101
|
+
context?: string;
|
|
102
|
+
verificationPlan?: string[];
|
|
103
|
+
allowedPaths?: string[];
|
|
104
|
+
deniedPaths?: string[];
|
|
105
|
+
policyPack?: MartinPolicyPack;
|
|
106
|
+
maxUsd?: number;
|
|
107
|
+
maxIterations?: number;
|
|
108
|
+
maxTokens?: number;
|
|
109
|
+
maxMinutes?: number;
|
|
110
|
+
maxFilesChanged?: number;
|
|
111
|
+
maxCommands?: number;
|
|
112
|
+
}
|
|
113
|
+
export declare function inspectRepoSignals(workingDirectory: string): RepoSignals;
|
|
114
|
+
export declare function buildReadinessReport(signals: RepoSignals, runStore: RunStoreInspection): MartinReadinessReport;
|
|
115
|
+
export declare function buildPolicyPackDefinition(policyPack: MartinPolicyPack | undefined, signals: RepoSignals): MartinPolicyPackDefinition;
|
|
116
|
+
export declare function buildPlanProposal(workingDirectory: string, overrides: ContractOverrides): MartinPlanProposal;
|
|
117
|
+
export declare function buildRunContract(workingDirectory: string, overrides: ContractOverrides): MartinRunContract;
|
|
118
|
+
export declare function assessRunRisk(input: {
|
|
119
|
+
objective: string;
|
|
120
|
+
context?: string;
|
|
121
|
+
allowedPaths: string[];
|
|
122
|
+
blockedPaths: string[];
|
|
123
|
+
verifiers: string[];
|
|
124
|
+
signals: RepoSignals;
|
|
125
|
+
}): MartinRiskAssessment;
|
|
126
|
+
export declare function buildRepoRiskMap(signals: RepoSignals): {
|
|
127
|
+
workingDirectory: string;
|
|
128
|
+
packageManager: RepoSignals["packageManager"];
|
|
129
|
+
frameworks: string[];
|
|
130
|
+
sensitivePaths: string[];
|
|
131
|
+
recommendedPolicyPack: MartinPolicyPack;
|
|
132
|
+
};
|
|
133
|
+
export {};
|