@martinloop/mcp 0.2.0 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -182
- package/dist/discovery-metadata.d.ts +21 -0
- package/dist/discovery-metadata.js +152 -0
- package/dist/discovery-support.d.ts +62 -0
- package/dist/discovery-support.js +224 -0
- package/dist/package-version.d.ts +1 -0
- package/dist/package-version.js +3 -0
- package/dist/prompts.d.ts +13 -3
- package/dist/prompts.js +537 -74
- package/dist/resources.d.ts +35 -5
- package/dist/resources.js +788 -71
- package/dist/server-validation.d.ts +2 -3
- package/dist/server-validation.js +375 -119
- package/dist/server.d.ts +76 -7
- package/dist/server.js +1478 -394
- package/dist/tools/doctor.d.ts +2 -0
- package/dist/tools/doctor.js +18 -6
- package/dist/tools/eval.d.ts +24 -0
- package/dist/tools/eval.js +65 -0
- package/dist/tools/get-attempt.d.ts +13 -6
- package/dist/tools/get-attempt.js +14 -5
- package/dist/tools/get-run.d.ts +19 -12
- package/dist/tools/get-run.js +20 -11
- package/dist/tools/get-status.d.ts +19 -0
- package/dist/tools/get-status.js +30 -2
- package/dist/tools/get-verification-results.d.ts +10 -7
- package/dist/tools/get-verification-results.js +11 -6
- package/dist/tools/inspect-loop.d.ts +9 -0
- package/dist/tools/inspect-loop.js +11 -2
- package/dist/tools/list-runs.d.ts +25 -5
- package/dist/tools/list-runs.js +21 -4
- package/dist/tools/logs.d.ts +25 -0
- package/dist/tools/logs.js +49 -0
- package/dist/tools/plan.d.ts +20 -0
- package/dist/tools/plan.js +10 -0
- package/dist/tools/pr-tools.d.ts +31 -0
- package/dist/tools/pr-tools.js +111 -0
- package/dist/tools/preflight.d.ts +10 -0
- package/dist/tools/preflight.js +18 -4
- package/dist/tools/run-controls.d.ts +36 -0
- package/dist/tools/run-controls.js +88 -0
- package/dist/tools/run-dossier.d.ts +51 -4
- package/dist/tools/run-dossier.js +100 -5
- package/dist/tools/run-loop.d.ts +19 -0
- package/dist/tools/run-loop.js +61 -4
- package/dist/tools/run-store.d.ts +57 -3
- package/dist/tools/run-store.js +404 -53
- package/dist/tools/tool-errors.d.ts +37 -0
- package/dist/tools/tool-errors.js +170 -0
- package/dist/tools/tool-response.d.ts +16 -0
- package/dist/tools/tool-response.js +34 -0
- package/dist/tools/tool-support.d.ts +92 -2
- package/dist/tools/tool-support.js +385 -63
- package/dist/tools/triage-runs.d.ts +33 -0
- package/dist/tools/triage-runs.js +138 -0
- package/dist/tools/workflow-governance.d.ts +133 -0
- package/dist/tools/workflow-governance.js +581 -0
- package/dist/vendor/adapters/claude-cli.js +0 -1
- package/dist/vendor/adapters/cli-bridge.d.ts +5 -0
- package/dist/vendor/adapters/cli-bridge.js +16 -9
- package/dist/vendor/adapters/direct-provider.js +0 -1
- package/dist/vendor/adapters/index.d.ts +2 -1
- package/dist/vendor/adapters/index.js +2 -1
- package/dist/vendor/adapters/openai-compatible.d.ts +47 -0
- package/dist/vendor/adapters/openai-compatible.js +242 -0
- package/dist/vendor/adapters/runtime-support.js +0 -1
- package/dist/vendor/adapters/stub-agent-cli.js +0 -1
- package/dist/vendor/adapters/stub-direct-provider.js +0 -1
- package/dist/vendor/adapters/verifier-only.js +0 -1
- package/dist/vendor/contracts/governance.js +0 -1
- package/dist/vendor/contracts/index.d.ts +2 -0
- package/dist/vendor/contracts/index.js +1 -1
- package/dist/vendor/contracts/operator.d.ts +19 -0
- package/dist/vendor/contracts/operator.js +11 -0
- package/dist/vendor/core/compiler.js +0 -1
- package/dist/vendor/core/context-integrity.js +0 -1
- package/dist/vendor/core/grounding.js +0 -1
- package/dist/vendor/core/index.js +1 -2
- package/dist/vendor/core/leash.js +19 -12
- package/dist/vendor/core/persistence/compiler.js +0 -1
- package/dist/vendor/core/persistence/index.js +0 -1
- package/dist/vendor/core/persistence/ledger.js +0 -1
- package/dist/vendor/core/persistence/runs-reader.js +0 -1
- package/dist/vendor/core/persistence/store.js +0 -1
- package/dist/vendor/core/policy.js +0 -1
- package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
- package/dist/vendor/core/red-blue/red-phase.js +135 -0
- package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
- package/dist/vendor/core/red-blue/risk-tiers.js +32 -0
- package/dist/vendor/core/rollback.js +2 -3
- package/dist/workflow-state.d.ts +25 -0
- package/dist/workflow-state.js +102 -0
- package/package.json +12 -7
- package/server.json +2 -2
- package/dist/tools/cockpit-support.d.ts +0 -69
- package/dist/tools/cockpit-support.js +0 -108
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { PROBE_COUNTS, RED_PHASE_MODEL, resolveRedBudgetPolicy } from "./risk-tiers.js";
|
|
2
|
+
// ─── Programmatic probes ──────────────────────────────────────────────────────
|
|
3
|
+
/**
 * Runs the regex-based ("programmatic") Red phase probes over a patch.
 *
 * The diff is treated as unified-diff text: lines beginning with "-" are
 * removals and lines beginning with "+" are additions.
 *
 * @param {{ diff: string, changedFiles?: string[] }} patch - Patch under review.
 * @param {boolean} paranoid - When true, the two extra "paranoid" probes
 *   (manifest change, removed export) run in addition to the four base probes.
 * @returns {Array<{ trapId: string, severity: "warn" | "block", description: string }>}
 *   Findings in probe order; empty when nothing matched.
 */
function runProgrammaticProbes(patch, paranoid) {
    const diff = patch.diff ?? "";
    // A patch without a file list must not crash the manifest probe.
    const changedFiles = Array.isArray(patch.changedFiles) ? patch.changedFiles : [];
    const findings = [];
    const report = (trapId, severity, description) => {
        findings.push({ trapId, severity, description });
    };
    // Probe 1: assertion deletion
    if (/^\-.*(?:expect\(.*\)\.to|assert\.(?:equal|is|ok|strictEqual)|t\.(?:is|truthy|falsy|deepEqual))/m.test(diff)) {
        report("T01", "warn", "Patch removes test assertions — possible weak-test false green.");
    }
    // Probe 2: grounding evasion pragma.
    // Only ADDED lines count: a patch that deletes a pragma, or merely touches
    // code next to an existing one (context line), is not evading anything and
    // must not be hard-blocked. The lookahead skips "+++ b/<path>" file headers.
    if (/^\+(?!\+\+ ).*(?:@martin-ignore|@ts-nocheck|eslint-disable)/im.test(diff)) {
        report("T11", "block", "Grounding evasion pragma detected in patch.");
    }
    // Probe 3: context poisoning
    if (/\.martin\/|\.claude\//.test(diff)) {
        report("T07", "block", "Patch writes to agent context directory (.martin/ or .claude/).");
    }
    // Probe 4: budget self-report
    if (/budgetSource.*model_reported|model_reported.*budget/i.test(diff)) {
        report("T10", "block", "Patch introduces model-reported budget sourcing.");
    }
    if (!paranoid) {
        return findings;
    }
    // Probe 5: scope creep — manifest changes
    const addsStringEntry = /^\+.*"[^"]+"\s*:\s*"[^"]+".*$/m.test(diff);
    if (addsStringEntry && /package\.json|Cargo\.toml|go\.mod/i.test(changedFiles.join(","))) {
        report("T03", "warn", "Paranoid scan: substantive manifest change detected.");
    }
    // Probe 6: silent revert — removal of recently added symbols
    if (/^\-.*export\s+(function|const|class)\s+\w+/m.test(diff)) {
        report("T02", "warn", "Paranoid scan: exported symbol removed — potential silent revert.");
    }
    return findings;
}
|
|
59
|
+
// ─── Red phase runner ─────────────────────────────────────────────────────────
|
|
60
|
+
/**
 * Runs the Red phase for a given patch and risk tier.
 *
 * - baseline: programmatic probes only, no model call
 * - high_risk: paranoid programmatic scan, no model call
 * - release_critical: paranoid scan + one Haiku model call
 *
 * The model call happens only when the tier's policy allows it AND a
 * `modelClient` is supplied in `options`.
 *
 * @param {{ diff: string, changedFiles: string[] }} patch - Patch under review.
 * @param {string} tier - Risk tier ("baseline" | "high_risk" | "release_critical").
 * @param {number} blueBudgetUsd - Blue phase budget used to resolve the Red policy.
 * @param {{ modelClient?: { complete(prompt: string): Promise<{ findings: unknown[], costUsd: number }> },
 *           onLedgerEvent?: (event: object) => void }} [options]
 * @returns {Promise<object>} Red findings record (riskTier, probesRun,
 *   findingsCount, findings, modelCallMade, optional modelUsed, budgetUsedUsd).
 * @throws {TypeError} When the model client returns a result whose `findings`
 *   is not an array.
 */
export async function runRedPhase(patch, tier, blueBudgetUsd, options = {}) {
    const policy = resolveRedBudgetPolicy(tier, blueBudgetUsd);
    const paranoid = tier !== "baseline";
    // NOTE(review): probesRun is the nominal per-tier figure from PROBE_COUNTS,
    // not a count of the probes that runProgrammaticProbes actually executed.
    const probesRun = PROBE_COUNTS[tier];
    let findings = runProgrammaticProbes(patch, paranoid);
    let modelCallMade = false;
    let modelUsed;
    let budgetUsedUsd = 0;
    if (policy.modelCallAllowed && options.modelClient) {
        const prompt = buildRedPhasePrompt(patch, findings);
        const completion = await options.modelClient.complete(prompt);
        // Fail loudly on a malformed reply instead of spreading garbage (a
        // string would be spread character by character) into the findings.
        if (!Array.isArray(completion?.findings)) {
            throw new TypeError("Red phase model client returned a result without a findings array.");
        }
        findings = [...findings, ...completion.findings];
        modelCallMade = true;
        modelUsed = RED_PHASE_MODEL;
        // A missing or non-finite cost must not turn the running total into NaN.
        if (typeof completion.costUsd === "number" && Number.isFinite(completion.costUsd)) {
            budgetUsedUsd += completion.costUsd;
        }
    }
    const redFindings = {
        riskTier: tier,
        probesRun,
        findingsCount: findings.length,
        findings,
        modelCallMade,
        ...(modelUsed !== undefined ? { modelUsed } : {}),
        budgetUsedUsd
    };
    // The ledger event carries counts only; the findings themselves stay in the return value.
    options.onLedgerEvent?.({
        type: "red_phase_findings",
        riskTier: tier,
        probesRun,
        findingsCount: findings.length,
        modelCallMade,
        timestamp: new Date().toISOString()
    });
    return redFindings;
}
|
|
102
|
+
// ─── Policy helpers ───────────────────────────────────────────────────────────
|
|
103
|
+
/**
 * Decides whether a patch may be accepted based on its Red findings.
 *
 * Warn-severity findings never reject a patch on their own; one finding with
 * severity "block" is enough to reject it.
 *
 * @param {{ findings: Array<{ severity: string }> }} findings - Red findings record.
 * @returns {boolean} True when no finding has severity "block".
 */
export function shouldAcceptPatch(findings) {
    const entries = findings.findings;
    return entries.every((entry) => entry.severity !== "block");
}
|
|
110
|
+
/**
 * Assembles a RedFindings record from partial input, filling in defaults.
 *
 * Defaults: `probesRun` falls back to the tier's PROBE_COUNTS entry,
 * `modelCallMade` to false and `budgetUsedUsd` to 0. `findingsCount` is always
 * derived from `findings.length`, and `modelUsed` is only present when supplied.
 *
 * @param {object} input - Partial findings; `riskTier` and `findings` are required.
 * @returns {object} The completed RedFindings record.
 */
export function buildRedFindings(input) {
    const { riskTier, findings, modelUsed } = input;
    const record = {
        riskTier,
        probesRun: input.probesRun ?? PROBE_COUNTS[riskTier],
        findingsCount: findings.length,
        findings,
        modelCallMade: input.modelCallMade ?? false
    };
    if (modelUsed !== undefined) {
        record.modelUsed = modelUsed;
    }
    record.budgetUsedUsd = input.budgetUsedUsd ?? 0;
    return record;
}
|
|
124
|
+
// ─── Internal helpers ─────────────────────────────────────────────────────────
|
|
125
|
+
/**
 * Builds the newline-separated prompt sent to the Red phase model.
 *
 * The prompt lists the trap ids already raised, the changed file paths and
 * at most the first 2000 characters of the diff.
 *
 * @param {{ diff: string, changedFiles: string[] }} patch - Patch under review.
 * @param {Array<{ trapId: string }>} existingFindings - Findings raised so far.
 * @returns {string} The prompt text.
 */
function buildRedPhasePrompt(patch, existingFindings) {
    const knownTrapIds = existingFindings.map((finding) => finding.trapId);
    const fileList = patch.changedFiles.join(", ");
    const diffExcerpt = patch.diff.slice(0, 2000);
    const promptLines = [
        "You are a security-focused code reviewer running adversarial analysis.",
        "Analyze this patch for adversarial patterns not caught by programmatic probes.",
        "Return JSON array of findings with: trapId, severity (warn|block), description.",
        `Already detected: ${JSON.stringify(knownTrapIds)}`,
        `Changed files: ${fileList}`,
        "Diff (truncated to 2000 chars):",
        diffExcerpt
    ];
    return promptLines.join("\n");
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/** Risk tier that selects how the Red phase treats a patch. */
export type RiskTier = "baseline" | "high_risk" | "release_critical";
/** Red phase budget policy, as produced by resolveRedBudgetPolicy. */
export interface RedBudgetPolicy {
    /** Tier the policy was resolved for. */
    riskTier: RiskTier;
    /** Blue phase budget (USD) the Red cap is derived from. */
    blueBudgetUsd: number;
    /** Cap on Red phase spend: 30% / 100% / 150% of Blue */
    redBudgetCapUsd: number;
    /** Only release_critical permits a Haiku model call */
    modelCallAllowed: boolean;
}
/**
 * Returns the Red phase budget policy for a given risk tier and Blue budget.
 */
export declare function resolveRedBudgetPolicy(tier: RiskTier, blueBudgetUsd: number): RedBudgetPolicy;
/**
 * Probe counts per tier.
 * baseline = standard 6-probe sweep
 * high_risk = paranoid 12-probe sweep
 * release_critical = paranoid 12-probe sweep + model
 */
export declare const PROBE_COUNTS: Record<RiskTier, number>;
/** The only model ever permitted in the Red phase. */
export declare const RED_PHASE_MODEL: "claude-haiku-4-5-20251001";
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
// ─── Risk Tier Definitions ────────────────────────────────────────────────────
|
|
2
|
+
// Governs how aggressively Red phase probes a patch and whether a model call
|
|
3
|
+
// is permitted. Budget caps are expressed as fractions of the Blue phase budget.
|
|
4
|
+
/** Red budget cap per tier, as a fraction of the Blue phase budget. */
const BUDGET_MULTIPLIERS = {
    baseline: 0.30,
    high_risk: 1.00,
    release_critical: 1.50
};
/**
 * Returns the Red phase budget policy for a given risk tier and Blue budget.
 *
 * @param {"baseline" | "high_risk" | "release_critical"} tier - Risk tier.
 * @param {number} blueBudgetUsd - Blue phase budget in USD.
 * @returns {{ riskTier: string, blueBudgetUsd: number, redBudgetCapUsd: number, modelCallAllowed: boolean }}
 *   The cap is `blueBudgetUsd` times the tier multiplier; a model call is
 *   allowed only for "release_critical".
 * @throws {RangeError} When `tier` is not one of the known tiers.
 */
export function resolveRedBudgetPolicy(tier, blueBudgetUsd) {
    // An unknown tier used to produce `blueBudgetUsd * undefined` = NaN, i.e. a
    // cap that no `spent > cap` comparison can ever trip. Reject it instead.
    // Object.hasOwn also keeps inherited keys such as "constructor" out.
    if (!Object.hasOwn(BUDGET_MULTIPLIERS, tier)) {
        throw new RangeError(`Unknown Red phase risk tier: ${String(tier)}`);
    }
    return {
        riskTier: tier,
        blueBudgetUsd,
        redBudgetCapUsd: blueBudgetUsd * BUDGET_MULTIPLIERS[tier],
        modelCallAllowed: tier === "release_critical"
    };
}
/**
 * Probe counts per tier.
 * baseline = standard 6-probe sweep
 * high_risk = paranoid 12-probe sweep
 * release_critical = paranoid 12-probe sweep + model
 */
export const PROBE_COUNTS = {
    baseline: 6,
    high_risk: 12,
    release_critical: 12
};
/** The only model ever permitted in the Red phase. */
export const RED_PHASE_MODEL = "claude-haiku-4-5-20251001";
|
|
@@ -114,8 +114,8 @@ export async function restoreRollbackBoundary(input) {
|
|
|
114
114
|
}
|
|
115
115
|
function readRepoState(repoRoot) {
|
|
116
116
|
return {
|
|
117
|
-
trackedDirtyFiles: readGitLines(repoRoot, ["diff", "--name-only", "HEAD"]),
|
|
118
|
-
untrackedFiles: readGitLines(repoRoot, ["ls-files", "--others", "--exclude-standard"])
|
|
117
|
+
trackedDirtyFiles: readGitLines(repoRoot, ["diff", "--name-only", "HEAD", "--", "."]),
|
|
118
|
+
untrackedFiles: readGitLines(repoRoot, ["ls-files", "--others", "--exclude-standard", "--", "."])
|
|
119
119
|
};
|
|
120
120
|
}
|
|
121
121
|
function readGitLines(repoRoot, args) {
|
|
@@ -216,4 +216,3 @@ function emptyRepoState() {
|
|
|
216
216
|
/**
 * Turns any thrown value into a printable message.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} `error.message` for Error instances, otherwise `String(error)`.
 */
function toErrorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
|
|
219
|
-
//# sourceMappingURL=rollback.js.map
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/** Names of the governance steps that are recorded as receipts. */
type McpWorkflowStepName = "doctor" | "plan" | "preflight";
/** Input for recording that one governance step was performed. */
export interface RecordMcpWorkflowStepInput {
    /** Runs root; the workflow state file is stored beneath it. */
    runsRoot: string;
    /** Step being recorded. */
    step: McpWorkflowStepName;
    /** Working directory the step applies to. */
    workingDirectory: string;
    /** Task objective; stored in normalized form when provided. */
    objective?: string;
    /** Engine name; stored as given when provided. */
    engine?: string;
    /** Verifier plan; stored as a short hash when provided. */
    verificationPlan?: string[];
}
/** Input for checking whether a run may start. */
export interface EvaluateMcpRunGateInput {
    /** Runs root; the workflow state file is read from beneath it. */
    runsRoot: string;
    /** Working directory of the intended run. */
    workingDirectory: string;
    /** Task objective of the intended run. */
    objective: string;
    /** Engine of the intended run. */
    engine?: string;
    /** Verifier plan of the intended run. */
    verificationPlan?: string[];
}
/** Outcome of the run gate check. */
export interface McpRunGateResult {
    /** True when no governance step is missing. */
    allowed: boolean;
    /** What the caller should do next. */
    nextAction: string;
    /** Human-readable explanation of the decision. */
    summary: string;
    /** Steps without a matching, unexpired receipt; empty when allowed. */
    missingSteps: McpWorkflowStepName[];
}
/** Records a receipt for one governance step in the workflow state file. */
export declare function recordMcpWorkflowStep(input: RecordMcpWorkflowStepInput): Promise<void>;
/** Checks the recorded receipts and reports whether a run is allowed. */
export declare function evaluateMcpRunGate(input: EvaluateMcpRunGateInput): Promise<McpRunGateResult>;
export {};
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
3
|
+
import { join, resolve } from "node:path";
|
|
4
|
+
const WORKFLOW_STATE_DIRECTORY = "_martin";
|
|
5
|
+
const WORKFLOW_STATE_FILENAME = "workflow-state.json";
|
|
6
|
+
const DOCTOR_TTL_MS = 24 * 60 * 60 * 1000;
|
|
7
|
+
const PLAN_TTL_MS = 24 * 60 * 60 * 1000;
|
|
8
|
+
const PREFLIGHT_TTL_MS = 6 * 60 * 60 * 1000;
|
|
9
|
+
/**
 * Records a receipt for one governance step in the workflow state file.
 *
 * The receipt replaces any earlier receipt for the same step. Optional fields
 * (objective, engine, verification plan) are stored only when truthy; the
 * objective is normalized and the verification plan is stored as a hash.
 *
 * @param {object} input - See RecordMcpWorkflowStepInput.
 * @returns {Promise<void>}
 */
export async function recordMcpWorkflowStep(input) {
    const { runsRoot, step, objective, engine, verificationPlan } = input;
    const state = await readWorkflowState(runsRoot);
    if (state.mcp === undefined || state.mcp === null) {
        state.mcp = {};
    }
    const receipt = {
        step,
        recordedAt: new Date().toISOString(),
        workingDirectory: normalizeWorkingDirectory(input.workingDirectory)
    };
    if (objective) {
        receipt.objectiveKey = normalizeObjective(objective);
    }
    if (engine) {
        receipt.engine = engine;
    }
    if (verificationPlan) {
        receipt.verificationPlanKey = hashVerificationPlan(verificationPlan);
    }
    state.mcp[step] = receipt;
    await writeWorkflowState(runsRoot, state);
}
|
|
22
|
+
/**
 * Checks whether martin_run may start for the given task.
 *
 * A run is allowed only when unexpired receipts exist for all three steps:
 * - doctor: same working directory
 * - plan: same working directory and objective
 * - preflight: same working directory, objective, engine and verification plan
 *
 * @param {object} input - See EvaluateMcpRunGateInput.
 * @returns {Promise<{ allowed: boolean, nextAction: string, summary: string, missingSteps: string[] }>}
 */
export async function evaluateMcpRunGate(input) {
    const state = await readWorkflowState(input.runsRoot);
    const receipts = state.mcp ?? {};
    const workingDirectory = normalizeWorkingDirectory(input.workingDirectory);
    const objectiveKey = normalizeObjective(input.objective);
    const defaultEngine = "claude";
    const engine = input.engine ?? defaultEngine;
    const emptyPlanKey = hashVerificationPlan([]);
    const verificationPlanKey = hashVerificationPlan(input.verificationPlan ?? []);
    const sameDirectory = (receipt) => receipt.workingDirectory === workingDirectory;
    const sameTask = (receipt) => sameDirectory(receipt) && receipt.objectiveKey === objectiveKey;
    // recordMcpWorkflowStep omits `engine` / `verificationPlanKey` when the
    // caller did not supply them. Read a missing field as the same default
    // this gate applies to its own input, otherwise a preflight recorded with
    // defaults could never satisfy a gate evaluated with defaults.
    const samePreflight = (receipt) => sameTask(receipt) &&
        (receipt.engine ?? defaultEngine) === engine &&
        (receipt.verificationPlanKey ?? emptyPlanKey) === verificationPlanKey;
    const missingSteps = [];
    if (!isFresh(receipts["doctor"], DOCTOR_TTL_MS, sameDirectory)) {
        missingSteps.push("doctor");
    }
    if (!isFresh(receipts["plan"], PLAN_TTL_MS, sameTask)) {
        missingSteps.push("plan");
    }
    if (!isFresh(receipts["preflight"], PREFLIGHT_TTL_MS, samePreflight)) {
        missingSteps.push("preflight");
    }
    if (missingSteps.length === 0) {
        return {
            allowed: true,
            nextAction: "martin_run",
            summary: "Martin MCP governance receipts are present for this task.",
            missingSteps
        };
    }
    // Guidance always targets the earliest missing step.
    const guidance = {
        doctor: "Call martin_doctor for this workingDirectory before any real run.",
        plan: "Call martin_plan with the exact objective before martin_run.",
        preflight: "Call martin_preflight with the exact objective, verifier plan, and engine before martin_run."
    };
    return {
        allowed: false,
        nextAction: guidance[missingSteps[0]],
        summary: `martin_run is blocked until Martin MCP receipts exist for ${missingSteps.join(", ")}.`,
        missingSteps
    };
}
|
|
63
|
+
/**
 * Loads the workflow state stored beneath `runsRoot`.
 *
 * Best effort: if the file cannot be read or parsed, or its `version` is not
 * 1, a fresh `{ version: 1 }` state is returned instead of an error.
 *
 * @param {string} runsRoot - Runs root directory.
 * @returns {Promise<object>} The stored state, or an empty version-1 state.
 */
async function readWorkflowState(runsRoot) {
    const statePath = resolveWorkflowStatePath(runsRoot);
    let stored;
    try {
        const contents = await readFile(statePath, "utf8");
        stored = JSON.parse(contents);
        if (stored.version !== 1) {
            stored = { version: 1 };
        }
    }
    catch {
        stored = { version: 1 };
    }
    return stored;
}
|
|
74
|
+
/**
 * Writes the workflow state beneath `runsRoot` as pretty-printed JSON,
 * creating the state directory first when it does not exist.
 *
 * @param {string} runsRoot - Runs root directory.
 * @param {object} state - State to persist.
 * @returns {Promise<void>}
 */
async function writeWorkflowState(runsRoot, state) {
    const targetPath = resolveWorkflowStatePath(runsRoot);
    const stateDirectory = join(resolve(runsRoot), WORKFLOW_STATE_DIRECTORY);
    await mkdir(stateDirectory, { recursive: true });
    const serialized = JSON.stringify(state, null, 2);
    await writeFile(targetPath, serialized, "utf8");
}
|
|
79
|
+
/**
 * Computes the absolute path of the workflow state file for a runs root.
 *
 * @param {string} runsRoot - Runs root directory (absolute or relative).
 * @returns {string} `<resolved runsRoot>/<state directory>/<state filename>`.
 */
function resolveWorkflowStatePath(runsRoot) {
    const absoluteRunsRoot = resolve(runsRoot);
    const segments = [absoluteRunsRoot, WORKFLOW_STATE_DIRECTORY, WORKFLOW_STATE_FILENAME];
    return join(...segments);
}
|
|
82
|
+
/**
 * Tells whether a receipt exists, matches the current task and is unexpired.
 *
 * @param {{ recordedAt: string } | undefined} receipt - Stored receipt, if any.
 * @param {number} ttlMs - Maximum accepted age in milliseconds.
 * @param {(receipt: object) => boolean} predicate - Task-match check; only
 *   called when a receipt is present.
 * @returns {boolean} True only when the receipt is present, the predicate
 *   holds, `recordedAt` parses, and the age lies within the accepted window.
 */
function isFresh(receipt, ttlMs, predicate) {
    // Tolerated forward drift between the recording clock and the checking clock.
    const MAX_CLOCK_SKEW_MS = 5 * 60 * 1000;
    if (!receipt || !predicate(receipt)) {
        return false;
    }
    const recordedAt = Date.parse(receipt.recordedAt);
    if (Number.isNaN(recordedAt)) {
        return false;
    }
    const ageMs = Date.now() - recordedAt;
    // A timestamp far in the future has a negative age; without the lower
    // bound such a receipt would pass the TTL check indefinitely.
    return ageMs >= -MAX_CLOCK_SKEW_MS && ageMs <= ttlMs;
}
|
|
92
|
+
/**
 * Produces the comparison key for a working directory: the resolved absolute
 * path, lower-cased on Windows (`process.platform === "win32"`) only.
 *
 * @param {string} workingDirectory - Absolute or relative directory path.
 * @returns {string} Normalized absolute path.
 */
function normalizeWorkingDirectory(workingDirectory) {
    const absolutePath = resolve(workingDirectory);
    if (process.platform !== "win32") {
        return absolutePath;
    }
    return absolutePath.toLowerCase();
}
|
|
96
|
+
/**
 * Produces the comparison key for an objective: surrounding whitespace is
 * removed, inner whitespace runs become single spaces, and the text is
 * lower-cased.
 *
 * @param {string} objective - Free-text objective.
 * @returns {string} Normalized objective key.
 */
function normalizeObjective(objective) {
    const words = objective.trim().split(/\s+/u);
    return words.join(" ").toLowerCase();
}
|
|
99
|
+
/**
 * Produces a short fingerprint of a verification plan.
 *
 * Steps are trimmed and empty steps dropped; the remaining list is serialized
 * as JSON and hashed with SHA-256, keeping the first 12 hex characters.
 *
 * @param {string[]} verificationPlan - Verifier steps.
 * @returns {string} 12-character lowercase hex digest prefix.
 */
function hashVerificationPlan(verificationPlan) {
    const steps = verificationPlan.flatMap((step) => {
        const trimmed = step.trim();
        return trimmed ? [trimmed] : [];
    });
    const hasher = createHash("sha256");
    hasher.update(JSON.stringify(steps));
    return hasher.digest("hex").slice(0, 12);
}
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@martinloop/mcp",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.7",
|
|
4
4
|
"mcpName": "io.github.Keesan12/martin-loop",
|
|
5
5
|
"private": false,
|
|
6
6
|
"type": "module",
|
|
7
7
|
"description": "Governed MCP server for AI coding agents with budgets, verifier gates, and inspectable runs.",
|
|
8
8
|
"license": "Apache-2.0",
|
|
9
|
-
"author": "
|
|
9
|
+
"author": "MartinLoop contributors",
|
|
10
10
|
"homepage": "https://martinloop.com/",
|
|
11
11
|
"repository": {
|
|
12
12
|
"type": "git",
|
|
@@ -24,9 +24,10 @@
|
|
|
24
24
|
"claude",
|
|
25
25
|
"codex",
|
|
26
26
|
"martin_doctor",
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
27
|
+
"martin_triage_runs",
|
|
28
|
+
"martin_run_dossier",
|
|
29
|
+
"mcp-resources",
|
|
30
|
+
"mcp-prompts"
|
|
30
31
|
],
|
|
31
32
|
"bin": {
|
|
32
33
|
"mcp": "./dist/server.js",
|
|
@@ -56,15 +57,19 @@
|
|
|
56
57
|
"verify:release": "node --test ../../scripts/tests/publish-mcp-workflow.test.mjs ../../scripts/tests/mcp-publish-reliability.test.mjs ../../scripts/tests/mcp-release-docs.test.mjs",
|
|
57
58
|
"test": "vitest run",
|
|
58
59
|
"lint": "tsc -p tsconfig.json --noEmit",
|
|
59
|
-
"start": "node dist/server.js"
|
|
60
|
+
"start": "node dist/server.js",
|
|
61
|
+
"inspect:live": "node ./scripts/inspect-live.mjs"
|
|
60
62
|
},
|
|
61
63
|
"dependencies": {
|
|
62
|
-
"@modelcontextprotocol/sdk": "^1.
|
|
64
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
63
65
|
"@open-policy-agent/opa-wasm": "^1.10.0",
|
|
64
66
|
"@opentelemetry/api-logs": "^0.214.0",
|
|
65
67
|
"@opentelemetry/exporter-logs-otlp-http": "^0.214.0",
|
|
66
68
|
"@opentelemetry/resources": "^2.6.1",
|
|
67
69
|
"@opentelemetry/sdk-logs": "^0.214.0",
|
|
68
70
|
"ts-morph": "^21.0.0"
|
|
71
|
+
},
|
|
72
|
+
"devDependencies": {
|
|
73
|
+
"@martin/contracts": "workspace:*"
|
|
69
74
|
}
|
|
70
75
|
}
|
package/server.json
CHANGED
|
@@ -7,12 +7,12 @@
|
|
|
7
7
|
"url": "https://github.com/Keesan12/martin-loop",
|
|
8
8
|
"source": "github"
|
|
9
9
|
},
|
|
10
|
-
"version": "0.2.
|
|
10
|
+
"version": "0.2.7",
|
|
11
11
|
"packages": [
|
|
12
12
|
{
|
|
13
13
|
"registryType": "npm",
|
|
14
14
|
"identifier": "@martinloop/mcp",
|
|
15
|
-
"version": "0.2.
|
|
15
|
+
"version": "0.2.7",
|
|
16
16
|
"transport": {
|
|
17
17
|
"type": "stdio"
|
|
18
18
|
}
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import { type LoopRunRecord } from "../vendor/core/index.js";
|
|
2
|
-
export interface RunSelectorInput {
|
|
3
|
-
loopId?: string;
|
|
4
|
-
runsDir?: string;
|
|
5
|
-
latest?: boolean;
|
|
6
|
-
}
|
|
7
|
-
export interface RunSummary {
|
|
8
|
-
loopId: string;
|
|
9
|
-
title: string;
|
|
10
|
-
objective: string;
|
|
11
|
-
status: string;
|
|
12
|
-
lifecycleState: string;
|
|
13
|
-
createdAt: string;
|
|
14
|
-
updatedAt: string;
|
|
15
|
-
attempts: number;
|
|
16
|
-
costUsd: number;
|
|
17
|
-
avoidedUsd: number;
|
|
18
|
-
pressure: string;
|
|
19
|
-
shouldStop: boolean;
|
|
20
|
-
verificationCount: number;
|
|
21
|
-
}
|
|
22
|
-
export interface VerificationResultSummary {
|
|
23
|
-
eventId?: string;
|
|
24
|
-
timestamp?: string;
|
|
25
|
-
lifecycleState?: string;
|
|
26
|
-
passed?: boolean;
|
|
27
|
-
summary?: string;
|
|
28
|
-
}
|
|
29
|
-
export declare function summarizeRun(loop: LoopRunRecord): RunSummary;
|
|
30
|
-
export declare function listRunSummaries(input?: {
|
|
31
|
-
runsDir?: string;
|
|
32
|
-
limit?: number;
|
|
33
|
-
}): Promise<RunSummary[]>;
|
|
34
|
-
export declare function loadSelectedRun(input: RunSelectorInput): Promise<LoopRunRecord>;
|
|
35
|
-
export declare function extractVerificationResults(loop: LoopRunRecord): VerificationResultSummary[];
|
|
36
|
-
export declare function getAttempt(loop: LoopRunRecord, attemptIndex: number): import("../vendor/core/index.js").LoopAttemptRecord;
|
|
37
|
-
export declare function buildRunDossier(loop: LoopRunRecord): {
|
|
38
|
-
loopId: string;
|
|
39
|
-
generatedAt: string;
|
|
40
|
-
sections: ({
|
|
41
|
-
kind: string;
|
|
42
|
-
content: {
|
|
43
|
-
title: string;
|
|
44
|
-
objective: string;
|
|
45
|
-
};
|
|
46
|
-
} | {
|
|
47
|
-
kind: string;
|
|
48
|
-
content: {
|
|
49
|
-
budget: {
|
|
50
|
-
maxUsd: number;
|
|
51
|
-
softLimitUsd: number;
|
|
52
|
-
maxIterations: number;
|
|
53
|
-
maxTokens: number;
|
|
54
|
-
};
|
|
55
|
-
cost: {
|
|
56
|
-
actualUsd: number;
|
|
57
|
-
tokensIn: number;
|
|
58
|
-
tokensOut: number;
|
|
59
|
-
avoidedUsd?: number;
|
|
60
|
-
};
|
|
61
|
-
};
|
|
62
|
-
} | {
|
|
63
|
-
kind: string;
|
|
64
|
-
content: import("../vendor/core/index.js").LoopAttemptRecord[];
|
|
65
|
-
} | {
|
|
66
|
-
kind: string;
|
|
67
|
-
content: VerificationResultSummary[];
|
|
68
|
-
})[];
|
|
69
|
-
};
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
import { evaluateCostGovernor } from "../vendor/core/index.js";
|
|
2
|
-
import { loadLoopRecordForStatus, loadLoopRecordsForInspect } from "./run-store.js";
|
|
3
|
-
export function summarizeRun(loop) {
|
|
4
|
-
const costState = evaluateCostGovernor({
|
|
5
|
-
budget: loop.budget,
|
|
6
|
-
cost: {
|
|
7
|
-
actualUsd: loop.cost.actualUsd,
|
|
8
|
-
avoidedUsd: loop.cost.avoidedUsd ?? 0,
|
|
9
|
-
tokensIn: loop.cost.tokensIn,
|
|
10
|
-
tokensOut: loop.cost.tokensOut
|
|
11
|
-
},
|
|
12
|
-
attemptsUsed: loop.attempts.length
|
|
13
|
-
});
|
|
14
|
-
return {
|
|
15
|
-
loopId: loop.loopId,
|
|
16
|
-
title: loop.task.title,
|
|
17
|
-
objective: loop.task.objective,
|
|
18
|
-
status: loop.status,
|
|
19
|
-
lifecycleState: loop.lifecycleState,
|
|
20
|
-
createdAt: loop.createdAt,
|
|
21
|
-
updatedAt: loop.updatedAt,
|
|
22
|
-
attempts: loop.attempts.length,
|
|
23
|
-
costUsd: loop.cost.actualUsd,
|
|
24
|
-
avoidedUsd: loop.cost.avoidedUsd ?? 0,
|
|
25
|
-
pressure: costState.pressure,
|
|
26
|
-
shouldStop: costState.shouldStop,
|
|
27
|
-
verificationCount: extractVerificationResults(loop).length
|
|
28
|
-
};
|
|
29
|
-
}
|
|
30
|
-
export async function listRunSummaries(input = {}) {
|
|
31
|
-
const inspection = await loadLoopRecordsForInspect({ runsDir: input.runsDir });
|
|
32
|
-
const summaries = inspection.loops.map((loop) => summarizeRun(loop));
|
|
33
|
-
summaries.sort((left, right) => {
|
|
34
|
-
const leftTime = Date.parse(left.updatedAt ?? left.createdAt);
|
|
35
|
-
const rightTime = Date.parse(right.updatedAt ?? right.createdAt);
|
|
36
|
-
return (Number.isFinite(rightTime) ? rightTime : 0) - (Number.isFinite(leftTime) ? leftTime : 0);
|
|
37
|
-
});
|
|
38
|
-
return summaries.slice(0, input.limit ?? 20);
|
|
39
|
-
}
|
|
40
|
-
export async function loadSelectedRun(input) {
|
|
41
|
-
const selectors = [input.loopId ? "loopId" : null, input.latest ? "latest" : null].filter(Boolean);
|
|
42
|
-
if (selectors.length !== 1) {
|
|
43
|
-
throw new Error("Provide exactly one of loopId or latest.");
|
|
44
|
-
}
|
|
45
|
-
const source = await loadLoopRecordForStatus({
|
|
46
|
-
...(input.loopId ? { loopId: input.loopId } : {}),
|
|
47
|
-
...(input.latest ? { latest: true } : {}),
|
|
48
|
-
...(input.runsDir ? { runsDir: input.runsDir } : {})
|
|
49
|
-
});
|
|
50
|
-
return source.loop;
|
|
51
|
-
}
|
|
52
|
-
export function extractVerificationResults(loop) {
|
|
53
|
-
const events = "events" in loop && Array.isArray(loop.events) ? loop.events : [];
|
|
54
|
-
return events
|
|
55
|
-
.filter((event) => event?.type === "verification.completed")
|
|
56
|
-
.map((event) => {
|
|
57
|
-
const payload = isRecord(event.payload) ? event.payload : {};
|
|
58
|
-
return {
|
|
59
|
-
...(typeof event.eventId === "string" ? { eventId: event.eventId } : {}),
|
|
60
|
-
...(typeof event.timestamp === "string" ? { timestamp: event.timestamp } : {}),
|
|
61
|
-
...(typeof event.lifecycleState === "string" ? { lifecycleState: event.lifecycleState } : {}),
|
|
62
|
-
...(typeof payload.passed === "boolean" ? { passed: payload.passed } : {}),
|
|
63
|
-
...(typeof payload.summary === "string" ? { summary: payload.summary } : {})
|
|
64
|
-
};
|
|
65
|
-
});
|
|
66
|
-
}
|
|
67
|
-
export function getAttempt(loop, attemptIndex) {
|
|
68
|
-
const attempt = loop.attempts.find((candidate) => candidate.index === attemptIndex);
|
|
69
|
-
if (!attempt) {
|
|
70
|
-
throw new Error("Attempt not found.");
|
|
71
|
-
}
|
|
72
|
-
return attempt;
|
|
73
|
-
}
|
|
74
|
-
export function buildRunDossier(loop) {
|
|
75
|
-
return {
|
|
76
|
-
loopId: loop.loopId,
|
|
77
|
-
generatedAt: new Date().toISOString(),
|
|
78
|
-
sections: [
|
|
79
|
-
{
|
|
80
|
-
kind: "summary",
|
|
81
|
-
content: summarizeRun(loop)
|
|
82
|
-
},
|
|
83
|
-
{
|
|
84
|
-
kind: "task",
|
|
85
|
-
content: loop.task
|
|
86
|
-
},
|
|
87
|
-
{
|
|
88
|
-
kind: "budget",
|
|
89
|
-
content: {
|
|
90
|
-
budget: loop.budget,
|
|
91
|
-
cost: loop.cost
|
|
92
|
-
}
|
|
93
|
-
},
|
|
94
|
-
{
|
|
95
|
-
kind: "attempts",
|
|
96
|
-
content: loop.attempts
|
|
97
|
-
},
|
|
98
|
-
{
|
|
99
|
-
kind: "verification",
|
|
100
|
-
content: extractVerificationResults(loop)
|
|
101
|
-
}
|
|
102
|
-
]
|
|
103
|
-
};
|
|
104
|
-
}
|
|
105
|
-
function isRecord(value) {
|
|
106
|
-
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
|
|
107
|
-
}
|
|
108
|
-
//# sourceMappingURL=cockpit-support.js.map
|