@martinloop/mcp 0.2.0 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -158
- package/dist/discovery-metadata.d.ts +16 -0
- package/dist/discovery-metadata.js +62 -0
- package/dist/discovery-support.d.ts +62 -0
- package/dist/discovery-support.js +224 -0
- package/dist/package-version.d.ts +1 -0
- package/dist/package-version.js +3 -0
- package/dist/prompts.d.ts +13 -3
- package/dist/prompts.js +445 -74
- package/dist/resources.d.ts +27 -5
- package/dist/resources.js +557 -71
- package/dist/server-validation.d.ts +2 -3
- package/dist/server-validation.js +262 -122
- package/dist/server.d.ts +76 -7
- package/dist/server.js +1126 -400
- package/dist/tools/doctor.js +14 -6
- package/dist/tools/get-attempt.d.ts +13 -6
- package/dist/tools/get-attempt.js +14 -5
- package/dist/tools/get-run.d.ts +19 -12
- package/dist/tools/get-run.js +20 -11
- package/dist/tools/get-status.d.ts +11 -0
- package/dist/tools/get-status.js +12 -2
- package/dist/tools/get-verification-results.d.ts +10 -7
- package/dist/tools/get-verification-results.js +11 -6
- package/dist/tools/inspect-loop.d.ts +9 -0
- package/dist/tools/inspect-loop.js +11 -2
- package/dist/tools/list-runs.d.ts +25 -5
- package/dist/tools/list-runs.js +21 -4
- package/dist/tools/preflight.js +7 -2
- package/dist/tools/run-dossier.d.ts +37 -4
- package/dist/tools/run-dossier.js +40 -5
- package/dist/tools/run-loop.d.ts +19 -0
- package/dist/tools/run-loop.js +41 -3
- package/dist/tools/run-store.d.ts +57 -3
- package/dist/tools/run-store.js +404 -53
- package/dist/tools/tool-errors.d.ts +37 -0
- package/dist/tools/tool-errors.js +170 -0
- package/dist/tools/tool-response.d.ts +16 -0
- package/dist/tools/tool-response.js +34 -0
- package/dist/tools/tool-support.d.ts +92 -2
- package/dist/tools/tool-support.js +358 -63
- package/dist/tools/triage-runs.d.ts +33 -0
- package/dist/tools/triage-runs.js +138 -0
- package/dist/vendor/adapters/claude-cli.js +0 -1
- package/dist/vendor/adapters/cli-bridge.js +0 -1
- package/dist/vendor/adapters/direct-provider.js +0 -1
- package/dist/vendor/adapters/index.js +0 -1
- package/dist/vendor/adapters/runtime-support.js +0 -1
- package/dist/vendor/adapters/stub-agent-cli.js +0 -1
- package/dist/vendor/adapters/stub-direct-provider.js +0 -1
- package/dist/vendor/adapters/verifier-only.js +0 -1
- package/dist/vendor/contracts/governance.js +0 -1
- package/dist/vendor/contracts/index.d.ts +2 -0
- package/dist/vendor/contracts/index.js +1 -1
- package/dist/vendor/contracts/operator.d.ts +19 -0
- package/dist/vendor/contracts/operator.js +11 -0
- package/dist/vendor/core/compiler.js +0 -1
- package/dist/vendor/core/context-integrity.js +0 -1
- package/dist/vendor/core/grounding.js +0 -1
- package/dist/vendor/core/index.js +1 -2
- package/dist/vendor/core/leash.js +19 -12
- package/dist/vendor/core/persistence/compiler.js +0 -1
- package/dist/vendor/core/persistence/index.js +0 -1
- package/dist/vendor/core/persistence/ledger.js +0 -1
- package/dist/vendor/core/persistence/runs-reader.js +0 -1
- package/dist/vendor/core/persistence/store.js +0 -1
- package/dist/vendor/core/policy.js +0 -1
- package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
- package/dist/vendor/core/red-blue/red-phase.js +135 -0
- package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
- package/dist/vendor/core/red-blue/risk-tiers.js +32 -0
- package/dist/vendor/core/rollback.js +2 -3
- package/package.json +10 -5
- package/server.json +2 -2
- package/dist/tools/cockpit-support.d.ts +0 -69
- package/dist/tools/cockpit-support.js +0 -108
|
@@ -108,6 +108,8 @@ export interface LoopRecordDraft {
|
|
|
108
108
|
createdAt?: string;
|
|
109
109
|
updatedAt?: string;
|
|
110
110
|
}
|
|
111
|
+
export type { MartinErrorCategory, MartinOutputMode, MartinRunListFilters, MartinRunSelector } from "./operator.js";
|
|
112
|
+
export { MARTIN_ERROR_CATEGORIES } from "./operator.js";
|
|
111
113
|
export interface LoopEventDraft {
|
|
112
114
|
type: LoopEventType;
|
|
113
115
|
lifecycleState?: LoopLifecycleState;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
export { MARTIN_ERROR_CATEGORIES } from "./operator.js";
|
|
1
2
|
export const DEFAULT_BUDGET = {
|
|
2
3
|
maxUsd: 25,
|
|
3
4
|
softLimitUsd: 15,
|
|
@@ -200,4 +201,3 @@ function hasText(value) {
|
|
|
200
201
|
return typeof value === "string" && value.trim().length > 0;
|
|
201
202
|
}
|
|
202
203
|
export { createGovernanceSnapshot } from "./governance.js";
|
|
203
|
-
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export declare const MARTIN_ERROR_CATEGORIES: readonly ["invalid_input", "environment", "auth", "not_found", "store_unreadable", "verification_failed", "policy_blocked", "budget_exit", "transient"];
|
|
2
|
+
export type MartinErrorCategory = (typeof MARTIN_ERROR_CATEGORIES)[number];
|
|
3
|
+
export type MartinOutputMode = "human" | "json" | "quiet";
|
|
4
|
+
export interface MartinRunSelector {
|
|
5
|
+
runsDir?: string;
|
|
6
|
+
file?: string;
|
|
7
|
+
loopId?: string;
|
|
8
|
+
latest?: boolean;
|
|
9
|
+
attemptIndex?: number;
|
|
10
|
+
}
|
|
11
|
+
export interface MartinRunListFilters {
|
|
12
|
+
runsDir?: string;
|
|
13
|
+
limit?: number;
|
|
14
|
+
status?: string;
|
|
15
|
+
lifecycleState?: string;
|
|
16
|
+
adapterId?: string;
|
|
17
|
+
model?: string;
|
|
18
|
+
updatedAfter?: string;
|
|
19
|
+
}
|
|
@@ -1099,7 +1099,7 @@ function resolveChangedFiles(result, repoRoot) {
|
|
|
1099
1099
|
return [];
|
|
1100
1100
|
}
|
|
1101
1101
|
try {
|
|
1102
|
-
const diff = spawnSync("git", ["diff", "--name-only", "HEAD"], {
|
|
1102
|
+
const diff = spawnSync("git", ["diff", "--name-only", "HEAD", "--", "."], {
|
|
1103
1103
|
cwd: repoRoot,
|
|
1104
1104
|
encoding: "utf8"
|
|
1105
1105
|
});
|
|
@@ -1254,4 +1254,3 @@ function applyPatchFailureToLoop(loop, input) {
|
|
|
1254
1254
|
: attempt)
|
|
1255
1255
|
};
|
|
1256
1256
|
}
|
|
1257
|
-
//# sourceMappingURL=index.js.map
|
|
@@ -9,7 +9,7 @@ const BLOCKED_PATTERNS = [
|
|
|
9
9
|
/(^|\s)mkfs(\.|\s|$)/u,
|
|
10
10
|
/(^|\s)dd\s+if=/u,
|
|
11
11
|
/(shutdown|reboot)(\s|$)/iu,
|
|
12
|
-
/:\(\)\{
|
|
12
|
+
/:\(\)\s*\{\s*:\|:&\s*\}\s*;\s*:/u,
|
|
13
13
|
/chmod\s+-R\s+777\s+\//iu,
|
|
14
14
|
/(kubectl|docker)\s+.*\b(delete|prune|rm)\b/iu,
|
|
15
15
|
/ssh\s+/iu,
|
|
@@ -314,17 +314,25 @@ function normalizeChangedFile(file, repoRoot) {
|
|
|
314
314
|
};
|
|
315
315
|
}
|
|
316
316
|
function matchesPathPattern(file, pattern) {
|
|
317
|
-
const normalizedFile = file
|
|
318
|
-
const normalizedPattern = pattern
|
|
319
|
-
if (normalizedPattern.includes("
|
|
320
|
-
|
|
321
|
-
|
|
317
|
+
const normalizedFile = normalizePathForMatching(file);
|
|
318
|
+
const normalizedPattern = normalizePathForMatching(pattern);
|
|
319
|
+
if (!normalizedPattern.includes("*")) {
|
|
320
|
+
return (normalizedFile === normalizedPattern ||
|
|
321
|
+
normalizedFile.startsWith(`${normalizedPattern.replace(/\/$/u, "")}/`));
|
|
322
322
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
323
|
+
const regexStr = normalizedPattern
|
|
324
|
+
.replace(/[.+^${}()|[\]\\]/gu, "\\$&")
|
|
325
|
+
.replace(/\*\*/gu, "__DOUBLESTAR__")
|
|
326
|
+
.replace(/\*/gu, "[^/]*")
|
|
327
|
+
.replace(/__DOUBLESTAR__/gu, ".*");
|
|
328
|
+
return new RegExp(`^${regexStr}$`, "u").test(normalizedFile);
|
|
329
|
+
}
|
|
330
|
+
function normalizePathForMatching(value) {
|
|
331
|
+
return value
|
|
332
|
+
.replace(/\\/gu, "/")
|
|
333
|
+
.replace(/^\.\//u, "")
|
|
334
|
+
.replace(/\/{2,}/gu, "/")
|
|
335
|
+
.replace(/\/$/u, "");
|
|
328
336
|
}
|
|
329
337
|
function buildNetworkViolation(command, profile) {
|
|
330
338
|
const targets = extractNetworkTargets(command);
|
|
@@ -405,4 +413,3 @@ function isConfigChangeFile(file) {
|
|
|
405
413
|
normalized.startsWith("infrastructure/") ||
|
|
406
414
|
normalized.startsWith("ops/"));
|
|
407
415
|
}
|
|
408
|
-
//# sourceMappingURL=leash.js.map
|
|
@@ -2,4 +2,3 @@ export { makeLedgerEvent } from "./ledger.js";
|
|
|
2
2
|
export { artifactDir, createFileRunStore, resolveRunsRoot, runDir } from "./store.js";
|
|
3
3
|
export { readAllLoopRecords, readLatestLoopRecord, readLatestLoopRecordFromFile, readLoopRecordsFromFile } from "./runs-reader.js";
|
|
4
4
|
export { compileAndPersistContext } from "./compiler.js";
|
|
5
|
-
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { type RiskTier } from "./risk-tiers.js";
|
|
2
|
+
export interface RedFinding {
|
|
3
|
+
trapId: string;
|
|
4
|
+
severity: "warn" | "block";
|
|
5
|
+
description: string;
|
|
6
|
+
resolvedAt?: string;
|
|
7
|
+
}
|
|
8
|
+
export interface RedFindings {
|
|
9
|
+
riskTier: RiskTier;
|
|
10
|
+
probesRun: number;
|
|
11
|
+
findingsCount: number;
|
|
12
|
+
findings: RedFinding[];
|
|
13
|
+
modelCallMade: boolean;
|
|
14
|
+
modelUsed?: string;
|
|
15
|
+
budgetUsedUsd: number;
|
|
16
|
+
}
|
|
17
|
+
/** Minimal interface for the Anthropic model client (mockable in tests). */
|
|
18
|
+
export interface MockModelClient {
|
|
19
|
+
complete(prompt: string): Promise<{
|
|
20
|
+
findings: RedFinding[];
|
|
21
|
+
tokensUsed: number;
|
|
22
|
+
costUsd: number;
|
|
23
|
+
}>;
|
|
24
|
+
}
|
|
25
|
+
export interface RunRedPhaseOptions {
|
|
26
|
+
/** Inject a mock or real Anthropic client. Required for release_critical tier. */
|
|
27
|
+
modelClient?: MockModelClient;
|
|
28
|
+
/** Callback fired with each ledger event produced by the phase. */
|
|
29
|
+
onLedgerEvent?: (event: RedLedgerEvent) => void;
|
|
30
|
+
}
|
|
31
|
+
export interface RedLedgerEvent {
|
|
32
|
+
type: "red_phase_findings";
|
|
33
|
+
runId?: string;
|
|
34
|
+
riskTier: RiskTier;
|
|
35
|
+
probesRun: number;
|
|
36
|
+
findingsCount: number;
|
|
37
|
+
modelCallMade: boolean;
|
|
38
|
+
timestamp: string;
|
|
39
|
+
}
|
|
40
|
+
export interface PatchInput {
|
|
41
|
+
patchId: string;
|
|
42
|
+
diff: string;
|
|
43
|
+
changedFiles: string[];
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Runs the Red phase for a given patch and risk tier.
|
|
47
|
+
*
|
|
48
|
+
* - baseline: programmatic probes only, no model call
|
|
49
|
+
* - high_risk: paranoid programmatic scan, no model call
|
|
50
|
+
* - release_critical: paranoid scan + one Haiku model call
|
|
51
|
+
*/
|
|
52
|
+
export declare function runRedPhase(patch: PatchInput, tier: RiskTier, blueBudgetUsd: number, options?: RunRedPhaseOptions): Promise<RedFindings>;
|
|
53
|
+
/**
|
|
54
|
+
* Returns true only if the findings contain zero block-severity entries.
|
|
55
|
+
* A single block finding rejects the patch regardless of warn count.
|
|
56
|
+
*/
|
|
57
|
+
export declare function shouldAcceptPatch(findings: RedFindings): boolean;
|
|
58
|
+
/**
|
|
59
|
+
* Convenience builder for RedFindings — useful in tests and policy engine.
|
|
60
|
+
*/
|
|
61
|
+
export declare function buildRedFindings(input: Partial<RedFindings> & {
|
|
62
|
+
riskTier: RiskTier;
|
|
63
|
+
findings: RedFinding[];
|
|
64
|
+
}): RedFindings;
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { PROBE_COUNTS, RED_PHASE_MODEL, resolveRedBudgetPolicy } from "./risk-tiers.js";
|
|
2
|
+
// ─── Programmatic probes ──────────────────────────────────────────────────────
|
|
3
|
+
function runProgrammaticProbes(patch, paranoid) {
|
|
4
|
+
const findings = [];
|
|
5
|
+
// Probe 1: assertion deletion
|
|
6
|
+
if (/^\-.*(?:expect\(.*\)\.to|assert\.(?:equal|is|ok|strictEqual)|t\.(?:is|truthy|falsy|deepEqual))/m.test(patch.diff)) {
|
|
7
|
+
findings.push({
|
|
8
|
+
trapId: "T01",
|
|
9
|
+
severity: "warn",
|
|
10
|
+
description: "Patch removes test assertions — possible weak-test false green."
|
|
11
|
+
});
|
|
12
|
+
}
|
|
13
|
+
// Probe 2: grounding evasion pragma
|
|
14
|
+
if (/@martin-ignore|@ts-nocheck|eslint-disable/i.test(patch.diff)) {
|
|
15
|
+
findings.push({
|
|
16
|
+
trapId: "T11",
|
|
17
|
+
severity: "block",
|
|
18
|
+
description: "Grounding evasion pragma detected in patch."
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
// Probe 3: context poisoning
|
|
22
|
+
if (/\.martin\/|\.claude\//.test(patch.diff)) {
|
|
23
|
+
findings.push({
|
|
24
|
+
trapId: "T07",
|
|
25
|
+
severity: "block",
|
|
26
|
+
description: "Patch writes to agent context directory (.martin/ or .claude/)."
|
|
27
|
+
});
|
|
28
|
+
}
|
|
29
|
+
// Probe 4: budget self-report
|
|
30
|
+
if (/budgetSource.*model_reported|model_reported.*budget/i.test(patch.diff)) {
|
|
31
|
+
findings.push({
|
|
32
|
+
trapId: "T10",
|
|
33
|
+
severity: "block",
|
|
34
|
+
description: "Patch introduces model-reported budget sourcing."
|
|
35
|
+
});
|
|
36
|
+
}
|
|
37
|
+
if (paranoid) {
|
|
38
|
+
// Probe 5: scope creep — manifest changes
|
|
39
|
+
if (/^\+.*"[^"]+"\s*:\s*"[^"]+".*$/m.test(patch.diff) &&
|
|
40
|
+
/package\.json|Cargo\.toml|go\.mod/i.test(patch.changedFiles.join(","))) {
|
|
41
|
+
findings.push({
|
|
42
|
+
trapId: "T03",
|
|
43
|
+
severity: "warn",
|
|
44
|
+
description: "Paranoid scan: substantive manifest change detected."
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
// Probe 6: silent revert — removal of recently added symbols
|
|
48
|
+
const removedExportPattern = /^\-.*export\s+(function|const|class)\s+\w+/m;
|
|
49
|
+
if (removedExportPattern.test(patch.diff)) {
|
|
50
|
+
findings.push({
|
|
51
|
+
trapId: "T02",
|
|
52
|
+
severity: "warn",
|
|
53
|
+
description: "Paranoid scan: exported symbol removed — potential silent revert."
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
return findings;
|
|
58
|
+
}
|
|
59
|
+
// ─── Red phase runner ─────────────────────────────────────────────────────────
|
|
60
|
+
/**
|
|
61
|
+
* Runs the Red phase for a given patch and risk tier.
|
|
62
|
+
*
|
|
63
|
+
* - baseline: programmatic probes only, no model call
|
|
64
|
+
* - high_risk: paranoid programmatic scan, no model call
|
|
65
|
+
* - release_critical: paranoid scan + one Haiku model call
|
|
66
|
+
*/
|
|
67
|
+
export async function runRedPhase(patch, tier, blueBudgetUsd, options = {}) {
|
|
68
|
+
const policy = resolveRedBudgetPolicy(tier, blueBudgetUsd);
|
|
69
|
+
const paranoid = tier !== "baseline";
|
|
70
|
+
let findings = runProgrammaticProbes(patch, paranoid);
|
|
71
|
+
let modelCallMade = false;
|
|
72
|
+
let modelUsed;
|
|
73
|
+
let budgetUsedUsd = 0;
|
|
74
|
+
const probesRun = PROBE_COUNTS[tier];
|
|
75
|
+
if (policy.modelCallAllowed && options.modelClient) {
|
|
76
|
+
const prompt = buildRedPhasePrompt(patch, findings);
|
|
77
|
+
const result = await options.modelClient.complete(prompt);
|
|
78
|
+
findings = [...findings, ...result.findings];
|
|
79
|
+
modelCallMade = true;
|
|
80
|
+
modelUsed = RED_PHASE_MODEL;
|
|
81
|
+
budgetUsedUsd += result.costUsd;
|
|
82
|
+
}
|
|
83
|
+
const result = {
|
|
84
|
+
riskTier: tier,
|
|
85
|
+
probesRun,
|
|
86
|
+
findingsCount: findings.length,
|
|
87
|
+
findings,
|
|
88
|
+
modelCallMade,
|
|
89
|
+
...(modelUsed !== undefined ? { modelUsed } : {}),
|
|
90
|
+
budgetUsedUsd
|
|
91
|
+
};
|
|
92
|
+
options.onLedgerEvent?.({
|
|
93
|
+
type: "red_phase_findings",
|
|
94
|
+
riskTier: tier,
|
|
95
|
+
probesRun,
|
|
96
|
+
findingsCount: findings.length,
|
|
97
|
+
modelCallMade,
|
|
98
|
+
timestamp: new Date().toISOString()
|
|
99
|
+
});
|
|
100
|
+
return result;
|
|
101
|
+
}
|
|
102
|
+
// ─── Policy helpers ───────────────────────────────────────────────────────────
|
|
103
|
+
/**
|
|
104
|
+
* Returns true only if the findings contain zero block-severity entries.
|
|
105
|
+
* A single block finding rejects the patch regardless of warn count.
|
|
106
|
+
*/
|
|
107
|
+
export function shouldAcceptPatch(findings) {
|
|
108
|
+
return !findings.findings.some((f) => f.severity === "block");
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* Convenience builder for RedFindings — useful in tests and policy engine.
|
|
112
|
+
*/
|
|
113
|
+
export function buildRedFindings(input) {
|
|
114
|
+
return {
|
|
115
|
+
riskTier: input.riskTier,
|
|
116
|
+
probesRun: input.probesRun ?? PROBE_COUNTS[input.riskTier],
|
|
117
|
+
findingsCount: input.findings.length,
|
|
118
|
+
findings: input.findings,
|
|
119
|
+
modelCallMade: input.modelCallMade ?? false,
|
|
120
|
+
...(input.modelUsed !== undefined ? { modelUsed: input.modelUsed } : {}),
|
|
121
|
+
budgetUsedUsd: input.budgetUsedUsd ?? 0
|
|
122
|
+
};
|
|
123
|
+
}
|
|
124
|
+
// ─── Internal helpers ─────────────────────────────────────────────────────────
|
|
125
|
+
function buildRedPhasePrompt(patch, existingFindings) {
|
|
126
|
+
return [
|
|
127
|
+
"You are a security-focused code reviewer running adversarial analysis.",
|
|
128
|
+
"Analyze this patch for adversarial patterns not caught by programmatic probes.",
|
|
129
|
+
"Return JSON array of findings with: trapId, severity (warn|block), description.",
|
|
130
|
+
`Already detected: ${JSON.stringify(existingFindings.map(f => f.trapId))}`,
|
|
131
|
+
`Changed files: ${patch.changedFiles.join(", ")}`,
|
|
132
|
+
"Diff (truncated to 2000 chars):",
|
|
133
|
+
patch.diff.slice(0, 2000)
|
|
134
|
+
].join("\n");
|
|
135
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
export type RiskTier = "baseline" | "high_risk" | "release_critical";
|
|
2
|
+
export interface RedBudgetPolicy {
|
|
3
|
+
riskTier: RiskTier;
|
|
4
|
+
blueBudgetUsd: number;
|
|
5
|
+
/** Cap on Red phase spend: 30% / 100% / 150% of Blue */
|
|
6
|
+
redBudgetCapUsd: number;
|
|
7
|
+
/** Only release_critical permits a Haiku model call */
|
|
8
|
+
modelCallAllowed: boolean;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Returns the Red phase budget policy for a given risk tier and Blue budget.
|
|
12
|
+
*/
|
|
13
|
+
export declare function resolveRedBudgetPolicy(tier: RiskTier, blueBudgetUsd: number): RedBudgetPolicy;
|
|
14
|
+
/**
|
|
15
|
+
* Probe counts per tier.
|
|
16
|
+
* baseline = standard 6-probe sweep
|
|
17
|
+
* high_risk = paranoid 12-probe sweep
|
|
18
|
+
* release_critical = paranoid 12-probe sweep + model
|
|
19
|
+
*/
|
|
20
|
+
export declare const PROBE_COUNTS: Record<RiskTier, number>;
|
|
21
|
+
/** The only model ever permitted in the Red phase. */
|
|
22
|
+
export declare const RED_PHASE_MODEL: "claude-haiku-4-5-20251001";
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
// ─── Risk Tier Definitions ────────────────────────────────────────────────────
|
|
2
|
+
// Governs how aggressively Red phase probes a patch and whether a model call
|
|
3
|
+
// is permitted. Budget caps are expressed as fractions of the Blue phase budget.
|
|
4
|
+
const BUDGET_MULTIPLIERS = {
|
|
5
|
+
baseline: 0.30,
|
|
6
|
+
high_risk: 1.00,
|
|
7
|
+
release_critical: 1.50
|
|
8
|
+
};
|
|
9
|
+
/**
|
|
10
|
+
* Returns the Red phase budget policy for a given risk tier and Blue budget.
|
|
11
|
+
*/
|
|
12
|
+
export function resolveRedBudgetPolicy(tier, blueBudgetUsd) {
|
|
13
|
+
return {
|
|
14
|
+
riskTier: tier,
|
|
15
|
+
blueBudgetUsd,
|
|
16
|
+
redBudgetCapUsd: blueBudgetUsd * BUDGET_MULTIPLIERS[tier],
|
|
17
|
+
modelCallAllowed: tier === "release_critical"
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Probe counts per tier.
|
|
22
|
+
* baseline = standard 6-probe sweep
|
|
23
|
+
* high_risk = paranoid 12-probe sweep
|
|
24
|
+
* release_critical = paranoid 12-probe sweep + model
|
|
25
|
+
*/
|
|
26
|
+
export const PROBE_COUNTS = {
|
|
27
|
+
baseline: 6,
|
|
28
|
+
high_risk: 12,
|
|
29
|
+
release_critical: 12
|
|
30
|
+
};
|
|
31
|
+
/** The only model ever permitted in the Red phase. */
|
|
32
|
+
export const RED_PHASE_MODEL = "claude-haiku-4-5-20251001";
|
|
@@ -114,8 +114,8 @@ export async function restoreRollbackBoundary(input) {
|
|
|
114
114
|
}
|
|
115
115
|
function readRepoState(repoRoot) {
|
|
116
116
|
return {
|
|
117
|
-
trackedDirtyFiles: readGitLines(repoRoot, ["diff", "--name-only", "HEAD"]),
|
|
118
|
-
untrackedFiles: readGitLines(repoRoot, ["ls-files", "--others", "--exclude-standard"])
|
|
117
|
+
trackedDirtyFiles: readGitLines(repoRoot, ["diff", "--name-only", "HEAD", "--", "."]),
|
|
118
|
+
untrackedFiles: readGitLines(repoRoot, ["ls-files", "--others", "--exclude-standard", "--", "."])
|
|
119
119
|
};
|
|
120
120
|
}
|
|
121
121
|
function readGitLines(repoRoot, args) {
|
|
@@ -216,4 +216,3 @@ function emptyRepoState() {
|
|
|
216
216
|
function toErrorMessage(error) {
|
|
217
217
|
return error instanceof Error ? error.message : String(error);
|
|
218
218
|
}
|
|
219
|
-
//# sourceMappingURL=rollback.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@martinloop/mcp",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.5",
|
|
4
4
|
"mcpName": "io.github.Keesan12/martin-loop",
|
|
5
5
|
"private": false,
|
|
6
6
|
"type": "module",
|
|
@@ -24,9 +24,10 @@
|
|
|
24
24
|
"claude",
|
|
25
25
|
"codex",
|
|
26
26
|
"martin_doctor",
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
27
|
+
"martin_triage_runs",
|
|
28
|
+
"martin_run_dossier",
|
|
29
|
+
"mcp-resources",
|
|
30
|
+
"mcp-prompts"
|
|
30
31
|
],
|
|
31
32
|
"bin": {
|
|
32
33
|
"mcp": "./dist/server.js",
|
|
@@ -56,7 +57,8 @@
|
|
|
56
57
|
"verify:release": "node --test ../../scripts/tests/publish-mcp-workflow.test.mjs ../../scripts/tests/mcp-publish-reliability.test.mjs ../../scripts/tests/mcp-release-docs.test.mjs",
|
|
57
58
|
"test": "vitest run",
|
|
58
59
|
"lint": "tsc -p tsconfig.json --noEmit",
|
|
59
|
-
"start": "node dist/server.js"
|
|
60
|
+
"start": "node dist/server.js",
|
|
61
|
+
"inspect:live": "node ./scripts/inspect-live.mjs"
|
|
60
62
|
},
|
|
61
63
|
"dependencies": {
|
|
62
64
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
@@ -66,5 +68,8 @@
|
|
|
66
68
|
"@opentelemetry/resources": "^2.6.1",
|
|
67
69
|
"@opentelemetry/sdk-logs": "^0.214.0",
|
|
68
70
|
"ts-morph": "^21.0.0"
|
|
71
|
+
},
|
|
72
|
+
"devDependencies": {
|
|
73
|
+
"@martin/contracts": "workspace:*"
|
|
69
74
|
}
|
|
70
75
|
}
|
package/server.json
CHANGED
|
@@ -7,12 +7,12 @@
|
|
|
7
7
|
"url": "https://github.com/Keesan12/martin-loop",
|
|
8
8
|
"source": "github"
|
|
9
9
|
},
|
|
10
|
-
"version": "0.2.0",
|
|
10
|
+
"version": "0.2.5",
|
|
11
11
|
"packages": [
|
|
12
12
|
{
|
|
13
13
|
"registryType": "npm",
|
|
14
14
|
"identifier": "@martinloop/mcp",
|
|
15
|
-
"version": "0.2.0",
|
|
15
|
+
"version": "0.2.5",
|
|
16
16
|
"transport": {
|
|
17
17
|
"type": "stdio"
|
|
18
18
|
}
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import { type LoopRunRecord } from "../vendor/core/index.js";
|
|
2
|
-
export interface RunSelectorInput {
|
|
3
|
-
loopId?: string;
|
|
4
|
-
runsDir?: string;
|
|
5
|
-
latest?: boolean;
|
|
6
|
-
}
|
|
7
|
-
export interface RunSummary {
|
|
8
|
-
loopId: string;
|
|
9
|
-
title: string;
|
|
10
|
-
objective: string;
|
|
11
|
-
status: string;
|
|
12
|
-
lifecycleState: string;
|
|
13
|
-
createdAt: string;
|
|
14
|
-
updatedAt: string;
|
|
15
|
-
attempts: number;
|
|
16
|
-
costUsd: number;
|
|
17
|
-
avoidedUsd: number;
|
|
18
|
-
pressure: string;
|
|
19
|
-
shouldStop: boolean;
|
|
20
|
-
verificationCount: number;
|
|
21
|
-
}
|
|
22
|
-
export interface VerificationResultSummary {
|
|
23
|
-
eventId?: string;
|
|
24
|
-
timestamp?: string;
|
|
25
|
-
lifecycleState?: string;
|
|
26
|
-
passed?: boolean;
|
|
27
|
-
summary?: string;
|
|
28
|
-
}
|
|
29
|
-
export declare function summarizeRun(loop: LoopRunRecord): RunSummary;
|
|
30
|
-
export declare function listRunSummaries(input?: {
|
|
31
|
-
runsDir?: string;
|
|
32
|
-
limit?: number;
|
|
33
|
-
}): Promise<RunSummary[]>;
|
|
34
|
-
export declare function loadSelectedRun(input: RunSelectorInput): Promise<LoopRunRecord>;
|
|
35
|
-
export declare function extractVerificationResults(loop: LoopRunRecord): VerificationResultSummary[];
|
|
36
|
-
export declare function getAttempt(loop: LoopRunRecord, attemptIndex: number): import("../vendor/core/index.js").LoopAttemptRecord;
|
|
37
|
-
export declare function buildRunDossier(loop: LoopRunRecord): {
|
|
38
|
-
loopId: string;
|
|
39
|
-
generatedAt: string;
|
|
40
|
-
sections: ({
|
|
41
|
-
kind: string;
|
|
42
|
-
content: {
|
|
43
|
-
title: string;
|
|
44
|
-
objective: string;
|
|
45
|
-
};
|
|
46
|
-
} | {
|
|
47
|
-
kind: string;
|
|
48
|
-
content: {
|
|
49
|
-
budget: {
|
|
50
|
-
maxUsd: number;
|
|
51
|
-
softLimitUsd: number;
|
|
52
|
-
maxIterations: number;
|
|
53
|
-
maxTokens: number;
|
|
54
|
-
};
|
|
55
|
-
cost: {
|
|
56
|
-
actualUsd: number;
|
|
57
|
-
tokensIn: number;
|
|
58
|
-
tokensOut: number;
|
|
59
|
-
avoidedUsd?: number;
|
|
60
|
-
};
|
|
61
|
-
};
|
|
62
|
-
} | {
|
|
63
|
-
kind: string;
|
|
64
|
-
content: import("../vendor/core/index.js").LoopAttemptRecord[];
|
|
65
|
-
} | {
|
|
66
|
-
kind: string;
|
|
67
|
-
content: VerificationResultSummary[];
|
|
68
|
-
})[];
|
|
69
|
-
};
|