@martinloop/mcp 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +59 -0
  2. package/dist/server.d.ts +20 -0
  3. package/dist/server.js +153 -0
  4. package/dist/tools/get-status.d.ts +18 -0
  5. package/dist/tools/get-status.js +23 -0
  6. package/dist/tools/inspect-loop.d.ts +11 -0
  7. package/dist/tools/inspect-loop.js +15 -0
  8. package/dist/tools/run-loop.d.ts +22 -0
  9. package/dist/tools/run-loop.js +50 -0
  10. package/dist/vendor/adapters/claude-cli.d.ts +89 -0
  11. package/dist/vendor/adapters/claude-cli.js +555 -0
  12. package/dist/vendor/adapters/cli-bridge.d.ts +28 -0
  13. package/dist/vendor/adapters/cli-bridge.js +127 -0
  14. package/dist/vendor/adapters/direct-provider.d.ts +10 -0
  15. package/dist/vendor/adapters/direct-provider.js +41 -0
  16. package/dist/vendor/adapters/index.d.ts +5 -0
  17. package/dist/vendor/adapters/index.js +5 -0
  18. package/dist/vendor/adapters/runtime-support.d.ts +14 -0
  19. package/dist/vendor/adapters/runtime-support.js +52 -0
  20. package/dist/vendor/adapters/stub-agent-cli.d.ts +8 -0
  21. package/dist/vendor/adapters/stub-agent-cli.js +41 -0
  22. package/dist/vendor/adapters/stub-direct-provider.d.ts +8 -0
  23. package/dist/vendor/adapters/stub-direct-provider.js +10 -0
  24. package/dist/vendor/contracts/governance.d.ts +21 -0
  25. package/dist/vendor/contracts/governance.js +12 -0
  26. package/dist/vendor/contracts/index.d.ts +330 -0
  27. package/dist/vendor/contracts/index.js +203 -0
  28. package/dist/vendor/core/compiler.d.ts +50 -0
  29. package/dist/vendor/core/compiler.js +47 -0
  30. package/dist/vendor/core/grounding.d.ts +37 -0
  31. package/dist/vendor/core/grounding.js +270 -0
  32. package/dist/vendor/core/index.d.ts +145 -0
  33. package/dist/vendor/core/index.js +1099 -0
  34. package/dist/vendor/core/leash.d.ts +48 -0
  35. package/dist/vendor/core/leash.js +408 -0
  36. package/dist/vendor/core/persistence/compiler.d.ts +18 -0
  37. package/dist/vendor/core/persistence/compiler.js +35 -0
  38. package/dist/vendor/core/persistence/index.d.ts +6 -0
  39. package/dist/vendor/core/persistence/index.js +4 -0
  40. package/dist/vendor/core/persistence/ledger.d.ts +23 -0
  41. package/dist/vendor/core/persistence/ledger.js +10 -0
  42. package/dist/vendor/core/persistence/store.d.ts +77 -0
  43. package/dist/vendor/core/persistence/store.js +84 -0
  44. package/dist/vendor/core/policy.d.ts +126 -0
  45. package/dist/vendor/core/policy.js +625 -0
  46. package/dist/vendor/core/rollback.d.ts +11 -0
  47. package/dist/vendor/core/rollback.js +219 -0
  48. package/package.json +60 -0
package/README.md ADDED
@@ -0,0 +1,59 @@
1
+ # @martinloop/mcp
2
+
3
+ Martin Loop's installable Model Context Protocol server.
4
+
5
+ It exposes three MCP tools over stdio:
6
+
7
+ - `martin_run`
8
+ - `martin_inspect`
9
+ - `martin_status`
10
+
11
+ ## Quickstart
12
+
13
+ Run the packaged server directly:
14
+
15
+ ```sh
16
+ npx @martinloop/mcp
17
+ ```
18
+
19
+ Add it to Claude Code:
20
+
21
+ ```sh
22
+ # macOS/Linux
23
+ claude mcp add --scope user martin-loop -- npx @martinloop/mcp
24
+
25
+ # Windows PowerShell/cmd
26
+ claude mcp add --scope user martin-loop cmd /c "npx @martinloop/mcp"
27
+ ```
28
+
29
+ For clients that want explicit command/args:
30
+
31
+ - Command: `npx`
32
+ - Args: `@martinloop/mcp`
33
+
34
+ ## Official MCP Registry
35
+
36
+ This package is prepared for the official MCP Registry metadata flow:
37
+
38
+ - npm package: `@martinloop/mcp`
39
+ - registry server name: `io.github.keesan12/martin-loop`
40
+ - manifest file: `packages/mcp/server.json`
41
+
42
+ The official registry publish flow is separate from npm publication. After publishing the package to npm, run the publisher from `packages/mcp`:
43
+
44
+ ```sh
45
+ mcp-publisher login github
46
+ mcp-publisher publish
47
+ ```
48
+
49
+ ## Local Verification
50
+
51
+ From the repository root:
52
+
53
+ ```sh
54
+ pnpm --filter @martinloop/mcp build
55
+ pnpm --filter @martinloop/mcp test
56
+ pnpm --filter @martinloop/mcp smoke:pack
57
+ ```
58
+
59
+ `smoke:pack` packs the tarball, launches it through `npx`, performs the MCP handshake, lists tools, and verifies a `martin_status` call.
package/dist/server.d.ts ADDED
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Martin Loop MCP Server
4
+ *
5
+ * Exposes three tools over the Model Context Protocol (stdio transport):
6
+ * martin_run — execute a full Martin loop on a coding task
7
+ * martin_inspect — summarise a saved loop record file
8
+ * martin_status — return cost and pressure state from a loop record
9
+ *
10
+ * Setup (Claude Code):
11
+ * macOS/Linux: claude mcp add --scope user martin-loop -- npx @martinloop/mcp
12
+ * Windows: claude mcp add --scope user martin-loop cmd /c "npx @martinloop/mcp"
13
+ *
14
+ * Packaged smoke test:
15
+ * pnpm --filter @martinloop/mcp smoke:pack
16
+ *
17
+ * Manual start:
18
+ * node dist/server.js
19
+ */
20
+ export {};
package/dist/server.js ADDED
@@ -0,0 +1,153 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Martin Loop MCP Server
4
+ *
5
+ * Exposes three tools over the Model Context Protocol (stdio transport):
6
+ * martin_run — execute a full Martin loop on a coding task
7
+ * martin_inspect — summarise a saved loop record file
8
+ * martin_status — return cost and pressure state from a loop record
9
+ *
10
+ * Setup (Claude Code):
11
+ * macOS/Linux: claude mcp add --scope user martin-loop -- npx @martinloop/mcp
12
+ * Windows: claude mcp add --scope user martin-loop cmd /c "npx @martinloop/mcp"
13
+ *
14
+ * Packaged smoke test:
15
+ * pnpm --filter @martinloop/mcp smoke:pack
16
+ *
17
+ * Manual start:
18
+ * node dist/server.js
19
+ */
20
+ import { Server } from "@modelcontextprotocol/sdk/server/index.js";
21
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
22
+ import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
23
+ import { getStatusTool } from "./tools/get-status.js";
24
+ import { inspectLoopTool } from "./tools/inspect-loop.js";
25
+ import { runLoopTool } from "./tools/run-loop.js";
26
+ const server = new Server({ name: "martin-loop", version: "0.1.1" }, { capabilities: { tools: {} } }); // advertises only the "tools" capability; the reported version is a string literal matching this package release (0.1.1)
27
+ // ---------------------------------------------------------------------------
28
+ // Tool manifest
29
+ // ---------------------------------------------------------------------------
30
+ server.setRequestHandler(ListToolsRequestSchema, () => ({ // tool listing: returns the same static three-tool manifest on every request
31
+ tools: [
32
+ {
33
+ name: "martin_run", // dispatched to runLoopTool by the CallToolRequestSchema handler
34
+ description: "Execute a full Martin Loop on a coding task. Martin spawns the selected agent CLI (claude or codex), runs the task, classifies failures, and retries within the specified budget. Returns the loop outcome including lifecycle state, attempt count, and spend.",
35
+ inputSchema: {
36
+ type: "object",
37
+ properties: {
38
+ objective: {
39
+ type: "string",
40
+ description: "The coding task to complete. Be specific about what needs to change."
41
+ },
42
+ workingDirectory: {
43
+ type: "string",
44
+ description: "Absolute path to the project root. Defaults to the current working directory."
45
+ },
46
+ engine: {
47
+ type: "string",
48
+ enum: ["claude", "codex"], // run-loop.js picks the Codex adapter only for "codex"; every other value falls through to the Claude adapter
49
+ description: "Which agent CLI to use. Defaults to 'claude'."
50
+ },
51
+ model: {
52
+ type: "string",
53
+ description: "Model override passed to the CLI (e.g. 'claude-opus-4-6', 'o3')."
54
+ },
55
+ maxUsd: {
56
+ type: "number",
57
+ description: "Hard budget ceiling in USD. Defaults to 25." // NOTE(review): the default is supplied by DEFAULT_BUDGET in vendor/contracts (not visible here) — confirm it is 25
58
+ },
59
+ maxIterations: {
60
+ type: "number", // NOTE(review): "number" also admits fractional values — confirm whether "integer" was intended
61
+ description: "Maximum number of loop attempts. Defaults to 8." // NOTE(review): default comes from DEFAULT_BUDGET — confirm it is 8
62
+ },
63
+ maxTokens: {
64
+ type: "number", // NOTE(review): same as maxIterations — fractional values are accepted by this schema
65
+ description: "Maximum total tokens across all attempts. Defaults to 80000." // NOTE(review): default comes from DEFAULT_BUDGET — confirm it is 80000
66
+ },
67
+ verificationPlan: {
68
+ type: "array",
69
+ items: { type: "string" },
70
+ description: "Shell commands that must all exit 0 for the task to be considered complete (e.g. ['pnpm test', 'pnpm build'])."
71
+ },
72
+ workspaceId: {
73
+ type: "string",
74
+ description: "Workspace identifier for telemetry. Defaults to 'ws_mcp'." // fallback is applied in tools/run-loop.js
75
+ },
76
+ projectId: {
77
+ type: "string",
78
+ description: "Project identifier for telemetry. Defaults to 'proj_mcp'." // fallback is applied in tools/run-loop.js
79
+ }
80
+ },
81
+ required: ["objective"] // every other property has a fallback (or is simply omitted) in tools/run-loop.js
82
+ }
83
+ },
84
+ {
85
+ name: "martin_inspect", // dispatched to inspectLoopTool
86
+ description: "Summarise a saved Martin loop record file. Reads a JSON file containing one or more LoopRecords and returns portfolio-level statistics: total spend, avoided spend, token counts, and loop counts.",
87
+ inputSchema: {
88
+ type: "object",
89
+ properties: {
90
+ file: {
91
+ type: "string", // handed unchanged to fs readFile in tools/inspect-loop.js
92
+ description: "Absolute or relative path to a LoopRecord JSON file."
93
+ }
94
+ },
95
+ required: ["file"]
96
+ }
97
+ },
98
+ {
99
+ name: "martin_status", // dispatched to getStatusTool
100
+ description: "Return the current budget and cost state of a Martin loop record. Useful for monitoring in-progress or completed loops.",
101
+ inputSchema: {
102
+ type: "object",
103
+ properties: {
104
+ loopJson: {
105
+ type: "string", // the record travels as a JSON string and is parsed with JSON.parse in tools/get-status.js
106
+ description: "JSON-serialized LoopRecord."
107
+ }
108
+ },
109
+ required: ["loopJson"]
110
+ }
111
+ }
112
+ ]
113
+ }));
114
+ // ---------------------------------------------------------------------------
115
+ // Tool dispatch
116
+ // ---------------------------------------------------------------------------
117
+ server.setRequestHandler(CallToolRequestSchema, async (request) => { // single dispatcher for tool calls: routes on the tool name and wraps each result as pretty-printed JSON text
118
+ const { name, arguments: args } = request.params; // NOTE(review): args is forwarded as-is; nothing in this handler validates it against the advertised inputSchema
119
+ try {
120
+ if (name === "martin_run") {
121
+ const input = args; // plain alias; presumably the residue of a type assertion in the TypeScript source — confirm
122
+ const output = await runLoopTool(input);
123
+ return { content: [{ type: "text", text: JSON.stringify(output, null, 2) }] };
124
+ }
125
+ if (name === "martin_inspect") {
126
+ const input = args;
127
+ const output = await inspectLoopTool(input);
128
+ return { content: [{ type: "text", text: JSON.stringify(output, null, 2) }] };
129
+ }
130
+ if (name === "martin_status") {
131
+ const input = args;
132
+ const output = getStatusTool(input); // synchronous, unlike the other two tools, so there is no await
133
+ return { content: [{ type: "text", text: JSON.stringify(output, null, 2) }] };
134
+ }
135
+ return { // an unrecognised tool name is reported as an isError result rather than thrown
136
+ content: [{ type: "text", text: `Unknown tool: ${name}` }],
137
+ isError: true
138
+ };
139
+ }
140
+ catch (error) { // anything thrown by a tool (bad JSON, unreadable file, missing fields, ...) becomes an isError result instead of propagating
141
+ const message = error instanceof Error ? error.message : String(error); // only the message is surfaced; the stack is dropped
142
+ return {
143
+ content: [{ type: "text", text: `Tool error: ${message}` }],
144
+ isError: true
145
+ };
146
+ }
147
+ });
148
+ // ---------------------------------------------------------------------------
149
+ // Start
150
+ // ---------------------------------------------------------------------------
151
+ const transport = new StdioServerTransport(); // stdio transport, as stated in the header comment of this file
152
+ await server.connect(transport); // top-level await, so this file must be loaded as an ES module
153
+ //# sourceMappingURL=server.js.map
package/dist/tools/get-status.d.ts ADDED
@@ -0,0 +1,18 @@
1
+ export interface GetStatusInput { // argument object for the martin_status tool
2
+ /** JSON-serialized LoopRecord. */
3
+ loopJson: string;
4
+ }
5
+ export interface GetStatusOutput { // result of getStatusTool; field provenance below is taken from get-status.js
6
+ loopId: string; // copied from the parsed record
7
+ status: string; // copied from the parsed record
8
+ lifecycleState: string; // copied from the parsed record
9
+ attempts: number; // length of the record's attempts array
10
+ costUsd: number; // the record's cost.actualUsd
11
+ avoidedUsd: number; // the record's cost.avoidedUsd
12
+ pressure: string; // this and every field below is passed through from evaluateCostGovernor
13
+ shouldStop: boolean;
14
+ remainingBudgetUsd: number;
15
+ remainingIterations: number;
16
+ remainingTokens: number;
17
+ }
18
+ export declare function getStatusTool(input: GetStatusInput): GetStatusOutput; // synchronous; throws when loopJson is not valid JSON
package/dist/tools/get-status.js ADDED
@@ -0,0 +1,23 @@
1
+ import { evaluateCostGovernor } from "../vendor/core/index.js";
2
+ export function getStatusTool(input) { // parses a JSON-serialized loop record and reports its cost and remaining-budget state; synchronous
3
+ const loop = JSON.parse(input.loopJson); // throws SyntaxError on malformed JSON. NOTE(review): the parsed value is used without any shape validation
4
+ const costState = evaluateCostGovernor({ // budget evaluation is delegated entirely to the vendored core helper
5
+ budget: loop.budget,
6
+ cost: loop.cost,
7
+ attemptsUsed: loop.attempts.length // throws TypeError when the record has no attempts array
8
+ });
9
+ return {
10
+ loopId: loop.loopId, // identity and state fields are copied straight from the record
11
+ status: loop.status,
12
+ lifecycleState: loop.lifecycleState,
13
+ attempts: loop.attempts.length,
14
+ costUsd: loop.cost.actualUsd, // throws TypeError when the record has no cost object (unless evaluateCostGovernor already threw)
15
+ avoidedUsd: loop.cost.avoidedUsd,
16
+ pressure: costState.pressure, // this and the remaining fields are passed through from evaluateCostGovernor unchanged
17
+ shouldStop: costState.shouldStop,
18
+ remainingBudgetUsd: costState.remainingBudgetUsd,
19
+ remainingIterations: costState.remainingIterations,
20
+ remainingTokens: costState.remainingTokens
21
+ };
22
+ }
23
+ //# sourceMappingURL=get-status.js.map
package/dist/tools/inspect-loop.d.ts ADDED
@@ -0,0 +1,11 @@
1
+ import { type PortfolioSnapshot } from "../vendor/contracts/index.js";
2
+ export interface InspectLoopInput { // argument object for the martin_inspect tool
3
+ /** Absolute or relative path to a JSON file containing a LoopRecord or LoopRecord[]. */
4
+ file: string;
5
+ }
6
+ export interface InspectLoopOutput { // result of inspectLoopTool; field provenance below is taken from inspect-loop.js
7
+ source: string; // the input path echoed back exactly as supplied
8
+ loopCount: number; // number of records read; 1 when the file holds a single non-array value
9
+ portfolio: PortfolioSnapshot; // produced by buildPortfolioSnapshot over all records
10
+ }
11
+ export declare function inspectLoopTool(input: InspectLoopInput): Promise<InspectLoopOutput>; // rejects when the file cannot be read or is not valid JSON
package/dist/tools/inspect-loop.js ADDED
@@ -0,0 +1,15 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { buildPortfolioSnapshot } from "../vendor/contracts/index.js";
3
+ export async function inspectLoopTool(input) { // reads a loop-record JSON file and summarises its contents via buildPortfolioSnapshot
4
+ const raw = await readFile(input.file, "utf8"); // the path is used as given; rejects when the file cannot be read
5
+ const parsed = JSON.parse(raw); // throws SyntaxError on malformed JSON
6
+ const loops = Array.isArray(parsed) // accept either a single record or an array of records
7
+ ? parsed
8
+ : [parsed];
9
+ return {
10
+ source: input.file, // echoed back unchanged (not resolved to an absolute path)
11
+ loopCount: loops.length,
12
+ portfolio: buildPortfolioSnapshot(loops) // NOTE(review): records are not shape-checked first; behaviour on non-LoopRecord input depends on buildPortfolioSnapshot
13
+ };
14
+ }
15
+ //# sourceMappingURL=inspect-loop.js.map
package/dist/tools/run-loop.d.ts ADDED
@@ -0,0 +1,22 @@
1
+ export interface RunLoopInput { // argument object for the martin_run tool; fallbacks below are taken from run-loop.js
2
+ objective: string; // the only required field; also supplies the task title (first 100 characters)
3
+ workingDirectory?: string; // falls back to process.cwd()
4
+ engine?: "claude" | "codex"; // falls back to "claude"
5
+ model?: string; // forwarded to the CLI adapter only when non-empty
6
+ maxUsd?: number; // this and the two limits below override the matching DEFAULT_BUDGET entry when supplied
7
+ maxIterations?: number;
8
+ maxTokens?: number;
9
+ verificationPlan?: string[]; // falls back to an empty list
10
+ workspaceId?: string; // falls back to "ws_mcp"
11
+ projectId?: string; // falls back to "proj_mcp"
12
+ }
13
+ export interface RunLoopOutput { // condensed view of the runMartin result
14
+ status: string; // the loop's status
15
+ lifecycleState: string; // the decision's lifecycleState
16
+ reason: string; // the decision's reason
17
+ attempts: number; // number of attempts recorded on the loop
18
+ costUsd: number; // the loop's cost.actualUsd
19
+ verificationPassed: boolean; // true when at least one attempt exists and lifecycleState is "completed"
20
+ loopId: string;
21
+ }
22
+ export declare function runLoopTool(input: RunLoopInput): Promise<RunLoopOutput>;
package/dist/tools/run-loop.js ADDED
@@ -0,0 +1,50 @@
1
+ import { createClaudeCliAdapter, createCodexCliAdapter, createStubDirectProviderAdapter } from "../vendor/adapters/index.js";
2
+ import { runMartin } from "../vendor/core/index.js";
3
+ import { DEFAULT_BUDGET } from "../vendor/contracts/index.js";
4
+ export async function runLoopTool(input) { // builds an adapter and a budget from the tool input, runs one Martin loop, and returns a condensed result
5
+ const workingDirectory = input.workingDirectory ?? process.cwd(); // reaches only the CLI adapters; it is not passed to runMartin or to the stub adapter
6
+ const engine = input.engine ?? "claude";
7
+ const model = input.model;
8
+ const adapter = process.env.MARTIN_LIVE === "false" // only the exact string "false" selects the stub; unset or any other value runs a real CLI adapter
9
+ ? createStubDirectProviderAdapter({ label: "Stub adapter (MARTIN_LIVE=false)", providerId: "stub", model: "stub" })
10
+ : engine === "codex" // any engine value other than "codex" ends up on the Claude adapter
11
+ ? createCodexCliAdapter({ workingDirectory, ...(model ? { model } : {}) }) // model is forwarded only when truthy, so "" means no override
12
+ : createClaudeCliAdapter({ workingDirectory, ...(model ? { model } : {}) });
13
+ const partialBudget = {}; // holds only the caller-supplied limits so DEFAULT_BUDGET can fill in the rest
14
+ if (input.maxUsd !== undefined) { // explicit undefined check: a supplied 0 is kept rather than replaced by the default
15
+ partialBudget.maxUsd = input.maxUsd;
16
+ }
17
+ if (input.maxIterations !== undefined) {
18
+ partialBudget.maxIterations = input.maxIterations;
19
+ }
20
+ if (input.maxTokens !== undefined) {
21
+ partialBudget.maxTokens = input.maxTokens;
22
+ }
23
+ const budget = { // later spread wins: caller-supplied limits override the defaults
24
+ ...DEFAULT_BUDGET,
25
+ ...partialBudget
26
+ };
27
+ const result = await runMartin({
28
+ workspaceId: input.workspaceId ?? "ws_mcp",
29
+ projectId: input.projectId ?? "proj_mcp",
30
+ task: {
31
+ title: input.objective.slice(0, 100), // title is the first 100 characters of the objective. NOTE(review): throws TypeError when objective is missing or not a string; nothing here validates it
32
+ objective: input.objective,
33
+ verificationPlan: input.verificationPlan ?? []
34
+ },
35
+ budget,
36
+ adapter
37
+ });
38
+ const lastAttempt = result.loop.attempts.at(-1); // used below only to check that at least one attempt was recorded
39
+ const verificationPassed = lastAttempt !== undefined && result.decision.lifecycleState === "completed"; // inferred from the final lifecycle state; no per-attempt verification field is read
40
+ return {
41
+ status: result.loop.status,
42
+ lifecycleState: result.decision.lifecycleState,
43
+ reason: result.decision.reason,
44
+ attempts: result.loop.attempts.length,
45
+ costUsd: result.loop.cost.actualUsd,
46
+ verificationPassed,
47
+ loopId: result.loop.loopId
48
+ };
49
+ }
50
+ //# sourceMappingURL=run-loop.js.map
package/dist/vendor/adapters/claude-cli.d.ts ADDED
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Real agent-CLI adapters.
3
+ *
4
+ * Exports a generic factory (`createAgentCliAdapter`) and two pre-configured
5
+ * factories (`createClaudeCliAdapter`, `createCodexCliAdapter`) that spawn
6
+ * the respective AI coding CLI as a child subprocess.
7
+ *
8
+ * Usage in CLI:
9
+ * createClaudeCliAdapter({ workingDirectory: process.cwd() })
10
+ * createCodexCliAdapter({ workingDirectory: process.cwd() })
11
+ *
12
+ * MCP tools and integration tests use the same factories.
13
+ */
14
+ import type { MartinAdapter } from "../core/index.js";
15
+ import { type SpawnLike } from "./cli-bridge.js";
16
+ /**
17
+ * Given a prompt string, returns the argument array passed to spawn() (the executable itself is the separate `command` option).
18
+ * Example for Claude: (p) => ["--print", p, "--dangerously-skip-permissions"]
19
+ * Example for Codex: (p) => ["--full-auto", p]
20
+ */
21
+ export type CliArgsBuilder = (prompt: string) => string[];
22
+ export interface AgentCliAdapterOptions { // options accepted by the generic createAgentCliAdapter factory; NOTE(review): the defaults quoted below are applied in claude-cli.js, which is not visible here — confirm
23
+ /** The executable to spawn (e.g. "claude", "codex"). */
24
+ command: string;
25
+ /** Converts a prompt string into the argument array passed to spawn(). */
26
+ argsBuilder: CliArgsBuilder;
27
+ /** Adapter ID suffix. Defaults to the value of `command`. */
28
+ adapterIdSuffix?: string;
29
+ /** Working directory for all subprocesses. Defaults to process.cwd(). */
30
+ workingDirectory?: string;
31
+ /** Timeout for the agent subprocess in ms. Defaults to 300_000 (5 min). */
32
+ timeoutMs?: number;
33
+ /** Timeout per verification command in ms. Defaults to 60_000 (1 min). */
34
+ verifyTimeoutMs?: number;
35
+ /** Human-readable label shown in loop records. */
36
+ label?: string;
37
+ /** Model name surfaced in adapter metadata (also used for cost estimation). */
38
+ model?: string;
39
+ /**
40
+ * Whether the CLI outputs JSON when --output-format json is passed.
41
+ * Set to false for CLIs that don't support this flag (e.g. Codex).
42
+ * Defaults to true for Claude.
43
+ */
44
+ supportsJsonOutput?: boolean;
45
+ /** Test-only override for subprocess spawning. */
46
+ spawnImpl?: SpawnLike;
47
+ }
48
+ export interface ClaudeCliAdapterOptions { // options for createClaudeCliAdapter; tools/run-loop.js sets only workingDirectory and model
49
+ workingDirectory?: string;
50
+ timeoutMs?: number;
51
+ verifyTimeoutMs?: number;
52
+ label?: string;
53
+ /** Override the model passed via --model flag. */
54
+ model?: string;
55
+ /** Extra args appended after core args (before prompt). */
56
+ extraArgs?: string[]; // NOTE(review): "before prompt" disagrees with the createClaudeCliAdapter doc comment, which lists [extraArgs] after the prompt — confirm the real order in claude-cli.js
57
+ spawnImpl?: SpawnLike; // presumably the same test-only spawn override as on AgentCliAdapterOptions — confirm
58
+ }
59
+ export interface CodexCliAdapterOptions { // options for createCodexCliAdapter; tools/run-loop.js sets only workingDirectory and model
60
+ workingDirectory?: string;
61
+ timeoutMs?: number;
62
+ verifyTimeoutMs?: number;
63
+ label?: string;
64
+ /** Override the model passed via --model flag. */
65
+ model?: string;
66
+ /** Run in full-auto mode (--full-auto). Defaults to true. */
67
+ fullAuto?: boolean;
68
+ /** Extra args appended after core args (before prompt). */
69
+ extraArgs?: string[]; // NOTE(review): "before prompt" disagrees with the createCodexCliAdapter doc comment, which lists [extraArgs] after the prompt — confirm the real order in claude-cli.js
70
+ spawnImpl?: SpawnLike; // presumably the same test-only spawn override as on AgentCliAdapterOptions — confirm
71
+ }
72
+ export declare function createAgentCliAdapter(options: AgentCliAdapterOptions): MartinAdapter; // generic factory; the two factories below are described in the file header as pre-configured variants
73
+ /**
74
+ * Spawns `claude --output-format json --print "<prompt>" --dangerously-skip-permissions [extraArgs]`.
75
+ *
76
+ * The --output-format json flag causes Claude CLI to return structured JSON
77
+ * including real token usage counts, enabling accurate cost tracking.
78
+ *
79
+ * Requires the Claude Code CLI to be installed and authenticated:
80
+ * https://docs.anthropic.com/claude-code
81
+ */
82
+ export declare function createClaudeCliAdapter(options?: ClaudeCliAdapterOptions): MartinAdapter; // NOTE(review): the command line quoted above omits --model although ClaudeCliAdapterOptions.model is documented as passed via --model — confirm in claude-cli.js
83
+ /**
84
+ * Spawns `codex [--full-auto] [--model <model>] "<prompt>" [extraArgs]`.
85
+ *
86
+ * Requires the Codex CLI to be installed and authenticated:
87
+ * npm install -g @openai/codex
88
+ */
89
+ export declare function createCodexCliAdapter(options?: CodexCliAdapterOptions): MartinAdapter; // NOTE(review): [extraArgs] is shown after the prompt here but documented as "before prompt" on CodexCliAdapterOptions — confirm