reasonix 0.3.0-alpha.1 → 0.3.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -94,22 +94,43 @@ with your own API key: `npx tsx benchmarks/tau-bench/runner.ts --repeats 3`.
94
94
  ### Extends to MCP (v0.3-alpha)
95
95
 
96
96
  Any [MCP](https://spec.modelcontextprotocol.io/) server's tools inherit
97
- the same Cache-First benefits. Live run with an MCP tool call in the
98
- middle of a conversation:
97
+ the same Cache-First benefits. Two live runs, two data points:
99
98
 
100
- | turn | what happened | cache hit |
101
- |---|---|---:|
102
- | 1 | user asks, model decides to call `add` tool via MCP stdio | 0.0% (first-ever prefix) |
103
- | 1 (continued) | model receives tool result (42), writes final answer | **96.6%** |
99
+ | server | turns | tool calls | cache hit | cost | vs Claude |
100
+ |---|---:|---:|---:|---:|---:|
101
+ | bundled demo (`add` / `echo` / `get_time`) | 2 | 1 | **96.6%** (turn 2) | $0.000254 | −94.0% |
102
+ | official `@modelcontextprotocol/server-filesystem` | 5 | 4 | **96.7%** overall | $0.001235 | −97.0% |
103
+ | **both concurrently** (`demo_add` + `fs_write_file`) | 5 | 4 | **81.1%** | $0.001852 | −95.9% |
104
104
 
105
- The MCP round-trip did not disturb the byte-stable prefix server-side
106
- prompt cache kicked in on turn 2. Cost $0.000254 total, **94% cheaper
107
- than Claude** at equivalent token counts. Reference transcript:
108
- [`mcp-demo.add.jsonl`][mcp]. Reproduce:
105
+ The third row is the ecosystem proof: two MCP servers running as
106
+ separate subprocesses, tools from both exercised in one conversation
107
+ (compute `17+25` with the demo server, write the result to a real file
108
+ via the filesystem server). **One single prefix hash across all 5
109
+ turns** — byte-stability survives concurrent MCP subprocesses.
110
+
111
+ **Reproduce without an API key** (replay the committed transcripts):
112
+
113
+ ```bash
114
+ npx reasonix replay benchmarks/tau-bench/transcripts/mcp-demo.add.jsonl
115
+ npx reasonix replay benchmarks/tau-bench/transcripts/mcp-filesystem.jsonl
116
+ ```
117
+
118
+ **Reproduce with your own key** (live, ~$0.002):
109
119
 
110
120
  ```bash
111
- reasonix chat --mcp "node --import tsx examples/mcp-server-demo.ts"
112
- # ask "use add to compute 17 + 25" — model calls the MCP tool, cache holds
121
+ # Don't know what MCP servers exist? Start here:
122
+ reasonix mcp list
123
+ # Prints a curated catalog (filesystem, fetch, github, sqlite, …) with
124
+ # ready-to-paste --mcp commands.
125
+
126
+ # One server:
127
+ reasonix chat --mcp "filesystem=npx -y @modelcontextprotocol/server-filesystem /tmp/safe"
128
+
129
+ # Multiple servers at once — each gets its own namespace prefix:
130
+ reasonix chat \
131
+ --mcp "fs=npx -y @modelcontextprotocol/server-filesystem /tmp/safe" \
132
+ --mcp "mem=npx -y @modelcontextprotocol/server-memory"
133
+ # Tools land in a shared registry as fs_read_file, mem_set, etc.
113
134
  ```
114
135
 
115
136
  [mcp]: ./benchmarks/tau-bench/transcripts/mcp-demo.add.jsonl
package/dist/cli/index.js CHANGED
@@ -2004,11 +2004,25 @@ var StdioTransport = class {
2004
2004
  stdoutBuffer = "";
2005
2005
  constructor(opts) {
2006
2006
  const env = opts.replaceEnv ? { ...opts.env ?? {} } : { ...process.env, ...opts.env ?? {} };
2007
- this.child = spawn(opts.command, opts.args ?? [], {
2008
- env,
2009
- cwd: opts.cwd,
2010
- stdio: ["pipe", "pipe", "inherit"]
2011
- });
2007
+ const shell = opts.shell ?? process.platform === "win32";
2008
+ if (shell) {
2009
+ const line = [
2010
+ opts.command,
2011
+ ...(opts.args ?? []).map((a) => quoteArg(a, process.platform === "win32"))
2012
+ ].join(" ");
2013
+ this.child = spawn(line, [], {
2014
+ env,
2015
+ cwd: opts.cwd,
2016
+ stdio: ["pipe", "pipe", "inherit"],
2017
+ shell: true
2018
+ });
2019
+ } else {
2020
+ this.child = spawn(opts.command, opts.args ?? [], {
2021
+ env,
2022
+ cwd: opts.cwd,
2023
+ stdio: ["pipe", "pipe", "inherit"]
2024
+ });
2025
+ }
2012
2026
  this.child.stdout.setEncoding("utf8");
2013
2027
  this.child.stdout.on("data", (chunk) => this.onStdout(chunk));
2014
2028
  this.child.on("close", () => this.onClose());
@@ -2082,6 +2096,12 @@ var StdioTransport = class {
2082
2096
  else this.queue.push(msg);
2083
2097
  }
2084
2098
  };
/**
 * Quote one argument so it stays a single token once it is joined into a
 * shell command line.
 *
 * POSIX form: the value is wrapped in single quotes and every embedded `'`
 * is emitted as `'\''` (close quote, escaped quote, reopen quote).
 *
 * Windows form: the value is wrapped in double quotes and every embedded `"`
 * is doubled. Any run of backslashes that sits directly before a `"` or
 * before the closing quote is doubled as well; otherwise the final backslash
 * of a value such as `C:\tmp\` would be read as escaping the quote that
 * follows it.
 * NOTE(review): cmd.exe metacharacters such as `%VAR%` are not neutralised
 * here — confirm whether callers can pass untrusted arguments.
 *
 * @param {string} s - Argument to quote; coerced with `String()`.
 * @param {boolean} windows - Use the Windows form when true, POSIX otherwise.
 * @returns {string} The quoted argument.
 */
function quoteArg(s, windows) {
  const text = String(s);
  if (!windows) {
    return `'${text.replace(/'/g, "'\\''")}'`;
  }
  const escaped = text
    // Backslashes immediately before a quote: double them, then double the quote.
    .replace(/(\\*)"/g, '$1$1""')
    // Trailing backslashes would otherwise escape the closing quote.
    .replace(/(\\+)$/, "$1$1");
  return `"${escaped}"`;
}
2085
2105
 
2086
2106
  // src/mcp/registry.ts
2087
2107
  async function bridgeMcpTools(client, opts = {}) {
@@ -2166,7 +2186,7 @@ function redactKey(key) {
2166
2186
  }
2167
2187
 
2168
2188
  // src/index.ts
2169
- var VERSION = "0.3.0-alpha.1";
2189
+ var VERSION = "0.3.0-alpha.3";
2170
2190
 
2171
2191
  // src/cli/commands/chat.tsx
2172
2192
  import { render } from "ink";
@@ -2201,11 +2221,6 @@ function shellSplit(input) {
2201
2221
  i++;
2202
2222
  continue;
2203
2223
  }
2204
- if (ch === "\\" && i + 1 < s.length) {
2205
- cur += s[i + 1];
2206
- i += 2;
2207
- continue;
2208
- }
2209
2224
  if (ch === " " || ch === " ") {
2210
2225
  if (cur.length > 0) {
2211
2226
  tokens.push(cur);
@@ -2226,6 +2241,24 @@ function shellSplit(input) {
2226
2241
  return tokens;
2227
2242
  }
2228
2243
 
2244
+ // src/mcp/spec.ts
// Optional `name=` prefix of an MCP spec: an identifier-like name, `=`, then
// the command text.
var NAME_PREFIX = /^([a-zA-Z_][a-zA-Z0-9_]*)=(.*)$/;

/**
 * Parse one `--mcp` spec string into its parts.
 *
 * Two forms are accepted: `name=cmd args...` and `cmd args...`. The command
 * text is tokenised with `shellSplit`; the first token becomes `command` and
 * the remaining tokens become `args`.
 *
 * @param {string} input - Raw spec text.
 * @returns {{ name: string | null, command: string, args: string[] }}
 *   `name` is null when the spec has no `name=` prefix.
 * @throws {Error} When the spec is blank, or when it yields no command tokens.
 */
function parseMcpSpec(input) {
  const trimmed = input.trim();
  if (!trimmed) {
    throw new Error("empty MCP spec");
  }
  const nameMatch = NAME_PREFIX.exec(trimmed);
  const name = nameMatch ? nameMatch[1] : null;
  const body = nameMatch ? nameMatch[2] : trimmed;
  const argv = shellSplit(body);
  if (argv.length === 0) {
    // Only claim that a name was present when one was actually parsed.
    const reason = name === null ? "MCP spec has no command" : "MCP spec has name but no command";
    throw new Error(`${reason}: ${input}`);
  }
  const [command, ...args] = argv;
  return { name, command, args };
}
2261
+
2229
2262
  // src/cli/ui/App.tsx
2230
2263
  import { Box as Box6, Static, Text as Text6, useApp } from "ink";
2231
2264
  import React6, { useCallback, useEffect as useEffect2, useMemo, useRef, useState as useState2 } from "react";
@@ -2974,34 +3007,31 @@ function Root({ initialKey, tools, ...appProps }) {
2974
3007
  async function chatCommand(opts) {
2975
3008
  loadDotenv();
2976
3009
  const initialKey = loadApiKey();
2977
- let mcp;
3010
+ const mcpSpecs = opts.mcp ?? [];
3011
+ const clients = [];
2978
3012
  let tools;
2979
- if (opts.mcp) {
2980
- const argv = shellSplit(opts.mcp);
2981
- if (argv.length === 0) {
2982
- process.stderr.write("error: --mcp requires a command\n");
2983
- process.exit(2);
2984
- }
2985
- const [command, ...args] = argv;
2986
- if (!command) {
2987
- process.stderr.write("error: --mcp command is empty\n");
2988
- process.exit(2);
2989
- }
2990
- const transport = new StdioTransport({ command, args });
2991
- mcp = new McpClient({ transport });
2992
- try {
2993
- await mcp.initialize();
2994
- const bridge = await bridgeMcpTools(mcp, { namePrefix: opts.mcpPrefix });
2995
- tools = bridge.registry;
2996
- process.stderr.write(
2997
- `\u25B8 MCP: ${bridge.registeredNames.length} tool(s) from ${argv.join(" ")}
3013
+ if (mcpSpecs.length > 0) {
3014
+ tools = new ToolRegistry();
3015
+ for (const raw of mcpSpecs) {
3016
+ try {
3017
+ const spec = parseMcpSpec(raw);
3018
+ const prefix = spec.name ? `${spec.name}_` : mcpSpecs.length === 1 && opts.mcpPrefix ? opts.mcpPrefix : "";
3019
+ const transport = new StdioTransport({ command: spec.command, args: spec.args });
3020
+ const mcp2 = new McpClient({ transport });
3021
+ await mcp2.initialize();
3022
+ const bridge = await bridgeMcpTools(mcp2, { registry: tools, namePrefix: prefix });
3023
+ const label = spec.name ?? "anon";
3024
+ process.stderr.write(
3025
+ `\u25B8 MCP[${label}]: ${bridge.registeredNames.length} tool(s) from ${spec.command} ${spec.args.join(" ")}
2998
3026
  `
2999
- );
3000
- } catch (err) {
3001
- process.stderr.write(`MCP setup failed: ${err.message}
3027
+ );
3028
+ clients.push(mcp2);
3029
+ } catch (err) {
3030
+ process.stderr.write(`MCP setup failed for "${raw}": ${err.message}
3002
3031
  `);
3003
- await mcp.close();
3004
- process.exit(1);
3032
+ for (const c of clients) await c.close();
3033
+ process.exit(1);
3034
+ }
3005
3035
  }
3006
3036
  }
3007
3037
  const { waitUntilExit } = render(/* @__PURE__ */ React8.createElement(Root, { initialKey, tools, ...opts }), {
@@ -3010,7 +3040,7 @@ async function chatCommand(opts) {
3010
3040
  try {
3011
3041
  await waitUntilExit();
3012
3042
  } finally {
3013
- await mcp?.close();
3043
+ for (const c of clients) await c.close();
3014
3044
  }
3015
3045
  }
3016
3046
 
@@ -3176,6 +3206,82 @@ markdown report written to ${opts.mdPath}`);
3176
3206
  console.log(renderSummaryTable(report));
3177
3207
  }
3178
3208
 
3209
+ // src/mcp/catalog.ts
// Curated list of MCP servers shown to the user. Each entry has `name`,
// `summary` and `package`; `userArgs` (placeholder arguments the user must
// supply) and `note` (extra hint) are optional.
var MCP_CATALOG = [
  {
    name: "filesystem",
    summary: "read/write/search files inside a sandboxed directory",
    package: "@modelcontextprotocol/server-filesystem",
    userArgs: "<dir>",
    note: "the directory is a hard sandbox \u2014 the server refuses access outside it"
  },
  {
    name: "fetch",
    summary: "fetch URLs (markdown-friendly extraction, not a full browser)",
    package: "@modelcontextprotocol/server-fetch"
  },
  {
    name: "memory",
    summary: "persistent key-value memory across sessions",
    package: "@modelcontextprotocol/server-memory"
  },
  {
    name: "github",
    summary: "read issues, PRs, code search (needs GITHUB_PERSONAL_ACCESS_TOKEN)",
    package: "@modelcontextprotocol/server-github",
    note: "set GITHUB_PERSONAL_ACCESS_TOKEN in your env before spawning"
  },
  {
    name: "sqlite",
    summary: "read/write a sqlite database file",
    package: "@modelcontextprotocol/server-sqlite",
    userArgs: "<db.sqlite>"
  },
  {
    name: "puppeteer",
    summary: "browser automation \u2014 take screenshots, click, type",
    package: "@modelcontextprotocol/server-puppeteer",
    note: "downloads Chromium on first run (~200 MB)"
  },
  {
    name: "everything",
    summary: "official test server \u2014 exercises every MCP feature",
    package: "@modelcontextprotocol/server-everything",
    note: "useful for debugging your Reasonix setup"
  }
];

/**
 * Build the ready-to-paste `--mcp "name=npx -y <package> [userArgs]"` flag
 * for one catalog entry.
 *
 * @param {{ name: string, package: string, userArgs?: string }} entry
 * @returns {string} The complete `--mcp "..."` flag text.
 */
function mcpCommandFor(entry) {
  const { name, package: pkg, userArgs } = entry;
  const commandParts = [`npx -y ${pkg}`];
  if (userArgs) {
    // Placeholder arguments follow the package, separated by a single space.
    commandParts.push(userArgs);
  }
  return `--mcp "${name}=${commandParts.join(" ")}"`;
}
3258
+
3259
+ // src/cli/commands/mcp.ts
/**
 * Print the MCP server catalog to stdout.
 *
 * With `opts.json` set, the catalog is written as pretty-printed JSON and
 * nothing else. Otherwise each entry is printed as a name/summary line, its
 * `--mcp` flag, an optional note, and a blank separator, followed by usage
 * and documentation pointers.
 *
 * @param {{ json?: boolean }} opts
 * @returns {void}
 */
function mcpListCommand(opts) {
  const print = (line) => console.log(line);

  if (opts.json) {
    print(JSON.stringify(MCP_CATALOG, null, 2));
    return;
  }

  print("Popular MCP servers you can bridge into Reasonix:");
  print("");

  MCP_CATALOG.forEach((server) => {
    print(` ${pad(server.name, 12)} ${server.summary}`);
    print(` ${mcpCommandFor(server)}`);
    if (server.note) {
      print(` \xB7 ${server.note}`);
    }
    print("");
  });

  print("Usage: reasonix chat <one-of-the---mcp-lines-above>");
  print(
    "Docs: https://github.com/modelcontextprotocol/servers \u2014 Anthropic's official server repo"
  );
  print(
    " https://mcp.so \u2014 community-maintained catalog"
  );
}
/**
 * Right-pad `s` with spaces up to `width` characters.
 *
 * @param {string} s - Text to pad.
 * @param {number} width - Minimum length of the result.
 * @returns {string} `s` unchanged when it is already `width` or longer,
 *   otherwise `s` followed by enough spaces to reach `width`.
 */
function pad(s, width) {
  // Standard-library replacement for the manual length check plus repeat().
  return s.padEnd(width);
}
3284
+
3179
3285
  // src/cli/commands/replay.ts
3180
3286
  import { render as render3 } from "ink";
3181
3287
  import React13 from "react";
@@ -3366,29 +3472,31 @@ async function runCommand(opts) {
3366
3472
  loadDotenv();
3367
3473
  const apiKey = await ensureApiKey();
3368
3474
  process.env.DEEPSEEK_API_KEY = apiKey;
3369
- let mcp;
3475
+ const mcpSpecs = opts.mcp ?? [];
3476
+ const clients = [];
3370
3477
  let tools;
3371
- if (opts.mcp) {
3372
- const argv = shellSplit(opts.mcp);
3373
- const [command, ...args] = argv;
3374
- if (!command) {
3375
- process.stderr.write("error: --mcp command is empty\n");
3376
- process.exit(2);
3377
- }
3378
- mcp = new McpClient({ transport: new StdioTransport({ command, args }) });
3379
- try {
3380
- await mcp.initialize();
3381
- const bridge = await bridgeMcpTools(mcp, { namePrefix: opts.mcpPrefix });
3382
- tools = bridge.registry;
3383
- process.stderr.write(
3384
- `\u25B8 MCP: ${bridge.registeredNames.length} tool(s) from ${argv.join(" ")}
3478
+ if (mcpSpecs.length > 0) {
3479
+ tools = new ToolRegistry();
3480
+ for (const raw of mcpSpecs) {
3481
+ try {
3482
+ const spec = parseMcpSpec(raw);
3483
+ const prefix2 = spec.name ? `${spec.name}_` : mcpSpecs.length === 1 && opts.mcpPrefix ? opts.mcpPrefix : "";
3484
+ const mcp2 = new McpClient({
3485
+ transport: new StdioTransport({ command: spec.command, args: spec.args })
3486
+ });
3487
+ await mcp2.initialize();
3488
+ const bridge = await bridgeMcpTools(mcp2, { registry: tools, namePrefix: prefix2 });
3489
+ process.stderr.write(
3490
+ `\u25B8 MCP[${spec.name ?? "anon"}]: ${bridge.registeredNames.length} tool(s) from ${spec.command} ${spec.args.join(" ")}
3385
3491
  `
3386
- );
3387
- } catch (err) {
3388
- process.stderr.write(`MCP setup failed: ${err.message}
3492
+ );
3493
+ clients.push(mcp2);
3494
+ } catch (err) {
3495
+ process.stderr.write(`MCP setup failed for "${raw}": ${err.message}
3389
3496
  `);
3390
- await mcp.close();
3391
- process.exit(1);
3497
+ for (const c of clients) await c.close();
3498
+ process.exit(1);
3499
+ }
3392
3500
  }
3393
3501
  }
3394
3502
  const client = new DeepSeekClient();
@@ -3450,7 +3558,7 @@ transcript: ${opts.transcript}
3450
3558
  process.stdout.write(` \u2192 npx reasonix replay ${opts.transcript}
3451
3559
  `);
3452
3560
  }
3453
- await mcp?.close();
3561
+ for (const c of clients) await c.close();
3454
3562
  }
3455
3563
 
3456
3564
  // src/cli/commands/sessions.ts
@@ -3575,9 +3683,14 @@ program.command("chat").description("Interactive Ink TUI with live cache/cost pa
3575
3683
  "--session <name>",
3576
3684
  "Use a named session (default: 'default'). Resume the same session next time."
3577
3685
  ).option("--no-session", "Disable session persistence for this run (ephemeral chat)").option(
3578
- "--mcp <command>",
3579
- 'Spawn an MCP server and bridge its tools. Shell-quoted: --mcp "npx -y @scope/server-x /path"'
3580
- ).option("--mcp-prefix <str>", "Prefix prepended to every MCP tool name (avoid collisions)").action(async (opts) => {
3686
+ "--mcp <spec>",
3687
+ 'MCP server spec; repeatable. Forms: "name=cmd args..." (namespaced, tools get `name_` prefix) or "cmd args..." (anonymous). Example: --mcp "fs=npx -y @scope/fs /tmp" --mcp "gh=npx -y @scope/gh"',
3688
+ (value, previous = []) => [...previous, value],
3689
+ []
3690
+ ).option(
3691
+ "--mcp-prefix <str>",
3692
+ "Global prefix applied to every MCP tool (only honored when no per-spec name is set; avoids collisions with a single anonymous server)"
3693
+ ).action(async (opts) => {
3581
3694
  let session;
3582
3695
  if (opts.session === false) {
3583
3696
  session = void 0;
@@ -3605,9 +3718,14 @@ program.command("run <task>").description("Run a single task non-interactively,
3605
3718
  "Self-consistency: run N parallel samples per turn and pick the most confident",
3606
3719
  (v) => Number.parseInt(v, 10)
3607
3720
  ).option("--transcript <path>", "Write a JSONL transcript to this path for replay/diff").option(
3608
- "--mcp <command>",
3609
- 'Spawn an MCP server and bridge its tools. Shell-quoted: --mcp "npx -y @scope/server-x /path"'
3610
- ).option("--mcp-prefix <str>", "Prefix prepended to every MCP tool name (avoid collisions)").action(async (task, opts) => {
3721
+ "--mcp <spec>",
3722
+ 'MCP server spec; repeatable. "name=cmd args..." or "cmd args...".',
3723
+ (value, previous = []) => [...previous, value],
3724
+ []
3725
+ ).option(
3726
+ "--mcp-prefix <str>",
3727
+ "Global prefix (only honored when no per-spec name is set; for a single anonymous server)"
3728
+ ).action(async (task, opts) => {
3611
3729
  await runCommand({
3612
3730
  task,
3613
3731
  model: opts.model,
@@ -3648,6 +3766,10 @@ program.command("diff <a> <b>").description(
3648
3766
  tui: !!opts.tui
3649
3767
  });
3650
3768
  });
3769
+ var mcp = program.command("mcp").description("Model Context Protocol helpers \u2014 discover servers, test your setup.");
3770
+ mcp.command("list").description("Show a curated catalog of popular MCP servers with ready-to-use --mcp commands.").option("--json", "Emit the catalog as JSON instead of the human-readable table").action((opts) => {
3771
+ mcpListCommand({ json: !!opts.json });
3772
+ });
3651
3773
  program.command("version").description("Print Reasonix version.").action(versionCommand);
3652
3774
  program.parseAsync(process.argv).catch((err) => {
3653
3775
  console.error(err);