reasonix 0.3.0-alpha.2 → 0.3.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -100,12 +100,13 @@ the same Cache-First benefits. Two live runs, two data points:
100
100
  |---|---:|---:|---:|---:|---:|
101
101
  | bundled demo (`add` / `echo` / `get_time`) | 2 | 1 | **96.6%** (turn 2) | $0.000254 | −94.0% |
102
102
  | official `@modelcontextprotocol/server-filesystem` | 5 | 4 | **96.7%** overall | $0.001235 | −97.0% |
103
+ | **both concurrently** (`demo_add` + `fs_write_file`) | 5 | 4 | **81.1%** | $0.001852 | −95.9% |
103
104
 
104
- The second run is the interesting one — it's through an *external*,
105
- production MCP server (no code we control). Five turns including
106
- `list_directory`, a permission-denied recovery via
107
- `list_allowed_directories`, a successful retry, and `read_text_file`.
108
- Byte-stable prefix held across every turn; cache hit stayed at 96.7%.
105
+ The third row is the ecosystem proof: two MCP servers running as
106
+ separate subprocesses, tools from both exercised in one conversation
107
+ (compute `17+25` with the demo server, write the result to a real file
108
+ via the filesystem server). **One single prefix hash across all 5
109
+ turns** — byte-stability survives concurrent MCP subprocesses.
109
110
 
110
111
  **Reproduce without an API key** (replay the committed transcripts):
111
112
 
@@ -117,9 +118,19 @@ npx reasonix replay benchmarks/tau-bench/transcripts/mcp-filesystem.jsonl
117
118
  **Reproduce with your own key** (live, ~$0.002):
118
119
 
119
120
  ```bash
120
- reasonix chat --mcp "node --import tsx examples/mcp-server-demo.ts"
121
- # or against the real filesystem server:
122
- reasonix chat --mcp "npx -y @modelcontextprotocol/server-filesystem /path/to/safe/dir"
121
+ # Don't know what MCP servers exist? Start here:
122
+ reasonix mcp list
123
+ # Prints a curated catalog (filesystem, fetch, github, sqlite, …) with
124
+ # ready-to-paste --mcp commands.
125
+
126
+ # One server:
127
+ reasonix chat --mcp "filesystem=npx -y @modelcontextprotocol/server-filesystem /tmp/safe"
128
+
129
+ # Multiple servers at once — each gets its own namespace prefix:
130
+ reasonix chat \
131
+ --mcp "fs=npx -y @modelcontextprotocol/server-filesystem /tmp/safe" \
132
+ --mcp "mem=npx -y @modelcontextprotocol/server-memory"
133
+ # Tools land in a shared registry as fs_read_file, mem_set, etc.
123
134
  ```
124
135
 
125
136
  [mcp]: ./benchmarks/tau-bench/transcripts/mcp-demo.add.jsonl
package/dist/cli/index.js CHANGED
@@ -2186,7 +2186,7 @@ function redactKey(key) {
2186
2186
  }
2187
2187
 
2188
2188
  // src/index.ts
2189
- var VERSION = "0.3.0-alpha.2";
2189
+ var VERSION = "0.3.0-alpha.3";
2190
2190
 
2191
2191
  // src/cli/commands/chat.tsx
2192
2192
  import { render } from "ink";
@@ -2221,11 +2221,6 @@ function shellSplit(input) {
2221
2221
  i++;
2222
2222
  continue;
2223
2223
  }
2224
- if (ch === "\\" && i + 1 < s.length) {
2225
- cur += s[i + 1];
2226
- i += 2;
2227
- continue;
2228
- }
2229
2224
  if (ch === " " || ch === " ") {
2230
2225
  if (cur.length > 0) {
2231
2226
  tokens.push(cur);
@@ -2246,6 +2241,24 @@ function shellSplit(input) {
2246
2241
  return tokens;
2247
2242
  }
2248
2243
 
2244
+ // src/mcp/spec.ts
2245
+ var NAME_PREFIX = /^([a-zA-Z_][a-zA-Z0-9_]*)=(.*)$/;
2246
+ function parseMcpSpec(input) {
2247
+ const trimmed = input.trim();
2248
+ if (!trimmed) {
2249
+ throw new Error("empty MCP spec");
2250
+ }
2251
+ const nameMatch = NAME_PREFIX.exec(trimmed);
2252
+ const name = nameMatch ? nameMatch[1] : null;
2253
+ const body = nameMatch ? nameMatch[2] : trimmed;
2254
+ const argv = shellSplit(body);
2255
+ if (argv.length === 0) {
2256
+ throw new Error(`MCP spec has name but no command: ${input}`);
2257
+ }
2258
+ const [command, ...args] = argv;
2259
+ return { name, command, args };
2260
+ }
2261
+
2249
2262
  // src/cli/ui/App.tsx
2250
2263
  import { Box as Box6, Static, Text as Text6, useApp } from "ink";
2251
2264
  import React6, { useCallback, useEffect as useEffect2, useMemo, useRef, useState as useState2 } from "react";
@@ -2994,34 +3007,31 @@ function Root({ initialKey, tools, ...appProps }) {
2994
3007
  async function chatCommand(opts) {
2995
3008
  loadDotenv();
2996
3009
  const initialKey = loadApiKey();
2997
- let mcp;
3010
+ const mcpSpecs = opts.mcp ?? [];
3011
+ const clients = [];
2998
3012
  let tools;
2999
- if (opts.mcp) {
3000
- const argv = shellSplit(opts.mcp);
3001
- if (argv.length === 0) {
3002
- process.stderr.write("error: --mcp requires a command\n");
3003
- process.exit(2);
3004
- }
3005
- const [command, ...args] = argv;
3006
- if (!command) {
3007
- process.stderr.write("error: --mcp command is empty\n");
3008
- process.exit(2);
3009
- }
3010
- const transport = new StdioTransport({ command, args });
3011
- mcp = new McpClient({ transport });
3012
- try {
3013
- await mcp.initialize();
3014
- const bridge = await bridgeMcpTools(mcp, { namePrefix: opts.mcpPrefix });
3015
- tools = bridge.registry;
3016
- process.stderr.write(
3017
- `\u25B8 MCP: ${bridge.registeredNames.length} tool(s) from ${argv.join(" ")}
3013
+ if (mcpSpecs.length > 0) {
3014
+ tools = new ToolRegistry();
3015
+ for (const raw of mcpSpecs) {
3016
+ try {
3017
+ const spec = parseMcpSpec(raw);
3018
+ const prefix = spec.name ? `${spec.name}_` : mcpSpecs.length === 1 && opts.mcpPrefix ? opts.mcpPrefix : "";
3019
+ const transport = new StdioTransport({ command: spec.command, args: spec.args });
3020
+ const mcp2 = new McpClient({ transport });
3021
+ await mcp2.initialize();
3022
+ const bridge = await bridgeMcpTools(mcp2, { registry: tools, namePrefix: prefix });
3023
+ const label = spec.name ?? "anon";
3024
+ process.stderr.write(
3025
+ `\u25B8 MCP[${label}]: ${bridge.registeredNames.length} tool(s) from ${spec.command} ${spec.args.join(" ")}
3018
3026
  `
3019
- );
3020
- } catch (err) {
3021
- process.stderr.write(`MCP setup failed: ${err.message}
3027
+ );
3028
+ clients.push(mcp2);
3029
+ } catch (err) {
3030
+ process.stderr.write(`MCP setup failed for "${raw}": ${err.message}
3022
3031
  `);
3023
- await mcp.close();
3024
- process.exit(1);
3032
+ for (const c of clients) await c.close();
3033
+ process.exit(1);
3034
+ }
3025
3035
  }
3026
3036
  }
3027
3037
  const { waitUntilExit } = render(/* @__PURE__ */ React8.createElement(Root, { initialKey, tools, ...opts }), {
@@ -3030,7 +3040,7 @@ async function chatCommand(opts) {
3030
3040
  try {
3031
3041
  await waitUntilExit();
3032
3042
  } finally {
3033
- await mcp?.close();
3043
+ for (const c of clients) await c.close();
3034
3044
  }
3035
3045
  }
3036
3046
 
@@ -3196,6 +3206,82 @@ markdown report written to ${opts.mdPath}`);
3196
3206
  console.log(renderSummaryTable(report));
3197
3207
  }
3198
3208
 
3209
+ // src/mcp/catalog.ts
3210
+ var MCP_CATALOG = [
3211
+ {
3212
+ name: "filesystem",
3213
+ summary: "read/write/search files inside a sandboxed directory",
3214
+ package: "@modelcontextprotocol/server-filesystem",
3215
+ userArgs: "<dir>",
3216
+ note: "the directory is a hard sandbox \u2014 the server refuses access outside it"
3217
+ },
3218
+ {
3219
+ name: "fetch",
3220
+ summary: "fetch URLs (markdown-friendly extraction, not a full browser)",
3221
+ package: "@modelcontextprotocol/server-fetch"
3222
+ },
3223
+ {
3224
+ name: "memory",
3225
+ summary: "persistent key-value memory across sessions",
3226
+ package: "@modelcontextprotocol/server-memory"
3227
+ },
3228
+ {
3229
+ name: "github",
3230
+ summary: "read issues, PRs, code search (needs GITHUB_PERSONAL_ACCESS_TOKEN)",
3231
+ package: "@modelcontextprotocol/server-github",
3232
+ note: "set GITHUB_PERSONAL_ACCESS_TOKEN in your env before spawning"
3233
+ },
3234
+ {
3235
+ name: "sqlite",
3236
+ summary: "read/write a sqlite database file",
3237
+ package: "@modelcontextprotocol/server-sqlite",
3238
+ userArgs: "<db.sqlite>"
3239
+ },
3240
+ {
3241
+ name: "puppeteer",
3242
+ summary: "browser automation \u2014 take screenshots, click, type",
3243
+ package: "@modelcontextprotocol/server-puppeteer",
3244
+ note: "downloads Chromium on first run (~200 MB)"
3245
+ },
3246
+ {
3247
+ name: "everything",
3248
+ summary: "official test server \u2014 exercises every MCP feature",
3249
+ package: "@modelcontextprotocol/server-everything",
3250
+ note: "useful for debugging your Reasonix setup"
3251
+ }
3252
+ ];
3253
+ function mcpCommandFor(entry) {
3254
+ const pkg = entry.package;
3255
+ const tail = entry.userArgs ? ` ${entry.userArgs}` : "";
3256
+ return `--mcp "${entry.name}=npx -y ${pkg}${tail}"`;
3257
+ }
3258
+
3259
+ // src/cli/commands/mcp.ts
3260
+ function mcpListCommand(opts) {
3261
+ if (opts.json) {
3262
+ console.log(JSON.stringify(MCP_CATALOG, null, 2));
3263
+ return;
3264
+ }
3265
+ console.log("Popular MCP servers you can bridge into Reasonix:");
3266
+ console.log("");
3267
+ for (const entry of MCP_CATALOG) {
3268
+ console.log(` ${pad(entry.name, 12)} ${entry.summary}`);
3269
+ console.log(` ${mcpCommandFor(entry)}`);
3270
+ if (entry.note) console.log(` \xB7 ${entry.note}`);
3271
+ console.log("");
3272
+ }
3273
+ console.log("Usage: reasonix chat <one-of-the---mcp-lines-above>");
3274
+ console.log(
3275
+ "Docs: https://github.com/modelcontextprotocol/servers \u2014 Anthropic's official server repo"
3276
+ );
3277
+ console.log(
3278
+ " https://mcp.so \u2014 community-maintained catalog"
3279
+ );
3280
+ }
3281
+ function pad(s, width) {
3282
+ return s.length >= width ? s : s + " ".repeat(width - s.length);
3283
+ }
3284
+
3199
3285
  // src/cli/commands/replay.ts
3200
3286
  import { render as render3 } from "ink";
3201
3287
  import React13 from "react";
@@ -3386,29 +3472,31 @@ async function runCommand(opts) {
3386
3472
  loadDotenv();
3387
3473
  const apiKey = await ensureApiKey();
3388
3474
  process.env.DEEPSEEK_API_KEY = apiKey;
3389
- let mcp;
3475
+ const mcpSpecs = opts.mcp ?? [];
3476
+ const clients = [];
3390
3477
  let tools;
3391
- if (opts.mcp) {
3392
- const argv = shellSplit(opts.mcp);
3393
- const [command, ...args] = argv;
3394
- if (!command) {
3395
- process.stderr.write("error: --mcp command is empty\n");
3396
- process.exit(2);
3397
- }
3398
- mcp = new McpClient({ transport: new StdioTransport({ command, args }) });
3399
- try {
3400
- await mcp.initialize();
3401
- const bridge = await bridgeMcpTools(mcp, { namePrefix: opts.mcpPrefix });
3402
- tools = bridge.registry;
3403
- process.stderr.write(
3404
- `\u25B8 MCP: ${bridge.registeredNames.length} tool(s) from ${argv.join(" ")}
3478
+ if (mcpSpecs.length > 0) {
3479
+ tools = new ToolRegistry();
3480
+ for (const raw of mcpSpecs) {
3481
+ try {
3482
+ const spec = parseMcpSpec(raw);
3483
+ const prefix2 = spec.name ? `${spec.name}_` : mcpSpecs.length === 1 && opts.mcpPrefix ? opts.mcpPrefix : "";
3484
+ const mcp2 = new McpClient({
3485
+ transport: new StdioTransport({ command: spec.command, args: spec.args })
3486
+ });
3487
+ await mcp2.initialize();
3488
+ const bridge = await bridgeMcpTools(mcp2, { registry: tools, namePrefix: prefix2 });
3489
+ process.stderr.write(
3490
+ `\u25B8 MCP[${spec.name ?? "anon"}]: ${bridge.registeredNames.length} tool(s) from ${spec.command} ${spec.args.join(" ")}
3405
3491
  `
3406
- );
3407
- } catch (err) {
3408
- process.stderr.write(`MCP setup failed: ${err.message}
3492
+ );
3493
+ clients.push(mcp2);
3494
+ } catch (err) {
3495
+ process.stderr.write(`MCP setup failed for "${raw}": ${err.message}
3409
3496
  `);
3410
- await mcp.close();
3411
- process.exit(1);
3497
+ for (const c of clients) await c.close();
3498
+ process.exit(1);
3499
+ }
3412
3500
  }
3413
3501
  }
3414
3502
  const client = new DeepSeekClient();
@@ -3470,7 +3558,7 @@ transcript: ${opts.transcript}
3470
3558
  process.stdout.write(` \u2192 npx reasonix replay ${opts.transcript}
3471
3559
  `);
3472
3560
  }
3473
- await mcp?.close();
3561
+ for (const c of clients) await c.close();
3474
3562
  }
3475
3563
 
3476
3564
  // src/cli/commands/sessions.ts
@@ -3595,9 +3683,14 @@ program.command("chat").description("Interactive Ink TUI with live cache/cost pa
3595
3683
  "--session <name>",
3596
3684
  "Use a named session (default: 'default'). Resume the same session next time."
3597
3685
  ).option("--no-session", "Disable session persistence for this run (ephemeral chat)").option(
3598
- "--mcp <command>",
3599
- 'Spawn an MCP server and bridge its tools. Shell-quoted: --mcp "npx -y @scope/server-x /path"'
3600
- ).option("--mcp-prefix <str>", "Prefix prepended to every MCP tool name (avoid collisions)").action(async (opts) => {
3686
+ "--mcp <spec>",
3687
+ 'MCP server spec; repeatable. Forms: "name=cmd args..." (namespaced, tools get `name_` prefix) or "cmd args..." (anonymous). Example: --mcp "fs=npx -y @scope/fs /tmp" --mcp "gh=npx -y @scope/gh"',
3688
+ (value, previous = []) => [...previous, value],
3689
+ []
3690
+ ).option(
3691
+ "--mcp-prefix <str>",
3692
+ "Global prefix applied to every MCP tool (only honored when no per-spec name is set; avoids collisions with a single anonymous server)"
3693
+ ).action(async (opts) => {
3601
3694
  let session;
3602
3695
  if (opts.session === false) {
3603
3696
  session = void 0;
@@ -3625,9 +3718,14 @@ program.command("run <task>").description("Run a single task non-interactively,
3625
3718
  "Self-consistency: run N parallel samples per turn and pick the most confident",
3626
3719
  (v) => Number.parseInt(v, 10)
3627
3720
  ).option("--transcript <path>", "Write a JSONL transcript to this path for replay/diff").option(
3628
- "--mcp <command>",
3629
- 'Spawn an MCP server and bridge its tools. Shell-quoted: --mcp "npx -y @scope/server-x /path"'
3630
- ).option("--mcp-prefix <str>", "Prefix prepended to every MCP tool name (avoid collisions)").action(async (task, opts) => {
3721
+ "--mcp <spec>",
3722
+ 'MCP server spec; repeatable. "name=cmd args..." or "cmd args...".',
3723
+ (value, previous = []) => [...previous, value],
3724
+ []
3725
+ ).option(
3726
+ "--mcp-prefix <str>",
3727
+ "Global prefix (only honored when no per-spec name is set; for a single anonymous server)"
3728
+ ).action(async (task, opts) => {
3631
3729
  await runCommand({
3632
3730
  task,
3633
3731
  model: opts.model,
@@ -3668,6 +3766,10 @@ program.command("diff <a> <b>").description(
3668
3766
  tui: !!opts.tui
3669
3767
  });
3670
3768
  });
3769
+ var mcp = program.command("mcp").description("Model Context Protocol helpers \u2014 discover servers, test your setup.");
3770
+ mcp.command("list").description("Show a curated catalog of popular MCP servers with ready-to-use --mcp commands.").option("--json", "Emit the catalog as JSON instead of the human-readable table").action((opts) => {
3771
+ mcpListCommand({ json: !!opts.json });
3772
+ });
3671
3773
  program.command("version").description("Print Reasonix version.").action(versionCommand);
3672
3774
  program.parseAsync(process.argv).catch((err) => {
3673
3775
  console.error(err);