reasonix 0.3.0-alpha.1 → 0.3.0-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -12
- package/dist/cli/index.js +186 -64
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +8 -1
- package/dist/index.js +26 -6
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -94,22 +94,43 @@ with your own API key: `npx tsx benchmarks/tau-bench/runner.ts --repeats 3`.
|
|
|
94
94
|
### Extends to MCP (v0.3-alpha)
|
|
95
95
|
|
|
96
96
|
Any [MCP](https://spec.modelcontextprotocol.io/) server's tools inherit
|
|
97
|
-
the same Cache-First benefits.
|
|
98
|
-
middle of a conversation:
|
|
97
|
+
the same Cache-First benefits. Two live runs, two data points:
|
|
99
98
|
|
|
100
|
-
|
|
|
101
|
-
|
|
102
|
-
|
|
|
103
|
-
|
|
|
99
|
+
| server | turns | tool calls | cache hit | cost | vs Claude |
|
|
100
|
+
|---|---:|---:|---:|---:|---:|
|
|
101
|
+
| bundled demo (`add` / `echo` / `get_time`) | 2 | 1 | **96.6%** (turn 2) | $0.000254 | −94.0% |
|
|
102
|
+
| official `@modelcontextprotocol/server-filesystem` | 5 | 4 | **96.7%** overall | $0.001235 | −97.0% |
|
|
103
|
+
| **both concurrently** (`demo_add` + `fs_write_file`) | 5 | 4 | **81.1%** | $0.001852 | −95.9% |
|
|
104
104
|
|
|
105
|
-
The
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
105
|
+
The third row is the ecosystem proof: two MCP servers running as
|
|
106
|
+
separate subprocesses, tools from both exercised in one conversation
|
|
107
|
+
(compute `17+25` with the demo server, write the result to a real file
|
|
108
|
+
via the filesystem server). **One single prefix hash across all 5
|
|
109
|
+
turns** — byte-stability survives concurrent MCP subprocesses.
|
|
110
|
+
|
|
111
|
+
**Reproduce without an API key** (replay the committed transcripts):
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
npx reasonix replay benchmarks/tau-bench/transcripts/mcp-demo.add.jsonl
|
|
115
|
+
npx reasonix replay benchmarks/tau-bench/transcripts/mcp-filesystem.jsonl
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**Reproduce with your own key** (live, ~$0.002):
|
|
109
119
|
|
|
110
120
|
```bash
|
|
111
|
-
|
|
112
|
-
|
|
121
|
+
# Don't know what MCP servers exist? Start here:
|
|
122
|
+
reasonix mcp list
|
|
123
|
+
# Prints a curated catalog (filesystem, fetch, github, sqlite, …) with
|
|
124
|
+
# ready-to-paste --mcp commands.
|
|
125
|
+
|
|
126
|
+
# One server:
|
|
127
|
+
reasonix chat --mcp "filesystem=npx -y @modelcontextprotocol/server-filesystem /tmp/safe"
|
|
128
|
+
|
|
129
|
+
# Multiple servers at once — each gets its own namespace prefix:
|
|
130
|
+
reasonix chat \
|
|
131
|
+
--mcp "fs=npx -y @modelcontextprotocol/server-filesystem /tmp/safe" \
|
|
132
|
+
--mcp "mem=npx -y @modelcontextprotocol/server-memory"
|
|
133
|
+
# Tools land in a shared registry as fs_read_file, mem_set, etc.
|
|
113
134
|
```
|
|
114
135
|
|
|
115
136
|
[mcp]: ./benchmarks/tau-bench/transcripts/mcp-demo.add.jsonl
|
package/dist/cli/index.js
CHANGED
|
@@ -2004,11 +2004,25 @@ var StdioTransport = class {
|
|
|
2004
2004
|
stdoutBuffer = "";
|
|
2005
2005
|
constructor(opts) {
|
|
2006
2006
|
const env = opts.replaceEnv ? { ...opts.env ?? {} } : { ...process.env, ...opts.env ?? {} };
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
2011
|
-
|
|
2007
|
+
const shell = opts.shell ?? process.platform === "win32";
|
|
2008
|
+
if (shell) {
|
|
2009
|
+
const line = [
|
|
2010
|
+
opts.command,
|
|
2011
|
+
...(opts.args ?? []).map((a) => quoteArg(a, process.platform === "win32"))
|
|
2012
|
+
].join(" ");
|
|
2013
|
+
this.child = spawn(line, [], {
|
|
2014
|
+
env,
|
|
2015
|
+
cwd: opts.cwd,
|
|
2016
|
+
stdio: ["pipe", "pipe", "inherit"],
|
|
2017
|
+
shell: true
|
|
2018
|
+
});
|
|
2019
|
+
} else {
|
|
2020
|
+
this.child = spawn(opts.command, opts.args ?? [], {
|
|
2021
|
+
env,
|
|
2022
|
+
cwd: opts.cwd,
|
|
2023
|
+
stdio: ["pipe", "pipe", "inherit"]
|
|
2024
|
+
});
|
|
2025
|
+
}
|
|
2012
2026
|
this.child.stdout.setEncoding("utf8");
|
|
2013
2027
|
this.child.stdout.on("data", (chunk) => this.onStdout(chunk));
|
|
2014
2028
|
this.child.on("close", () => this.onClose());
|
|
@@ -2082,6 +2096,12 @@ var StdioTransport = class {
|
|
|
2082
2096
|
else this.queue.push(msg);
|
|
2083
2097
|
}
|
|
2084
2098
|
};
|
|
2099
|
+
function quoteArg(s, windows) {
|
|
2100
|
+
if (!windows) {
|
|
2101
|
+
return `'${s.replace(/'/g, "'\\''")}'`;
|
|
2102
|
+
}
|
|
2103
|
+
return `"${s.replace(/"/g, '""')}"`;
|
|
2104
|
+
}
|
|
2085
2105
|
|
|
2086
2106
|
// src/mcp/registry.ts
|
|
2087
2107
|
async function bridgeMcpTools(client, opts = {}) {
|
|
@@ -2166,7 +2186,7 @@ function redactKey(key) {
|
|
|
2166
2186
|
}
|
|
2167
2187
|
|
|
2168
2188
|
// src/index.ts
|
|
2169
|
-
var VERSION = "0.3.0-alpha.1";
|
|
2189
|
+
var VERSION = "0.3.0-alpha.3";
|
|
2170
2190
|
|
|
2171
2191
|
// src/cli/commands/chat.tsx
|
|
2172
2192
|
import { render } from "ink";
|
|
@@ -2201,11 +2221,6 @@ function shellSplit(input) {
|
|
|
2201
2221
|
i++;
|
|
2202
2222
|
continue;
|
|
2203
2223
|
}
|
|
2204
|
-
if (ch === "\\" && i + 1 < s.length) {
|
|
2205
|
-
cur += s[i + 1];
|
|
2206
|
-
i += 2;
|
|
2207
|
-
continue;
|
|
2208
|
-
}
|
|
2209
2224
|
if (ch === " " || ch === "\t") {
|
|
2210
2225
|
if (cur.length > 0) {
|
|
2211
2226
|
tokens.push(cur);
|
|
@@ -2226,6 +2241,24 @@ function shellSplit(input) {
|
|
|
2226
2241
|
return tokens;
|
|
2227
2242
|
}
|
|
2228
2243
|
|
|
2244
|
+
// src/mcp/spec.ts
|
|
2245
|
+
var NAME_PREFIX = /^([a-zA-Z_][a-zA-Z0-9_]*)=(.*)$/;
|
|
2246
|
+
function parseMcpSpec(input) {
|
|
2247
|
+
const trimmed = input.trim();
|
|
2248
|
+
if (!trimmed) {
|
|
2249
|
+
throw new Error("empty MCP spec");
|
|
2250
|
+
}
|
|
2251
|
+
const nameMatch = NAME_PREFIX.exec(trimmed);
|
|
2252
|
+
const name = nameMatch ? nameMatch[1] : null;
|
|
2253
|
+
const body = nameMatch ? nameMatch[2] : trimmed;
|
|
2254
|
+
const argv = shellSplit(body);
|
|
2255
|
+
if (argv.length === 0) {
|
|
2256
|
+
throw new Error(`MCP spec has name but no command: ${input}`);
|
|
2257
|
+
}
|
|
2258
|
+
const [command, ...args] = argv;
|
|
2259
|
+
return { name, command, args };
|
|
2260
|
+
}
|
|
2261
|
+
|
|
2229
2262
|
// src/cli/ui/App.tsx
|
|
2230
2263
|
import { Box as Box6, Static, Text as Text6, useApp } from "ink";
|
|
2231
2264
|
import React6, { useCallback, useEffect as useEffect2, useMemo, useRef, useState as useState2 } from "react";
|
|
@@ -2974,34 +3007,31 @@ function Root({ initialKey, tools, ...appProps }) {
|
|
|
2974
3007
|
async function chatCommand(opts) {
|
|
2975
3008
|
loadDotenv();
|
|
2976
3009
|
const initialKey = loadApiKey();
|
|
2977
|
-
|
|
3010
|
+
const mcpSpecs = opts.mcp ?? [];
|
|
3011
|
+
const clients = [];
|
|
2978
3012
|
let tools;
|
|
2979
|
-
if (
|
|
2980
|
-
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
try {
|
|
2993
|
-
await mcp.initialize();
|
|
2994
|
-
const bridge = await bridgeMcpTools(mcp, { namePrefix: opts.mcpPrefix });
|
|
2995
|
-
tools = bridge.registry;
|
|
2996
|
-
process.stderr.write(
|
|
2997
|
-
`\u25B8 MCP: ${bridge.registeredNames.length} tool(s) from ${argv.join(" ")}
|
|
3013
|
+
if (mcpSpecs.length > 0) {
|
|
3014
|
+
tools = new ToolRegistry();
|
|
3015
|
+
for (const raw of mcpSpecs) {
|
|
3016
|
+
try {
|
|
3017
|
+
const spec = parseMcpSpec(raw);
|
|
3018
|
+
const prefix = spec.name ? `${spec.name}_` : mcpSpecs.length === 1 && opts.mcpPrefix ? opts.mcpPrefix : "";
|
|
3019
|
+
const transport = new StdioTransport({ command: spec.command, args: spec.args });
|
|
3020
|
+
const mcp2 = new McpClient({ transport });
|
|
3021
|
+
await mcp2.initialize();
|
|
3022
|
+
const bridge = await bridgeMcpTools(mcp2, { registry: tools, namePrefix: prefix });
|
|
3023
|
+
const label = spec.name ?? "anon";
|
|
3024
|
+
process.stderr.write(
|
|
3025
|
+
`\u25B8 MCP[${label}]: ${bridge.registeredNames.length} tool(s) from ${spec.command} ${spec.args.join(" ")}
|
|
2998
3026
|
`
|
|
2999
|
-
|
|
3000
|
-
|
|
3001
|
-
|
|
3027
|
+
);
|
|
3028
|
+
clients.push(mcp2);
|
|
3029
|
+
} catch (err) {
|
|
3030
|
+
process.stderr.write(`MCP setup failed for "${raw}": ${err.message}
|
|
3002
3031
|
`);
|
|
3003
|
-
|
|
3004
|
-
|
|
3032
|
+
for (const c of clients) await c.close();
|
|
3033
|
+
process.exit(1);
|
|
3034
|
+
}
|
|
3005
3035
|
}
|
|
3006
3036
|
}
|
|
3007
3037
|
const { waitUntilExit } = render(/* @__PURE__ */ React8.createElement(Root, { initialKey, tools, ...opts }), {
|
|
@@ -3010,7 +3040,7 @@ async function chatCommand(opts) {
|
|
|
3010
3040
|
try {
|
|
3011
3041
|
await waitUntilExit();
|
|
3012
3042
|
} finally {
|
|
3013
|
-
await
|
|
3043
|
+
for (const c of clients) await c.close();
|
|
3014
3044
|
}
|
|
3015
3045
|
}
|
|
3016
3046
|
|
|
@@ -3176,6 +3206,82 @@ markdown report written to ${opts.mdPath}`);
|
|
|
3176
3206
|
console.log(renderSummaryTable(report));
|
|
3177
3207
|
}
|
|
3178
3208
|
|
|
3209
|
+
// src/mcp/catalog.ts
|
|
3210
|
+
var MCP_CATALOG = [
|
|
3211
|
+
{
|
|
3212
|
+
name: "filesystem",
|
|
3213
|
+
summary: "read/write/search files inside a sandboxed directory",
|
|
3214
|
+
package: "@modelcontextprotocol/server-filesystem",
|
|
3215
|
+
userArgs: "<dir>",
|
|
3216
|
+
note: "the directory is a hard sandbox \u2014 the server refuses access outside it"
|
|
3217
|
+
},
|
|
3218
|
+
{
|
|
3219
|
+
name: "fetch",
|
|
3220
|
+
summary: "fetch URLs (markdown-friendly extraction, not a full browser)",
|
|
3221
|
+
package: "@modelcontextprotocol/server-fetch"
|
|
3222
|
+
},
|
|
3223
|
+
{
|
|
3224
|
+
name: "memory",
|
|
3225
|
+
summary: "persistent key-value memory across sessions",
|
|
3226
|
+
package: "@modelcontextprotocol/server-memory"
|
|
3227
|
+
},
|
|
3228
|
+
{
|
|
3229
|
+
name: "github",
|
|
3230
|
+
summary: "read issues, PRs, code search (needs GITHUB_PERSONAL_ACCESS_TOKEN)",
|
|
3231
|
+
package: "@modelcontextprotocol/server-github",
|
|
3232
|
+
note: "set GITHUB_PERSONAL_ACCESS_TOKEN in your env before spawning"
|
|
3233
|
+
},
|
|
3234
|
+
{
|
|
3235
|
+
name: "sqlite",
|
|
3236
|
+
summary: "read/write a sqlite database file",
|
|
3237
|
+
package: "@modelcontextprotocol/server-sqlite",
|
|
3238
|
+
userArgs: "<db.sqlite>"
|
|
3239
|
+
},
|
|
3240
|
+
{
|
|
3241
|
+
name: "puppeteer",
|
|
3242
|
+
summary: "browser automation \u2014 take screenshots, click, type",
|
|
3243
|
+
package: "@modelcontextprotocol/server-puppeteer",
|
|
3244
|
+
note: "downloads Chromium on first run (~200 MB)"
|
|
3245
|
+
},
|
|
3246
|
+
{
|
|
3247
|
+
name: "everything",
|
|
3248
|
+
summary: "official test server \u2014 exercises every MCP feature",
|
|
3249
|
+
package: "@modelcontextprotocol/server-everything",
|
|
3250
|
+
note: "useful for debugging your Reasonix setup"
|
|
3251
|
+
}
|
|
3252
|
+
];
|
|
3253
|
+
function mcpCommandFor(entry) {
|
|
3254
|
+
const pkg = entry.package;
|
|
3255
|
+
const tail = entry.userArgs ? ` ${entry.userArgs}` : "";
|
|
3256
|
+
return `--mcp "${entry.name}=npx -y ${pkg}${tail}"`;
|
|
3257
|
+
}
|
|
3258
|
+
|
|
3259
|
+
// src/cli/commands/mcp.ts
|
|
3260
|
+
function mcpListCommand(opts) {
|
|
3261
|
+
if (opts.json) {
|
|
3262
|
+
console.log(JSON.stringify(MCP_CATALOG, null, 2));
|
|
3263
|
+
return;
|
|
3264
|
+
}
|
|
3265
|
+
console.log("Popular MCP servers you can bridge into Reasonix:");
|
|
3266
|
+
console.log("");
|
|
3267
|
+
for (const entry of MCP_CATALOG) {
|
|
3268
|
+
console.log(` ${pad(entry.name, 12)} ${entry.summary}`);
|
|
3269
|
+
console.log(` ${mcpCommandFor(entry)}`);
|
|
3270
|
+
if (entry.note) console.log(` \xB7 ${entry.note}`);
|
|
3271
|
+
console.log("");
|
|
3272
|
+
}
|
|
3273
|
+
console.log("Usage: reasonix chat <one-of-the---mcp-lines-above>");
|
|
3274
|
+
console.log(
|
|
3275
|
+
"Docs: https://github.com/modelcontextprotocol/servers \u2014 Anthropic's official server repo"
|
|
3276
|
+
);
|
|
3277
|
+
console.log(
|
|
3278
|
+
" https://mcp.so \u2014 community-maintained catalog"
|
|
3279
|
+
);
|
|
3280
|
+
}
|
|
3281
|
+
function pad(s, width) {
|
|
3282
|
+
return s.length >= width ? s : s + " ".repeat(width - s.length);
|
|
3283
|
+
}
|
|
3284
|
+
|
|
3179
3285
|
// src/cli/commands/replay.ts
|
|
3180
3286
|
import { render as render3 } from "ink";
|
|
3181
3287
|
import React13 from "react";
|
|
@@ -3366,29 +3472,31 @@ async function runCommand(opts) {
|
|
|
3366
3472
|
loadDotenv();
|
|
3367
3473
|
const apiKey = await ensureApiKey();
|
|
3368
3474
|
process.env.DEEPSEEK_API_KEY = apiKey;
|
|
3369
|
-
|
|
3475
|
+
const mcpSpecs = opts.mcp ?? [];
|
|
3476
|
+
const clients = [];
|
|
3370
3477
|
let tools;
|
|
3371
|
-
if (
|
|
3372
|
-
|
|
3373
|
-
const
|
|
3374
|
-
|
|
3375
|
-
|
|
3376
|
-
|
|
3377
|
-
|
|
3378
|
-
|
|
3379
|
-
|
|
3380
|
-
|
|
3381
|
-
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
`\u25B8 MCP: ${bridge.registeredNames.length} tool(s) from ${argv.join(" ")}
|
|
3478
|
+
if (mcpSpecs.length > 0) {
|
|
3479
|
+
tools = new ToolRegistry();
|
|
3480
|
+
for (const raw of mcpSpecs) {
|
|
3481
|
+
try {
|
|
3482
|
+
const spec = parseMcpSpec(raw);
|
|
3483
|
+
const prefix2 = spec.name ? `${spec.name}_` : mcpSpecs.length === 1 && opts.mcpPrefix ? opts.mcpPrefix : "";
|
|
3484
|
+
const mcp2 = new McpClient({
|
|
3485
|
+
transport: new StdioTransport({ command: spec.command, args: spec.args })
|
|
3486
|
+
});
|
|
3487
|
+
await mcp2.initialize();
|
|
3488
|
+
const bridge = await bridgeMcpTools(mcp2, { registry: tools, namePrefix: prefix2 });
|
|
3489
|
+
process.stderr.write(
|
|
3490
|
+
`\u25B8 MCP[${spec.name ?? "anon"}]: ${bridge.registeredNames.length} tool(s) from ${spec.command} ${spec.args.join(" ")}
|
|
3385
3491
|
`
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
3492
|
+
);
|
|
3493
|
+
clients.push(mcp2);
|
|
3494
|
+
} catch (err) {
|
|
3495
|
+
process.stderr.write(`MCP setup failed for "${raw}": ${err.message}
|
|
3389
3496
|
`);
|
|
3390
|
-
|
|
3391
|
-
|
|
3497
|
+
for (const c of clients) await c.close();
|
|
3498
|
+
process.exit(1);
|
|
3499
|
+
}
|
|
3392
3500
|
}
|
|
3393
3501
|
}
|
|
3394
3502
|
const client = new DeepSeekClient();
|
|
@@ -3450,7 +3558,7 @@ transcript: ${opts.transcript}
|
|
|
3450
3558
|
process.stdout.write(` \u2192 npx reasonix replay ${opts.transcript}
|
|
3451
3559
|
`);
|
|
3452
3560
|
}
|
|
3453
|
-
await
|
|
3561
|
+
for (const c of clients) await c.close();
|
|
3454
3562
|
}
|
|
3455
3563
|
|
|
3456
3564
|
// src/cli/commands/sessions.ts
|
|
@@ -3575,9 +3683,14 @@ program.command("chat").description("Interactive Ink TUI with live cache/cost pa
|
|
|
3575
3683
|
"--session <name>",
|
|
3576
3684
|
"Use a named session (default: 'default'). Resume the same session next time."
|
|
3577
3685
|
).option("--no-session", "Disable session persistence for this run (ephemeral chat)").option(
|
|
3578
|
-
"--mcp <
|
|
3579
|
-
'
|
|
3580
|
-
|
|
3686
|
+
"--mcp <spec>",
|
|
3687
|
+
'MCP server spec; repeatable. Forms: "name=cmd args..." (namespaced, tools get `name_` prefix) or "cmd args..." (anonymous). Example: --mcp "fs=npx -y @scope/fs /tmp" --mcp "gh=npx -y @scope/gh"',
|
|
3688
|
+
(value, previous = []) => [...previous, value],
|
|
3689
|
+
[]
|
|
3690
|
+
).option(
|
|
3691
|
+
"--mcp-prefix <str>",
|
|
3692
|
+
"Global prefix applied to every MCP tool (only honored when no per-spec name is set; avoids collisions with a single anonymous server)"
|
|
3693
|
+
).action(async (opts) => {
|
|
3581
3694
|
let session;
|
|
3582
3695
|
if (opts.session === false) {
|
|
3583
3696
|
session = void 0;
|
|
@@ -3605,9 +3718,14 @@ program.command("run <task>").description("Run a single task non-interactively,
|
|
|
3605
3718
|
"Self-consistency: run N parallel samples per turn and pick the most confident",
|
|
3606
3719
|
(v) => Number.parseInt(v, 10)
|
|
3607
3720
|
).option("--transcript <path>", "Write a JSONL transcript to this path for replay/diff").option(
|
|
3608
|
-
"--mcp <
|
|
3609
|
-
'
|
|
3610
|
-
|
|
3721
|
+
"--mcp <spec>",
|
|
3722
|
+
'MCP server spec; repeatable. "name=cmd args..." or "cmd args...".',
|
|
3723
|
+
(value, previous = []) => [...previous, value],
|
|
3724
|
+
[]
|
|
3725
|
+
).option(
|
|
3726
|
+
"--mcp-prefix <str>",
|
|
3727
|
+
"Global prefix (only honored when no per-spec name is set; for a single anonymous server)"
|
|
3728
|
+
).action(async (task, opts) => {
|
|
3611
3729
|
await runCommand({
|
|
3612
3730
|
task,
|
|
3613
3731
|
model: opts.model,
|
|
@@ -3648,6 +3766,10 @@ program.command("diff <a> <b>").description(
|
|
|
3648
3766
|
tui: !!opts.tui
|
|
3649
3767
|
});
|
|
3650
3768
|
});
|
|
3769
|
+
var mcp = program.command("mcp").description("Model Context Protocol helpers \u2014 discover servers, test your setup.");
|
|
3770
|
+
mcp.command("list").description("Show a curated catalog of popular MCP servers with ready-to-use --mcp commands.").option("--json", "Emit the catalog as JSON instead of the human-readable table").action((opts) => {
|
|
3771
|
+
mcpListCommand({ json: !!opts.json });
|
|
3772
|
+
});
|
|
3651
3773
|
program.command("version").description("Print Reasonix version.").action(versionCommand);
|
|
3652
3774
|
program.parseAsync(process.argv).catch((err) => {
|
|
3653
3775
|
console.error(err);
|