rlm-cli 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +184 -0
- package/bin/rlm.mjs +45 -0
- package/dist/cli.d.ts +15 -0
- package/dist/cli.js +185 -0
- package/dist/config.d.ts +13 -0
- package/dist/config.js +73 -0
- package/dist/env.d.ts +9 -0
- package/dist/env.js +34 -0
- package/dist/interactive.d.ts +10 -0
- package/dist/interactive.js +789 -0
- package/dist/main.d.ts +11 -0
- package/dist/main.js +144 -0
- package/dist/repl.d.ts +47 -0
- package/dist/repl.js +183 -0
- package/dist/rlm.d.ts +55 -0
- package/dist/rlm.js +354 -0
- package/dist/runtime.py +185 -0
- package/dist/viewer.d.ts +12 -0
- package/dist/viewer.js +828 -0
- package/package.json +48 -0
- package/rlm_config.yaml +17 -0
package/dist/main.d.ts
ADDED
package/dist/main.js
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#!/usr/bin/env tsx
|
|
2
|
+
/**
|
|
3
|
+
* rlm — Recursive Language Model CLI
|
|
4
|
+
*
|
|
5
|
+
* Entry point for the `rlm` command.
|
|
6
|
+
*
|
|
7
|
+
* rlm → interactive terminal (default)
|
|
8
|
+
* rlm run → single-shot CLI run
|
|
9
|
+
* rlm help → show help
|
|
10
|
+
*/
|
|
11
|
+
const HELP = `
|
|
12
|
+
\x1b[36m╔══════════════════════════════════════════════════════════════╗
|
|
13
|
+
║ rlm — Recursive Language Models ║
|
|
14
|
+
║ CLI for large-context LLM processing ║
|
|
15
|
+
║ arXiv:2512.24601 ║
|
|
16
|
+
╚══════════════════════════════════════════════════════════════╝\x1b[0m
|
|
17
|
+
|
|
18
|
+
\x1b[1mUSAGE\x1b[0m
|
|
19
|
+
\x1b[33mrlm\x1b[0m Interactive terminal (default)
|
|
20
|
+
\x1b[33mrlm run\x1b[0m [options] "<query>" Run a single query
|
|
21
|
+
\x1b[33mrlm viewer\x1b[0m Browse saved trajectory files
|
|
22
|
+
\x1b[33mrlm benchmark\x1b[0m <name> [--idx] Run benchmark (direct LLM vs RLM)
|
|
23
|
+
|
|
24
|
+
\x1b[1mRUN OPTIONS\x1b[0m
|
|
25
|
+
--model <id> Override model (default: RLM_MODEL from .env)
|
|
26
|
+
--file <path> Read context from a file
|
|
27
|
+
--url <url> Fetch context from a URL
|
|
28
|
+
--stdin Read context from stdin
|
|
29
|
+
--verbose Show iteration progress
|
|
30
|
+
|
|
31
|
+
\x1b[1mCONFIGURATION\x1b[0m
|
|
32
|
+
.env file:
|
|
33
|
+
ANTHROPIC_API_KEY=sk-ant-...
|
|
34
|
+
RLM_MODEL=claude-sonnet-4-5-20250929
|
|
35
|
+
|
|
36
|
+
rlm_config.yaml:
|
|
37
|
+
max_iterations: 20
|
|
38
|
+
max_depth: 3
|
|
39
|
+
max_sub_queries: 50
|
|
40
|
+
truncate_len: 5000
|
|
41
|
+
`.trim();
|
|
42
|
+
/**
 * Dispatch the CLI subcommand.
 *
 * Each subcommand is routed by dynamically importing the module that
 * implements it, so startup stays fast for the common paths. Subcommands
 * that re-parse argv (`run`, `viewer`) have the subcommand word stripped
 * from `process.argv` first.
 */
async function main() {
    const args = process.argv.slice(2);
    // No subcommand → interactive terminal.
    const command = args[0] || "interactive";
    switch (command) {
        case "interactive":
        case "i": {
            await import("./interactive.js");
            break;
        }
        case "viewer":
        case "view": {
            // Strip the subcommand from argv so viewer.ts doesn't see it as a file path
            process.argv = [process.argv[0], process.argv[1], ...args.slice(1)];
            await import("./viewer.js");
            break;
        }
        case "run": {
            process.argv = [process.argv[0], process.argv[1], ...args.slice(1)];
            await import("./cli.js");
            break;
        }
        case "benchmark":
        case "bench": {
            const benchName = args[1];
            const benchArgs = args.slice(2);
            // Known benchmark names → script paths relative to the package root.
            const benchScripts = {
                oolong: "benchmarks/oolong_synth.ts",
                longbench: "benchmarks/longbench_narrativeqa.ts",
            };
            // Object.hasOwn guards against inherited keys: a bare truthiness
            // check would accept e.g. `rlm benchmark toString` and then crash
            // inside path.join with a function instead of a string.
            if (benchName && Object.hasOwn(benchScripts, benchName)) {
                const { spawn } = await import("node:child_process");
                const { dirname, join } = await import("node:path");
                const { fileURLToPath } = await import("node:url");
                const root = join(dirname(fileURLToPath(import.meta.url)), "..");
                const script = join(root, benchScripts[benchName]);
                const tsxBin = join(root, "node_modules", ".bin", "tsx");
                await new Promise((resolve, reject) => {
                    const child = spawn(tsxBin, [script, ...benchArgs], {
                        stdio: "inherit",
                        cwd: root,
                    });
                    child.on("exit", (code) => {
                        // Propagate the benchmark's exit code without killing
                        // our own process mid-flight.
                        process.exitCode = code ?? 1;
                        resolve();
                    });
                    child.on("error", (err) => {
                        reject(new Error(`Failed to spawn benchmark: ${err.message}`));
                    });
                });
            }
            else {
                // Unknown or missing benchmark name → print benchmark usage.
                console.log(`\x1b[36m\x1b[1mrlm benchmark\x1b[0m — Run direct LLM vs RLM comparison\n`);
                console.log(`\x1b[1mUSAGE\x1b[0m`);
                console.log(`  \x1b[33mrlm benchmark oolong\x1b[0m [--idx N]      Oolong Synth (synthetic long-context)`);
                console.log(`  \x1b[33mrlm benchmark longbench\x1b[0m [--idx N]   LongBench NarrativeQA (reading comprehension)\n`);
                console.log(`Python dependencies are auto-installed into .venv on first run.\n`);
                console.log(`Each benchmark loads a dataset example, runs it through both direct LLM`);
                console.log(`and RLM, then prints a side-by-side comparison with timing.`);
            }
            break;
        }
        case "help":
        case "--help":
        case "-h": {
            console.log(HELP);
            break;
        }
        case "version":
        case "--version":
        case "-v": {
            try {
                const { readFileSync } = await import("node:fs");
                const { dirname, join } = await import("node:path");
                const { fileURLToPath } = await import("node:url");
                const __dir = dirname(fileURLToPath(import.meta.url));
                const pkgPath = join(__dir, "..", "package.json");
                const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
                console.log(`rlm v${pkg.version}`);
            }
            catch {
                // Don't hardcode a fallback version number: the previous
                // "rlm v0.1.0" literal had already drifted from the
                // published package version.
                console.log("rlm (version unknown)");
            }
            break;
        }
        default: {
            if (command.startsWith("--")) {
                // Flags without subcommand → assume "run"
                await import("./cli.js");
            }
            else {
                console.error(`Unknown command: ${command}`);
                console.error('Run "rlm help" for usage information.');
                process.exit(1);
            }
        }
    }
}
|
|
139
|
+
// Kick off the dispatcher; any error that escapes main() is fatal.
main().catch((error) => {
    console.error("Fatal error:", error);
    process.exit(1);
});
// Ensure this file is treated as an ES module even with no other exports.
export {};
//# sourceMappingURL=main.js.map
|
package/dist/repl.d.ts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Persistent Python REPL manager for the RLM CLI.
 *
 * Spawns a single Python subprocess running `runtime.py` and keeps it alive
 * across multiple RLM iterations. Communication uses line-delimited JSON
 * over stdin/stdout.
 */
/** Result of executing a code snippet in the REPL. */
export interface ExecResult {
    /** Text the snippet wrote to stdout. */
    stdout: string;
    /** Text the snippet wrote to stderr. */
    stderr: string;
    /** True when the snippet set the Final sentinel (an answer is ready). */
    hasFinal: boolean;
    /** The final answer string, or null when hasFinal is false. */
    finalValue: string | null;
}
/** Callback the host provides to handle llm_query() calls from Python. */
export type LlmQueryHandler = (subContext: string, instruction: string) => Promise<string>;
export declare class PythonRepl {
    /** Child Python process handle; null while not running. */
    private proc;
    /** readline interface over the child's stdout; null while not running. */
    private rl;
    /** Registered llm_query() callback; null until setLlmQueryHandler is called. */
    private llmQueryHandler;
    /**
     * Pending resolvers for messages we're waiting on from Python.
     * Each entry maps a message type to a one-shot resolve/reject pair.
     */
    private pending;
    /** Whether the REPL subprocess is alive. */
    get isAlive(): boolean;
    /**
     * Start the Python subprocess and wait for it to signal readiness.
     */
    start(signal?: AbortSignal): Promise<void>;
    /** Register the callback that handles llm_query() calls from Python. */
    setLlmQueryHandler(handler: LlmQueryHandler): void;
    /** Inject the full context string into the Python REPL. */
    setContext(text: string): Promise<void>;
    /** Reset the Final sentinel variable. */
    resetFinal(): Promise<void>;
    /** Execute a code snippet and return the result. */
    execute(code: string): Promise<ExecResult>;
    /** Gracefully shut down the Python subprocess. */
    shutdown(): void;
    /** Write one line-delimited JSON message to the child's stdin. */
    private send;
    /** Parse one stdout line and route it to llm_query handling or a pending waiter. */
    private handleLine;
    /** Invoke the registered handler for an llm_query message and send the reply. */
    private handleLlmQueryMessage;
    /** Promise that settles when a message of the given type arrives (or times out). */
    private waitForMessage;
    /** Tear down streams and reject all pending waiters. */
    private cleanup;
}
|
package/dist/repl.js
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Persistent Python REPL manager for the RLM CLI.
|
|
3
|
+
*
|
|
4
|
+
* Spawns a single Python subprocess running `runtime.py` and keeps it alive
|
|
5
|
+
* across multiple RLM iterations. Communication uses line-delimited JSON
|
|
6
|
+
* over stdin/stdout.
|
|
7
|
+
*/
|
|
8
|
+
import { spawn } from "node:child_process";
|
|
9
|
+
import * as path from "node:path";
|
|
10
|
+
import * as readline from "node:readline";
|
|
11
|
+
// ── REPL class ──────────────────────────────────────────────────────────────
|
|
12
|
+
export class PythonRepl {
|
|
13
|
+
proc = null;
|
|
14
|
+
rl = null;
|
|
15
|
+
llmQueryHandler = null;
|
|
16
|
+
/**
|
|
17
|
+
* Pending resolvers for messages we're waiting on from Python.
|
|
18
|
+
* Each entry maps a message type to a one-shot resolve/reject pair.
|
|
19
|
+
*/
|
|
20
|
+
pending = new Map();
|
|
21
|
+
/** Whether the REPL subprocess is alive. */
|
|
22
|
+
get isAlive() {
|
|
23
|
+
return this.proc !== null && this.proc.exitCode === null;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Start the Python subprocess and wait for it to signal readiness.
|
|
27
|
+
*/
|
|
28
|
+
async start(signal) {
|
|
29
|
+
if (this.isAlive)
|
|
30
|
+
return;
|
|
31
|
+
const runtimePath = path.join(path.dirname(new URL(import.meta.url).pathname), "runtime.py");
|
|
32
|
+
this.proc = spawn("python3", [runtimePath], {
|
|
33
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
34
|
+
env: {
|
|
35
|
+
// Only pass what Python actually needs — not API keys or secrets
|
|
36
|
+
PATH: process.env.PATH,
|
|
37
|
+
HOME: process.env.HOME,
|
|
38
|
+
PYTHONUNBUFFERED: "1",
|
|
39
|
+
},
|
|
40
|
+
});
|
|
41
|
+
this.rl = readline.createInterface({ input: this.proc.stdout });
|
|
42
|
+
this.rl.on("line", (line) => this.handleLine(line));
|
|
43
|
+
this.proc.stderr.on("data", (chunk) => {
|
|
44
|
+
const text = chunk.toString();
|
|
45
|
+
if (text.trim()) {
|
|
46
|
+
process.stderr.write(`[rlm-repl-python] ${text}`);
|
|
47
|
+
}
|
|
48
|
+
});
|
|
49
|
+
this.proc.on("close", () => {
|
|
50
|
+
this.cleanup();
|
|
51
|
+
});
|
|
52
|
+
if (signal) {
|
|
53
|
+
signal.addEventListener("abort", () => {
|
|
54
|
+
this.shutdown();
|
|
55
|
+
}, { once: true });
|
|
56
|
+
}
|
|
57
|
+
await this.waitForMessage("ready");
|
|
58
|
+
}
|
|
59
|
+
/** Register the callback that handles llm_query() calls from Python. */
|
|
60
|
+
setLlmQueryHandler(handler) {
|
|
61
|
+
this.llmQueryHandler = handler;
|
|
62
|
+
}
|
|
63
|
+
/** Inject the full context string into the Python REPL. */
|
|
64
|
+
async setContext(text) {
|
|
65
|
+
this.send({ type: "set_context", value: text });
|
|
66
|
+
await this.waitForMessage("context_set");
|
|
67
|
+
}
|
|
68
|
+
/** Reset the Final sentinel variable. */
|
|
69
|
+
async resetFinal() {
|
|
70
|
+
this.send({ type: "reset_final" });
|
|
71
|
+
await this.waitForMessage("final_reset");
|
|
72
|
+
}
|
|
73
|
+
/** Execute a code snippet and return the result. */
|
|
74
|
+
async execute(code) {
|
|
75
|
+
this.send({ type: "exec", code });
|
|
76
|
+
const msg = (await this.waitForMessage("exec_done"));
|
|
77
|
+
return {
|
|
78
|
+
stdout: msg.stdout,
|
|
79
|
+
stderr: msg.stderr,
|
|
80
|
+
hasFinal: msg.has_final,
|
|
81
|
+
finalValue: msg.final_value,
|
|
82
|
+
};
|
|
83
|
+
}
|
|
84
|
+
/** Gracefully shut down the Python subprocess. */
|
|
85
|
+
shutdown() {
|
|
86
|
+
if (this.proc && this.proc.exitCode === null) {
|
|
87
|
+
try {
|
|
88
|
+
this.send({ type: "shutdown" });
|
|
89
|
+
}
|
|
90
|
+
catch {
|
|
91
|
+
// stdin may already be closed
|
|
92
|
+
}
|
|
93
|
+
this.proc.kill("SIGTERM");
|
|
94
|
+
}
|
|
95
|
+
this.cleanup();
|
|
96
|
+
}
|
|
97
|
+
// ── Internal ─────────────────────────────────────────────────────────────
|
|
98
|
+
send(msg) {
|
|
99
|
+
if (!this.proc || !this.proc.stdin || this.proc.stdin.destroyed) {
|
|
100
|
+
throw new Error("REPL subprocess is not running");
|
|
101
|
+
}
|
|
102
|
+
this.proc.stdin.write(`${JSON.stringify(msg)}\n`);
|
|
103
|
+
}
|
|
104
|
+
handleLine(line) {
|
|
105
|
+
const trimmed = line.trim();
|
|
106
|
+
if (!trimmed)
|
|
107
|
+
return;
|
|
108
|
+
let msg;
|
|
109
|
+
try {
|
|
110
|
+
msg = JSON.parse(trimmed);
|
|
111
|
+
}
|
|
112
|
+
catch {
|
|
113
|
+
return;
|
|
114
|
+
}
|
|
115
|
+
if (msg.type === "llm_query") {
|
|
116
|
+
this.handleLlmQueryMessage(msg);
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
119
|
+
const entry = this.pending.get(msg.type);
|
|
120
|
+
if (entry) {
|
|
121
|
+
this.pending.delete(msg.type);
|
|
122
|
+
entry.resolve(msg);
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
async handleLlmQueryMessage(msg) {
|
|
126
|
+
if (!this.llmQueryHandler) {
|
|
127
|
+
this.send({
|
|
128
|
+
type: "llm_result",
|
|
129
|
+
id: msg.id,
|
|
130
|
+
result: "[ERROR] No LLM query handler registered",
|
|
131
|
+
});
|
|
132
|
+
return;
|
|
133
|
+
}
|
|
134
|
+
try {
|
|
135
|
+
const result = await this.llmQueryHandler(msg.sub_context, msg.instruction);
|
|
136
|
+
this.send({ type: "llm_result", id: msg.id, result });
|
|
137
|
+
}
|
|
138
|
+
catch (err) {
|
|
139
|
+
const errorText = err instanceof Error ? err.message : String(err);
|
|
140
|
+
this.send({
|
|
141
|
+
type: "llm_result",
|
|
142
|
+
id: msg.id,
|
|
143
|
+
result: `[ERROR] LLM query failed: ${errorText}`,
|
|
144
|
+
});
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
waitForMessage(type) {
|
|
148
|
+
return new Promise((resolve, reject) => {
|
|
149
|
+
if (!this.isAlive) {
|
|
150
|
+
reject(new Error(`REPL subprocess is not running (waiting for "${type}")`));
|
|
151
|
+
return;
|
|
152
|
+
}
|
|
153
|
+
const timeout = setTimeout(() => {
|
|
154
|
+
if (this.pending.has(type)) {
|
|
155
|
+
this.pending.delete(type);
|
|
156
|
+
reject(new Error(`Timeout waiting for "${type}" from Python REPL`));
|
|
157
|
+
}
|
|
158
|
+
}, 300_000);
|
|
159
|
+
this.pending.set(type, {
|
|
160
|
+
resolve: (msg) => {
|
|
161
|
+
clearTimeout(timeout);
|
|
162
|
+
resolve(msg);
|
|
163
|
+
},
|
|
164
|
+
reject: (err) => {
|
|
165
|
+
clearTimeout(timeout);
|
|
166
|
+
reject(err);
|
|
167
|
+
},
|
|
168
|
+
});
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
cleanup() {
|
|
172
|
+
this.rl?.close();
|
|
173
|
+
this.rl = null;
|
|
174
|
+
this.proc = null;
|
|
175
|
+
// Reject all pending promises so callers unblock immediately
|
|
176
|
+
const abortError = new Error("REPL shut down");
|
|
177
|
+
for (const [, entry] of this.pending) {
|
|
178
|
+
entry.reject(abortError);
|
|
179
|
+
}
|
|
180
|
+
this.pending.clear();
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
//# sourceMappingURL=repl.js.map
|
package/dist/rlm.d.ts
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
 * RLM Loop — implements Algorithm 1 from "Recursive Language Models" (arXiv:2512.24601).
 *
 * The loop works as follows:
 * 1. Inject the full context into a persistent Python REPL as a variable.
 * 2. Send the LLM metadata about the context plus the user's query.
 *    The LLM writes Python code that can inspect/slice/query `context`,
 *    call `llm_query()` recursively, and call FINAL() when done.
 * 3. Execute the code, capture stdout.
 * 4. If FINAL is set, return it. Otherwise loop.
 */
import { type Api, type Model } from "@mariozechner/pi-ai";
import type { PythonRepl } from "./repl.js";
/** Inputs and callbacks for one run of the RLM loop. */
export interface RlmOptions {
    /** Full context text to inject into the Python REPL as `context`. */
    context: string;
    /** The user's query about the context. */
    query: string;
    /** LLM used by the loop (presumably for both code generation and sub-queries — confirm in rlm.js). */
    model: Model<Api>;
    /** Persistent Python REPL the loop executes generated code in. */
    repl: PythonRepl;
    /** Optional signal to cancel the run. */
    signal?: AbortSignal;
    /** Invoked as each iteration moves through its phases. */
    onProgress?: (info: RlmProgress) => void;
    /** Invoked when a recursive llm_query() call begins. */
    onSubQueryStart?: (info: SubQueryStartInfo) => void;
    /** Invoked when a recursive llm_query() call completes. */
    onSubQuery?: (info: SubQueryInfo) => void;
}
/** Progress snapshot delivered to RlmOptions.onProgress. */
export interface RlmProgress {
    /** Current iteration of the loop. */
    iteration: number;
    /** Iteration budget for this run. */
    maxIterations: number;
    /** Number of recursive sub-queries issued so far. */
    subQueries: number;
    /** Which stage of the iteration is in progress. */
    phase: "generating_code" | "executing" | "checking_final";
    /** Python code produced by the LLM, when available for this phase. */
    code?: string;
    /** Captured stdout of the executed code, when available. */
    stdout?: string;
    /** Captured stderr of the executed code, when available. */
    stderr?: string;
    /** User-role message sent to the LLM, when available. */
    userMessage?: string;
    /** Raw LLM response text, when available. */
    rawResponse?: string;
    /** System prompt sent to the LLM, when available. */
    systemPrompt?: string;
}
/** Details available when a recursive sub-query starts. */
export interface SubQueryStartInfo {
    /** Ordinal of this sub-query within the run. */
    index: number;
    /** Length (in characters) of the sub-context passed to llm_query(). */
    contextLength: number;
    /** Instruction string passed to llm_query(). */
    instruction: string;
}
/** Details available when a recursive sub-query completes. */
export interface SubQueryInfo {
    /** Ordinal of this sub-query within the run. */
    index: number;
    /** Length (in characters) of the sub-context passed to llm_query(). */
    contextLength: number;
    /** Instruction string passed to llm_query(). */
    instruction: string;
    /** Length (in characters) of the sub-query result. */
    resultLength: number;
    /** Truncated preview of the sub-query result. */
    resultPreview: string;
    /** Wall-clock duration of the sub-query in milliseconds. */
    elapsedMs: number;
}
/** Outcome of a full RLM loop run. */
export interface RlmResult {
    /** Final answer text. */
    answer: string;
    /** Number of iterations actually used. */
    iterations: number;
    /** Total recursive sub-queries issued. */
    totalSubQueries: number;
    /** True when FINAL was set; false if the loop stopped without it (e.g. budget exhausted — confirm in rlm.js). */
    completed: boolean;
}
/** Run the RLM loop (Algorithm 1) to completion and return the result. */
export declare function runRlmLoop(options: RlmOptions): Promise<RlmResult>;
|