rlm-cli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/rlm.js ADDED
@@ -0,0 +1,354 @@
1
+ /**
2
+ * RLM Loop — implements Algorithm 1 from "Recursive Language Models" (arXiv:2512.24601).
3
+ *
4
+ * The loop works as follows:
5
+ * 1. Inject the full context into a persistent Python REPL as a variable.
6
+ * 2. Send the LLM metadata about the context plus the user's query.
7
+ * The LLM writes Python code that can inspect/slice/query `context`,
8
+ * call `llm_query()` recursively, and call FINAL() when done.
9
+ * 3. Execute the code, capture stdout.
10
+ * 4. If FINAL is set, return it. Otherwise loop.
11
+ */
12
+ import { completeSimple, } from "@mariozechner/pi-ai";
13
+ import { loadConfig } from "./config.js";
14
// ── Load config ─────────────────────────────────────────────────────────────
// Loaded once at module import time. Fields read in this module:
// truncate_len, max_iterations, max_sub_queries, metadata_preview_lines.
const config = loadConfig();
16
// ── System prompt (inspired by fast-rlm) ────────────────────────────────────
/**
 * Build the system prompt for the root RLM agent.
 *
 * Documents the REPL environment (the `context` variable, `llm_query` /
 * `async_llm_query`, and the FINAL / FINAL_VAR terminators), the loop rules,
 * the required output format (a single ```python block), and example
 * strategies. Interpolates `config.truncate_len` so the model knows how much
 * printed output survives each turn.
 *
 * @returns {string} The full system prompt text.
 */
function buildSystemPrompt() {
    return `You are a Recursive Language Model (RLM) agent. You process large contexts by writing Python code that runs in a persistent REPL.

## Available in the REPL

1. A \`context\` variable containing the full input text (may be very large). You should check the content of the \`context\` variable to understand what you are working with.

2. A \`llm_query(sub_context, instruction)\` function that sends a sub-piece of the context to an LLM with an instruction and returns the response. Use this for summarization, extraction, classification, etc. on chunks. For parallel queries, use \`async_llm_query()\` with \`asyncio.gather()\`.

3. Two functions to return your answer:
   - \`FINAL("your answer")\` — provide the answer as a string
   - \`FINAL_VAR(variable)\` — return a variable you built up in the REPL

## Rules

1. Write valid Python 3 code. You have access to the standard library.
2. Use \`print()\` to output metadata/intermediate results visible in the next iteration.
3. Use \`len(context)\` and slicing to understand the context size before processing.
4. For large contexts, split into chunks and use \`llm_query()\` on each chunk, then aggregate.
5. Call \`FINAL("answer")\` or \`FINAL_VAR(var)\` only when you have a complete answer.
6. Do NOT call FINAL prematurely — if you need more iterations, just print your intermediate state.
7. Be efficient: minimize the number of \`llm_query()\` calls by using smart chunking.
8. Print output will be truncated to last ${config.truncate_len} characters. Keep printed output concise.

## How to control sub-agent behavior

- When calling \`llm_query()\`, give clear instructions at the beginning of the context. If you only pass context without instructions, the sub-agent cannot do its task.
- To extract data verbatim: instruct the sub-agent to use \`FINAL_VAR\` and slice important sections.
- To summarize or analyze: instruct the sub-agent to explore and generate the answer.
- Help sub-agents by describing the data format (dict, list, etc.) — clarity is important!

## Important notes

- This is a multi-turn environment. You do NOT need to answer in one shot.
- Before returning via FINAL, it is advisable to print the answer first to inspect formatting.
- The REPL persists state like a Jupyter notebook — past variables and code are maintained. Do NOT rewrite old code or accidentally delete the \`context\` variable.
- You will only see truncated outputs, so use \`llm_query()\` for semantic analysis of large text.
- You can use variables as buffers to build up your final answer across iterations.

## Output format

Respond with ONLY a Python code block. No explanation before or after.

\`\`\`python
# Your working python code
print(f"Context length: {len(context)} chars")
\`\`\`

## Example strategies

**Chunking for large contexts:**
\`\`\`python
chunk_size = len(context) // 5
buffers = []
for i in range(5):
    start = i * chunk_size
    end = (i + 1) * chunk_size if i < 4 else len(context)
    chunk = context[start:end]
    result = llm_query(chunk, f"Extract key information relevant to: {query}")
    buffers.append(result)
    print(f"Chunk {i+1}/5 done: {len(result)} chars")
\`\`\`

**Parallel queries with asyncio:**
\`\`\`python
import asyncio
tasks = []
for i, chunk in enumerate(chunks):
    tasks.append(async_llm_query(chunk, f"Summarize chunk {i}"))
results = await asyncio.gather(*tasks)
\`\`\`

**Building up a final answer:**
\`\`\`python
# After collecting all results in a buffer
final_answer = llm_query("\\n".join(buffers), f"Synthesize these summaries to answer: {query}")
FINAL(final_answer)
\`\`\``;
}
96
// ── Abort helper ────────────────────────────────────────────────────────
/**
 * Race `promise` against an AbortSignal so an abort (e.g. Ctrl+C) settles
 * the result early with a rejection.
 *
 * - No signal: the promise is returned untouched.
 * - Signal already aborted: an immediately-rejected promise is returned.
 * - Otherwise: whichever settles first wins, and the abort listener is
 *   always detached afterwards so the signal does not accumulate handlers.
 */
function raceAbort(promise, signal) {
    if (!signal) return promise;
    if (signal.aborted) return Promise.reject(new Error("Aborted"));
    let detach = () => { };
    const aborted = new Promise((_resolve, reject) => {
        const handler = () => reject(new Error("Aborted"));
        signal.addEventListener("abort", handler, { once: true });
        detach = () => signal.removeEventListener("abort", handler);
    });
    return Promise.race([promise, aborted]).finally(() => detach());
}
113
// ── Helpers ─────────────────────────────────────────────────────────────────
/**
 * Build the human-readable metadata summary sent to the LLM on the first
 * turn: character and line counts, plus the first and last
 * `config.metadata_preview_lines` lines of the context.
 */
function buildContextMetadata(context) {
    const allLines = context.split("\n");
    const n = config.metadata_preview_lines;
    const head = allLines.slice(0, n).join("\n");
    const tail = allLines.slice(-n).join("\n");
    const report = [
        `Context statistics:`,
        ` - ${context.length.toLocaleString()} characters`,
        ` - ${allLines.length.toLocaleString()} lines`,
        ``,
        `First ${n} lines:`,
        head,
        ``,
        `Last ${n} lines:`,
        tail,
    ];
    return report.join("\n");
}
132
/**
 * Pull the Python code out of an LLM response.
 *
 * Scans the response's text blocks in order. A fenced ```python / ```repl /
 * bare ``` block wins; failing that, a text block that "looks like" raw
 * Python (contains an assignment, print, import, `for ` or `def `, and does
 * not start with a comment) is returned verbatim. Returns null when no
 * usable code is found.
 */
function extractCodeFromResponse(response) {
    const fenceRe = /```(?:python|repl)?\s*\n([\s\S]*?)```/;
    const pythonHints = ["=", "print", "import", "for ", "def "];
    for (const part of response.content) {
        if (part.type !== "text") continue;
        const fenced = fenceRe.exec(part.text);
        if (fenced) return fenced[1].trim();
        // Fallback heuristic for responses that forgot the fence.
        const bare = part.text.trim();
        const looksLikeCode = bare.length > 0 &&
            !bare.startsWith("#") &&
            pythonHints.some((hint) => bare.includes(hint));
        if (looksLikeCode) return bare;
    }
    return null;
}
155
/**
 * Clamp REPL output to the configured length before feeding it back to the
 * LLM. Empty output becomes an explicit placeholder; oversized output keeps
 * only the last `config.truncate_len` characters behind a marker prefix.
 */
function truncateOutput(text) {
    const limit = config.truncate_len;
    if (text.length > limit) {
        return `[TRUNCATED: Last ${limit} chars shown].. ${text.slice(-limit)}`;
    }
    return text.length === 0 ? "[EMPTY OUTPUT]" : text;
}
163
// ── Main loop ───────────────────────────────────────────────────────────────
/**
 * Run the outer RLM loop (Algorithm 1 from the paper cited in the file
 * header): repeatedly ask the model for Python code, execute it in the
 * persistent REPL, and feed truncated output back until FINAL is set or
 * `config.max_iterations` is exhausted.
 *
 * @param options.context Full input text; injected into the REPL as `context`.
 * @param options.query   The user's question.
 * @param options.model   Model descriptor passed to completeSimple().
 * @param options.repl    Persistent REPL (setContext / resetFinal /
 *                        setLlmQueryHandler / execute / start / shutdown).
 * @param options.signal  Optional AbortSignal; an abort yields "[Aborted]".
 * @param options.onProgress,onSubQueryStart,onSubQuery Optional callbacks.
 * @returns {{answer, iterations, totalSubQueries, completed}}
 */
export async function runRlmLoop(options) {
    const { context, query, model, repl, signal, onProgress, onSubQueryStart, onSubQuery } = options;
    let totalSubQueries = 0;
    let iterationSubQueries = 0;
    // Bridge invoked by the Python side's llm_query()/async_llm_query():
    // enforces the global sub-query budget, then relays one chunk + an
    // instruction to the model and returns the plain-text answer.
    const llmQueryHandler = async (subContext, instruction) => {
        if (signal?.aborted)
            throw new Error("Aborted");
        if (totalSubQueries >= config.max_sub_queries) {
            // Budget exhausted: return an error string (not a throw) so the
            // Python code can still finish gracefully.
            return `[ERROR] Maximum sub-query limit (${config.max_sub_queries}) reached. Call FINAL() with your best answer.`;
        }
        ++totalSubQueries;
        const queryIndex = ++iterationSubQueries;
        const sqStart = Date.now();
        onSubQueryStart?.({
            index: queryIndex,
            contextLength: subContext.length,
            instruction,
        });
        const response = await raceAbort(completeSimple(model, {
            systemPrompt: `You are a helpful assistant. Answer the user's question based on the provided context. Respond in natural language (not code). Be concise but thorough.`,
            messages: [
                {
                    role: "user",
                    content: `Context:\n${subContext}\n\nInstruction: ${instruction}`,
                    timestamp: Date.now(),
                },
            ],
        }), signal);
        const textParts = response.content.filter((b) => b.type === "text").map((b) => b.text);
        const result = textParts.join("\n");
        onSubQuery?.({
            index: queryIndex,
            contextLength: subContext.length,
            instruction,
            resultLength: result.length,
            resultPreview: result,
            elapsedMs: Date.now() - sqStart,
        });
        return result;
    };
    /** Set up (or re-set up) the REPL with context and handler. */
    async function initRepl() {
        await repl.setContext(context);
        await repl.resetFinal();
        repl.setLlmQueryHandler(llmQueryHandler);
    }
    await initRepl();
    const metadata = buildContextMetadata(context);
    // First turn: context stats + previews + the query (never the raw context).
    const conversationHistory = [
        {
            role: "user",
            content: `${metadata}\n\nQuery: ${query}`,
            timestamp: Date.now(),
        },
    ];
    // One iteration = one model turn + one REPL execution.
    for (let iteration = 1; iteration <= config.max_iterations; iteration++) {
        iterationSubQueries = 0; // per-iteration counter used for callback indices
        if (signal?.aborted) {
            return { answer: "[Aborted]", iterations: iteration, totalSubQueries, completed: false };
        }
        // Surface the latest user message to the progress callback.
        const lastUserMsg = conversationHistory
            .filter((m) => m.role === "user")
            .at(-1);
        const userMsgText = typeof lastUserMsg?.content === "string"
            ? lastUserMsg.content
            : "";
        onProgress?.({
            iteration,
            maxIterations: config.max_iterations,
            subQueries: totalSubQueries,
            phase: "generating_code",
            userMessage: userMsgText,
            systemPrompt: iteration === 1 ? buildSystemPrompt() : undefined,
        });
        const response = await raceAbort(completeSimple(model, {
            systemPrompt: buildSystemPrompt(),
            messages: conversationHistory,
        }), signal);
        if (signal?.aborted) {
            return { answer: "[Aborted]", iterations: iteration, totalSubQueries, completed: false };
        }
        // Surface API errors
        if ("errorMessage" in response && response.errorMessage) {
            const errMsg = response.errorMessage;
            if (errMsg.includes("authentication") || errMsg.includes("401")) {
                // Auth failures cannot recover by retrying — bail out with advice.
                return {
                    answer: `[API Authentication Error] ${errMsg}\n\nCheck your ANTHROPIC_API_KEY in .env.`,
                    iterations: iteration,
                    totalSubQueries,
                    completed: false,
                };
            }
            // Non-auth errors are logged and the loop continues with whatever
            // content the response carried.
            process.stderr.write(`[rlm] API error: ${errMsg}\n`);
        }
        const rawResponseText = response.content
            .filter((b) => b.type === "text")
            .map((b) => b.text)
            .join("\n");
        const code = extractCodeFromResponse(response);
        if (!code) {
            // No code block found — might be a direct answer or extraction failure
            conversationHistory.push(response);
            conversationHistory.push({
                role: "user",
                content: "Error: Could not extract code. Make sure to wrap your code in ```python ... ``` blocks.",
                timestamp: Date.now(),
            });
            continue;
        }
        conversationHistory.push(response);
        onProgress?.({
            iteration,
            maxIterations: config.max_iterations,
            subQueries: totalSubQueries,
            phase: "executing",
            code,
            rawResponse: rawResponseText,
        });
        let execResult;
        try {
            execResult = await repl.execute(code);
        }
        catch (err) {
            if (signal?.aborted) {
                return { answer: "[Aborted]", iterations: iteration, totalSubQueries, completed: false };
            }
            const errorMsg = err instanceof Error ? err.message : String(err);
            // If the REPL timed out or crashed, restart it so next iteration works
            if (errorMsg.includes("Timeout") || errorMsg.includes("not running") || errorMsg.includes("shut down")) {
                try {
                    // NOTE(review): shutdown() is not awaited here — presumably
                    // synchronous/fire-and-forget; confirm against the REPL class.
                    repl.shutdown();
                    await repl.start(signal);
                    await initRepl();
                }
                catch {
                    return { answer: "[REPL crashed and could not restart]", iterations: iteration, totalSubQueries, completed: false };
                }
            }
            // Feed the failure back so the model can fix its own code.
            conversationHistory.push({
                role: "user",
                content: `Execution error: ${errorMsg}\n\nPlease fix the code and try again.`,
                timestamp: Date.now(),
            });
            continue;
        }
        if (signal?.aborted) {
            return { answer: "[Aborted]", iterations: iteration, totalSubQueries, completed: false };
        }
        onProgress?.({
            iteration,
            maxIterations: config.max_iterations,
            subQueries: totalSubQueries,
            phase: "checking_final",
            stdout: execResult.stdout,
            stderr: execResult.stderr,
        });
        // FINAL()/FINAL_VAR() was called on the Python side — we are done.
        if (execResult.hasFinal && execResult.finalValue !== null) {
            return {
                answer: execResult.finalValue,
                iterations: iteration,
                totalSubQueries,
                completed: true,
            };
        }
        // Build next user message with truncated output
        const parts = [];
        if (execResult.stdout) {
            parts.push(`Output:\n${truncateOutput(execResult.stdout)}`);
        }
        if (execResult.stderr) {
            parts.push(`Stderr:\n${execResult.stderr.slice(0, 5000)}`);
        }
        if (parts.length === 0) {
            parts.push("(No output produced. The code ran without printing anything.)");
        }
        parts.push(`\nIteration ${iteration}/${config.max_iterations}. Sub-queries used: ${totalSubQueries}/${config.max_sub_queries}.`);
        parts.push("Continue processing or call FINAL() when you have the answer.");
        conversationHistory.push({
            role: "user",
            content: parts.join("\n\n"),
            timestamp: Date.now(),
        });
    }
    // Iteration budget exhausted without FINAL.
    return {
        answer: "[Maximum iterations reached without calling FINAL]",
        iterations: config.max_iterations,
        totalSubQueries,
        completed: false,
    };
}
354
+ //# sourceMappingURL=rlm.js.map
@@ -0,0 +1,185 @@
1
+ """
2
+ RLM Runtime — Python-side helpers for the Recursive Language Model CLI.
3
+
4
+ This module runs in a persistent Python subprocess. It provides:
5
+ - `context`: the full prompt/document as a string variable
6
+ - `llm_query(sub_context, instruction)`: bridge to parent LLM for sub-queries
7
+ - `FINAL(x)`: set final answer string and terminate loop
8
+ - `FINAL_VAR(x)`: set final answer from a variable
9
+
10
+ Communication protocol (line-delimited JSON over stdio):
11
+ -> stdout: {"type":"llm_query","sub_context":"...","instruction":"...","id":"..."}
12
+ <- stdin: {"type":"llm_result","id":"...","result":"..."}
13
+ -> stdout: {"type":"exec_done","stdout":"...","stderr":"...","has_final":bool,"final_value":"..."|null}
14
+
15
+ All protocol I/O uses saved references to the original sys.stdout/sys.stdin
16
+ so that exec'd code can freely redirect sys.stdout for print() capture.
17
+ """
18
+
19
+ import json
20
+ import sys
21
+ import uuid
22
+ import io
23
+ import traceback
24
+ import asyncio
25
+
26
+ # Real stdio handles — saved before exec() can redirect sys.stdout/sys.stderr.
27
+ _real_stdout = sys.stdout
28
+ _real_stdin = sys.stdin
29
+
30
+ # Will be set by the TypeScript host before each execution
31
+ context: str = ""
32
+
33
+ # Sentinel — when set to a non-None value, the loop terminates
34
+ __final_result__ = None
35
+
36
+
37
+ def FINAL(x):
38
+ """Set the final answer as a string and terminate the RLM loop."""
39
+ global __final_result__
40
+ __final_result__ = str(x)
41
+
42
+
43
+ def FINAL_VAR(x):
44
+ """Set the final answer from a variable and terminate the RLM loop."""
45
+ global __final_result__
46
+ __final_result__ = str(x) if x is not None else None
47
+
48
+
49
+ def llm_query(sub_context: str, instruction: str = "") -> str:
50
+ """Send a sub-context and instruction to the parent LLM and return the response.
51
+
52
+ Can be called synchronously from regular code, or used with await in async code.
53
+ For parallel queries, use:
54
+ results = await asyncio.gather(
55
+ async_llm_query(ctx1, instr1),
56
+ async_llm_query(ctx2, instr2),
57
+ )
58
+ """
59
+ # If called with just one arg, treat the whole thing as context+instruction combined
60
+ if not instruction:
61
+ instruction = ""
62
+
63
+ request_id = uuid.uuid4().hex[:12]
64
+ request = {
65
+ "type": "llm_query",
66
+ "sub_context": sub_context,
67
+ "instruction": instruction,
68
+ "id": request_id,
69
+ }
70
+ _real_stdout.write(json.dumps(request) + "\n")
71
+ _real_stdout.flush()
72
+
73
+ # Block until the TypeScript host sends back the result
74
+ while True:
75
+ line = _real_stdin.readline()
76
+ if not line:
77
+ raise RuntimeError("REPL stdin closed unexpectedly")
78
+ line = line.strip()
79
+ if not line:
80
+ continue
81
+ try:
82
+ response = json.loads(line)
83
+ except json.JSONDecodeError:
84
+ continue
85
+ if response.get("type") == "llm_result" and response.get("id") == request_id:
86
+ return response.get("result", "")
87
+
88
+
89
+ async def async_llm_query(sub_context: str, instruction: str = "") -> str:
90
+ """Async wrapper around llm_query for use with asyncio.gather().
91
+
92
+ Usage:
93
+ import asyncio
94
+ results = await asyncio.gather(
95
+ async_llm_query(chunk1, "summarize"),
96
+ async_llm_query(chunk2, "summarize"),
97
+ )
98
+ """
99
+ loop = asyncio.get_event_loop()
100
+ return await loop.run_in_executor(None, llm_query, sub_context, instruction)
101
+
102
+
103
+ def _execute_code(code: str) -> None:
104
+ """Execute a code snippet in the module's global scope, capturing output."""
105
+ global __final_result__
106
+ captured_stdout = io.StringIO()
107
+ captured_stderr = io.StringIO()
108
+ old_stdout = sys.stdout
109
+ old_stderr = sys.stderr
110
+
111
+ try:
112
+ sys.stdout = captured_stdout
113
+ sys.stderr = captured_stderr
114
+ # Support both sync and async code (await expressions)
115
+ try:
116
+ # Try to compile as regular code first
117
+ compiled = compile(code, "<repl>", "exec")
118
+ exec(compiled, globals())
119
+ except SyntaxError as e:
120
+ if "await" in str(code):
121
+ # Code contains await — run it in an async context
122
+ async_code = f"async def __async_exec__():\n"
123
+ for line in code.split("\n"):
124
+ async_code += f" {line}\n"
125
+ async_code += "\nimport asyncio\nasyncio.get_event_loop().run_until_complete(__async_exec__())"
126
+ exec(compile(async_code, "<repl>", "exec"), globals())
127
+ else:
128
+ raise e
129
+ except Exception:
130
+ traceback.print_exc(file=captured_stderr)
131
+ finally:
132
+ sys.stdout = old_stdout
133
+ sys.stderr = old_stderr
134
+
135
+ stdout_val = captured_stdout.getvalue()
136
+ stderr_val = captured_stderr.getvalue()
137
+
138
+ result = {
139
+ "type": "exec_done",
140
+ "stdout": stdout_val,
141
+ "stderr": stderr_val,
142
+ "has_final": __final_result__ is not None,
143
+ "final_value": str(__final_result__) if __final_result__ is not None else None,
144
+ }
145
+ _real_stdout.write(json.dumps(result) + "\n")
146
+ _real_stdout.flush()
147
+
148
+
149
+ def _main_loop() -> None:
150
+ """Read execution requests from stdin in a loop."""
151
+ while True:
152
+ line = _real_stdin.readline()
153
+ if not line:
154
+ break
155
+ line = line.strip()
156
+ if not line:
157
+ continue
158
+ try:
159
+ msg = json.loads(line)
160
+ except json.JSONDecodeError:
161
+ continue
162
+
163
+ if msg.get("type") == "exec":
164
+ _execute_code(msg.get("code", ""))
165
+ elif msg.get("type") == "set_context":
166
+ global context
167
+ context = msg.get("value", "")
168
+ ack = {"type": "context_set"}
169
+ _real_stdout.write(json.dumps(ack) + "\n")
170
+ _real_stdout.flush()
171
+ elif msg.get("type") == "reset_final":
172
+ global __final_result__
173
+ __final_result__ = None
174
+ ack = {"type": "final_reset"}
175
+ _real_stdout.write(json.dumps(ack) + "\n")
176
+ _real_stdout.flush()
177
+ elif msg.get("type") == "shutdown":
178
+ break
179
+
180
+
181
+ if __name__ == "__main__":
182
+ ready = {"type": "ready"}
183
+ _real_stdout.write(json.dumps(ready) + "\n")
184
+ _real_stdout.flush()
185
+ _main_loop()
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * RLM Trajectory Viewer — interactive TUI for browsing saved trajectory JSON files.
4
+ *
5
+ * Navigate through iterations with arrow keys, view code, REPL output,
6
+ * sub-queries, and the final answer in a beautifully formatted display.
7
+ *
8
+ * Usage:
9
+ * rlm viewer # pick from list
10
+ * rlm viewer trajectories/file.json # open specific file
11
+ */
12
+ export {};