rlm-cli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/rlm.js ADDED
@@ -0,0 +1,354 @@
1
+ /**
2
+ * RLM Loop — implements Algorithm 1 from "Recursive Language Models" (arXiv:2512.24601).
3
+ *
4
+ * The loop works as follows:
5
+ * 1. Inject the full context into a persistent Python REPL as a variable.
6
+ * 2. Send the LLM metadata about the context plus the user's query.
7
+ * The LLM writes Python code that can inspect/slice/query `context`,
8
+ * call `llm_query()` recursively, and call FINAL() when done.
9
+ * 3. Execute the code, capture stdout.
10
+ * 4. If FINAL is set, return it. Otherwise loop.
11
+ */
12
+ import { completeSimple, } from "@mariozechner/pi-ai";
13
+ import { loadConfig } from "./config.js";
14
// ── Load config ─────────────────────────────────────────────────────────────
// Loaded once at module import time. Fields read in this module:
// truncate_len, max_iterations, max_sub_queries, metadata_preview_lines.
const config = loadConfig();
16
// ── System prompt (inspired by fast-rlm) ────────────────────────────────────
/**
 * Build the system prompt for the root RLM agent.
 *
 * Documents the REPL environment (the `context` variable, `llm_query` /
 * `async_llm_query`, and the FINAL / FINAL_VAR terminators), the loop rules,
 * the required output format (a single ```python block), and example
 * strategies. Interpolates `config.truncate_len` so the model knows how much
 * printed output survives each turn.
 *
 * @returns {string} The full system prompt text.
 */
function buildSystemPrompt() {
    return `You are a Recursive Language Model (RLM) agent. You process large contexts by writing Python code that runs in a persistent REPL.

## Available in the REPL

1. A \`context\` variable containing the full input text (may be very large). You should check the content of the \`context\` variable to understand what you are working with.

2. A \`llm_query(sub_context, instruction)\` function that sends a sub-piece of the context to an LLM with an instruction and returns the response. Use this for summarization, extraction, classification, etc. on chunks. For parallel queries, use \`async_llm_query()\` with \`asyncio.gather()\`.

3. Two functions to return your answer:
   - \`FINAL("your answer")\` — provide the answer as a string
   - \`FINAL_VAR(variable)\` — return a variable you built up in the REPL

## Rules

1. Write valid Python 3 code. You have access to the standard library.
2. Use \`print()\` to output metadata/intermediate results visible in the next iteration.
3. Use \`len(context)\` and slicing to understand the context size before processing.
4. For large contexts, split into chunks and use \`llm_query()\` on each chunk, then aggregate.
5. Call \`FINAL("answer")\` or \`FINAL_VAR(var)\` only when you have a complete answer.
6. Do NOT call FINAL prematurely — if you need more iterations, just print your intermediate state.
7. Be efficient: minimize the number of \`llm_query()\` calls by using smart chunking.
8. Print output will be truncated to last ${config.truncate_len} characters. Keep printed output concise.

## How to control sub-agent behavior

- When calling \`llm_query()\`, give clear instructions at the beginning of the context. If you only pass context without instructions, the sub-agent cannot do its task.
- To extract data verbatim: instruct the sub-agent to use \`FINAL_VAR\` and slice important sections.
- To summarize or analyze: instruct the sub-agent to explore and generate the answer.
- Help sub-agents by describing the data format (dict, list, etc.) — clarity is important!

## Important notes

- This is a multi-turn environment. You do NOT need to answer in one shot.
- Before returning via FINAL, it is advisable to print the answer first to inspect formatting.
- The REPL persists state like a Jupyter notebook — past variables and code are maintained. Do NOT rewrite old code or accidentally delete the \`context\` variable.
- You will only see truncated outputs, so use \`llm_query()\` for semantic analysis of large text.
- You can use variables as buffers to build up your final answer across iterations.

## Output format

Respond with ONLY a Python code block. No explanation before or after.

\`\`\`python
# Your working python code
print(f"Context length: {len(context)} chars")
\`\`\`

## Example strategies

**Chunking for large contexts:**
\`\`\`python
chunk_size = len(context) // 5
buffers = []
for i in range(5):
    start = i * chunk_size
    end = (i + 1) * chunk_size if i < 4 else len(context)
    chunk = context[start:end]
    result = llm_query(chunk, f"Extract key information relevant to: {query}")
    buffers.append(result)
    print(f"Chunk {i+1}/5 done: {len(result)} chars")
\`\`\`

**Parallel queries with asyncio:**
\`\`\`python
import asyncio
tasks = []
for i, chunk in enumerate(chunks):
    tasks.append(async_llm_query(chunk, f"Summarize chunk {i}"))
results = await asyncio.gather(*tasks)
\`\`\`

**Building up a final answer:**
\`\`\`python
# After collecting all results in a buffer
final_answer = llm_query("\\n".join(buffers), f"Synthesize these summaries to answer: {query}")
FINAL(final_answer)
\`\`\``;
}
96
// ── Abort helper ────────────────────────────────────────────────────────
/**
 * Race `promise` against an AbortSignal so an abort (e.g. Ctrl+C) settles
 * the result early with a rejection.
 *
 * - No signal: the promise is returned untouched.
 * - Signal already aborted: an immediately-rejected promise is returned.
 * - Otherwise: whichever settles first wins, and the abort listener is
 *   always detached afterwards so the signal does not accumulate handlers.
 */
function raceAbort(promise, signal) {
    if (!signal) return promise;
    if (signal.aborted) return Promise.reject(new Error("Aborted"));
    let detach = () => { };
    const aborted = new Promise((_resolve, reject) => {
        const handler = () => reject(new Error("Aborted"));
        signal.addEventListener("abort", handler, { once: true });
        detach = () => signal.removeEventListener("abort", handler);
    });
    return Promise.race([promise, aborted]).finally(() => detach());
}
113
// ── Helpers ─────────────────────────────────────────────────────────────────
/**
 * Build the human-readable metadata summary sent to the LLM on the first
 * turn: character and line counts, plus the first and last
 * `config.metadata_preview_lines` lines of the context.
 */
function buildContextMetadata(context) {
    const allLines = context.split("\n");
    const n = config.metadata_preview_lines;
    const head = allLines.slice(0, n).join("\n");
    const tail = allLines.slice(-n).join("\n");
    const report = [
        `Context statistics:`,
        ` - ${context.length.toLocaleString()} characters`,
        ` - ${allLines.length.toLocaleString()} lines`,
        ``,
        `First ${n} lines:`,
        head,
        ``,
        `Last ${n} lines:`,
        tail,
    ];
    return report.join("\n");
}
132
/**
 * Pull the Python code out of an LLM response.
 *
 * Scans the response's text blocks in order. A fenced ```python / ```repl /
 * bare ``` block wins; failing that, a text block that "looks like" raw
 * Python (contains an assignment, print, import, `for ` or `def `, and does
 * not start with a comment) is returned verbatim. Returns null when no
 * usable code is found.
 */
function extractCodeFromResponse(response) {
    const fenceRe = /```(?:python|repl)?\s*\n([\s\S]*?)```/;
    const pythonHints = ["=", "print", "import", "for ", "def "];
    for (const part of response.content) {
        if (part.type !== "text") continue;
        const fenced = fenceRe.exec(part.text);
        if (fenced) return fenced[1].trim();
        // Fallback heuristic for responses that forgot the fence.
        const bare = part.text.trim();
        const looksLikeCode = bare.length > 0 &&
            !bare.startsWith("#") &&
            pythonHints.some((hint) => bare.includes(hint));
        if (looksLikeCode) return bare;
    }
    return null;
}
155
/**
 * Clamp REPL output to the configured length before feeding it back to the
 * LLM. Empty output becomes an explicit placeholder; oversized output keeps
 * only the last `config.truncate_len` characters behind a marker prefix.
 */
function truncateOutput(text) {
    const limit = config.truncate_len;
    if (text.length > limit) {
        return `[TRUNCATED: Last ${limit} chars shown].. ${text.slice(-limit)}`;
    }
    return text.length === 0 ? "[EMPTY OUTPUT]" : text;
}
163
// ── Main loop ───────────────────────────────────────────────────────────────
/**
 * Run the outer RLM loop (Algorithm 1 from the paper cited in the file
 * header): repeatedly ask the model for Python code, execute it in the
 * persistent REPL, and feed truncated output back until FINAL is set or
 * `config.max_iterations` is exhausted.
 *
 * @param options.context Full input text; injected into the REPL as `context`.
 * @param options.query   The user's question.
 * @param options.model   Model descriptor passed to completeSimple().
 * @param options.repl    Persistent REPL (setContext / resetFinal /
 *                        setLlmQueryHandler / execute / start / shutdown).
 * @param options.signal  Optional AbortSignal; an abort yields "[Aborted]".
 * @param options.onProgress,onSubQueryStart,onSubQuery Optional callbacks.
 * @returns {{answer, iterations, totalSubQueries, completed}}
 */
export async function runRlmLoop(options) {
    const { context, query, model, repl, signal, onProgress, onSubQueryStart, onSubQuery } = options;
    let totalSubQueries = 0;
    let iterationSubQueries = 0;
    // Bridge invoked by the Python side's llm_query()/async_llm_query():
    // enforces the global sub-query budget, then relays one chunk + an
    // instruction to the model and returns the plain-text answer.
    const llmQueryHandler = async (subContext, instruction) => {
        if (signal?.aborted)
            throw new Error("Aborted");
        if (totalSubQueries >= config.max_sub_queries) {
            // Budget exhausted: return an error string (not a throw) so the
            // Python code can still finish gracefully.
            return `[ERROR] Maximum sub-query limit (${config.max_sub_queries}) reached. Call FINAL() with your best answer.`;
        }
        ++totalSubQueries;
        const queryIndex = ++iterationSubQueries;
        const sqStart = Date.now();
        onSubQueryStart?.({
            index: queryIndex,
            contextLength: subContext.length,
            instruction,
        });
        const response = await raceAbort(completeSimple(model, {
            systemPrompt: `You are a helpful assistant. Answer the user's question based on the provided context. Respond in natural language (not code). Be concise but thorough.`,
            messages: [
                {
                    role: "user",
                    content: `Context:\n${subContext}\n\nInstruction: ${instruction}`,
                    timestamp: Date.now(),
                },
            ],
        }), signal);
        const textParts = response.content.filter((b) => b.type === "text").map((b) => b.text);
        const result = textParts.join("\n");
        onSubQuery?.({
            index: queryIndex,
            contextLength: subContext.length,
            instruction,
            resultLength: result.length,
            resultPreview: result,
            elapsedMs: Date.now() - sqStart,
        });
        return result;
    };
    /** Set up (or re-set up) the REPL with context and handler. */
    async function initRepl() {
        await repl.setContext(context);
        await repl.resetFinal();
        repl.setLlmQueryHandler(llmQueryHandler);
    }
    await initRepl();
    const metadata = buildContextMetadata(context);
    // First turn: context stats + previews + the query (never the raw context).
    const conversationHistory = [
        {
            role: "user",
            content: `${metadata}\n\nQuery: ${query}`,
            timestamp: Date.now(),
        },
    ];
    // One iteration = one model turn + one REPL execution.
    for (let iteration = 1; iteration <= config.max_iterations; iteration++) {
        iterationSubQueries = 0; // per-iteration counter used for callback indices
        if (signal?.aborted) {
            return { answer: "[Aborted]", iterations: iteration, totalSubQueries, completed: false };
        }
        // Surface the latest user message to the progress callback.
        const lastUserMsg = conversationHistory
            .filter((m) => m.role === "user")
            .at(-1);
        const userMsgText = typeof lastUserMsg?.content === "string"
            ? lastUserMsg.content
            : "";
        onProgress?.({
            iteration,
            maxIterations: config.max_iterations,
            subQueries: totalSubQueries,
            phase: "generating_code",
            userMessage: userMsgText,
            systemPrompt: iteration === 1 ? buildSystemPrompt() : undefined,
        });
        const response = await raceAbort(completeSimple(model, {
            systemPrompt: buildSystemPrompt(),
            messages: conversationHistory,
        }), signal);
        if (signal?.aborted) {
            return { answer: "[Aborted]", iterations: iteration, totalSubQueries, completed: false };
        }
        // Surface API errors
        if ("errorMessage" in response && response.errorMessage) {
            const errMsg = response.errorMessage;
            if (errMsg.includes("authentication") || errMsg.includes("401")) {
                // Auth failures cannot recover by retrying — bail out with advice.
                return {
                    answer: `[API Authentication Error] ${errMsg}\n\nCheck your ANTHROPIC_API_KEY in .env.`,
                    iterations: iteration,
                    totalSubQueries,
                    completed: false,
                };
            }
            // Non-auth errors are logged and the loop continues with whatever
            // content the response carried.
            process.stderr.write(`[rlm] API error: ${errMsg}\n`);
        }
        const rawResponseText = response.content
            .filter((b) => b.type === "text")
            .map((b) => b.text)
            .join("\n");
        const code = extractCodeFromResponse(response);
        if (!code) {
            // No code block found — might be a direct answer or extraction failure
            conversationHistory.push(response);
            conversationHistory.push({
                role: "user",
                content: "Error: Could not extract code. Make sure to wrap your code in ```python ... ``` blocks.",
                timestamp: Date.now(),
            });
            continue;
        }
        conversationHistory.push(response);
        onProgress?.({
            iteration,
            maxIterations: config.max_iterations,
            subQueries: totalSubQueries,
            phase: "executing",
            code,
            rawResponse: rawResponseText,
        });
        let execResult;
        try {
            execResult = await repl.execute(code);
        }
        catch (err) {
            if (signal?.aborted) {
                return { answer: "[Aborted]", iterations: iteration, totalSubQueries, completed: false };
            }
            const errorMsg = err instanceof Error ? err.message : String(err);
            // If the REPL timed out or crashed, restart it so next iteration works
            if (errorMsg.includes("Timeout") || errorMsg.includes("not running") || errorMsg.includes("shut down")) {
                try {
                    // NOTE(review): shutdown() is not awaited here — presumably
                    // synchronous/fire-and-forget; confirm against the REPL class.
                    repl.shutdown();
                    await repl.start(signal);
                    await initRepl();
                }
                catch {
                    return { answer: "[REPL crashed and could not restart]", iterations: iteration, totalSubQueries, completed: false };
                }
            }
            // Feed the failure back so the model can fix its own code.
            conversationHistory.push({
                role: "user",
                content: `Execution error: ${errorMsg}\n\nPlease fix the code and try again.`,
                timestamp: Date.now(),
            });
            continue;
        }
        if (signal?.aborted) {
            return { answer: "[Aborted]", iterations: iteration, totalSubQueries, completed: false };
        }
        onProgress?.({
            iteration,
            maxIterations: config.max_iterations,
            subQueries: totalSubQueries,
            phase: "checking_final",
            stdout: execResult.stdout,
            stderr: execResult.stderr,
        });
        // FINAL()/FINAL_VAR() was called on the Python side — we are done.
        if (execResult.hasFinal && execResult.finalValue !== null) {
            return {
                answer: execResult.finalValue,
                iterations: iteration,
                totalSubQueries,
                completed: true,
            };
        }
        // Build next user message with truncated output
        const parts = [];
        if (execResult.stdout) {
            parts.push(`Output:\n${truncateOutput(execResult.stdout)}`);
        }
        if (execResult.stderr) {
            parts.push(`Stderr:\n${execResult.stderr.slice(0, 5000)}`);
        }
        if (parts.length === 0) {
            parts.push("(No output produced. The code ran without printing anything.)");
        }
        parts.push(`\nIteration ${iteration}/${config.max_iterations}. Sub-queries used: ${totalSubQueries}/${config.max_sub_queries}.`);
        parts.push("Continue processing or call FINAL() when you have the answer.");
        conversationHistory.push({
            role: "user",
            content: parts.join("\n\n"),
            timestamp: Date.now(),
        });
    }
    // Iteration budget exhausted without FINAL.
    return {
        answer: "[Maximum iterations reached without calling FINAL]",
        iterations: config.max_iterations,
        totalSubQueries,
        completed: false,
    };
}
354
+ //# sourceMappingURL=rlm.js.map
@@ -0,0 +1,185 @@
1
+ """
2
+ RLM Runtime — Python-side helpers for the Recursive Language Model CLI.
3
+
4
+ This module runs in a persistent Python subprocess. It provides:
5
+ - `context`: the full prompt/document as a string variable
6
+ - `llm_query(sub_context, instruction)`: bridge to parent LLM for sub-queries
7
+ - `FINAL(x)`: set final answer string and terminate loop
8
+ - `FINAL_VAR(x)`: set final answer from a variable
9
+
10
+ Communication protocol (line-delimited JSON over stdio):
11
+ -> stdout: {"type":"llm_query","sub_context":"...","instruction":"...","id":"..."}
12
+ <- stdin: {"type":"llm_result","id":"...","result":"..."}
13
+ -> stdout: {"type":"exec_done","stdout":"...","stderr":"...","has_final":bool,"final_value":"..."|null}
14
+
15
+ All protocol I/O uses saved references to the original sys.stdout/sys.stdin
16
+ so that exec'd code can freely redirect sys.stdout for print() capture.
17
+ """
18
+
19
+ import json
20
+ import sys
21
+ import uuid
22
+ import io
23
+ import traceback
24
+ import asyncio
25
+
26
+ # Real stdio handles — saved before exec() can redirect sys.stdout/sys.stderr.
27
+ _real_stdout = sys.stdout
28
+ _real_stdin = sys.stdin
29
+
30
+ # Will be set by the TypeScript host before each execution
31
+ context: str = ""
32
+
33
+ # Sentinel — when set to a non-None value, the loop terminates
34
+ __final_result__ = None
35
+
36
+
37
+ def FINAL(x):
38
+ """Set the final answer as a string and terminate the RLM loop."""
39
+ global __final_result__
40
+ __final_result__ = str(x)
41
+
42
+
43
+ def FINAL_VAR(x):
44
+ """Set the final answer from a variable and terminate the RLM loop."""
45
+ global __final_result__
46
+ __final_result__ = str(x) if x is not None else None
47
+
48
+
49
+ def llm_query(sub_context: str, instruction: str = "") -> str:
50
+ """Send a sub-context and instruction to the parent LLM and return the response.
51
+
52
+ Can be called synchronously from regular code, or used with await in async code.
53
+ For parallel queries, use:
54
+ results = await asyncio.gather(
55
+ async_llm_query(ctx1, instr1),
56
+ async_llm_query(ctx2, instr2),
57
+ )
58
+ """
59
+ # If called with just one arg, treat the whole thing as context+instruction combined
60
+ if not instruction:
61
+ instruction = ""
62
+
63
+ request_id = uuid.uuid4().hex[:12]
64
+ request = {
65
+ "type": "llm_query",
66
+ "sub_context": sub_context,
67
+ "instruction": instruction,
68
+ "id": request_id,
69
+ }
70
+ _real_stdout.write(json.dumps(request) + "\n")
71
+ _real_stdout.flush()
72
+
73
+ # Block until the TypeScript host sends back the result
74
+ while True:
75
+ line = _real_stdin.readline()
76
+ if not line:
77
+ raise RuntimeError("REPL stdin closed unexpectedly")
78
+ line = line.strip()
79
+ if not line:
80
+ continue
81
+ try:
82
+ response = json.loads(line)
83
+ except json.JSONDecodeError:
84
+ continue
85
+ if response.get("type") == "llm_result" and response.get("id") == request_id:
86
+ return response.get("result", "")
87
+
88
+
89
+ async def async_llm_query(sub_context: str, instruction: str = "") -> str:
90
+ """Async wrapper around llm_query for use with asyncio.gather().
91
+
92
+ Usage:
93
+ import asyncio
94
+ results = await asyncio.gather(
95
+ async_llm_query(chunk1, "summarize"),
96
+ async_llm_query(chunk2, "summarize"),
97
+ )
98
+ """
99
+ loop = asyncio.get_event_loop()
100
+ return await loop.run_in_executor(None, llm_query, sub_context, instruction)
101
+
102
+
103
+ def _execute_code(code: str) -> None:
104
+ """Execute a code snippet in the module's global scope, capturing output."""
105
+ global __final_result__
106
+ captured_stdout = io.StringIO()
107
+ captured_stderr = io.StringIO()
108
+ old_stdout = sys.stdout
109
+ old_stderr = sys.stderr
110
+
111
+ try:
112
+ sys.stdout = captured_stdout
113
+ sys.stderr = captured_stderr
114
+ # Support both sync and async code (await expressions)
115
+ try:
116
+ # Try to compile as regular code first
117
+ compiled = compile(code, "<repl>", "exec")
118
+ exec(compiled, globals())
119
+ except SyntaxError as e:
120
+ if "await" in str(code):
121
+ # Code contains await — run it in an async context
122
+ async_code = f"async def __async_exec__():\n"
123
+ for line in code.split("\n"):
124
+ async_code += f" {line}\n"
125
+ async_code += "\nimport asyncio\nasyncio.get_event_loop().run_until_complete(__async_exec__())"
126
+ exec(compile(async_code, "<repl>", "exec"), globals())
127
+ else:
128
+ raise e
129
+ except Exception:
130
+ traceback.print_exc(file=captured_stderr)
131
+ finally:
132
+ sys.stdout = old_stdout
133
+ sys.stderr = old_stderr
134
+
135
+ stdout_val = captured_stdout.getvalue()
136
+ stderr_val = captured_stderr.getvalue()
137
+
138
+ result = {
139
+ "type": "exec_done",
140
+ "stdout": stdout_val,
141
+ "stderr": stderr_val,
142
+ "has_final": __final_result__ is not None,
143
+ "final_value": str(__final_result__) if __final_result__ is not None else None,
144
+ }
145
+ _real_stdout.write(json.dumps(result) + "\n")
146
+ _real_stdout.flush()
147
+
148
+
149
+ def _main_loop() -> None:
150
+ """Read execution requests from stdin in a loop."""
151
+ while True:
152
+ line = _real_stdin.readline()
153
+ if not line:
154
+ break
155
+ line = line.strip()
156
+ if not line:
157
+ continue
158
+ try:
159
+ msg = json.loads(line)
160
+ except json.JSONDecodeError:
161
+ continue
162
+
163
+ if msg.get("type") == "exec":
164
+ _execute_code(msg.get("code", ""))
165
+ elif msg.get("type") == "set_context":
166
+ global context
167
+ context = msg.get("value", "")
168
+ ack = {"type": "context_set"}
169
+ _real_stdout.write(json.dumps(ack) + "\n")
170
+ _real_stdout.flush()
171
+ elif msg.get("type") == "reset_final":
172
+ global __final_result__
173
+ __final_result__ = None
174
+ ack = {"type": "final_reset"}
175
+ _real_stdout.write(json.dumps(ack) + "\n")
176
+ _real_stdout.flush()
177
+ elif msg.get("type") == "shutdown":
178
+ break
179
+
180
+
181
+ if __name__ == "__main__":
182
+ ready = {"type": "ready"}
183
+ _real_stdout.write(json.dumps(ready) + "\n")
184
+ _real_stdout.flush()
185
+ _main_loop()
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * RLM Trajectory Viewer — interactive TUI for browsing saved trajectory JSON files.
4
+ *
5
+ * Navigate through iterations with arrow keys, view code, REPL output,
6
+ * sub-queries, and the final answer in a beautifully formatted display.
7
+ *
8
+ * Usage:
9
+ * rlm viewer # pick from list
10
+ * rlm viewer trajectories/file.json # open specific file
11
+ */
12
+ export {};