matryoshka-rlm 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +259 -105
- package/config.example.json +0 -5
- package/dist/adapters/base.d.ts +43 -0
- package/dist/adapters/base.d.ts.map +1 -0
- package/dist/adapters/base.js +181 -0
- package/dist/adapters/base.js.map +1 -0
- package/dist/adapters/deepseek.d.ts +14 -0
- package/dist/adapters/deepseek.d.ts.map +1 -0
- package/dist/adapters/deepseek.js +139 -0
- package/dist/adapters/deepseek.js.map +1 -0
- package/dist/adapters/index.d.ts +39 -0
- package/dist/adapters/index.d.ts.map +1 -0
- package/dist/adapters/index.js +90 -0
- package/dist/adapters/index.js.map +1 -0
- package/dist/adapters/nucleus.d.ts +18 -0
- package/dist/adapters/nucleus.d.ts.map +1 -0
- package/dist/adapters/nucleus.js +323 -0
- package/dist/adapters/nucleus.js.map +1 -0
- package/dist/adapters/qwen-barliman.d.ts +16 -0
- package/dist/adapters/qwen-barliman.d.ts.map +1 -0
- package/dist/adapters/qwen-barliman.js +165 -0
- package/dist/adapters/qwen-barliman.js.map +1 -0
- package/dist/adapters/qwen-synthesis.d.ts +13 -0
- package/dist/adapters/qwen-synthesis.d.ts.map +1 -0
- package/dist/adapters/qwen-synthesis.js +329 -0
- package/dist/adapters/qwen-synthesis.js.map +1 -0
- package/dist/adapters/qwen.d.ts +14 -0
- package/dist/adapters/qwen.d.ts.map +1 -0
- package/dist/adapters/qwen.js +216 -0
- package/dist/adapters/qwen.js.map +1 -0
- package/dist/adapters/types.d.ts +78 -0
- package/dist/adapters/types.d.ts.map +1 -0
- package/dist/adapters/types.js +9 -0
- package/dist/adapters/types.js.map +1 -0
- package/dist/config.d.ts +6 -6
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +0 -6
- package/dist/config.js.map +1 -1
- package/dist/constraints/index.d.ts +10 -0
- package/dist/constraints/index.d.ts.map +1 -0
- package/dist/constraints/index.js +9 -0
- package/dist/constraints/index.js.map +1 -0
- package/dist/constraints/types.d.ts +78 -0
- package/dist/constraints/types.d.ts.map +1 -0
- package/dist/constraints/types.js +45 -0
- package/dist/constraints/types.js.map +1 -0
- package/dist/constraints/verifier.d.ts +24 -0
- package/dist/constraints/verifier.d.ts.map +1 -0
- package/dist/constraints/verifier.js +228 -0
- package/dist/constraints/verifier.js.map +1 -0
- package/dist/engine/index.d.ts +7 -0
- package/dist/engine/index.d.ts.map +1 -0
- package/dist/engine/index.js +7 -0
- package/dist/engine/index.js.map +1 -0
- package/dist/engine/nucleus-engine.d.ts +115 -0
- package/dist/engine/nucleus-engine.d.ts.map +1 -0
- package/dist/engine/nucleus-engine.js +342 -0
- package/dist/engine/nucleus-engine.js.map +1 -0
- package/dist/feedback/error-analyzer.d.ts +35 -0
- package/dist/feedback/error-analyzer.d.ts.map +1 -0
- package/dist/feedback/error-analyzer.js +346 -0
- package/dist/feedback/error-analyzer.js.map +1 -0
- package/dist/feedback/execution-feedback.d.ts +31 -0
- package/dist/feedback/execution-feedback.d.ts.map +1 -0
- package/dist/feedback/execution-feedback.js +169 -0
- package/dist/feedback/execution-feedback.js.map +1 -0
- package/dist/index.js +56 -1
- package/dist/index.js.map +1 -1
- package/dist/lattice-mcp-server.d.ts +20 -0
- package/dist/lattice-mcp-server.d.ts.map +1 -0
- package/dist/lattice-mcp-server.js +363 -0
- package/dist/lattice-mcp-server.js.map +1 -0
- package/dist/lib.d.ts +17 -0
- package/dist/lib.d.ts.map +1 -0
- package/dist/lib.js +24 -0
- package/dist/lib.js.map +1 -0
- package/dist/llm/deepseek.d.ts.map +1 -1
- package/dist/llm/deepseek.js +11 -6
- package/dist/llm/deepseek.js.map +1 -1
- package/dist/llm/index.d.ts +41 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +69 -4
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/ollama.d.ts.map +1 -1
- package/dist/llm/ollama.js +14 -9
- package/dist/llm/ollama.js.map +1 -1
- package/dist/llm/types.d.ts +5 -1
- package/dist/llm/types.d.ts.map +1 -1
- package/dist/logic/constraint-resolver.d.ts +34 -0
- package/dist/logic/constraint-resolver.d.ts.map +1 -0
- package/dist/logic/constraint-resolver.js +214 -0
- package/dist/logic/constraint-resolver.js.map +1 -0
- package/dist/logic/index.d.ts +9 -0
- package/dist/logic/index.d.ts.map +1 -0
- package/dist/logic/index.js +9 -0
- package/dist/logic/index.js.map +1 -0
- package/dist/logic/lc-compiler.d.ts +25 -0
- package/dist/logic/lc-compiler.d.ts.map +1 -0
- package/dist/logic/lc-compiler.js +174 -0
- package/dist/logic/lc-compiler.js.map +1 -0
- package/dist/logic/lc-interpreter.d.ts +63 -0
- package/dist/logic/lc-interpreter.d.ts.map +1 -0
- package/dist/logic/lc-interpreter.js +276 -0
- package/dist/logic/lc-interpreter.js.map +1 -0
- package/dist/logic/lc-parser.d.ts +26 -0
- package/dist/logic/lc-parser.d.ts.map +1 -0
- package/dist/logic/lc-parser.js +757 -0
- package/dist/logic/lc-parser.js.map +1 -0
- package/dist/logic/lc-solver.d.ts +60 -0
- package/dist/logic/lc-solver.d.ts.map +1 -0
- package/dist/logic/lc-solver.js +1005 -0
- package/dist/logic/lc-solver.js.map +1 -0
- package/dist/logic/relational-solver.d.ts +45 -0
- package/dist/logic/relational-solver.d.ts.map +1 -0
- package/dist/logic/relational-solver.js +606 -0
- package/dist/logic/relational-solver.js.map +1 -0
- package/dist/logic/synthesis-integrator.d.ts +133 -0
- package/dist/logic/synthesis-integrator.d.ts.map +1 -0
- package/dist/logic/synthesis-integrator.js +798 -0
- package/dist/logic/synthesis-integrator.js.map +1 -0
- package/dist/logic/type-inference.d.ts +36 -0
- package/dist/logic/type-inference.d.ts.map +1 -0
- package/dist/logic/type-inference.js +287 -0
- package/dist/logic/type-inference.js.map +1 -0
- package/dist/logic/types.d.ts +343 -0
- package/dist/logic/types.d.ts.map +1 -0
- package/dist/logic/types.js +8 -0
- package/dist/logic/types.js.map +1 -0
- package/dist/mcp-server.d.ts +4 -0
- package/dist/mcp-server.d.ts.map +1 -1
- package/dist/mcp-server.js +151 -27
- package/dist/mcp-server.js.map +1 -1
- package/dist/minikanren/common.d.ts +17 -0
- package/dist/minikanren/common.d.ts.map +1 -0
- package/dist/minikanren/common.js +59 -0
- package/dist/minikanren/common.js.map +1 -0
- package/dist/minikanren/goals.d.ts +10 -0
- package/dist/minikanren/goals.d.ts.map +1 -0
- package/dist/minikanren/goals.js +49 -0
- package/dist/minikanren/goals.js.map +1 -0
- package/dist/minikanren/index.d.ts +12 -0
- package/dist/minikanren/index.d.ts.map +1 -0
- package/dist/minikanren/index.js +16 -0
- package/dist/minikanren/index.js.map +1 -0
- package/dist/minikanren/ramo.d.ts +9 -0
- package/dist/minikanren/ramo.d.ts.map +1 -0
- package/dist/minikanren/ramo.js +22 -0
- package/dist/minikanren/ramo.js.map +1 -0
- package/dist/minikanren/reify.d.ts +3 -0
- package/dist/minikanren/reify.d.ts.map +1 -0
- package/dist/minikanren/reify.js +27 -0
- package/dist/minikanren/reify.js.map +1 -0
- package/dist/minikanren/streams.d.ts +14 -0
- package/dist/minikanren/streams.d.ts.map +1 -0
- package/dist/minikanren/streams.js +44 -0
- package/dist/minikanren/streams.js.map +1 -0
- package/dist/minikanren/sugar.d.ts +16 -0
- package/dist/minikanren/sugar.d.ts.map +1 -0
- package/dist/minikanren/sugar.js +76 -0
- package/dist/minikanren/sugar.js.map +1 -0
- package/dist/minikanren/unify.d.ts +3 -0
- package/dist/minikanren/unify.d.ts.map +1 -0
- package/dist/minikanren/unify.js +31 -0
- package/dist/minikanren/unify.js.map +1 -0
- package/dist/rag/index.d.ts +12 -0
- package/dist/rag/index.d.ts.map +1 -0
- package/dist/rag/index.js +13 -0
- package/dist/rag/index.js.map +1 -0
- package/dist/rag/knowledge-base.d.ts +39 -0
- package/dist/rag/knowledge-base.d.ts.map +1 -0
- package/dist/rag/knowledge-base.js +227 -0
- package/dist/rag/knowledge-base.js.map +1 -0
- package/dist/rag/manager.d.ts +109 -0
- package/dist/rag/manager.d.ts.map +1 -0
- package/dist/rag/manager.js +236 -0
- package/dist/rag/manager.js.map +1 -0
- package/dist/rag/similarity.d.ts +63 -0
- package/dist/rag/similarity.d.ts.map +1 -0
- package/dist/rag/similarity.js +153 -0
- package/dist/rag/similarity.js.map +1 -0
- package/dist/repl/index.d.ts +8 -0
- package/dist/repl/index.d.ts.map +1 -0
- package/dist/repl/index.js +8 -0
- package/dist/repl/index.js.map +1 -0
- package/dist/repl/lattice-repl.d.ts +31 -0
- package/dist/repl/lattice-repl.d.ts.map +1 -0
- package/dist/repl/lattice-repl.js +334 -0
- package/dist/repl/lattice-repl.js.map +1 -0
- package/dist/repl/nucleus-repl.d.ts +31 -0
- package/dist/repl/nucleus-repl.d.ts.map +1 -0
- package/dist/repl/nucleus-repl.js +334 -0
- package/dist/repl/nucleus-repl.js.map +1 -0
- package/dist/rlm.d.ts +16 -4
- package/dist/rlm.d.ts.map +1 -1
- package/dist/rlm.js +768 -119
- package/dist/rlm.js.map +1 -1
- package/dist/sandbox/code-validator.d.ts +24 -0
- package/dist/sandbox/code-validator.d.ts.map +1 -0
- package/dist/sandbox/code-validator.js +195 -0
- package/dist/sandbox/code-validator.js.map +1 -0
- package/dist/sandbox.d.ts +4 -1
- package/dist/sandbox.d.ts.map +1 -1
- package/dist/sandbox.js +182 -12
- package/dist/sandbox.js.map +1 -1
- package/dist/session.d.ts +49 -0
- package/dist/session.d.ts.map +1 -0
- package/dist/session.js +78 -0
- package/dist/session.js.map +1 -0
- package/dist/synthesis/coordinator.d.ts +129 -0
- package/dist/synthesis/coordinator.d.ts.map +1 -0
- package/dist/synthesis/coordinator.js +456 -0
- package/dist/synthesis/coordinator.js.map +1 -0
- package/dist/synthesis/evalo/compile.d.ts +31 -0
- package/dist/synthesis/evalo/compile.d.ts.map +1 -0
- package/dist/synthesis/evalo/compile.js +135 -0
- package/dist/synthesis/evalo/compile.js.map +1 -0
- package/dist/synthesis/evalo/evalo.d.ts +45 -0
- package/dist/synthesis/evalo/evalo.d.ts.map +1 -0
- package/dist/synthesis/evalo/evalo.js +298 -0
- package/dist/synthesis/evalo/evalo.js.map +1 -0
- package/dist/synthesis/evalo/index.d.ts +18 -0
- package/dist/synthesis/evalo/index.d.ts.map +1 -0
- package/dist/synthesis/evalo/index.js +20 -0
- package/dist/synthesis/evalo/index.js.map +1 -0
- package/dist/synthesis/evalo/typeo.d.ts +47 -0
- package/dist/synthesis/evalo/typeo.d.ts.map +1 -0
- package/dist/synthesis/evalo/typeo.js +145 -0
- package/dist/synthesis/evalo/typeo.js.map +1 -0
- package/dist/synthesis/evalo/types.d.ts +84 -0
- package/dist/synthesis/evalo/types.d.ts.map +1 -0
- package/dist/synthesis/evalo/types.js +51 -0
- package/dist/synthesis/evalo/types.js.map +1 -0
- package/dist/synthesis/evolutionary.d.ts +88 -0
- package/dist/synthesis/evolutionary.d.ts.map +1 -0
- package/dist/synthesis/evolutionary.js +306 -0
- package/dist/synthesis/evolutionary.js.map +1 -0
- package/dist/synthesis/example-collector.d.ts +67 -0
- package/dist/synthesis/example-collector.d.ts.map +1 -0
- package/dist/synthesis/example-collector.js +159 -0
- package/dist/synthesis/example-collector.js.map +1 -0
- package/dist/synthesis/extractor/synthesis.d.ts +46 -0
- package/dist/synthesis/extractor/synthesis.d.ts.map +1 -0
- package/dist/synthesis/extractor/synthesis.js +441 -0
- package/dist/synthesis/extractor/synthesis.js.map +1 -0
- package/dist/synthesis/index.d.ts +13 -0
- package/dist/synthesis/index.d.ts.map +1 -0
- package/dist/synthesis/index.js +21 -0
- package/dist/synthesis/index.js.map +1 -0
- package/dist/synthesis/knowledge-base.d.ts +99 -0
- package/dist/synthesis/knowledge-base.d.ts.map +1 -0
- package/dist/synthesis/knowledge-base.js +229 -0
- package/dist/synthesis/knowledge-base.js.map +1 -0
- package/dist/synthesis/minikanren/core.d.ts +57 -0
- package/dist/synthesis/minikanren/core.d.ts.map +1 -0
- package/dist/synthesis/minikanren/core.js +203 -0
- package/dist/synthesis/minikanren/core.js.map +1 -0
- package/dist/synthesis/regex/synthesis.d.ts +48 -0
- package/dist/synthesis/regex/synthesis.d.ts.map +1 -0
- package/dist/synthesis/regex/synthesis.js +457 -0
- package/dist/synthesis/regex/synthesis.js.map +1 -0
- package/dist/synthesis/relational/coordinator.d.ts +114 -0
- package/dist/synthesis/relational/coordinator.d.ts.map +1 -0
- package/dist/synthesis/relational/coordinator.js +280 -0
- package/dist/synthesis/relational/coordinator.js.map +1 -0
- package/dist/synthesis/relational/engine.d.ts +123 -0
- package/dist/synthesis/relational/engine.d.ts.map +1 -0
- package/dist/synthesis/relational/engine.js +341 -0
- package/dist/synthesis/relational/engine.js.map +1 -0
- package/dist/synthesis/relational/interpreter.d.ts +95 -0
- package/dist/synthesis/relational/interpreter.d.ts.map +1 -0
- package/dist/synthesis/relational/interpreter.js +238 -0
- package/dist/synthesis/relational/interpreter.js.map +1 -0
- package/dist/synthesis/sandbox-tools.d.ts +28 -0
- package/dist/synthesis/sandbox-tools.d.ts.map +1 -0
- package/dist/synthesis/sandbox-tools.js +573 -0
- package/dist/synthesis/sandbox-tools.js.map +1 -0
- package/dist/tool/adapters/claude-code.d.ts +72 -0
- package/dist/tool/adapters/claude-code.d.ts.map +1 -0
- package/dist/tool/adapters/claude-code.js +210 -0
- package/dist/tool/adapters/claude-code.js.map +1 -0
- package/dist/tool/adapters/http.d.ts +114 -0
- package/dist/tool/adapters/http.d.ts.map +1 -0
- package/dist/tool/adapters/http.js +453 -0
- package/dist/tool/adapters/http.js.map +1 -0
- package/dist/tool/adapters/index.d.ts +12 -0
- package/dist/tool/adapters/index.d.ts.map +1 -0
- package/dist/tool/adapters/index.js +12 -0
- package/dist/tool/adapters/index.js.map +1 -0
- package/dist/tool/adapters/pipe.d.ts +67 -0
- package/dist/tool/adapters/pipe.d.ts.map +1 -0
- package/dist/tool/adapters/pipe.js +208 -0
- package/dist/tool/adapters/pipe.js.map +1 -0
- package/dist/tool/index.d.ts +17 -0
- package/dist/tool/index.d.ts.map +1 -0
- package/dist/tool/index.js +19 -0
- package/dist/tool/index.js.map +1 -0
- package/dist/tool/lattice-tool.d.ts +118 -0
- package/dist/tool/lattice-tool.d.ts.map +1 -0
- package/dist/tool/lattice-tool.js +304 -0
- package/dist/tool/lattice-tool.js.map +1 -0
- package/dist/tool/nucleus-tool.d.ts +118 -0
- package/dist/tool/nucleus-tool.d.ts.map +1 -0
- package/dist/tool/nucleus-tool.js +304 -0
- package/dist/tool/nucleus-tool.js.map +1 -0
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +101 -4
- package/dist/tools.js.map +1 -1
- package/dist/version.d.ts +8 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +28 -0
- package/dist/version.js.map +1 -0
- package/package.json +26 -7
package/README.md
CHANGED
|
@@ -4,10 +4,110 @@ Process documents 100x larger than your LLM's context window—without vector da
|
|
|
4
4
|
|
|
5
5
|
## The Problem
|
|
6
6
|
|
|
7
|
-
LLMs have fixed context windows. Traditional solutions (RAG, chunking) lose information or miss connections across chunks. RLM takes a different approach: the model
|
|
7
|
+
LLMs have fixed context windows. Traditional solutions (RAG, chunking) lose information or miss connections across chunks. RLM takes a different approach: the model reasons about your query and outputs symbolic commands that a logic engine executes against the document.
|
|
8
8
|
|
|
9
9
|
Based on the [Recursive Language Models paper](https://arxiv.org/abs/2512.24601).
|
|
10
10
|
|
|
11
|
+
## How It Works
|
|
12
|
+
|
|
13
|
+
Unlike traditional approaches where an LLM writes arbitrary code, RLM uses **[Nucleus](https://github.com/michaelwhitford/nucleus)**—a constrained symbolic language based on S-expressions. The LLM outputs Nucleus commands, which are parsed, type-checked, and executed by **Lattice**, our logic engine.
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
|
|
17
|
+
│ User Query │────▶│ LLM Reasons │────▶│ Nucleus Command │
|
|
18
|
+
│ "total sales?" │ │ about intent │ │ (sum RESULTS) │
|
|
19
|
+
└─────────────────┘ └─────────────────┘ └────────┬────────┘
|
|
20
|
+
│
|
|
21
|
+
┌─────────────────┐ ┌─────────────────┐ ┌────────▼────────┐
|
|
22
|
+
│ Final Answer │◀────│ Lattice Engine │◀────│ Parser │
|
|
23
|
+
│ 13,000,000 │ │ Executes │ │ Validates │
|
|
24
|
+
└─────────────────┘ └─────────────────┘ └─────────────────┘
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
**Why this works better than code generation:**
|
|
28
|
+
|
|
29
|
+
1. **Reduced entropy** - Nucleus has a rigid grammar with fewer valid outputs than JavaScript
|
|
30
|
+
2. **Fail-fast validation** - Parser rejects malformed commands before execution
|
|
31
|
+
3. **Safe execution** - Lattice only executes known operations, no arbitrary code
|
|
32
|
+
4. **Small-model friendly** - 7B models handle symbolic grammars better than freeform code
|
|
33
|
+
|
|
34
|
+
## Architecture
|
|
35
|
+
|
|
36
|
+
### The Nucleus DSL
|
|
37
|
+
|
|
38
|
+
The LLM outputs commands in the Nucleus DSL—an S-expression language designed for document analysis:
|
|
39
|
+
|
|
40
|
+
```scheme
|
|
41
|
+
; Search for patterns
|
|
42
|
+
(grep "SALES_DATA")
|
|
43
|
+
|
|
44
|
+
; Filter results
|
|
45
|
+
(filter RESULTS (lambda x (match x "NORTH" 0)))
|
|
46
|
+
|
|
47
|
+
; Aggregate
|
|
48
|
+
(sum RESULTS) ; Auto-extracts numbers like "$2,340,000" from lines
|
|
49
|
+
(count RESULTS) ; Count matching items
|
|
50
|
+
|
|
51
|
+
; Final answer
|
|
52
|
+
<<<FINAL>>>13000000<<<END>>>
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### The Lattice Engine
|
|
56
|
+
|
|
57
|
+
The Lattice engine (`src/logic/`) processes Nucleus commands:
|
|
58
|
+
|
|
59
|
+
1. **Parser** (`lc-parser.ts`) - Parses S-expressions into an AST
|
|
60
|
+
2. **Type Inference** (`type-inference.ts`) - Validates types before execution
|
|
61
|
+
3. **Constraint Resolver** (`constraint-resolver.ts`) - Handles symbolic constraints like `[Σ⚡μ]`
|
|
62
|
+
4. **Solver** (`lc-solver.ts`) - Executes commands against the document
|
|
63
|
+
|
|
64
|
+
Lattice uses **miniKanren** (a relational programming engine) for pattern classification and filtering operations.
|
|
65
|
+
|
|
66
|
+
### Pre-Search Optimization
|
|
67
|
+
|
|
68
|
+
Before calling the LLM, the system extracts keywords from your query and pre-runs grep:
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
Query: "What is the total of all north sales data values?"
|
|
72
|
+
│
|
|
73
|
+
▼
|
|
74
|
+
┌─────────────────────────────────────────────────────┐
|
|
75
|
+
│ Pre-search extracts: "north", "sales", "data" │
|
|
76
|
+
│ Tries compound patterns: SALES.*NORTH, NORTH.*SALES │
|
|
77
|
+
│ Pre-populates RESULTS before LLM is called │
|
|
78
|
+
└─────────────────────────────────────────────────────┘
|
|
79
|
+
│
|
|
80
|
+
▼
|
|
81
|
+
┌─────────────────────────────────────────────────────┐
|
|
82
|
+
│ LLM receives: "RESULTS has 1 match" │
|
|
83
|
+
│ LLM outputs: (sum RESULTS) ← skips search step! │
|
|
84
|
+
└─────────────────────────────────────────────────────┘
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
This saves turns by pre-populating `RESULTS` so the model can immediately aggregate.
|
|
88
|
+
|
|
89
|
+
### The Role of the LLM
|
|
90
|
+
|
|
91
|
+
The LLM does **reasoning**, not code generation:
|
|
92
|
+
|
|
93
|
+
1. **Understands intent** - Interprets "total of north sales" as needing grep + filter + sum
|
|
94
|
+
2. **Chooses operations** - Decides which Nucleus commands achieve the goal
|
|
95
|
+
3. **Verifies results** - Checks if the current results answer the query
|
|
96
|
+
4. **Iterates** - Refines search if results are too broad or narrow
|
|
97
|
+
|
|
98
|
+
The LLM never writes JavaScript. It outputs Nucleus commands that Lattice executes safely.
|
|
99
|
+
|
|
100
|
+
### Components Summary
|
|
101
|
+
|
|
102
|
+
| Component | Purpose |
|
|
103
|
+
|-----------|---------|
|
|
104
|
+
| **Nucleus Adapter** | Prompts LLM to output Nucleus commands |
|
|
105
|
+
| **Lattice Parser** | Parses S-expressions to AST |
|
|
106
|
+
| **Lattice Solver** | Executes commands against document |
|
|
107
|
+
| **miniKanren** | Relational engine for classification |
|
|
108
|
+
| **Pre-Search** | Extracts keywords and pre-runs grep |
|
|
109
|
+
| **RAG Hints** | Few-shot examples from past successes |
|
|
110
|
+
|
|
11
111
|
## Installation
|
|
12
112
|
|
|
13
113
|
### npm (recommended)
|
|
@@ -19,7 +119,7 @@ npm install -g matryoshka-rlm
|
|
|
19
119
|
### npx (no install)
|
|
20
120
|
|
|
21
121
|
```bash
|
|
22
|
-
npx matryoshka-rlm "
|
|
122
|
+
npx matryoshka-rlm "What is the total of all sales values?" ./report.txt
|
|
23
123
|
```
|
|
24
124
|
|
|
25
125
|
### From source
|
|
@@ -43,7 +143,7 @@ Copy `config.example.json` to `config.json` and configure your LLM provider:
|
|
|
43
143
|
"providers": {
|
|
44
144
|
"ollama": {
|
|
45
145
|
"baseUrl": "http://localhost:11434",
|
|
46
|
-
"model": "
|
|
146
|
+
"model": "qwen2.5-coder:7b",
|
|
47
147
|
"options": { "temperature": 0.2, "num_ctx": 8192 }
|
|
48
148
|
},
|
|
49
149
|
"deepseek": {
|
|
@@ -62,10 +162,10 @@ Copy `config.example.json` to `config.json` and configure your LLM provider:
|
|
|
62
162
|
|
|
63
163
|
```bash
|
|
64
164
|
# Basic usage
|
|
65
|
-
rlm "
|
|
165
|
+
rlm "What is the total of all sales values?" ./report.txt
|
|
66
166
|
|
|
67
167
|
# With options
|
|
68
|
-
rlm "
|
|
168
|
+
rlm "Count all ERROR entries" ./logs.txt --max-turns 15 --verbose
|
|
69
169
|
|
|
70
170
|
# See all options
|
|
71
171
|
rlm --help
|
|
@@ -73,7 +173,7 @@ rlm --help
|
|
|
73
173
|
|
|
74
174
|
### MCP Integration
|
|
75
175
|
|
|
76
|
-
RLM includes an MCP (Model Context Protocol) server that exposes the `analyze_document` tool. This allows coding agents
|
|
176
|
+
RLM includes an MCP (Model Context Protocol) server that exposes the `analyze_document` tool. This allows coding agents to analyze documents that exceed their context window.
|
|
77
177
|
|
|
78
178
|
#### MCP Tool: `analyze_document`
|
|
79
179
|
|
|
@@ -84,9 +184,7 @@ RLM includes an MCP (Model Context Protocol) server that exposes the `analyze_do
|
|
|
84
184
|
| `maxTurns` | number | No | Maximum exploration turns (default: 10) |
|
|
85
185
|
| `timeoutMs` | number | No | Timeout per turn in milliseconds (default: 30000) |
|
|
86
186
|
|
|
87
|
-
####
|
|
88
|
-
|
|
89
|
-
Add to your `crush.json` config:
|
|
187
|
+
#### Example MCP config
|
|
90
188
|
|
|
91
189
|
```json
|
|
92
190
|
{
|
|
@@ -98,16 +196,10 @@ Add to your `crush.json` config:
|
|
|
98
196
|
}
|
|
99
197
|
}
|
|
100
198
|
```
|
|
101
|
-
Then ask Crush to analyze documents:
|
|
102
|
-
|
|
103
|
-
> Use the analyze_document tool to find all sales figures in /path/to/report.txt and calculate the total
|
|
104
|
-
|
|
105
|
-
See [Crush](https://github.com/charmbracelet/crush) for more details.
|
|
106
199
|
|
|
107
200
|
#### Testing the MCP Server
|
|
108
201
|
|
|
109
202
|
```bash
|
|
110
|
-
# Verify the server starts correctly
|
|
111
203
|
rlm-mcp --test
|
|
112
204
|
# Output: MCP server ready
|
|
113
205
|
# Output: Available tools: analyze_document
|
|
@@ -121,143 +213,204 @@ import { createLLMClient } from "matryoshka-rlm";
|
|
|
121
213
|
|
|
122
214
|
const llmClient = createLLMClient("ollama", {
|
|
123
215
|
baseUrl: "http://localhost:11434",
|
|
124
|
-
model: "
|
|
216
|
+
model: "qwen2.5-coder:7b",
|
|
125
217
|
options: { temperature: 0.2 }
|
|
126
218
|
});
|
|
127
219
|
|
|
128
|
-
const result = await runRLM("What
|
|
220
|
+
const result = await runRLM("What is the total of all sales values?", "./report.txt", {
|
|
129
221
|
llmClient,
|
|
130
222
|
maxTurns: 10,
|
|
131
223
|
turnTimeoutMs: 30000,
|
|
132
224
|
});
|
|
133
225
|
```
|
|
134
226
|
|
|
135
|
-
##
|
|
227
|
+
## Example Session
|
|
136
228
|
|
|
137
|
-
```mermaid
|
|
138
|
-
sequenceDiagram
|
|
139
|
-
participant User
|
|
140
|
-
participant RLM as RLM Engine
|
|
141
|
-
participant Sandbox as JavaScript Sandbox
|
|
142
|
-
participant LLM as LLM Provider
|
|
143
|
-
|
|
144
|
-
User->>RLM: query + document path
|
|
145
|
-
RLM->>Sandbox: Create sandbox with document as `context`
|
|
146
|
-
|
|
147
|
-
loop Until FINAL or maxTurns
|
|
148
|
-
RLM->>LLM: System prompt + history
|
|
149
|
-
LLM-->>RLM: JavaScript code block
|
|
150
|
-
RLM->>Sandbox: Execute code (with timeout)
|
|
151
|
-
Sandbox-->>RLM: { result, logs, error }
|
|
152
|
-
Note over RLM: Append output to history
|
|
153
|
-
end
|
|
154
|
-
|
|
155
|
-
RLM-->>User: Final answer
|
|
156
229
|
```
|
|
230
|
+
$ rlm "What is the total of all north sales data values?" ./report.txt --verbose
|
|
157
231
|
|
|
158
|
-
|
|
232
|
+
[Pre-search] Found 1 data matches for "SALES.*NORTH"
|
|
233
|
+
[Pre-search] RESULTS pre-populated with 1 matches
|
|
159
234
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
235
|
+
──────────────────────────────────────────────────
|
|
236
|
+
[Turn 1/10] Querying LLM...
|
|
237
|
+
[Turn 1] Term: (sum RESULTS)
|
|
238
|
+
[Turn 1] Console output:
|
|
239
|
+
[Lattice] Summing 1 values
|
|
240
|
+
[Lattice] Sum = 2340000
|
|
241
|
+
[Turn 1] Result: 2340000
|
|
166
242
|
|
|
167
|
-
|
|
243
|
+
──────────────────────────────────────────────────
|
|
244
|
+
[Turn 2/10] Querying LLM...
|
|
245
|
+
[Turn 2] Final answer received
|
|
168
246
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
3. Code executes in sandbox, results feed back to LLM
|
|
172
|
-
4. LLM iterates until it outputs `<<<FINAL>>>answer<<<END>>>`
|
|
173
|
-
5. Sub-queries via `llm_query()` enable recursive decomposition
|
|
247
|
+
2340000
|
|
248
|
+
```
|
|
174
249
|
|
|
175
|
-
|
|
250
|
+
The model:
|
|
251
|
+
1. Received pre-populated RESULTS (pre-search found the data)
|
|
252
|
+
2. Immediately summed the results (no grep needed)
|
|
253
|
+
3. Output the final answer
|
|
176
254
|
|
|
177
|
-
|
|
255
|
+
## Nucleus DSL Reference
|
|
178
256
|
|
|
179
|
-
|
|
180
|
-
|------|-------------|
|
|
181
|
-
| `text_stats()` | Returns document metadata: length, line count, samples from start/middle/end |
|
|
182
|
-
| `fuzzy_search(query, limit?)` | Finds approximate matches, returns lines with scores |
|
|
183
|
-
| `llm_query(prompt)` | Spawns a sub-LLM call for complex analysis (limited by `maxSubCalls`) |
|
|
184
|
-
| `context` | The full document text (read-only string) |
|
|
185
|
-
| `memory` | Persistent array to accumulate findings across turns |
|
|
257
|
+
### Search Commands
|
|
186
258
|
|
|
187
|
-
|
|
259
|
+
```scheme
|
|
260
|
+
(grep "pattern") ; Regex search, returns matches with line numbers
|
|
261
|
+
(fuzzy_search "query" 10) ; Fuzzy search, returns top N matches with scores
|
|
262
|
+
(text_stats) ; Document metadata (length, line count, samples)
|
|
263
|
+
```
|
|
188
264
|
|
|
189
|
-
|
|
190
|
-
- Configurable timeout per turn
|
|
191
|
-
- `maxSubCalls` limit prevents infinite recursion
|
|
192
|
-
- Sub-LLM calls receive only the prompt, never parent context
|
|
193
|
-
- Auto-fixes common syntax errors in LLM-generated code
|
|
265
|
+
### Collection Operations
|
|
194
266
|
|
|
195
|
-
|
|
267
|
+
```scheme
|
|
268
|
+
(filter RESULTS (lambda x (match x "pattern" 0))) ; Filter by regex
|
|
269
|
+
(map RESULTS (lambda x (match x "(\\d+)" 1))) ; Extract from each
|
|
270
|
+
(sum RESULTS) ; Sum numbers in results
|
|
271
|
+
(count RESULTS) ; Count items
|
|
272
|
+
```
|
|
196
273
|
|
|
197
|
-
###
|
|
274
|
+
### String Operations
|
|
198
275
|
|
|
199
|
-
|
|
276
|
+
```scheme
|
|
277
|
+
(match str "pattern" 0) ; Regex match, return group N
|
|
278
|
+
(replace str "from" "to") ; String replacement
|
|
279
|
+
(split str "," 0) ; Split and get index
|
|
280
|
+
(parseInt str) ; Parse integer
|
|
281
|
+
(parseFloat str) ; Parse float
|
|
282
|
+
```
|
|
200
283
|
|
|
201
|
-
|
|
284
|
+
### Type Coercion
|
|
202
285
|
|
|
203
|
-
|
|
286
|
+
When the model sees data that needs parsing, it can use declarative type coercion:
|
|
287
|
+
|
|
288
|
+
```scheme
|
|
289
|
+
; Date parsing (returns ISO format YYYY-MM-DD)
|
|
290
|
+
(parseDate "Jan 15, 2024") ; -> "2024-01-15"
|
|
291
|
+
(parseDate "01/15/2024" "US") ; -> "2024-01-15" (MM/DD/YYYY)
|
|
292
|
+
(parseDate "15/01/2024" "EU") ; -> "2024-01-15" (DD/MM/YYYY)
|
|
293
|
+
|
|
294
|
+
; Currency parsing (handles $, €, commas, etc.)
|
|
295
|
+
(parseCurrency "$1,234.56") ; -> 1234.56
|
|
296
|
+
(parseCurrency "€1.234,56") ; -> 1234.56 (EU format)
|
|
204
297
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
# Vague (may cause hallucination)
|
|
209
|
-
rlm "What are the sales figures?" ./report.txt
|
|
298
|
+
; Number parsing
|
|
299
|
+
(parseNumber "1,234,567") ; -> 1234567
|
|
300
|
+
(parseNumber "50%") ; -> 0.5
|
|
210
301
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
```bash
|
|
216
|
-
rlm "Find lines matching 'Total:' and extract the numbers" ./data.txt
|
|
217
|
-
```
|
|
302
|
+
; General coercion
|
|
303
|
+
(coerce value "date") ; Coerce to date
|
|
304
|
+
(coerce value "currency") ; Coerce to currency
|
|
305
|
+
(coerce value "number") ; Coerce to number
|
|
218
306
|
|
|
219
|
-
|
|
307
|
+
; Extract and coerce in one step
|
|
308
|
+
(extract str "\\$[\\d,]+" 0 "currency") ; Extract and parse as currency
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
Use in map for batch transformations:
|
|
312
|
+
|
|
313
|
+
```scheme
|
|
314
|
+
; Parse all dates in results
|
|
315
|
+
(map RESULTS (lambda x (parseDate (match x "[A-Za-z]+ \\d+, \\d+" 0))))
|
|
316
|
+
|
|
317
|
+
; Extract and sum currencies
|
|
318
|
+
(map RESULTS (lambda x (parseCurrency (match x "\\$[\\d,]+" 0))))
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
### Program Synthesis
|
|
322
|
+
|
|
323
|
+
For complex transformations, the model can synthesize functions from examples:
|
|
324
|
+
|
|
325
|
+
```scheme
|
|
326
|
+
; Synthesize from input/output pairs
|
|
327
|
+
(synthesize
|
|
328
|
+
("$100" 100)
|
|
329
|
+
("$1,234" 1234)
|
|
330
|
+
("$50,000" 50000))
|
|
331
|
+
; -> Returns a function that extracts numbers from currency strings
|
|
332
|
+
```
|
|
220
333
|
|
|
221
|
-
|
|
334
|
+
This uses Barliman-style relational synthesis with miniKanren to automatically build extraction functions.
|
|
222
335
|
|
|
223
|
-
|
|
336
|
+
### Cross-Turn State
|
|
337
|
+
|
|
338
|
+
Results from previous turns are available:
|
|
339
|
+
- `RESULTS` - Latest array result (updated by grep, filter)
|
|
340
|
+
- `_0`, `_1`, `_2`, ... - Results from specific turns
|
|
341
|
+
|
|
342
|
+
### Final Answer
|
|
343
|
+
|
|
344
|
+
```scheme
|
|
345
|
+
<<<FINAL>>>your answer here<<<END>>>
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
## Troubleshooting
|
|
349
|
+
|
|
350
|
+
### Model Answers Without Exploring
|
|
351
|
+
|
|
352
|
+
**Symptom**: The model provides an answer immediately with hallucinated data.
|
|
224
353
|
|
|
225
354
|
**Solutions**:
|
|
355
|
+
1. Use a more capable model (7B+ recommended)
|
|
356
|
+
2. Be specific in your query: "Find lines containing SALES_DATA and sum the dollar amounts"
|
|
357
|
+
|
|
358
|
+
### Max Turns Reached
|
|
226
359
|
|
|
360
|
+
**Symptom**: "Max turns (N) reached without final answer"
|
|
361
|
+
|
|
362
|
+
**Solutions**:
|
|
227
363
|
1. Increase `--max-turns` for complex documents
|
|
228
|
-
2. Check
|
|
229
|
-
3. Simplify the query
|
|
364
|
+
2. Check `--verbose` output for repeated patterns (the model may be stuck in a loop)
|
|
365
|
+
3. Simplify the query
|
|
230
366
|
|
|
231
|
-
###
|
|
367
|
+
### Parse Errors
|
|
232
368
|
|
|
233
|
-
**Symptom**:
|
|
369
|
+
**Symptom**: "Parse error: no valid command"
|
|
234
370
|
|
|
235
|
-
**Cause**:
|
|
371
|
+
**Cause**: The model produced a malformed S-expression.
|
|
236
372
|
|
|
237
373
|
**Solutions**:
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
3. Use `--verbose` to see what code the model is generating
|
|
374
|
+
1. The system auto-converts JSON to S-expressions as a fallback
|
|
375
|
+
2. Use `--verbose` to see what the model is generating
|
|
376
|
+
3. Try a different model tuned for code/symbolic output
|
|
242
377
|
|
|
243
378
|
## Development
|
|
244
379
|
|
|
245
380
|
```bash
|
|
246
|
-
# Run tests
|
|
247
|
-
npm test
|
|
381
|
+
npm test # Run tests
|
|
382
|
+
npm test -- --coverage # With coverage
|
|
383
|
+
RUN_E2E=1 npm test -- tests/e2e.test.ts # E2E tests (requires Ollama)
|
|
384
|
+
npm run build # Build
|
|
385
|
+
npm run typecheck # Type check
|
|
386
|
+
```
|
|
248
387
|
|
|
249
|
-
|
|
250
|
-
npm test -- --coverage
|
|
388
|
+
## Project Structure
|
|
251
389
|
|
|
252
|
-
|
|
253
|
-
|
|
390
|
+
```
|
|
391
|
+
src/
|
|
392
|
+
├── adapters/ # Model-specific prompting
|
|
393
|
+
│ ├── nucleus.ts # Nucleus DSL adapter
|
|
394
|
+
│ └── types.ts # Adapter interface
|
|
395
|
+
├── logic/ # Lattice engine
|
|
396
|
+
│ ├── lc-parser.ts # Nucleus parser
|
|
397
|
+
│ ├── lc-solver.ts # Command executor (uses miniKanren)
|
|
398
|
+
│ ├── type-inference.ts
|
|
399
|
+
│ └── constraint-resolver.ts
|
|
400
|
+
├── minikanren/ # Relational programming engine
|
|
401
|
+
├── synthesis/ # Program synthesis (Barliman-style)
|
|
402
|
+
│ └── evalo/ # Extractor DSL
|
|
403
|
+
├── rag/ # Few-shot hint retrieval
|
|
404
|
+
└── rlm.ts # Main execution loop
|
|
405
|
+
```
|
|
254
406
|
|
|
255
|
-
|
|
256
|
-
npm run build
|
|
407
|
+
## Acknowledgements
|
|
257
408
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
409
|
+
This project incorporates ideas and code from:
|
|
410
|
+
|
|
411
|
+
- **[Nucleus](https://github.com/michaelwhitford/nucleus)** - A symbolic S-expression language by Michael Whitford. RLM uses Nucleus syntax for the constrained DSL that the LLM outputs, providing a rigid grammar that reduces model errors.
|
|
412
|
+
- **[ramo](https://github.com/wjlewis/ramo)** - A miniKanren implementation in TypeScript by Will Lewis. Used for constraint-based program synthesis.
|
|
413
|
+
- **[Barliman](https://github.com/webyrd/Barliman)** - A prototype smart editor by William Byrd and Greg Rosenblatt that uses program synthesis to assist programmers. The Barliman-style approach of providing input/output constraints instead of code inspired the synthesis workflow.
|
|
261
414
|
|
|
262
415
|
## License
|
|
263
416
|
|
|
@@ -268,3 +421,4 @@ MIT
|
|
|
268
421
|
- [RLM Paper](https://arxiv.org/abs/2512.24601)
|
|
269
422
|
- [Original Implementation](https://github.com/alexzhang13/rlm)
|
|
270
423
|
- [Model Context Protocol](https://modelcontextprotocol.io/)
|
|
424
|
+
- [miniKanren](http://minikanren.org/)
|
package/config.example.json
CHANGED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Model Adapter
|
|
3
|
+
*
|
|
4
|
+
* Default adapter implementation that works with most models.
|
|
5
|
+
* Other adapters can spread this and override specific methods.
|
|
6
|
+
*/
|
|
7
|
+
import type { ModelAdapter, FinalVarMarker, RAGHints } from "./types.js";
|
|
8
|
+
/**
|
|
9
|
+
* Build the default system prompt for the RLM
|
|
10
|
+
*/
|
|
11
|
+
declare function buildSystemPrompt(contextLength: number, toolInterfaces: string, hints?: RAGHints): string;
|
|
12
|
+
/**
|
|
13
|
+
* Extract code from LLM response
|
|
14
|
+
*/
|
|
15
|
+
declare function extractCode(response: string): string | null;
|
|
16
|
+
/**
|
|
17
|
+
* Extract final answer from LLM response
|
|
18
|
+
*/
|
|
19
|
+
declare function extractFinalAnswer(response: string | undefined | null): string | FinalVarMarker | null;
|
|
20
|
+
/**
|
|
21
|
+
* Get feedback message when model provides no code block
|
|
22
|
+
*/
|
|
23
|
+
declare function getNoCodeFeedback(): string;
|
|
24
|
+
/**
|
|
25
|
+
* Get feedback message when code execution fails
|
|
26
|
+
*/
|
|
27
|
+
declare function getErrorFeedback(error: string): string;
|
|
28
|
+
/**
|
|
29
|
+
* Get feedback message after successful code execution
|
|
30
|
+
* Generic reminder about continuing exploration or providing final answer
|
|
31
|
+
*/
|
|
32
|
+
declare function getSuccessFeedback(): string;
|
|
33
|
+
/**
|
|
34
|
+
* Get feedback message when model repeats the same code
|
|
35
|
+
* Encourages a different approach
|
|
36
|
+
*/
|
|
37
|
+
declare function getRepeatedCodeFeedback(): string;
|
|
38
|
+
/**
|
|
39
|
+
* Create the base adapter instance
|
|
40
|
+
*/
|
|
41
|
+
export declare function createBaseAdapter(): ModelAdapter;
|
|
42
|
+
export { buildSystemPrompt as baseBuildSystemPrompt, extractCode as baseExtractCode, extractFinalAnswer as baseExtractFinalAnswer, getNoCodeFeedback as baseGetNoCodeFeedback, getErrorFeedback as baseGetErrorFeedback, getSuccessFeedback as baseGetSuccessFeedback, getRepeatedCodeFeedback as baseGetRepeatedCodeFeedback, };
|
|
43
|
+
//# sourceMappingURL=base.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../src/adapters/base.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEzE;;GAEG;AACH,iBAAS,iBAAiB,CACxB,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,MAAM,EACtB,KAAK,CAAC,EAAE,QAAQ,GACf,MAAM,CAyDR;AAED;;GAEG;AACH,iBAAS,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAUpD;AAED;;GAEG;AACH,iBAAS,kBAAkB,CACzB,QAAQ,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,GAClC,MAAM,GAAG,cAAc,GAAG,IAAI,CA6ChC;AAED;;GAEG;AACH,iBAAS,iBAAiB,IAAI,MAAM,CAMnC;AAED;;GAEG;AACH,iBAAS,gBAAgB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAE/C;AAED;;;GAGG;AACH,iBAAS,kBAAkB,IAAI,MAAM,CAEpC;AAED;;;GAGG;AACH,iBAAS,uBAAuB,IAAI,MAAM,CASzC;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,YAAY,CAWhD;AAGD,OAAO,EACL,iBAAiB,IAAI,qBAAqB,EAC1C,WAAW,IAAI,eAAe,EAC9B,kBAAkB,IAAI,sBAAsB,EAC5C,iBAAiB,IAAI,qBAAqB,EAC1C,gBAAgB,IAAI,oBAAoB,EACxC,kBAAkB,IAAI,sBAAsB,EAC5C,uBAAuB,IAAI,2BAA2B,GACvD,CAAC"}
|