recursive-llm-ts 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +201 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +7 -0
- package/dist/rlm-bridge.d.ts +26 -0
- package/dist/rlm-bridge.js +101 -0
- package/dist/rlm.d.ts +9 -0
- package/dist/rlm.js +18 -0
- package/package.json +46 -0
- package/recursive-llm/pyproject.toml +70 -0
- package/recursive-llm/src/rlm/__init__.py +14 -0
- package/recursive-llm/src/rlm/core.py +305 -0
- package/recursive-llm/src/rlm/parser.py +93 -0
- package/recursive-llm/src/rlm/prompts.py +50 -0
- package/recursive-llm/src/rlm/repl.py +235 -0
- package/recursive-llm/src/rlm/types.py +37 -0
package/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 recursive-llm-ts contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
package/README.md
ADDED
@@ -0,0 +1,201 @@
+# recursive-llm-ts
+
+TypeScript bridge for [recursive-llm](https://github.com/grigori-gvadzabia/recursive-llm): Recursive Language Models for unbounded context processing.
+
+## Installation
+
+```bash
+npm install recursive-llm-ts
+```
+
+**That's it!** Python is bundled via [JSPyBridge](https://github.com/extremeheat/JSPyBridge) - no additional setup required.
+
+## Usage
+
+```typescript
+import { RLM } from 'recursive-llm-ts';
+
+// Initialize RLM with a model
+const rlm = new RLM('gpt-4o-mini', {
+  max_iterations: 15,
+  api_key: process.env.OPENAI_API_KEY
+});
+
+// Process a query with unbounded context
+const result = await rlm.completion(
+  'What are the key points in this document?',
+  longDocument
+);
+
+console.log(result.result);
+console.log('Stats:', result.stats);
+```
+
+## API
+
+### `RLM`
+
+Main class for recursive language model completions.
+
+**Constructor:**
+```typescript
+constructor(model: string, rlmConfig?: RLMConfig)
+```
+
+- `model`: The LLM model to use (e.g., 'gpt-4o-mini', 'claude-3-sonnet')
+- `rlmConfig`: Optional configuration object
+
+**Methods:**
+
+#### `completion(query: string, context: string): Promise<RLMResult>`
+
+Process a query with the given context using recursive language models.
+
+**Parameters:**
+- `query`: The question or task to perform
+- `context`: The context/document to process (can be arbitrarily large)
+
+**Returns:**
+- `Promise<RLMResult>`: Result containing the answer and statistics
+
+#### `cleanup(): Promise<void>`
+
+Clean up the Python bridge and free resources.
+
+```typescript
+await rlm.cleanup();
+```
+
+### Types
+
+```typescript
+interface RLMConfig {
+  // Model configuration
+  recursive_model?: string;  // Model to use for recursive calls (defaults to main model)
+
+  // API configuration
+  api_base?: string;  // Custom API base URL (for Azure, Bedrock, etc.)
+  api_key?: string;   // API key for authentication
+
+  // Execution limits
+  max_depth?: number;       // Maximum recursion depth (default: 5)
+  max_iterations?: number;  // Maximum REPL iterations per call (default: 30)
+
+  // LiteLLM parameters - pass any additional parameters supported by LiteLLM
+  api_version?: string;  // API version (e.g., for Azure)
+  timeout?: number;      // Request timeout in seconds
+  temperature?: number;  // Sampling temperature
+  max_tokens?: number;   // Maximum tokens in response
+  [key: string]: any;    // Any other LiteLLM parameters
+}
+
+interface RLMResult {
+  result: string;
+  stats: RLMStats;
+}
+
+interface RLMStats {
+  llm_calls: number;
+  iterations: number;
+  depth: number;
+}
+```
+
+## Environment Variables
+
+Set your API key as an environment variable:
+
+```bash
+export OPENAI_API_KEY='your-api-key-here'
+```
+
+Or pass it in the configuration:
+
+```typescript
+const rlm = new RLM('gpt-4o-mini', {
+  api_key: 'your-api-key-here'
+});
+```
+
+## Custom Providers
+
+This package uses [LiteLLM](https://github.com/BerriAI/litellm) under the hood, which supports **100+ LLM providers** including OpenAI, Anthropic, AWS Bedrock, Azure, Cohere, and more.
+
+### Quick Reference
+
+| Provider | Model Format | Required Config |
+|----------|-------------|-----------------|
+| OpenAI | `gpt-4o`, `gpt-4o-mini` | `api_key` |
+| Anthropic | `claude-3-5-sonnet-20241022` | `api_key` |
+| AWS Bedrock | `bedrock/anthropic.claude-3-sonnet...` | AWS env vars |
+| Azure OpenAI | `azure/gpt-4o` | `api_base`, `api_key`, `api_version` |
+| Ollama | `ollama/llama3.2` | `api_base` (optional) |
+| Custom | `openai/your-model` | `api_base`, `api_key` |
+
+### Amazon Bedrock
+
+```typescript
+import { RLM } from 'recursive-llm-ts';
+
+const rlm = new RLM('bedrock/anthropic.claude-3-sonnet-20240229-v1:0', {
+  api_key: process.env.AWS_ACCESS_KEY_ID,
+  max_iterations: 15
+});
+
+// Set AWS credentials via environment variables:
+// AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION_NAME
+```
+
+### Azure OpenAI
+
+```typescript
+const rlm = new RLM('azure/gpt-4o', {
+  api_base: 'https://your-resource.openai.azure.com',
+  api_key: process.env.AZURE_API_KEY,
+  api_version: '2024-02-15-preview' // Pass any LiteLLM params
+});
+```
+
+### Custom OpenAI-Compatible APIs
+
+For providers with OpenAI-compatible APIs (e.g., local models, vLLM, Ollama):
+
+```typescript
+const rlm = new RLM('openai/your-model', {
+  api_base: 'https://your-custom-endpoint.com/v1',
+  api_key: 'your-key-here'
+});
+```
+
+### Other Providers
+
+See the [LiteLLM documentation](https://docs.litellm.ai/docs/providers) for the complete list of supported providers and their configuration.
+
+## How It Works
+
+This package provides a TypeScript wrapper around the Python `recursive-llm` package, enabling seamless integration into Node.js/TypeScript applications. It uses [JSPyBridge (pythonia)](https://github.com/extremeheat/JSPyBridge) to provide direct Python interop - Python is bundled and runs in-process, so no external Python installation is needed.
+
+The recursive-llm approach breaks down large contexts into manageable chunks and processes them recursively, allowing you to work with documents of any size without hitting token limits.
+
+### Key Features
+
+- ✅ **Zero Python setup** - Python runtime bundled via JSPyBridge
+- ✅ **Direct interop** - Native Python-JavaScript bridge (no JSON serialization)
+- ✅ **Type-safe** - Full TypeScript type definitions
+- ✅ **Simple API** - Just `npm install` and start using
+
+## Publishing
+
+This package uses automated GitHub Actions workflows to publish to npm. See [RELEASE.md](RELEASE.md) for detailed instructions on publishing new versions.
+
+**Quick start:**
+```bash
+npm version patch            # Bump version
+git push origin main --tags  # Push tag
+# Then create a GitHub release to trigger automatic npm publish
+```
+
+## License
+
+MIT
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
@@ -0,0 +1,7 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.RLMBridge = exports.RLM = void 0;
+var rlm_1 = require("./rlm");
+Object.defineProperty(exports, "RLM", { enumerable: true, get: function () { return rlm_1.RLM; } });
+var rlm_bridge_1 = require("./rlm-bridge");
+Object.defineProperty(exports, "RLMBridge", { enumerable: true, get: function () { return rlm_bridge_1.RLMBridge; } });
package/dist/rlm-bridge.d.ts
ADDED
@@ -0,0 +1,26 @@
+export interface RLMStats {
+    llm_calls: number;
+    iterations: number;
+    depth: number;
+}
+export interface RLMResult {
+    result: string;
+    stats: RLMStats;
+}
+export interface RLMError {
+    error: string;
+}
+export interface RLMConfig {
+    recursive_model?: string;
+    api_base?: string;
+    api_key?: string;
+    max_depth?: number;
+    max_iterations?: number;
+    [key: string]: any;
+}
+export declare class RLMBridge {
+    private rlmModule;
+    private ensureRLMModule;
+    completion(model: string, query: string, context: string, rlmConfig?: RLMConfig): Promise<RLMResult>;
+    cleanup(): Promise<void>;
+}
package/dist/rlm-bridge.js
ADDED
@@ -0,0 +1,101 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+        desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.RLMBridge = void 0;
+const pythonia_1 = require("pythonia");
+const path = __importStar(require("path"));
+class RLMBridge {
+    constructor() {
+        this.rlmModule = null;
+    }
+    ensureRLMModule() {
+        return __awaiter(this, void 0, void 0, function* () {
+            if (this.rlmModule)
+                return;
+            // Get path to recursive-llm Python module
+            const rlmPath = path.join(__dirname, '..', 'recursive-llm', 'src', 'rlm');
+            // Import sys module to add path
+            const sys = yield (0, pythonia_1.python)('sys');
+            const pathList = yield sys.path;
+            yield pathList.insert(0, path.join(__dirname, '..', 'recursive-llm', 'src'));
+            // Import the rlm module
+            this.rlmModule = yield (0, pythonia_1.python)('rlm');
+        });
+    }
+    completion(model_1, query_1, context_1) {
+        return __awaiter(this, arguments, void 0, function* (model, query, context, rlmConfig = {}) {
+            yield this.ensureRLMModule();
+            try {
+                // Create RLM instance with config
+                const RLMClass = yield this.rlmModule.RLM;
+                const rlmInstance = yield RLMClass(model, rlmConfig);
+                // Call completion method
+                const result = yield rlmInstance.completion(query, context);
+                const stats = yield rlmInstance.stats;
+                // Convert Python stats dict to JS object
+                const statsObj = {
+                    llm_calls: yield stats.llm_calls,
+                    iterations: yield stats.iterations,
+                    depth: yield stats.depth
+                };
+                return {
+                    result: String(result),
+                    stats: statsObj
+                };
+            }
+            catch (error) {
+                throw new Error(`RLM completion failed: ${error.message || error}`);
+            }
+        });
+    }
+    cleanup() {
+        return __awaiter(this, void 0, void 0, function* () {
+            if (this.rlmModule) {
+                pythonia_1.python.exit();
+                this.rlmModule = null;
+            }
+        });
+    }
+}
+exports.RLMBridge = RLMBridge;
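The bridge above reduces to two moves: put the bundled `recursive-llm/src` directory on Python's `sys.path`, then drive `rlm.RLM` through pythonia proxies. A minimal Python sketch of the equivalent direct usage follows; the install path is a hypothetical assumption mirroring the `path.join(__dirname, '..', 'recursive-llm', 'src')` call in rlm-bridge.js, and it uses keyword arguments matching `core.RLM`'s signature.

```python
# Sketch only: using the bundled Python package without the JS bridge.
import sys

# Hypothetical location of the bundled sources inside an npm install.
sys.path.insert(0, "node_modules/recursive-llm-ts/recursive-llm/src")

from rlm import RLM  # the same module the bridge imports via pythonia

instance = RLM("gpt-4o-mini", max_iterations=15)
answer = instance.completion("What are the key points?", "...long document...")
print(answer)
print(instance.stats)  # {'llm_calls': ..., 'iterations': ..., 'depth': 0}
```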
package/dist/rlm.d.ts
ADDED
@@ -0,0 +1,9 @@
+import { RLMConfig, RLMResult } from './rlm-bridge';
+export declare class RLM {
+    private bridge;
+    private model;
+    private rlmConfig;
+    constructor(model: string, rlmConfig?: RLMConfig);
+    completion(query: string, context: string): Promise<RLMResult>;
+    cleanup(): Promise<void>;
+}
package/dist/rlm.js
ADDED
@@ -0,0 +1,18 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.RLM = void 0;
+const rlm_bridge_1 = require("./rlm-bridge");
+class RLM {
+    constructor(model, rlmConfig = {}) {
+        this.bridge = new rlm_bridge_1.RLMBridge();
+        this.model = model;
+        this.rlmConfig = rlmConfig;
+    }
+    completion(query, context) {
+        return this.bridge.completion(this.model, query, context, this.rlmConfig);
+    }
+    cleanup() {
+        return this.bridge.cleanup();
+    }
+}
+exports.RLM = RLM;
package/package.json
ADDED
@@ -0,0 +1,46 @@
+{
+  "name": "recursive-llm-ts",
+  "version": "1.0.1",
+  "description": "TypeScript bridge for recursive-llm: Recursive Language Models for unbounded context processing",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "files": [
+    "dist",
+    "recursive-llm/src",
+    "recursive-llm/pyproject.toml"
+  ],
+  "scripts": {
+    "build": "tsc",
+    "prepublishOnly": "npm run build",
+    "release": "./scripts/release.sh",
+    "start": "ts-node test/test.ts",
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [
+    "llm",
+    "ai",
+    "recursive",
+    "context",
+    "nlp",
+    "language-model"
+  ],
+  "author": "",
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/yourusername/recursive-llm-ts.git"
+  },
+  "bugs": {
+    "url": "https://github.com/yourusername/recursive-llm-ts/issues"
+  },
+  "homepage": "https://github.com/yourusername/recursive-llm-ts#readme",
+  "dependencies": {
+    "pythonia": "^1.2.6"
+  },
+  "devDependencies": {
+    "@types/node": "^20.11.19",
+    "dotenv": "^16.4.5",
+    "ts-node": "^10.9.2",
+    "typescript": "^5.3.3"
+  }
+}
package/recursive-llm/pyproject.toml
ADDED
@@ -0,0 +1,70 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "recursive-llm"
+version = "0.1.0"
+description = "Recursive Language Models for unbounded context processing"
+authors = [{name = "Grigori Gvadzabia"}]
+readme = "README.md"
+requires-python = ">=3.9"
+license = {text = "MIT"}
+keywords = ["llm", "ai", "nlp", "recursive", "language-models"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+
+dependencies = [
+    "litellm>=1.0.0",
+    "RestrictedPython>=6.0",
+    "python-dotenv>=1.0.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0",
+    "pytest-cov>=4.0.0",
+    "black>=24.0.0",
+    "ruff>=0.1.0",
+    "mypy>=1.0.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/yourusername/recursive-llm"
+Documentation = "https://github.com/yourusername/recursive-llm"
+Repository = "https://github.com/yourusername/recursive-llm"
+Issues = "https://github.com/yourusername/recursive-llm/issues"
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+addopts = "-v"
+
+[tool.black]
+line-length = 100
+target-version = ['py39']
+
+[tool.ruff]
+line-length = 100
+target-version = "py39"
+
+[tool.mypy]
+python_version = "3.9"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
package/recursive-llm/src/rlm/__init__.py
ADDED
@@ -0,0 +1,14 @@
+"""Recursive Language Models for unbounded context processing."""
+
+from .core import RLM, RLMError, MaxIterationsError, MaxDepthError
+from .repl import REPLError
+
+__version__ = "0.1.0"
+
+__all__ = [
+    "RLM",
+    "RLMError",
+    "MaxIterationsError",
+    "MaxDepthError",
+    "REPLError",
+]
package/recursive-llm/src/rlm/core.py
ADDED
@@ -0,0 +1,305 @@
+"""Core RLM implementation."""
+
+import asyncio
+import re
+from typing import Optional, Dict, Any, List
+
+import litellm
+
+from .types import Message
+from .repl import REPLExecutor, REPLError
+from .prompts import build_system_prompt
+from .parser import parse_response, is_final
+
+
+class RLMError(Exception):
+    """Base error for RLM."""
+    pass
+
+
+class MaxIterationsError(RLMError):
+    """Max iterations exceeded."""
+    pass
+
+
+class MaxDepthError(RLMError):
+    """Max recursion depth exceeded."""
+    pass
+
+
+class RLM:
+    """Recursive Language Model."""
+
+    def __init__(
+        self,
+        model: str,
+        recursive_model: Optional[str] = None,
+        api_base: Optional[str] = None,
+        api_key: Optional[str] = None,
+        max_depth: int = 5,
+        max_iterations: int = 30,
+        _current_depth: int = 0,
+        **llm_kwargs: Any
+    ):
+        """
+        Initialize RLM.
+
+        Args:
+            model: Model name (e.g., "gpt-4o", "claude-sonnet-4", "ollama/llama3.2")
+            recursive_model: Optional cheaper model for recursive calls
+            api_base: Optional API base URL
+            api_key: Optional API key
+            max_depth: Maximum recursion depth
+            max_iterations: Maximum REPL iterations per call
+            _current_depth: Internal current depth tracker
+            **llm_kwargs: Additional LiteLLM parameters
+        """
+        self.model = model
+        self.recursive_model = recursive_model or model
+        self.api_base = api_base
+        self.api_key = api_key
+        self.max_depth = max_depth
+        self.max_iterations = max_iterations
+        self._current_depth = _current_depth
+        self.llm_kwargs = llm_kwargs
+
+        self.repl = REPLExecutor()
+
+        # Stats
+        self._llm_calls = 0
+        self._iterations = 0
+
+    def completion(
+        self,
+        query: str = "",
+        context: str = "",
+        **kwargs: Any
+    ) -> str:
+        """
+        Sync wrapper for acompletion.
+
+        Args:
+            query: User query (optional if query is in context)
+            context: Context to process (optional, can pass query here)
+            **kwargs: Additional LiteLLM parameters
+
+        Returns:
+            Final answer string
+
+        Examples:
+            # Standard usage
+            rlm.completion(query="Summarize this", context=document)
+
+            # Query in context (RLM will extract task)
+            rlm.completion(context="Summarize this document: ...")
+
+            # Single string (treat as context)
+            rlm.completion("Process this text and extract dates")
+        """
+        # If only one argument provided, treat it as context
+        if query and not context:
+            context = query
+            query = ""
+
+        return asyncio.run(self.acompletion(query, context, **kwargs))
+
+    async def acompletion(
+        self,
+        query: str = "",
+        context: str = "",
+        **kwargs: Any
+    ) -> str:
+        """
+        Main async completion method.
+
+        Args:
+            query: User query (optional if query is in context)
+            context: Context to process (optional, can pass query here)
+            **kwargs: Additional LiteLLM parameters
+
+        Returns:
+            Final answer string
+
+        Raises:
+            MaxIterationsError: If max iterations exceeded
+            MaxDepthError: If max recursion depth exceeded
+
+        Examples:
+            # Explicit query and context
+            await rlm.acompletion(query="What is this?", context=doc)
+
+            # Query embedded in context
+            await rlm.acompletion(context="Extract all dates from: ...")
+
+            # LLM will figure out the task
+            await rlm.acompletion(context=document_with_instructions)
+        """
+        # If only query provided, treat it as context
+        if query and not context:
+            context = query
+            query = ""
+        if self._current_depth >= self.max_depth:
+            raise MaxDepthError(f"Max recursion depth ({self.max_depth}) exceeded")
+
+        # Initialize REPL environment
+        repl_env = self._build_repl_env(query, context)
+
+        # Build initial messages
+        system_prompt = build_system_prompt(len(context), self._current_depth)
+        messages: List[Message] = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": query}
+        ]
+
+        # Main loop
+        for iteration in range(self.max_iterations):
+            self._iterations = iteration + 1
+
+            # Call LLM
+            response = await self._call_llm(messages, **kwargs)
+
+            # Check for FINAL
+            if is_final(response):
+                answer = parse_response(response, repl_env)
+                if answer is not None:
+                    return answer
+
+            # Execute code in REPL
+            try:
+                exec_result = self.repl.execute(response, repl_env)
+            except REPLError as e:
+                exec_result = f"Error: {str(e)}"
+            except Exception as e:
+                exec_result = f"Unexpected error: {str(e)}"
+
+            # Add to conversation
+            messages.append({"role": "assistant", "content": response})
+            messages.append({"role": "user", "content": exec_result})
+
+        raise MaxIterationsError(
+            f"Max iterations ({self.max_iterations}) exceeded without FINAL()"
+        )
+
+    async def _call_llm(
+        self,
+        messages: List[Message],
+        **kwargs: Any
+    ) -> str:
+        """
+        Call LLM API.
+
+        Args:
+            messages: Conversation messages
+            **kwargs: Additional parameters (can override model here)
+
+        Returns:
+            LLM response text
+        """
+        self._llm_calls += 1
+
+        # Choose model based on depth
+        default_model = self.model if self._current_depth == 0 else self.recursive_model
+
+        # Allow override via kwargs
+        model = kwargs.pop('model', default_model)
+
+        # Merge kwargs
+        call_kwargs = {**self.llm_kwargs, **kwargs}
+        if self.api_base:
+            call_kwargs['api_base'] = self.api_base
+        if self.api_key:
+            call_kwargs['api_key'] = self.api_key
+
+        # Call LiteLLM
+        response = await litellm.acompletion(
+            model=model,
+            messages=messages,
+            **call_kwargs
+        )
+
+        # Extract text
+        return response.choices[0].message.content
+
+    def _build_repl_env(self, query: str, context: str) -> Dict[str, Any]:
+        """
+        Build REPL environment.
+
+        Args:
+            query: User query
+            context: Context string
+
+        Returns:
+            Environment dict
+        """
+        env: Dict[str, Any] = {
+            'context': context,
+            'query': query,
+            'recursive_llm': self._make_recursive_fn(),
+            're': re,  # Whitelist re module
+        }
+        return env
+
+    def _make_recursive_fn(self) -> Any:
+        """
+        Create recursive LLM function for REPL.
+
+        Returns:
+            Async function that can be called from REPL
+        """
+        async def recursive_llm(sub_query: str, sub_context: str) -> str:
+            """
+            Recursively process sub-context.
+
+            Args:
+                sub_query: Query for sub-context
+                sub_context: Sub-context to process
+
+            Returns:
+                Answer from recursive call
+            """
+            if self._current_depth + 1 >= self.max_depth:
+                return f"Max recursion depth ({self.max_depth}) reached"
+
+            # Create sub-RLM with increased depth
+            sub_rlm = RLM(
+                model=self.recursive_model,
+                recursive_model=self.recursive_model,
+                api_base=self.api_base,
+                api_key=self.api_key,
+                max_depth=self.max_depth,
+                max_iterations=self.max_iterations,
+                _current_depth=self._current_depth + 1,
+                **self.llm_kwargs
+            )
+
+            return await sub_rlm.acompletion(sub_query, sub_context)
+
+        # Wrap in sync function for REPL compatibility
+        def sync_recursive_llm(sub_query: str, sub_context: str) -> str:
+            """Sync wrapper for recursive_llm."""
+            # Check if we're in an async context
+            try:
+                loop = asyncio.get_running_loop()
+                # We're in async context, but REPL is sync
+                # Create a new thread to run async code
+                import concurrent.futures
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    future = executor.submit(
+                        asyncio.run,
+                        recursive_llm(sub_query, sub_context)
+                    )
+                    return future.result()
+            except RuntimeError:
+                # No running loop, safe to use asyncio.run
+                return asyncio.run(recursive_llm(sub_query, sub_context))
+
+        return sync_recursive_llm
+
+    @property
+    def stats(self) -> Dict[str, int]:
+        """Get execution statistics."""
+        return {
+            'llm_calls': self._llm_calls,
+            'iterations': self._iterations,
+            'depth': self._current_depth,
+        }
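To make the loop in `acompletion()` concrete, here is the kind of turn the model is expected to emit. It runs in the environment built by `_build_repl_env()` (so `context`, `query`, `recursive_llm`, and `re` are in scope), and each `recursive_llm` call goes through the sync wrapper from `_make_recursive_fn()`, spawning a sub-RLM at depth + 1. A sketch only; the chunk size is an arbitrary illustrative choice, not something the package prescribes.

```python
# One illustrative model-authored REPL turn, executed by REPLExecutor.
chunk_size = 50_000  # arbitrary illustrative value
chunks = [context[i:i + chunk_size] for i in range(0, len(context), chunk_size)]

# Summarize each slice with a recursive sub-call, then combine the results.
summaries = [recursive_llm("Summarize the key points", chunk) for chunk in chunks]
print("\n".join(summaries))
# A later turn would conclude with: FINAL("<combined answer>")
```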
package/recursive-llm/src/rlm/parser.py
ADDED
@@ -0,0 +1,93 @@
+"""Parse FINAL() and FINAL_VAR() statements from LLM responses."""
+
+import re
+from typing import Optional, Dict, Any
+
+
+def extract_final(response: str) -> Optional[str]:
+    """
+    Extract answer from FINAL() statement.
+
+    Args:
+        response: LLM response text
+
+    Returns:
+        Extracted answer or None if not found
+    """
+    # Look for FINAL("answer") or FINAL('answer')
+    patterns = [
+        r'FINAL\s*\(\s*"""(.*)"""',  # FINAL("""answer""") - triple double quotes
+        r"FINAL\s*\(\s*'''(.*)'''",  # FINAL('''answer''') - triple single quotes
+        r'FINAL\s*\(\s*"([^"]*)"',   # FINAL("answer") - double quotes
+        r"FINAL\s*\(\s*'([^']*)'",   # FINAL('answer') - single quotes
+    ]
+
+    for pattern in patterns:
+        match = re.search(pattern, response, re.DOTALL)
+        if match:
+            return match.group(1).strip()
+
+    return None
+
+
+def extract_final_var(response: str, env: Dict[str, Any]) -> Optional[str]:
+    """
+    Extract answer from FINAL_VAR() statement.
+
+    Args:
+        response: LLM response text
+        env: REPL environment with variables
+
+    Returns:
+        Variable value as string or None if not found
+    """
+    # Look for FINAL_VAR(var_name)
+    match = re.search(r'FINAL_VAR\s*\(\s*(\w+)\s*\)', response)
+    if not match:
+        return None
+
+    var_name = match.group(1)
+
+    # Get variable from environment
+    if var_name in env:
+        value = env[var_name]
+        return str(value)
+
+    return None
+
+
+def is_final(response: str) -> bool:
+    """
+    Check if response contains FINAL() or FINAL_VAR().
+
+    Args:
+        response: LLM response text
+
+    Returns:
+        True if response contains final statement
+    """
+    return 'FINAL(' in response or 'FINAL_VAR(' in response
+
+
+def parse_response(response: str, env: Dict[str, Any]) -> Optional[str]:
+    """
+    Parse response for any final statement.
+
+    Args:
+        response: LLM response text
+        env: REPL environment
+
+    Returns:
+        Final answer or None
+    """
+    # Try FINAL() first
+    answer = extract_final(response)
+    if answer is not None:
+        return answer
+
+    # Try FINAL_VAR()
+    answer = extract_final_var(response, env)
+    if answer is not None:
+        return answer
+
+    return None
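Since the parser is plain string matching, its contract is easy to pin down with a few examples; a sketch against the functions above, where the `env` dict stands in for the live REPL environment:

```python
from rlm.parser import parse_response, is_final

env = {"summary": "3 errors found"}  # stand-in for the REPL environment

assert is_final('Done. FINAL("42 errors")') is True
assert parse_response('Done. FINAL("42 errors")', env) == "42 errors"
assert parse_response("FINAL_VAR(summary)", env) == "3 errors found"
assert parse_response("still exploring the context...", env) is None
```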
package/recursive-llm/src/rlm/prompts.py
ADDED
@@ -0,0 +1,50 @@
+"""System prompt templates for RLM."""
+
+
+def build_system_prompt(context_size: int, depth: int = 0) -> str:
+    """
+    Build system prompt for RLM.
+
+    Args:
+        context_size: Size of context in characters
+        depth: Current recursion depth
+
+    Returns:
+        System prompt string
+    """
+    # Minimal prompt (paper-style)
+    prompt = f"""You are a Recursive Language Model. You interact with context through a Python REPL environment.
+
+The context is stored in variable `context` (not in this prompt). Size: {context_size:,} characters.
+
+Available in environment:
+- context: str (the document to analyze)
+- query: str (the question: "{"{"}query{"}"}")
+- recursive_llm(sub_query, sub_context) -> str (recursively process sub-context)
+- re: already imported regex module (use re.findall, re.search, etc.)
+
+Write Python code to answer the query. The last expression or print() output will be shown to you.
+
+Examples:
+- print(context[:100])  # See first 100 chars
+- errors = re.findall(r'ERROR', context)  # Find all ERROR
+- count = len(errors); print(count)  # Count and show
+
+When you have the answer, use FINAL("answer") - this is NOT a function, just write it as text.
+
+Depth: {depth}"""
+
+    return prompt
+
+
+def build_user_prompt(query: str) -> str:
+    """
+    Build user prompt.
+
+    Args:
+        query: User's question
+
+    Returns:
+        User prompt string
+    """
+    return query
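One subtlety worth flagging: the `{"{"}query{"}"}` construction inside the f-string renders as a literal `{query}` placeholder rather than interpolating anything, and `{context_size:,}` adds thousands separators. A quick check of the rendered output:

```python
from rlm.prompts import build_system_prompt

prompt = build_system_prompt(120_000, depth=1)
assert "Size: 120,000 characters" in prompt  # {context_size:,} formatting
assert '"{query}"' in prompt                 # escaped braces survive literally
assert prompt.endswith("Depth: 1")
```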
package/recursive-llm/src/rlm/repl.py
ADDED
@@ -0,0 +1,235 @@
+"""Safe REPL executor using RestrictedPython."""
+
+import io
+import sys
+from typing import Dict, Any, Optional
+from RestrictedPython import compile_restricted_exec, safe_globals, limited_builtins, utility_builtins
+from RestrictedPython.Guards import guarded_iter_unpack_sequence, safer_getattr
+from RestrictedPython.PrintCollector import PrintCollector
+
+
+class REPLError(Exception):
+    """Error during REPL execution."""
+    pass
+
+
+class REPLExecutor:
+    """Safe Python code executor."""
+
+    def __init__(self, timeout: int = 5, max_output_chars: int = 2000):
+        """
+        Initialize REPL executor.
+
+        Args:
+            timeout: Execution timeout in seconds (not currently enforced)
+            max_output_chars: Maximum characters to return (truncate if longer)
+        """
+        self.timeout = timeout
+        self.max_output_chars = max_output_chars
+
+    def execute(self, code: str, env: Dict[str, Any]) -> str:
+        """
+        Execute Python code in restricted environment.
+
+        Args:
+            code: Python code to execute
+            env: Environment with context, query, recursive_llm, etc.
+
+        Returns:
+            String result of execution (stdout or last expression)
+
+        Raises:
+            REPLError: If code execution fails
+        """
+        # Filter out code blocks if present (LLM might wrap code)
+        code = self._extract_code(code)
+
+        if not code.strip():
+            return "No code to execute"
+
+        # Build restricted globals
+        restricted_globals = self._build_globals(env)
+
+        # Capture stdout
+        old_stdout = sys.stdout
+        sys.stdout = captured_output = io.StringIO()
+
+        try:
+            # Compile with RestrictedPython
+            byte_code = compile_restricted_exec(code)
+
+            if byte_code.errors:
+                raise REPLError(f"Compilation error: {', '.join(byte_code.errors)}")
+
+            # Execute
+            exec(byte_code.code, restricted_globals, env)
+
+            # Get output from stdout
+            output = captured_output.getvalue()
+
+            # Get output from PrintCollector if available
+            if '_print' in env and hasattr(env['_print'], '__call__'):
+                # PrintCollector stores prints in its txt attribute
+                print_collector = env['_print']
+                if hasattr(print_collector, 'txt'):
+                    output += ''.join(print_collector.txt)
+
+            # Check if last line was an expression (try to get its value)
+            # This handles cases like: error_count (should return its value)
+            lines = code.strip().split('\n')
+            if lines:
+                last_line = lines[-1].strip()
+                # If last line is a simple expression (no assignment, no keyword)
+                if last_line and not any(kw in last_line for kw in ['=', 'import', 'def', 'class', 'if', 'for', 'while', 'with']):
+                    try:
+                        # Try to evaluate the last line as expression
+                        result = eval(last_line, restricted_globals, env)
+                        if result is not None:
+                            output += str(result) + '\n'
+                    except:
+                        pass  # Not an expression, ignore
+
+            if not output:
+                return "Code executed successfully (no output)"
+
+            # Truncate output if too long (as per paper: "truncated version of output")
+            if len(output) > self.max_output_chars:
+                truncated = output[:self.max_output_chars]
+                return f"{truncated}\n\n[Output truncated: {len(output)} chars total, showing first {self.max_output_chars}]"
+
+            return output.strip()
+
+        except Exception as e:
+            raise REPLError(f"Execution error: {str(e)}")
+
+        finally:
+            sys.stdout = old_stdout
+
+    def _extract_code(self, text: str) -> str:
+        """
+        Extract code from markdown code blocks if present.
+
+        Args:
+            text: Raw text that might contain code
+
+        Returns:
+            Extracted code
+        """
+        # Check for markdown code blocks
+        if '```python' in text:
+            start = text.find('```python') + len('```python')
+            end = text.find('```', start)
+            if end != -1:
+                return text[start:end].strip()
+
+        if '```' in text:
+            start = text.find('```') + 3
+            end = text.find('```', start)
+            if end != -1:
+                return text[start:end].strip()
+
+        return text
+
+    def _build_globals(self, env: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Build restricted globals for safe execution.
+
+        Args:
+            env: User environment
+
+        Returns:
+            Safe globals dict
+        """
+        restricted_globals = safe_globals.copy()
+        restricted_globals.update(limited_builtins)
+        restricted_globals.update(utility_builtins)
+
+        # Add guards
+        restricted_globals['_iter_unpack_sequence_'] = guarded_iter_unpack_sequence
+        restricted_globals['_getattr_'] = safer_getattr
+        restricted_globals['_getitem_'] = lambda obj, index: obj[index]
+        restricted_globals['_getiter_'] = iter
+        restricted_globals['_print_'] = PrintCollector
+
+        # Add additional safe builtins
+        restricted_globals.update({
+            # Types
+            'len': len,
+            'str': str,
+            'int': int,
+            'float': float,
+            'bool': bool,
+            'list': list,
+            'dict': dict,
+            'tuple': tuple,
+            'set': set,
+            'frozenset': frozenset,
+            'bytes': bytes,
+            'bytearray': bytearray,
+
+            # Iteration
+            'range': range,
+            'enumerate': enumerate,
+            'zip': zip,
+            'map': map,
+            'filter': filter,
+            'reversed': reversed,
+            'iter': iter,
+            'next': next,
+
+            # Aggregation
+            'sorted': sorted,
+            'sum': sum,
+            'min': min,
+            'max': max,
+            'any': any,
+            'all': all,
+
+            # Math
+            'abs': abs,
+            'round': round,
+            'pow': pow,
+            'divmod': divmod,
+
+            # String/repr
+            'chr': chr,
+            'ord': ord,
+            'hex': hex,
+            'oct': oct,
+            'bin': bin,
+            'repr': repr,
+            'ascii': ascii,
+            'format': format,
+
+            # Type checking
+            'isinstance': isinstance,
+            'issubclass': issubclass,
+            'callable': callable,
+            'type': type,
+            'hasattr': hasattr,
+
+            # Constants
+            'True': True,
+            'False': False,
+            'None': None,
+        })
+
+        # Add safe standard library modules
+        # These are read-only and don't allow file/network access
+        import re
+        import json
+        import math
+        from datetime import datetime, timedelta
+        from collections import Counter, defaultdict
+
+        restricted_globals.update({
+            're': re,                    # Regex (read-only)
+            'json': json,                # JSON parsing (read-only)
+            'math': math,                # Math functions
+            'datetime': datetime,        # Date parsing
+            'timedelta': timedelta,      # Time deltas
+            'Counter': Counter,          # Counting helper
+            'defaultdict': defaultdict,  # Dict with defaults
+        })
+
+        return restricted_globals
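A minimal exercise of the executor above (a sketch; `re` is injected by `_build_globals()`, and the trailing bare expression is evaluated and echoed, mirroring REPL semantics):

```python
from rlm.repl import REPLExecutor

executor = REPLExecutor(max_output_chars=500)
env = {"context": "ERROR: disk full\nok\nERROR: timeout", "query": "count errors"}

# Assignment runs under RestrictedPython; the last line is a bare expression,
# so execute() evaluates it and appends its value to the returned output.
out = executor.execute("errors = re.findall(r'ERROR', context)\nlen(errors)", env)
print(out)  # -> "2"
```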
package/recursive-llm/src/rlm/types.py
ADDED
@@ -0,0 +1,37 @@
+"""Type definitions for RLM."""
+
+from typing import TypedDict, Optional, Any, Callable, Awaitable
+
+
+class Message(TypedDict):
+    """LLM message format."""
+    role: str
+    content: str
+
+
+class RLMConfig(TypedDict, total=False):
+    """Configuration for RLM instance."""
+    model: str
+    recursive_model: Optional[str]
+    api_base: Optional[str]
+    api_key: Optional[str]
+    max_depth: int
+    max_iterations: int
+    temperature: float
+    timeout: int
+
+
+class REPLEnvironment(TypedDict, total=False):
+    """REPL execution environment."""
+    context: str
+    query: str
+    recursive_llm: Callable[[str, str], Awaitable[str]]
+    re: Any  # re module
+
+
+class CompletionResult(TypedDict):
+    """Result from RLM completion."""
+    answer: str
+    iterations: int
+    depth: int
+    llm_calls: int