recursive-llm-ts 4.9.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/bin/rlm-go +0 -0
- package/dist/bridge-interface.d.ts +149 -0
- package/go/cmd/rlm/main.go +39 -6
- package/go/go.mod +13 -3
- package/go/go.sum +53 -2
- package/go/rlm/compression.go +59 -0
- package/go/rlm/context_overflow.go +21 -36
- package/go/rlm/context_savings_test.go +387 -0
- package/go/rlm/json_extraction.go +140 -0
- package/go/rlm/lcm_agentic_map.go +317 -0
- package/go/rlm/lcm_context_loop.go +309 -0
- package/go/rlm/lcm_delegation.go +257 -0
- package/go/rlm/lcm_episodes.go +313 -0
- package/go/rlm/lcm_episodes_test.go +384 -0
- package/go/rlm/lcm_files.go +424 -0
- package/go/rlm/lcm_map.go +348 -0
- package/go/rlm/lcm_store.go +615 -0
- package/go/rlm/lcm_summarizer.go +239 -0
- package/go/rlm/lcm_test.go +1407 -0
- package/go/rlm/rlm.go +124 -1
- package/go/rlm/store_backend.go +121 -0
- package/go/rlm/store_backend_test.go +428 -0
- package/go/rlm/store_sqlite.go +575 -0
- package/go/rlm/structured.go +6 -83
- package/go/rlm/token_tracking_test.go +25 -11
- package/go/rlm/tokenizer.go +216 -0
- package/go/rlm/tokenizer_test.go +305 -0
- package/go/rlm/types.go +23 -1
- package/go/rlm.test +0 -0
- package/package.json +1 -1
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
package rlm
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"bufio"
|
|
5
|
+
"encoding/json"
|
|
6
|
+
"fmt"
|
|
7
|
+
"os"
|
|
8
|
+
"strings"
|
|
9
|
+
"sync"
|
|
10
|
+
"sync/atomic"
|
|
11
|
+
"time"
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
// ─── LLM-Map Operator ──────────────────────────────────────────────────────
// Implements Operator-Level Recursion from the LCM paper (Section 3.1).
// Processes each item in a JSONL file via independent LLM calls with
// schema validation, retry logic, and database-backed execution tracking.

// LLMMapConfig configures an LLM-Map operation.
// Zero values for Concurrency and MaxRetries are replaced with the
// documented defaults by LLMMapper.Execute.
type LLMMapConfig struct {
	InputPath    string      `json:"input_path"`    // Path to JSONL input file (one JSON value per line)
	OutputPath   string      `json:"output_path"`   // Path to JSONL output file; empty disables writing
	Prompt       string      `json:"prompt"`        // Prompt template ({{item}} is replaced with the raw item JSON)
	OutputSchema *JSONSchema `json:"output_schema"` // Schema for validation; nil skips validation
	Concurrency  int         `json:"concurrency"`   // Worker pool size (default: 16)
	MaxRetries   int         `json:"max_retries"`   // Per-item retry limit (default: 3)
	Model        string      `json:"model"`         // Model to use (default: engine model)
}
|
|
29
|
+
|
|
30
|
+
// LLMMapResult contains the results of an LLM-Map operation.
type LLMMapResult struct {
	TotalItems  int             `json:"total_items"`            // Number of input items read
	Completed   int             `json:"completed"`              // Items that produced valid output
	Failed      int             `json:"failed"`                 // Items that exhausted retries
	OutputPath  string          `json:"output_path"`            // Echo of the configured output path (may be empty)
	Duration    time.Duration   `json:"duration"`               // Wall-clock time for the whole operation
	TokensUsed  int             `json:"tokens_used"`            // Total LLM tokens reported across items
	ItemResults []MapItemResult `json:"item_results,omitempty"` // Per-item detail, index-aligned with the input
}
|
|
40
|
+
|
|
41
|
+
// MapItemResult tracks the status of a single item in the map operation.
type MapItemResult struct {
	Index   int             `json:"index"`            // Position of the item in the input file
	Status  MapItemStatus   `json:"status"`           // Final execution status
	Output  json.RawMessage `json:"output,omitempty"` // Validated JSON output (completed items only)
	Error   string          `json:"error,omitempty"`  // Last error message (failed items only)
	Retries int             `json:"retries"`          // Retry attempts consumed
}
|
|
49
|
+
|
|
50
|
+
// MapItemStatus represents the execution status of a map item.
type MapItemStatus string

// Lifecycle states for a map item.
const (
	MapItemPending   MapItemStatus = "pending"   // queued, not yet processed
	MapItemRunning   MapItemStatus = "running"   // in flight — not set in this file; presumably used by external tracking (verify)
	MapItemCompleted MapItemStatus = "completed" // produced schema-valid output
	MapItemFailed    MapItemStatus = "failed"    // exhausted retries without valid output
)
|
|
59
|
+
|
|
60
|
+
// LLMMapper executes LLM-Map operations.
type LLMMapper struct {
	model       string                 // default model when the config omits one
	apiBase     string                 // LLM API base URL, forwarded on each ChatRequest
	apiKey      string                 // LLM API credential, forwarded on each ChatRequest
	timeout     int                    // per-request timeout, forwarded on each ChatRequest
	extraParams map[string]interface{} // extra provider parameters, forwarded on each ChatRequest
	observer    *Observer              // debug/event sink; Execute assumes non-nil
}
|
|
69
|
+
|
|
70
|
+
// NewLLMMapper creates a new LLM-Map executor.
|
|
71
|
+
func NewLLMMapper(model, apiBase, apiKey string, timeout int, extraParams map[string]interface{}, observer *Observer) *LLMMapper {
|
|
72
|
+
return &LLMMapper{
|
|
73
|
+
model: model,
|
|
74
|
+
apiBase: apiBase,
|
|
75
|
+
apiKey: apiKey,
|
|
76
|
+
timeout: timeout,
|
|
77
|
+
extraParams: extraParams,
|
|
78
|
+
observer: observer,
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Execute runs an LLM-Map operation: parallel LLM calls over JSONL input.
|
|
83
|
+
func (m *LLMMapper) Execute(config LLMMapConfig) (*LLMMapResult, error) {
|
|
84
|
+
start := time.Now()
|
|
85
|
+
|
|
86
|
+
// Apply defaults
|
|
87
|
+
if config.Concurrency <= 0 {
|
|
88
|
+
config.Concurrency = 16
|
|
89
|
+
}
|
|
90
|
+
if config.MaxRetries <= 0 {
|
|
91
|
+
config.MaxRetries = 3
|
|
92
|
+
}
|
|
93
|
+
model := config.Model
|
|
94
|
+
if model == "" {
|
|
95
|
+
model = m.model
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
m.observer.Debug("lcm.map", "Starting LLM-Map: input=%s, concurrency=%d, model=%s",
|
|
99
|
+
config.InputPath, config.Concurrency, model)
|
|
100
|
+
|
|
101
|
+
// Read input items
|
|
102
|
+
items, err := readJSONLFile(config.InputPath)
|
|
103
|
+
if err != nil {
|
|
104
|
+
return nil, fmt.Errorf("failed to read input: %w", err)
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
m.observer.Debug("lcm.map", "Read %d items from %s", len(items), config.InputPath)
|
|
108
|
+
|
|
109
|
+
// Initialize results tracking
|
|
110
|
+
results := make([]MapItemResult, len(items))
|
|
111
|
+
for i := range results {
|
|
112
|
+
results[i] = MapItemResult{
|
|
113
|
+
Index: i,
|
|
114
|
+
Status: MapItemPending,
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Worker pool
|
|
119
|
+
var wg sync.WaitGroup
|
|
120
|
+
itemChan := make(chan int, len(items))
|
|
121
|
+
var totalTokens int64
|
|
122
|
+
|
|
123
|
+
// Feed items to workers
|
|
124
|
+
for i := range items {
|
|
125
|
+
itemChan <- i
|
|
126
|
+
}
|
|
127
|
+
close(itemChan)
|
|
128
|
+
|
|
129
|
+
// Spawn workers
|
|
130
|
+
var mu sync.Mutex
|
|
131
|
+
for w := 0; w < config.Concurrency && w < len(items); w++ {
|
|
132
|
+
wg.Add(1)
|
|
133
|
+
go func() {
|
|
134
|
+
defer wg.Done()
|
|
135
|
+
for idx := range itemChan {
|
|
136
|
+
result := m.processItem(items[idx], config.Prompt, config.OutputSchema, model, config.MaxRetries)
|
|
137
|
+
atomic.AddInt64(&totalTokens, int64(result.tokensUsed))
|
|
138
|
+
|
|
139
|
+
mu.Lock()
|
|
140
|
+
results[idx] = MapItemResult{
|
|
141
|
+
Index: idx,
|
|
142
|
+
Status: result.status,
|
|
143
|
+
Output: result.output,
|
|
144
|
+
Error: result.errMsg,
|
|
145
|
+
Retries: result.retries,
|
|
146
|
+
}
|
|
147
|
+
mu.Unlock()
|
|
148
|
+
}
|
|
149
|
+
}()
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
wg.Wait()
|
|
153
|
+
|
|
154
|
+
// Write output file
|
|
155
|
+
if config.OutputPath != "" {
|
|
156
|
+
if err := writeJSONLOutput(config.OutputPath, results); err != nil {
|
|
157
|
+
return nil, fmt.Errorf("failed to write output: %w", err)
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
// Count results
|
|
162
|
+
completed, failed := 0, 0
|
|
163
|
+
for _, r := range results {
|
|
164
|
+
switch r.Status {
|
|
165
|
+
case MapItemCompleted:
|
|
166
|
+
completed++
|
|
167
|
+
case MapItemFailed:
|
|
168
|
+
failed++
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
duration := time.Since(start)
|
|
173
|
+
m.observer.Debug("lcm.map", "LLM-Map complete: %d/%d succeeded, %d failed in %s",
|
|
174
|
+
completed, len(items), failed, duration)
|
|
175
|
+
m.observer.Event("lcm.map_complete", map[string]string{
|
|
176
|
+
"total_items": fmt.Sprintf("%d", len(items)),
|
|
177
|
+
"completed": fmt.Sprintf("%d", completed),
|
|
178
|
+
"failed": fmt.Sprintf("%d", failed),
|
|
179
|
+
"duration_ms": fmt.Sprintf("%d", duration.Milliseconds()),
|
|
180
|
+
"tokens_used": fmt.Sprintf("%d", totalTokens),
|
|
181
|
+
})
|
|
182
|
+
|
|
183
|
+
return &LLMMapResult{
|
|
184
|
+
TotalItems: len(items),
|
|
185
|
+
Completed: completed,
|
|
186
|
+
Failed: failed,
|
|
187
|
+
OutputPath: config.OutputPath,
|
|
188
|
+
Duration: duration,
|
|
189
|
+
TokensUsed: int(totalTokens),
|
|
190
|
+
ItemResults: results,
|
|
191
|
+
}, nil
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// ─── Per-Item Processing ────────────────────────────────────────────────────

// itemProcessResult is the internal outcome of processing one map item;
// Execute translates it into a public MapItemResult.
type itemProcessResult struct {
	status     MapItemStatus   // final status (completed or failed)
	output     json.RawMessage // extracted JSON output (nil on failure)
	errMsg     string          // last error encountered (empty on success)
	retries    int             // retry attempts consumed
	tokensUsed int             // LLM tokens reported for this item
}
|
|
203
|
+
|
|
204
|
+
func (m *LLMMapper) processItem(item json.RawMessage, promptTemplate string, schema *JSONSchema, model string, maxRetries int) itemProcessResult {
|
|
205
|
+
// Build prompt by replacing {{item}} placeholder
|
|
206
|
+
prompt := strings.ReplaceAll(promptTemplate, "{{item}}", string(item))
|
|
207
|
+
|
|
208
|
+
var lastErr string
|
|
209
|
+
for attempt := 0; attempt <= maxRetries; attempt++ {
|
|
210
|
+
// Add validation feedback on retry
|
|
211
|
+
if attempt > 0 && lastErr != "" {
|
|
212
|
+
prompt = fmt.Sprintf("%s\n\nPrevious attempt failed validation: %s\nPlease fix the output to match the required schema.", prompt, lastErr)
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
request := ChatRequest{
|
|
216
|
+
Model: model,
|
|
217
|
+
Messages: []Message{
|
|
218
|
+
{Role: "user", Content: prompt},
|
|
219
|
+
},
|
|
220
|
+
APIBase: m.apiBase,
|
|
221
|
+
APIKey: m.apiKey,
|
|
222
|
+
Timeout: m.timeout,
|
|
223
|
+
ExtraParams: m.extraParams,
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
result, err := CallChatCompletion(request)
|
|
227
|
+
if err != nil {
|
|
228
|
+
lastErr = err.Error()
|
|
229
|
+
continue
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
tokensUsed := 0
|
|
233
|
+
if result.Usage != nil {
|
|
234
|
+
tokensUsed = result.Usage.TotalTokens
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
// Extract JSON from response
|
|
238
|
+
output := extractJSON(result.Content)
|
|
239
|
+
if output == nil {
|
|
240
|
+
lastErr = "no valid JSON found in response"
|
|
241
|
+
continue
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// Validate against schema if provided
|
|
245
|
+
if schema != nil {
|
|
246
|
+
if validationErr := validateMapOutput(output, schema); validationErr != "" {
|
|
247
|
+
lastErr = validationErr
|
|
248
|
+
continue
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
return itemProcessResult{
|
|
253
|
+
status: MapItemCompleted,
|
|
254
|
+
output: output,
|
|
255
|
+
retries: attempt,
|
|
256
|
+
tokensUsed: tokensUsed,
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
return itemProcessResult{
|
|
261
|
+
status: MapItemFailed,
|
|
262
|
+
errMsg: lastErr,
|
|
263
|
+
retries: maxRetries,
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
// ─── JSONL I/O ──────────────────────────────────────────────────────────────
|
|
268
|
+
|
|
269
|
+
// readJSONLFile loads every non-blank line of a JSONL file as a raw JSON
// value. Lines are whitespace-trimmed and blank lines are skipped; lines up
// to 10 MiB are supported.
func readJSONLFile(path string) ([]json.RawMessage, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer func() { _ = file.Close() }()

	sc := bufio.NewScanner(file)
	// Allow lines far larger than the bufio default.
	sc.Buffer(make([]byte, 0, 1024*1024), 10*1024*1024)

	var records []json.RawMessage
	for sc.Scan() {
		text := strings.TrimSpace(sc.Text())
		if text == "" {
			continue
		}
		records = append(records, json.RawMessage(text))
	}

	if err := sc.Err(); err != nil {
		return nil, fmt.Errorf("error reading JSONL: %w", err)
	}

	return records, nil
}
|
|
295
|
+
|
|
296
|
+
func writeJSONLOutput(path string, results []MapItemResult) error {
|
|
297
|
+
f, err := os.Create(path)
|
|
298
|
+
if err != nil {
|
|
299
|
+
return err
|
|
300
|
+
}
|
|
301
|
+
defer func() { _ = f.Close() }()
|
|
302
|
+
|
|
303
|
+
w := bufio.NewWriter(f)
|
|
304
|
+
for _, r := range results {
|
|
305
|
+
if r.Status == MapItemCompleted && r.Output != nil {
|
|
306
|
+
if _, writeErr := w.Write(r.Output); writeErr != nil {
|
|
307
|
+
return writeErr
|
|
308
|
+
}
|
|
309
|
+
if _, writeErr := w.WriteString("\n"); writeErr != nil {
|
|
310
|
+
return writeErr
|
|
311
|
+
}
|
|
312
|
+
} else {
|
|
313
|
+
// Write error record
|
|
314
|
+
errRecord := map[string]interface{}{
|
|
315
|
+
"_error": r.Error,
|
|
316
|
+
"_index": r.Index,
|
|
317
|
+
}
|
|
318
|
+
data, _ := json.Marshal(errRecord)
|
|
319
|
+
if _, writeErr := w.Write(data); writeErr != nil {
|
|
320
|
+
return writeErr
|
|
321
|
+
}
|
|
322
|
+
if _, writeErr := w.WriteString("\n"); writeErr != nil {
|
|
323
|
+
return writeErr
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
return w.Flush()
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
332
|
+
|
|
333
|
+
// extractJSON delegates to the shared ExtractFirstJSON (json_extraction.go).
// Declared as a package-level variable rather than a plain call — presumably
// to allow stubbing in tests; verify before converting to a direct call.
var extractJSON = ExtractFirstJSON
|
|
335
|
+
|
|
336
|
+
// validateMapOutput validates JSON output against a schema.
|
|
337
|
+
func validateMapOutput(output json.RawMessage, schema *JSONSchema) string {
|
|
338
|
+
var parsed interface{}
|
|
339
|
+
if err := json.Unmarshal(output, &parsed); err != nil {
|
|
340
|
+
return fmt.Sprintf("invalid JSON: %v", err)
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
// Use the existing schema validation infrastructure
|
|
344
|
+
if err := validateValue(parsed, schema); err != nil {
|
|
345
|
+
return err.Error()
|
|
346
|
+
}
|
|
347
|
+
return ""
|
|
348
|
+
}
|