recursive-llm-ts 4.9.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,348 @@
1
+ package rlm
2
+
3
+ import (
4
+ "bufio"
5
+ "encoding/json"
6
+ "fmt"
7
+ "os"
8
+ "strings"
9
+ "sync"
10
+ "sync/atomic"
11
+ "time"
12
+ )
13
+
14
+ // ─── LLM-Map Operator ──────────────────────────────────────────────────────
15
+ // Implements Operator-Level Recursion from the LCM paper (Section 3.1).
16
+ // Processes each item in a JSONL file via independent LLM calls with
17
+ // schema validation, retry logic, and in-memory per-item status tracking.
18
+
19
+ // LLMMapConfig configures an LLM-Map operation.
20
+ type LLMMapConfig struct {
21
+ InputPath string `json:"input_path"` // Path to JSONL input file
22
+ OutputPath string `json:"output_path"` // Path to JSONL output file
23
+ Prompt string `json:"prompt"` // Prompt template ({{item}} is replaced)
24
+ OutputSchema *JSONSchema `json:"output_schema"` // Schema for validation
25
+ Concurrency int `json:"concurrency"` // Worker pool size (default: 16)
26
+ MaxRetries int `json:"max_retries"` // Per-item retry limit (default: 3)
27
+ Model string `json:"model"` // Model to use (default: engine model)
28
+ }
29
+
30
+ // LLMMapResult contains the results of an LLM-Map operation.
31
+ type LLMMapResult struct {
32
+ TotalItems int `json:"total_items"`
33
+ Completed int `json:"completed"`
34
+ Failed int `json:"failed"`
35
+ OutputPath string `json:"output_path"`
36
+ Duration time.Duration `json:"duration"`
37
+ TokensUsed int `json:"tokens_used"`
38
+ ItemResults []MapItemResult `json:"item_results,omitempty"`
39
+ }
40
+
41
+ // MapItemResult tracks the status of a single item in the map operation.
42
+ type MapItemResult struct {
43
+ Index int `json:"index"`
44
+ Status MapItemStatus `json:"status"`
45
+ Output json.RawMessage `json:"output,omitempty"`
46
+ Error string `json:"error,omitempty"`
47
+ Retries int `json:"retries"`
48
+ }
49
+
50
// MapItemStatus is the execution status of a single map item.
type MapItemStatus string

// Status values a map item can carry.
const (
	// MapItemPending marks an item that has not been processed yet.
	MapItemPending = MapItemStatus("pending")
	// MapItemRunning marks an item that is being processed.
	MapItemRunning = MapItemStatus("running")
	// MapItemCompleted marks an item that produced an accepted output.
	MapItemCompleted = MapItemStatus("completed")
	// MapItemFailed marks an item that exhausted its attempts.
	MapItemFailed = MapItemStatus("failed")
)
59
+
60
+ // LLMMapper executes LLM-Map operations.
61
+ type LLMMapper struct {
62
+ model string
63
+ apiBase string
64
+ apiKey string
65
+ timeout int
66
+ extraParams map[string]interface{}
67
+ observer *Observer
68
+ }
69
+
70
+ // NewLLMMapper creates a new LLM-Map executor.
71
+ func NewLLMMapper(model, apiBase, apiKey string, timeout int, extraParams map[string]interface{}, observer *Observer) *LLMMapper {
72
+ return &LLMMapper{
73
+ model: model,
74
+ apiBase: apiBase,
75
+ apiKey: apiKey,
76
+ timeout: timeout,
77
+ extraParams: extraParams,
78
+ observer: observer,
79
+ }
80
+ }
81
+
82
+ // Execute runs an LLM-Map operation: parallel LLM calls over JSONL input.
83
+ func (m *LLMMapper) Execute(config LLMMapConfig) (*LLMMapResult, error) {
84
+ start := time.Now()
85
+
86
+ // Apply defaults
87
+ if config.Concurrency <= 0 {
88
+ config.Concurrency = 16
89
+ }
90
+ if config.MaxRetries <= 0 {
91
+ config.MaxRetries = 3
92
+ }
93
+ model := config.Model
94
+ if model == "" {
95
+ model = m.model
96
+ }
97
+
98
+ m.observer.Debug("lcm.map", "Starting LLM-Map: input=%s, concurrency=%d, model=%s",
99
+ config.InputPath, config.Concurrency, model)
100
+
101
+ // Read input items
102
+ items, err := readJSONLFile(config.InputPath)
103
+ if err != nil {
104
+ return nil, fmt.Errorf("failed to read input: %w", err)
105
+ }
106
+
107
+ m.observer.Debug("lcm.map", "Read %d items from %s", len(items), config.InputPath)
108
+
109
+ // Initialize results tracking
110
+ results := make([]MapItemResult, len(items))
111
+ for i := range results {
112
+ results[i] = MapItemResult{
113
+ Index: i,
114
+ Status: MapItemPending,
115
+ }
116
+ }
117
+
118
+ // Worker pool
119
+ var wg sync.WaitGroup
120
+ itemChan := make(chan int, len(items))
121
+ var totalTokens int64
122
+
123
+ // Feed items to workers
124
+ for i := range items {
125
+ itemChan <- i
126
+ }
127
+ close(itemChan)
128
+
129
+ // Spawn workers
130
+ var mu sync.Mutex
131
+ for w := 0; w < config.Concurrency && w < len(items); w++ {
132
+ wg.Add(1)
133
+ go func() {
134
+ defer wg.Done()
135
+ for idx := range itemChan {
136
+ result := m.processItem(items[idx], config.Prompt, config.OutputSchema, model, config.MaxRetries)
137
+ atomic.AddInt64(&totalTokens, int64(result.tokensUsed))
138
+
139
+ mu.Lock()
140
+ results[idx] = MapItemResult{
141
+ Index: idx,
142
+ Status: result.status,
143
+ Output: result.output,
144
+ Error: result.errMsg,
145
+ Retries: result.retries,
146
+ }
147
+ mu.Unlock()
148
+ }
149
+ }()
150
+ }
151
+
152
+ wg.Wait()
153
+
154
+ // Write output file
155
+ if config.OutputPath != "" {
156
+ if err := writeJSONLOutput(config.OutputPath, results); err != nil {
157
+ return nil, fmt.Errorf("failed to write output: %w", err)
158
+ }
159
+ }
160
+
161
+ // Count results
162
+ completed, failed := 0, 0
163
+ for _, r := range results {
164
+ switch r.Status {
165
+ case MapItemCompleted:
166
+ completed++
167
+ case MapItemFailed:
168
+ failed++
169
+ }
170
+ }
171
+
172
+ duration := time.Since(start)
173
+ m.observer.Debug("lcm.map", "LLM-Map complete: %d/%d succeeded, %d failed in %s",
174
+ completed, len(items), failed, duration)
175
+ m.observer.Event("lcm.map_complete", map[string]string{
176
+ "total_items": fmt.Sprintf("%d", len(items)),
177
+ "completed": fmt.Sprintf("%d", completed),
178
+ "failed": fmt.Sprintf("%d", failed),
179
+ "duration_ms": fmt.Sprintf("%d", duration.Milliseconds()),
180
+ "tokens_used": fmt.Sprintf("%d", totalTokens),
181
+ })
182
+
183
+ return &LLMMapResult{
184
+ TotalItems: len(items),
185
+ Completed: completed,
186
+ Failed: failed,
187
+ OutputPath: config.OutputPath,
188
+ Duration: duration,
189
+ TokensUsed: int(totalTokens),
190
+ ItemResults: results,
191
+ }, nil
192
+ }
193
+
194
+ // ─── Per-Item Processing ────────────────────────────────────────────────────
195
+
196
+ type itemProcessResult struct {
197
+ status MapItemStatus
198
+ output json.RawMessage
199
+ errMsg string
200
+ retries int
201
+ tokensUsed int
202
+ }
203
+
204
+ func (m *LLMMapper) processItem(item json.RawMessage, promptTemplate string, schema *JSONSchema, model string, maxRetries int) itemProcessResult {
205
+ // Build prompt by replacing {{item}} placeholder
206
+ prompt := strings.ReplaceAll(promptTemplate, "{{item}}", string(item))
207
+
208
+ var lastErr string
209
+ for attempt := 0; attempt <= maxRetries; attempt++ {
210
+ // Add validation feedback on retry
211
+ if attempt > 0 && lastErr != "" {
212
+ prompt = fmt.Sprintf("%s\n\nPrevious attempt failed validation: %s\nPlease fix the output to match the required schema.", prompt, lastErr)
213
+ }
214
+
215
+ request := ChatRequest{
216
+ Model: model,
217
+ Messages: []Message{
218
+ {Role: "user", Content: prompt},
219
+ },
220
+ APIBase: m.apiBase,
221
+ APIKey: m.apiKey,
222
+ Timeout: m.timeout,
223
+ ExtraParams: m.extraParams,
224
+ }
225
+
226
+ result, err := CallChatCompletion(request)
227
+ if err != nil {
228
+ lastErr = err.Error()
229
+ continue
230
+ }
231
+
232
+ tokensUsed := 0
233
+ if result.Usage != nil {
234
+ tokensUsed = result.Usage.TotalTokens
235
+ }
236
+
237
+ // Extract JSON from response
238
+ output := extractJSON(result.Content)
239
+ if output == nil {
240
+ lastErr = "no valid JSON found in response"
241
+ continue
242
+ }
243
+
244
+ // Validate against schema if provided
245
+ if schema != nil {
246
+ if validationErr := validateMapOutput(output, schema); validationErr != "" {
247
+ lastErr = validationErr
248
+ continue
249
+ }
250
+ }
251
+
252
+ return itemProcessResult{
253
+ status: MapItemCompleted,
254
+ output: output,
255
+ retries: attempt,
256
+ tokensUsed: tokensUsed,
257
+ }
258
+ }
259
+
260
+ return itemProcessResult{
261
+ status: MapItemFailed,
262
+ errMsg: lastErr,
263
+ retries: maxRetries,
264
+ }
265
+ }
266
+
267
+ // ─── JSONL I/O ──────────────────────────────────────────────────────────────
268
+
269
// readJSONLFile loads a JSONL file and returns one raw message per non-blank
// line, with surrounding whitespace trimmed. Lines are not checked for JSON
// validity. A single line may be at most 10 MiB; longer lines surface as a
// scanner error.
func readJSONLFile(path string) ([]json.RawMessage, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	// Read-only handle: a close failure carries no useful information here.
	defer func() { _ = file.Close() }()

	const (
		initialBufSize = 1024 * 1024
		maxLineSize    = 10 * 1024 * 1024
	)

	// The default scanner limit is too small for large records.
	lines := bufio.NewScanner(file)
	lines.Buffer(make([]byte, 0, initialBufSize), maxLineSize)

	var records []json.RawMessage
	for lines.Scan() {
		if trimmed := strings.TrimSpace(lines.Text()); trimmed != "" {
			records = append(records, json.RawMessage(trimmed))
		}
	}

	if scanErr := lines.Err(); scanErr != nil {
		return nil, fmt.Errorf("error reading JSONL: %w", scanErr)
	}
	return records, nil
}
295
+
296
+ func writeJSONLOutput(path string, results []MapItemResult) error {
297
+ f, err := os.Create(path)
298
+ if err != nil {
299
+ return err
300
+ }
301
+ defer func() { _ = f.Close() }()
302
+
303
+ w := bufio.NewWriter(f)
304
+ for _, r := range results {
305
+ if r.Status == MapItemCompleted && r.Output != nil {
306
+ if _, writeErr := w.Write(r.Output); writeErr != nil {
307
+ return writeErr
308
+ }
309
+ if _, writeErr := w.WriteString("\n"); writeErr != nil {
310
+ return writeErr
311
+ }
312
+ } else {
313
+ // Write error record
314
+ errRecord := map[string]interface{}{
315
+ "_error": r.Error,
316
+ "_index": r.Index,
317
+ }
318
+ data, _ := json.Marshal(errRecord)
319
+ if _, writeErr := w.Write(data); writeErr != nil {
320
+ return writeErr
321
+ }
322
+ if _, writeErr := w.WriteString("\n"); writeErr != nil {
323
+ return writeErr
324
+ }
325
+ }
326
+ }
327
+
328
+ return w.Flush()
329
+ }
330
+
331
+ // ─── Helpers ────────────────────────────────────────────────────────────────
332
+
333
+ // extractJSON delegates to the shared ExtractFirstJSON (json_extraction.go).
334
+ var extractJSON = ExtractFirstJSON
335
+
336
+ // validateMapOutput validates JSON output against a schema.
337
+ func validateMapOutput(output json.RawMessage, schema *JSONSchema) string {
338
+ var parsed interface{}
339
+ if err := json.Unmarshal(output, &parsed); err != nil {
340
+ return fmt.Sprintf("invalid JSON: %v", err)
341
+ }
342
+
343
+ // Use the existing schema validation infrastructure
344
+ if err := validateValue(parsed, schema); err != nil {
345
+ return err.Error()
346
+ }
347
+ return ""
348
+ }