recursive-llm-ts 4.8.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
+ package rlm
2
+
3
+ import (
4
+ "encoding/json"
5
+ "strings"
6
+ )
7
+
8
+ // ─── Shared JSON Extraction Utilities ───────────────────────────────────────
9
+ // Consolidated from structured.go and lcm_map.go to eliminate duplication.
10
+ // Both the structured output parser and the LLM-Map operator need to extract
11
+ // valid JSON from LLM responses that may contain markdown, explanatory text,
12
+ // or malformed output.
13
+
14
// StripMarkdownCodeBlock removes markdown ``` fencing from LLM output.
//
// The input is trimmed first. If it then begins with a fence, the opening
// fence line (including any language tag such as "json") is dropped. The last
// line is dropped only when it is itself a closing fence, so a truncated or
// unterminated block keeps its final line of content. The remaining text is
// returned trimmed. Input that does not start with a fence, or that consists
// of a single line, is returned trimmed but otherwise unchanged.
func StripMarkdownCodeBlock(s string) string {
	s = strings.TrimSpace(s)
	if !strings.HasPrefix(s, "```") {
		return s
	}

	lines := strings.Split(s, "\n")
	if len(lines) < 2 {
		// Only the fence line itself (e.g. "```" or "```{}```"): nothing to unwrap.
		return s
	}

	// Drop the opening fence line.
	body := lines[1:]

	// Drop the closing fence only if it is really there; blindly removing the
	// last line would eat content when the model forgot to close the block.
	last := len(body) - 1
	if strings.HasPrefix(strings.TrimSpace(body[last]), "```") {
		body = body[:last]
	}

	return strings.TrimSpace(strings.Join(body, "\n"))
}
26
+
27
// ExtractBalancedBraces returns the first balanced JSON object or array in s
// that opens with startChar ('{' or '['; any other value is paired with '}').
//
// Scanning begins at the first occurrence of startChar, so text before it is
// never part of the result. Nested delimiters of the same kind are tracked to
// arbitrary depth, and delimiters inside double-quoted strings (including
// strings containing backslash-escaped quotes) are ignored.
//
// Returns the balanced substring, or "" if startChar does not occur or the
// structure it opens is never closed.
func ExtractBalancedBraces(s string, startChar byte) string {
	endChar := byte('}')
	if startChar == '[' {
		endChar = ']'
	}

	// Anchor on the opener: the result must begin with startChar, and starting
	// here guarantees depth never goes negative because of a stray closer.
	start := strings.IndexByte(s, startChar)
	if start < 0 {
		return ""
	}

	depth := 0
	inString := false
	escaped := false

	for i := start; i < len(s); i++ {
		c := s[i]
		switch {
		case escaped:
			// Character following a backslash inside a string: always literal.
			escaped = false
		case inString:
			switch c {
			case '\\':
				escaped = true
			case '"':
				inString = false
			}
		case c == '"':
			inString = true
		case c == startChar:
			depth++
		case c == endChar:
			depth--
			if depth == 0 {
				return s[start : i+1]
			}
		}
	}
	return ""
}
70
+
71
+ // ExtractAllBalancedJSON finds all top-level JSON objects in a string by tracking
72
+ // balanced braces. Handles arbitrary nesting depth and string escaping.
73
+ // Used by structured output parsing which needs all candidates for schema matching.
74
+ func ExtractAllBalancedJSON(s string) []string {
75
+ var results []string
76
+ inString := false
77
+ escaped := false
78
+
79
+ for i := 0; i < len(s); i++ {
80
+ c := s[i]
81
+
82
+ if c == '{' && !inString {
83
+ // Found start of a potential JSON object; extract balanced match
84
+ balanced := ExtractBalancedBraces(s[i:], '{')
85
+ if balanced != "" {
86
+ results = append(results, balanced)
87
+ i += len(balanced) - 1 // skip past this object
88
+ continue
89
+ }
90
+ }
91
+
92
+ // Track string state in the outer scan (for skipping { inside strings)
93
+ if escaped {
94
+ escaped = false
95
+ continue
96
+ }
97
+ if c == '\\' && inString {
98
+ escaped = true
99
+ continue
100
+ }
101
+ if c == '"' {
102
+ inString = !inString
103
+ }
104
+ }
105
+
106
+ return results
107
+ }
108
+
109
+ // ExtractFirstJSON finds the first valid JSON object or array in a string.
110
+ // Tries full content first, then searches for { or [ and attempts balanced extraction.
111
+ // Returns nil if no valid JSON is found.
112
+ func ExtractFirstJSON(content string) json.RawMessage {
113
+ content = StripMarkdownCodeBlock(content)
114
+
115
+ // Try to parse the whole content as JSON
116
+ var js json.RawMessage
117
+ if err := json.Unmarshal([]byte(content), &js); err == nil {
118
+ return js
119
+ }
120
+
121
+ // Find first { or [ and try balanced extraction
122
+ for _, startChar := range []byte{'{', '['} {
123
+ idx := strings.IndexByte(content, startChar)
124
+ if idx >= 0 {
125
+ sub := content[idx:]
126
+ // Try full remainder first
127
+ if err := json.Unmarshal([]byte(sub), &js); err == nil {
128
+ return js
129
+ }
130
+ // Try balanced brace extraction
131
+ if balanced := ExtractBalancedBraces(sub, startChar); balanced != "" {
132
+ if err := json.Unmarshal([]byte(balanced), &js); err == nil {
133
+ return js
134
+ }
135
+ }
136
+ }
137
+ }
138
+
139
+ return nil
140
+ }
@@ -0,0 +1,317 @@
1
+ package rlm
2
+
3
+ import (
4
+ "bufio"
5
+ "encoding/json"
6
+ "fmt"
7
+ "os"
8
+ "strings"
9
+ "sync"
10
+ "sync/atomic"
11
+ "time"
12
+ )
13
+
14
+ // ─── Agentic-Map Operator ───────────────────────────────────────────────────
15
+ // Implements Operator-Level Recursion from the LCM paper (Section 3.1).
16
+ // Similar to LLM-Map, but spawns a full sub-agent session for each item
17
+ // with access to tools (file I/O, code execution, multi-step reasoning).
18
+
19
+ // AgenticMapConfig configures an Agentic-Map operation.
20
+ type AgenticMapConfig struct {
21
+ InputPath string `json:"input_path"` // Path to JSONL input file
22
+ OutputPath string `json:"output_path"` // Path to JSONL output file
23
+ Prompt string `json:"prompt"` // Prompt template ({{item}} is replaced)
24
+ OutputSchema *JSONSchema `json:"output_schema"` // Schema for output validation
25
+ Concurrency int `json:"concurrency"` // Worker pool size (default: 8, lower than LLM-Map)
26
+ MaxRetries int `json:"max_retries"` // Per-item retry limit (default: 2)
27
+ Model string `json:"model"` // Model for sub-agents (default: engine model)
28
+ ReadOnly bool `json:"read_only"` // If true, sub-agents cannot modify filesystem
29
+ MaxDepth int `json:"max_depth"` // Max recursion depth for sub-agents (default: 3)
30
+ MaxIter int `json:"max_iterations"` // Max iterations per sub-agent (default: 15)
31
+ }
32
+
33
+ // AgenticMapResult contains results of an Agentic-Map operation.
34
+ type AgenticMapResult struct {
35
+ TotalItems int `json:"total_items"`
36
+ Completed int `json:"completed"`
37
+ Failed int `json:"failed"`
38
+ OutputPath string `json:"output_path"`
39
+ Duration time.Duration `json:"duration"`
40
+ TokensUsed int `json:"tokens_used"`
41
+ ItemResults []AgenticItemResult `json:"item_results,omitempty"`
42
+ }
43
+
44
+ // AgenticItemResult tracks the status of a single agentic-map item.
45
+ type AgenticItemResult struct {
46
+ Index int `json:"index"`
47
+ Status MapItemStatus `json:"status"`
48
+ Output json.RawMessage `json:"output,omitempty"`
49
+ Error string `json:"error,omitempty"`
50
+ Retries int `json:"retries"`
51
+ LLMCalls int `json:"llm_calls"`
52
+ Iterations int `json:"iterations"`
53
+ }
54
+
55
+ // AgenticMapper executes Agentic-Map operations using full sub-agent sessions.
56
+ type AgenticMapper struct {
57
+ model string
58
+ apiBase string
59
+ apiKey string
60
+ timeout int
61
+ extraParams map[string]interface{}
62
+ observer *Observer
63
+ }
64
+
65
+ // NewAgenticMapper creates a new Agentic-Map executor.
66
+ func NewAgenticMapper(model, apiBase, apiKey string, timeout int, extraParams map[string]interface{}, observer *Observer) *AgenticMapper {
67
+ return &AgenticMapper{
68
+ model: model,
69
+ apiBase: apiBase,
70
+ apiKey: apiKey,
71
+ timeout: timeout,
72
+ extraParams: extraParams,
73
+ observer: observer,
74
+ }
75
+ }
76
+
77
+ // Execute runs an Agentic-Map operation: parallel sub-agent sessions over JSONL input.
78
+ func (am *AgenticMapper) Execute(config AgenticMapConfig) (*AgenticMapResult, error) {
79
+ start := time.Now()
80
+
81
+ // Apply defaults
82
+ if config.Concurrency <= 0 {
83
+ config.Concurrency = 8 // Lower default than LLM-Map due to heavier per-item cost
84
+ }
85
+ if config.MaxRetries <= 0 {
86
+ config.MaxRetries = 2
87
+ }
88
+ if config.MaxDepth <= 0 {
89
+ config.MaxDepth = 3
90
+ }
91
+ if config.MaxIter <= 0 {
92
+ config.MaxIter = 15
93
+ }
94
+ model := config.Model
95
+ if model == "" {
96
+ model = am.model
97
+ }
98
+
99
+ am.observer.Debug("lcm.agentic_map", "Starting Agentic-Map: input=%s, concurrency=%d, model=%s, read_only=%v",
100
+ config.InputPath, config.Concurrency, model, config.ReadOnly)
101
+
102
+ // Read input items
103
+ items, err := readJSONLFile(config.InputPath)
104
+ if err != nil {
105
+ return nil, fmt.Errorf("failed to read input: %w", err)
106
+ }
107
+
108
+ am.observer.Debug("lcm.agentic_map", "Read %d items from %s", len(items), config.InputPath)
109
+
110
+ // Initialize results
111
+ results := make([]AgenticItemResult, len(items))
112
+ for i := range results {
113
+ results[i] = AgenticItemResult{
114
+ Index: i,
115
+ Status: MapItemPending,
116
+ }
117
+ }
118
+
119
+ // Worker pool
120
+ var wg sync.WaitGroup
121
+ itemChan := make(chan int, len(items))
122
+ var totalTokens int64
123
+
124
+ for i := range items {
125
+ itemChan <- i
126
+ }
127
+ close(itemChan)
128
+
129
+ var mu sync.Mutex
130
+ for w := 0; w < config.Concurrency && w < len(items); w++ {
131
+ wg.Add(1)
132
+ go func() {
133
+ defer wg.Done()
134
+ for idx := range itemChan {
135
+ result := am.processAgenticItem(items[idx], config, model)
136
+ atomic.AddInt64(&totalTokens, int64(result.tokensUsed))
137
+
138
+ mu.Lock()
139
+ results[idx] = AgenticItemResult{
140
+ Index: idx,
141
+ Status: result.status,
142
+ Output: result.output,
143
+ Error: result.errMsg,
144
+ Retries: result.retries,
145
+ LLMCalls: result.llmCalls,
146
+ Iterations: result.iterations,
147
+ }
148
+ mu.Unlock()
149
+ }
150
+ }()
151
+ }
152
+
153
+ wg.Wait()
154
+
155
+ // Write output file
156
+ if config.OutputPath != "" {
157
+ if err := writeAgenticOutput(config.OutputPath, results); err != nil {
158
+ return nil, fmt.Errorf("failed to write output: %w", err)
159
+ }
160
+ }
161
+
162
+ completed, failed := 0, 0
163
+ for _, r := range results {
164
+ switch r.Status {
165
+ case MapItemCompleted:
166
+ completed++
167
+ case MapItemFailed:
168
+ failed++
169
+ }
170
+ }
171
+
172
+ duration := time.Since(start)
173
+ am.observer.Debug("lcm.agentic_map", "Agentic-Map complete: %d/%d succeeded, %d failed in %s",
174
+ completed, len(items), failed, duration)
175
+ am.observer.Event("lcm.agentic_map_complete", map[string]string{
176
+ "total_items": fmt.Sprintf("%d", len(items)),
177
+ "completed": fmt.Sprintf("%d", completed),
178
+ "failed": fmt.Sprintf("%d", failed),
179
+ "duration_ms": fmt.Sprintf("%d", duration.Milliseconds()),
180
+ "tokens_used": fmt.Sprintf("%d", totalTokens),
181
+ })
182
+
183
+ return &AgenticMapResult{
184
+ TotalItems: len(items),
185
+ Completed: completed,
186
+ Failed: failed,
187
+ OutputPath: config.OutputPath,
188
+ Duration: duration,
189
+ TokensUsed: int(totalTokens),
190
+ ItemResults: results,
191
+ }, nil
192
+ }
193
+
194
+ // ─── Per-Item Sub-Agent Processing ──────────────────────────────────────────
195
+
196
+ type agenticItemResult struct {
197
+ status MapItemStatus
198
+ output json.RawMessage
199
+ errMsg string
200
+ retries int
201
+ tokensUsed int
202
+ llmCalls int
203
+ iterations int
204
+ }
205
+
206
+ func (am *AgenticMapper) processAgenticItem(item json.RawMessage, config AgenticMapConfig, model string) agenticItemResult {
207
+ prompt := strings.ReplaceAll(config.Prompt, "{{item}}", string(item))
208
+
209
+ var lastErr string
210
+ for attempt := 0; attempt <= config.MaxRetries; attempt++ {
211
+ currentPrompt := prompt
212
+ if attempt > 0 && lastErr != "" {
213
+ currentPrompt = fmt.Sprintf("%s\n\nPrevious attempt failed: %s\nPlease fix the output.", prompt, lastErr)
214
+ }
215
+
216
+ // Spawn a full sub-agent (RLM instance) for this item
217
+ subConfig := Config{
218
+ RecursiveModel: model,
219
+ APIBase: am.apiBase,
220
+ APIKey: am.apiKey,
221
+ MaxDepth: config.MaxDepth,
222
+ MaxIterations: config.MaxIter,
223
+ TimeoutSeconds: am.timeout,
224
+ ExtraParams: am.extraParams,
225
+ }
226
+
227
+ subRLM := New(model, subConfig)
228
+ subRLM.currentDepth = 1 // Sub-agents start at depth 1
229
+ subRLM.observer = am.observer
230
+
231
+ // Build context with schema instructions if provided
232
+ context := ""
233
+ if config.OutputSchema != nil {
234
+ schemaJSON, _ := json.MarshalIndent(config.OutputSchema, "", " ")
235
+ context = fmt.Sprintf("You must output valid JSON matching this schema:\n%s\n\nRespond with ONLY the JSON output.", string(schemaJSON))
236
+ }
237
+
238
+ result, stats, err := subRLM.Completion(currentPrompt, context)
239
+ subRLM.Shutdown()
240
+
241
+ tokensUsed := stats.TotalTokens
242
+
243
+ if err != nil {
244
+ lastErr = err.Error()
245
+ continue
246
+ }
247
+
248
+ // Extract JSON from the sub-agent's output
249
+ output := extractJSON(result)
250
+ if output == nil {
251
+ // Try wrapping the raw result as a string value
252
+ wrapped, _ := json.Marshal(result)
253
+ output = wrapped
254
+ }
255
+
256
+ // Validate against schema if provided
257
+ if config.OutputSchema != nil && output != nil {
258
+ if validationErr := validateMapOutput(output, config.OutputSchema); validationErr != "" {
259
+ lastErr = validationErr
260
+ continue
261
+ }
262
+ }
263
+
264
+ return agenticItemResult{
265
+ status: MapItemCompleted,
266
+ output: output,
267
+ retries: attempt,
268
+ tokensUsed: tokensUsed,
269
+ llmCalls: stats.LlmCalls,
270
+ iterations: stats.Iterations,
271
+ }
272
+ }
273
+
274
+ return agenticItemResult{
275
+ status: MapItemFailed,
276
+ errMsg: lastErr,
277
+ retries: config.MaxRetries,
278
+ }
279
+ }
280
+
281
+ // ─── Output Writing ─────────────────────────────────────────────────────────
282
+
283
+ func writeAgenticOutput(path string, results []AgenticItemResult) error {
284
+ f, err := os.Create(path)
285
+ if err != nil {
286
+ return err
287
+ }
288
+ defer func() { _ = f.Close() }()
289
+
290
+ w := bufio.NewWriter(f)
291
+ for _, r := range results {
292
+ if r.Status == MapItemCompleted && r.Output != nil {
293
+ if _, writeErr := w.Write(r.Output); writeErr != nil {
294
+ return writeErr
295
+ }
296
+ if _, writeErr := w.WriteString("\n"); writeErr != nil {
297
+ return writeErr
298
+ }
299
+ } else {
300
+ errRecord := map[string]interface{}{
301
+ "_error": r.Error,
302
+ "_index": r.Index,
303
+ "_llm_calls": r.LLMCalls,
304
+ "_iterations": r.Iterations,
305
+ }
306
+ data, _ := json.Marshal(errRecord)
307
+ if _, writeErr := w.Write(data); writeErr != nil {
308
+ return writeErr
309
+ }
310
+ if _, writeErr := w.WriteString("\n"); writeErr != nil {
311
+ return writeErr
312
+ }
313
+ }
314
+ }
315
+
316
+ return w.Flush()
317
+ }