recursive-llm-ts 4.7.0 → 4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/bin/rlm-go +0 -0
- package/dist/bridge-interface.d.ts +3 -0
- package/dist/rlm.js +10 -0
- package/go/README.md +2 -2
- package/go/cmd/rlm/main.go +1 -1
- package/go/go.mod +1 -1
- package/go/rlm/context_overflow.go +181 -29
- package/go/rlm/context_overflow_test.go +373 -3
- package/go/rlm/doc.go +2 -2
- package/go/rlm/meta_agent.go +18 -2
- package/go/rlm/observability.go +6 -0
- package/go/rlm/openai.go +27 -10
- package/go/rlm/rlm.go +86 -3
- package/go/rlm/structured.go +23 -0
- package/go/rlm/token_tracking_test.go +845 -0
- package/go/rlm/types.go +7 -4
- package/package.json +4 -4
package/go/rlm/structured.go
CHANGED
|
@@ -46,6 +46,20 @@ func (r *RLM) StructuredCompletion(query string, context string, config *Structu
|
|
|
46
46
|
subTasks := decomposeSchema(config.Schema)
|
|
47
47
|
r.observer.Debug("structured", "Schema decomposed into %d subtasks", len(subTasks))
|
|
48
48
|
|
|
49
|
+
// Pre-emptive overflow check: reduce context BEFORE building the prompt.
|
|
50
|
+
// Structured completion embeds the full context in the user message, so this is
|
|
51
|
+
// critical to prevent overflow on the first LLM call (following the RLM paper's
|
|
52
|
+
// principle: "the context window of the root LM is rarely clogged").
|
|
53
|
+
schemaJSON, _ := json.Marshal(config.Schema)
|
|
54
|
+
schemaOverhead := EstimateTokens(string(schemaJSON)) + structuredPromptOverhead
|
|
55
|
+
reducedCtx, wasReduced, reduceErr := r.PreemptiveReduceContext(query, context, schemaOverhead)
|
|
56
|
+
if reduceErr != nil {
|
|
57
|
+
r.observer.Error("structured", "Pre-emptive reduction failed: %v (proceeding with original context)", reduceErr)
|
|
58
|
+
} else if wasReduced {
|
|
59
|
+
r.observer.Debug("structured", "Pre-emptive reduction applied: %d -> %d chars", len(context), len(reducedCtx))
|
|
60
|
+
context = reducedCtx
|
|
61
|
+
}
|
|
62
|
+
|
|
49
63
|
// If simple schema or parallel disabled, use direct method
|
|
50
64
|
if len(subTasks) <= 2 || !config.ParallelExecution {
|
|
51
65
|
r.observer.Debug("structured", "Using direct completion method")
|
|
@@ -182,9 +196,15 @@ func (r *RLM) structuredCompletionDirect(query string, context string, config *S
|
|
|
182
196
|
}
|
|
183
197
|
|
|
184
198
|
stats.ParsingRetries = attempt
|
|
199
|
+
stats.TotalTokens = r.stats.TotalTokens
|
|
200
|
+
stats.PromptTokens = r.stats.PromptTokens
|
|
201
|
+
stats.CompletionTokens = r.stats.CompletionTokens
|
|
185
202
|
return parsed, stats, nil
|
|
186
203
|
}
|
|
187
204
|
|
|
205
|
+
stats.TotalTokens = r.stats.TotalTokens
|
|
206
|
+
stats.PromptTokens = r.stats.PromptTokens
|
|
207
|
+
stats.CompletionTokens = r.stats.CompletionTokens
|
|
188
208
|
return nil, stats, fmt.Errorf("failed to get valid structured output after %d attempts: %v", config.MaxRetries, lastErr)
|
|
189
209
|
}
|
|
190
210
|
|
|
@@ -251,6 +271,9 @@ func (r *RLM) structuredCompletionParallel(query string, context string, config
|
|
|
251
271
|
totalStats.Depth = stats.Depth
|
|
252
272
|
}
|
|
253
273
|
totalStats.ParsingRetries += stats.ParsingRetries
|
|
274
|
+
totalStats.TotalTokens += stats.TotalTokens
|
|
275
|
+
totalStats.PromptTokens += stats.PromptTokens
|
|
276
|
+
totalStats.CompletionTokens += stats.CompletionTokens
|
|
254
277
|
statsMutex.Unlock()
|
|
255
278
|
}(i, task)
|
|
256
279
|
}
|