recursive-llm-ts 4.5.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,572 @@
1
+ package rlm
2
+
3
+ import (
4
+ "fmt"
5
+ "strings"
6
+ "sync"
7
+ )
8
+
9
// ContextOverflowConfig holds the settings that control automatic detection
// of, and recovery from, context-window overflow.
type ContextOverflowConfig struct {
	// Enabled switches automatic overflow detection and recovery on
	// (default: true when a config is present).
	Enabled bool `json:"enabled"`

	// MaxModelTokens overrides the detected model token limit;
	// 0 means auto-detect from API errors.
	MaxModelTokens int `json:"max_model_tokens,omitempty"`

	// Strategy selects how the context is reduced: "mapreduce" (default),
	// "truncate", "chunked", "tfidf", "textrank" or "refine".
	Strategy string `json:"strategy,omitempty"`

	// SafetyMargin is the fraction of the token budget held back for
	// prompts and other overhead (default: 0.15).
	SafetyMargin float64 `json:"safety_margin,omitempty"`

	// MaxReductionAttempts is the number of retries with a smaller context
	// (default: 3).
	MaxReductionAttempts int `json:"max_reduction_attempts,omitempty"`
}
22
+
23
+ // DefaultContextOverflowConfig returns sensible defaults for overflow handling.
24
+ func DefaultContextOverflowConfig() ContextOverflowConfig {
25
+ return ContextOverflowConfig{
26
+ Enabled: true,
27
+ Strategy: "mapreduce",
28
+ SafetyMargin: 0.15,
29
+ MaxReductionAttempts: 3,
30
+ }
31
+ }
32
+
33
+ // ─── Token Estimation ────────────────────────────────────────────────────────
34
+
35
// EstimateTokens returns a quick approximation of how many tokens text will
// occupy. It divides the byte length of text by 3.5 and rounds up; an empty
// string yields 0.
//
// The 3.5 bytes-per-token ratio is a deliberately cautious middle ground that
// slightly over-estimates typical input so callers stay under their limits:
//   - English text: ~4 chars/token (cl100k_base)
//   - JSON/code:    ~3.5 chars/token
//   - CJK text:     ~1.5 chars/token
//   - Mixed:        ~3.5 chars/token (safe default)
func EstimateTokens(text string) int {
	// Work in tenths so the division by 3.5 stays in integer arithmetic.
	const (
		scale               = 10
		scaledCharsPerToken = 35
	)

	n := len(text)
	if n == 0 {
		return 0
	}
	// Adding (divisor - 1) before dividing rounds the quotient up.
	return (n*scale + scaledCharsPerToken - 1) / scaledCharsPerToken
}
51
+
52
+ // EstimateMessagesTokens estimates the total tokens for a set of chat messages.
53
+ // Includes per-message overhead (~4 tokens per message for role + formatting).
54
+ func EstimateMessagesTokens(messages []Message) int {
55
+ total := 3 // Every reply is primed with <|im_start|>assistant<|im_sep|>
56
+ for _, msg := range messages {
57
+ total += 4 // role + formatting overhead
58
+ total += EstimateTokens(msg.Content)
59
+ }
60
+ return total
61
+ }
62
+
63
+ // ─── Context Chunking ────────────────────────────────────────────────────────
64
+
65
+ // ChunkContext splits context into chunks that fit within a token budget.
66
+ // Uses paragraph/sentence boundaries when possible, with overlap for context continuity.
67
+ func ChunkContext(context string, maxTokensPerChunk int) []string {
68
+ if maxTokensPerChunk <= 0 {
69
+ maxTokensPerChunk = 4000
70
+ }
71
+
72
+ // Estimate max chars per chunk (slightly conservative)
73
+ maxCharsPerChunk := maxTokensPerChunk * 3 // Use 3 chars/token to leave room
74
+
75
+ if len(context) <= maxCharsPerChunk {
76
+ return []string{context}
77
+ }
78
+
79
+ var chunks []string
80
+ overlapChars := maxCharsPerChunk / 10 // 10% overlap for context continuity
81
+
82
+ pos := 0
83
+ for pos < len(context) {
84
+ end := pos + maxCharsPerChunk
85
+ if end >= len(context) {
86
+ chunks = append(chunks, context[pos:])
87
+ break
88
+ }
89
+
90
+ // Try to find a good break point (paragraph boundary first, then sentence, then word)
91
+ breakPoint := findBreakPoint(context, pos, end)
92
+ chunks = append(chunks, context[pos:breakPoint])
93
+
94
+ // Move position back by overlap amount to maintain context continuity
95
+ pos = breakPoint - overlapChars
96
+ if pos < 0 {
97
+ pos = 0
98
+ }
99
+ // Ensure we make forward progress
100
+ if pos <= (breakPoint - maxCharsPerChunk) {
101
+ pos = breakPoint
102
+ }
103
+ }
104
+
105
+ return chunks
106
+ }
107
+
108
// findBreakPoint picks the byte offset at which to end a chunk of text that
// begins at start and should end no later than targetEnd.
//
// Only the last 20% of the span [start, targetEnd) is searched. Within that
// window the candidates are tried in order of preference:
//  1. just after the last paragraph break ("\n\n"),
//  2. just after the last line break ("\n"),
//  3. just after the right-most sentence end (". ", "! " or "? "),
//  4. just after the last space.
//
// When the window contains none of these, the split falls on targetEnd,
// moved back by up to three bytes if that is needed to avoid cutting a
// multi-byte UTF-8 character in half.
//
// If targetEnd is at or beyond the end of text, len(text) is returned. A
// negative targetEnd is treated as 0 and a start beyond targetEnd is treated
// as an empty window, so the function never slices out of range.
func findBreakPoint(text string, start int, targetEnd int) int {
	if targetEnd < 0 {
		targetEnd = 0
	}
	if targetEnd >= len(text) {
		return len(text)
	}
	// An inverted window would otherwise produce an invalid slice below.
	if start > targetEnd {
		start = targetEnd
	}

	// Search window: look back from targetEnd up to 20% of the chunk.
	searchStart := targetEnd - (targetEnd-start)/5
	if searchStart < start {
		searchStart = start
	}
	if searchStart < 0 {
		searchStart = 0
	}
	window := text[searchStart:targetEnd]

	if idx := strings.LastIndex(window, "\n\n"); idx >= 0 {
		return searchStart + idx + 2
	}
	if idx := strings.LastIndex(window, "\n"); idx >= 0 {
		return searchStart + idx + 1
	}

	// Take whichever sentence terminator sits closest to targetEnd, not
	// simply the first kind that happens to occur in the window.
	lastSentence := -1
	for _, sep := range []string{". ", "! ", "? "} {
		if idx := strings.LastIndex(window, sep); idx > lastSentence {
			lastSentence = idx
		}
	}
	if lastSentence >= 0 {
		return searchStart + lastSentence + 2 // every separator is 2 bytes
	}

	if idx := strings.LastIndex(window, " "); idx >= 0 {
		return searchStart + idx + 1
	}

	// No natural boundary: split at the target, stepping back off any UTF-8
	// continuation byte (bit pattern 10xxxxxx). A character has at most
	// three of them, so a longer run means the input is not valid UTF-8 and
	// the target is used unchanged.
	for k := targetEnd; k > start && k > 0 && targetEnd-k < 4; k-- {
		if text[k]&0xC0 != 0x80 {
			return k
		}
	}
	return targetEnd
}
148
+
149
+ // ─── MapReduce Context Reduction ─────────────────────────────────────────────
150
+
151
// MapReduceResult describes the outcome of one MapReduce context reduction:
// the reduced text together with chunk and token counts.
type MapReduceResult struct {
	ReducedContext string
	ChunkCount     int
	OriginalTokens int
	ReducedTokens  int
}
158
+
159
+ // contextReducer manages context reduction for overflow recovery
160
+ type contextReducer struct {
161
+ rlm *RLM
162
+ config ContextOverflowConfig
163
+ obs *Observer
164
+ }
165
+
166
+ // newContextReducer creates a reducer bound to an RLM engine
167
+ func newContextReducer(rlm *RLM, config ContextOverflowConfig, obs *Observer) *contextReducer {
168
+ return &contextReducer{rlm: rlm, config: config, obs: obs}
169
+ }
170
+
171
+ // getResponseTokenBudget extracts max_tokens or max_completion_tokens from ExtraParams.
172
+ // This represents how many tokens the API will reserve for the response, which must be
173
+ // subtracted from the model's total capacity when sizing input chunks.
174
+ func (cr *contextReducer) getResponseTokenBudget(modelLimit int) int {
175
+ if cr.rlm.extraParams == nil {
176
+ return 0
177
+ }
178
+ // Check max_completion_tokens first (newer API parameter), then max_tokens
179
+ for _, key := range []string{"max_completion_tokens", "max_tokens"} {
180
+ if v, ok := cr.rlm.extraParams[key]; ok {
181
+ switch n := v.(type) {
182
+ case float64:
183
+ return int(n)
184
+ case int:
185
+ return n
186
+ case int64:
187
+ return int(n)
188
+ }
189
+ }
190
+ }
191
+ return 0
192
+ }
193
+
194
+ // makeMapPhaseParams creates ExtraParams suitable for map-phase LLM calls (summarization).
195
+ // It copies the user's ExtraParams but overrides max_tokens to a smaller value since
196
+ // summaries don't need as many tokens as the original completion.
197
+ func (cr *contextReducer) makeMapPhaseParams(modelLimit int) map[string]interface{} {
198
+ params := make(map[string]interface{})
199
+ // Copy all user params (custom_llm_provider, temperature, etc.)
200
+ for k, v := range cr.rlm.extraParams {
201
+ params[k] = v
202
+ }
203
+ // Override max_tokens for map-phase: use at most 1/4 of model limit or 2000, whichever is smaller
204
+ mapMaxTokens := modelLimit / 4
205
+ if mapMaxTokens > 2000 {
206
+ mapMaxTokens = 2000
207
+ }
208
+ if mapMaxTokens < 256 {
209
+ mapMaxTokens = 256
210
+ }
211
+ params["max_tokens"] = mapMaxTokens
212
+ // Remove max_completion_tokens if present to avoid conflicts
213
+ delete(params, "max_completion_tokens")
214
+ return params
215
+ }
216
+
217
+ // ReduceForCompletion handles context overflow for a regular completion.
218
+ // It chunks the context, summarizes each chunk, and combines the summaries.
219
+ func (cr *contextReducer) ReduceForCompletion(query string, context string, modelLimit int) (string, error) {
220
+ cr.obs.Debug("overflow", "Starting context reduction: %d estimated tokens, limit %d", EstimateTokens(context), modelLimit)
221
+
222
+ // Calculate safe token budget per chunk
223
+ // Reserve tokens for: system prompt (~500), query, overhead, safety margin, response budget
224
+ queryTokens := EstimateTokens(query)
225
+ responseTokens := cr.getResponseTokenBudget(modelLimit)
226
+ overhead := 500 + queryTokens + int(float64(modelLimit)*cr.config.SafetyMargin) + responseTokens
227
+ safeTokensPerChunk := modelLimit - overhead
228
+
229
+ if safeTokensPerChunk <= 0 {
230
+ safeTokensPerChunk = modelLimit / 4
231
+ }
232
+
233
+ cr.obs.Debug("overflow", "Budget: overhead=%d (query=%d, response=%d, safety=%d), chunk budget=%d",
234
+ overhead, queryTokens, responseTokens, int(float64(modelLimit)*cr.config.SafetyMargin), safeTokensPerChunk)
235
+
236
+ chunks := ChunkContext(context, safeTokensPerChunk)
237
+ cr.obs.Debug("overflow", "Split context into %d chunks (budget: %d tokens/chunk)", len(chunks), safeTokensPerChunk)
238
+
239
+ if len(chunks) == 1 {
240
+ // Context is already small enough (or couldn't be meaningfully split)
241
+ return context, nil
242
+ }
243
+
244
+ switch cr.config.Strategy {
245
+ case "truncate":
246
+ return cr.reduceByTruncation(context, modelLimit, overhead)
247
+ case "chunked":
248
+ return cr.reduceByChunkedExtraction(query, chunks, modelLimit, overhead)
249
+ case "tfidf":
250
+ return cr.reduceByTFIDF(context, modelLimit, overhead)
251
+ case "textrank":
252
+ return cr.reduceByTextRank(context, modelLimit, overhead)
253
+ case "refine":
254
+ return cr.reduceByRefine(query, chunks, modelLimit, overhead)
255
+ default: // "mapreduce"
256
+ return cr.reduceByMapReduce(query, chunks, modelLimit, overhead)
257
+ }
258
+ }
259
+
260
+ // reduceByMapReduce summarizes each chunk and combines the summaries.
261
+ func (cr *contextReducer) reduceByMapReduce(query string, chunks []string, modelLimit int, overhead int) (string, error) {
262
+ cr.obs.Debug("overflow", "Using MapReduce strategy with %d chunks", len(chunks))
263
+
264
+ // Use map-phase-specific params with reduced max_tokens for summarization
265
+ mapPhaseParams := cr.makeMapPhaseParams(modelLimit)
266
+
267
+ summaries := make([]string, len(chunks))
268
+ errs := make([]error, len(chunks))
269
+ var wg sync.WaitGroup
270
+
271
+ // Map phase: summarize each chunk in parallel
272
+ for i, chunk := range chunks {
273
+ wg.Add(1)
274
+ go func(idx int, chunkText string) {
275
+ defer wg.Done()
276
+
277
+ mapPrompt := fmt.Sprintf(
278
+ "Summarize the following text chunk, preserving all key facts, data points, names, numbers, and specific details that would be needed to answer the question: %q\n\n"+
279
+ "IMPORTANT: Be thorough and retain specific data. Do not omit numbers, percentages, dates, or named entities.\n\n"+
280
+ "Text chunk (%d of %d):\n%s",
281
+ query, idx+1, len(chunks), chunkText,
282
+ )
283
+
284
+ messages := []Message{
285
+ {Role: "system", Content: "You are a precise summarization assistant. Preserve all factual details, data points, and specific information."},
286
+ {Role: "user", Content: mapPrompt},
287
+ }
288
+
289
+ result, err := CallChatCompletion(ChatRequest{
290
+ Model: cr.rlm.model,
291
+ Messages: messages,
292
+ APIBase: cr.rlm.apiBase,
293
+ APIKey: cr.rlm.apiKey,
294
+ Timeout: cr.rlm.timeoutSeconds,
295
+ ExtraParams: mapPhaseParams,
296
+ })
297
+ if err != nil {
298
+ errs[idx] = fmt.Errorf("map phase chunk %d: %w", idx+1, err)
299
+ return
300
+ }
301
+
302
+ cr.rlm.stats.LlmCalls++
303
+ summaries[idx] = result
304
+ cr.obs.Debug("overflow", "Chunk %d/%d summarized: %d -> %d chars", idx+1, len(chunks), len(chunkText), len(result))
305
+ }(i, chunk)
306
+ }
307
+
308
+ wg.Wait()
309
+
310
+ // Check for errors - if map phase overflows, fall back to tfidf
311
+ var mapErrors []string
312
+ hasOverflow := false
313
+ for _, err := range errs {
314
+ if err != nil {
315
+ mapErrors = append(mapErrors, err.Error())
316
+ if _, isOverflow := IsContextOverflow(err); isOverflow {
317
+ hasOverflow = true
318
+ }
319
+ }
320
+ }
321
+ if len(mapErrors) > 0 {
322
+ if hasOverflow {
323
+ cr.obs.Debug("overflow", "MapReduce map phase hit overflow, falling back to TF-IDF strategy")
324
+ return cr.reduceByTFIDF(strings.Join(chunks, "\n\n"), modelLimit, overhead)
325
+ }
326
+ return "", fmt.Errorf("MapReduce map phase failed: %s", strings.Join(mapErrors, "; "))
327
+ }
328
+
329
+ // Reduce phase: combine summaries
330
+ combined := strings.Join(summaries, "\n\n---\n\n")
331
+
332
+ // Check if combined summaries fit in the budget
333
+ if EstimateTokens(combined)+overhead < modelLimit {
334
+ cr.obs.Debug("overflow", "MapReduce complete: %d -> %d estimated tokens", EstimateTokens(strings.Join(chunks, "")), EstimateTokens(combined))
335
+ return combined, nil
336
+ }
337
+
338
+ // If summaries are still too large, recursively reduce
339
+ cr.obs.Debug("overflow", "Combined summaries still too large (%d tokens), reducing recursively", EstimateTokens(combined))
340
+ return cr.ReduceForCompletion(query, combined, modelLimit)
341
+ }
342
+
343
+ // reduceByTruncation simply truncates context to fit within the limit.
344
+ func (cr *contextReducer) reduceByTruncation(context string, modelLimit int, overhead int) (string, error) {
345
+ cr.obs.Debug("overflow", "Using truncation strategy")
346
+
347
+ availableTokens := modelLimit - overhead
348
+ maxChars := availableTokens * 3 // Conservative chars-to-tokens
349
+
350
+ if maxChars >= len(context) {
351
+ return context, nil
352
+ }
353
+
354
+ // Keep beginning and end, truncate middle (addresses "lost in the middle" problem)
355
+ keepFromStart := maxChars * 2 / 3
356
+ keepFromEnd := maxChars / 3
357
+
358
+ truncated := context[:keepFromStart] +
359
+ "\n\n[... context truncated due to token limit ...]\n\n" +
360
+ context[len(context)-keepFromEnd:]
361
+
362
+ cr.obs.Debug("overflow", "Truncated context: %d -> %d chars", len(context), len(truncated))
363
+ return truncated, nil
364
+ }
365
+
366
+ // reduceByChunkedExtraction processes each chunk independently and returns all extracted content.
367
+ func (cr *contextReducer) reduceByChunkedExtraction(query string, chunks []string, modelLimit int, overhead int) (string, error) {
368
+ cr.obs.Debug("overflow", "Using chunked extraction strategy with %d chunks", len(chunks))
369
+
370
+ // Use map-phase-specific params with reduced max_tokens
371
+ mapPhaseParams := cr.makeMapPhaseParams(modelLimit)
372
+
373
+ results := make([]string, len(chunks))
374
+ errs := make([]error, len(chunks))
375
+ var wg sync.WaitGroup
376
+
377
+ for i, chunk := range chunks {
378
+ wg.Add(1)
379
+ go func(idx int, chunkText string) {
380
+ defer wg.Done()
381
+
382
+ extractPrompt := fmt.Sprintf(
383
+ "Extract all information relevant to the following question from this text chunk. "+
384
+ "Include specific data, facts, quotes, and details. If nothing relevant is found, respond with 'NO_RELEVANT_CONTENT'.\n\n"+
385
+ "Question: %s\n\nText chunk (%d of %d):\n%s",
386
+ query, idx+1, len(chunks), chunkText,
387
+ )
388
+
389
+ messages := []Message{
390
+ {Role: "system", Content: "You are a precise information extraction assistant. Extract only relevant information."},
391
+ {Role: "user", Content: extractPrompt},
392
+ }
393
+
394
+ result, err := CallChatCompletion(ChatRequest{
395
+ Model: cr.rlm.model,
396
+ Messages: messages,
397
+ APIBase: cr.rlm.apiBase,
398
+ APIKey: cr.rlm.apiKey,
399
+ Timeout: cr.rlm.timeoutSeconds,
400
+ ExtraParams: mapPhaseParams,
401
+ })
402
+ if err != nil {
403
+ errs[idx] = fmt.Errorf("chunked extraction chunk %d: %w", idx+1, err)
404
+ return
405
+ }
406
+
407
+ cr.rlm.stats.LlmCalls++
408
+ if strings.TrimSpace(result) != "NO_RELEVANT_CONTENT" {
409
+ results[idx] = result
410
+ }
411
+ }(i, chunk)
412
+ }
413
+
414
+ wg.Wait()
415
+
416
+ var extractErrors []string
417
+ hasOverflow := false
418
+ for _, err := range errs {
419
+ if err != nil {
420
+ extractErrors = append(extractErrors, err.Error())
421
+ if _, isOverflow := IsContextOverflow(err); isOverflow {
422
+ hasOverflow = true
423
+ }
424
+ }
425
+ }
426
+ if len(extractErrors) > 0 {
427
+ if hasOverflow {
428
+ cr.obs.Debug("overflow", "Chunked extraction hit overflow, falling back to TF-IDF strategy")
429
+ return cr.reduceByTFIDF(strings.Join(chunks, "\n\n"), modelLimit, overhead)
430
+ }
431
+ return "", fmt.Errorf("chunked extraction failed: %s", strings.Join(extractErrors, "; "))
432
+ }
433
+
434
+ // Combine non-empty results
435
+ var parts []string
436
+ for _, r := range results {
437
+ if r != "" {
438
+ parts = append(parts, r)
439
+ }
440
+ }
441
+
442
+ if len(parts) == 0 {
443
+ return "No relevant content found across all chunks.", nil
444
+ }
445
+
446
+ return strings.Join(parts, "\n\n---\n\n"), nil
447
+ }
448
+
449
+ // ─── Refine Sequential Strategy ─────────────────────────────────────────────
450
+
451
+ // reduceByRefine processes chunks sequentially, where the first chunk generates
452
+ // an initial answer and each subsequent chunk refines it. This approach has the
453
+ // highest information fidelity because every chunk is processed in context of
454
+ // the cumulative answer, but is sequential (not parallelizable).
455
+ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLimit int, overhead int) (string, error) {
456
+ cr.obs.Debug("overflow", "Using refine strategy with %d chunks", len(chunks))
457
+
458
+ if len(chunks) == 0 {
459
+ return "", fmt.Errorf("refine strategy: no chunks to process")
460
+ }
461
+
462
+ // Use map-phase-specific params with reduced max_tokens
463
+ mapPhaseParams := cr.makeMapPhaseParams(modelLimit)
464
+
465
+ // Phase 1: Generate initial answer from the first chunk
466
+ initialPrompt := fmt.Sprintf(
467
+ "Using the following context, provide a comprehensive answer to the question.\n"+
468
+ "Preserve all key facts, data points, names, numbers, and specific details.\n\n"+
469
+ "Question: %s\n\nContext:\n%s",
470
+ query, chunks[0],
471
+ )
472
+
473
+ messages := []Message{
474
+ {Role: "system", Content: "You are a precise information synthesis assistant. Preserve all factual details and specific data points."},
475
+ {Role: "user", Content: initialPrompt},
476
+ }
477
+
478
+ currentAnswer, err := CallChatCompletion(ChatRequest{
479
+ Model: cr.rlm.model,
480
+ Messages: messages,
481
+ APIBase: cr.rlm.apiBase,
482
+ APIKey: cr.rlm.apiKey,
483
+ Timeout: cr.rlm.timeoutSeconds,
484
+ ExtraParams: mapPhaseParams,
485
+ })
486
+ if err != nil {
487
+ return "", fmt.Errorf("refine initial chunk: %w", err)
488
+ }
489
+ cr.rlm.stats.LlmCalls++
490
+ cr.obs.Debug("overflow", "Refine: initial answer from chunk 1/%d (%d chars)", len(chunks), len(currentAnswer))
491
+
492
+ // Phase 2: Refine the answer with each subsequent chunk
493
+ for i := 1; i < len(chunks); i++ {
494
+ refinePrompt := fmt.Sprintf(
495
+ "You have an existing answer to the question: %q\n\n"+
496
+ "Existing answer:\n%s\n\n"+
497
+ "Now you have additional context that may contain new information, corrections, or supporting details.\n"+
498
+ "Refine the existing answer by incorporating any relevant new information from this context.\n"+
499
+ "If this context adds nothing new, return the existing answer unchanged.\n"+
500
+ "IMPORTANT: Never remove information from the existing answer unless it is contradicted by the new context.\n\n"+
501
+ "Additional context (chunk %d of %d):\n%s",
502
+ query, currentAnswer, i+1, len(chunks), chunks[i],
503
+ )
504
+
505
+ messages := []Message{
506
+ {Role: "system", Content: "You are a precise information synthesis assistant. Refine answers by incorporating new context without losing existing information."},
507
+ {Role: "user", Content: refinePrompt},
508
+ }
509
+
510
+ refined, err := CallChatCompletion(ChatRequest{
511
+ Model: cr.rlm.model,
512
+ Messages: messages,
513
+ APIBase: cr.rlm.apiBase,
514
+ APIKey: cr.rlm.apiKey,
515
+ Timeout: cr.rlm.timeoutSeconds,
516
+ ExtraParams: mapPhaseParams,
517
+ })
518
+ if err != nil {
519
+ cr.obs.Debug("overflow", "Refine: chunk %d/%d failed: %v, keeping current answer", i+1, len(chunks), err)
520
+ // On error, keep current answer rather than failing entirely
521
+ continue
522
+ }
523
+ cr.rlm.stats.LlmCalls++
524
+ currentAnswer = refined
525
+ cr.obs.Debug("overflow", "Refine: incorporated chunk %d/%d (%d chars)", i+1, len(chunks), len(currentAnswer))
526
+ }
527
+
528
+ // Verify the refined answer fits within budget
529
+ if EstimateTokens(currentAnswer)+overhead < modelLimit {
530
+ cr.obs.Debug("overflow", "Refine complete: answer is %d estimated tokens", EstimateTokens(currentAnswer))
531
+ return currentAnswer, nil
532
+ }
533
+
534
+ // If the refined answer is still too large, truncate it
535
+ cr.obs.Debug("overflow", "Refine answer too large (%d tokens), truncating", EstimateTokens(currentAnswer))
536
+ return cr.reduceByTruncation(currentAnswer, modelLimit, overhead)
537
+ }
538
+
539
+ // ─── TF-IDF Strategy (wrapper for contextReducer) ───────────────────────────
540
+
541
+ // reduceByTFIDF uses TF-IDF extractive compression - pure algorithmic, no API calls.
542
+ func (cr *contextReducer) reduceByTFIDF(context string, modelLimit int, overhead int) (string, error) {
543
+ cr.obs.Debug("overflow", "Using TF-IDF extractive strategy")
544
+
545
+ availableTokens := modelLimit - overhead
546
+ if availableTokens <= 0 {
547
+ availableTokens = modelLimit / 2
548
+ }
549
+
550
+ result := CompressContextTFIDF(context, availableTokens)
551
+ cr.obs.Debug("overflow", "TF-IDF compressed: %d -> %d chars (%d -> %d est. tokens)",
552
+ len(context), len(result), EstimateTokens(context), EstimateTokens(result))
553
+ return result, nil
554
+ }
555
+
556
+ // ─── TextRank Strategy (wrapper for contextReducer) ─────────────────────────
557
+
558
+ // reduceByTextRank uses TextRank graph-based ranking - pure algorithmic, no API calls.
559
+ func (cr *contextReducer) reduceByTextRank(context string, modelLimit int, overhead int) (string, error) {
560
+ cr.obs.Debug("overflow", "Using TextRank graph-based strategy")
561
+
562
+ availableTokens := modelLimit - overhead
563
+ if availableTokens <= 0 {
564
+ availableTokens = modelLimit / 2
565
+ }
566
+
567
+ result := CompressContextTextRank(context, availableTokens)
568
+ cr.obs.Debug("overflow", "TextRank compressed: %d -> %d chars (%d -> %d est. tokens)",
569
+ len(context), len(result), EstimateTokens(context), EstimateTokens(result))
570
+ return result, nil
571
+ }
572
+