recursive-llm-ts 4.8.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,18 +48,18 @@ var modelTokenLimits = map[string]int{
48
48
  "o1-preview": 128000,
49
49
  "o3-mini": 200000,
50
50
  // Anthropic (via LiteLLM/proxy)
51
- "claude-3-opus": 200000,
52
- "claude-3-sonnet": 200000,
53
- "claude-3-haiku": 200000,
54
- "claude-3.5-sonnet": 200000,
55
- "claude-3.5-haiku": 200000,
56
- "claude-sonnet-4": 200000,
57
- "claude-opus-4": 200000,
51
+ "claude-3-opus": 200000,
52
+ "claude-3-sonnet": 200000,
53
+ "claude-3-haiku": 200000,
54
+ "claude-3.5-sonnet": 200000,
55
+ "claude-3.5-haiku": 200000,
56
+ "claude-sonnet-4": 200000,
57
+ "claude-opus-4": 200000,
58
58
  // Llama (common vLLM deployments)
59
- "llama-3": 8192,
60
- "llama-3.1": 128000,
61
- "llama-3.2": 128000,
62
- "llama-3.3": 128000,
59
+ "llama-3": 8192,
60
+ "llama-3.1": 128000,
61
+ "llama-3.2": 128000,
62
+ "llama-3.3": 128000,
63
63
  // Mistral
64
64
  "mistral-7b": 32768,
65
65
  "mixtral-8x7b": 32768,
@@ -181,21 +181,11 @@ func (r *RLM) getResponseTokenBudget() int {
181
181
 
182
182
  // ─── Token Estimation ────────────────────────────────────────────────────────
183
183
 
184
- // EstimateTokens provides a fast approximation of token count for a string.
185
- // Uses a character-to-token ratio heuristic. This is intentionally conservative
186
- // (over-estimates slightly) to avoid overflow.
187
- //
188
- // Approximate ratios for common encodings:
189
- // - English text: ~4 chars/token (cl100k_base)
190
- // - JSON/code: ~3.5 chars/token
191
- // - CJK text: ~1.5 chars/token
192
- // - Mixed: ~3.5 chars/token (safe default)
184
+ // EstimateTokens returns the token count for a string using the global tokenizer.
185
+ // When SetDefaultTokenizer has been called with a model name, this uses accurate
186
+ // BPE tokenization via tiktoken. Otherwise falls back to a ~3.5 chars/token heuristic.
193
187
  func EstimateTokens(text string) int {
194
- if len(text) == 0 {
195
- return 0
196
- }
197
- // Use 3.5 chars/token as conservative estimate
198
- return (len(text)*10 + 34) / 35 // equivalent to ceil(len/3.5)
188
+ return GetTokenizer().CountTokens(text)
199
189
  }
200
190
 
201
191
  // EstimateMessagesTokens estimates the total tokens for a set of chat messages.
@@ -431,8 +421,13 @@ func (cr *contextReducer) reduceByMapReduce(query string, chunks []string, model
431
421
  }
432
422
 
433
423
  cr.rlm.stats.LlmCalls++
434
- summaries[idx] = result
435
- cr.obs.Debug("overflow", "Chunk %d/%d summarized: %d -> %d chars", idx+1, len(chunks), len(chunkText), len(result))
424
+ if result.Usage != nil {
425
+ cr.rlm.stats.PromptTokens += result.Usage.PromptTokens
426
+ cr.rlm.stats.CompletionTokens += result.Usage.CompletionTokens
427
+ cr.rlm.stats.TotalTokens += result.Usage.TotalTokens
428
+ }
429
+ summaries[idx] = result.Content
430
+ cr.obs.Debug("overflow", "Chunk %d/%d summarized: %d -> %d chars", idx+1, len(chunks), len(chunkText), len(result.Content))
436
431
  }(i, chunk)
437
432
  }
438
433
 
@@ -472,24 +467,20 @@ func (cr *contextReducer) reduceByMapReduce(query string, chunks []string, model
472
467
  }
473
468
 
474
469
  // reduceByTruncation simply truncates context to fit within the limit.
470
+ // Uses the shared TruncateText utility (compression.go).
475
471
  func (cr *contextReducer) reduceByTruncation(context string, modelLimit int, overhead int) (string, error) {
476
472
  cr.obs.Debug("overflow", "Using truncation strategy")
477
473
 
478
474
  availableTokens := modelLimit - overhead
479
- maxChars := availableTokens * 3 // Conservative chars-to-tokens
475
+ truncated := TruncateText(context, TruncateTextParams{
476
+ MaxTokens: availableTokens,
477
+ MarkerText: "\n\n[... context truncated due to token limit ...]\n\n",
478
+ })
480
479
 
481
- if maxChars >= len(context) {
480
+ if truncated == context {
482
481
  return context, nil
483
482
  }
484
483
 
485
- // Keep beginning and end, truncate middle (addresses "lost in the middle" problem)
486
- keepFromStart := maxChars * 2 / 3
487
- keepFromEnd := maxChars / 3
488
-
489
- truncated := context[:keepFromStart] +
490
- "\n\n[... context truncated due to token limit ...]\n\n" +
491
- context[len(context)-keepFromEnd:]
492
-
493
484
  cr.obs.Debug("overflow", "Truncated context: %d -> %d chars", len(context), len(truncated))
494
485
  return truncated, nil
495
486
  }
@@ -536,8 +527,13 @@ func (cr *contextReducer) reduceByChunkedExtraction(query string, chunks []strin
536
527
  }
537
528
 
538
529
  cr.rlm.stats.LlmCalls++
539
- if strings.TrimSpace(result) != "NO_RELEVANT_CONTENT" {
540
- results[idx] = result
530
+ if result.Usage != nil {
531
+ cr.rlm.stats.PromptTokens += result.Usage.PromptTokens
532
+ cr.rlm.stats.CompletionTokens += result.Usage.CompletionTokens
533
+ cr.rlm.stats.TotalTokens += result.Usage.TotalTokens
534
+ }
535
+ if strings.TrimSpace(result.Content) != "NO_RELEVANT_CONTENT" {
536
+ results[idx] = result.Content
541
537
  }
542
538
  }(i, chunk)
543
539
  }
@@ -606,7 +602,7 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
606
602
  {Role: "user", Content: initialPrompt},
607
603
  }
608
604
 
609
- currentAnswer, err := CallChatCompletion(ChatRequest{
605
+ initialResult, err := CallChatCompletion(ChatRequest{
610
606
  Model: cr.rlm.model,
611
607
  Messages: messages,
612
608
  APIBase: cr.rlm.apiBase,
@@ -618,6 +614,12 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
618
614
  return "", fmt.Errorf("refine initial chunk: %w", err)
619
615
  }
620
616
  cr.rlm.stats.LlmCalls++
617
+ if initialResult.Usage != nil {
618
+ cr.rlm.stats.PromptTokens += initialResult.Usage.PromptTokens
619
+ cr.rlm.stats.CompletionTokens += initialResult.Usage.CompletionTokens
620
+ cr.rlm.stats.TotalTokens += initialResult.Usage.TotalTokens
621
+ }
622
+ currentAnswer := initialResult.Content
621
623
  cr.obs.Debug("overflow", "Refine: initial answer from chunk 1/%d (%d chars)", len(chunks), len(currentAnswer))
622
624
 
623
625
  // Phase 2: Refine the answer with each subsequent chunk
@@ -638,7 +640,7 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
638
640
  {Role: "user", Content: refinePrompt},
639
641
  }
640
642
 
641
- refined, err := CallChatCompletion(ChatRequest{
643
+ refineResult, err := CallChatCompletion(ChatRequest{
642
644
  Model: cr.rlm.model,
643
645
  Messages: messages,
644
646
  APIBase: cr.rlm.apiBase,
@@ -652,7 +654,12 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
652
654
  continue
653
655
  }
654
656
  cr.rlm.stats.LlmCalls++
655
- currentAnswer = refined
657
+ if refineResult.Usage != nil {
658
+ cr.rlm.stats.PromptTokens += refineResult.Usage.PromptTokens
659
+ cr.rlm.stats.CompletionTokens += refineResult.Usage.CompletionTokens
660
+ cr.rlm.stats.TotalTokens += refineResult.Usage.TotalTokens
661
+ }
662
+ currentAnswer = refineResult.Content
656
663
  cr.obs.Debug("overflow", "Refine: incorporated chunk %d/%d (%d chars)", i+1, len(chunks), len(currentAnswer))
657
664
  }
658
665
 
@@ -700,4 +707,3 @@ func (cr *contextReducer) reduceByTextRank(context string, modelLimit int, overh
700
707
  len(context), len(result), EstimateTokens(context), EstimateTokens(result))
701
708
  return result, nil
702
709
  }
703
-
@@ -0,0 +1,387 @@
1
+ package rlm
2
+
3
+ import (
4
+ "fmt"
5
+ "strings"
6
+ "testing"
7
+ "time"
8
+ )
9
+
10
+ func deterministicSentence(idx int) string {
11
+ topics := []string{
12
+ "architecture", "testing", "performance", "reliability", "observability",
13
+ "security", "scalability", "maintainability", "usability", "automation",
14
+ }
15
+ details := []string{
16
+ "input validation", "error handling", "resource limits", "data flow", "boundary conditions",
17
+ "traceability", "deployment safety", "schema consistency", "latency targets", "integration behavior",
18
+ }
19
+
20
+ topic := topics[idx%len(topics)]
21
+ detail := details[(idx*7)%len(details)]
22
+ return fmt.Sprintf("Sentence %d discusses topic %s with details about %s. ", idx, topic, detail)
23
+ }
24
+
25
+ func generateDeterministicContext(targetTokens int) string {
26
+ if targetTokens <= 0 {
27
+ return ""
28
+ }
29
+
30
+ var b strings.Builder
31
+ total := 0
32
+ for i := 1; total < targetTokens; i++ {
33
+ s := deterministicSentence(i)
34
+ b.WriteString(s)
35
+ total += EstimateTokens(s)
36
+ }
37
+ return b.String()
38
+ }
39
+
40
+ func fixedEnglishProse500Words() string {
41
+ words := []string{
42
+ "software", "teams", "benefit", "from", "clear", "requirements", "because", "stable", "interfaces", "reduce",
43
+ "rework", "and", "improve", "delivery", "predictability", "when", "engineers", "document", "assumptions", "carefully",
44
+ "review", "cycles", "become", "faster", "while", "quality", "signals", "remain", "visible", "across",
45
+ "planning", "implementation", "testing", "and", "maintenance", "phases", "in", "long", "lived", "systems",
46
+ }
47
+
48
+ var b strings.Builder
49
+ for i := 0; i < 500; i++ {
50
+ if i > 0 {
51
+ b.WriteByte(' ')
52
+ }
53
+ w := words[i%len(words)]
54
+ if (i+1)%25 == 0 {
55
+ w += "."
56
+ }
57
+ b.WriteString(w)
58
+ }
59
+ return b.String()
60
+ }
61
+
62
+ func percentDifference(base, compare int) float64 {
63
+ if base == 0 {
64
+ return 0
65
+ }
66
+ return (float64(compare-base) / float64(base)) * 100
67
+ }
68
+
69
+ func percentSavings(original, reduced int) float64 {
70
+ if original <= 0 {
71
+ return 0
72
+ }
73
+ return (float64(original-reduced) / float64(original)) * 100
74
+ }
75
+
76
+ func yesNo(v bool) string {
77
+ if v {
78
+ return "yes"
79
+ }
80
+ return "no"
81
+ }
82
+
83
+ func preservesOriginalSentences(original, reduced string) bool {
84
+ originalSentences := SplitSentences(original)
85
+ if len(originalSentences) == 0 {
86
+ return true
87
+ }
88
+
89
+ origSet := make(map[string]bool, len(originalSentences))
90
+ for _, s := range originalSentences {
91
+ origSet[strings.TrimSpace(s)] = true
92
+ }
93
+
94
+ for _, s := range SplitSentences(reduced) {
95
+ s = strings.TrimSpace(s)
96
+ if s == "" {
97
+ continue
98
+ }
99
+ if strings.Contains(s, "content truncated") {
100
+ continue
101
+ }
102
+ if !origSet[s] {
103
+ return false
104
+ }
105
+ }
106
+ return true
107
+ }
108
+
109
+ func episodeContextCost(episodes []*Episode) int {
110
+ total := 0
111
+ for _, ep := range episodes {
112
+ cost := ep.Tokens
113
+ if ep.Status != EpisodeActive && ep.SummaryTokens > 0 {
114
+ cost = ep.SummaryTokens
115
+ }
116
+ total += cost
117
+ }
118
+ return total
119
+ }
120
+
121
+ func TestContextSavings_TokenizerAccuracy(t *testing.T) {
122
+ useHeuristicTokenizerForTest(t)
123
+
124
+ bpeTokenizer, err := NewTiktokenTokenizer("gpt-4o")
125
+ if err != nil {
126
+ t.Fatalf("failed to create BPE tokenizer: %v", err)
127
+ }
128
+
129
+ goSnippet := `package main
130
+
131
+ import (
132
+ "fmt"
133
+ "strings"
134
+ )
135
+
136
+ func summarize(items []string) map[string]int {
137
+ result := map[string]int{}
138
+ for _, item := range items {
139
+ normalized := strings.TrimSpace(strings.ToLower(item))
140
+ if normalized == "" {
141
+ continue
142
+ }
143
+ result[normalized]++
144
+ }
145
+ return result
146
+ }
147
+
148
+ func main() {
149
+ data := []string{"alpha", "beta", "alpha", "gamma", "beta", "alpha"}
150
+ stats := summarize(data)
151
+ fmt.Println("stats:", stats)
152
+ }
153
+ `
154
+
155
+ jsonData := `{
156
+ "project": "recursive-llm-ts",
157
+ "version": "1.0.0",
158
+ "features": {
159
+ "lcm": true,
160
+ "observability": true,
161
+ "context_overflow": {
162
+ "enabled": true,
163
+ "strategy": "tfidf",
164
+ "max_reduction_attempts": 3
165
+ }
166
+ },
167
+ "items": [
168
+ {"id": 1, "name": "alpha", "priority": "high"},
169
+ {"id": 2, "name": "beta", "priority": "medium"},
170
+ {"id": 3, "name": "gamma", "priority": "low"}
171
+ ]
172
+ }`
173
+
174
+ cjkText := "这是一个固定的中文测试句子,用于衡量分词稳定性。日本語の固定テスト文を使ってトークン数を比較します。고정된 한국어 문장으로 토큰 계산 결과를 확인합니다。"
175
+
176
+ testCases := []struct {
177
+ name string
178
+ content string
179
+ }{
180
+ {name: "english_prose", content: fixedEnglishProse500Words()},
181
+ {name: "go_code", content: goSnippet},
182
+ {name: "json", content: jsonData},
183
+ {name: "cjk", content: cjkText},
184
+ }
185
+
186
+ t.Logf("Tokenizer accuracy comparison (heuristic default + direct BPE)")
187
+ for _, tc := range testCases {
188
+ heuristic := EstimateTokens(tc.content)
189
+ bpe := bpeTokenizer.CountTokens(tc.content)
190
+ chars := len([]rune(tc.content))
191
+ diffPct := percentDifference(bpe, heuristic)
192
+
193
+ t.Logf("type=%-14s chars=%5d heuristic=%5d bpe=%5d diff=%7.2f%%", tc.name, chars, heuristic, bpe, diffPct)
194
+
195
+ if heuristic <= 0 {
196
+ t.Fatalf("heuristic token count should be > 0 for %s", tc.name)
197
+ }
198
+ if bpe <= 0 {
199
+ t.Fatalf("BPE token count should be > 0 for %s", tc.name)
200
+ }
201
+ }
202
+ }
203
+
204
+ func TestContextSavings_FiveLevelEscalation(t *testing.T) {
205
+ useHeuristicTokenizerForTest(t)
206
+
207
+ original := generateDeterministicContext(5000)
208
+ originalTokens := EstimateTokens(original)
209
+
210
+ level3 := CompressContextTFIDF(original, 2000)
211
+ level4 := CompressContextTextRank(original, 2000)
212
+ level5 := TruncateText(original, TruncateTextParams{MaxTokens: 2000})
213
+
214
+ level3Tokens := EstimateTokens(level3)
215
+ level4Tokens := EstimateTokens(level4)
216
+ level5Tokens := EstimateTokens(level5)
217
+
218
+ t.Logf("Five-level non-LLM escalation comparison")
219
+ t.Logf("original_tokens=%d", originalTokens)
220
+ t.Logf("level=3 strategy=tfidf tokens=%d reduction=%6.2f%% sentence_preserved=%s", level3Tokens, percentSavings(originalTokens, level3Tokens), yesNo(preservesOriginalSentences(original, level3)))
221
+ t.Logf("level=4 strategy=textrank tokens=%d reduction=%6.2f%% sentence_preserved=%s", level4Tokens, percentSavings(originalTokens, level4Tokens), yesNo(preservesOriginalSentences(original, level4)))
222
+ t.Logf("level=5 strategy=truncate tokens=%d reduction=%6.2f%% sentence_preserved=%s", level5Tokens, percentSavings(originalTokens, level5Tokens), yesNo(preservesOriginalSentences(original, level5)))
223
+
224
+ if level3Tokens >= originalTokens {
225
+ t.Fatalf("expected TF-IDF to reduce tokens: original=%d level3=%d", originalTokens, level3Tokens)
226
+ }
227
+ if level4Tokens >= originalTokens {
228
+ t.Fatalf("expected TextRank to reduce tokens: original=%d level4=%d", originalTokens, level4Tokens)
229
+ }
230
+ if level5Tokens >= originalTokens {
231
+ t.Fatalf("expected Truncate to reduce tokens: original=%d level5=%d", originalTokens, level5Tokens)
232
+ }
233
+ }
234
+
235
+ func TestContextSavings_EpisodicMemoryBudget(t *testing.T) {
236
+ useHeuristicTokenizerForTest(t)
237
+
238
+ manager := NewEpisodeManager("ctx-savings-episodes", EpisodeConfig{
239
+ MaxEpisodeMessages: 5,
240
+ MaxEpisodeTokens: 500,
241
+ TopicChangeThreshold: 0.5,
242
+ AutoCompactAfterClose: false,
243
+ })
244
+
245
+ baseTime := time.Date(2024, 1, 2, 3, 4, 5, 0, time.UTC)
246
+ rawTokens := 0
247
+ for i := 0; i < 50; i++ {
248
+ content := fmt.Sprintf("Message %d. %s", i+1, generateDeterministicContext(100))
249
+ tokens := EstimateTokens(content)
250
+ rawTokens += tokens
251
+
252
+ manager.AddMessage(&StoreMessage{
253
+ ID: fmt.Sprintf("msg_%03d", i+1),
254
+ Role: RoleUser,
255
+ Content: content,
256
+ Tokens: tokens,
257
+ Timestamp: baseTime.Add(time.Duration(i) * time.Minute),
258
+ })
259
+ }
260
+
261
+ episodes := manager.GetAllEpisodes()
262
+ t.Logf("episodes_created=%d (expected around 10)", len(episodes))
263
+ if len(episodes) < 9 || len(episodes) > 11 {
264
+ t.Fatalf("expected around 10 episodes, got %d", len(episodes))
265
+ }
266
+
267
+ for i := 0; i < len(episodes)-1; i++ {
268
+ summary := fmt.Sprintf("Episode %d summary. %s", i+1, generateDeterministicContext(30))
269
+ if err := manager.CompactEpisode(episodes[i].ID, summary); err != nil {
270
+ t.Fatalf("failed to compact episode %s: %v", episodes[i].ID, err)
271
+ }
272
+ }
273
+
274
+ budgets := []int{200, 500, 1000, 2000}
275
+ for _, budget := range budgets {
276
+ selected := manager.GetEpisodesForContext(budget)
277
+ contextTokens := episodeContextCost(selected)
278
+ savings := percentSavings(rawTokens, contextTokens)
279
+ t.Logf("budget=%4d episodes=%2d context_tokens=%5d raw_tokens=%5d savings=%6.2f%%", budget, len(selected), contextTokens, rawTokens, savings)
280
+
281
+ if len(selected) == 0 {
282
+ t.Fatalf("expected at least one episode for budget %d", budget)
283
+ }
284
+ }
285
+ }
286
+
287
+ func TestContextSavings_AllStrategiesComparison(t *testing.T) {
288
+ useHeuristicTokenizerForTest(t)
289
+
290
+ original := generateDeterministicContext(35000)
291
+ originalTokens := EstimateTokens(original)
292
+ target := 16000
293
+
294
+ tfidf := CompressContextTFIDF(original, target)
295
+ textrank := CompressContextTextRank(original, target)
296
+ truncated := TruncateText(original, TruncateTextParams{MaxTokens: target})
297
+
298
+ results := []struct {
299
+ strategy string
300
+ content string
301
+ tokens int
302
+ preserved bool
303
+ }{
304
+ {strategy: "TF-IDF", content: tfidf, tokens: EstimateTokens(tfidf), preserved: preservesOriginalSentences(original, tfidf)},
305
+ {strategy: "TextRank", content: textrank, tokens: EstimateTokens(textrank), preserved: preservesOriginalSentences(original, textrank)},
306
+ {strategy: "Truncate", content: truncated, tokens: EstimateTokens(truncated), preserved: preservesOriginalSentences(original, truncated)},
307
+ }
308
+
309
+ t.Logf("strategy comparison for target=%d tokens (original=%d)", target, originalTokens)
310
+ t.Logf("strategy output_tokens reduction%% sentence_preserved")
311
+ for _, r := range results {
312
+ t.Logf("%-9s %12d %9.2f%% %s", r.strategy, r.tokens, percentSavings(originalTokens, r.tokens), yesNo(r.preserved))
313
+ if r.tokens >= originalTokens {
314
+ t.Fatalf("strategy %s did not reduce tokens: original=%d output=%d", r.strategy, originalTokens, r.tokens)
315
+ }
316
+ }
317
+ }
318
+
319
+ func TestContextSavings_CombinedPipeline(t *testing.T) {
320
+ useHeuristicTokenizerForTest(t)
321
+
322
+ manager := NewEpisodeManager("ctx-savings-pipeline", EpisodeConfig{
323
+ MaxEpisodeMessages: 10,
324
+ MaxEpisodeTokens: 1000000,
325
+ TopicChangeThreshold: 0.5,
326
+ AutoCompactAfterClose: false,
327
+ })
328
+
329
+ baseTime := time.Date(2024, 5, 10, 9, 30, 0, 0, time.UTC)
330
+ messageContentByID := make(map[string]string)
331
+ rawTokens := 0
332
+
333
+ for i := 0; i < 100; i++ {
334
+ id := fmt.Sprintf("pipeline_msg_%03d", i+1)
335
+ content := fmt.Sprintf("Message %d segment. %s", i+1, generateDeterministicContext(500))
336
+ tokens := EstimateTokens(content)
337
+ rawTokens += tokens
338
+ messageContentByID[id] = content
339
+
340
+ manager.AddMessage(&StoreMessage{
341
+ ID: id,
342
+ Role: RoleUser,
343
+ Content: content,
344
+ Tokens: tokens,
345
+ Timestamp: baseTime.Add(time.Duration(i) * time.Minute),
346
+ })
347
+ }
348
+
349
+ episodes := manager.GetAllEpisodes()
350
+ if len(episodes) != 10 {
351
+ t.Fatalf("expected 10 episodes from 100 messages with MaxEpisodeMessages=10, got %d", len(episodes))
352
+ }
353
+
354
+ afterGrouping := episodeContextCost(episodes)
355
+
356
+ for i := 0; i < len(episodes)-1; i++ {
357
+ ep := episodes[i]
358
+ var b strings.Builder
359
+ for _, msgID := range ep.MessageIDs {
360
+ b.WriteString(messageContentByID[msgID])
361
+ b.WriteString("\n")
362
+ }
363
+ summary := CompressContextTFIDF(b.String(), 300)
364
+ if err := manager.CompactEpisode(ep.ID, summary); err != nil {
365
+ t.Fatalf("failed to compact episode %s: %v", ep.ID, err)
366
+ }
367
+ }
368
+
369
+ afterCompaction := episodeContextCost(manager.GetAllEpisodes())
370
+ selected := manager.GetEpisodesForContext(8000)
371
+ afterBudgetSelection := episodeContextCost(selected)
372
+ totalSavings := percentSavings(rawTokens, afterBudgetSelection)
373
+
374
+ t.Logf("Combined pipeline results")
375
+ t.Logf("original_total_tokens=%d", rawTokens)
376
+ t.Logf("after_episodic_grouping=%d", afterGrouping)
377
+ t.Logf("after_compaction=%d", afterCompaction)
378
+ t.Logf("after_budget_selection=%d", afterBudgetSelection)
379
+ t.Logf("total_savings=%6.2f%%", totalSavings)
380
+
381
+ if afterCompaction >= afterGrouping {
382
+ t.Fatalf("expected compaction to reduce context tokens: grouped=%d compacted=%d", afterGrouping, afterCompaction)
383
+ }
384
+ if afterBudgetSelection > 8000 && len(selected) > 0 && selected[0].Status != EpisodeActive {
385
+ t.Fatalf("expected selected context <= budget when active episode is not the reason for overflow: selected=%d budget=8000", afterBudgetSelection)
386
+ }
387
+ }
package/go/rlm/doc.go CHANGED
@@ -8,13 +8,13 @@
8
8
  //
9
9
  // To use this package in your Go project:
10
10
  //
11
- // go get github.com/jbeck018/recursive-llm-ts/go
11
+ // go get github.com/howlerops/recursive-llm-ts/go
12
12
  //
13
13
  // # Basic Usage
14
14
  //
15
15
  // Create an RLM engine and execute a completion:
16
16
  //
17
- // import "github.com/jbeck018/recursive-llm-ts/go/rlm"
17
+ // import "github.com/howlerops/recursive-llm-ts/go/rlm"
18
18
  //
19
19
  // config := rlm.Config{
20
20
  // MaxDepth: 5,