recursive-llm-ts 4.9.0 → 5.0.0

@@ -10,6 +10,13 @@ import (
 	"testing"
 )
 
+func useHeuristicTokenizerForTest(t *testing.T) {
+	t.Helper()
+	ResetDefaultTokenizer()
+	t.Cleanup(func() {
+		ResetDefaultTokenizer()
+	})
+}
 // ─── Token Tracking Unit Tests ──────────────────────────────────────────────
 
 func TestTokenUsage_ParsedFromAPIResponse(t *testing.T) {
@@ -25,7 +32,7 @@ func TestTokenUsage_ParsedFromAPIResponse(t *testing.T) {
 				"total_tokens": 175,
 			},
 		}
-		json.NewEncoder(w).Encode(resp)
+		_ = json.NewEncoder(w).Encode(resp)
 	}))
 	defer server.Close()
 
@@ -62,7 +69,7 @@ func TestTokenUsage_NilWhenAPIDoesNotReturnUsage(t *testing.T) {
 			{"message": map[string]string{"content": "Hello"}},
 		},
 	}
-		json.NewEncoder(w).Encode(resp)
+		_ = json.NewEncoder(w).Encode(resp)
 	}))
 	defer server.Close()
 
@@ -95,7 +102,7 @@ func TestRLMStats_TokenAccumulation(t *testing.T) {
 				"total_tokens": 120 * callCount,
 			},
 		}
-		json.NewEncoder(w).Encode(resp)
+		_ = json.NewEncoder(w).Encode(resp)
 	}))
 	defer server.Close()
 
@@ -144,7 +151,7 @@ func TestRLMStats_TokenAccumulation_MultipleIterations(t *testing.T) {
 				"total_tokens": 250,
 			},
 		}
-		json.NewEncoder(w).Encode(resp)
+		_ = json.NewEncoder(w).Encode(resp)
 	}))
 	defer server.Close()
 
@@ -367,10 +374,11 @@ func generateLargeContext(targetTokens int) string {
 }
 
 func TestTokenEfficiency_TFIDFUsesFewerTokens(t *testing.T) {
+	useHeuristicTokenizerForTest(t)
+
 	// Generate a large context (~35,000 tokens, well over 32k)
 	largeContext := generateLargeContext(35000)
 	originalTokens := EstimateTokens(largeContext)
-
 	if originalTokens < 32000 {
 		t.Fatalf("generated context is too small: %d tokens, need at least 32000", originalTokens)
 	}
@@ -407,9 +415,10 @@ func TestTokenEfficiency_TFIDFUsesFewerTokens(t *testing.T) {
 }
 
 func TestTokenEfficiency_TextRankUsesFewerTokens(t *testing.T) {
+	useHeuristicTokenizerForTest(t)
+
 	largeContext := generateLargeContext(35000)
 	originalTokens := EstimateTokens(largeContext)
-
 	if originalTokens < 32000 {
 		t.Fatalf("generated context is too small: %d tokens, need at least 32000", originalTokens)
 	}
@@ -442,6 +451,8 @@ func TestTokenEfficiency_TextRankUsesFewerTokens(t *testing.T) {
 }
 
 func TestTokenEfficiency_TruncateUsesFewerTokens(t *testing.T) {
+	useHeuristicTokenizerForTest(t)
+
 	largeContext := generateLargeContext(35000)
 	originalTokens := EstimateTokens(largeContext)
 
@@ -569,10 +580,11 @@ func TestTokenEfficiency_PreemptiveReduction(t *testing.T) {
 }
 
 func TestTokenEfficiency_AllStrategiesCompared(t *testing.T) {
+	useHeuristicTokenizerForTest(t)
+
 	// Generate a 40k token context (well over 32k limit)
 	largeContext := generateLargeContext(40000)
 	originalTokens := EstimateTokens(largeContext)
-
 	if originalTokens < 35000 {
 		t.Fatalf("generated context is too small: %d tokens, need at least 35000", originalTokens)
 	}
@@ -663,10 +675,11 @@ func TestTokenEfficiency_AllStrategiesCompared(t *testing.T) {
 }
 
 func TestTokenEfficiency_VeryLargeContext_100kTokens(t *testing.T) {
+	useHeuristicTokenizerForTest(t)
+
 	// Test with a very large context (~100k tokens) to prove scaling
 	largeContext := generateLargeContext(100000)
 	originalTokens := EstimateTokens(largeContext)
-
 	if originalTokens < 90000 {
 		t.Fatalf("generated context is too small: %d tokens, need at least 90000", originalTokens)
 	}
@@ -721,7 +734,7 @@ func TestTokenEfficiency_MapReduceTracksTokens(t *testing.T) {
 				"total_tokens": 530 + callCount*50,
 			},
 		}
-		json.NewEncoder(w).Encode(resp)
+		_ = json.NewEncoder(w).Encode(resp)
 	}))
 	defer server.Close()
 
@@ -780,7 +793,7 @@ func TestTokenEfficiency_StructuredCompletion_TracksTokens(t *testing.T) {
 				"total_tokens": 315,
 			},
 		}
-		json.NewEncoder(w).Encode(resp)
+		_ = json.NewEncoder(w).Encode(resp)
 	}))
 	defer server.Close()
 
@@ -827,10 +840,11 @@ func TestTokenEfficiency_StructuredCompletion_TracksTokens(t *testing.T) {
 // ─── Token Estimation Accuracy Tests ─────────────────────────────────────────
 
 func TestEstimateTokens_AccuracyForLargeContent(t *testing.T) {
+	useHeuristicTokenizerForTest(t)
+
 	// Verify that our estimation stays reasonable for large content
 	content := generateLargeContext(32000)
 	estimated := EstimateTokens(content)
-
 	// Real tokenizer would give different results, but our estimation should be
 	// within a reasonable range. The key property: conservative (over-estimates slightly)
 	charToTokenRatio := float64(len(content)) / float64(estimated)
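The `useHeuristicTokenizerForTest` helper above pins these tests to the deterministic heuristic tokenizer, which suggests that `EstimateTokens` now delegates to the global default tokenizer introduced in the new file below. The diff does not include `EstimateTokens` itself, so this is only a minimal sketch of the wiring implied by the helper and by `TestEstimateTokens_UsesDefaultTokenizer` further down:

```go
// Sketch only — EstimateTokens is not shown in this diff. This is the
// delegation implied by useHeuristicTokenizerForTest and by
// TestEstimateTokens_UsesDefaultTokenizer in the new test file.
func EstimateTokens(text string) int {
	return GetTokenizer().CountTokens(text)
}
```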
@@ -0,0 +1,216 @@
+package rlm
+
+import (
+	"strings"
+	"sync"
+	"sync/atomic"
+
+	"github.com/cespare/xxhash/v2"
+	tiktoken "github.com/pkoukk/tiktoken-go"
+)
+
+// ─── Tokenizer Interface ─────────────────────────────────────────────────────
+// Provides accurate token counting with model-specific BPE encoding.
+// Replaces the heuristic ~3.5 chars/token estimation with real tokenization.
+
+// Tokenizer counts tokens in text strings.
+type Tokenizer interface {
+	CountTokens(text string) int
+}
+
+// ─── Tiktoken BPE Tokenizer ──────────────────────────────────────────────────
+
+// TiktokenTokenizer uses the tiktoken BPE encoding for accurate token counting.
+type TiktokenTokenizer struct {
+	encoding *tiktoken.Tiktoken
+	name     string
+}
+
+// modelEncodingMap maps model name prefixes to their tiktoken encoding names.
+var modelEncodingMap = map[string]string{
+	// OpenAI o200k_base models
+	"gpt-4o":  "o200k_base",
+	"gpt-4o-": "o200k_base",
+	"o1":      "o200k_base",
+	"o3":      "o200k_base",
+	"o4":      "o200k_base",
+	// OpenAI cl100k_base models
+	"gpt-4-":  "cl100k_base",
+	"gpt-4":   "cl100k_base",
+	"gpt-3.5": "cl100k_base",
+	// Anthropic (closest approximation)
+	"claude": "cl100k_base",
+	// Meta Llama
+	"llama": "cl100k_base",
+	// Mistral
+	"mistral": "cl100k_base",
+	"mixtral": "cl100k_base",
+	// Qwen
+	"qwen": "cl100k_base",
+}
+
+// encodingForModel returns the tiktoken encoding name for a given model.
+func encodingForModel(model string) string {
+	lower := strings.ToLower(model)
+
+	// Try exact match first
+	if enc, ok := modelEncodingMap[lower]; ok {
+		return enc
+	}
+
+	// Try prefix matching (longest prefix wins)
+	bestMatch := ""
+	bestEnc := "cl100k_base"
+	for prefix, enc := range modelEncodingMap {
+		if strings.HasPrefix(lower, prefix) && len(prefix) > len(bestMatch) {
+			bestMatch = prefix
+			bestEnc = enc
+		}
+	}
+
+	return bestEnc
+}
+
+// NewTiktokenTokenizer creates a tokenizer using the appropriate BPE encoding for the model.
+// Returns nil and an error if the encoding cannot be loaded.
+func NewTiktokenTokenizer(model string) (*TiktokenTokenizer, error) {
+	encName := encodingForModel(model)
+	enc, err := tiktoken.GetEncoding(encName)
+	if err != nil {
+		return nil, err
+	}
+	return &TiktokenTokenizer{
+		encoding: enc,
+		name:     encName,
+	}, nil
+}
+
+// CountTokens returns the exact BPE token count for the text.
+func (t *TiktokenTokenizer) CountTokens(text string) int {
+	if len(text) == 0 {
+		return 0
+	}
+	tokens := t.encoding.Encode(text, nil, nil)
+	return len(tokens)
+}
+
+// EncodingName returns the name of the encoding used.
+func (t *TiktokenTokenizer) EncodingName() string {
+	return t.name
+}
+
+// ─── Heuristic Tokenizer (Fallback) ──────────────────────────────────────────
+
+// HeuristicTokenizer uses the character-to-token ratio heuristic.
+// This is the original EstimateTokens logic, kept as a fallback
+// when tiktoken encodings are unavailable.
+type HeuristicTokenizer struct{}
+
+// CountTokens provides a fast approximation of token count.
+// Uses ~3.5 chars/token ratio, intentionally conservative (over-estimates).
+func (h *HeuristicTokenizer) CountTokens(text string) int {
+	if len(text) == 0 {
+		return 0
+	}
+	return (len(text)*10 + 34) / 35
+}
+
+// ─── Cached Tokenizer ────────────────────────────────────────────────────────
+
+const maxCacheSize = 10000
+
+// CachedTokenizer wraps another Tokenizer with an LRU-style cache.
+// Uses xxhash for fast key hashing and sync.Map for concurrent access.
+type CachedTokenizer struct {
+	inner Tokenizer
+	cache sync.Map // map[uint64]int
+	size  atomic.Int64
+}
+
+// NewCachedTokenizer wraps a tokenizer with caching.
+func NewCachedTokenizer(inner Tokenizer) *CachedTokenizer {
+	return &CachedTokenizer{
+		inner: inner,
+	}
+}
+
+// CountTokens returns the cached token count, computing and caching on miss.
+func (c *CachedTokenizer) CountTokens(text string) int {
+	if len(text) == 0 {
+		return 0
+	}
+
+	// Hash the text for cache key
+	key := xxhash.Sum64String(text)
+
+	// Check cache
+	if val, ok := c.cache.Load(key); ok {
+		return val.(int)
+	}
+
+	// Compute
+	count := c.inner.CountTokens(text)
+
+	// Store if under max size; evict all if over (simple strategy)
+	if c.size.Load() < maxCacheSize {
+		if _, loaded := c.cache.LoadOrStore(key, count); !loaded {
+			c.size.Add(1)
+		}
+	} else {
+		// Simple eviction: clear the cache when full
+		c.cache.Clear()
+		c.size.Store(0)
+		c.cache.Store(key, count)
+		c.size.Add(1)
+	}
+
+	return count
+}
+
+// CacheSize returns the current number of cached entries.
+func (c *CachedTokenizer) CacheSize() int64 {
+	return c.size.Load()
+}
+
+// Inner returns the underlying tokenizer.
+func (c *CachedTokenizer) Inner() Tokenizer {
+	return c.inner
+}
+
+// ─── Global Default Tokenizer ────────────────────────────────────────────────
+
+var (
+	defaultTokenizer Tokenizer = &HeuristicTokenizer{}
+	tokenizerMu      sync.RWMutex
+)
+
+// SetDefaultTokenizer configures the global tokenizer for a given model.
+// Tries tiktoken BPE first, falls back to heuristic on failure.
+// The tokenizer is wrapped with caching for performance.
+func SetDefaultTokenizer(model string) {
+	tokenizerMu.Lock()
+	defer tokenizerMu.Unlock()

+	tok, err := NewTiktokenTokenizer(model)
+	if err != nil {
+		// Fall back to heuristic with caching
+		defaultTokenizer = NewCachedTokenizer(&HeuristicTokenizer{})
+		return
+	}
+
+	defaultTokenizer = NewCachedTokenizer(tok)
+}
+
+// GetTokenizer returns the current global tokenizer.
+func GetTokenizer() Tokenizer {
+	tokenizerMu.RLock()
+	defer tokenizerMu.RUnlock()
+	return defaultTokenizer
+}
+
+// ResetDefaultTokenizer resets to the heuristic tokenizer (used in tests).
+func ResetDefaultTokenizer() {
+	tokenizerMu.Lock()
+	defer tokenizerMu.Unlock()
+	defaultTokenizer = &HeuristicTokenizer{}
+}
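For orientation, here is a minimal usage sketch of the tokenizer API added above. It uses only functions shown in this diff; the import path is a placeholder, not the package's published module path:

```go
package main

import (
	"fmt"

	rlm "example.com/recursive-llm-ts/go/rlm" // hypothetical import path
)

func main() {
	// Install a process-wide tokenizer for the model. Per SetDefaultTokenizer,
	// this falls back to the heuristic if the tiktoken encoding cannot load.
	rlm.SetDefaultTokenizer("gpt-4o")
	fmt.Println(rlm.GetTokenizer().CountTokens("Hello, world!"))

	// Or build a model-specific BPE tokenizer directly.
	tok, err := rlm.NewTiktokenTokenizer("gpt-3.5-turbo")
	if err != nil {
		panic(err)
	}
	fmt.Println(tok.EncodingName(), tok.CountTokens("some text"))
}
```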
@@ -0,0 +1,305 @@
+package rlm
+
+import (
+	"strings"
+	"testing"
+)
+
+// ─── Tiktoken BPE Tokenizer Tests ────────────────────────────────────────────
+
+func TestTiktokenTokenizer_English(t *testing.T) {
+	tok, err := NewTiktokenTokenizer("gpt-4o")
+	if err != nil {
+		t.Fatalf("failed to create tokenizer: %v", err)
+	}
+
+	text := "Hello, world! This is a test of the tokenizer."
+	bpeCount := tok.CountTokens(text)
+	heuristic := (&HeuristicTokenizer{}).CountTokens(text)
+
+	t.Logf("English text: BPE=%d, Heuristic=%d", bpeCount, heuristic)
+
+	if bpeCount <= 0 {
+		t.Error("BPE count should be > 0")
+	}
+	// BPE and heuristic should give different counts for most text
+	// (they may coincidentally match for very short strings, so just verify BPE is reasonable)
+	if bpeCount > len(text) {
+		t.Errorf("BPE count %d should not exceed character count %d", bpeCount, len(text))
+	}
+}
+
+func TestTiktokenTokenizer_Code(t *testing.T) {
+	tok, err := NewTiktokenTokenizer("gpt-4o")
+	if err != nil {
+		t.Fatalf("failed to create tokenizer: %v", err)
+	}
+
+	code := `func main() {
+	fmt.Println("Hello, World!")
+	for i := 0; i < 100; i++ {
+		result = append(result, processItem(items[i]))
+	}
+}`
+	bpeCount := tok.CountTokens(code)
+	heuristic := (&HeuristicTokenizer{}).CountTokens(code)
+
+	t.Logf("Code: BPE=%d, Heuristic=%d (chars=%d)", bpeCount, heuristic, len(code))
+
+	if bpeCount <= 0 {
+		t.Error("BPE count should be > 0 for code")
+	}
+}
+
+func TestTiktokenTokenizer_JSON(t *testing.T) {
+	tok, err := NewTiktokenTokenizer("gpt-4o")
+	if err != nil {
+		t.Fatalf("failed to create tokenizer: %v", err)
+	}
+
+	jsonData := `{"name": "test", "values": [1, 2, 3, 4, 5], "nested": {"key": "value", "count": 42}}`
+	bpeCount := tok.CountTokens(jsonData)
+	heuristic := (&HeuristicTokenizer{}).CountTokens(jsonData)
+
+	t.Logf("JSON: BPE=%d, Heuristic=%d (chars=%d)", bpeCount, heuristic, len(jsonData))
+
+	if bpeCount <= 0 {
+		t.Error("BPE count should be > 0 for JSON")
+	}
+}
+
+func TestTiktokenTokenizer_CJK(t *testing.T) {
+	tok, err := NewTiktokenTokenizer("gpt-4o")
+	if err != nil {
+		t.Fatalf("failed to create tokenizer: %v", err)
+	}
+
+	// Chinese, Japanese, and Korean text
+	cjkText := "这是一个测试。日本語のテスト。한국어 테스트입니다。"
+	bpeCount := tok.CountTokens(cjkText)
+	heuristic := (&HeuristicTokenizer{}).CountTokens(cjkText)
+
+	t.Logf("CJK text: BPE=%d, Heuristic=%d (chars=%d, bytes=%d)", bpeCount, heuristic, len([]rune(cjkText)), len(cjkText))
+
+	if bpeCount <= 0 {
+		t.Error("BPE count should be > 0 for CJK")
+	}
+	// CJK text has ~1.5 chars per token but heuristic assumes ~3.5
+	// So BPE should count MORE tokens than heuristic for CJK
+	if bpeCount <= heuristic {
+		t.Logf("WARNING: BPE (%d) should typically be > heuristic (%d) for CJK text", bpeCount, heuristic)
+	}
+}
+
+func TestTiktokenTokenizer_Empty(t *testing.T) {
+	tok, err := NewTiktokenTokenizer("gpt-4o")
+	if err != nil {
+		t.Fatalf("failed to create tokenizer: %v", err)
+	}
+
+	if tok.CountTokens("") != 0 {
+		t.Error("empty string should return 0 tokens")
+	}
+}
+
+func TestTiktokenTokenizer_EncodingSelection(t *testing.T) {
+	tests := []struct {
+		model    string
+		expected string
+	}{
+		{"gpt-4o", "o200k_base"},
+		{"gpt-4o-mini", "o200k_base"},
+		{"gpt-4o-mini-2024-07-18", "o200k_base"},
+		{"gpt-4", "cl100k_base"},
+		{"gpt-4-turbo", "cl100k_base"},
+		{"gpt-3.5-turbo", "cl100k_base"},
+		{"claude-3-opus", "cl100k_base"},
+		{"claude-sonnet-4", "cl100k_base"},
+		{"o1", "o200k_base"},
+		{"o3-mini", "o200k_base"},
+		{"llama-3.1", "cl100k_base"},
+		{"unknown-model", "cl100k_base"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.model, func(t *testing.T) {
+			enc := encodingForModel(tt.model)
+			if enc != tt.expected {
+				t.Errorf("encodingForModel(%q) = %q, want %q", tt.model, enc, tt.expected)
+			}
+		})
+	}
+}
+
+// ─── Heuristic Tokenizer Tests ───────────────────────────────────────────────
+
+func TestHeuristicTokenizer_Fallback(t *testing.T) {
+	h := &HeuristicTokenizer{}
+
+	if h.CountTokens("") != 0 {
+		t.Error("empty string should be 0")
+	}
+
+	// "Hello" = 5 chars, ceil(5/3.5) = 2
+	count := h.CountTokens("Hello")
+	if count <= 0 {
+		t.Error("should count > 0 tokens for 'Hello'")
+	}
+
+	// Longer text
+	longText := strings.Repeat("word ", 1000)
+	longCount := h.CountTokens(longText)
+	expected := (len(longText)*10 + 34) / 35
+	if longCount != expected {
+		t.Errorf("heuristic count %d != expected %d", longCount, expected)
+	}
+}
+
+// ─── Cached Tokenizer Tests ─────────────────────────────────────────────────
+
+func TestCachedTokenizer_CacheHit(t *testing.T) {
+	callCount := 0
+	inner := &countingTokenizer{fn: func(text string) int {
+		callCount++
+		return len(text) / 4
+	}}
+
+	cached := NewCachedTokenizer(inner)
+
+	text := "This is a test string for caching"
+
+	// First call: cache miss
+	count1 := cached.CountTokens(text)
+	if callCount != 1 {
+		t.Errorf("expected 1 inner call, got %d", callCount)
+	}
+
+	// Second call: cache hit (inner should NOT be called again)
+	count2 := cached.CountTokens(text)
+	if callCount != 1 {
+		t.Errorf("expected still 1 inner call after cache hit, got %d", callCount)
+	}
+
+	if count1 != count2 {
+		t.Errorf("cache returned different values: %d vs %d", count1, count2)
+	}
+
+	if cached.CacheSize() != 1 {
+		t.Errorf("cache size should be 1, got %d", cached.CacheSize())
+	}
+}
+
+func TestCachedTokenizer_Empty(t *testing.T) {
+	inner := &HeuristicTokenizer{}
+	cached := NewCachedTokenizer(inner)
+
+	if cached.CountTokens("") != 0 {
+		t.Error("empty string should return 0 without caching")
+	}
+	if cached.CacheSize() != 0 {
+		t.Error("cache should not store empty strings")
+	}
+}
+
+func TestCachedTokenizer_DifferentStrings(t *testing.T) {
+	inner := &HeuristicTokenizer{}
+	cached := NewCachedTokenizer(inner)
+
+	cached.CountTokens("string one")
+	cached.CountTokens("string two")
+	cached.CountTokens("string three")
+
+	if cached.CacheSize() != 3 {
+		t.Errorf("cache size should be 3, got %d", cached.CacheSize())
+	}
+}
+
+func TestCachedTokenizer_Inner(t *testing.T) {
+	inner := &HeuristicTokenizer{}
+	cached := NewCachedTokenizer(inner)
+
+	if cached.Inner() != inner {
+		t.Error("Inner() should return the wrapped tokenizer")
+	}
+}
+
+// countingTokenizer tracks how many times CountTokens is called.
+type countingTokenizer struct {
+	fn func(string) int
+}
+
+func (c *countingTokenizer) CountTokens(text string) int {
+	return c.fn(text)
+}
+
+// ─── Global Default Tokenizer Tests ──────────────────────────────────────────
+
+func TestSetDefaultTokenizer_KnownModel(t *testing.T) {
+	defer ResetDefaultTokenizer()
+
+	SetDefaultTokenizer("gpt-4o")
+	tok := GetTokenizer()
+
+	// Should be a CachedTokenizer wrapping a TiktokenTokenizer
+	cached, ok := tok.(*CachedTokenizer)
+	if !ok {
+		t.Fatalf("expected CachedTokenizer, got %T", tok)
+	}
+
+	inner, ok := cached.Inner().(*TiktokenTokenizer)
+	if !ok {
+		t.Fatalf("expected inner TiktokenTokenizer, got %T", cached.Inner())
+	}
+
+	if inner.EncodingName() != "o200k_base" {
+		t.Errorf("expected o200k_base encoding, got %s", inner.EncodingName())
+	}
+
+	// Verify it actually counts tokens
+	count := tok.CountTokens("Hello, world!")
+	if count <= 0 {
+		t.Error("tokenizer should count > 0 tokens")
+	}
+}
+
+func TestSetDefaultTokenizer_UnknownModel(t *testing.T) {
+	defer ResetDefaultTokenizer()
+
+	// Even unknown models should work because we default to cl100k_base
+	SetDefaultTokenizer("totally-unknown-model-xyz")
+	tok := GetTokenizer()
+
+	count := tok.CountTokens("Hello, world!")
+	if count <= 0 {
+		t.Error("tokenizer should count > 0 tokens even for unknown model")
+	}
+}
+
+func TestEstimateTokens_UsesDefaultTokenizer(t *testing.T) {
+	defer ResetDefaultTokenizer()
+
+	// With heuristic default
+	heuristicCount := EstimateTokens("Hello, world! This is a test.")
+
+	// Switch to BPE
+	SetDefaultTokenizer("gpt-4o")
+	bpeCount := EstimateTokens("Hello, world! This is a test.")
+
+	t.Logf("EstimateTokens: heuristic=%d, bpe=%d", heuristicCount, bpeCount)
+
+	// Both should be > 0
+	if heuristicCount <= 0 || bpeCount <= 0 {
+		t.Errorf("both counts should be > 0: heuristic=%d, bpe=%d", heuristicCount, bpeCount)
+	}
+}
+
+func TestResetDefaultTokenizer(t *testing.T) {
+	SetDefaultTokenizer("gpt-4o")
+	ResetDefaultTokenizer()
+
+	tok := GetTokenizer()
+	_, ok := tok.(*HeuristicTokenizer)
+	if !ok {
+		t.Errorf("after reset, expected HeuristicTokenizer, got %T", tok)
+	}
+}
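A quick arithmetic check of the heuristic formula exercised by `TestHeuristicTokenizer_Fallback` above: `(n*10 + 34) / 35` is an integer-math ceiling of `n / 3.5`, where `n` is the byte length. A minimal sketch verifying a few values:

```go
package main

import "fmt"

func main() {
	// (n*10 + 34) / 35 == ceil(n / 3.5) for non-negative n.
	for _, n := range []int{1, 5, 35, 36, 100} {
		fmt.Printf("n=%3d -> %d tokens\n", n, (n*10+34)/35)
	}
	// n=5:  (50+34)/35  = 2  (ceil(5/3.5)  = 2)
	// n=36: (360+34)/35 = 11 (ceil(36/3.5) = 11)
}
```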
package/go/rlm/types.go CHANGED
@@ -70,6 +70,7 @@ type Config struct {
 	MetaAgent       *MetaAgentConfig
 	Observability   *ObservabilityConfig
 	ContextOverflow *ContextOverflowConfig
+	LCM             *LCMConfig // Lossless Context Management configuration
 }
 
 func ConfigFromMap(config map[string]interface{}) Config {
@@ -126,6 +127,27 @@ func ConfigFromMap(config map[string]interface{}) Config {
 		parsed.ContextOverflow = &co
 	}
 
+	// Extract LCM config
+	if lcmConfig, ok := config["lcm"].(map[string]interface{}); ok {
+		lcm := DefaultLCMConfig()
+		if v, ok := lcmConfig["enabled"].(bool); ok {
+			lcm.Enabled = v
+		}
+		if v, ok := toInt(lcmConfig["soft_threshold"]); ok {
+			lcm.SoftThreshold = v
+		}
+		if v, ok := toInt(lcmConfig["hard_threshold"]); ok {
+			lcm.HardThreshold = v
+		}
+		if v, ok := toInt(lcmConfig["compaction_block_size"]); ok {
+			lcm.CompactionBlockSize = v
+		}
+		if v, ok := toInt(lcmConfig["summary_target_tokens"]); ok {
+			lcm.SummaryTargetTokens = v
+		}
+		parsed.LCM = &lcm
+	}
+
 	for key, value := range config {
 		switch key {
 		case "recursive_model":
@@ -159,7 +181,7 @@ func ConfigFromMap(config map[string]interface{}) Config {
 			"trace_endpoint", "service_name", "log_output",
 			"langfuse_enabled", "langfuse_public_key",
 			"langfuse_secret_key", "langfuse_host",
-			"context_overflow":
+			"context_overflow", "lcm":
 			// ignore bridge-only config, meta_agent, observability, context_overflow (handled above/separately)
 		default:
 			parsed.ExtraParams[key] = value
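The new `lcm` block is consumed from the bridge config map. A hedged sketch of what a caller might pass — the key names come from the parsing code above; the values (and the helper name) are purely illustrative, and omitted keys keep their `DefaultLCMConfig()` values:

```go
// Sketch (within package rlm): values are illustrative, not defaults.
func exampleLCMConfig() Config {
	return ConfigFromMap(map[string]interface{}{
		"lcm": map[string]interface{}{
			"enabled":               true,
			"soft_threshold":        24000,
			"hard_threshold":        32000,
			"compaction_block_size": 2048,
			"summary_target_tokens": 512,
		},
	})
	// The returned Config has .LCM set to a *LCMConfig with these
	// overrides applied on top of DefaultLCMConfig().
}
```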
package/go/rlm.test ADDED
Binary file