recursive-llm-ts 4.9.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/bin/rlm-go +0 -0
- package/dist/bridge-interface.d.ts +149 -0
- package/go/cmd/rlm/main.go +39 -6
- package/go/go.mod +13 -3
- package/go/go.sum +53 -2
- package/go/rlm/compression.go +59 -0
- package/go/rlm/context_overflow.go +21 -36
- package/go/rlm/context_savings_test.go +387 -0
- package/go/rlm/json_extraction.go +140 -0
- package/go/rlm/lcm_agentic_map.go +317 -0
- package/go/rlm/lcm_context_loop.go +309 -0
- package/go/rlm/lcm_delegation.go +257 -0
- package/go/rlm/lcm_episodes.go +313 -0
- package/go/rlm/lcm_episodes_test.go +384 -0
- package/go/rlm/lcm_files.go +424 -0
- package/go/rlm/lcm_map.go +348 -0
- package/go/rlm/lcm_store.go +615 -0
- package/go/rlm/lcm_summarizer.go +239 -0
- package/go/rlm/lcm_test.go +1407 -0
- package/go/rlm/rlm.go +124 -1
- package/go/rlm/store_backend.go +121 -0
- package/go/rlm/store_backend_test.go +428 -0
- package/go/rlm/store_sqlite.go +575 -0
- package/go/rlm/structured.go +6 -83
- package/go/rlm/token_tracking_test.go +25 -11
- package/go/rlm/tokenizer.go +216 -0
- package/go/rlm/tokenizer_test.go +305 -0
- package/go/rlm/types.go +23 -1
- package/go/rlm.test +0 -0
- package/package.json +1 -1
|
@@ -10,6 +10,13 @@ import (
|
|
|
10
10
|
"testing"
|
|
11
11
|
)
|
|
12
12
|
|
|
13
|
+
func useHeuristicTokenizerForTest(t *testing.T) {
|
|
14
|
+
t.Helper()
|
|
15
|
+
ResetDefaultTokenizer()
|
|
16
|
+
t.Cleanup(func() {
|
|
17
|
+
ResetDefaultTokenizer()
|
|
18
|
+
})
|
|
19
|
+
}
|
|
13
20
|
// ─── Token Tracking Unit Tests ──────────────────────────────────────────────
|
|
14
21
|
|
|
15
22
|
func TestTokenUsage_ParsedFromAPIResponse(t *testing.T) {
|
|
@@ -25,7 +32,7 @@ func TestTokenUsage_ParsedFromAPIResponse(t *testing.T) {
|
|
|
25
32
|
"total_tokens": 175,
|
|
26
33
|
},
|
|
27
34
|
}
|
|
28
|
-
json.NewEncoder(w).Encode(resp)
|
|
35
|
+
_ = json.NewEncoder(w).Encode(resp)
|
|
29
36
|
}))
|
|
30
37
|
defer server.Close()
|
|
31
38
|
|
|
@@ -62,7 +69,7 @@ func TestTokenUsage_NilWhenAPIDoesNotReturnUsage(t *testing.T) {
|
|
|
62
69
|
{"message": map[string]string{"content": "Hello"}},
|
|
63
70
|
},
|
|
64
71
|
}
|
|
65
|
-
json.NewEncoder(w).Encode(resp)
|
|
72
|
+
_ = json.NewEncoder(w).Encode(resp)
|
|
66
73
|
}))
|
|
67
74
|
defer server.Close()
|
|
68
75
|
|
|
@@ -95,7 +102,7 @@ func TestRLMStats_TokenAccumulation(t *testing.T) {
|
|
|
95
102
|
"total_tokens": 120 * callCount,
|
|
96
103
|
},
|
|
97
104
|
}
|
|
98
|
-
json.NewEncoder(w).Encode(resp)
|
|
105
|
+
_ = json.NewEncoder(w).Encode(resp)
|
|
99
106
|
}))
|
|
100
107
|
defer server.Close()
|
|
101
108
|
|
|
@@ -144,7 +151,7 @@ func TestRLMStats_TokenAccumulation_MultipleIterations(t *testing.T) {
|
|
|
144
151
|
"total_tokens": 250,
|
|
145
152
|
},
|
|
146
153
|
}
|
|
147
|
-
json.NewEncoder(w).Encode(resp)
|
|
154
|
+
_ = json.NewEncoder(w).Encode(resp)
|
|
148
155
|
}))
|
|
149
156
|
defer server.Close()
|
|
150
157
|
|
|
@@ -367,10 +374,11 @@ func generateLargeContext(targetTokens int) string {
|
|
|
367
374
|
}
|
|
368
375
|
|
|
369
376
|
func TestTokenEfficiency_TFIDFUsesFewerTokens(t *testing.T) {
|
|
377
|
+
useHeuristicTokenizerForTest(t)
|
|
378
|
+
|
|
370
379
|
// Generate a large context (~35,000 tokens, well over 32k)
|
|
371
380
|
largeContext := generateLargeContext(35000)
|
|
372
381
|
originalTokens := EstimateTokens(largeContext)
|
|
373
|
-
|
|
374
382
|
if originalTokens < 32000 {
|
|
375
383
|
t.Fatalf("generated context is too small: %d tokens, need at least 32000", originalTokens)
|
|
376
384
|
}
|
|
@@ -407,9 +415,10 @@ func TestTokenEfficiency_TFIDFUsesFewerTokens(t *testing.T) {
|
|
|
407
415
|
}
|
|
408
416
|
|
|
409
417
|
func TestTokenEfficiency_TextRankUsesFewerTokens(t *testing.T) {
|
|
418
|
+
useHeuristicTokenizerForTest(t)
|
|
419
|
+
|
|
410
420
|
largeContext := generateLargeContext(35000)
|
|
411
421
|
originalTokens := EstimateTokens(largeContext)
|
|
412
|
-
|
|
413
422
|
if originalTokens < 32000 {
|
|
414
423
|
t.Fatalf("generated context is too small: %d tokens, need at least 32000", originalTokens)
|
|
415
424
|
}
|
|
@@ -442,6 +451,8 @@ func TestTokenEfficiency_TextRankUsesFewerTokens(t *testing.T) {
|
|
|
442
451
|
}
|
|
443
452
|
|
|
444
453
|
func TestTokenEfficiency_TruncateUsesFewerTokens(t *testing.T) {
|
|
454
|
+
useHeuristicTokenizerForTest(t)
|
|
455
|
+
|
|
445
456
|
largeContext := generateLargeContext(35000)
|
|
446
457
|
originalTokens := EstimateTokens(largeContext)
|
|
447
458
|
|
|
@@ -569,10 +580,11 @@ func TestTokenEfficiency_PreemptiveReduction(t *testing.T) {
|
|
|
569
580
|
}
|
|
570
581
|
|
|
571
582
|
func TestTokenEfficiency_AllStrategiesCompared(t *testing.T) {
|
|
583
|
+
useHeuristicTokenizerForTest(t)
|
|
584
|
+
|
|
572
585
|
// Generate a 40k token context (well over 32k limit)
|
|
573
586
|
largeContext := generateLargeContext(40000)
|
|
574
587
|
originalTokens := EstimateTokens(largeContext)
|
|
575
|
-
|
|
576
588
|
if originalTokens < 35000 {
|
|
577
589
|
t.Fatalf("generated context is too small: %d tokens, need at least 35000", originalTokens)
|
|
578
590
|
}
|
|
@@ -663,10 +675,11 @@ func TestTokenEfficiency_AllStrategiesCompared(t *testing.T) {
|
|
|
663
675
|
}
|
|
664
676
|
|
|
665
677
|
func TestTokenEfficiency_VeryLargeContext_100kTokens(t *testing.T) {
|
|
678
|
+
useHeuristicTokenizerForTest(t)
|
|
679
|
+
|
|
666
680
|
// Test with a very large context (~100k tokens) to prove scaling
|
|
667
681
|
largeContext := generateLargeContext(100000)
|
|
668
682
|
originalTokens := EstimateTokens(largeContext)
|
|
669
|
-
|
|
670
683
|
if originalTokens < 90000 {
|
|
671
684
|
t.Fatalf("generated context is too small: %d tokens, need at least 90000", originalTokens)
|
|
672
685
|
}
|
|
@@ -721,7 +734,7 @@ func TestTokenEfficiency_MapReduceTracksTokens(t *testing.T) {
|
|
|
721
734
|
"total_tokens": 530 + callCount*50,
|
|
722
735
|
},
|
|
723
736
|
}
|
|
724
|
-
json.NewEncoder(w).Encode(resp)
|
|
737
|
+
_ = json.NewEncoder(w).Encode(resp)
|
|
725
738
|
}))
|
|
726
739
|
defer server.Close()
|
|
727
740
|
|
|
@@ -780,7 +793,7 @@ func TestTokenEfficiency_StructuredCompletion_TracksTokens(t *testing.T) {
|
|
|
780
793
|
"total_tokens": 315,
|
|
781
794
|
},
|
|
782
795
|
}
|
|
783
|
-
json.NewEncoder(w).Encode(resp)
|
|
796
|
+
_ = json.NewEncoder(w).Encode(resp)
|
|
784
797
|
}))
|
|
785
798
|
defer server.Close()
|
|
786
799
|
|
|
@@ -827,10 +840,11 @@ func TestTokenEfficiency_StructuredCompletion_TracksTokens(t *testing.T) {
|
|
|
827
840
|
// ─── Token Estimation Accuracy Tests ─────────────────────────────────────────
|
|
828
841
|
|
|
829
842
|
func TestEstimateTokens_AccuracyForLargeContent(t *testing.T) {
|
|
843
|
+
useHeuristicTokenizerForTest(t)
|
|
844
|
+
|
|
830
845
|
// Verify that our estimation stays reasonable for large content
|
|
831
846
|
content := generateLargeContext(32000)
|
|
832
847
|
estimated := EstimateTokens(content)
|
|
833
|
-
|
|
834
848
|
// Real tokenizer would give different results, but our estimation should be
|
|
835
849
|
// within a reasonable range. The key property: conservative (over-estimates slightly)
|
|
836
850
|
charToTokenRatio := float64(len(content)) / float64(estimated)
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
package rlm
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"strings"
|
|
5
|
+
"sync"
|
|
6
|
+
"sync/atomic"
|
|
7
|
+
|
|
8
|
+
"github.com/cespare/xxhash/v2"
|
|
9
|
+
tiktoken "github.com/pkoukk/tiktoken-go"
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
// ─── Tokenizer Interface ─────────────────────────────────────────────────────
|
|
13
|
+
// Provides accurate token counting with model-specific BPE encoding.
|
|
14
|
+
// Replaces the heuristic ~3.5 chars/token estimation with real tokenization.
|
|
15
|
+
|
|
16
|
+
// Tokenizer counts tokens in text strings.
type Tokenizer interface {
	// CountTokens returns the number of tokens in text.
	CountTokens(text string) int
}
|
|
20
|
+
|
|
21
|
+
// ─── Tiktoken BPE Tokenizer ──────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
// TiktokenTokenizer uses the tiktoken BPE encoding for accurate token counting.
type TiktokenTokenizer struct {
	// encoding is the loaded BPE encoder that CountTokens runs text through.
	encoding *tiktoken.Tiktoken
	// name is the encoding name, reported by EncodingName.
	name string
}
|
|
28
|
+
|
|
29
|
+
// modelEncodingMap maps lower-case model names and name prefixes to their
// tiktoken encoding names. Non-OpenAI families are mapped to cl100k_base as
// the closest available approximation.
var modelEncodingMap = map[string]string{
	// OpenAI o200k_base models
	"gpt-4o":  "o200k_base",
	"gpt-4o-": "o200k_base",
	"o1":      "o200k_base",
	"o3":      "o200k_base",
	"o4":      "o200k_base",
	// OpenAI cl100k_base models
	"gpt-4-":  "cl100k_base",
	"gpt-4":   "cl100k_base",
	"gpt-3.5": "cl100k_base",
	// Anthropic (closest approximation)
	"claude": "cl100k_base",
	// Meta Llama
	"llama": "cl100k_base",
	// Mistral
	"mistral": "cl100k_base",
	"mixtral": "cl100k_base",
	// Qwen
	"qwen": "cl100k_base",
}

// encodingForModel returns the tiktoken encoding name for a given model.
//
// Matching is case-insensitive. The full model name is tried first (exact
// match, then longest matching prefix). If nothing matches and the name is
// provider-qualified (contains "/", e.g. "openai/gpt-4o"), the segment after
// the last "/" is tried the same way. Names that match nothing fall back to
// "cl100k_base".
func encodingForModel(model string) string {
	lower := strings.ToLower(model)

	if enc, ok := matchModelEncoding(lower); ok {
		return enc
	}

	// Only reached when the full name matched nothing, so every name that
	// resolved before this fallback existed still resolves identically.
	if idx := strings.LastIndex(lower, "/"); idx >= 0 {
		if enc, ok := matchModelEncoding(lower[idx+1:]); ok {
			return enc
		}
	}

	return "cl100k_base"
}

// matchModelEncoding looks name up in modelEncodingMap, preferring an exact
// match and otherwise the longest key that is a prefix of name. The boolean is
// false when no key matches.
func matchModelEncoding(name string) (string, bool) {
	if enc, ok := modelEncodingMap[name]; ok {
		return enc, true
	}

	bestPrefix, bestEnc := "", ""
	for prefix, enc := range modelEncodingMap {
		// Strictly longer wins, so the result does not depend on map
		// iteration order.
		if len(prefix) > len(bestPrefix) && strings.HasPrefix(name, prefix) {
			bestPrefix, bestEnc = prefix, enc
		}
	}
	return bestEnc, bestPrefix != ""
}
|
|
73
|
+
|
|
74
|
+
// NewTiktokenTokenizer creates a tokenizer using the appropriate BPE encoding for the model.
|
|
75
|
+
// Returns nil and an error if the encoding cannot be loaded.
|
|
76
|
+
func NewTiktokenTokenizer(model string) (*TiktokenTokenizer, error) {
|
|
77
|
+
encName := encodingForModel(model)
|
|
78
|
+
enc, err := tiktoken.GetEncoding(encName)
|
|
79
|
+
if err != nil {
|
|
80
|
+
return nil, err
|
|
81
|
+
}
|
|
82
|
+
return &TiktokenTokenizer{
|
|
83
|
+
encoding: enc,
|
|
84
|
+
name: encName,
|
|
85
|
+
}, nil
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// CountTokens returns the exact BPE token count for the text.
|
|
89
|
+
func (t *TiktokenTokenizer) CountTokens(text string) int {
|
|
90
|
+
if len(text) == 0 {
|
|
91
|
+
return 0
|
|
92
|
+
}
|
|
93
|
+
tokens := t.encoding.Encode(text, nil, nil)
|
|
94
|
+
return len(tokens)
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// EncodingName returns the name of the encoding used, i.e. the encoding name
// stored on the tokenizer when it was constructed (for example "o200k_base").
func (t *TiktokenTokenizer) EncodingName() string {
	return t.name
}
|
|
101
|
+
|
|
102
|
+
// ─── Heuristic Tokenizer (Fallback) ──────────────────────────────────────────
|
|
103
|
+
|
|
104
|
+
// HeuristicTokenizer estimates token counts from the text length alone.
// This is the original EstimateTokens logic, kept as a fallback
// when tiktoken encodings are unavailable.
type HeuristicTokenizer struct{}

// CountTokens provides a fast approximation of token count.
//
// It returns ceil(len(text) / 3.5), where len(text) is the UTF-8 byte length
// of text, not its rune count; multi-byte characters therefore contribute more
// than one unit each. The ratio is intended to be conservative (to
// over-estimate). Empty input returns 0.
func (h *HeuristicTokenizer) CountTokens(text string) int {
	n := len(text)
	if n == 0 {
		return 0
	}
	// ceil(n/3.5) == ceil(2n/7). This yields exactly the same value as
	// (n*10+34)/35 but keeps the intermediate product five times smaller, so
	// it cannot overflow as early on platforms with a 32-bit int.
	return (2*n + 6) / 7
}
|
|
117
|
+
|
|
118
|
+
// ─── Cached Tokenizer ────────────────────────────────────────────────────────
|
|
119
|
+
|
|
120
|
+
// maxCacheSize is the entry count at which CachedTokenizer.CountTokens stops
// adding to the cache and clears it instead.
const maxCacheSize = 10000

// CachedTokenizer wraps another Tokenizer with a bounded memoization cache.
// Uses xxhash for fast key hashing and sync.Map for concurrent access.
// There is no per-entry (LRU) eviction: when the cache reaches maxCacheSize
// entries it is cleared wholesale. Entries are keyed by the 64-bit hash only,
// so two texts with the same hash would share one cached count.
type CachedTokenizer struct {
	// inner computes the count on a cache miss.
	inner Tokenizer
	cache sync.Map // map[uint64]int
	// size is the entry counter maintained alongside cache; see CacheSize.
	size atomic.Int64
}
|
|
129
|
+
|
|
130
|
+
// NewCachedTokenizer wraps a tokenizer with caching.
|
|
131
|
+
func NewCachedTokenizer(inner Tokenizer) *CachedTokenizer {
|
|
132
|
+
return &CachedTokenizer{
|
|
133
|
+
inner: inner,
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// CountTokens returns the cached token count, computing and caching on miss.
|
|
138
|
+
func (c *CachedTokenizer) CountTokens(text string) int {
|
|
139
|
+
if len(text) == 0 {
|
|
140
|
+
return 0
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Hash the text for cache key
|
|
144
|
+
key := xxhash.Sum64String(text)
|
|
145
|
+
|
|
146
|
+
// Check cache
|
|
147
|
+
if val, ok := c.cache.Load(key); ok {
|
|
148
|
+
return val.(int)
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// Compute
|
|
152
|
+
count := c.inner.CountTokens(text)
|
|
153
|
+
|
|
154
|
+
// Store if under max size; evict all if over (simple strategy)
|
|
155
|
+
if c.size.Load() < maxCacheSize {
|
|
156
|
+
if _, loaded := c.cache.LoadOrStore(key, count); !loaded {
|
|
157
|
+
c.size.Add(1)
|
|
158
|
+
}
|
|
159
|
+
} else {
|
|
160
|
+
// Simple eviction: clear the cache when full
|
|
161
|
+
c.cache.Clear()
|
|
162
|
+
c.size.Store(0)
|
|
163
|
+
c.cache.Store(key, count)
|
|
164
|
+
c.size.Add(1)
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
return count
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
// CacheSize returns the current number of cached entries, as tracked by the
// counter that CountTokens increments on insert and resets to zero when it
// clears the cache.
func (c *CachedTokenizer) CacheSize() int64 {
	return c.size.Load()
}
|
|
174
|
+
|
|
175
|
+
// Inner returns the underlying tokenizer, i.e. the one passed to
// NewCachedTokenizer and consulted on cache misses.
func (c *CachedTokenizer) Inner() Tokenizer {
	return c.inner
}
|
|
179
|
+
|
|
180
|
+
// ─── Global Default Tokenizer ────────────────────────────────────────────────
|
|
181
|
+
|
|
182
|
+
var (
	// defaultTokenizer is the package-wide tokenizer handed out by
	// GetTokenizer. It starts as the heuristic tokenizer and is replaced by
	// SetDefaultTokenizer / ResetDefaultTokenizer.
	defaultTokenizer Tokenizer = &HeuristicTokenizer{}
	// tokenizerMu guards reads and writes of defaultTokenizer.
	tokenizerMu sync.RWMutex
)
|
|
186
|
+
|
|
187
|
+
// SetDefaultTokenizer configures the global tokenizer for a given model.
|
|
188
|
+
// Tries tiktoken BPE first, falls back to heuristic on failure.
|
|
189
|
+
// The tokenizer is wrapped with caching for performance.
|
|
190
|
+
func SetDefaultTokenizer(model string) {
|
|
191
|
+
tokenizerMu.Lock()
|
|
192
|
+
defer tokenizerMu.Unlock()
|
|
193
|
+
|
|
194
|
+
tok, err := NewTiktokenTokenizer(model)
|
|
195
|
+
if err != nil {
|
|
196
|
+
// Fall back to heuristic with caching
|
|
197
|
+
defaultTokenizer = NewCachedTokenizer(&HeuristicTokenizer{})
|
|
198
|
+
return
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
defaultTokenizer = NewCachedTokenizer(tok)
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
// GetTokenizer returns the current global tokenizer.
|
|
205
|
+
func GetTokenizer() Tokenizer {
|
|
206
|
+
tokenizerMu.RLock()
|
|
207
|
+
defer tokenizerMu.RUnlock()
|
|
208
|
+
return defaultTokenizer
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// ResetDefaultTokenizer resets to the heuristic tokenizer (used in tests).
|
|
212
|
+
func ResetDefaultTokenizer() {
|
|
213
|
+
tokenizerMu.Lock()
|
|
214
|
+
defer tokenizerMu.Unlock()
|
|
215
|
+
defaultTokenizer = &HeuristicTokenizer{}
|
|
216
|
+
}
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
package rlm
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"strings"
|
|
5
|
+
"testing"
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
// ─── Tiktoken BPE Tokenizer Tests ────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
func TestTiktokenTokenizer_English(t *testing.T) {
|
|
11
|
+
tok, err := NewTiktokenTokenizer("gpt-4o")
|
|
12
|
+
if err != nil {
|
|
13
|
+
t.Fatalf("failed to create tokenizer: %v", err)
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
text := "Hello, world! This is a test of the tokenizer."
|
|
17
|
+
bpeCount := tok.CountTokens(text)
|
|
18
|
+
heuristic := (&HeuristicTokenizer{}).CountTokens(text)
|
|
19
|
+
|
|
20
|
+
t.Logf("English text: BPE=%d, Heuristic=%d", bpeCount, heuristic)
|
|
21
|
+
|
|
22
|
+
if bpeCount <= 0 {
|
|
23
|
+
t.Error("BPE count should be > 0")
|
|
24
|
+
}
|
|
25
|
+
// BPE and heuristic should give different counts for most text
|
|
26
|
+
// (they may coincidentally match for very short strings, so just verify BPE is reasonable)
|
|
27
|
+
if bpeCount > len(text) {
|
|
28
|
+
t.Errorf("BPE count %d should not exceed character count %d", bpeCount, len(text))
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// TestTiktokenTokenizer_Code checks that a Go source snippet produces a
// positive BPE token count. The heuristic count is only logged.
func TestTiktokenTokenizer_Code(t *testing.T) {
	tok, err := NewTiktokenTokenizer("gpt-4o")
	if err != nil {
		t.Fatalf("failed to create tokenizer: %v", err)
	}

	code := `func main() {
fmt.Println("Hello, World!")
for i := 0; i < 100; i++ {
result = append(result, processItem(items[i]))
}
}`
	bpeCount := tok.CountTokens(code)
	heuristic := (&HeuristicTokenizer{}).CountTokens(code)

	t.Logf("Code: BPE=%d, Heuristic=%d (chars=%d)", bpeCount, heuristic, len(code))

	if bpeCount <= 0 {
		t.Error("BPE count should be > 0 for code")
	}
}
|
|
53
|
+
|
|
54
|
+
func TestTiktokenTokenizer_JSON(t *testing.T) {
|
|
55
|
+
tok, err := NewTiktokenTokenizer("gpt-4o")
|
|
56
|
+
if err != nil {
|
|
57
|
+
t.Fatalf("failed to create tokenizer: %v", err)
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
jsonData := `{"name": "test", "values": [1, 2, 3, 4, 5], "nested": {"key": "value", "count": 42}}`
|
|
61
|
+
bpeCount := tok.CountTokens(jsonData)
|
|
62
|
+
heuristic := (&HeuristicTokenizer{}).CountTokens(jsonData)
|
|
63
|
+
|
|
64
|
+
t.Logf("JSON: BPE=%d, Heuristic=%d (chars=%d)", bpeCount, heuristic, len(jsonData))
|
|
65
|
+
|
|
66
|
+
if bpeCount <= 0 {
|
|
67
|
+
t.Error("BPE count should be > 0 for JSON")
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// TestTiktokenTokenizer_CJK checks that mixed Chinese/Japanese/Korean text
// produces a positive BPE token count. The comparison against the heuristic
// is informational: it only logs and never fails the test.
func TestTiktokenTokenizer_CJK(t *testing.T) {
	tok, err := NewTiktokenTokenizer("gpt-4o")
	if err != nil {
		t.Fatalf("failed to create tokenizer: %v", err)
	}

	// Chinese, Japanese, and Korean text
	cjkText := "这是一个测试。日本語のテスト。한국어 테스트입니다。"
	bpeCount := tok.CountTokens(cjkText)
	heuristic := (&HeuristicTokenizer{}).CountTokens(cjkText)

	t.Logf("CJK text: BPE=%d, Heuristic=%d (chars=%d, bytes=%d)", bpeCount, heuristic, len([]rune(cjkText)), len(cjkText))

	if bpeCount <= 0 {
		t.Error("BPE count should be > 0 for CJK")
	}
	// The original expectation was that BPE counts MORE tokens than the
	// heuristic for CJK (~1.5 chars per token vs. an assumed ~3.5).
	// NOTE(review): HeuristicTokenizer divides the UTF-8 byte length by 3.5,
	// and these runes are mostly 3 bytes each, so the heuristic may well be
	// the larger number here — hence this is logged rather than asserted.
	if bpeCount <= heuristic {
		t.Logf("WARNING: BPE (%d) should typically be > heuristic (%d) for CJK text", bpeCount, heuristic)
	}
}
|
|
93
|
+
|
|
94
|
+
func TestTiktokenTokenizer_Empty(t *testing.T) {
|
|
95
|
+
tok, err := NewTiktokenTokenizer("gpt-4o")
|
|
96
|
+
if err != nil {
|
|
97
|
+
t.Fatalf("failed to create tokenizer: %v", err)
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
if tok.CountTokens("") != 0 {
|
|
101
|
+
t.Error("empty string should return 0 tokens")
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
func TestTiktokenTokenizer_EncodingSelection(t *testing.T) {
|
|
106
|
+
tests := []struct {
|
|
107
|
+
model string
|
|
108
|
+
expected string
|
|
109
|
+
}{
|
|
110
|
+
{"gpt-4o", "o200k_base"},
|
|
111
|
+
{"gpt-4o-mini", "o200k_base"},
|
|
112
|
+
{"gpt-4o-mini-2024-07-18", "o200k_base"},
|
|
113
|
+
{"gpt-4", "cl100k_base"},
|
|
114
|
+
{"gpt-4-turbo", "cl100k_base"},
|
|
115
|
+
{"gpt-3.5-turbo", "cl100k_base"},
|
|
116
|
+
{"claude-3-opus", "cl100k_base"},
|
|
117
|
+
{"claude-sonnet-4", "cl100k_base"},
|
|
118
|
+
{"o1", "o200k_base"},
|
|
119
|
+
{"o3-mini", "o200k_base"},
|
|
120
|
+
{"llama-3.1", "cl100k_base"},
|
|
121
|
+
{"unknown-model", "cl100k_base"},
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
for _, tt := range tests {
|
|
125
|
+
t.Run(tt.model, func(t *testing.T) {
|
|
126
|
+
enc := encodingForModel(tt.model)
|
|
127
|
+
if enc != tt.expected {
|
|
128
|
+
t.Errorf("encodingForModel(%q) = %q, want %q", tt.model, enc, tt.expected)
|
|
129
|
+
}
|
|
130
|
+
})
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// ─── Heuristic Tokenizer Tests ───────────────────────────────────────────────
|
|
135
|
+
|
|
136
|
+
func TestHeuristicTokenizer_Fallback(t *testing.T) {
|
|
137
|
+
h := &HeuristicTokenizer{}
|
|
138
|
+
|
|
139
|
+
if h.CountTokens("") != 0 {
|
|
140
|
+
t.Error("empty string should be 0")
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// "Hello" = 5 chars, ceil(5/3.5) = 2
|
|
144
|
+
count := h.CountTokens("Hello")
|
|
145
|
+
if count <= 0 {
|
|
146
|
+
t.Error("should count > 0 tokens for 'Hello'")
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// Longer text
|
|
150
|
+
longText := strings.Repeat("word ", 1000)
|
|
151
|
+
longCount := h.CountTokens(longText)
|
|
152
|
+
expected := (len(longText)*10 + 34) / 35
|
|
153
|
+
if longCount != expected {
|
|
154
|
+
t.Errorf("heuristic count %d != expected %d", longCount, expected)
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// ─── Cached Tokenizer Tests ─────────────────────────────────────────────────
|
|
159
|
+
|
|
160
|
+
func TestCachedTokenizer_CacheHit(t *testing.T) {
|
|
161
|
+
callCount := 0
|
|
162
|
+
inner := &countingTokenizer{fn: func(text string) int {
|
|
163
|
+
callCount++
|
|
164
|
+
return len(text) / 4
|
|
165
|
+
}}
|
|
166
|
+
|
|
167
|
+
cached := NewCachedTokenizer(inner)
|
|
168
|
+
|
|
169
|
+
text := "This is a test string for caching"
|
|
170
|
+
|
|
171
|
+
// First call: cache miss
|
|
172
|
+
count1 := cached.CountTokens(text)
|
|
173
|
+
if callCount != 1 {
|
|
174
|
+
t.Errorf("expected 1 inner call, got %d", callCount)
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// Second call: cache hit (inner should NOT be called again)
|
|
178
|
+
count2 := cached.CountTokens(text)
|
|
179
|
+
if callCount != 1 {
|
|
180
|
+
t.Errorf("expected still 1 inner call after cache hit, got %d", callCount)
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
if count1 != count2 {
|
|
184
|
+
t.Errorf("cache returned different values: %d vs %d", count1, count2)
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
if cached.CacheSize() != 1 {
|
|
188
|
+
t.Errorf("cache size should be 1, got %d", cached.CacheSize())
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
func TestCachedTokenizer_Empty(t *testing.T) {
|
|
193
|
+
inner := &HeuristicTokenizer{}
|
|
194
|
+
cached := NewCachedTokenizer(inner)
|
|
195
|
+
|
|
196
|
+
if cached.CountTokens("") != 0 {
|
|
197
|
+
t.Error("empty string should return 0 without caching")
|
|
198
|
+
}
|
|
199
|
+
if cached.CacheSize() != 0 {
|
|
200
|
+
t.Error("cache should not store empty strings")
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
func TestCachedTokenizer_DifferentStrings(t *testing.T) {
|
|
205
|
+
inner := &HeuristicTokenizer{}
|
|
206
|
+
cached := NewCachedTokenizer(inner)
|
|
207
|
+
|
|
208
|
+
cached.CountTokens("string one")
|
|
209
|
+
cached.CountTokens("string two")
|
|
210
|
+
cached.CountTokens("string three")
|
|
211
|
+
|
|
212
|
+
if cached.CacheSize() != 3 {
|
|
213
|
+
t.Errorf("cache size should be 3, got %d", cached.CacheSize())
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
func TestCachedTokenizer_Inner(t *testing.T) {
|
|
218
|
+
inner := &HeuristicTokenizer{}
|
|
219
|
+
cached := NewCachedTokenizer(inner)
|
|
220
|
+
|
|
221
|
+
if cached.Inner() != inner {
|
|
222
|
+
t.Error("Inner() should return the wrapped tokenizer")
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
// countingTokenizer is a Tokenizer test double whose CountTokens delegates to
// fn. It keeps no counter itself; tests that want to track calls do so inside
// the closure they supply.
type countingTokenizer struct {
	fn func(string) int
}

// CountTokens forwards text to c.fn and returns its result.
func (c *countingTokenizer) CountTokens(text string) int {
	return c.fn(text)
}
|
|
234
|
+
|
|
235
|
+
// ─── Global Default Tokenizer Tests ──────────────────────────────────────────
|
|
236
|
+
|
|
237
|
+
func TestSetDefaultTokenizer_KnownModel(t *testing.T) {
|
|
238
|
+
defer ResetDefaultTokenizer()
|
|
239
|
+
|
|
240
|
+
SetDefaultTokenizer("gpt-4o")
|
|
241
|
+
tok := GetTokenizer()
|
|
242
|
+
|
|
243
|
+
// Should be a CachedTokenizer wrapping a TiktokenTokenizer
|
|
244
|
+
cached, ok := tok.(*CachedTokenizer)
|
|
245
|
+
if !ok {
|
|
246
|
+
t.Fatalf("expected CachedTokenizer, got %T", tok)
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
inner, ok := cached.Inner().(*TiktokenTokenizer)
|
|
250
|
+
if !ok {
|
|
251
|
+
t.Fatalf("expected inner TiktokenTokenizer, got %T", cached.Inner())
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
if inner.EncodingName() != "o200k_base" {
|
|
255
|
+
t.Errorf("expected o200k_base encoding, got %s", inner.EncodingName())
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
// Verify it actually counts tokens
|
|
259
|
+
count := tok.CountTokens("Hello, world!")
|
|
260
|
+
if count <= 0 {
|
|
261
|
+
t.Error("tokenizer should count > 0 tokens")
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
func TestSetDefaultTokenizer_UnknownModel(t *testing.T) {
|
|
266
|
+
defer ResetDefaultTokenizer()
|
|
267
|
+
|
|
268
|
+
// Even unknown models should work because we default to cl100k_base
|
|
269
|
+
SetDefaultTokenizer("totally-unknown-model-xyz")
|
|
270
|
+
tok := GetTokenizer()
|
|
271
|
+
|
|
272
|
+
count := tok.CountTokens("Hello, world!")
|
|
273
|
+
if count <= 0 {
|
|
274
|
+
t.Error("tokenizer should count > 0 tokens even for unknown model")
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
func TestEstimateTokens_UsesDefaultTokenizer(t *testing.T) {
|
|
279
|
+
defer ResetDefaultTokenizer()
|
|
280
|
+
|
|
281
|
+
// With heuristic default
|
|
282
|
+
heuristicCount := EstimateTokens("Hello, world! This is a test.")
|
|
283
|
+
|
|
284
|
+
// Switch to BPE
|
|
285
|
+
SetDefaultTokenizer("gpt-4o")
|
|
286
|
+
bpeCount := EstimateTokens("Hello, world! This is a test.")
|
|
287
|
+
|
|
288
|
+
t.Logf("EstimateTokens: heuristic=%d, bpe=%d", heuristicCount, bpeCount)
|
|
289
|
+
|
|
290
|
+
// Both should be > 0
|
|
291
|
+
if heuristicCount <= 0 || bpeCount <= 0 {
|
|
292
|
+
t.Errorf("both counts should be > 0: heuristic=%d, bpe=%d", heuristicCount, bpeCount)
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
func TestResetDefaultTokenizer(t *testing.T) {
|
|
297
|
+
SetDefaultTokenizer("gpt-4o")
|
|
298
|
+
ResetDefaultTokenizer()
|
|
299
|
+
|
|
300
|
+
tok := GetTokenizer()
|
|
301
|
+
_, ok := tok.(*HeuristicTokenizer)
|
|
302
|
+
if !ok {
|
|
303
|
+
t.Errorf("after reset, expected HeuristicTokenizer, got %T", tok)
|
|
304
|
+
}
|
|
305
|
+
}
|
package/go/rlm/types.go
CHANGED
|
@@ -70,6 +70,7 @@ type Config struct {
|
|
|
70
70
|
MetaAgent *MetaAgentConfig
|
|
71
71
|
Observability *ObservabilityConfig
|
|
72
72
|
ContextOverflow *ContextOverflowConfig
|
|
73
|
+
LCM *LCMConfig // Lossless Context Management configuration
|
|
73
74
|
}
|
|
74
75
|
|
|
75
76
|
func ConfigFromMap(config map[string]interface{}) Config {
|
|
@@ -126,6 +127,27 @@ func ConfigFromMap(config map[string]interface{}) Config {
|
|
|
126
127
|
parsed.ContextOverflow = &co
|
|
127
128
|
}
|
|
128
129
|
|
|
130
|
+
// Extract LCM config
|
|
131
|
+
if lcmConfig, ok := config["lcm"].(map[string]interface{}); ok {
|
|
132
|
+
lcm := DefaultLCMConfig()
|
|
133
|
+
if v, ok := lcmConfig["enabled"].(bool); ok {
|
|
134
|
+
lcm.Enabled = v
|
|
135
|
+
}
|
|
136
|
+
if v, ok := toInt(lcmConfig["soft_threshold"]); ok {
|
|
137
|
+
lcm.SoftThreshold = v
|
|
138
|
+
}
|
|
139
|
+
if v, ok := toInt(lcmConfig["hard_threshold"]); ok {
|
|
140
|
+
lcm.HardThreshold = v
|
|
141
|
+
}
|
|
142
|
+
if v, ok := toInt(lcmConfig["compaction_block_size"]); ok {
|
|
143
|
+
lcm.CompactionBlockSize = v
|
|
144
|
+
}
|
|
145
|
+
if v, ok := toInt(lcmConfig["summary_target_tokens"]); ok {
|
|
146
|
+
lcm.SummaryTargetTokens = v
|
|
147
|
+
}
|
|
148
|
+
parsed.LCM = &lcm
|
|
149
|
+
}
|
|
150
|
+
|
|
129
151
|
for key, value := range config {
|
|
130
152
|
switch key {
|
|
131
153
|
case "recursive_model":
|
|
@@ -159,7 +181,7 @@ func ConfigFromMap(config map[string]interface{}) Config {
|
|
|
159
181
|
"trace_endpoint", "service_name", "log_output",
|
|
160
182
|
"langfuse_enabled", "langfuse_public_key",
|
|
161
183
|
"langfuse_secret_key", "langfuse_host",
|
|
162
|
-
"context_overflow":
|
|
184
|
+
"context_overflow", "lcm":
|
|
163
185
|
// ignore bridge-only config, meta_agent, observability, context_overflow, lcm (handled above/separately)
|
|
164
186
|
default:
|
|
165
187
|
parsed.ExtraParams[key] = value
|
package/go/rlm.test
ADDED
|
Binary file
|