recursive-llm-ts 4.8.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/bin/rlm-go +0 -0
- package/dist/bridge-interface.d.ts +152 -0
- package/dist/rlm.js +10 -0
- package/go/README.md +2 -2
- package/go/cmd/rlm/main.go +40 -7
- package/go/go.mod +14 -4
- package/go/go.sum +53 -2
- package/go/rlm/compression.go +59 -0
- package/go/rlm/context_overflow.go +49 -43
- package/go/rlm/context_savings_test.go +387 -0
- package/go/rlm/doc.go +2 -2
- package/go/rlm/json_extraction.go +140 -0
- package/go/rlm/lcm_agentic_map.go +317 -0
- package/go/rlm/lcm_context_loop.go +309 -0
- package/go/rlm/lcm_delegation.go +257 -0
- package/go/rlm/lcm_episodes.go +313 -0
- package/go/rlm/lcm_episodes_test.go +384 -0
- package/go/rlm/lcm_files.go +424 -0
- package/go/rlm/lcm_map.go +348 -0
- package/go/rlm/lcm_store.go +615 -0
- package/go/rlm/lcm_summarizer.go +239 -0
- package/go/rlm/lcm_test.go +1407 -0
- package/go/rlm/meta_agent.go +18 -2
- package/go/rlm/observability.go +6 -0
- package/go/rlm/openai.go +27 -10
- package/go/rlm/rlm.go +135 -4
- package/go/rlm/store_backend.go +121 -0
- package/go/rlm/store_backend_test.go +428 -0
- package/go/rlm/store_sqlite.go +575 -0
- package/go/rlm/structured.go +15 -83
- package/go/rlm/token_tracking_test.go +859 -0
- package/go/rlm/tokenizer.go +216 -0
- package/go/rlm/tokenizer_test.go +305 -0
- package/go/rlm/types.go +30 -5
- package/go/rlm.test +0 -0
- package/package.json +4 -4
|
@@ -48,18 +48,18 @@ var modelTokenLimits = map[string]int{
|
|
|
48
48
|
"o1-preview": 128000,
|
|
49
49
|
"o3-mini": 200000,
|
|
50
50
|
// Anthropic (via LiteLLM/proxy)
|
|
51
|
-
"claude-3-opus":
|
|
52
|
-
"claude-3-sonnet":
|
|
53
|
-
"claude-3-haiku":
|
|
54
|
-
"claude-3.5-sonnet":
|
|
55
|
-
"claude-3.5-haiku":
|
|
56
|
-
"claude-sonnet-4":
|
|
57
|
-
"claude-opus-4":
|
|
51
|
+
"claude-3-opus": 200000,
|
|
52
|
+
"claude-3-sonnet": 200000,
|
|
53
|
+
"claude-3-haiku": 200000,
|
|
54
|
+
"claude-3.5-sonnet": 200000,
|
|
55
|
+
"claude-3.5-haiku": 200000,
|
|
56
|
+
"claude-sonnet-4": 200000,
|
|
57
|
+
"claude-opus-4": 200000,
|
|
58
58
|
// Llama (common vLLM deployments)
|
|
59
|
-
"llama-3":
|
|
60
|
-
"llama-3.1":
|
|
61
|
-
"llama-3.2":
|
|
62
|
-
"llama-3.3":
|
|
59
|
+
"llama-3": 8192,
|
|
60
|
+
"llama-3.1": 128000,
|
|
61
|
+
"llama-3.2": 128000,
|
|
62
|
+
"llama-3.3": 128000,
|
|
63
63
|
// Mistral
|
|
64
64
|
"mistral-7b": 32768,
|
|
65
65
|
"mixtral-8x7b": 32768,
|
|
@@ -181,21 +181,11 @@ func (r *RLM) getResponseTokenBudget() int {
|
|
|
181
181
|
|
|
182
182
|
// ─── Token Estimation ────────────────────────────────────────────────────────
|
|
183
183
|
|
|
184
|
-
// EstimateTokens
|
|
185
|
-
//
|
|
186
|
-
//
|
|
187
|
-
//
|
|
188
|
-
// Approximate ratios for common encodings:
|
|
189
|
-
// - English text: ~4 chars/token (cl100k_base)
|
|
190
|
-
// - JSON/code: ~3.5 chars/token
|
|
191
|
-
// - CJK text: ~1.5 chars/token
|
|
192
|
-
// - Mixed: ~3.5 chars/token (safe default)
|
|
184
|
+
// EstimateTokens returns the token count for a string using the global tokenizer.
|
|
185
|
+
// When SetDefaultTokenizer has been called with a model name, this uses accurate
|
|
186
|
+
// BPE tokenization via tiktoken. Otherwise falls back to a ~3.5 chars/token heuristic.
|
|
193
187
|
func EstimateTokens(text string) int {
|
|
194
|
-
|
|
195
|
-
return 0
|
|
196
|
-
}
|
|
197
|
-
// Use 3.5 chars/token as conservative estimate
|
|
198
|
-
return (len(text)*10 + 34) / 35 // equivalent to ceil(len/3.5)
|
|
188
|
+
return GetTokenizer().CountTokens(text)
|
|
199
189
|
}
|
|
200
190
|
|
|
201
191
|
// EstimateMessagesTokens estimates the total tokens for a set of chat messages.
|
|
@@ -431,8 +421,13 @@ func (cr *contextReducer) reduceByMapReduce(query string, chunks []string, model
|
|
|
431
421
|
}
|
|
432
422
|
|
|
433
423
|
cr.rlm.stats.LlmCalls++
|
|
434
|
-
|
|
435
|
-
|
|
424
|
+
if result.Usage != nil {
|
|
425
|
+
cr.rlm.stats.PromptTokens += result.Usage.PromptTokens
|
|
426
|
+
cr.rlm.stats.CompletionTokens += result.Usage.CompletionTokens
|
|
427
|
+
cr.rlm.stats.TotalTokens += result.Usage.TotalTokens
|
|
428
|
+
}
|
|
429
|
+
summaries[idx] = result.Content
|
|
430
|
+
cr.obs.Debug("overflow", "Chunk %d/%d summarized: %d -> %d chars", idx+1, len(chunks), len(chunkText), len(result.Content))
|
|
436
431
|
}(i, chunk)
|
|
437
432
|
}
|
|
438
433
|
|
|
@@ -472,24 +467,20 @@ func (cr *contextReducer) reduceByMapReduce(query string, chunks []string, model
|
|
|
472
467
|
}
|
|
473
468
|
|
|
474
469
|
// reduceByTruncation simply truncates context to fit within the limit.
|
|
470
|
+
// Uses the shared TruncateText utility (compression.go).
|
|
475
471
|
func (cr *contextReducer) reduceByTruncation(context string, modelLimit int, overhead int) (string, error) {
|
|
476
472
|
cr.obs.Debug("overflow", "Using truncation strategy")
|
|
477
473
|
|
|
478
474
|
availableTokens := modelLimit - overhead
|
|
479
|
-
|
|
475
|
+
truncated := TruncateText(context, TruncateTextParams{
|
|
476
|
+
MaxTokens: availableTokens,
|
|
477
|
+
MarkerText: "\n\n[... context truncated due to token limit ...]\n\n",
|
|
478
|
+
})
|
|
480
479
|
|
|
481
|
-
if
|
|
480
|
+
if truncated == context {
|
|
482
481
|
return context, nil
|
|
483
482
|
}
|
|
484
483
|
|
|
485
|
-
// Keep beginning and end, truncate middle (addresses "lost in the middle" problem)
|
|
486
|
-
keepFromStart := maxChars * 2 / 3
|
|
487
|
-
keepFromEnd := maxChars / 3
|
|
488
|
-
|
|
489
|
-
truncated := context[:keepFromStart] +
|
|
490
|
-
"\n\n[... context truncated due to token limit ...]\n\n" +
|
|
491
|
-
context[len(context)-keepFromEnd:]
|
|
492
|
-
|
|
493
484
|
cr.obs.Debug("overflow", "Truncated context: %d -> %d chars", len(context), len(truncated))
|
|
494
485
|
return truncated, nil
|
|
495
486
|
}
|
|
@@ -536,8 +527,13 @@ func (cr *contextReducer) reduceByChunkedExtraction(query string, chunks []strin
|
|
|
536
527
|
}
|
|
537
528
|
|
|
538
529
|
cr.rlm.stats.LlmCalls++
|
|
539
|
-
if
|
|
540
|
-
|
|
530
|
+
if result.Usage != nil {
|
|
531
|
+
cr.rlm.stats.PromptTokens += result.Usage.PromptTokens
|
|
532
|
+
cr.rlm.stats.CompletionTokens += result.Usage.CompletionTokens
|
|
533
|
+
cr.rlm.stats.TotalTokens += result.Usage.TotalTokens
|
|
534
|
+
}
|
|
535
|
+
if strings.TrimSpace(result.Content) != "NO_RELEVANT_CONTENT" {
|
|
536
|
+
results[idx] = result.Content
|
|
541
537
|
}
|
|
542
538
|
}(i, chunk)
|
|
543
539
|
}
|
|
@@ -606,7 +602,7 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
|
|
|
606
602
|
{Role: "user", Content: initialPrompt},
|
|
607
603
|
}
|
|
608
604
|
|
|
609
|
-
|
|
605
|
+
initialResult, err := CallChatCompletion(ChatRequest{
|
|
610
606
|
Model: cr.rlm.model,
|
|
611
607
|
Messages: messages,
|
|
612
608
|
APIBase: cr.rlm.apiBase,
|
|
@@ -618,6 +614,12 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
|
|
|
618
614
|
return "", fmt.Errorf("refine initial chunk: %w", err)
|
|
619
615
|
}
|
|
620
616
|
cr.rlm.stats.LlmCalls++
|
|
617
|
+
if initialResult.Usage != nil {
|
|
618
|
+
cr.rlm.stats.PromptTokens += initialResult.Usage.PromptTokens
|
|
619
|
+
cr.rlm.stats.CompletionTokens += initialResult.Usage.CompletionTokens
|
|
620
|
+
cr.rlm.stats.TotalTokens += initialResult.Usage.TotalTokens
|
|
621
|
+
}
|
|
622
|
+
currentAnswer := initialResult.Content
|
|
621
623
|
cr.obs.Debug("overflow", "Refine: initial answer from chunk 1/%d (%d chars)", len(chunks), len(currentAnswer))
|
|
622
624
|
|
|
623
625
|
// Phase 2: Refine the answer with each subsequent chunk
|
|
@@ -638,7 +640,7 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
|
|
|
638
640
|
{Role: "user", Content: refinePrompt},
|
|
639
641
|
}
|
|
640
642
|
|
|
641
|
-
|
|
643
|
+
refineResult, err := CallChatCompletion(ChatRequest{
|
|
642
644
|
Model: cr.rlm.model,
|
|
643
645
|
Messages: messages,
|
|
644
646
|
APIBase: cr.rlm.apiBase,
|
|
@@ -652,7 +654,12 @@ func (cr *contextReducer) reduceByRefine(query string, chunks []string, modelLim
|
|
|
652
654
|
continue
|
|
653
655
|
}
|
|
654
656
|
cr.rlm.stats.LlmCalls++
|
|
655
|
-
|
|
657
|
+
if refineResult.Usage != nil {
|
|
658
|
+
cr.rlm.stats.PromptTokens += refineResult.Usage.PromptTokens
|
|
659
|
+
cr.rlm.stats.CompletionTokens += refineResult.Usage.CompletionTokens
|
|
660
|
+
cr.rlm.stats.TotalTokens += refineResult.Usage.TotalTokens
|
|
661
|
+
}
|
|
662
|
+
currentAnswer = refineResult.Content
|
|
656
663
|
cr.obs.Debug("overflow", "Refine: incorporated chunk %d/%d (%d chars)", i+1, len(chunks), len(currentAnswer))
|
|
657
664
|
}
|
|
658
665
|
|
|
@@ -700,4 +707,3 @@ func (cr *contextReducer) reduceByTextRank(context string, modelLimit int, overh
|
|
|
700
707
|
len(context), len(result), EstimateTokens(context), EstimateTokens(result))
|
|
701
708
|
return result, nil
|
|
702
709
|
}
|
|
703
|
-
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
package rlm
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"fmt"
|
|
5
|
+
"strings"
|
|
6
|
+
"testing"
|
|
7
|
+
"time"
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
func deterministicSentence(idx int) string {
|
|
11
|
+
topics := []string{
|
|
12
|
+
"architecture", "testing", "performance", "reliability", "observability",
|
|
13
|
+
"security", "scalability", "maintainability", "usability", "automation",
|
|
14
|
+
}
|
|
15
|
+
details := []string{
|
|
16
|
+
"input validation", "error handling", "resource limits", "data flow", "boundary conditions",
|
|
17
|
+
"traceability", "deployment safety", "schema consistency", "latency targets", "integration behavior",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
topic := topics[idx%len(topics)]
|
|
21
|
+
detail := details[(idx*7)%len(details)]
|
|
22
|
+
return fmt.Sprintf("Sentence %d discusses topic %s with details about %s. ", idx, topic, detail)
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
func generateDeterministicContext(targetTokens int) string {
|
|
26
|
+
if targetTokens <= 0 {
|
|
27
|
+
return ""
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
var b strings.Builder
|
|
31
|
+
total := 0
|
|
32
|
+
for i := 1; total < targetTokens; i++ {
|
|
33
|
+
s := deterministicSentence(i)
|
|
34
|
+
b.WriteString(s)
|
|
35
|
+
total += EstimateTokens(s)
|
|
36
|
+
}
|
|
37
|
+
return b.String()
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
func fixedEnglishProse500Words() string {
|
|
41
|
+
words := []string{
|
|
42
|
+
"software", "teams", "benefit", "from", "clear", "requirements", "because", "stable", "interfaces", "reduce",
|
|
43
|
+
"rework", "and", "improve", "delivery", "predictability", "when", "engineers", "document", "assumptions", "carefully",
|
|
44
|
+
"review", "cycles", "become", "faster", "while", "quality", "signals", "remain", "visible", "across",
|
|
45
|
+
"planning", "implementation", "testing", "and", "maintenance", "phases", "in", "long", "lived", "systems",
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
var b strings.Builder
|
|
49
|
+
for i := 0; i < 500; i++ {
|
|
50
|
+
if i > 0 {
|
|
51
|
+
b.WriteByte(' ')
|
|
52
|
+
}
|
|
53
|
+
w := words[i%len(words)]
|
|
54
|
+
if (i+1)%25 == 0 {
|
|
55
|
+
w += "."
|
|
56
|
+
}
|
|
57
|
+
b.WriteString(w)
|
|
58
|
+
}
|
|
59
|
+
return b.String()
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
func percentDifference(base, compare int) float64 {
|
|
63
|
+
if base == 0 {
|
|
64
|
+
return 0
|
|
65
|
+
}
|
|
66
|
+
return (float64(compare-base) / float64(base)) * 100
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
func percentSavings(original, reduced int) float64 {
|
|
70
|
+
if original <= 0 {
|
|
71
|
+
return 0
|
|
72
|
+
}
|
|
73
|
+
return (float64(original-reduced) / float64(original)) * 100
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
func yesNo(v bool) string {
|
|
77
|
+
if v {
|
|
78
|
+
return "yes"
|
|
79
|
+
}
|
|
80
|
+
return "no"
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
func preservesOriginalSentences(original, reduced string) bool {
|
|
84
|
+
originalSentences := SplitSentences(original)
|
|
85
|
+
if len(originalSentences) == 0 {
|
|
86
|
+
return true
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
origSet := make(map[string]bool, len(originalSentences))
|
|
90
|
+
for _, s := range originalSentences {
|
|
91
|
+
origSet[strings.TrimSpace(s)] = true
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
for _, s := range SplitSentences(reduced) {
|
|
95
|
+
s = strings.TrimSpace(s)
|
|
96
|
+
if s == "" {
|
|
97
|
+
continue
|
|
98
|
+
}
|
|
99
|
+
if strings.Contains(s, "content truncated") {
|
|
100
|
+
continue
|
|
101
|
+
}
|
|
102
|
+
if !origSet[s] {
|
|
103
|
+
return false
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
return true
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
func episodeContextCost(episodes []*Episode) int {
|
|
110
|
+
total := 0
|
|
111
|
+
for _, ep := range episodes {
|
|
112
|
+
cost := ep.Tokens
|
|
113
|
+
if ep.Status != EpisodeActive && ep.SummaryTokens > 0 {
|
|
114
|
+
cost = ep.SummaryTokens
|
|
115
|
+
}
|
|
116
|
+
total += cost
|
|
117
|
+
}
|
|
118
|
+
return total
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
func TestContextSavings_TokenizerAccuracy(t *testing.T) {
|
|
122
|
+
useHeuristicTokenizerForTest(t)
|
|
123
|
+
|
|
124
|
+
bpeTokenizer, err := NewTiktokenTokenizer("gpt-4o")
|
|
125
|
+
if err != nil {
|
|
126
|
+
t.Fatalf("failed to create BPE tokenizer: %v", err)
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
goSnippet := `package main
|
|
130
|
+
|
|
131
|
+
import (
|
|
132
|
+
"fmt"
|
|
133
|
+
"strings"
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
func summarize(items []string) map[string]int {
|
|
137
|
+
result := map[string]int{}
|
|
138
|
+
for _, item := range items {
|
|
139
|
+
normalized := strings.TrimSpace(strings.ToLower(item))
|
|
140
|
+
if normalized == "" {
|
|
141
|
+
continue
|
|
142
|
+
}
|
|
143
|
+
result[normalized]++
|
|
144
|
+
}
|
|
145
|
+
return result
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
func main() {
|
|
149
|
+
data := []string{"alpha", "beta", "alpha", "gamma", "beta", "alpha"}
|
|
150
|
+
stats := summarize(data)
|
|
151
|
+
fmt.Println("stats:", stats)
|
|
152
|
+
}
|
|
153
|
+
`
|
|
154
|
+
|
|
155
|
+
jsonData := `{
|
|
156
|
+
"project": "recursive-llm-ts",
|
|
157
|
+
"version": "1.0.0",
|
|
158
|
+
"features": {
|
|
159
|
+
"lcm": true,
|
|
160
|
+
"observability": true,
|
|
161
|
+
"context_overflow": {
|
|
162
|
+
"enabled": true,
|
|
163
|
+
"strategy": "tfidf",
|
|
164
|
+
"max_reduction_attempts": 3
|
|
165
|
+
}
|
|
166
|
+
},
|
|
167
|
+
"items": [
|
|
168
|
+
{"id": 1, "name": "alpha", "priority": "high"},
|
|
169
|
+
{"id": 2, "name": "beta", "priority": "medium"},
|
|
170
|
+
{"id": 3, "name": "gamma", "priority": "low"}
|
|
171
|
+
]
|
|
172
|
+
}`
|
|
173
|
+
|
|
174
|
+
cjkText := "这是一个固定的中文测试句子,用于衡量分词稳定性。日本語の固定テスト文を使ってトークン数を比較します。고정된 한국어 문장으로 토큰 계산 결과를 확인합니다。"
|
|
175
|
+
|
|
176
|
+
testCases := []struct {
|
|
177
|
+
name string
|
|
178
|
+
content string
|
|
179
|
+
}{
|
|
180
|
+
{name: "english_prose", content: fixedEnglishProse500Words()},
|
|
181
|
+
{name: "go_code", content: goSnippet},
|
|
182
|
+
{name: "json", content: jsonData},
|
|
183
|
+
{name: "cjk", content: cjkText},
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
t.Logf("Tokenizer accuracy comparison (heuristic default + direct BPE)")
|
|
187
|
+
for _, tc := range testCases {
|
|
188
|
+
heuristic := EstimateTokens(tc.content)
|
|
189
|
+
bpe := bpeTokenizer.CountTokens(tc.content)
|
|
190
|
+
chars := len([]rune(tc.content))
|
|
191
|
+
diffPct := percentDifference(bpe, heuristic)
|
|
192
|
+
|
|
193
|
+
t.Logf("type=%-14s chars=%5d heuristic=%5d bpe=%5d diff=%7.2f%%", tc.name, chars, heuristic, bpe, diffPct)
|
|
194
|
+
|
|
195
|
+
if heuristic <= 0 {
|
|
196
|
+
t.Fatalf("heuristic token count should be > 0 for %s", tc.name)
|
|
197
|
+
}
|
|
198
|
+
if bpe <= 0 {
|
|
199
|
+
t.Fatalf("BPE token count should be > 0 for %s", tc.name)
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
func TestContextSavings_FiveLevelEscalation(t *testing.T) {
|
|
205
|
+
useHeuristicTokenizerForTest(t)
|
|
206
|
+
|
|
207
|
+
original := generateDeterministicContext(5000)
|
|
208
|
+
originalTokens := EstimateTokens(original)
|
|
209
|
+
|
|
210
|
+
level3 := CompressContextTFIDF(original, 2000)
|
|
211
|
+
level4 := CompressContextTextRank(original, 2000)
|
|
212
|
+
level5 := TruncateText(original, TruncateTextParams{MaxTokens: 2000})
|
|
213
|
+
|
|
214
|
+
level3Tokens := EstimateTokens(level3)
|
|
215
|
+
level4Tokens := EstimateTokens(level4)
|
|
216
|
+
level5Tokens := EstimateTokens(level5)
|
|
217
|
+
|
|
218
|
+
t.Logf("Five-level non-LLM escalation comparison")
|
|
219
|
+
t.Logf("original_tokens=%d", originalTokens)
|
|
220
|
+
t.Logf("level=3 strategy=tfidf tokens=%d reduction=%6.2f%% sentence_preserved=%s", level3Tokens, percentSavings(originalTokens, level3Tokens), yesNo(preservesOriginalSentences(original, level3)))
|
|
221
|
+
t.Logf("level=4 strategy=textrank tokens=%d reduction=%6.2f%% sentence_preserved=%s", level4Tokens, percentSavings(originalTokens, level4Tokens), yesNo(preservesOriginalSentences(original, level4)))
|
|
222
|
+
t.Logf("level=5 strategy=truncate tokens=%d reduction=%6.2f%% sentence_preserved=%s", level5Tokens, percentSavings(originalTokens, level5Tokens), yesNo(preservesOriginalSentences(original, level5)))
|
|
223
|
+
|
|
224
|
+
if level3Tokens >= originalTokens {
|
|
225
|
+
t.Fatalf("expected TF-IDF to reduce tokens: original=%d level3=%d", originalTokens, level3Tokens)
|
|
226
|
+
}
|
|
227
|
+
if level4Tokens >= originalTokens {
|
|
228
|
+
t.Fatalf("expected TextRank to reduce tokens: original=%d level4=%d", originalTokens, level4Tokens)
|
|
229
|
+
}
|
|
230
|
+
if level5Tokens >= originalTokens {
|
|
231
|
+
t.Fatalf("expected Truncate to reduce tokens: original=%d level5=%d", originalTokens, level5Tokens)
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
func TestContextSavings_EpisodicMemoryBudget(t *testing.T) {
|
|
236
|
+
useHeuristicTokenizerForTest(t)
|
|
237
|
+
|
|
238
|
+
manager := NewEpisodeManager("ctx-savings-episodes", EpisodeConfig{
|
|
239
|
+
MaxEpisodeMessages: 5,
|
|
240
|
+
MaxEpisodeTokens: 500,
|
|
241
|
+
TopicChangeThreshold: 0.5,
|
|
242
|
+
AutoCompactAfterClose: false,
|
|
243
|
+
})
|
|
244
|
+
|
|
245
|
+
baseTime := time.Date(2024, 1, 2, 3, 4, 5, 0, time.UTC)
|
|
246
|
+
rawTokens := 0
|
|
247
|
+
for i := 0; i < 50; i++ {
|
|
248
|
+
content := fmt.Sprintf("Message %d. %s", i+1, generateDeterministicContext(100))
|
|
249
|
+
tokens := EstimateTokens(content)
|
|
250
|
+
rawTokens += tokens
|
|
251
|
+
|
|
252
|
+
manager.AddMessage(&StoreMessage{
|
|
253
|
+
ID: fmt.Sprintf("msg_%03d", i+1),
|
|
254
|
+
Role: RoleUser,
|
|
255
|
+
Content: content,
|
|
256
|
+
Tokens: tokens,
|
|
257
|
+
Timestamp: baseTime.Add(time.Duration(i) * time.Minute),
|
|
258
|
+
})
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
episodes := manager.GetAllEpisodes()
|
|
262
|
+
t.Logf("episodes_created=%d (expected around 10)", len(episodes))
|
|
263
|
+
if len(episodes) < 9 || len(episodes) > 11 {
|
|
264
|
+
t.Fatalf("expected around 10 episodes, got %d", len(episodes))
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
for i := 0; i < len(episodes)-1; i++ {
|
|
268
|
+
summary := fmt.Sprintf("Episode %d summary. %s", i+1, generateDeterministicContext(30))
|
|
269
|
+
if err := manager.CompactEpisode(episodes[i].ID, summary); err != nil {
|
|
270
|
+
t.Fatalf("failed to compact episode %s: %v", episodes[i].ID, err)
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
budgets := []int{200, 500, 1000, 2000}
|
|
275
|
+
for _, budget := range budgets {
|
|
276
|
+
selected := manager.GetEpisodesForContext(budget)
|
|
277
|
+
contextTokens := episodeContextCost(selected)
|
|
278
|
+
savings := percentSavings(rawTokens, contextTokens)
|
|
279
|
+
t.Logf("budget=%4d episodes=%2d context_tokens=%5d raw_tokens=%5d savings=%6.2f%%", budget, len(selected), contextTokens, rawTokens, savings)
|
|
280
|
+
|
|
281
|
+
if len(selected) == 0 {
|
|
282
|
+
t.Fatalf("expected at least one episode for budget %d", budget)
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
func TestContextSavings_AllStrategiesComparison(t *testing.T) {
|
|
288
|
+
useHeuristicTokenizerForTest(t)
|
|
289
|
+
|
|
290
|
+
original := generateDeterministicContext(35000)
|
|
291
|
+
originalTokens := EstimateTokens(original)
|
|
292
|
+
target := 16000
|
|
293
|
+
|
|
294
|
+
tfidf := CompressContextTFIDF(original, target)
|
|
295
|
+
textrank := CompressContextTextRank(original, target)
|
|
296
|
+
truncated := TruncateText(original, TruncateTextParams{MaxTokens: target})
|
|
297
|
+
|
|
298
|
+
results := []struct {
|
|
299
|
+
strategy string
|
|
300
|
+
content string
|
|
301
|
+
tokens int
|
|
302
|
+
preserved bool
|
|
303
|
+
}{
|
|
304
|
+
{strategy: "TF-IDF", content: tfidf, tokens: EstimateTokens(tfidf), preserved: preservesOriginalSentences(original, tfidf)},
|
|
305
|
+
{strategy: "TextRank", content: textrank, tokens: EstimateTokens(textrank), preserved: preservesOriginalSentences(original, textrank)},
|
|
306
|
+
{strategy: "Truncate", content: truncated, tokens: EstimateTokens(truncated), preserved: preservesOriginalSentences(original, truncated)},
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
t.Logf("strategy comparison for target=%d tokens (original=%d)", target, originalTokens)
|
|
310
|
+
t.Logf("strategy output_tokens reduction%% sentence_preserved")
|
|
311
|
+
for _, r := range results {
|
|
312
|
+
t.Logf("%-9s %12d %9.2f%% %s", r.strategy, r.tokens, percentSavings(originalTokens, r.tokens), yesNo(r.preserved))
|
|
313
|
+
if r.tokens >= originalTokens {
|
|
314
|
+
t.Fatalf("strategy %s did not reduce tokens: original=%d output=%d", r.strategy, originalTokens, r.tokens)
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
func TestContextSavings_CombinedPipeline(t *testing.T) {
|
|
320
|
+
useHeuristicTokenizerForTest(t)
|
|
321
|
+
|
|
322
|
+
manager := NewEpisodeManager("ctx-savings-pipeline", EpisodeConfig{
|
|
323
|
+
MaxEpisodeMessages: 10,
|
|
324
|
+
MaxEpisodeTokens: 1000000,
|
|
325
|
+
TopicChangeThreshold: 0.5,
|
|
326
|
+
AutoCompactAfterClose: false,
|
|
327
|
+
})
|
|
328
|
+
|
|
329
|
+
baseTime := time.Date(2024, 5, 10, 9, 30, 0, 0, time.UTC)
|
|
330
|
+
messageContentByID := make(map[string]string)
|
|
331
|
+
rawTokens := 0
|
|
332
|
+
|
|
333
|
+
for i := 0; i < 100; i++ {
|
|
334
|
+
id := fmt.Sprintf("pipeline_msg_%03d", i+1)
|
|
335
|
+
content := fmt.Sprintf("Message %d segment. %s", i+1, generateDeterministicContext(500))
|
|
336
|
+
tokens := EstimateTokens(content)
|
|
337
|
+
rawTokens += tokens
|
|
338
|
+
messageContentByID[id] = content
|
|
339
|
+
|
|
340
|
+
manager.AddMessage(&StoreMessage{
|
|
341
|
+
ID: id,
|
|
342
|
+
Role: RoleUser,
|
|
343
|
+
Content: content,
|
|
344
|
+
Tokens: tokens,
|
|
345
|
+
Timestamp: baseTime.Add(time.Duration(i) * time.Minute),
|
|
346
|
+
})
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
episodes := manager.GetAllEpisodes()
|
|
350
|
+
if len(episodes) != 10 {
|
|
351
|
+
t.Fatalf("expected 10 episodes from 100 messages with MaxEpisodeMessages=10, got %d", len(episodes))
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
afterGrouping := episodeContextCost(episodes)
|
|
355
|
+
|
|
356
|
+
for i := 0; i < len(episodes)-1; i++ {
|
|
357
|
+
ep := episodes[i]
|
|
358
|
+
var b strings.Builder
|
|
359
|
+
for _, msgID := range ep.MessageIDs {
|
|
360
|
+
b.WriteString(messageContentByID[msgID])
|
|
361
|
+
b.WriteString("\n")
|
|
362
|
+
}
|
|
363
|
+
summary := CompressContextTFIDF(b.String(), 300)
|
|
364
|
+
if err := manager.CompactEpisode(ep.ID, summary); err != nil {
|
|
365
|
+
t.Fatalf("failed to compact episode %s: %v", ep.ID, err)
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
afterCompaction := episodeContextCost(manager.GetAllEpisodes())
|
|
370
|
+
selected := manager.GetEpisodesForContext(8000)
|
|
371
|
+
afterBudgetSelection := episodeContextCost(selected)
|
|
372
|
+
totalSavings := percentSavings(rawTokens, afterBudgetSelection)
|
|
373
|
+
|
|
374
|
+
t.Logf("Combined pipeline results")
|
|
375
|
+
t.Logf("original_total_tokens=%d", rawTokens)
|
|
376
|
+
t.Logf("after_episodic_grouping=%d", afterGrouping)
|
|
377
|
+
t.Logf("after_compaction=%d", afterCompaction)
|
|
378
|
+
t.Logf("after_budget_selection=%d", afterBudgetSelection)
|
|
379
|
+
t.Logf("total_savings=%6.2f%%", totalSavings)
|
|
380
|
+
|
|
381
|
+
if afterCompaction >= afterGrouping {
|
|
382
|
+
t.Fatalf("expected compaction to reduce context tokens: grouped=%d compacted=%d", afterGrouping, afterCompaction)
|
|
383
|
+
}
|
|
384
|
+
if afterBudgetSelection > 8000 && len(selected) > 0 && selected[0].Status != EpisodeActive {
|
|
385
|
+
t.Fatalf("expected selected context <= budget when active episode is not the reason for overflow: selected=%d budget=8000", afterBudgetSelection)
|
|
386
|
+
}
|
|
387
|
+
}
|
package/go/rlm/doc.go
CHANGED
|
@@ -8,13 +8,13 @@
|
|
|
8
8
|
//
|
|
9
9
|
// To use this package in your Go project:
|
|
10
10
|
//
|
|
11
|
-
// go get github.com/
|
|
11
|
+
// go get github.com/howlerops/recursive-llm-ts/go
|
|
12
12
|
//
|
|
13
13
|
// # Basic Usage
|
|
14
14
|
//
|
|
15
15
|
// Create an RLM engine and execute a completion:
|
|
16
16
|
//
|
|
17
|
-
// import "github.com/
|
|
17
|
+
// import "github.com/howlerops/recursive-llm-ts/go/rlm"
|
|
18
18
|
//
|
|
19
19
|
// config := rlm.Config{
|
|
20
20
|
// MaxDepth: 5,
|