recursive-llm-ts 4.9.0 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,6 +15,8 @@ TypeScript/JavaScript package for [Recursive Language Models (RLM)](https://gith
15
15
  **Performance & Resilience**
16
16
  - **Pure Go Backend** - 50x faster startup, 3x less memory vs Python
17
17
  - **Context Overflow Recovery** - Automatic detection and 6 reduction strategies (mapreduce, truncate, chunked, tfidf, textrank, refine)
18
+ - **Lossless Context Management** - Episodic memory, 5-level summarization escalation, SQLite persistence with FTS5 search
19
+ - **BPE Token Counting** - Accurate model-specific tokenization via tiktoken (o200k, cl100k) with cached counting
18
20
  - **Caching** - Exact-match caching with in-memory and file-based backends
19
21
  - **Retry & Fallback** - Exponential backoff, jitter, and multi-provider fallback chains
20
22
  - **AbortController** - Cancel any operation mid-flight
@@ -32,7 +34,7 @@ TypeScript/JavaScript package for [Recursive Language Models (RLM)](https://gith
32
34
  - **Meta-Agent Mode** - Automatically optimize queries for better results
33
35
  - **Observability** - OpenTelemetry tracing, Langfuse integration, and debug logging
34
36
  - **File Storage** - Process local directories or S3/MinIO/LocalStack buckets as LLM context
35
- - **150+ Tests** - Comprehensive Vitest + Go test suites
37
+ - **200+ Tests** - Comprehensive Vitest + Go test suites
36
38
 
37
39
  ## Installation
38
40
 
package/bin/rlm-go CHANGED
Binary file
@@ -51,6 +51,154 @@ export interface ContextOverflowConfig {
51
51
  /** Maximum reduction attempts before giving up (default: 3) */
52
52
  max_reduction_attempts?: number;
53
53
  }
54
+ export interface LCMConfig {
55
+ /** Enable LCM context management (default: false for backward compat) */
56
+ enabled?: boolean;
57
+ /** Soft token threshold — async compaction begins above this (default: 70% of model limit) */
58
+ soft_threshold?: number;
59
+ /** Hard token threshold — blocking compaction above this (default: 90% of model limit) */
60
+ hard_threshold?: number;
61
+ /** Number of messages to compact at once (default: 10) */
62
+ compaction_block_size?: number;
63
+ /** Target tokens per summary node (default: 500) */
64
+ summary_target_tokens?: number;
65
+ /** Large file handling configuration */
66
+ file_handling?: LCMFileConfig;
67
+ /** Episode-based context grouping configuration */
68
+ episodes?: EpisodeConfig;
69
+ /** Persistence backend configuration (default: in-memory) */
70
+ store_backend?: StoreBackendConfig;
71
+ }
72
+ export interface LCMFileConfig {
73
+ /** Token count above which files are stored externally with exploration summaries (default: 25000) */
74
+ token_threshold?: number;
75
+ }
76
+ export interface EpisodeConfig {
77
+ /** Max tokens before auto-closing an episode (default: 2000) */
78
+ max_episode_tokens?: number;
79
+ /** Max messages before auto-closing an episode (default: 20) */
80
+ max_episode_messages?: number;
81
+ /** Topic change sensitivity 0-1 (reserved for future semantic detection) */
82
+ topic_change_threshold?: number;
83
+ /** Auto-generate summary when episode closes (default: true) */
84
+ auto_compact_after_close?: boolean;
85
+ }
86
+ export interface Episode {
87
+ id: string;
88
+ title: string;
89
+ message_ids: string[];
90
+ start_time: string;
91
+ end_time: string;
92
+ tokens: number;
93
+ summary?: string;
94
+ summary_tokens?: number;
95
+ status: 'active' | 'compacted' | 'archived';
96
+ tags?: string[];
97
+ parent_episode_id?: string;
98
+ }
99
+ export interface StoreBackendConfig {
100
+ /** Backend type: 'memory' (default) or 'sqlite' */
101
+ type?: 'memory' | 'sqlite';
102
+ /** Path to the SQLite database file (required when type is 'sqlite'; use ':memory:' for an in-memory SQLite database) */
103
+ path?: string;
104
+ }
105
+ export interface LLMMapConfig {
106
+ /** Path to JSONL input file */
107
+ input_path: string;
108
+ /** Path to JSONL output file */
109
+ output_path: string;
110
+ /** Prompt template — use {{item}} as placeholder for each item */
111
+ prompt: string;
112
+ /** JSON Schema for output validation */
113
+ output_schema?: Record<string, any>;
114
+ /** Worker pool concurrency (default: 16) */
115
+ concurrency?: number;
116
+ /** Per-item retry limit (default: 3) */
117
+ max_retries?: number;
118
+ /** Model to use (defaults to engine model) */
119
+ model?: string;
120
+ }
121
+ export interface LLMMapResult {
122
+ total_items: number;
123
+ completed: number;
124
+ failed: number;
125
+ output_path: string;
126
+ duration_ms: number;
127
+ tokens_used: number;
128
+ }
129
+ export interface AgenticMapConfig {
130
+ /** Path to JSONL input file */
131
+ input_path: string;
132
+ /** Path to JSONL output file */
133
+ output_path: string;
134
+ /** Prompt template — use {{item}} as placeholder for each item */
135
+ prompt: string;
136
+ /** JSON Schema for output validation */
137
+ output_schema?: Record<string, any>;
138
+ /** Worker pool concurrency (default: 8) */
139
+ concurrency?: number;
140
+ /** Per-item retry limit (default: 2) */
141
+ max_retries?: number;
142
+ /** Model for sub-agents (defaults to engine model) */
143
+ model?: string;
144
+ /** If true, sub-agents cannot modify filesystem */
145
+ read_only?: boolean;
146
+ /** Max recursion depth for sub-agents (default: 3) */
147
+ max_depth?: number;
148
+ /** Max iterations per sub-agent (default: 15) */
149
+ max_iterations?: number;
150
+ }
151
+ export interface AgenticMapResult {
152
+ total_items: number;
153
+ completed: number;
154
+ failed: number;
155
+ output_path: string;
156
+ duration_ms: number;
157
+ tokens_used: number;
158
+ }
159
+ export interface DelegationRequest {
160
+ /** Task description for the sub-agent */
161
+ prompt: string;
162
+ /** Specific slice of work being handed off (required for non-root) */
163
+ delegated_scope?: string;
164
+ /** Work the caller retains (required for non-root) */
165
+ kept_work?: string;
166
+ /** Read-only exploration agent (exempt from guard) */
167
+ read_only?: boolean;
168
+ /** Parallel decomposition (exempt from guard) */
169
+ parallel?: boolean;
170
+ }
171
+ export interface LCMStoreStats {
172
+ total_messages: number;
173
+ total_summaries: number;
174
+ active_context_items: number;
175
+ active_context_tokens: number;
176
+ immutable_store_tokens: number;
177
+ compression_ratio: number;
178
+ }
179
+ export interface LCMGrepResult {
180
+ message_id: string;
181
+ role: string;
182
+ content: string;
183
+ summary_id?: string;
184
+ match_line: string;
185
+ }
186
+ export interface LCMDescribeResult {
187
+ type: 'message' | 'summary';
188
+ id: string;
189
+ tokens: number;
190
+ role?: string;
191
+ kind?: 'leaf' | 'condensed';
192
+ level?: number;
193
+ covered_ids?: string[];
194
+ file_ids?: string[];
195
+ content?: string;
196
+ }
197
+ export interface EpisodeListResult {
198
+ episodes: Episode[];
199
+ active_episode_id?: string;
200
+ total_episodes: number;
201
+ }
54
202
  export interface RLMConfig {
55
203
  recursive_model?: string;
56
204
  api_base?: string;
@@ -62,6 +210,7 @@ export interface RLMConfig {
62
210
  meta_agent?: MetaAgentConfig;
63
211
  observability?: ObservabilityConfig;
64
212
  context_overflow?: ContextOverflowConfig;
213
+ lcm?: LCMConfig;
65
214
  debug?: boolean;
66
215
  api_version?: string;
67
216
  timeout?: number;
@@ -14,7 +14,9 @@ type requestPayload struct {
14
14
  Query string `json:"query"`
15
15
  Context string `json:"context"`
16
16
  Config map[string]interface{} `json:"config"`
17
- Structured *structuredRequest `json:"structured,omitempty"`
17
+ Structured *structuredRequest `json:"structured,omitempty"`
18
+ LLMMap *rlm.LLMMapConfig `json:"llm_map,omitempty"` // LCM LLM-Map operation
19
+ AgenticMap *rlm.AgenticMapConfig `json:"agentic_map,omitempty"` // LCM Agentic-Map operation
18
20
  }
19
21
 
20
22
  type structuredRequest struct {
@@ -24,10 +26,13 @@ type structuredRequest struct {
24
26
  }
25
27
 
26
28
  type responsePayload struct {
27
- Result interface{} `json:"result"`
28
- Stats rlm.RLMStats `json:"stats"`
29
- StructuredResult bool `json:"structured_result,omitempty"`
30
- TraceEvents interface{} `json:"trace_events,omitempty"`
29
+ Result interface{} `json:"result"`
30
+ Stats rlm.RLMStats `json:"stats"`
31
+ StructuredResult bool `json:"structured_result,omitempty"`
32
+ TraceEvents interface{} `json:"trace_events,omitempty"`
33
+ LCMStats *rlm.LCMStoreStats `json:"lcm_stats,omitempty"`
34
+ LLMMapResult *rlm.LLMMapResult `json:"llm_map_result,omitempty"`
35
+ AgenticMapResult *rlm.AgenticMapResult `json:"agentic_map_result,omitempty"`
31
36
  }
32
37
 
33
38
  func main() {
@@ -54,8 +59,30 @@ func main() {
54
59
 
55
60
  var resp responsePayload
56
61
 
62
+ // Handle LLM-Map operation if requested
63
+ if req.LLMMap != nil {
64
+ mapResult, err := engine.LLMMap(*req.LLMMap)
65
+ if err != nil {
66
+ fmt.Fprintln(os.Stderr, err)
67
+ os.Exit(1)
68
+ }
69
+ resp = responsePayload{
70
+ Result: "llm_map_complete",
71
+ LLMMapResult: mapResult,
72
+ }
73
+ } else if req.AgenticMap != nil {
74
+ // Handle Agentic-Map operation
75
+ agenticResult, err := engine.AgenticMap(*req.AgenticMap)
76
+ if err != nil {
77
+ fmt.Fprintln(os.Stderr, err)
78
+ os.Exit(1)
79
+ }
80
+ resp = responsePayload{
81
+ Result: "agentic_map_complete",
82
+ AgenticMapResult: agenticResult,
83
+ }
84
+ } else if req.Structured != nil {
57
85
  // Handle structured completion if requested
58
- if req.Structured != nil {
59
86
  structuredConfig := &rlm.StructuredConfig{
60
87
  Schema: req.Structured.Schema,
61
88
  ParallelExecution: req.Structured.ParallelExecution,
@@ -96,6 +123,12 @@ func main() {
96
123
  }
97
124
  }
98
125
 
126
+ // Include LCM stats if enabled
127
+ if lcmEngine := engine.GetLCMEngine(); lcmEngine != nil {
128
+ stats := lcmEngine.GetStore().Stats()
129
+ resp.LCMStats = &stats
130
+ }
131
+
99
132
  payload, err := json.Marshal(resp)
100
133
  if err != nil {
101
134
  fmt.Fprintln(os.Stderr, "Failed to encode response JSON:", err)
package/go/go.mod CHANGED
@@ -5,24 +5,34 @@ go 1.25.0
5
5
  toolchain go1.25.1
6
6
 
7
7
  require (
8
+ github.com/cespare/xxhash/v2 v2.3.0
8
9
  github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d
9
10
  github.com/google/jsonschema-go v0.4.2
11
+ github.com/pkoukk/tiktoken-go v0.1.8
10
12
  go.opentelemetry.io/otel v1.40.0
11
13
  go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.40.0
12
14
  go.opentelemetry.io/otel/sdk v1.40.0
13
15
  go.opentelemetry.io/otel/trace v1.40.0
16
+ modernc.org/sqlite v1.46.1
14
17
  )
15
18
 
16
19
  require (
17
- github.com/cespare/xxhash/v2 v2.3.0 // indirect
18
- github.com/dlclark/regexp2 v1.7.0 // indirect
20
+ github.com/dlclark/regexp2 v1.10.0 // indirect
21
+ github.com/dustin/go-humanize v1.0.1 // indirect
19
22
  github.com/go-logr/logr v1.4.3 // indirect
20
23
  github.com/go-logr/stdr v1.2.2 // indirect
21
24
  github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
22
- github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect
25
+ github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
23
26
  github.com/google/uuid v1.6.0 // indirect
27
+ github.com/mattn/go-isatty v0.0.20 // indirect
28
+ github.com/ncruces/go-strftime v1.0.0 // indirect
29
+ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
24
30
  go.opentelemetry.io/auto/sdk v1.2.1 // indirect
25
31
  go.opentelemetry.io/otel/metric v1.40.0 // indirect
32
+ golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
26
33
  golang.org/x/sys v0.40.0 // indirect
27
34
  golang.org/x/text v0.3.8 // indirect
35
+ modernc.org/libc v1.67.6 // indirect
36
+ modernc.org/mathutil v1.7.1 // indirect
37
+ modernc.org/memory v1.11.0 // indirect
28
38
  )
package/go/go.sum CHANGED
@@ -7,13 +7,16 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
7
7
  github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
8
8
  github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
9
9
  github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
10
- github.com/dlclark/regexp2 v1.7.0 h1:7lJfhqlPssTb1WQx4yvTHN0uElPEv52sbaECrAQxjAo=
11
10
  github.com/dlclark/regexp2 v1.7.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
11
+ github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
12
+ github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
12
13
  github.com/dop251/goja v0.0.0-20211022113120-dc8c55024d06/go.mod h1:R9ET47fwRVRPZnOGvHxxhuZcbrMCuiqOz3Rlrh4KSnk=
13
14
  github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d h1:wi6jN5LVt/ljaBG4ue79Ekzb12QfJ52L9Q98tl8SWhw=
14
15
  github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d/go.mod h1:QMWlm50DNe14hD7t24KEqZuUdC9sOTy8W6XbCU1mlw4=
15
16
  github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA7c8pLvoGndExHudxTDKZ84Pyvv+90pbBjbTz0Y=
16
17
  github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d/go.mod h1:DngW8aVqWbuLRMHItjPUyqdj+HWPvnQe8V8y1nDpIbM=
18
+ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
19
+ github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
17
20
  github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
18
21
  github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
19
22
  github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
@@ -25,10 +28,13 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
25
28
  github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
26
29
  github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8=
27
30
  github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
28
- github.com/google/pprof v0.0.0-20230207041349-798e818bf904 h1:4/hN5RUoecvl+RmJRE2YxKWtnnQls6rQjjW5oV7qg2U=
29
31
  github.com/google/pprof v0.0.0-20230207041349-798e818bf904/go.mod h1:uglQLonpP8qtYCYyzA+8c/9qtqgA3qsXGYqCPKARAFg=
32
+ github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
33
+ github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
30
34
  github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
31
35
  github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
36
+ github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
37
+ github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
32
38
  github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
33
39
  github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
34
40
  github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
@@ -36,8 +42,16 @@ github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NB
36
42
  github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
37
43
  github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
38
44
  github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
45
+ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
46
+ github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
47
+ github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
48
+ github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
49
+ github.com/pkoukk/tiktoken-go v0.1.8 h1:85ENo+3FpWgAACBaEUVp+lctuTcYUO7BtmfhlN/QTRo=
50
+ github.com/pkoukk/tiktoken-go v0.1.8/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
39
51
  github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
40
52
  github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
53
+ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
54
+ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
41
55
  github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
42
56
  github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
43
57
  github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
@@ -60,18 +74,25 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
60
74
  go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
61
75
  golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
62
76
  golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
77
+ golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY=
78
+ golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70=
63
79
  golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
80
+ golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
81
+ golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
64
82
  golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
65
83
  golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
66
84
  golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
67
85
  golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
68
86
  golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
87
+ golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
88
+ golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
69
89
  golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
70
90
  golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
71
91
  golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
72
92
  golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
73
93
  golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
74
94
  golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
95
+ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
75
96
  golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
76
97
  golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
77
98
  golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -85,6 +106,8 @@ golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
85
106
  golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
86
107
  golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
87
108
  golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
109
+ golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
110
+ golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
88
111
  golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
89
112
  gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
90
113
  gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@@ -94,3 +117,31 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
94
117
  gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
95
118
  gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
96
119
  gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
120
+ modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=
121
+ modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
122
+ modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc=
123
+ modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM=
124
+ modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA=
125
+ modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
126
+ modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
127
+ modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
128
+ modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
129
+ modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
130
+ modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
131
+ modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
132
+ modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI=
133
+ modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE=
134
+ modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
135
+ modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
136
+ modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
137
+ modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
138
+ modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
139
+ modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
140
+ modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
141
+ modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
142
+ modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU=
143
+ modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA=
144
+ modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
145
+ modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
146
+ modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
147
+ modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
@@ -0,0 +1,59 @@
1
+ package rlm
2
+
3
+ // ─── Shared Text Compression Utilities ──────────────────────────────────────
4
+ // Consolidated from context_overflow.go and lcm_summarizer.go to eliminate
5
+ // duplication of the "keep start + end, truncate middle" strategy.
6
+
7
+ // TruncateTextParams configures deterministic text truncation.
8
+ type TruncateTextParams struct {
9
+ // MaxTokens is the target token count.
10
+ // MaxTokens is the target token count; it is converted to a character budget at 3 chars per token.
11
+ // MarkerText is inserted at the truncation point.
12
+ // Default: "\n[... content truncated ...]\n"
13
+ MarkerText string
14
+ // StartFraction is the fraction of budget kept from the start (default: 2/3).
15
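+ // StartFraction is the fraction of the budget kept from the start; values outside (0, 1) fall back to the default of 2/3.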
16
+ }
17
+
18
+ // TruncateText performs deterministic truncation using the "keep start + end"
19
+ // strategy, which addresses the "lost in the middle" problem by preserving
20
+ // both the beginning and end of the text.
21
+ //
22
+ // This is used as the guaranteed-convergence fallback in both:
23
+ // - LCM's three-level escalation (Level 3)
24
+ // - Context overflow's truncation strategy
25
+ func TruncateText(text string, params TruncateTextParams) string {
26
+ // Apply defaults
27
+ if params.StartFraction <= 0 || params.StartFraction >= 1 {
28
+ params.StartFraction = 2.0 / 3.0
29
+ }
30
+ if params.MarkerText == "" {
31
+ params.MarkerText = "\n[... content truncated ...]\n"
32
+ }
33
+
34
+ // Conservative chars-to-tokens ratio: 3 chars per token
35
+ maxChars := params.MaxTokens * 3
36
+ if len(text) <= maxChars {
37
+ return text
38
+ }
39
+
40
+ startChars := int(float64(maxChars) * params.StartFraction)
41
+ endChars := maxChars - startChars
42
+
43
+ // Clamp the prefix and suffix lengths so the two slices stay within the text and never overlap
44
+ if startChars > len(text) {
45
+ startChars = len(text)
46
+ }
47
+ if endChars > len(text)-startChars {
48
+ endChars = len(text) - startChars
49
+ }
50
+ if endChars < 0 {
51
+ endChars = 0
52
+ }
53
+
54
+ if endChars == 0 {
55
+ return text[:startChars] + params.MarkerText
56
+ }
57
+
58
+ return text[:startChars] + params.MarkerText + text[len(text)-endChars:]
59
+ }
@@ -48,18 +48,18 @@ var modelTokenLimits = map[string]int{
48
48
  "o1-preview": 128000,
49
49
  "o3-mini": 200000,
50
50
  // Anthropic (via LiteLLM/proxy)
51
- "claude-3-opus": 200000,
52
- "claude-3-sonnet": 200000,
53
- "claude-3-haiku": 200000,
54
- "claude-3.5-sonnet": 200000,
55
- "claude-3.5-haiku": 200000,
56
- "claude-sonnet-4": 200000,
57
- "claude-opus-4": 200000,
51
+ "claude-3-opus": 200000,
52
+ "claude-3-sonnet": 200000,
53
+ "claude-3-haiku": 200000,
54
+ "claude-3.5-sonnet": 200000,
55
+ "claude-3.5-haiku": 200000,
56
+ "claude-sonnet-4": 200000,
57
+ "claude-opus-4": 200000,
58
58
  // Llama (common vLLM deployments)
59
- "llama-3": 8192,
60
- "llama-3.1": 128000,
61
- "llama-3.2": 128000,
62
- "llama-3.3": 128000,
59
+ "llama-3": 8192,
60
+ "llama-3.1": 128000,
61
+ "llama-3.2": 128000,
62
+ "llama-3.3": 128000,
63
63
  // Mistral
64
64
  "mistral-7b": 32768,
65
65
  "mixtral-8x7b": 32768,
@@ -181,21 +181,11 @@ func (r *RLM) getResponseTokenBudget() int {
181
181
 
182
182
  // ─── Token Estimation ────────────────────────────────────────────────────────
183
183
 
184
- // EstimateTokens provides a fast approximation of token count for a string.
185
- // Uses a character-to-token ratio heuristic. This is intentionally conservative
186
- // (over-estimates slightly) to avoid overflow.
187
- //
188
- // Approximate ratios for common encodings:
189
- // - English text: ~4 chars/token (cl100k_base)
190
- // - JSON/code: ~3.5 chars/token
191
- // - CJK text: ~1.5 chars/token
192
- // - Mixed: ~3.5 chars/token (safe default)
184
+ // EstimateTokens returns the token count for a string using the global tokenizer.
185
+ // When SetDefaultTokenizer has been called with a model name, this uses accurate
186
+ // BPE tokenization via tiktoken. Otherwise falls back to a ~3.5 chars/token heuristic.
193
187
  func EstimateTokens(text string) int {
194
- if len(text) == 0 {
195
- return 0
196
- }
197
- // Use 3.5 chars/token as conservative estimate
198
- return (len(text)*10 + 34) / 35 // equivalent to ceil(len/3.5)
188
+ return GetTokenizer().CountTokens(text)
199
189
  }
200
190
 
201
191
  // EstimateMessagesTokens estimates the total tokens for a set of chat messages.
@@ -477,24 +467,20 @@ func (cr *contextReducer) reduceByMapReduce(query string, chunks []string, model
477
467
  }
478
468
 
479
469
  // reduceByTruncation simply truncates context to fit within the limit.
470
+ // Uses the shared TruncateText utility (compression.go).
480
471
  func (cr *contextReducer) reduceByTruncation(context string, modelLimit int, overhead int) (string, error) {
481
472
  cr.obs.Debug("overflow", "Using truncation strategy")
482
473
 
483
474
  availableTokens := modelLimit - overhead
484
- maxChars := availableTokens * 3 // Conservative chars-to-tokens
475
+ truncated := TruncateText(context, TruncateTextParams{
476
+ MaxTokens: availableTokens,
477
+ MarkerText: "\n\n[... context truncated due to token limit ...]\n\n",
478
+ })
485
479
 
486
- if maxChars >= len(context) {
480
+ if truncated == context {
487
481
  return context, nil
488
482
  }
489
483
 
490
- // Keep beginning and end, truncate middle (addresses "lost in the middle" problem)
491
- keepFromStart := maxChars * 2 / 3
492
- keepFromEnd := maxChars / 3
493
-
494
- truncated := context[:keepFromStart] +
495
- "\n\n[... context truncated due to token limit ...]\n\n" +
496
- context[len(context)-keepFromEnd:]
497
-
498
484
  cr.obs.Debug("overflow", "Truncated context: %d -> %d chars", len(context), len(truncated))
499
485
  return truncated, nil
500
486
  }
@@ -721,4 +707,3 @@ func (cr *contextReducer) reduceByTextRank(context string, modelLimit int, overh
721
707
  len(context), len(result), EstimateTokens(context), EstimateTokens(result))
722
708
  return result, nil
723
709
  }
724
-