recursive-llm-ts 4.9.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/bin/rlm-go +0 -0
- package/dist/bridge-interface.d.ts +149 -0
- package/go/cmd/rlm/main.go +39 -6
- package/go/go.mod +13 -3
- package/go/go.sum +53 -2
- package/go/rlm/compression.go +59 -0
- package/go/rlm/context_overflow.go +21 -36
- package/go/rlm/context_savings_test.go +387 -0
- package/go/rlm/json_extraction.go +140 -0
- package/go/rlm/lcm_agentic_map.go +317 -0
- package/go/rlm/lcm_context_loop.go +309 -0
- package/go/rlm/lcm_delegation.go +257 -0
- package/go/rlm/lcm_episodes.go +313 -0
- package/go/rlm/lcm_episodes_test.go +384 -0
- package/go/rlm/lcm_files.go +424 -0
- package/go/rlm/lcm_map.go +348 -0
- package/go/rlm/lcm_store.go +615 -0
- package/go/rlm/lcm_summarizer.go +239 -0
- package/go/rlm/lcm_test.go +1407 -0
- package/go/rlm/rlm.go +124 -1
- package/go/rlm/store_backend.go +121 -0
- package/go/rlm/store_backend_test.go +428 -0
- package/go/rlm/store_sqlite.go +575 -0
- package/go/rlm/structured.go +6 -83
- package/go/rlm/token_tracking_test.go +25 -11
- package/go/rlm/tokenizer.go +216 -0
- package/go/rlm/tokenizer_test.go +305 -0
- package/go/rlm/types.go +23 -1
- package/go/rlm.test +0 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -15,6 +15,8 @@ TypeScript/JavaScript package for [Recursive Language Models (RLM)](https://gith
|
|
|
15
15
|
**Performance & Resilience**
|
|
16
16
|
- **Pure Go Backend** - 50x faster startup, 3x less memory vs Python
|
|
17
17
|
- **Context Overflow Recovery** - Automatic detection and 6 reduction strategies (mapreduce, truncate, chunked, tfidf, textrank, refine)
|
|
18
|
+
- **Lossless Context Management** - Episodic memory, 5-level summarization escalation, SQLite persistence with FTS5 search
|
|
19
|
+
- **BPE Token Counting** - Accurate model-specific tokenization via tiktoken (o200k, cl100k) with cached counting
|
|
18
20
|
- **Caching** - Exact-match caching with in-memory and file-based backends
|
|
19
21
|
- **Retry & Fallback** - Exponential backoff, jitter, and multi-provider fallback chains
|
|
20
22
|
- **AbortController** - Cancel any operation mid-flight
|
|
@@ -32,7 +34,7 @@ TypeScript/JavaScript package for [Recursive Language Models (RLM)](https://gith
|
|
|
32
34
|
- **Meta-Agent Mode** - Automatically optimize queries for better results
|
|
33
35
|
- **Observability** - OpenTelemetry tracing, Langfuse integration, and debug logging
|
|
34
36
|
- **File Storage** - Process local directories or S3/MinIO/LocalStack buckets as LLM context
|
|
35
|
-
- **
|
|
37
|
+
- **200+ Tests** - Comprehensive Vitest + Go test suites
|
|
36
38
|
|
|
37
39
|
## Installation
|
|
38
40
|
|
package/bin/rlm-go
CHANGED
|
Binary file
|
|
@@ -51,6 +51,154 @@ export interface ContextOverflowConfig {
|
|
|
51
51
|
/** Maximum reduction attempts before giving up (default: 3) */
|
|
52
52
|
max_reduction_attempts?: number;
|
|
53
53
|
}
|
|
54
|
+
export interface LCMConfig {
|
|
55
|
+
/** Enable LCM context management (default: false for backward compat) */
|
|
56
|
+
enabled?: boolean;
|
|
57
|
+
/** Soft token threshold — async compaction begins above this (default: 70% of model limit) */
|
|
58
|
+
soft_threshold?: number;
|
|
59
|
+
/** Hard token threshold — blocking compaction above this (default: 90% of model limit) */
|
|
60
|
+
hard_threshold?: number;
|
|
61
|
+
/** Number of messages to compact at once (default: 10) */
|
|
62
|
+
compaction_block_size?: number;
|
|
63
|
+
/** Target tokens per summary node (default: 500) */
|
|
64
|
+
summary_target_tokens?: number;
|
|
65
|
+
/** Large file handling configuration */
|
|
66
|
+
file_handling?: LCMFileConfig;
|
|
67
|
+
/** Episode-based context grouping configuration */
|
|
68
|
+
episodes?: EpisodeConfig;
|
|
69
|
+
/** Persistence backend configuration (default: in-memory) */
|
|
70
|
+
store_backend?: StoreBackendConfig;
|
|
71
|
+
}
|
|
72
|
+
export interface LCMFileConfig {
|
|
73
|
+
/** Token count above which files are stored externally with exploration summaries (default: 25000) */
|
|
74
|
+
token_threshold?: number;
|
|
75
|
+
}
|
|
76
|
+
export interface EpisodeConfig {
|
|
77
|
+
/** Max tokens before auto-closing an episode (default: 2000) */
|
|
78
|
+
max_episode_tokens?: number;
|
|
79
|
+
/** Max messages before auto-closing an episode (default: 20) */
|
|
80
|
+
max_episode_messages?: number;
|
|
81
|
+
/** Topic change sensitivity 0-1 (reserved for future semantic detection) */
|
|
82
|
+
topic_change_threshold?: number;
|
|
83
|
+
/** Auto-generate summary when episode closes (default: true) */
|
|
84
|
+
auto_compact_after_close?: boolean;
|
|
85
|
+
}
|
|
86
|
+
export interface Episode {
|
|
87
|
+
id: string;
|
|
88
|
+
title: string;
|
|
89
|
+
message_ids: string[];
|
|
90
|
+
start_time: string;
|
|
91
|
+
end_time: string;
|
|
92
|
+
tokens: number;
|
|
93
|
+
summary?: string;
|
|
94
|
+
summary_tokens?: number;
|
|
95
|
+
status: 'active' | 'compacted' | 'archived';
|
|
96
|
+
tags?: string[];
|
|
97
|
+
parent_episode_id?: string;
|
|
98
|
+
}
|
|
99
|
+
export interface StoreBackendConfig {
|
|
100
|
+
/** Backend type: 'memory' (default) or 'sqlite' */
|
|
101
|
+
type?: 'memory' | 'sqlite';
|
|
102
|
+
/** Path for SQLite database file (required when type is 'sqlite', use ':memory:' for in-memory SQLite) */
|
|
103
|
+
path?: string;
|
|
104
|
+
}
|
|
105
|
+
export interface LLMMapConfig {
|
|
106
|
+
/** Path to JSONL input file */
|
|
107
|
+
input_path: string;
|
|
108
|
+
/** Path to JSONL output file */
|
|
109
|
+
output_path: string;
|
|
110
|
+
/** Prompt template — use {{item}} as placeholder for each item */
|
|
111
|
+
prompt: string;
|
|
112
|
+
/** JSON Schema for output validation */
|
|
113
|
+
output_schema?: Record<string, any>;
|
|
114
|
+
/** Worker pool concurrency (default: 16) */
|
|
115
|
+
concurrency?: number;
|
|
116
|
+
/** Per-item retry limit (default: 3) */
|
|
117
|
+
max_retries?: number;
|
|
118
|
+
/** Model to use (defaults to engine model) */
|
|
119
|
+
model?: string;
|
|
120
|
+
}
|
|
121
|
+
export interface LLMMapResult {
|
|
122
|
+
total_items: number;
|
|
123
|
+
completed: number;
|
|
124
|
+
failed: number;
|
|
125
|
+
output_path: string;
|
|
126
|
+
duration_ms: number;
|
|
127
|
+
tokens_used: number;
|
|
128
|
+
}
|
|
129
|
+
export interface AgenticMapConfig {
|
|
130
|
+
/** Path to JSONL input file */
|
|
131
|
+
input_path: string;
|
|
132
|
+
/** Path to JSONL output file */
|
|
133
|
+
output_path: string;
|
|
134
|
+
/** Prompt template — use {{item}} as placeholder for each item */
|
|
135
|
+
prompt: string;
|
|
136
|
+
/** JSON Schema for output validation */
|
|
137
|
+
output_schema?: Record<string, any>;
|
|
138
|
+
/** Worker pool concurrency (default: 8) */
|
|
139
|
+
concurrency?: number;
|
|
140
|
+
/** Per-item retry limit (default: 2) */
|
|
141
|
+
max_retries?: number;
|
|
142
|
+
/** Model for sub-agents (defaults to engine model) */
|
|
143
|
+
model?: string;
|
|
144
|
+
/** If true, sub-agents cannot modify filesystem */
|
|
145
|
+
read_only?: boolean;
|
|
146
|
+
/** Max recursion depth for sub-agents (default: 3) */
|
|
147
|
+
max_depth?: number;
|
|
148
|
+
/** Max iterations per sub-agent (default: 15) */
|
|
149
|
+
max_iterations?: number;
|
|
150
|
+
}
|
|
151
|
+
export interface AgenticMapResult {
|
|
152
|
+
total_items: number;
|
|
153
|
+
completed: number;
|
|
154
|
+
failed: number;
|
|
155
|
+
output_path: string;
|
|
156
|
+
duration_ms: number;
|
|
157
|
+
tokens_used: number;
|
|
158
|
+
}
|
|
159
|
+
export interface DelegationRequest {
|
|
160
|
+
/** Task description for the sub-agent */
|
|
161
|
+
prompt: string;
|
|
162
|
+
/** Specific slice of work being handed off (required for non-root) */
|
|
163
|
+
delegated_scope?: string;
|
|
164
|
+
/** Work the caller retains (required for non-root) */
|
|
165
|
+
kept_work?: string;
|
|
166
|
+
/** Read-only exploration agent (exempt from guard) */
|
|
167
|
+
read_only?: boolean;
|
|
168
|
+
/** Parallel decomposition (exempt from guard) */
|
|
169
|
+
parallel?: boolean;
|
|
170
|
+
}
|
|
171
|
+
export interface LCMStoreStats {
|
|
172
|
+
total_messages: number;
|
|
173
|
+
total_summaries: number;
|
|
174
|
+
active_context_items: number;
|
|
175
|
+
active_context_tokens: number;
|
|
176
|
+
immutable_store_tokens: number;
|
|
177
|
+
compression_ratio: number;
|
|
178
|
+
}
|
|
179
|
+
export interface LCMGrepResult {
|
|
180
|
+
message_id: string;
|
|
181
|
+
role: string;
|
|
182
|
+
content: string;
|
|
183
|
+
summary_id?: string;
|
|
184
|
+
match_line: string;
|
|
185
|
+
}
|
|
186
|
+
export interface LCMDescribeResult {
|
|
187
|
+
type: 'message' | 'summary';
|
|
188
|
+
id: string;
|
|
189
|
+
tokens: number;
|
|
190
|
+
role?: string;
|
|
191
|
+
kind?: 'leaf' | 'condensed';
|
|
192
|
+
level?: number;
|
|
193
|
+
covered_ids?: string[];
|
|
194
|
+
file_ids?: string[];
|
|
195
|
+
content?: string;
|
|
196
|
+
}
|
|
197
|
+
export interface EpisodeListResult {
|
|
198
|
+
episodes: Episode[];
|
|
199
|
+
active_episode_id?: string;
|
|
200
|
+
total_episodes: number;
|
|
201
|
+
}
|
|
54
202
|
export interface RLMConfig {
|
|
55
203
|
recursive_model?: string;
|
|
56
204
|
api_base?: string;
|
|
@@ -62,6 +210,7 @@ export interface RLMConfig {
|
|
|
62
210
|
meta_agent?: MetaAgentConfig;
|
|
63
211
|
observability?: ObservabilityConfig;
|
|
64
212
|
context_overflow?: ContextOverflowConfig;
|
|
213
|
+
lcm?: LCMConfig;
|
|
65
214
|
debug?: boolean;
|
|
66
215
|
api_version?: string;
|
|
67
216
|
timeout?: number;
|
package/go/cmd/rlm/main.go
CHANGED
|
@@ -14,7 +14,9 @@ type requestPayload struct {
|
|
|
14
14
|
Query string `json:"query"`
|
|
15
15
|
Context string `json:"context"`
|
|
16
16
|
Config map[string]interface{} `json:"config"`
|
|
17
|
-
Structured
|
|
17
|
+
Structured *structuredRequest `json:"structured,omitempty"`
|
|
18
|
+
LLMMap *rlm.LLMMapConfig `json:"llm_map,omitempty"` // LCM LLM-Map operation
|
|
19
|
+
AgenticMap *rlm.AgenticMapConfig `json:"agentic_map,omitempty"` // LCM Agentic-Map operation
|
|
18
20
|
}
|
|
19
21
|
|
|
20
22
|
type structuredRequest struct {
|
|
@@ -24,10 +26,13 @@ type structuredRequest struct {
|
|
|
24
26
|
}
|
|
25
27
|
|
|
26
28
|
type responsePayload struct {
|
|
27
|
-
Result interface{}
|
|
28
|
-
Stats rlm.RLMStats
|
|
29
|
-
StructuredResult bool
|
|
30
|
-
TraceEvents interface{}
|
|
29
|
+
Result interface{} `json:"result"`
|
|
30
|
+
Stats rlm.RLMStats `json:"stats"`
|
|
31
|
+
StructuredResult bool `json:"structured_result,omitempty"`
|
|
32
|
+
TraceEvents interface{} `json:"trace_events,omitempty"`
|
|
33
|
+
LCMStats *rlm.LCMStoreStats `json:"lcm_stats,omitempty"`
|
|
34
|
+
LLMMapResult *rlm.LLMMapResult `json:"llm_map_result,omitempty"`
|
|
35
|
+
AgenticMapResult *rlm.AgenticMapResult `json:"agentic_map_result,omitempty"`
|
|
31
36
|
}
|
|
32
37
|
|
|
33
38
|
func main() {
|
|
@@ -54,8 +59,30 @@ func main() {
|
|
|
54
59
|
|
|
55
60
|
var resp responsePayload
|
|
56
61
|
|
|
62
|
+
// Handle LLM-Map operation if requested
|
|
63
|
+
if req.LLMMap != nil {
|
|
64
|
+
mapResult, err := engine.LLMMap(*req.LLMMap)
|
|
65
|
+
if err != nil {
|
|
66
|
+
fmt.Fprintln(os.Stderr, err)
|
|
67
|
+
os.Exit(1)
|
|
68
|
+
}
|
|
69
|
+
resp = responsePayload{
|
|
70
|
+
Result: "llm_map_complete",
|
|
71
|
+
LLMMapResult: mapResult,
|
|
72
|
+
}
|
|
73
|
+
} else if req.AgenticMap != nil {
|
|
74
|
+
// Handle Agentic-Map operation
|
|
75
|
+
agenticResult, err := engine.AgenticMap(*req.AgenticMap)
|
|
76
|
+
if err != nil {
|
|
77
|
+
fmt.Fprintln(os.Stderr, err)
|
|
78
|
+
os.Exit(1)
|
|
79
|
+
}
|
|
80
|
+
resp = responsePayload{
|
|
81
|
+
Result: "agentic_map_complete",
|
|
82
|
+
AgenticMapResult: agenticResult,
|
|
83
|
+
}
|
|
84
|
+
} else if req.Structured != nil {
|
|
57
85
|
// Handle structured completion if requested
|
|
58
|
-
if req.Structured != nil {
|
|
59
86
|
structuredConfig := &rlm.StructuredConfig{
|
|
60
87
|
Schema: req.Structured.Schema,
|
|
61
88
|
ParallelExecution: req.Structured.ParallelExecution,
|
|
@@ -96,6 +123,12 @@ func main() {
|
|
|
96
123
|
}
|
|
97
124
|
}
|
|
98
125
|
|
|
126
|
+
// Include LCM stats if enabled
|
|
127
|
+
if lcmEngine := engine.GetLCMEngine(); lcmEngine != nil {
|
|
128
|
+
stats := lcmEngine.GetStore().Stats()
|
|
129
|
+
resp.LCMStats = &stats
|
|
130
|
+
}
|
|
131
|
+
|
|
99
132
|
payload, err := json.Marshal(resp)
|
|
100
133
|
if err != nil {
|
|
101
134
|
fmt.Fprintln(os.Stderr, "Failed to encode response JSON:", err)
|
package/go/go.mod
CHANGED
|
@@ -5,24 +5,34 @@ go 1.25.0
|
|
|
5
5
|
toolchain go1.25.1
|
|
6
6
|
|
|
7
7
|
require (
|
|
8
|
+
github.com/cespare/xxhash/v2 v2.3.0
|
|
8
9
|
github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d
|
|
9
10
|
github.com/google/jsonschema-go v0.4.2
|
|
11
|
+
github.com/pkoukk/tiktoken-go v0.1.8
|
|
10
12
|
go.opentelemetry.io/otel v1.40.0
|
|
11
13
|
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.40.0
|
|
12
14
|
go.opentelemetry.io/otel/sdk v1.40.0
|
|
13
15
|
go.opentelemetry.io/otel/trace v1.40.0
|
|
16
|
+
modernc.org/sqlite v1.46.1
|
|
14
17
|
)
|
|
15
18
|
|
|
16
19
|
require (
|
|
17
|
-
github.com/
|
|
18
|
-
github.com/
|
|
20
|
+
github.com/dlclark/regexp2 v1.10.0 // indirect
|
|
21
|
+
github.com/dustin/go-humanize v1.0.1 // indirect
|
|
19
22
|
github.com/go-logr/logr v1.4.3 // indirect
|
|
20
23
|
github.com/go-logr/stdr v1.2.2 // indirect
|
|
21
24
|
github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
|
|
22
|
-
github.com/google/pprof v0.0.0-
|
|
25
|
+
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
|
|
23
26
|
github.com/google/uuid v1.6.0 // indirect
|
|
27
|
+
github.com/mattn/go-isatty v0.0.20 // indirect
|
|
28
|
+
github.com/ncruces/go-strftime v1.0.0 // indirect
|
|
29
|
+
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
|
24
30
|
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
|
25
31
|
go.opentelemetry.io/otel/metric v1.40.0 // indirect
|
|
32
|
+
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
|
|
26
33
|
golang.org/x/sys v0.40.0 // indirect
|
|
27
34
|
golang.org/x/text v0.3.8 // indirect
|
|
35
|
+
modernc.org/libc v1.67.6 // indirect
|
|
36
|
+
modernc.org/mathutil v1.7.1 // indirect
|
|
37
|
+
modernc.org/memory v1.11.0 // indirect
|
|
28
38
|
)
|
package/go/go.sum
CHANGED
|
@@ -7,13 +7,16 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
|
|
|
7
7
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
|
8
8
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
|
9
9
|
github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
|
|
10
|
-
github.com/dlclark/regexp2 v1.7.0 h1:7lJfhqlPssTb1WQx4yvTHN0uElPEv52sbaECrAQxjAo=
|
|
11
10
|
github.com/dlclark/regexp2 v1.7.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
|
11
|
+
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
|
|
12
|
+
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
|
12
13
|
github.com/dop251/goja v0.0.0-20211022113120-dc8c55024d06/go.mod h1:R9ET47fwRVRPZnOGvHxxhuZcbrMCuiqOz3Rlrh4KSnk=
|
|
13
14
|
github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d h1:wi6jN5LVt/ljaBG4ue79Ekzb12QfJ52L9Q98tl8SWhw=
|
|
14
15
|
github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d/go.mod h1:QMWlm50DNe14hD7t24KEqZuUdC9sOTy8W6XbCU1mlw4=
|
|
15
16
|
github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA7c8pLvoGndExHudxTDKZ84Pyvv+90pbBjbTz0Y=
|
|
16
17
|
github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d/go.mod h1:DngW8aVqWbuLRMHItjPUyqdj+HWPvnQe8V8y1nDpIbM=
|
|
18
|
+
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
|
19
|
+
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
|
17
20
|
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
|
18
21
|
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
|
19
22
|
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
|
@@ -25,10 +28,13 @@ github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
|
|
25
28
|
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
|
26
29
|
github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8=
|
|
27
30
|
github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
|
|
28
|
-
github.com/google/pprof v0.0.0-20230207041349-798e818bf904 h1:4/hN5RUoecvl+RmJRE2YxKWtnnQls6rQjjW5oV7qg2U=
|
|
29
31
|
github.com/google/pprof v0.0.0-20230207041349-798e818bf904/go.mod h1:uglQLonpP8qtYCYyzA+8c/9qtqgA3qsXGYqCPKARAFg=
|
|
32
|
+
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
|
|
33
|
+
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
|
|
30
34
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
|
31
35
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
|
36
|
+
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
|
37
|
+
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
|
32
38
|
github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
|
|
33
39
|
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
|
34
40
|
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
|
@@ -36,8 +42,16 @@ github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NB
|
|
|
36
42
|
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
|
37
43
|
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
|
38
44
|
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
|
45
|
+
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
|
46
|
+
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
|
47
|
+
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
|
48
|
+
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
|
49
|
+
github.com/pkoukk/tiktoken-go v0.1.8 h1:85ENo+3FpWgAACBaEUVp+lctuTcYUO7BtmfhlN/QTRo=
|
|
50
|
+
github.com/pkoukk/tiktoken-go v0.1.8/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
|
|
39
51
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
|
40
52
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
|
53
|
+
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
|
54
|
+
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
|
41
55
|
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
|
|
42
56
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
|
43
57
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
|
@@ -60,18 +74,25 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
|
|
60
74
|
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
|
61
75
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
|
62
76
|
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
|
77
|
+
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY=
|
|
78
|
+
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70=
|
|
63
79
|
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
|
80
|
+
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
|
|
81
|
+
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
|
|
64
82
|
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
|
65
83
|
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
|
66
84
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
|
67
85
|
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
|
68
86
|
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
|
87
|
+
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
|
|
88
|
+
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
|
69
89
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
|
70
90
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
71
91
|
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
72
92
|
golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
73
93
|
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
74
94
|
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
95
|
+
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
75
96
|
golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
|
|
76
97
|
golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
|
77
98
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
|
@@ -85,6 +106,8 @@ golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
|
|
|
85
106
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
|
86
107
|
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
|
87
108
|
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
|
109
|
+
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
|
|
110
|
+
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
|
|
88
111
|
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
|
89
112
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
|
90
113
|
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
|
@@ -94,3 +117,31 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
|
|
94
117
|
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
|
95
118
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
|
96
119
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
|
120
|
+
modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=
|
|
121
|
+
modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
|
|
122
|
+
modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc=
|
|
123
|
+
modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM=
|
|
124
|
+
modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA=
|
|
125
|
+
modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
|
|
126
|
+
modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
|
|
127
|
+
modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
|
|
128
|
+
modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
|
|
129
|
+
modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
|
|
130
|
+
modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
|
|
131
|
+
modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
|
|
132
|
+
modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI=
|
|
133
|
+
modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE=
|
|
134
|
+
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
|
|
135
|
+
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
|
|
136
|
+
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
|
|
137
|
+
modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
|
|
138
|
+
modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
|
|
139
|
+
modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
|
|
140
|
+
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
|
|
141
|
+
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
|
|
142
|
+
modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU=
|
|
143
|
+
modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA=
|
|
144
|
+
modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
|
|
145
|
+
modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
|
|
146
|
+
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
|
|
147
|
+
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
package rlm
|
|
2
|
+
|
|
3
|
+
// ─── Shared Text Compression Utilities ──────────────────────────────────────
|
|
4
|
+
// Consolidated from context_overflow.go and lcm_summarizer.go to eliminate
|
|
5
|
+
// duplication of the "keep start + end, truncate middle" strategy.
|
|
6
|
+
|
|
7
|
+
// TruncateTextParams configures deterministic text truncation.
type TruncateTextParams struct {
	// MaxTokens is the target token count. Values <= 0 leave no room for
	// content, so any non-empty text collapses to MarkerText alone.
	MaxTokens int
	// MarkerText is inserted at the truncation point.
	// Default: "\n[... content truncated ...]\n"
	MarkerText string
	// StartFraction is the fraction of budget kept from the start (default: 2/3).
	// Any value outside the open interval (0, 1), including NaN, selects the default.
	StartFraction float64
}

// TruncateText performs deterministic truncation using the "keep start + end"
// strategy, which addresses the "lost in the middle" problem by preserving
// both the beginning and end of the text.
//
// This is used as the guaranteed-convergence fallback in both:
//   - LCM's three-level escalation (Level 3)
//   - Context overflow's truncation strategy
//
// The budget is MaxTokens * 3 bytes (a conservative 3 chars per token). Text
// that already fits is returned unchanged. Otherwise the result is
// head + MarkerText + tail, where head and tail together hold at most the
// budget; the marker itself is not counted against the budget.
//
// The function never panics: a negative MaxTokens is treated as a zero budget
// (callers such as reduceByTruncation pass modelLimit - overhead, which can go
// negative). Cut points are moved inward to UTF-8 character boundaries so a
// multi-byte character is never split, which means head and tail may each be
// up to three bytes shorter than their nominal share.
func TruncateText(text string, params TruncateTextParams) string {
	// Conservative chars-to-tokens ratio: 3 chars per token.
	const charsPerToken = 3

	// Apply defaults. The negated form also catches NaN, for which both
	// "<= 0" and ">= 1" are false.
	if !(params.StartFraction > 0 && params.StartFraction < 1) {
		params.StartFraction = 2.0 / 3.0
	}
	if params.MarkerText == "" {
		params.MarkerText = "\n[... content truncated ...]\n"
	}

	// "Does it fit?" is decided in token space (ceil(len/3) <= MaxTokens),
	// which is equivalent to len(text) <= MaxTokens*3 but cannot overflow
	// when MaxTokens is very large.
	if text == "" || params.MaxTokens >= (len(text)+charsPerToken-1)/charsPerToken {
		return text
	}

	// From here on MaxTokens*3 < len(text)+3, so the multiplication is safe.
	budget := 0
	if params.MaxTokens > 0 {
		budget = params.MaxTokens * charsPerToken
	}

	// budget < len(text), hence 0 <= headEnd <= budget < len(text) and
	// headEnd < tailStart <= len(text): the two kept regions never overlap.
	headEnd := int(float64(budget) * params.StartFraction)
	tailStart := len(text) - (budget - headEnd)

	// Snap both cut points to UTF-8 character boundaries. Bytes of the form
	// 10xxxxxx are continuation bytes; shrinking the head and the tail keeps
	// the result within budget while avoiding half-encoded characters.
	for headEnd > 0 && text[headEnd]&0xC0 == 0x80 {
		headEnd--
	}
	for tailStart < len(text) && text[tailStart]&0xC0 == 0x80 {
		tailStart++
	}

	return text[:headEnd] + params.MarkerText + text[tailStart:]
}
|
|
@@ -48,18 +48,18 @@ var modelTokenLimits = map[string]int{
|
|
|
48
48
|
"o1-preview": 128000,
|
|
49
49
|
"o3-mini": 200000,
|
|
50
50
|
// Anthropic (via LiteLLM/proxy)
|
|
51
|
-
"claude-3-opus":
|
|
52
|
-
"claude-3-sonnet":
|
|
53
|
-
"claude-3-haiku":
|
|
54
|
-
"claude-3.5-sonnet":
|
|
55
|
-
"claude-3.5-haiku":
|
|
56
|
-
"claude-sonnet-4":
|
|
57
|
-
"claude-opus-4":
|
|
51
|
+
"claude-3-opus": 200000,
|
|
52
|
+
"claude-3-sonnet": 200000,
|
|
53
|
+
"claude-3-haiku": 200000,
|
|
54
|
+
"claude-3.5-sonnet": 200000,
|
|
55
|
+
"claude-3.5-haiku": 200000,
|
|
56
|
+
"claude-sonnet-4": 200000,
|
|
57
|
+
"claude-opus-4": 200000,
|
|
58
58
|
// Llama (common vLLM deployments)
|
|
59
|
-
"llama-3":
|
|
60
|
-
"llama-3.1":
|
|
61
|
-
"llama-3.2":
|
|
62
|
-
"llama-3.3":
|
|
59
|
+
"llama-3": 8192,
|
|
60
|
+
"llama-3.1": 128000,
|
|
61
|
+
"llama-3.2": 128000,
|
|
62
|
+
"llama-3.3": 128000,
|
|
63
63
|
// Mistral
|
|
64
64
|
"mistral-7b": 32768,
|
|
65
65
|
"mixtral-8x7b": 32768,
|
|
@@ -181,21 +181,11 @@ func (r *RLM) getResponseTokenBudget() int {
|
|
|
181
181
|
|
|
182
182
|
// ─── Token Estimation ────────────────────────────────────────────────────────
|
|
183
183
|
|
|
184
|
-
// EstimateTokens
|
|
185
|
-
//
|
|
186
|
-
//
|
|
187
|
-
//
|
|
188
|
-
// Approximate ratios for common encodings:
|
|
189
|
-
// - English text: ~4 chars/token (cl100k_base)
|
|
190
|
-
// - JSON/code: ~3.5 chars/token
|
|
191
|
-
// - CJK text: ~1.5 chars/token
|
|
192
|
-
// - Mixed: ~3.5 chars/token (safe default)
|
|
184
|
+
// EstimateTokens returns the token count for a string using the global tokenizer.
|
|
185
|
+
// When SetDefaultTokenizer has been called with a model name, this uses accurate
|
|
186
|
+
// BPE tokenization via tiktoken. Otherwise falls back to a ~3.5 chars/token heuristic.
|
|
193
187
|
func EstimateTokens(text string) int {
|
|
194
|
-
|
|
195
|
-
return 0
|
|
196
|
-
}
|
|
197
|
-
// Use 3.5 chars/token as conservative estimate
|
|
198
|
-
return (len(text)*10 + 34) / 35 // equivalent to ceil(len/3.5)
|
|
188
|
+
return GetTokenizer().CountTokens(text)
|
|
199
189
|
}
|
|
200
190
|
|
|
201
191
|
// EstimateMessagesTokens estimates the total tokens for a set of chat messages.
|
|
@@ -477,24 +467,20 @@ func (cr *contextReducer) reduceByMapReduce(query string, chunks []string, model
|
|
|
477
467
|
}
|
|
478
468
|
|
|
479
469
|
// reduceByTruncation simply truncates context to fit within the limit.
|
|
470
|
+
// Uses the shared TruncateText utility (compression.go).
|
|
480
471
|
func (cr *contextReducer) reduceByTruncation(context string, modelLimit int, overhead int) (string, error) {
|
|
481
472
|
cr.obs.Debug("overflow", "Using truncation strategy")
|
|
482
473
|
|
|
483
474
|
availableTokens := modelLimit - overhead
|
|
484
|
-
|
|
475
|
+
truncated := TruncateText(context, TruncateTextParams{
|
|
476
|
+
MaxTokens: availableTokens,
|
|
477
|
+
MarkerText: "\n\n[... context truncated due to token limit ...]\n\n",
|
|
478
|
+
})
|
|
485
479
|
|
|
486
|
-
if
|
|
480
|
+
if truncated == context {
|
|
487
481
|
return context, nil
|
|
488
482
|
}
|
|
489
483
|
|
|
490
|
-
// Keep beginning and end, truncate middle (addresses "lost in the middle" problem)
|
|
491
|
-
keepFromStart := maxChars * 2 / 3
|
|
492
|
-
keepFromEnd := maxChars / 3
|
|
493
|
-
|
|
494
|
-
truncated := context[:keepFromStart] +
|
|
495
|
-
"\n\n[... context truncated due to token limit ...]\n\n" +
|
|
496
|
-
context[len(context)-keepFromEnd:]
|
|
497
|
-
|
|
498
484
|
cr.obs.Debug("overflow", "Truncated context: %d -> %d chars", len(context), len(truncated))
|
|
499
485
|
return truncated, nil
|
|
500
486
|
}
|
|
@@ -721,4 +707,3 @@ func (cr *contextReducer) reduceByTextRank(context string, modelLimit int, overh
|
|
|
721
707
|
len(context), len(result), EstimateTokens(context), EstimateTokens(result))
|
|
722
708
|
return result, nil
|
|
723
709
|
}
|
|
724
|
-
|