recursive-llm-ts 5.0.1 → 5.0.2
This diff shows the changes between publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/package.json +6 -3
- package/bin/rlm-go +0 -0
- package/go/README.md +0 -426
- package/go/integration_test.sh +0 -169
- package/go/rlm/benchmark_test.go +0 -168
- package/go/rlm/context_overflow_test.go +0 -1271
- package/go/rlm/context_savings_test.go +0 -387
- package/go/rlm/lcm_episodes_test.go +0 -384
- package/go/rlm/lcm_test.go +0 -1407
- package/go/rlm/meta_agent_test.go +0 -270
- package/go/rlm/observability_test.go +0 -252
- package/go/rlm/parser_test.go +0 -202
- package/go/rlm/repl_test.go +0 -291
- package/go/rlm/schema_test.go +0 -343
- package/go/rlm/store_backend_test.go +0 -428
- package/go/rlm/structured_test.go +0 -895
- package/go/rlm/textrank_test.go +0 -335
- package/go/rlm/tfidf_test.go +0 -272
- package/go/rlm/token_tracking_test.go +0 -859
- package/go/rlm/tokenizer_test.go +0 -305
- package/go/rlm.test +0 -0
- package/go/test_mock.sh +0 -90
- package/go/test_rlm.sh +0 -41
- package/go/test_simple.sh +0 -78
package/go/rlm/context_overflow_test.go
@@ -1,1271 +0,0 @@
-package rlm
-
-import (
-	"errors"
-	"fmt"
-	"strings"
-	"testing"
-)
-
-// ─── Error Detection Tests ───────────────────────────────────────────────────
-
-func TestIsContextOverflow_DirectType(t *testing.T) {
-	err := NewContextOverflowError(400, "test", 32768, 40354)
-	coe, ok := IsContextOverflow(err)
-	if !ok {
-		t.Fatal("expected IsContextOverflow to return true for ContextOverflowError")
-	}
-	if coe.ModelLimit != 32768 {
-		t.Errorf("expected ModelLimit 32768, got %d", coe.ModelLimit)
-	}
-	if coe.RequestTokens != 40354 {
-		t.Errorf("expected RequestTokens 40354, got %d", coe.RequestTokens)
-	}
-}
-
-func TestIsContextOverflow_FromAPIError_OpenAI(t *testing.T) {
-	// Real OpenAI error format
-	response := `{"error":{"message":"This model's maximum context length is 32768 tokens. However, your request has 40354 input tokens. Please reduce the length of the input messages.","type":"invalid_request_error","param":"messages","code":"context_length_exceeded"}}`
-	apiErr := NewAPIError(400, response)
-
-	coe, ok := IsContextOverflow(apiErr)
-	if !ok {
-		t.Fatal("expected IsContextOverflow to detect OpenAI context overflow error")
-	}
-	if coe.ModelLimit != 32768 {
-		t.Errorf("expected ModelLimit 32768, got %d", coe.ModelLimit)
-	}
-	if coe.RequestTokens != 40354 {
-		t.Errorf("expected RequestTokens 40354, got %d", coe.RequestTokens)
-	}
-}
-
-func TestIsContextOverflow_FromAPIError_vLLM(t *testing.T) {
-	// vLLM / Ray Serve error format (the user's actual error)
-	response := `{"error":{"message":"Message: This model's maximum context length is 32768 tokens. However, your request has 40354 input tokens. Please reduce the length of the input messages. None (Request ID: ad08ee3b-67df-4ab2-bdeb-1e3135847e2a), Internal exception: ray.llm._internal.serve.core.configs.openai_api_models.OpenAIHTTPException","type":"OpenAIHTTPException","param":null,"code":400}}`
-	apiErr := NewAPIError(400, response)
-
-	coe, ok := IsContextOverflow(apiErr)
-	if !ok {
-		t.Fatal("expected IsContextOverflow to detect vLLM context overflow error")
-	}
-	if coe.ModelLimit != 32768 {
-		t.Errorf("expected ModelLimit 32768, got %d", coe.ModelLimit)
-	}
-	if coe.RequestTokens != 40354 {
-		t.Errorf("expected RequestTokens 40354, got %d", coe.RequestTokens)
-	}
-}
-
-func TestIsContextOverflow_FromAPIError_Azure(t *testing.T) {
-	// Azure OpenAI format
-	response := `{"error":{"message":"This model's maximum context length is 8192 tokens, however you requested 12000 tokens","type":"invalid_request_error","code":"context_length_exceeded"}}`
-	apiErr := NewAPIError(400, response)
-
-	coe, ok := IsContextOverflow(apiErr)
-	if !ok {
-		t.Fatal("expected IsContextOverflow to detect Azure context overflow error")
-	}
-	if coe.ModelLimit != 8192 {
-		t.Errorf("expected ModelLimit 8192, got %d", coe.ModelLimit)
-	}
-	if coe.RequestTokens != 12000 {
-		t.Errorf("expected RequestTokens 12000, got %d", coe.RequestTokens)
-	}
-}
-
-func TestIsContextOverflow_NotOverflow(t *testing.T) {
-	tests := []error{
-		errors.New("rate limit exceeded"),
-		errors.New("timeout"),
-		NewAPIError(500, "internal server error"),
-		NewAPIError(429, "too many requests"),
-		NewMaxIterationsError(10),
-		NewMaxDepthError(5),
-	}
-
-	for _, err := range tests {
-		_, ok := IsContextOverflow(err)
-		if ok {
-			t.Errorf("expected IsContextOverflow to return false for: %v", err)
-		}
-	}
-}
-
-func TestIsContextOverflow_GenericError(t *testing.T) {
-	// Generic error with overflow message
-	err := fmt.Errorf("This model's maximum context length is 4096 tokens. However, your request has 5000 input tokens.")
-	coe, ok := IsContextOverflow(err)
-	if !ok {
-		t.Fatal("expected IsContextOverflow to detect overflow from generic error")
-	}
-	if coe.ModelLimit != 4096 {
-		t.Errorf("expected ModelLimit 4096, got %d", coe.ModelLimit)
-	}
-	if coe.RequestTokens != 5000 {
-		t.Errorf("expected RequestTokens 5000, got %d", coe.RequestTokens)
-	}
-}
-
-func TestIsContextOverflow_MaxTokensTooLarge_vLLM(t *testing.T) {
-	// vLLM/Ray Serve error when max_tokens exceeds remaining capacity
-	// This is the exact error from the user's production logs
-	response := `{"object":"error","message":"'max_tokens' or 'max_completion_tokens' is too large: 10000. This model's maximum context length is 32768 tokens and your request has 30168 input tokens (10000 > 32768 - 30168)","type":"BadRequestError","param":null,"code":400}`
-	apiErr := NewAPIError(400, response)
-
-	coe, ok := IsContextOverflow(apiErr)
-	if !ok {
-		t.Fatal("expected IsContextOverflow to detect max_tokens too large error")
-	}
-	if coe.ModelLimit != 32768 {
-		t.Errorf("expected ModelLimit 32768, got %d", coe.ModelLimit)
-	}
-	// Request tokens should include both input + max_tokens: 30168 + 10000 = 40168
-	if coe.RequestTokens != 40168 {
-		t.Errorf("expected RequestTokens 40168 (input 30168 + max_tokens 10000), got %d", coe.RequestTokens)
-	}
-}
-
-func TestIsContextOverflow_MaxCompletionTokensTooLarge(t *testing.T) {
-	// OpenAI newer API format with max_completion_tokens
-	response := `{"error":{"message":"'max_tokens' or 'max_completion_tokens' is too large: 5000. This model's maximum context length is 16384 tokens and your request has 14000 input tokens","type":"invalid_request_error","code":"invalid_request_error"}}`
-	apiErr := NewAPIError(400, response)
-
-	coe, ok := IsContextOverflow(apiErr)
-	if !ok {
-		t.Fatal("expected IsContextOverflow to detect max_completion_tokens too large error")
-	}
-	if coe.ModelLimit != 16384 {
-		t.Errorf("expected ModelLimit 16384, got %d", coe.ModelLimit)
-	}
-	if coe.RequestTokens != 19000 {
-		t.Errorf("expected RequestTokens 19000 (input 14000 + max_tokens 5000), got %d", coe.RequestTokens)
-	}
-}
-
-func TestGetResponseTokenBudget(t *testing.T) {
-	rlm := &RLM{
-		extraParams: map[string]interface{}{
-			"max_tokens": float64(10000),
-		},
-	}
-	obs := NewNoopObserver()
-	config := DefaultContextOverflowConfig()
-	reducer := newContextReducer(rlm, config, obs)
-
-	budget := reducer.getResponseTokenBudget()
-	if budget != 10000 {
-		t.Errorf("expected response token budget 10000, got %d", budget)
-	}
-}
-
-func TestGetResponseTokenBudget_MaxCompletionTokens(t *testing.T) {
-	rlm := &RLM{
-		extraParams: map[string]interface{}{
-			"max_completion_tokens": float64(5000),
-		},
-	}
-	obs := NewNoopObserver()
-	config := DefaultContextOverflowConfig()
-	reducer := newContextReducer(rlm, config, obs)
-
-	budget := reducer.getResponseTokenBudget()
-	if budget != 5000 {
-		t.Errorf("expected response token budget 5000, got %d", budget)
-	}
-}
-
-func TestGetResponseTokenBudget_NoMaxTokens(t *testing.T) {
-	rlm := &RLM{
-		extraParams: map[string]interface{}{
-			"temperature": 0.7,
-		},
-	}
-	obs := NewNoopObserver()
-	config := DefaultContextOverflowConfig()
-	reducer := newContextReducer(rlm, config, obs)
-
-	budget := reducer.getResponseTokenBudget()
-	if budget != 0 {
-		t.Errorf("expected response token budget 0, got %d", budget)
-	}
-}
-
-func TestMakeMapPhaseParams(t *testing.T) {
-	rlm := &RLM{
-		extraParams: map[string]interface{}{
-			"max_tokens":          float64(10000),
-			"custom_llm_provider": "vllm",
-			"temperature":         0.7,
-		},
-	}
-	obs := NewNoopObserver()
-	config := DefaultContextOverflowConfig()
-	reducer := newContextReducer(rlm, config, obs)
-
-	params := reducer.makeMapPhaseParams(32768)
-
-	// max_tokens should be capped (32768/4 = 8192, but cap is 2000)
-	maxTokens, ok := params["max_tokens"].(int)
-	if !ok {
-		t.Fatal("expected max_tokens to be int in map phase params")
-	}
-	if maxTokens > 2000 {
-		t.Errorf("expected map phase max_tokens <= 2000, got %d", maxTokens)
-	}
-
-	// custom_llm_provider should be preserved
-	if params["custom_llm_provider"] != "vllm" {
-		t.Errorf("expected custom_llm_provider to be preserved, got %v", params["custom_llm_provider"])
-	}
-
-	// temperature should be preserved
-	if params["temperature"] != 0.7 {
-		t.Errorf("expected temperature to be preserved, got %v", params["temperature"])
-	}
-}
-
-func TestContextOverflowError_OverflowRatio(t *testing.T) {
-	tests := []struct {
-		limit    int
-		request  int
-		expected float64
-	}{
-		{32768, 40354, 1.2314}, // ~23% over
-		{4096, 8192, 2.0},      // 100% over
-		{100, 100, 1.0},        // exactly at limit
-		{0, 100, 0.0},          // zero limit edge case
-	}
-
-	for _, tt := range tests {
-		coe := NewContextOverflowError(400, "test", tt.limit, tt.request)
-		ratio := coe.OverflowRatio()
-		if ratio < tt.expected-0.01 || ratio > tt.expected+0.01 {
-			t.Errorf("OverflowRatio(%d, %d) = %.4f, expected ~%.4f", tt.limit, tt.request, ratio, tt.expected)
-		}
-	}
-}
-
-// ─── Token Estimation Tests ──────────────────────────────────────────────────
-
-func TestEstimateTokens(t *testing.T) {
-	tests := []struct {
-		text      string
-		minTokens int
-		maxTokens int
-	}{
-		{"", 0, 0},
-		{"hello", 1, 3},
-		{"Hello, world!", 2, 5},
-		{strings.Repeat("a", 100), 20, 40},       // 100 chars -> ~25-30 tokens
-		{strings.Repeat("a", 1000), 200, 350},    // 1000 chars -> ~250-300 tokens
-		{strings.Repeat("a", 10000), 2000, 3500}, // 10000 chars -> ~2500-3000 tokens
-	}
-
-	for _, tt := range tests {
-		tokens := EstimateTokens(tt.text)
-		if tokens < tt.minTokens || tokens > tt.maxTokens {
-			t.Errorf("EstimateTokens(%d chars) = %d, expected between %d and %d",
-				len(tt.text), tokens, tt.minTokens, tt.maxTokens)
-		}
-	}
-}
-
-func TestEstimateTokens_ConservativeForEnglish(t *testing.T) {
-	// For English text, OpenAI's cl100k_base gives roughly 1 token per 4 chars
-	// Our estimator should be conservative (overestimate) to prevent overflow
-	englishText := "The quick brown fox jumped over the lazy dog. This is a test of the token estimation function."
-	estimated := EstimateTokens(englishText)
-
-	// Real token count for this text is about 22 (cl100k_base)
-	// We expect our estimate to be >= actual (conservative)
-	if estimated < 20 {
-		t.Errorf("EstimateTokens for English text should be at least 20, got %d", estimated)
-	}
-}
-
-func TestEstimateMessagesTokens(t *testing.T) {
-	messages := []Message{
-		{Role: "system", Content: "You are a helpful assistant."},
-		{Role: "user", Content: "Hello, how are you?"},
-	}
-
-	tokens := EstimateMessagesTokens(messages)
-	// 3 (base) + 2*(4 overhead) + tokens for both messages
-	if tokens < 15 {
-		t.Errorf("EstimateMessagesTokens expected at least 15, got %d", tokens)
-	}
-}
-
-// ─── Context Chunking Tests ─────────────────────────────────────────────────
-
-func TestChunkContext_SmallContext(t *testing.T) {
-	context := "This is a small context."
-	chunks := ChunkContext(context, 1000)
-
-	if len(chunks) != 1 {
-		t.Errorf("expected 1 chunk for small context, got %d", len(chunks))
-	}
-	if chunks[0] != context {
-		t.Error("expected chunk to be the original context")
-	}
-}
-
-func TestChunkContext_LargeContext(t *testing.T) {
-	// Create context that's ~10000 tokens (~35000 chars at 3.5 chars/token)
-	context := strings.Repeat("The quick brown fox jumped over the lazy dog. ", 700)
-	chunks := ChunkContext(context, 2000)
-
-	if len(chunks) < 2 {
-		t.Errorf("expected at least 2 chunks, got %d", len(chunks))
-	}
-
-	// Verify all content is covered (with overlap, total chars may exceed original)
-	totalChars := 0
-	for _, chunk := range chunks {
-		totalChars += len(chunk)
-		// Each chunk should be within the token limit
-		chunkTokens := EstimateTokens(chunk)
-		if chunkTokens > 2500 { // Allow some slack
-			t.Errorf("chunk has %d estimated tokens, expected <= 2500", chunkTokens)
-		}
-	}
-}
-
-func TestChunkContext_ParagraphBoundaries(t *testing.T) {
-	// Context with clear paragraph boundaries
-	paragraphs := []string{
-		"First paragraph with some content here.",
-		"Second paragraph with different content.",
-		"Third paragraph with more information.",
-		"Fourth paragraph wrapping up the text.",
-	}
-	context := strings.Join(paragraphs, "\n\n")
-
-	// Use a budget that forces splitting into 2 chunks
-	chunks := ChunkContext(context, 30) // ~30 tokens per chunk
-
-	if len(chunks) < 2 {
-		t.Errorf("expected at least 2 chunks, got %d", len(chunks))
-	}
-
-	// Verify chunks preferentially split at paragraph boundaries
-	for _, chunk := range chunks {
-		trimmed := strings.TrimSpace(chunk)
-		if len(trimmed) == 0 {
-			t.Error("got empty chunk")
-		}
-	}
-}
-
-func TestChunkContext_ZeroTokenBudget(t *testing.T) {
-	context := "Some text content here"
-	chunks := ChunkContext(context, 0)
-	// Should use default of 4000 tokens
-	if len(chunks) != 1 {
-		t.Errorf("expected 1 chunk with default budget, got %d", len(chunks))
-	}
-}
-
-// ─── parseContextOverflowMessage Tests ──────────────────────────────────────
-
-func TestParseContextOverflowMessage(t *testing.T) {
-	tests := []struct {
-		name    string
-		msg     string
-		limit   int
-		request int
-		ok      bool
-	}{
-		{
-			name:    "OpenAI standard",
-			msg:     "This model's maximum context length is 32768 tokens. However, your request has 40354 input tokens.",
-			limit:   32768,
-			request: 40354,
-			ok:      true,
-		},
-		{
-			name:    "Azure format",
-			msg:     "This model's maximum context length is 8192 tokens, however you requested 12000 tokens",
-			limit:   8192,
-			request: 12000,
-			ok:      true,
-		},
-		{
-			name:    "With comma-separated numbers",
-			msg:     "This model's maximum context length is 32,768 tokens. However, your request has 40,354 input tokens.",
-			limit:   32768,
-			request: 40354,
-			ok:      true,
-		},
-		{
-			name:    "context_length_exceeded code",
-			msg:     `{"code":"context_length_exceeded","message":"This model's maximum context length is 4096 tokens. Your messages resulted in 5000 tokens."}`,
-			limit:   4096,
-			request: 5000,
-			ok:      true,
-		},
-		{
-			name:    "Not an overflow error",
-			msg:     "rate limit exceeded",
-			limit:   0,
-			request: 0,
-			ok:      false,
-		},
-		{
-			name:    "Generic error",
-			msg:     "internal server error",
-			limit:   0,
-			request: 0,
-			ok:      false,
-		},
-		{
-			name:    "vLLM wrapped error",
-			msg:     "Message: This model's maximum context length is 16384 tokens. However, your request has 20000 input tokens. Internal exception: ray.llm._internal.serve",
-			limit:   16384,
-			request: 20000,
-			ok:      true,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			limit, request, ok := parseContextOverflowMessage(tt.msg)
-			if ok != tt.ok {
-				t.Errorf("parseContextOverflowMessage ok=%v, expected %v", ok, tt.ok)
-			}
-			if ok && limit != tt.limit {
-				t.Errorf("limit=%d, expected %d", limit, tt.limit)
-			}
-			if ok && request != tt.request {
-				t.Errorf("request=%d, expected %d", request, tt.request)
-			}
-		})
-	}
-}
-
-// ─── extractNumber Tests ─────────────────────────────────────────────────────
-
-func TestExtractNumber(t *testing.T) {
-	tests := []struct {
-		s        string
-		prefix   string
-		suffix   string
-		expected int
-	}{
-		{"maximum context length is 32768 tokens", "maximum context length is ", " tokens", 32768},
-		{"your request has 40354 input tokens", "your request has ", " input tokens", 40354},
-		{"you requested 12000 tokens", "you requested ", " tokens", 12000},
-		{"limit is 32,768 tokens", "limit is ", " tokens", 32768},
-		{"no match here", "limit is ", " tokens", 0},
-		{"limit is tokens", "limit is ", " tokens", 0}, // empty number
-	}
-
-	for _, tt := range tests {
-		result := extractNumber(tt.s, tt.prefix, tt.suffix)
-		if result != tt.expected {
-			t.Errorf("extractNumber(%q, %q, %q) = %d, expected %d", tt.s, tt.prefix, tt.suffix, result, tt.expected)
-		}
-	}
-}
-
-// ─── ContextOverflowConfig Tests ─────────────────────────────────────────────
-
-func TestDefaultContextOverflowConfig(t *testing.T) {
-	config := DefaultContextOverflowConfig()
-
-	if !config.Enabled {
-		t.Error("expected default Enabled to be true")
-	}
-	if config.Strategy != "mapreduce" {
-		t.Errorf("expected default Strategy 'mapreduce', got %q", config.Strategy)
-	}
-	if config.SafetyMargin != 0.15 {
-		t.Errorf("expected default SafetyMargin 0.15, got %f", config.SafetyMargin)
-	}
-	if config.MaxReductionAttempts != 3 {
-		t.Errorf("expected default MaxReductionAttempts 3, got %d", config.MaxReductionAttempts)
-	}
-	if config.MaxModelTokens != 0 {
-		t.Errorf("expected default MaxModelTokens 0 (auto-detect), got %d", config.MaxModelTokens)
-	}
-}
-
-// ─── ConfigFromMap Integration Tests ─────────────────────────────────────────
-
-func TestConfigFromMap_ContextOverflow(t *testing.T) {
-	configMap := map[string]interface{}{
-		"api_key": "test-key",
-		"context_overflow": map[string]interface{}{
-			"enabled":                true,
-			"max_model_tokens":       float64(32768),
-			"strategy":               "truncate",
-			"safety_margin":          0.2,
-			"max_reduction_attempts": float64(5),
-		},
-	}
-
-	config := ConfigFromMap(configMap)
-
-	if config.ContextOverflow == nil {
-		t.Fatal("expected ContextOverflow to be set")
-	}
-	if !config.ContextOverflow.Enabled {
-		t.Error("expected Enabled to be true")
-	}
-	if config.ContextOverflow.MaxModelTokens != 32768 {
-		t.Errorf("expected MaxModelTokens 32768, got %d", config.ContextOverflow.MaxModelTokens)
-	}
-	if config.ContextOverflow.Strategy != "truncate" {
-		t.Errorf("expected Strategy 'truncate', got %q", config.ContextOverflow.Strategy)
-	}
-	if config.ContextOverflow.SafetyMargin != 0.2 {
-		t.Errorf("expected SafetyMargin 0.2, got %f", config.ContextOverflow.SafetyMargin)
-	}
-	if config.ContextOverflow.MaxReductionAttempts != 5 {
-		t.Errorf("expected MaxReductionAttempts 5, got %d", config.ContextOverflow.MaxReductionAttempts)
-	}
-}
-
-func TestConfigFromMap_NoContextOverflow(t *testing.T) {
-	configMap := map[string]interface{}{
-		"api_key": "test-key",
-	}
-
-	config := ConfigFromMap(configMap)
-
-	if config.ContextOverflow != nil {
-		t.Error("expected ContextOverflow to be nil when not specified in map")
-	}
-}
-
-// ─── Truncation Strategy Tests ───────────────────────────────────────────────
-
-func TestReduceByTruncation(t *testing.T) {
-	// Create a large context
-	context := strings.Repeat("This is a sentence. ", 500) // ~10000 chars
-
-	obs := NewNoopObserver()
-	config := ContextOverflowConfig{
-		Enabled:              true,
-		Strategy:             "truncate",
-		SafetyMargin:         0.15,
-		MaxReductionAttempts: 3,
-	}
-
-	rlmEngine := &RLM{
-		model:    "test-model",
-		observer: obs,
-	}
-
-	reducer := newContextReducer(rlmEngine, config, obs)
-	result, err := reducer.reduceByTruncation(context, 2000, 500)
-
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-
-	if len(result) >= len(context) {
-		t.Errorf("expected truncated context to be shorter: %d >= %d", len(result), len(context))
-	}
-
-	// Should contain the truncation marker
-	if !strings.Contains(result, "[... context truncated") {
-		t.Error("expected truncation marker in result")
-	}
-
-	// Should preserve beginning and end
-	if !strings.HasPrefix(result, "This is") {
-		t.Error("expected result to start with original beginning")
-	}
-	if !strings.HasSuffix(strings.TrimSpace(result), "sentence. ") && !strings.HasSuffix(strings.TrimSpace(result), "sentence.") {
-		// Just check it has some of the end content
-		if !strings.Contains(result[len(result)/2:], "sentence") {
-			t.Error("expected result to contain end content")
-		}
-	}
-}
-
-// ─── findBreakPoint Tests ────────────────────────────────────────────────────
-
-func TestFindBreakPoint(t *testing.T) {
-	tests := []struct {
-		name  string
-		text  string
-		start int
-		end   int
-		check func(int) bool
-	}{
-		{
-			name:  "Prefers paragraph break",
-			text:  "First paragraph.\n\nSecond paragraph.\n\nThird paragraph.",
-			start: 0,
-			end:   20,
-			check: func(bp int) bool {
-				return bp == 18 // After first \n\n (search window reaches back to position 16)
-			},
-		},
-		{
-			name:  "Falls back to line break",
-			text:  "Line one.\nLine two.\nLine three.",
-			start: 0,
-			end:   20,
-			check: func(bp int) bool {
-				return bp == 10 || bp == 20 // After a \n
-			},
-		},
-		{
-			name:  "End of text",
-			text:  "Short text",
-			start: 0,
-			end:   100,
-			check: func(bp int) bool {
-				return bp == 10 // End of text
-			},
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			bp := findBreakPoint(tt.text, tt.start, tt.end)
-			if !tt.check(bp) {
-				t.Errorf("findBreakPoint returned %d", bp)
-			}
-		})
-	}
-}
-
-// ─── Error Chain Tests ───────────────────────────────────────────────────────
-
-func TestContextOverflowError_ErrorChain(t *testing.T) {
-	coe := NewContextOverflowError(400, "test response", 32768, 40354)
-
-	// Verify the embedded types are accessible
-	if coe.APIError == nil {
-		t.Fatal("expected embedded APIError to be non-nil")
-	}
-	if coe.StatusCode != 400 {
-		t.Errorf("expected status 400, got %d", coe.StatusCode)
-	}
-	if coe.RLMError == nil {
-		t.Fatal("expected embedded RLMError to be non-nil")
-	}
-
-	// Verify errors.As finds ContextOverflowError itself
-	var coe2 *ContextOverflowError
-	if !errors.As(coe, &coe2) {
-		t.Error("expected errors.As to find ContextOverflowError")
-	}
-
-	// Verify errors.As finds APIError through Unwrap chain
-	var apiErr *APIError
-	if !errors.As(coe, &apiErr) {
-		t.Error("expected errors.As to find APIError in chain")
-	}
-
-	// Test error message
-	msg := coe.Error()
-	if !strings.Contains(msg, "context overflow") {
-		t.Errorf("expected error message to contain 'context overflow', got: %s", msg)
-	}
-	if !strings.Contains(msg, "32768") {
-		t.Errorf("expected error message to contain model limit, got: %s", msg)
-	}
-}
-
-// ─── RLM Integration Tests ──────────────────────────────────────────────────
-
-func TestRLMDefaultContextOverflow(t *testing.T) {
-	// Creating an RLM without explicit context_overflow should enable it by default
-	config := Config{
-		APIKey:        "test",
-		MaxDepth:      5,
-		MaxIterations: 30,
-	}
-
-	engine := New("test-model", config)
-
-	if engine.contextOverflow == nil {
-		t.Fatal("expected contextOverflow to be set by default")
-	}
-	if !engine.contextOverflow.Enabled {
-		t.Error("expected contextOverflow to be enabled by default")
-	}
-	if engine.contextOverflow.Strategy != "mapreduce" {
-		t.Errorf("expected default strategy 'mapreduce', got %q", engine.contextOverflow.Strategy)
-	}
-}
-
-func TestRLMExplicitContextOverflow(t *testing.T) {
-	config := Config{
-		APIKey: "test",
-		ContextOverflow: &ContextOverflowConfig{
-			Enabled:        true,
-			MaxModelTokens: 16384,
-			Strategy:       "truncate",
-		},
-	}
-
-	engine := New("test-model", config)
-
-	if engine.contextOverflow.MaxModelTokens != 16384 {
-		t.Errorf("expected MaxModelTokens 16384, got %d", engine.contextOverflow.MaxModelTokens)
-	}
-	if engine.contextOverflow.Strategy != "truncate" {
-		t.Errorf("expected strategy 'truncate', got %q", engine.contextOverflow.Strategy)
-	}
-}
-
-// ─── New Strategy Config Tests ──────────────────────────────────────────────
-
-func TestRLMContextOverflow_TFIDFStrategy(t *testing.T) {
-	config := Config{
-		APIKey: "test",
-		ContextOverflow: &ContextOverflowConfig{
-			Enabled:  true,
-			Strategy: "tfidf",
-		},
-	}
-	engine := New("test-model", config)
-	if engine.contextOverflow.Strategy != "tfidf" {
-		t.Errorf("expected strategy 'tfidf', got %q", engine.contextOverflow.Strategy)
-	}
-}
-
-func TestRLMContextOverflow_TextRankStrategy(t *testing.T) {
-	config := Config{
-		APIKey: "test",
-		ContextOverflow: &ContextOverflowConfig{
-			Enabled:  true,
-			Strategy: "textrank",
-		},
-	}
-	engine := New("test-model", config)
-	if engine.contextOverflow.Strategy != "textrank" {
-		t.Errorf("expected strategy 'textrank', got %q", engine.contextOverflow.Strategy)
-	}
-}
-
-func TestRLMContextOverflow_RefineStrategy(t *testing.T) {
-	config := Config{
-		APIKey: "test",
-		ContextOverflow: &ContextOverflowConfig{
-			Enabled:  true,
-			Strategy: "refine",
-		},
-	}
-	engine := New("test-model", config)
-	if engine.contextOverflow.Strategy != "refine" {
-		t.Errorf("expected strategy 'refine', got %q", engine.contextOverflow.Strategy)
-	}
-}
-
-func TestConfigFromMap_NewStrategies(t *testing.T) {
-	for _, strategy := range []string{"tfidf", "textrank", "refine"} {
-		configMap := map[string]interface{}{
-			"api_key": "test-key",
-			"context_overflow": map[string]interface{}{
-				"enabled":  true,
-				"strategy": strategy,
-			},
-		}
-		config := ConfigFromMap(configMap)
-		if config.ContextOverflow == nil {
-			t.Fatalf("expected ContextOverflow for strategy %q", strategy)
-		}
-		if config.ContextOverflow.Strategy != strategy {
-			t.Errorf("expected strategy %q, got %q", strategy, config.ContextOverflow.Strategy)
-		}
-	}
-}
-
-// ─── TF-IDF Reducer Integration Tests ───────────────────────────────────────
-
-func TestReduceByTFIDF(t *testing.T) {
-	// Build large context with multiple sentences
-	sentences := []string{
-		"The quarterly earnings report shows revenue of $4.2 billion.",
-		"Weather conditions are expected to be mild this week.",
-		"The merger was approved by regulatory authorities in March.",
-		"Traffic congestion increased 15% during rush hour.",
-		"Operating margins improved to 23.5% from 19.8% last year.",
-		"The local park received new playground equipment.",
-		"Customer retention rate reached 94% this quarter.",
-		"The movie earned $150 million at the box office opening weekend.",
-		"Year-over-year growth accelerated to 31% in Q4.",
-		"The recipe calls for two cups of flour and one egg.",
-	}
-	context := strings.Join(sentences, " ")
-
-	obs := NewNoopObserver()
-	config := ContextOverflowConfig{
-		Enabled:      true,
-		Strategy:     "tfidf",
-		SafetyMargin: 0.15,
-	}
-	rlmEngine := &RLM{model: "test-model", observer: obs}
-	reducer := newContextReducer(rlmEngine, config, obs)
-
-	result, err := reducer.reduceByTFIDF(context, 50, 10) // Very tight budget
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-
-	if len(result) >= len(context) {
-		t.Errorf("expected reduced context, got same or larger: %d >= %d", len(result), len(context))
-	}
-	if len(result) == 0 {
-		t.Error("expected non-empty result")
-	}
-}
-
-// ─── TextRank Reducer Integration Tests ─────────────────────────────────────
-
-func TestReduceByTextRank(t *testing.T) {
-	sentences := []string{
-		"Machine learning algorithms process large datasets.",
-		"Deep learning models use neural network architectures.",
-		"Natural language processing handles text data efficiently.",
-		"The garden needs watering twice a week.",
-		"Transformer models revolutionized NLP tasks.",
-		"Computer vision detects objects in images.",
-		"The recipe requires fresh ingredients only.",
-	}
-	context := strings.Join(sentences, " ")
-
-	obs := NewNoopObserver()
-	config := ContextOverflowConfig{
-		Enabled:      true,
-		Strategy:     "textrank",
-		SafetyMargin: 0.15,
-	}
-	rlmEngine := &RLM{model: "test-model", observer: obs}
-	reducer := newContextReducer(rlmEngine, config, obs)
-
-	result, err := reducer.reduceByTextRank(context, 60, 10)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-
-	if len(result) >= len(context) {
-		t.Errorf("expected reduced context: %d >= %d", len(result), len(context))
-	}
-	if len(result) == 0 {
-		t.Error("expected non-empty result")
-	}
-}
-
-// ─── Strategy Dispatch Tests ────────────────────────────────────────────────
-
-func TestReduceForCompletion_DispatchesTFIDF(t *testing.T) {
-	// Large context that will need chunking
-	context := strings.Repeat("Sentence about machine learning algorithms. ", 100)
-
-	obs := NewNoopObserver()
-	config := ContextOverflowConfig{
-		Enabled:      true,
-		Strategy:     "tfidf",
-		SafetyMargin: 0.15,
-	}
-	rlmEngine := &RLM{model: "test-model", observer: obs}
-	reducer := newContextReducer(rlmEngine, config, obs)
-
-	result, err := reducer.ReduceForCompletion("What about ML?", context, 500)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if len(result) >= len(context) {
-		t.Errorf("expected reduced context for tfidf strategy")
-	}
-}
-
-func TestReduceForCompletion_DispatchesTextRank(t *testing.T) {
-	context := strings.Repeat("Deep learning models process data efficiently. ", 100)
-
-	obs := NewNoopObserver()
-	config := ContextOverflowConfig{
-		Enabled:      true,
-		Strategy:     "textrank",
-		SafetyMargin: 0.15,
-	}
-	rlmEngine := &RLM{model: "test-model", observer: obs}
-	reducer := newContextReducer(rlmEngine, config, obs)
-
-	result, err := reducer.ReduceForCompletion("What about DL?", context, 500)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if len(result) >= len(context) {
-		t.Errorf("expected reduced context for textrank strategy")
-	}
-}
-
-// ─── Model Token Limits Tests ────────────────────────────────────────────────
-
-func TestLookupModelTokenLimit_ExactMatch(t *testing.T) {
-	tests := []struct {
-		model    string
-		expected int
-	}{
-		{"gpt-4o", 128000},
-		{"gpt-4o-mini", 128000},
-		{"gpt-4", 8192},
-		{"gpt-4-32k", 32768},
-		{"gpt-3.5-turbo", 16385},
-		{"claude-3-opus", 200000},
-		{"claude-sonnet-4", 200000},
-		{"mistral-7b", 32768},
-	}
-
-	for _, tt := range tests {
-		limit := LookupModelTokenLimit(tt.model)
-		if limit != tt.expected {
-			t.Errorf("LookupModelTokenLimit(%q) = %d, expected %d", tt.model, limit, tt.expected)
-		}
-	}
-}
-
-func TestLookupModelTokenLimit_PrefixMatch(t *testing.T) {
-	// Versioned model names should match by prefix
-	tests := []struct {
-		model    string
-		expected int
-	}{
-		{"gpt-4o-mini-2024-07-18", 128000},
-		{"gpt-4o-2024-05-13", 128000},
-		{"claude-3-opus-20240229", 200000},
-		{"mistral-7b-instruct-v0.2", 32768},
-	}
-
-	for _, tt := range tests {
-		limit := LookupModelTokenLimit(tt.model)
-		if limit != tt.expected {
-			t.Errorf("LookupModelTokenLimit(%q) = %d, expected %d", tt.model, limit, tt.expected)
-		}
-	}
-}
-
-func TestLookupModelTokenLimit_Unknown(t *testing.T) {
-	limit := LookupModelTokenLimit("completely-unknown-model-xyz")
-	if limit != 0 {
-		t.Errorf("expected 0 for unknown model, got %d", limit)
-	}
-}
-
-func TestLookupModelTokenLimit_CaseInsensitive(t *testing.T) {
-	limit := LookupModelTokenLimit("GPT-4O-MINI")
-	if limit != 128000 {
-		t.Errorf("expected 128000 for case-insensitive match, got %d", limit)
-	}
-}
-
-func TestGetModelTokenLimit_ConfigOverride(t *testing.T) {
-	engine := New("gpt-4o-mini", Config{
-		APIKey: "test",
-		ContextOverflow: &ContextOverflowConfig{
-			Enabled:        true,
-			MaxModelTokens: 16384,
-		},
-	})
-
-	limit := engine.getModelTokenLimit()
-	if limit != 16384 {
-		t.Errorf("expected config override 16384, got %d", limit)
-	}
-}
-
-func TestGetModelTokenLimit_ModelLookup(t *testing.T) {
-	engine := New("gpt-4o-mini", Config{
-		APIKey: "test",
-	})
-
-	limit := engine.getModelTokenLimit()
-	if limit != 128000 {
-		t.Errorf("expected model lookup 128000, got %d", limit)
-	}
-}
-
-func TestGetModelTokenLimit_UnknownModel(t *testing.T) {
-	engine := New("custom-local-model", Config{
-		APIKey: "test",
-	})
-
-	limit := engine.getModelTokenLimit()
-	if limit != 0 {
-		t.Errorf("expected 0 for unknown model, got %d", limit)
-	}
-}
-
-// ─── Pre-emptive Overflow Tests ──────────────────────────────────────────────
-
-func TestPreemptiveReduceContext_SmallContext(t *testing.T) {
-	engine := New("gpt-4o-mini", Config{
-		APIKey: "test",
-	})
-
-	// Small context should pass through unchanged
-	context := "This is a small context that easily fits."
-	reduced, wasReduced, err := engine.PreemptiveReduceContext("What is this?", context, 500)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if wasReduced {
-		t.Error("expected no reduction for small context")
-	}
-	if reduced != context {
-		t.Error("expected context to be unchanged")
-	}
-}
-
-func TestPreemptiveReduceContext_LargeContext(t *testing.T) {
-	engine := New("gpt-4o-mini", Config{
-		APIKey: "test",
-		ContextOverflow: &ContextOverflowConfig{
-			Enabled:        true,
-			MaxModelTokens: 1000, // Very small limit to force overflow
-			Strategy:       "truncate",
-			SafetyMargin:   0.15,
-		},
-	})
-
-	// Create large context that exceeds the 1000 token limit
-	context := strings.Repeat("The revenue for Q4 was $4.2 billion, representing 23% year-over-year growth. ", 100)
-
-	reduced, wasReduced, err := engine.PreemptiveReduceContext("Summarize revenue", context, 300)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if !wasReduced {
-		t.Error("expected context to be reduced")
-	}
-	if len(reduced) >= len(context) {
-		t.Errorf("expected reduced context to be shorter: %d >= %d", len(reduced), len(context))
-	}
-}
-
-func TestPreemptiveReduceContext_DisabledOverflow(t *testing.T) {
-	engine := New("gpt-4o-mini", Config{
-		APIKey: "test",
-		ContextOverflow: &ContextOverflowConfig{
-			Enabled: false,
-		},
-	})
-
-	context := strings.Repeat("Large content. ", 10000)
-	reduced, wasReduced, err := engine.PreemptiveReduceContext("query", context, 500)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if wasReduced {
-		t.Error("expected no reduction when overflow is disabled")
-	}
-	if reduced != context {
-		t.Error("expected context unchanged when overflow is disabled")
-	}
-}
-
-func TestPreemptiveReduceContext_UnknownModel(t *testing.T) {
-	engine := New("custom-local-model", Config{
-		APIKey: "test",
-	})
-
-	// Unknown model with no config override → no pre-emptive check
-	context := strings.Repeat("Large content. ", 10000)
-	reduced, wasReduced, err := engine.PreemptiveReduceContext("query", context, 500)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if wasReduced {
-		t.Error("expected no reduction for unknown model with no config limit")
-	}
-	if reduced != context {
-		t.Error("expected context unchanged")
-	}
-}
-
-func TestPreemptiveReduceContext_AccountsForResponseBudget(t *testing.T) {
-	// With a high max_tokens, even moderate context should trigger reduction
-	engine := New("gpt-4o-mini", Config{
-		APIKey: "test",
-		ContextOverflow: &ContextOverflowConfig{
-			Enabled:        true,
-			MaxModelTokens: 2000,
-			Strategy:       "truncate",
-			SafetyMargin:   0.15,
-		},
-		ExtraParams: map[string]interface{}{
-			"max_tokens": float64(1000), // Large response budget
-		},
-	})
-
-	// Context of ~500 tokens + max_tokens 1000 + overhead = exceeds 2000
-	context := strings.Repeat("Revenue data: the company earned $4.2B in Q4 fiscal year. ", 30)
-
-	reduced, wasReduced, err := engine.PreemptiveReduceContext("Summarize", context, 300)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if !wasReduced {
-		t.Error("expected reduction when response budget + context exceeds limit")
-	}
-	if len(reduced) >= len(context) {
-		t.Errorf("expected reduced context: %d >= %d", len(reduced), len(context))
-	}
-}
-
-func TestPreemptiveReduceContext_TFIDFStrategy(t *testing.T) {
-	engine := New("gpt-4o-mini", Config{
-		APIKey: "test",
-		ContextOverflow: &ContextOverflowConfig{
-			Enabled:        true,
-			MaxModelTokens: 500,
-			Strategy:       "tfidf",
-			SafetyMargin:   0.15,
-		},
-	})
-
-	context := strings.Repeat("Machine learning models process large datasets effectively. ", 100)
-
-	reduced, wasReduced, err := engine.PreemptiveReduceContext("Tell me about ML", context, 200)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if !wasReduced {
-		t.Error("expected reduction with tfidf strategy")
-	}
-	if len(reduced) >= len(context) {
-		t.Errorf("expected shorter context: %d >= %d", len(reduced), len(context))
-	}
-}
-
-func TestPreemptiveReduceContext_TextRankStrategy(t *testing.T) {
-	engine := New("gpt-4o-mini", Config{
-		APIKey: "test",
-		ContextOverflow: &ContextOverflowConfig{
-			Enabled:        true,
-			MaxModelTokens: 500,
-			Strategy:       "textrank",
-			SafetyMargin:   0.15,
-		},
-	})
-
-	context := strings.Repeat("Neural networks are powerful computation models. ", 100)
-
-	reduced, wasReduced, err := engine.PreemptiveReduceContext("Explain neural nets", context, 200)
-	if err != nil {
-		t.Fatalf("unexpected error: %v", err)
-	}
-	if !wasReduced {
-		t.Error("expected reduction with textrank strategy")
-	}
-	if len(reduced) >= len(context) {
-		t.Errorf("expected shorter context: %d >= %d", len(reduced), len(context))
-	}
-}
-
-func TestGetResponseTokenBudget_RLMMethod(t *testing.T) {
-	engine := &RLM{
-		extraParams: map[string]interface{}{
-			"max_tokens": float64(5000),
-		},
-	}
-	budget := engine.getResponseTokenBudget()
-	if budget != 5000 {
-		t.Errorf("expected 5000, got %d", budget)
-	}
-}
-
-func TestGetResponseTokenBudget_MaxCompletionTokensPreferred(t *testing.T) {
-	engine := &RLM{
-		extraParams: map[string]interface{}{
-			"max_tokens":            float64(5000),
-			"max_completion_tokens": float64(8000),
-		},
-	}
-	budget := engine.getResponseTokenBudget()
-	if budget != 8000 {
-		t.Errorf("expected max_completion_tokens=8000 preferred, got %d", budget)
-	}
-}
-
-func TestGetResponseTokenBudget_NoParams(t *testing.T) {
-	engine := &RLM{
-		extraParams: map[string]interface{}{
-			"temperature": 0.7,
-		},
-	}
-	budget := engine.getResponseTokenBudget()
-	if budget != 0 {
-		t.Errorf("expected 0 when no max_tokens set, got %d", budget)
-	}
-}
-
-// ─── Message Pruning Tests ───────────────────────────────────────────────────
-
-func TestPruneMessages_SmallHistory(t *testing.T) {
-	messages := []Message{
-		{Role: "system", Content: "You are helpful."},
-		{Role: "user", Content: "Hello"},
-		{Role: "assistant", Content: "Hi there!"},
-	}
-
-	result := pruneMessages(messages, 100)
-	if len(result) != 3 {
-		t.Errorf("expected 3 messages (no pruning needed), got %d", len(result))
-	}
-}
-
-func TestPruneMessages_PreservesSystemAndLast(t *testing.T) {
-	messages := []Message{
-		{Role: "system", Content: "System prompt"},
-		{Role: "user", Content: "First question"},
-		{Role: "assistant", Content: "First answer"},
-		{Role: "user", Content: "Second question"},
-		{Role: "assistant", Content: "Second answer"},
-		{Role: "user", Content: strings.Repeat("Third question with lots of context. ", 100)},
-		{Role: "assistant", Content: "Third answer"},
-	}
-
-	result := pruneMessages(messages, 50) // Very tight budget
-
-	// Should always keep system prompt (first) and last 2 messages
-	if len(result) < 3 {
-		t.Errorf("expected at least 3 messages, got %d", len(result))
-	}
-	if result[0].Role != "system" {
-		t.Error("first message should be system prompt")
-	}
-	if result[len(result)-1].Content != "Third answer" {
-		t.Error("last message should be the most recent")
-	}
-	if result[len(result)-2].Role != "user" {
-		t.Error("second-to-last should be the most recent user message")
-	}
-}
-
-func TestPruneMessages_KeepsRecentMiddleMessages(t *testing.T) {
-	messages := []Message{
-		{Role: "system", Content: "Short."},
-		{Role: "user", Content: "Q1"},
-		{Role: "assistant", Content: "A1"},
-		{Role: "user", Content: "Q2"},
-		{Role: "assistant", Content: "A2"},
-		{Role: "user", Content: "Q3"},
-		{Role: "assistant", Content: "A3"},
-	}
-
-	// Budget large enough for all
-	result := pruneMessages(messages, 10000)
-	if len(result) != 7 {
-		t.Errorf("expected all 7 messages with large budget, got %d", len(result))
-	}
-}
-
-// ─── Structured Completion Pre-emptive Integration Tests ─────────────────────
-
-func TestStructuredPromptOverhead_Constant(t *testing.T) {
-	// Verify the constant is reasonable (300-500 tokens for structured prompt instructions)
-	if structuredPromptOverhead < 200 || structuredPromptOverhead > 600 {
-		t.Errorf("structuredPromptOverhead=%d seems out of range (expected 200-600)", structuredPromptOverhead)
-	}
-}