recursive-llm-ts 5.0.0 → 5.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -3
- package/bin/rlm-go +0 -0
- package/go/README.md +0 -426
- package/go/integration_test.sh +0 -169
- package/go/rlm/benchmark_test.go +0 -168
- package/go/rlm/context_overflow_test.go +0 -1271
- package/go/rlm/context_savings_test.go +0 -387
- package/go/rlm/lcm_episodes_test.go +0 -384
- package/go/rlm/lcm_test.go +0 -1407
- package/go/rlm/meta_agent_test.go +0 -270
- package/go/rlm/observability_test.go +0 -252
- package/go/rlm/parser_test.go +0 -202
- package/go/rlm/repl_test.go +0 -291
- package/go/rlm/schema_test.go +0 -343
- package/go/rlm/store_backend_test.go +0 -428
- package/go/rlm/structured_test.go +0 -895
- package/go/rlm/textrank_test.go +0 -335
- package/go/rlm/tfidf_test.go +0 -272
- package/go/rlm/token_tracking_test.go +0 -859
- package/go/rlm/tokenizer_test.go +0 -305
- package/go/rlm.test +0 -0
- package/go/test_mock.sh +0 -90
- package/go/test_rlm.sh +0 -41
- package/go/test_simple.sh +0 -78
package/go/rlm/textrank_test.go
DELETED
|
@@ -1,335 +0,0 @@
|
|
|
1
|
-
package rlm
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"math"
|
|
5
|
-
"strings"
|
|
6
|
-
"testing"
|
|
7
|
-
)
|
|
8
|
-
|
|
9
|
-
// ─── CosineSimilarity Tests ─────────────────────────────────────────────────
|
|
10
|
-
|
|
11
|
-
func TestCosineSimilarity_IdenticalVectors(t *testing.T) {
|
|
12
|
-
v := tfidfVector{
|
|
13
|
-
terms: map[string]float64{"hello": 1.0, "world": 2.0},
|
|
14
|
-
norm: math.Sqrt(5.0),
|
|
15
|
-
}
|
|
16
|
-
sim := cosineSimilarity(v, v)
|
|
17
|
-
|
|
18
|
-
if sim < 0.99 || sim > 1.01 {
|
|
19
|
-
t.Errorf("expected cosine similarity ~1.0 for identical vectors, got %f", sim)
|
|
20
|
-
}
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
func TestCosineSimilarity_OrthogonalVectors(t *testing.T) {
|
|
24
|
-
a := tfidfVector{
|
|
25
|
-
terms: map[string]float64{"hello": 1.0},
|
|
26
|
-
norm: 1.0,
|
|
27
|
-
}
|
|
28
|
-
b := tfidfVector{
|
|
29
|
-
terms: map[string]float64{"world": 1.0},
|
|
30
|
-
norm: 1.0,
|
|
31
|
-
}
|
|
32
|
-
sim := cosineSimilarity(a, b)
|
|
33
|
-
|
|
34
|
-
if sim != 0 {
|
|
35
|
-
t.Errorf("expected cosine similarity 0 for orthogonal vectors, got %f", sim)
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
func TestCosineSimilarity_ZeroVector(t *testing.T) {
|
|
40
|
-
a := tfidfVector{terms: map[string]float64{}, norm: 0}
|
|
41
|
-
b := tfidfVector{terms: map[string]float64{"hello": 1.0}, norm: 1.0}
|
|
42
|
-
|
|
43
|
-
sim := cosineSimilarity(a, b)
|
|
44
|
-
if sim != 0 {
|
|
45
|
-
t.Errorf("expected 0 for zero vector, got %f", sim)
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
func TestCosineSimilarity_PartialOverlap(t *testing.T) {
|
|
50
|
-
a := tfidfVector{
|
|
51
|
-
terms: map[string]float64{"hello": 1.0, "world": 1.0},
|
|
52
|
-
norm: math.Sqrt(2.0),
|
|
53
|
-
}
|
|
54
|
-
b := tfidfVector{
|
|
55
|
-
terms: map[string]float64{"hello": 1.0, "foo": 1.0},
|
|
56
|
-
norm: math.Sqrt(2.0),
|
|
57
|
-
}
|
|
58
|
-
sim := cosineSimilarity(a, b)
|
|
59
|
-
|
|
60
|
-
// dot = 1*1 = 1, norm product = sqrt(2)*sqrt(2) = 2, sim = 0.5
|
|
61
|
-
if sim < 0.49 || sim > 0.51 {
|
|
62
|
-
t.Errorf("expected cosine similarity ~0.5, got %f", sim)
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
// ─── BuildSimilarityGraph Tests ─────────────────────────────────────────────
|
|
67
|
-
|
|
68
|
-
func TestBuildSimilarityGraph_SimilarSentences(t *testing.T) {
|
|
69
|
-
sentences := []string{
|
|
70
|
-
"The machine learning model processes data efficiently.",
|
|
71
|
-
"The deep learning model processes information quickly.",
|
|
72
|
-
"The weather today is sunny and warm.",
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
config := DefaultTextRankConfig()
|
|
76
|
-
config.MinSimilarity = 0.01 // Very low threshold to ensure edges
|
|
77
|
-
graph := BuildSimilarityGraph(sentences, config)
|
|
78
|
-
|
|
79
|
-
if len(graph) != 3 {
|
|
80
|
-
t.Fatalf("expected 3x3 graph, got %d rows", len(graph))
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
// Sentences 0 and 1 share terms (model, processes, learning) - should have higher similarity
|
|
84
|
-
// Sentence 2 is about weather - should have lower similarity with 0 and 1
|
|
85
|
-
sim01 := graph[0][1]
|
|
86
|
-
sim02 := graph[0][2]
|
|
87
|
-
|
|
88
|
-
if sim01 <= 0 {
|
|
89
|
-
t.Error("expected positive similarity between similar sentences 0 and 1")
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
// 0-1 should be more similar than 0-2
|
|
93
|
-
if sim01 <= sim02 {
|
|
94
|
-
t.Errorf("expected sentences 0,1 to be more similar than 0,2: sim01=%f, sim02=%f", sim01, sim02)
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
func TestBuildSimilarityGraph_Symmetric(t *testing.T) {
|
|
99
|
-
sentences := []string{"First sentence here.", "Second sentence there.", "Third different topic."}
|
|
100
|
-
config := DefaultTextRankConfig()
|
|
101
|
-
config.MinSimilarity = 0.0
|
|
102
|
-
graph := BuildSimilarityGraph(sentences, config)
|
|
103
|
-
|
|
104
|
-
for i := 0; i < len(graph); i++ {
|
|
105
|
-
for j := 0; j < len(graph); j++ {
|
|
106
|
-
if graph[i][j] != graph[j][i] {
|
|
107
|
-
t.Errorf("graph not symmetric: [%d][%d]=%f != [%d][%d]=%f",
|
|
108
|
-
i, j, graph[i][j], j, i, graph[j][i])
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
func TestBuildSimilarityGraph_DiagonalZero(t *testing.T) {
|
|
115
|
-
sentences := []string{"First.", "Second.", "Third."}
|
|
116
|
-
config := DefaultTextRankConfig()
|
|
117
|
-
graph := BuildSimilarityGraph(sentences, config)
|
|
118
|
-
|
|
119
|
-
for i := 0; i < len(graph); i++ {
|
|
120
|
-
if graph[i][i] != 0 {
|
|
121
|
-
t.Errorf("expected zero self-similarity, got %f for sentence %d", graph[i][i], i)
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
// ─── PageRank Tests ─────────────────────────────────────────────────────────
|
|
127
|
-
|
|
128
|
-
func TestPageRank_UniformGraph(t *testing.T) {
|
|
129
|
-
// Fully connected graph with equal weights -> all scores should be equal
|
|
130
|
-
n := 4
|
|
131
|
-
graph := make([][]float64, n)
|
|
132
|
-
for i := range graph {
|
|
133
|
-
graph[i] = make([]float64, n)
|
|
134
|
-
for j := range graph[i] {
|
|
135
|
-
if i != j {
|
|
136
|
-
graph[i][j] = 1.0
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
config := DefaultTextRankConfig()
|
|
142
|
-
scores := PageRank(graph, config)
|
|
143
|
-
|
|
144
|
-
if len(scores) != n {
|
|
145
|
-
t.Fatalf("expected %d scores, got %d", n, len(scores))
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
// All scores should be approximately equal
|
|
149
|
-
for i := 1; i < n; i++ {
|
|
150
|
-
if math.Abs(scores[i]-scores[0]) > 0.01 {
|
|
151
|
-
t.Errorf("expected uniform scores, got %v", scores)
|
|
152
|
-
break
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
func TestPageRank_StarGraph(t *testing.T) {
|
|
158
|
-
// Star graph: node 0 connected to all others, others only connected to 0
|
|
159
|
-
// Node 0 should have the highest score
|
|
160
|
-
n := 5
|
|
161
|
-
graph := make([][]float64, n)
|
|
162
|
-
for i := range graph {
|
|
163
|
-
graph[i] = make([]float64, n)
|
|
164
|
-
}
|
|
165
|
-
for i := 1; i < n; i++ {
|
|
166
|
-
graph[0][i] = 1.0
|
|
167
|
-
graph[i][0] = 1.0
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
config := DefaultTextRankConfig()
|
|
171
|
-
scores := PageRank(graph, config)
|
|
172
|
-
|
|
173
|
-
// Node 0 (hub) should have the highest score
|
|
174
|
-
for i := 1; i < n; i++ {
|
|
175
|
-
if scores[0] <= scores[i] {
|
|
176
|
-
t.Errorf("expected hub node (0) to have highest score: scores[0]=%f, scores[%d]=%f",
|
|
177
|
-
scores[0], i, scores[i])
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
func TestPageRank_EmptyGraph(t *testing.T) {
|
|
183
|
-
scores := PageRank(nil, DefaultTextRankConfig())
|
|
184
|
-
if scores != nil {
|
|
185
|
-
t.Errorf("expected nil for empty graph, got %v", scores)
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
func TestPageRank_DisconnectedGraph(t *testing.T) {
|
|
190
|
-
// Graph with no edges -> all scores should be equal (from the (1-d)/n term)
|
|
191
|
-
n := 3
|
|
192
|
-
graph := make([][]float64, n)
|
|
193
|
-
for i := range graph {
|
|
194
|
-
graph[i] = make([]float64, n)
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
config := DefaultTextRankConfig()
|
|
198
|
-
scores := PageRank(graph, config)
|
|
199
|
-
|
|
200
|
-
for i := 1; i < n; i++ {
|
|
201
|
-
if math.Abs(scores[i]-scores[0]) > 0.001 {
|
|
202
|
-
t.Errorf("expected equal scores for disconnected graph, got %v", scores)
|
|
203
|
-
break
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
func TestPageRank_Convergence(t *testing.T) {
|
|
209
|
-
// PageRank should converge (scores sum to approximately 1)
|
|
210
|
-
n := 4
|
|
211
|
-
graph := make([][]float64, n)
|
|
212
|
-
for i := range graph {
|
|
213
|
-
graph[i] = make([]float64, n)
|
|
214
|
-
for j := range graph[i] {
|
|
215
|
-
if i != j {
|
|
216
|
-
graph[i][j] = float64((i + j) % 3) // asymmetric weights
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
config := DefaultTextRankConfig()
|
|
222
|
-
scores := PageRank(graph, config)
|
|
223
|
-
|
|
224
|
-
sum := 0.0
|
|
225
|
-
for _, s := range scores {
|
|
226
|
-
sum += s
|
|
227
|
-
if s < 0 {
|
|
228
|
-
t.Errorf("negative PageRank score: %f", s)
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
// Sum should be approximately 1.0
|
|
233
|
-
if sum < 0.9 || sum > 1.1 {
|
|
234
|
-
t.Errorf("expected PageRank scores to sum to ~1.0, got %f", sum)
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
// ─── CompressContextTextRank Tests ──────────────────────────────────────────
|
|
239
|
-
|
|
240
|
-
func TestCompressContextTextRank_NoCompressionNeeded(t *testing.T) {
|
|
241
|
-
text := "Short text that fits."
|
|
242
|
-
result := CompressContextTextRank(text, 1000)
|
|
243
|
-
|
|
244
|
-
if result != text {
|
|
245
|
-
t.Errorf("expected unchanged text, got %q", result)
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
func TestCompressContextTextRank_CompressesLargeText(t *testing.T) {
|
|
250
|
-
var parts []string
|
|
251
|
-
for i := 0; i < 30; i++ {
|
|
252
|
-
parts = append(parts, "Machine learning algorithms process large datasets to find patterns in the data.")
|
|
253
|
-
}
|
|
254
|
-
parts = append(parts, "The stock price of AAPL rose 15% to $198.50 after the earnings report.")
|
|
255
|
-
parts = append(parts, "Quantum computing achieved 99.9% gate fidelity using topological qubits.")
|
|
256
|
-
text := strings.Join(parts, " ")
|
|
257
|
-
|
|
258
|
-
originalTokens := EstimateTokens(text)
|
|
259
|
-
targetTokens := originalTokens / 3
|
|
260
|
-
|
|
261
|
-
result := CompressContextTextRank(text, targetTokens)
|
|
262
|
-
|
|
263
|
-
resultTokens := EstimateTokens(result)
|
|
264
|
-
if resultTokens > targetTokens+10 {
|
|
265
|
-
t.Errorf("compressed result (%d tokens) exceeds target (%d tokens)", resultTokens, targetTokens)
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
func TestCompressContextTextRank_PreservesOrder(t *testing.T) {
|
|
270
|
-
text := "Alpha is the first Greek letter. Beta comes after alpha. Gamma is the third letter. Delta is fourth. Epsilon follows delta."
|
|
271
|
-
result := CompressContextTextRank(text, 30)
|
|
272
|
-
|
|
273
|
-
sentences := SplitSentences(result)
|
|
274
|
-
for i := 1; i < len(sentences); i++ {
|
|
275
|
-
posI := strings.Index(text, sentences[i])
|
|
276
|
-
posPrev := strings.Index(text, sentences[i-1])
|
|
277
|
-
if posI < posPrev {
|
|
278
|
-
t.Errorf("order not preserved: %q before %q", sentences[i-1], sentences[i])
|
|
279
|
-
}
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
|
|
283
|
-
func TestCompressContextTextRank_Empty(t *testing.T) {
|
|
284
|
-
result := CompressContextTextRank("", 100)
|
|
285
|
-
if result != "" {
|
|
286
|
-
t.Errorf("expected empty result, got %q", result)
|
|
287
|
-
}
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
func TestCompressContextTextRank_WithConfig(t *testing.T) {
|
|
291
|
-
text := "Machine learning models. Deep learning networks. Neural network architectures. Data processing pipelines. Cloud computing infrastructure."
|
|
292
|
-
|
|
293
|
-
config := TextRankConfig{
|
|
294
|
-
DampingFactor: 0.85,
|
|
295
|
-
MaxIterations: 50,
|
|
296
|
-
ConvergenceThreshold: 0.001,
|
|
297
|
-
MinSimilarity: 0.0, // Allow all edges
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
result := CompressContextTextRankWithConfig(text, 15, config)
|
|
301
|
-
|
|
302
|
-
if len(result) == 0 {
|
|
303
|
-
t.Error("expected non-empty result")
|
|
304
|
-
}
|
|
305
|
-
if EstimateTokens(result) > 20 { // Some slack
|
|
306
|
-
t.Errorf("result exceeds budget: %d tokens", EstimateTokens(result))
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
// ─── TextRank vs TF-IDF Comparison ──────────────────────────────────────────
|
|
311
|
-
|
|
312
|
-
func TestTextRank_DifferentFromTFIDF(t *testing.T) {
|
|
313
|
-
// TextRank and TF-IDF should produce different rankings because TextRank
|
|
314
|
-
// considers inter-sentence relationships while TF-IDF only considers term rarity
|
|
315
|
-
text := "Machine learning processes data. " +
|
|
316
|
-
"Deep learning is a subset of machine learning. " +
|
|
317
|
-
"Neural networks power deep learning. " +
|
|
318
|
-
"The weather is sunny today. " +
|
|
319
|
-
"Rain is expected tomorrow."
|
|
320
|
-
|
|
321
|
-
tfidfResult := CompressContextTFIDF(text, 20)
|
|
322
|
-
textrankResult := CompressContextTextRank(text, 20)
|
|
323
|
-
|
|
324
|
-
// They should both produce non-empty results
|
|
325
|
-
if len(tfidfResult) == 0 {
|
|
326
|
-
t.Error("TF-IDF result is empty")
|
|
327
|
-
}
|
|
328
|
-
if len(textrankResult) == 0 {
|
|
329
|
-
t.Error("TextRank result is empty")
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
// They CAN be the same for simple inputs, but both should work
|
|
333
|
-
t.Logf("TF-IDF: %q", tfidfResult)
|
|
334
|
-
t.Logf("TextRank: %q", textrankResult)
|
|
335
|
-
}
|
package/go/rlm/tfidf_test.go
DELETED
|
@@ -1,272 +0,0 @@
|
|
|
1
|
-
package rlm
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"strings"
|
|
5
|
-
"testing"
|
|
6
|
-
)
|
|
7
|
-
|
|
8
|
-
// ─── SplitSentences Tests ───────────────────────────────────────────────────
|
|
9
|
-
|
|
10
|
-
func TestSplitSentences_Basic(t *testing.T) {
|
|
11
|
-
text := "First sentence. Second sentence. Third sentence."
|
|
12
|
-
sentences := SplitSentences(text)
|
|
13
|
-
|
|
14
|
-
if len(sentences) != 3 {
|
|
15
|
-
t.Fatalf("expected 3 sentences, got %d: %v", len(sentences), sentences)
|
|
16
|
-
}
|
|
17
|
-
if sentences[0] != "First sentence." {
|
|
18
|
-
t.Errorf("sentence 0: %q", sentences[0])
|
|
19
|
-
}
|
|
20
|
-
if sentences[1] != "Second sentence." {
|
|
21
|
-
t.Errorf("sentence 1: %q", sentences[1])
|
|
22
|
-
}
|
|
23
|
-
if sentences[2] != "Third sentence." {
|
|
24
|
-
t.Errorf("sentence 2: %q", sentences[2])
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
func TestSplitSentences_ParagraphBreaks(t *testing.T) {
|
|
29
|
-
text := "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
|
|
30
|
-
sentences := SplitSentences(text)
|
|
31
|
-
|
|
32
|
-
if len(sentences) < 3 {
|
|
33
|
-
t.Fatalf("expected at least 3 sentences, got %d: %v", len(sentences), sentences)
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
func TestSplitSentences_MixedPunctuation(t *testing.T) {
|
|
38
|
-
text := "Is this a question? Yes it is! And here is a statement."
|
|
39
|
-
sentences := SplitSentences(text)
|
|
40
|
-
|
|
41
|
-
if len(sentences) != 3 {
|
|
42
|
-
t.Fatalf("expected 3 sentences, got %d: %v", len(sentences), sentences)
|
|
43
|
-
}
|
|
44
|
-
if sentences[0] != "Is this a question?" {
|
|
45
|
-
t.Errorf("sentence 0: %q", sentences[0])
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
func TestSplitSentences_Empty(t *testing.T) {
|
|
50
|
-
sentences := SplitSentences("")
|
|
51
|
-
if len(sentences) != 0 {
|
|
52
|
-
t.Errorf("expected 0 sentences for empty string, got %d", len(sentences))
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
sentences = SplitSentences(" ")
|
|
56
|
-
if len(sentences) != 0 {
|
|
57
|
-
t.Errorf("expected 0 sentences for whitespace, got %d", len(sentences))
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
func TestSplitSentences_NoTerminator(t *testing.T) {
|
|
62
|
-
text := "A sentence without punctuation"
|
|
63
|
-
sentences := SplitSentences(text)
|
|
64
|
-
|
|
65
|
-
if len(sentences) != 1 {
|
|
66
|
-
t.Fatalf("expected 1 sentence, got %d: %v", len(sentences), sentences)
|
|
67
|
-
}
|
|
68
|
-
if sentences[0] != "A sentence without punctuation" {
|
|
69
|
-
t.Errorf("sentence: %q", sentences[0])
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
// ─── TokenizeWords Tests ────────────────────────────────────────────────────
|
|
74
|
-
|
|
75
|
-
func TestTokenizeWords_Basic(t *testing.T) {
|
|
76
|
-
words := TokenizeWords("Hello, World! This is a test.")
|
|
77
|
-
expected := []string{"hello", "world", "this", "is", "a", "test"}
|
|
78
|
-
|
|
79
|
-
if len(words) != len(expected) {
|
|
80
|
-
t.Fatalf("expected %d words, got %d: %v", len(expected), len(words), words)
|
|
81
|
-
}
|
|
82
|
-
for i, w := range words {
|
|
83
|
-
if w != expected[i] {
|
|
84
|
-
t.Errorf("word %d: got %q, expected %q", i, w, expected[i])
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
func TestTokenizeWords_Numbers(t *testing.T) {
|
|
90
|
-
words := TokenizeWords("There are 42 cats and 7 dogs.")
|
|
91
|
-
// Should include numbers
|
|
92
|
-
found42 := false
|
|
93
|
-
for _, w := range words {
|
|
94
|
-
if w == "42" {
|
|
95
|
-
found42 = true
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
if !found42 {
|
|
99
|
-
t.Error("expected tokenized words to include '42'")
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
func TestTokenizeWords_Empty(t *testing.T) {
|
|
104
|
-
words := TokenizeWords("")
|
|
105
|
-
if len(words) != 0 {
|
|
106
|
-
t.Errorf("expected 0 words for empty string, got %d", len(words))
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
// ─── FilterStopWords Tests ──────────────────────────────────────────────────
|
|
111
|
-
|
|
112
|
-
func TestFilterStopWords(t *testing.T) {
|
|
113
|
-
words := []string{"the", "quick", "brown", "fox", "is", "a", "animal"}
|
|
114
|
-
filtered := FilterStopWords(words)
|
|
115
|
-
|
|
116
|
-
for _, w := range filtered {
|
|
117
|
-
if stopWords[w] {
|
|
118
|
-
t.Errorf("stop word %q was not filtered", w)
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
// "quick", "brown", "fox", "animal" should survive
|
|
123
|
-
if len(filtered) < 3 {
|
|
124
|
-
t.Errorf("expected at least 3 content words, got %d: %v", len(filtered), filtered)
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
// ─── ComputeTFIDF Tests ─────────────────────────────────────────────────────
|
|
129
|
-
|
|
130
|
-
func TestComputeTFIDF_Basic(t *testing.T) {
|
|
131
|
-
sentences := []string{
|
|
132
|
-
"The machine learning algorithm processes data efficiently.",
|
|
133
|
-
"Natural language processing uses deep learning models.",
|
|
134
|
-
"The weather today is sunny and warm.",
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
scored := ComputeTFIDF(sentences)
|
|
138
|
-
|
|
139
|
-
if len(scored) != 3 {
|
|
140
|
-
t.Fatalf("expected 3 scored sentences, got %d", len(scored))
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
// All scores should be non-negative
|
|
144
|
-
for i, s := range scored {
|
|
145
|
-
if s.Score < 0 {
|
|
146
|
-
t.Errorf("sentence %d has negative score: %f", i, s.Score)
|
|
147
|
-
}
|
|
148
|
-
if s.Index != i {
|
|
149
|
-
t.Errorf("sentence %d has wrong index: %d", i, s.Index)
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
func TestComputeTFIDF_UniqueTermsScoreHigher(t *testing.T) {
|
|
155
|
-
// A sentence with unique terms (not appearing in other sentences) should score higher
|
|
156
|
-
sentences := []string{
|
|
157
|
-
"Common words appear everywhere in text.",
|
|
158
|
-
"Common words appear everywhere in documents.",
|
|
159
|
-
"Quantum entanglement revolutionizes cryptographic security protocols.",
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
scored := ComputeTFIDF(sentences)
|
|
163
|
-
|
|
164
|
-
// The third sentence has unique terms not shared with others, so it should have a high score
|
|
165
|
-
// (though IDF will boost unique terms)
|
|
166
|
-
if scored[2].Score <= 0 {
|
|
167
|
-
t.Error("sentence with unique terms should have positive score")
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
func TestComputeTFIDF_Empty(t *testing.T) {
|
|
172
|
-
scored := ComputeTFIDF(nil)
|
|
173
|
-
if scored != nil {
|
|
174
|
-
t.Errorf("expected nil for empty input, got %v", scored)
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
func TestComputeTFIDF_PreservesIndex(t *testing.T) {
|
|
179
|
-
sentences := []string{"First.", "Second.", "Third."}
|
|
180
|
-
scored := ComputeTFIDF(sentences)
|
|
181
|
-
|
|
182
|
-
for i, s := range scored {
|
|
183
|
-
if s.Index != i {
|
|
184
|
-
t.Errorf("expected index %d, got %d", i, s.Index)
|
|
185
|
-
}
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
// ─── CompressContextTFIDF Tests ─────────────────────────────────────────────
|
|
190
|
-
|
|
191
|
-
func TestCompressContextTFIDF_NoCompressionNeeded(t *testing.T) {
|
|
192
|
-
text := "Short text that fits easily."
|
|
193
|
-
result := CompressContextTFIDF(text, 1000)
|
|
194
|
-
|
|
195
|
-
if result != text {
|
|
196
|
-
t.Errorf("expected unchanged text when no compression needed, got %q", result)
|
|
197
|
-
}
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
func TestCompressContextTFIDF_CompressesLargeText(t *testing.T) {
|
|
201
|
-
// Build a large document with many sentences
|
|
202
|
-
var sentences []string
|
|
203
|
-
for i := 0; i < 50; i++ {
|
|
204
|
-
sentences = append(sentences, "This is a test sentence with various content and information.")
|
|
205
|
-
}
|
|
206
|
-
// Add some unique high-value sentences
|
|
207
|
-
sentences = append(sentences, "The quantum computing breakthrough enables 1000x faster processing.")
|
|
208
|
-
sentences = append(sentences, "Revenue grew 47% year-over-year to reach $2.3 billion in Q4.")
|
|
209
|
-
text := strings.Join(sentences, " ")
|
|
210
|
-
|
|
211
|
-
// Request much smaller budget than the full text
|
|
212
|
-
originalTokens := EstimateTokens(text)
|
|
213
|
-
targetTokens := originalTokens / 3
|
|
214
|
-
|
|
215
|
-
result := CompressContextTFIDF(text, targetTokens)
|
|
216
|
-
|
|
217
|
-
resultTokens := EstimateTokens(result)
|
|
218
|
-
if resultTokens > targetTokens+10 { // Allow small slack
|
|
219
|
-
t.Errorf("compressed result (%d tokens) exceeds target (%d tokens)", resultTokens, targetTokens)
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
if len(result) >= len(text) {
|
|
223
|
-
t.Errorf("expected compressed result to be shorter: %d >= %d chars", len(result), len(text))
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
func TestCompressContextTFIDF_PreservesOrder(t *testing.T) {
|
|
228
|
-
text := "Alpha sentence first. Beta sentence second. Gamma sentence third. Delta sentence fourth. Epsilon sentence fifth."
|
|
229
|
-
// Very small budget to force selection of only a few sentences
|
|
230
|
-
result := CompressContextTFIDF(text, 20)
|
|
231
|
-
|
|
232
|
-
// The selected sentences should appear in their original order
|
|
233
|
-
sentences := SplitSentences(result)
|
|
234
|
-
if len(sentences) == 0 {
|
|
235
|
-
t.Fatal("expected at least one sentence in result")
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
// Verify order: if we find multiple sentences, their order should be preserved
|
|
239
|
-
for i := 1; i < len(sentences); i++ {
|
|
240
|
-
posI := strings.Index(text, sentences[i])
|
|
241
|
-
posPrev := strings.Index(text, sentences[i-1])
|
|
242
|
-
if posI < posPrev {
|
|
243
|
-
t.Errorf("sentence order not preserved: %q appears before %q in original but after in result",
|
|
244
|
-
sentences[i-1], sentences[i])
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
func TestCompressContextTFIDF_HighValueSentencesSelected(t *testing.T) {
|
|
250
|
-
// Mix of generic and specific/data-rich sentences
|
|
251
|
-
text := "The weather is nice today. " +
|
|
252
|
-
"It is a good day to go outside. " +
|
|
253
|
-
"The GDP of Japan reached $4.2 trillion in 2024 with 2.3% growth. " +
|
|
254
|
-
"Trees are green and the sky is blue. " +
|
|
255
|
-
"CRISPR-Cas9 gene editing achieved 99.7% accuracy in clinical trials at Johns Hopkins."
|
|
256
|
-
|
|
257
|
-
// Budget enough for ~2 sentences
|
|
258
|
-
result := CompressContextTFIDF(text, 40)
|
|
259
|
-
|
|
260
|
-
// The data-rich sentences should be selected over generic ones
|
|
261
|
-
hasSpecific := strings.Contains(result, "trillion") || strings.Contains(result, "CRISPR") || strings.Contains(result, "accuracy")
|
|
262
|
-
if !hasSpecific {
|
|
263
|
-
t.Errorf("expected high-value sentences to be selected, got: %q", result)
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
|
|
267
|
-
func TestCompressContextTFIDF_EmptyText(t *testing.T) {
|
|
268
|
-
result := CompressContextTFIDF("", 100)
|
|
269
|
-
if result != "" {
|
|
270
|
-
t.Errorf("expected empty result for empty input, got %q", result)
|
|
271
|
-
}
|
|
272
|
-
}
|