recursive-llm-ts 5.0.0 → 5.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -3
- package/bin/rlm-go +0 -0
- package/go/README.md +0 -426
- package/go/integration_test.sh +0 -169
- package/go/rlm/benchmark_test.go +0 -168
- package/go/rlm/context_overflow_test.go +0 -1271
- package/go/rlm/context_savings_test.go +0 -387
- package/go/rlm/lcm_episodes_test.go +0 -384
- package/go/rlm/lcm_test.go +0 -1407
- package/go/rlm/meta_agent_test.go +0 -270
- package/go/rlm/observability_test.go +0 -252
- package/go/rlm/parser_test.go +0 -202
- package/go/rlm/repl_test.go +0 -291
- package/go/rlm/schema_test.go +0 -343
- package/go/rlm/store_backend_test.go +0 -428
- package/go/rlm/structured_test.go +0 -895
- package/go/rlm/textrank_test.go +0 -335
- package/go/rlm/tfidf_test.go +0 -272
- package/go/rlm/token_tracking_test.go +0 -859
- package/go/rlm/tokenizer_test.go +0 -305
- package/go/rlm.test +0 -0
- package/go/test_mock.sh +0 -90
- package/go/test_rlm.sh +0 -41
- package/go/test_simple.sh +0 -78
package/go/rlm/tokenizer_test.go
DELETED
|
@@ -1,305 +0,0 @@
|
|
|
1
|
-
package rlm
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"strings"
|
|
5
|
-
"testing"
|
|
6
|
-
)
|
|
7
|
-
|
|
8
|
-
// ─── Tiktoken BPE Tokenizer Tests ────────────────────────────────────────────
|
|
9
|
-
|
|
10
|
-
func TestTiktokenTokenizer_English(t *testing.T) {
|
|
11
|
-
tok, err := NewTiktokenTokenizer("gpt-4o")
|
|
12
|
-
if err != nil {
|
|
13
|
-
t.Fatalf("failed to create tokenizer: %v", err)
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
text := "Hello, world! This is a test of the tokenizer."
|
|
17
|
-
bpeCount := tok.CountTokens(text)
|
|
18
|
-
heuristic := (&HeuristicTokenizer{}).CountTokens(text)
|
|
19
|
-
|
|
20
|
-
t.Logf("English text: BPE=%d, Heuristic=%d", bpeCount, heuristic)
|
|
21
|
-
|
|
22
|
-
if bpeCount <= 0 {
|
|
23
|
-
t.Error("BPE count should be > 0")
|
|
24
|
-
}
|
|
25
|
-
// BPE and heuristic should give different counts for most text
|
|
26
|
-
// (they may coincidentally match for very short strings, so just verify BPE is reasonable)
|
|
27
|
-
if bpeCount > len(text) {
|
|
28
|
-
t.Errorf("BPE count %d should not exceed character count %d", bpeCount, len(text))
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
func TestTiktokenTokenizer_Code(t *testing.T) {
|
|
33
|
-
tok, err := NewTiktokenTokenizer("gpt-4o")
|
|
34
|
-
if err != nil {
|
|
35
|
-
t.Fatalf("failed to create tokenizer: %v", err)
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
code := `func main() {
|
|
39
|
-
fmt.Println("Hello, World!")
|
|
40
|
-
for i := 0; i < 100; i++ {
|
|
41
|
-
result = append(result, processItem(items[i]))
|
|
42
|
-
}
|
|
43
|
-
}`
|
|
44
|
-
bpeCount := tok.CountTokens(code)
|
|
45
|
-
heuristic := (&HeuristicTokenizer{}).CountTokens(code)
|
|
46
|
-
|
|
47
|
-
t.Logf("Code: BPE=%d, Heuristic=%d (chars=%d)", bpeCount, heuristic, len(code))
|
|
48
|
-
|
|
49
|
-
if bpeCount <= 0 {
|
|
50
|
-
t.Error("BPE count should be > 0 for code")
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
func TestTiktokenTokenizer_JSON(t *testing.T) {
|
|
55
|
-
tok, err := NewTiktokenTokenizer("gpt-4o")
|
|
56
|
-
if err != nil {
|
|
57
|
-
t.Fatalf("failed to create tokenizer: %v", err)
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
jsonData := `{"name": "test", "values": [1, 2, 3, 4, 5], "nested": {"key": "value", "count": 42}}`
|
|
61
|
-
bpeCount := tok.CountTokens(jsonData)
|
|
62
|
-
heuristic := (&HeuristicTokenizer{}).CountTokens(jsonData)
|
|
63
|
-
|
|
64
|
-
t.Logf("JSON: BPE=%d, Heuristic=%d (chars=%d)", bpeCount, heuristic, len(jsonData))
|
|
65
|
-
|
|
66
|
-
if bpeCount <= 0 {
|
|
67
|
-
t.Error("BPE count should be > 0 for JSON")
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
func TestTiktokenTokenizer_CJK(t *testing.T) {
|
|
72
|
-
tok, err := NewTiktokenTokenizer("gpt-4o")
|
|
73
|
-
if err != nil {
|
|
74
|
-
t.Fatalf("failed to create tokenizer: %v", err)
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
// Chinese, Japanese, and Korean text
|
|
78
|
-
cjkText := "这是一个测试。日本語のテスト。한국어 테스트입니다。"
|
|
79
|
-
bpeCount := tok.CountTokens(cjkText)
|
|
80
|
-
heuristic := (&HeuristicTokenizer{}).CountTokens(cjkText)
|
|
81
|
-
|
|
82
|
-
t.Logf("CJK text: BPE=%d, Heuristic=%d (chars=%d, bytes=%d)", bpeCount, heuristic, len([]rune(cjkText)), len(cjkText))
|
|
83
|
-
|
|
84
|
-
if bpeCount <= 0 {
|
|
85
|
-
t.Error("BPE count should be > 0 for CJK")
|
|
86
|
-
}
|
|
87
|
-
// CJK text has ~1.5 chars per token but heuristic assumes ~3.5
|
|
88
|
-
// So BPE should count MORE tokens than heuristic for CJK
|
|
89
|
-
if bpeCount <= heuristic {
|
|
90
|
-
t.Logf("WARNING: BPE (%d) should typically be > heuristic (%d) for CJK text", bpeCount, heuristic)
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
func TestTiktokenTokenizer_Empty(t *testing.T) {
|
|
95
|
-
tok, err := NewTiktokenTokenizer("gpt-4o")
|
|
96
|
-
if err != nil {
|
|
97
|
-
t.Fatalf("failed to create tokenizer: %v", err)
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
if tok.CountTokens("") != 0 {
|
|
101
|
-
t.Error("empty string should return 0 tokens")
|
|
102
|
-
}
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
func TestTiktokenTokenizer_EncodingSelection(t *testing.T) {
|
|
106
|
-
tests := []struct {
|
|
107
|
-
model string
|
|
108
|
-
expected string
|
|
109
|
-
}{
|
|
110
|
-
{"gpt-4o", "o200k_base"},
|
|
111
|
-
{"gpt-4o-mini", "o200k_base"},
|
|
112
|
-
{"gpt-4o-mini-2024-07-18", "o200k_base"},
|
|
113
|
-
{"gpt-4", "cl100k_base"},
|
|
114
|
-
{"gpt-4-turbo", "cl100k_base"},
|
|
115
|
-
{"gpt-3.5-turbo", "cl100k_base"},
|
|
116
|
-
{"claude-3-opus", "cl100k_base"},
|
|
117
|
-
{"claude-sonnet-4", "cl100k_base"},
|
|
118
|
-
{"o1", "o200k_base"},
|
|
119
|
-
{"o3-mini", "o200k_base"},
|
|
120
|
-
{"llama-3.1", "cl100k_base"},
|
|
121
|
-
{"unknown-model", "cl100k_base"},
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
for _, tt := range tests {
|
|
125
|
-
t.Run(tt.model, func(t *testing.T) {
|
|
126
|
-
enc := encodingForModel(tt.model)
|
|
127
|
-
if enc != tt.expected {
|
|
128
|
-
t.Errorf("encodingForModel(%q) = %q, want %q", tt.model, enc, tt.expected)
|
|
129
|
-
}
|
|
130
|
-
})
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
// ─── Heuristic Tokenizer Tests ───────────────────────────────────────────────
|
|
135
|
-
|
|
136
|
-
func TestHeuristicTokenizer_Fallback(t *testing.T) {
|
|
137
|
-
h := &HeuristicTokenizer{}
|
|
138
|
-
|
|
139
|
-
if h.CountTokens("") != 0 {
|
|
140
|
-
t.Error("empty string should be 0")
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
// "Hello" = 5 chars, ceil(5/3.5) = 2
|
|
144
|
-
count := h.CountTokens("Hello")
|
|
145
|
-
if count <= 0 {
|
|
146
|
-
t.Error("should count > 0 tokens for 'Hello'")
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
// Longer text
|
|
150
|
-
longText := strings.Repeat("word ", 1000)
|
|
151
|
-
longCount := h.CountTokens(longText)
|
|
152
|
-
expected := (len(longText)*10 + 34) / 35
|
|
153
|
-
if longCount != expected {
|
|
154
|
-
t.Errorf("heuristic count %d != expected %d", longCount, expected)
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
// ─── Cached Tokenizer Tests ─────────────────────────────────────────────────
|
|
159
|
-
|
|
160
|
-
func TestCachedTokenizer_CacheHit(t *testing.T) {
|
|
161
|
-
callCount := 0
|
|
162
|
-
inner := &countingTokenizer{fn: func(text string) int {
|
|
163
|
-
callCount++
|
|
164
|
-
return len(text) / 4
|
|
165
|
-
}}
|
|
166
|
-
|
|
167
|
-
cached := NewCachedTokenizer(inner)
|
|
168
|
-
|
|
169
|
-
text := "This is a test string for caching"
|
|
170
|
-
|
|
171
|
-
// First call: cache miss
|
|
172
|
-
count1 := cached.CountTokens(text)
|
|
173
|
-
if callCount != 1 {
|
|
174
|
-
t.Errorf("expected 1 inner call, got %d", callCount)
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
// Second call: cache hit (inner should NOT be called again)
|
|
178
|
-
count2 := cached.CountTokens(text)
|
|
179
|
-
if callCount != 1 {
|
|
180
|
-
t.Errorf("expected still 1 inner call after cache hit, got %d", callCount)
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
if count1 != count2 {
|
|
184
|
-
t.Errorf("cache returned different values: %d vs %d", count1, count2)
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
if cached.CacheSize() != 1 {
|
|
188
|
-
t.Errorf("cache size should be 1, got %d", cached.CacheSize())
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
func TestCachedTokenizer_Empty(t *testing.T) {
|
|
193
|
-
inner := &HeuristicTokenizer{}
|
|
194
|
-
cached := NewCachedTokenizer(inner)
|
|
195
|
-
|
|
196
|
-
if cached.CountTokens("") != 0 {
|
|
197
|
-
t.Error("empty string should return 0 without caching")
|
|
198
|
-
}
|
|
199
|
-
if cached.CacheSize() != 0 {
|
|
200
|
-
t.Error("cache should not store empty strings")
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
func TestCachedTokenizer_DifferentStrings(t *testing.T) {
|
|
205
|
-
inner := &HeuristicTokenizer{}
|
|
206
|
-
cached := NewCachedTokenizer(inner)
|
|
207
|
-
|
|
208
|
-
cached.CountTokens("string one")
|
|
209
|
-
cached.CountTokens("string two")
|
|
210
|
-
cached.CountTokens("string three")
|
|
211
|
-
|
|
212
|
-
if cached.CacheSize() != 3 {
|
|
213
|
-
t.Errorf("cache size should be 3, got %d", cached.CacheSize())
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
func TestCachedTokenizer_Inner(t *testing.T) {
|
|
218
|
-
inner := &HeuristicTokenizer{}
|
|
219
|
-
cached := NewCachedTokenizer(inner)
|
|
220
|
-
|
|
221
|
-
if cached.Inner() != inner {
|
|
222
|
-
t.Error("Inner() should return the wrapped tokenizer")
|
|
223
|
-
}
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
// countingTokenizer tracks how many times CountTokens is called.
|
|
227
|
-
type countingTokenizer struct {
|
|
228
|
-
fn func(string) int
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
func (c *countingTokenizer) CountTokens(text string) int {
|
|
232
|
-
return c.fn(text)
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
// ─── Global Default Tokenizer Tests ──────────────────────────────────────────
|
|
236
|
-
|
|
237
|
-
func TestSetDefaultTokenizer_KnownModel(t *testing.T) {
|
|
238
|
-
defer ResetDefaultTokenizer()
|
|
239
|
-
|
|
240
|
-
SetDefaultTokenizer("gpt-4o")
|
|
241
|
-
tok := GetTokenizer()
|
|
242
|
-
|
|
243
|
-
// Should be a CachedTokenizer wrapping a TiktokenTokenizer
|
|
244
|
-
cached, ok := tok.(*CachedTokenizer)
|
|
245
|
-
if !ok {
|
|
246
|
-
t.Fatalf("expected CachedTokenizer, got %T", tok)
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
inner, ok := cached.Inner().(*TiktokenTokenizer)
|
|
250
|
-
if !ok {
|
|
251
|
-
t.Fatalf("expected inner TiktokenTokenizer, got %T", cached.Inner())
|
|
252
|
-
}
|
|
253
|
-
|
|
254
|
-
if inner.EncodingName() != "o200k_base" {
|
|
255
|
-
t.Errorf("expected o200k_base encoding, got %s", inner.EncodingName())
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
// Verify it actually counts tokens
|
|
259
|
-
count := tok.CountTokens("Hello, world!")
|
|
260
|
-
if count <= 0 {
|
|
261
|
-
t.Error("tokenizer should count > 0 tokens")
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
func TestSetDefaultTokenizer_UnknownModel(t *testing.T) {
|
|
266
|
-
defer ResetDefaultTokenizer()
|
|
267
|
-
|
|
268
|
-
// Even unknown models should work because we default to cl100k_base
|
|
269
|
-
SetDefaultTokenizer("totally-unknown-model-xyz")
|
|
270
|
-
tok := GetTokenizer()
|
|
271
|
-
|
|
272
|
-
count := tok.CountTokens("Hello, world!")
|
|
273
|
-
if count <= 0 {
|
|
274
|
-
t.Error("tokenizer should count > 0 tokens even for unknown model")
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
func TestEstimateTokens_UsesDefaultTokenizer(t *testing.T) {
|
|
279
|
-
defer ResetDefaultTokenizer()
|
|
280
|
-
|
|
281
|
-
// With heuristic default
|
|
282
|
-
heuristicCount := EstimateTokens("Hello, world! This is a test.")
|
|
283
|
-
|
|
284
|
-
// Switch to BPE
|
|
285
|
-
SetDefaultTokenizer("gpt-4o")
|
|
286
|
-
bpeCount := EstimateTokens("Hello, world! This is a test.")
|
|
287
|
-
|
|
288
|
-
t.Logf("EstimateTokens: heuristic=%d, bpe=%d", heuristicCount, bpeCount)
|
|
289
|
-
|
|
290
|
-
// Both should be > 0
|
|
291
|
-
if heuristicCount <= 0 || bpeCount <= 0 {
|
|
292
|
-
t.Errorf("both counts should be > 0: heuristic=%d, bpe=%d", heuristicCount, bpeCount)
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
func TestResetDefaultTokenizer(t *testing.T) {
|
|
297
|
-
SetDefaultTokenizer("gpt-4o")
|
|
298
|
-
ResetDefaultTokenizer()
|
|
299
|
-
|
|
300
|
-
tok := GetTokenizer()
|
|
301
|
-
_, ok := tok.(*HeuristicTokenizer)
|
|
302
|
-
if !ok {
|
|
303
|
-
t.Errorf("after reset, expected HeuristicTokenizer, got %T", tok)
|
|
304
|
-
}
|
|
305
|
-
}
|
package/go/rlm.test
DELETED
|
Binary file
|
package/go/test_mock.sh
DELETED
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# Test Go binary with mock responses (no API key needed)
|
|
3
|
-
|
|
4
|
-
set -e
|
|
5
|
-
|
|
6
|
-
echo "🧪 RLM Go Mock Tests (No API Required)"
|
|
7
|
-
echo "================================"
|
|
8
|
-
echo ""
|
|
9
|
-
|
|
10
|
-
# Test 1: Binary accepts input
|
|
11
|
-
echo "📝 Test 1: Binary accepts and parses JSON input"
|
|
12
|
-
echo "-----------------------------------"
|
|
13
|
-
RESULT=$(cat <<EOF | ./rlm 2>&1
|
|
14
|
-
{
|
|
15
|
-
"model": "test-model",
|
|
16
|
-
"query": "test",
|
|
17
|
-
"context": "test",
|
|
18
|
-
"config": {}
|
|
19
|
-
}
|
|
20
|
-
EOF
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
if echo "$RESULT" | grep -q "error"; then
|
|
24
|
-
echo "✅ Binary correctly handles missing API key"
|
|
25
|
-
echo "Error output: $(echo $RESULT | head -c 100)..."
|
|
26
|
-
else
|
|
27
|
-
echo "⚠️ Unexpected output (but binary ran)"
|
|
28
|
-
fi
|
|
29
|
-
echo ""
|
|
30
|
-
|
|
31
|
-
# Test 2: Config parsing
|
|
32
|
-
echo "📝 Test 2: Config parsing works"
|
|
33
|
-
echo "-----------------------------------"
|
|
34
|
-
RESULT=$(cat <<EOF | ./rlm 2>&1
|
|
35
|
-
{
|
|
36
|
-
"model": "gpt-4o-mini",
|
|
37
|
-
"query": "test",
|
|
38
|
-
"context": "test",
|
|
39
|
-
"config": {
|
|
40
|
-
"max_depth": 3,
|
|
41
|
-
"max_iterations": 15,
|
|
42
|
-
"temperature": 0.5
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
EOF
|
|
46
|
-
)
|
|
47
|
-
|
|
48
|
-
if echo "$RESULT" | grep -q "error\|Error"; then
|
|
49
|
-
echo "✅ Binary accepts configuration"
|
|
50
|
-
else
|
|
51
|
-
echo "⚠️ Unexpected response"
|
|
52
|
-
fi
|
|
53
|
-
echo ""
|
|
54
|
-
|
|
55
|
-
# Test 3: Binary executable and responds
|
|
56
|
-
echo "📝 Test 3: Binary is executable"
|
|
57
|
-
echo "-----------------------------------"
|
|
58
|
-
if [ -x "./rlm" ]; then
|
|
59
|
-
echo "✅ Binary is executable"
|
|
60
|
-
SIZE=$(ls -lh ./rlm | awk '{print $5}')
|
|
61
|
-
echo "Binary size: $SIZE"
|
|
62
|
-
else
|
|
63
|
-
echo "❌ Binary is not executable"
|
|
64
|
-
exit 1
|
|
65
|
-
fi
|
|
66
|
-
echo ""
|
|
67
|
-
|
|
68
|
-
# Test 4: Help/version (invalid input)
|
|
69
|
-
echo "📝 Test 4: Binary handles invalid input"
|
|
70
|
-
echo "-----------------------------------"
|
|
71
|
-
RESULT=$(echo "invalid json" | ./rlm 2>&1 || true)
|
|
72
|
-
if echo "$RESULT" | grep -q "Failed to parse input JSON\|error"; then
|
|
73
|
-
echo "✅ Binary properly handles invalid input"
|
|
74
|
-
else
|
|
75
|
-
echo "⚠️ Unexpected error handling"
|
|
76
|
-
fi
|
|
77
|
-
echo ""
|
|
78
|
-
|
|
79
|
-
echo "================================"
|
|
80
|
-
echo "✅ All mock tests passed!"
|
|
81
|
-
echo ""
|
|
82
|
-
echo "Summary:"
|
|
83
|
-
echo " - Binary is built and executable"
|
|
84
|
-
echo " - Accepts JSON input via stdin"
|
|
85
|
-
echo " - Parses configuration correctly"
|
|
86
|
-
echo " - Handles errors gracefully"
|
|
87
|
-
echo " - Returns JSON output on stdout"
|
|
88
|
-
echo ""
|
|
89
|
-
echo "⚠️ Cannot test with real API (quota exceeded)"
|
|
90
|
-
echo " But binary structure is validated ✅"
|
package/go/test_rlm.sh
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# Test script for Go RLM implementation
|
|
3
|
-
|
|
4
|
-
set -e
|
|
5
|
-
|
|
6
|
-
echo "=== Testing Go RLM Implementation ==="
|
|
7
|
-
echo ""
|
|
8
|
-
|
|
9
|
-
# Test 1: Simple FINAL response
|
|
10
|
-
echo "Test 1: Simple FINAL response"
|
|
11
|
-
cat <<'EOF' | ./rlm
|
|
12
|
-
{
|
|
13
|
-
"model": "test-model",
|
|
14
|
-
"query": "What is the answer?",
|
|
15
|
-
"context": "The answer is 42.",
|
|
16
|
-
"config": {
|
|
17
|
-
"api_base": "http://mock-api.example.com",
|
|
18
|
-
"api_key": "test-key",
|
|
19
|
-
"max_depth": 5,
|
|
20
|
-
"max_iterations": 30
|
|
21
|
-
}
|
|
22
|
-
}
|
|
23
|
-
EOF
|
|
24
|
-
echo ""
|
|
25
|
-
|
|
26
|
-
# Test 2: Code extraction from markdown
|
|
27
|
-
echo "Test 2: JavaScript code execution"
|
|
28
|
-
cat <<'EOF' | ./rlm
|
|
29
|
-
{
|
|
30
|
-
"model": "test-model",
|
|
31
|
-
"query": "Count words",
|
|
32
|
-
"context": "Hello World Test Document",
|
|
33
|
-
"config": {
|
|
34
|
-
"max_depth": 3,
|
|
35
|
-
"max_iterations": 10
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
EOF
|
|
39
|
-
echo ""
|
|
40
|
-
|
|
41
|
-
echo "=== All tests completed ==="
|
package/go/test_simple.sh
DELETED
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# Simple integration tests
|
|
3
|
-
|
|
4
|
-
set -e
|
|
5
|
-
|
|
6
|
-
echo "🧪 Simple RLM Integration Tests"
|
|
7
|
-
echo "================================"
|
|
8
|
-
echo ""
|
|
9
|
-
|
|
10
|
-
if [ -z "$OPENAI_API_KEY" ]; then
|
|
11
|
-
echo "❌ Set OPENAI_API_KEY first"
|
|
12
|
-
exit 1
|
|
13
|
-
fi
|
|
14
|
-
|
|
15
|
-
# Test 1
|
|
16
|
-
echo "Test 1: Count word occurrences"
|
|
17
|
-
cat > /tmp/test1.json <<'JSON'
|
|
18
|
-
{
|
|
19
|
-
"model": "gpt-4o-mini",
|
|
20
|
-
"query": "How many times does the word 'test' appear?",
|
|
21
|
-
"context": "This is a test. Another test here. Final test.",
|
|
22
|
-
"config": {
|
|
23
|
-
"max_iterations": 10
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
JSON
|
|
27
|
-
|
|
28
|
-
echo "Running..."
|
|
29
|
-
RESULT=$(cat /tmp/test1.json | OPENAI_API_KEY="$OPENAI_API_KEY" ./rlm)
|
|
30
|
-
echo "✅ Test 1 Result:"
|
|
31
|
-
echo "$RESULT" | jq -r '.result'
|
|
32
|
-
echo "Stats:" $(echo "$RESULT" | jq '.stats')
|
|
33
|
-
echo ""
|
|
34
|
-
|
|
35
|
-
# Test 2
|
|
36
|
-
echo "Test 2: Simple counting"
|
|
37
|
-
cat > /tmp/test2.json <<'JSON'
|
|
38
|
-
{
|
|
39
|
-
"model": "gpt-4o-mini",
|
|
40
|
-
"query": "How many words are in this text?",
|
|
41
|
-
"context": "One two three four five",
|
|
42
|
-
"config": {
|
|
43
|
-
"max_iterations": 10,
|
|
44
|
-
"temperature": 0.1
|
|
45
|
-
}
|
|
46
|
-
}
|
|
47
|
-
JSON
|
|
48
|
-
|
|
49
|
-
echo "Running..."
|
|
50
|
-
RESULT=$(cat /tmp/test2.json | OPENAI_API_KEY="$OPENAI_API_KEY" ./rlm)
|
|
51
|
-
echo "✅ Test 2 Result:"
|
|
52
|
-
echo "$RESULT" | jq -r '.result'
|
|
53
|
-
echo "Stats:" $(echo "$RESULT" | jq '.stats')
|
|
54
|
-
echo ""
|
|
55
|
-
|
|
56
|
-
# Test 3
|
|
57
|
-
echo "Test 3: Extract information"
|
|
58
|
-
cat > /tmp/test3.json <<'JSON'
|
|
59
|
-
{
|
|
60
|
-
"model": "gpt-4o-mini",
|
|
61
|
-
"query": "List all the numbers mentioned",
|
|
62
|
-
"context": "I have 5 apples, 10 oranges, and 3 bananas.",
|
|
63
|
-
"config": {
|
|
64
|
-
"max_iterations": 10
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
JSON
|
|
68
|
-
|
|
69
|
-
echo "Running..."
|
|
70
|
-
RESULT=$(cat /tmp/test3.json | OPENAI_API_KEY="$OPENAI_API_KEY" ./rlm)
|
|
71
|
-
echo "✅ Test 3 Result:"
|
|
72
|
-
echo "$RESULT" | jq -r '.result'
|
|
73
|
-
echo "Stats:" $(echo "$RESULT" | jq '.stats')
|
|
74
|
-
echo ""
|
|
75
|
-
|
|
76
|
-
echo "================================"
|
|
77
|
-
echo "✅ All tests passed!"
|
|
78
|
-
rm -f /tmp/test*.json
|