qmdr 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +29 -0
- package/.env.example +85 -0
- package/.gitattributes +3 -0
- package/.github/workflows/release.yml +77 -0
- package/AI-SETUP.md +466 -0
- package/LICENSE +22 -0
- package/README.md +78 -0
- package/bun.lock +637 -0
- package/docs/README-zh.md +78 -0
- package/docs/refactor-checklist.md +54 -0
- package/docs/setup-openclaw.md +139 -0
- package/example-index.yml +33 -0
- package/finetune/BALANCED_DISTRIBUTION.md +157 -0
- package/finetune/DATA_IMPROVEMENTS.md +218 -0
- package/finetune/Justfile +43 -0
- package/finetune/Modelfile +16 -0
- package/finetune/README.md +299 -0
- package/finetune/SCORING.md +286 -0
- package/finetune/configs/accelerate_multi_gpu.yaml +17 -0
- package/finetune/configs/grpo.yaml +49 -0
- package/finetune/configs/sft.yaml +42 -0
- package/finetune/configs/sft_local.yaml +40 -0
- package/finetune/convert_gguf.py +221 -0
- package/finetune/data/best_glm_prompt.txt +17 -0
- package/finetune/data/gepa_generated.prompts.json +32 -0
- package/finetune/data/qmd_expansion_balanced_deduped.jsonl +413 -0
- package/finetune/data/qmd_expansion_diverse_addon.jsonl +386 -0
- package/finetune/data/qmd_expansion_handcrafted.jsonl +65 -0
- package/finetune/data/qmd_expansion_handcrafted_only.jsonl +336 -0
- package/finetune/data/qmd_expansion_locations.jsonl +64 -0
- package/finetune/data/qmd_expansion_people.jsonl +46 -0
- package/finetune/data/qmd_expansion_short_nontech.jsonl +200 -0
- package/finetune/data/qmd_expansion_v2.jsonl +1498 -0
- package/finetune/data/qmd_only_sampled.jsonl +399 -0
- package/finetune/dataset/analyze_data.py +369 -0
- package/finetune/dataset/clean_data.py +906 -0
- package/finetune/dataset/generate_balanced.py +823 -0
- package/finetune/dataset/generate_data.py +714 -0
- package/finetune/dataset/generate_data_offline.py +206 -0
- package/finetune/dataset/generate_diverse.py +441 -0
- package/finetune/dataset/generate_ollama.py +326 -0
- package/finetune/dataset/prepare_data.py +197 -0
- package/finetune/dataset/schema.py +73 -0
- package/finetune/dataset/score_data.py +115 -0
- package/finetune/dataset/validate_schema.py +104 -0
- package/finetune/eval.py +196 -0
- package/finetune/evals/queries.txt +56 -0
- package/finetune/gepa/__init__.py +1 -0
- package/finetune/gepa/best_prompt.txt +31 -0
- package/finetune/gepa/best_prompt_glm.txt +1 -0
- package/finetune/gepa/dspy_gepa.py +204 -0
- package/finetune/gepa/example.py +117 -0
- package/finetune/gepa/generate.py +129 -0
- package/finetune/gepa/gepa_outputs.jsonl +10 -0
- package/finetune/gepa/gepa_outputs_glm.jsonl +20 -0
- package/finetune/gepa/model.json +19 -0
- package/finetune/gepa/optimizer.py +70 -0
- package/finetune/gepa/score.py +84 -0
- package/finetune/jobs/eval.py +490 -0
- package/finetune/jobs/eval_common.py +354 -0
- package/finetune/jobs/eval_verbose.py +113 -0
- package/finetune/jobs/grpo.py +141 -0
- package/finetune/jobs/quantize.py +244 -0
- package/finetune/jobs/sft.py +121 -0
- package/finetune/pyproject.toml +23 -0
- package/finetune/reward.py +610 -0
- package/finetune/train.py +611 -0
- package/finetune/uv.lock +4070 -0
- package/flake.lock +61 -0
- package/flake.nix +83 -0
- package/migrate-schema.ts +162 -0
- package/package.json +56 -0
- package/skills/qmdr/SKILL.md +172 -0
- package/skills/qmdr/references/mcp-setup.md +88 -0
- package/src/app/commands/collection.ts +55 -0
- package/src/app/commands/context.ts +82 -0
- package/src/app/commands/document.ts +46 -0
- package/src/app/commands/maintenance.ts +60 -0
- package/src/app/commands/search.ts +45 -0
- package/src/app/ports/llm.ts +13 -0
- package/src/app/services/llm-service.ts +145 -0
- package/src/cli.test.ts +963 -0
- package/src/collections.ts +390 -0
- package/src/eval.test.ts +412 -0
- package/src/formatter.ts +427 -0
- package/src/llm.test.ts +559 -0
- package/src/llm.ts +1990 -0
- package/src/mcp.test.ts +889 -0
- package/src/mcp.ts +626 -0
- package/src/qmd.ts +3330 -0
- package/src/store/collections.ts +7 -0
- package/src/store/context.ts +10 -0
- package/src/store/db.ts +5 -0
- package/src/store/documents.ts +26 -0
- package/src/store/maintenance.ts +15 -0
- package/src/store/path.ts +13 -0
- package/src/store/search.ts +10 -0
- package/src/store-paths.test.ts +395 -0
- package/src/store.test.ts +2483 -0
- package/src/store.ts +2813 -0
- package/test/eval-harness.ts +223 -0
- package/tsconfig.json +29 -0
package/src/llm.test.ts
ADDED
@@ -0,0 +1,559 @@
/**
 * llm.test.ts - Unit tests for the LLM abstraction layer (node-llama-cpp)
 *
 * Run with: bun test src/llm.test.ts
 *
 * These tests require the actual models to be downloaded. Run the embed or
 * rerank functions first to trigger model downloads.
 */

import { describe, test, expect, beforeAll, afterAll } from "bun:test";
import {
  LlamaCpp,
  getDefaultLlamaCpp,
  disposeDefaultLlamaCpp,
  withLLMSession,
  canUnloadLLM,
  SessionReleasedError,
  type RerankDocument,
  type ILLMSession,
} from "./llm.js";

// =============================================================================
// Singleton Tests (no model loading required)
// =============================================================================

describe("Default LlamaCpp Singleton", () => {
  // Test singleton behavior without resetting to avoid orphan instances
  test("getDefaultLlamaCpp returns same instance on subsequent calls", () => {
    const llm1 = getDefaultLlamaCpp();
    const llm2 = getDefaultLlamaCpp();
    expect(llm1).toBe(llm2);
    expect(llm1).toBeInstanceOf(LlamaCpp);
  });
});

// =============================================================================
// Model Existence Tests
// =============================================================================

describe("LlamaCpp.modelExists", () => {
  test("returns exists:true for HuggingFace model URIs", async () => {
    const llm = getDefaultLlamaCpp();
    const result = await llm.modelExists("hf:org/repo/model.gguf");

    expect(result.exists).toBe(true);
    expect(result.name).toBe("hf:org/repo/model.gguf");
  });

  test("returns exists:false for non-existent local paths", async () => {
    const llm = getDefaultLlamaCpp();
    const result = await llm.modelExists("/nonexistent/path/model.gguf");

    expect(result.exists).toBe(false);
    expect(result.name).toBe("/nonexistent/path/model.gguf");
  });
});

// =============================================================================
// Integration Tests (require actual models)
// =============================================================================

describe("LlamaCpp Integration", () => {
  // Use the singleton to avoid multiple Metal contexts
  const llm = getDefaultLlamaCpp();

  afterAll(async () => {
    // Ensure native resources are released to avoid ggml-metal asserts on process exit.
    await disposeDefaultLlamaCpp();
  });

  describe("embed", () => {
    test("returns embedding with correct dimensions", async () => {
      const result = await llm.embed("Hello world");

      expect(result).not.toBeNull();
      expect(result!.embedding).toBeInstanceOf(Array);
      expect(result!.embedding.length).toBeGreaterThan(0);
      // embeddinggemma outputs 768 dimensions
      expect(result!.embedding.length).toBe(768);
    });

    test("returns consistent embeddings for same input", async () => {
      const result1 = await llm.embed("test text");
      const result2 = await llm.embed("test text");

      expect(result1).not.toBeNull();
      expect(result2).not.toBeNull();

      // Embeddings should be identical for the same input
      for (let i = 0; i < result1!.embedding.length; i++) {
        expect(result1!.embedding[i]).toBeCloseTo(result2!.embedding[i]!, 5);
      }
    });

    test("returns different embeddings for different inputs", async () => {
      const result1 = await llm.embed("cats are great");
      const result2 = await llm.embed("database optimization");

      expect(result1).not.toBeNull();
      expect(result2).not.toBeNull();

      // Calculate cosine similarity - should be less than 1.0 (not identical)
      let dotProduct = 0;
      let norm1 = 0;
      let norm2 = 0;
      for (let i = 0; i < result1!.embedding.length; i++) {
        const v1 = result1!.embedding[i]!;
        const v2 = result2!.embedding[i]!;
        dotProduct += v1 * v2;
        norm1 += v1 ** 2;
        norm2 += v2 ** 2;
      }
      const similarity = dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));

      expect(similarity).toBeLessThan(0.95); // Should be meaningfully different
    });
  });
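
  // Sketch (added for illustration, not part of the published file): the
  // cosine-similarity computation the test above performs inline, factored as
  // a standalone helper. cos(a, b) = (a . b) / (|a| |b|); values near 1 mean
  // the vectors point the same way, values near 0 mean they are unrelated.
  function cosineSimilaritySketch(a: number[], b: number[]): number {
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      dot += a[i]! * b[i]!;
      normA += a[i]! ** 2;
      normB += b[i]! ** 2;
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
  }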

  describe("embedBatch", () => {
    test("returns embeddings for multiple texts", async () => {
      const texts = ["Hello world", "Test text", "Another document"];
      const results = await llm.embedBatch(texts);

      expect(results).toHaveLength(3);
      for (const result of results) {
        expect(result).not.toBeNull();
        expect(result!.embedding.length).toBe(768);
      }
    });

    test("returns same results as individual embed calls", async () => {
      const texts = ["cats are great", "dogs are awesome"];

      // Get batch embeddings
      const batchResults = await llm.embedBatch(texts);

      // Get individual embeddings
      const individualResults = await Promise.all(texts.map(t => llm.embed(t)));

      // Compare - should be identical
      for (let i = 0; i < texts.length; i++) {
        expect(batchResults[i]).not.toBeNull();
        expect(individualResults[i]).not.toBeNull();
        for (let j = 0; j < batchResults[i]!.embedding.length; j++) {
          expect(batchResults[i]!.embedding[j]).toBeCloseTo(individualResults[i]!.embedding[j]!, 5);
        }
      }
    });

    test("handles empty array", async () => {
      const results = await llm.embedBatch([]);
      expect(results).toHaveLength(0);
    });

    test("batch is faster than sequential", async () => {
      const texts = Array(10).fill(null).map((_, i) => `Document number ${i} with content`);

      // Time batch
      const batchStart = Date.now();
      await llm.embedBatch(texts);
      const batchTime = Date.now() - batchStart;

      // Time sequential
      const seqStart = Date.now();
      for (const text of texts) {
        await llm.embed(text);
      }
      const seqTime = Date.now() - seqStart;

      console.log(`Batch: ${batchTime}ms, Sequential: ${seqTime}ms`);
      // Performance is machine/load dependent. We only assert batch isn't drastically worse.
      expect(batchTime).toBeLessThanOrEqual(seqTime * 3);
    });

    test("handles concurrent embedBatch calls on fresh instance without race condition", async () => {
      // This test verifies the fix for a race condition where concurrent calls to
      // ensureEmbedContext() could create multiple contexts. Without the promise guard,
      // each concurrent embedBatch call sees embedContext === null and creates its own
      // context, causing resource leaks and potential "Context is disposed" errors.
      //
      // See: https://github.com/tobi/qmd/pull/54
      //
      // The fix uses a promise guard to ensure only one context creation runs at a time.
      // We verify this by instrumenting createEmbeddingContext to count invocations.

      const freshLlm = new LlamaCpp({});
      let contextCreateCount = 0;

      // Instrument the model's createEmbeddingContext to count calls
      const originalEnsureEmbedModel = (freshLlm as any).ensureEmbedModel.bind(freshLlm);
      let modelInstrumented = false;
      (freshLlm as any).ensureEmbedModel = async function() {
        const model = await originalEnsureEmbedModel();
        if (!modelInstrumented) {
          modelInstrumented = true;
          const originalCreate = model.createEmbeddingContext.bind(model);
          model.createEmbeddingContext = async function(...args: any[]) {
            contextCreateCount++;
            return originalCreate(...args);
          };
        }
        return model;
      };

      const texts = Array(10).fill(null).map((_, i) => `Document ${i}`);

      // Call embedBatch 5 TIMES in parallel on fresh instance.
      // Without the promise guard fix, this would create 5 contexts (one per call).
      // With the fix, only 1 context should be created.
      const batches = await Promise.all([
        freshLlm.embedBatch(texts.slice(0, 2)),
        freshLlm.embedBatch(texts.slice(2, 4)),
        freshLlm.embedBatch(texts.slice(4, 6)),
        freshLlm.embedBatch(texts.slice(6, 8)),
        freshLlm.embedBatch(texts.slice(8, 10)),
      ]);

      const allResults = batches.flat();
      expect(allResults).toHaveLength(10);

      const successCount = allResults.filter(r => r !== null).length;
      expect(successCount).toBe(10);

      // THE KEY ASSERTION: Only 1 context should be created, not 5
      // Without the fix, contextCreateCount would be 5 (one per concurrent embedBatch call)
      console.log(`Context creation count: ${contextCreateCount} (expected: 1)`);
      expect(contextCreateCount).toBe(1);

      await freshLlm.dispose();
    }, 60000);
  });
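
  // Sketch (added for illustration, not part of the published file): the
  // promise-guard pattern the race-condition test above verifies. "lazyOnce"
  // is a hypothetical name; the real guard lives inside LlamaCpp in llm.ts.
  // Concurrent callers all await the same in-flight promise, so the expensive
  // create() runs exactly once instead of once per caller.
  function lazyOnce<T>(create: () => Promise<T>): () => Promise<T> {
    let inflight: Promise<T> | null = null;
    return () => {
      if (inflight === null) {
        inflight = create().catch((err) => {
          inflight = null; // reset so a failed creation can be retried
          throw err;
        });
      }
      return inflight;
    };
  }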

  describe("rerank", () => {
    test("scores capital of France question correctly", async () => {
      const query = "What is the capital of France?";
      const documents: RerankDocument[] = [
        { file: "butterflies.txt", text: "Butterflies indeed fly through the garden." },
        { file: "france.txt", text: "The capital of France is Paris." },
        { file: "canada.txt", text: "The capital of Canada is Ottawa." },
      ];

      const result = await llm.rerank(query, documents);

      expect(result.results).toHaveLength(3);

      // The France document should score highest
      expect(result.results[0]!.file).toBe("france.txt");
      expect(result.results[0]!.score).toBeGreaterThan(0.7);

      // Canada should be somewhat relevant (also about capitals)
      expect(result.results[1]!.file).toBe("canada.txt");

      // Butterflies should score lowest
      expect(result.results[2]!.file).toBe("butterflies.txt");
      expect(result.results[2]!.score).toBeLessThan(0.6);
    });

    test("scores authentication query correctly", async () => {
      const query = "How do I configure authentication?";
      const documents: RerankDocument[] = [
        { file: "weather.md", text: "The weather today is sunny with mild temperatures." },
        { file: "auth.md", text: "Authentication can be configured by setting the AUTH_SECRET environment variable." },
        { file: "pizza.md", text: "Our restaurant serves the best pizza in town." },
        { file: "jwt.md", text: "JWT authentication requires a secret key and expiration time." },
      ];

      const result = await llm.rerank(query, documents);

      expect(result.results).toHaveLength(4);

      // Auth documents should score highest
      const topTwo = result.results.slice(0, 2).map((r) => r.file);
      expect(topTwo).toContain("auth.md");
      expect(topTwo).toContain("jwt.md");

      // Irrelevant documents should score lowest
      const bottomTwo = result.results.slice(2).map((r) => r.file);
      expect(bottomTwo).toContain("weather.md");
      expect(bottomTwo).toContain("pizza.md");
    });

    test("handles programming queries correctly", async () => {
      const query = "How do I handle errors in JavaScript?";
      const documents: RerankDocument[] = [
        { file: "cooking.md", text: "To make a good pasta, boil water and add salt." },
        { file: "errors.md", text: "Use try-catch blocks to handle JavaScript errors gracefully." },
        { file: "python.md", text: "Python uses try-except for exception handling." },
      ];

      const result = await llm.rerank(query, documents);

      // JavaScript errors doc should score highest
      expect(result.results[0]!.file).toBe("errors.md");
      expect(result.results[0]!.score).toBeGreaterThan(0.7);

      // Python doc might be somewhat relevant (same concept, different language)
      // Cooking should be least relevant
      expect(result.results[2]!.file).toBe("cooking.md");
    });

    test("handles empty document list", async () => {
      const result = await llm.rerank("test query", []);
      expect(result.results).toHaveLength(0);
    });

    test("handles single document", async () => {
      const result = await llm.rerank("test", [{ file: "doc.md", text: "content" }]);
      expect(result.results).toHaveLength(1);
      expect(result.results[0]!.file).toBe("doc.md");
    });

    test("preserves original file paths", async () => {
      const documents: RerankDocument[] = [
        { file: "path/to/doc1.md", text: "content one" },
        { file: "another/path/doc2.md", text: "content two" },
      ];

      const result = await llm.rerank("query", documents);

      const files = result.results.map((r) => r.file).sort();
      expect(files).toEqual(["another/path/doc2.md", "path/to/doc1.md"]);
    });

    test("returns scores between 0 and 1", async () => {
      const documents: RerankDocument[] = [
        { file: "a.md", text: "The quick brown fox jumps over the lazy dog." },
        { file: "b.md", text: "Machine learning algorithms process data efficiently." },
        { file: "c.md", text: "React components use JSX syntax for rendering." },
      ];

      const result = await llm.rerank("Tell me about animals", documents);

      for (const doc of result.results) {
        expect(doc.score).toBeGreaterThanOrEqual(0);
        expect(doc.score).toBeLessThanOrEqual(1);
      }
    });

    test("batch reranks multiple documents efficiently", async () => {
      // Create 10 documents to verify batch processing works
      const documents: RerankDocument[] = Array(10)
        .fill(null)
        .map((_, i) => ({
          file: `doc${i}.md`,
          text: `Document number ${i} with some content about topic ${i % 3}`,
        }));

      const start = Date.now();
      const result = await llm.rerank("topic 1", documents);
      const elapsed = Date.now() - start;

      expect(result.results).toHaveLength(10);

      // Verify all documents are returned with valid scores
      for (const doc of result.results) {
        expect(doc.score).toBeGreaterThanOrEqual(0);
        expect(doc.score).toBeLessThanOrEqual(1);
      }

      // Log timing for monitoring batch performance
      console.log(`Batch rerank of 10 docs took ${elapsed}ms`);
    });
  });

  describe("expandQuery", () => {
    test("returns query expansions with correct types", async () => {
      const result = await llm.expandQuery("test query");

      // Result is Queryable[] containing lex, vec, and/or hyde entries
      expect(result.length).toBeGreaterThanOrEqual(1);

      // Each result should have a valid type
      for (const q of result) {
        expect(["lex", "vec", "hyde"]).toContain(q.type);
        expect(q.text.length).toBeGreaterThan(0);
      }
    }, 30000); // 30s timeout for model loading

    test("can exclude lexical queries", async () => {
      const result = await llm.expandQuery("authentication setup", { includeLexical: false });

      // Should not contain any 'lex' type entries
      const lexEntries = result.filter(q => q.type === "lex");
      expect(lexEntries).toHaveLength(0);
    });
  });
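
  // Sketch (an assumption, added as a reading aid for the expandQuery tests
  // above): the shape those tests rely on. The canonical Queryable type is
  // defined in llm.ts; this local alias is illustrative only.
  type QueryableSketch = { type: "lex" | "vec" | "hyde"; text: string };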
});

// =============================================================================
// Session Management Tests
// =============================================================================

describe("LLM Session Management", () => {
  describe("withLLMSession", () => {
    test("session provides access to LLM operations", async () => {
      const result = await withLLMSession(async (session) => {
        expect(session.isValid).toBe(true);
        const embedding = await session.embed("test text");
        expect(embedding).not.toBeNull();
        expect(embedding!.embedding.length).toBe(768);
        return "success";
      });
      expect(result).toBe("success");
    });

    test("session is invalid after release", async () => {
      let capturedSession: ILLMSession | null = null;

      await withLLMSession(async (session) => {
        capturedSession = session;
        expect(session.isValid).toBe(true);
      });

      // Session should be invalid after withLLMSession returns
      expect(capturedSession).not.toBeNull();
      expect(capturedSession!.isValid).toBe(false);
    });

    test("session prevents idle unload during operations", async () => {
      await withLLMSession(async (session) => {
        // While inside a session, canUnloadLLM should return false
        expect(canUnloadLLM()).toBe(false);

        // Perform an operation
        await session.embed("test");

        // Still should not be able to unload
        expect(canUnloadLLM()).toBe(false);
      });

      // After session ends, should be able to unload
      expect(canUnloadLLM()).toBe(true);
    });

    test("nested sessions increment ref count", async () => {
      await withLLMSession(async (outerSession) => {
        expect(canUnloadLLM()).toBe(false);

        await withLLMSession(async (innerSession) => {
          expect(canUnloadLLM()).toBe(false);
          expect(innerSession.isValid).toBe(true);
          expect(outerSession.isValid).toBe(true);
        });

        // Inner session released, but outer still active
        expect(canUnloadLLM()).toBe(false);
        expect(outerSession.isValid).toBe(true);
      });

      // All sessions released
      expect(canUnloadLLM()).toBe(true);
    });

    test("session embedBatch works correctly", async () => {
      await withLLMSession(async (session) => {
        const texts = ["Hello world", "Test text", "Another document"];
        const results = await session.embedBatch(texts);

        expect(results).toHaveLength(3);
        for (const result of results) {
          expect(result).not.toBeNull();
          expect(result!.embedding.length).toBe(768);
        }
      });
    });

    test("session rerank works correctly", async () => {
      await withLLMSession(async (session) => {
        const documents: RerankDocument[] = [
          { file: "a.txt", text: "The capital of France is Paris." },
          { file: "b.txt", text: "Dogs are great pets." },
        ];

        const result = await session.rerank("What is the capital of France?", documents);

        expect(result.results).toHaveLength(2);
        expect(result.results[0]!.file).toBe("a.txt");
        expect(result.results[0]!.score).toBeGreaterThan(result.results[1]!.score);
      });
    });

    test("max duration aborts session after timeout", async () => {
      let aborted = false;

      try {
        await withLLMSession(async (session) => {
          // Wait longer than max duration
          await new Promise(resolve => setTimeout(resolve, 150));

          // This operation should throw because session was aborted
          await session.embed("test");
        }, { maxDuration: 50 }); // 50ms max
      } catch (err) {
        if (err instanceof SessionReleasedError) {
          aborted = true;
        } else {
          throw err;
        }
      }

      expect(aborted).toBe(true);
    }, 5000);

    test("external abort signal propagates to session", async () => {
      const abortController = new AbortController();
      let sessionAborted = false;

      const promise = withLLMSession(async (session) => {
        // Wait a bit then check if aborted
        await new Promise(resolve => setTimeout(resolve, 100));

        if (!session.isValid) {
          sessionAborted = true;
          throw new SessionReleasedError("Session aborted");
        }

        return "should not reach";
      }, { signal: abortController.signal });

      // Abort after 20ms
      setTimeout(() => abortController.abort(), 20);

      try {
        await promise;
      } catch (err) {
        // Expected
      }

      expect(sessionAborted).toBe(true);
    }, 5000);

    test("session provides abort signal for monitoring", async () => {
      await withLLMSession(async (session) => {
        expect(session.signal).toBeInstanceOf(AbortSignal);
        expect(session.signal.aborted).toBe(false);
      });
    });

    test("returns value from callback", async () => {
      const result = await withLLMSession(async (session) => {
        await session.embed("test");
        return { status: "complete", count: 42 };
      });

      expect(result).toEqual({ status: "complete", count: 42 });
    });

    test("propagates errors from callback", async () => {
      const customError = new Error("Custom test error");

      await expect(
        withLLMSession(async () => {
          throw customError;
        })
      ).rejects.toThrow("Custom test error");
    });
  });
});