qmdr 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/.claude-plugin/marketplace.json +29 -0
  2. package/.env.example +85 -0
  3. package/.gitattributes +3 -0
  4. package/.github/workflows/release.yml +77 -0
  5. package/AI-SETUP.md +466 -0
  6. package/LICENSE +22 -0
  7. package/README.md +78 -0
  8. package/bun.lock +637 -0
  9. package/docs/README-zh.md +78 -0
  10. package/docs/refactor-checklist.md +54 -0
  11. package/docs/setup-openclaw.md +139 -0
  12. package/example-index.yml +33 -0
  13. package/finetune/BALANCED_DISTRIBUTION.md +157 -0
  14. package/finetune/DATA_IMPROVEMENTS.md +218 -0
  15. package/finetune/Justfile +43 -0
  16. package/finetune/Modelfile +16 -0
  17. package/finetune/README.md +299 -0
  18. package/finetune/SCORING.md +286 -0
  19. package/finetune/configs/accelerate_multi_gpu.yaml +17 -0
  20. package/finetune/configs/grpo.yaml +49 -0
  21. package/finetune/configs/sft.yaml +42 -0
  22. package/finetune/configs/sft_local.yaml +40 -0
  23. package/finetune/convert_gguf.py +221 -0
  24. package/finetune/data/best_glm_prompt.txt +17 -0
  25. package/finetune/data/gepa_generated.prompts.json +32 -0
  26. package/finetune/data/qmd_expansion_balanced_deduped.jsonl +413 -0
  27. package/finetune/data/qmd_expansion_diverse_addon.jsonl +386 -0
  28. package/finetune/data/qmd_expansion_handcrafted.jsonl +65 -0
  29. package/finetune/data/qmd_expansion_handcrafted_only.jsonl +336 -0
  30. package/finetune/data/qmd_expansion_locations.jsonl +64 -0
  31. package/finetune/data/qmd_expansion_people.jsonl +46 -0
  32. package/finetune/data/qmd_expansion_short_nontech.jsonl +200 -0
  33. package/finetune/data/qmd_expansion_v2.jsonl +1498 -0
  34. package/finetune/data/qmd_only_sampled.jsonl +399 -0
  35. package/finetune/dataset/analyze_data.py +369 -0
  36. package/finetune/dataset/clean_data.py +906 -0
  37. package/finetune/dataset/generate_balanced.py +823 -0
  38. package/finetune/dataset/generate_data.py +714 -0
  39. package/finetune/dataset/generate_data_offline.py +206 -0
  40. package/finetune/dataset/generate_diverse.py +441 -0
  41. package/finetune/dataset/generate_ollama.py +326 -0
  42. package/finetune/dataset/prepare_data.py +197 -0
  43. package/finetune/dataset/schema.py +73 -0
  44. package/finetune/dataset/score_data.py +115 -0
  45. package/finetune/dataset/validate_schema.py +104 -0
  46. package/finetune/eval.py +196 -0
  47. package/finetune/evals/queries.txt +56 -0
  48. package/finetune/gepa/__init__.py +1 -0
  49. package/finetune/gepa/best_prompt.txt +31 -0
  50. package/finetune/gepa/best_prompt_glm.txt +1 -0
  51. package/finetune/gepa/dspy_gepa.py +204 -0
  52. package/finetune/gepa/example.py +117 -0
  53. package/finetune/gepa/generate.py +129 -0
  54. package/finetune/gepa/gepa_outputs.jsonl +10 -0
  55. package/finetune/gepa/gepa_outputs_glm.jsonl +20 -0
  56. package/finetune/gepa/model.json +19 -0
  57. package/finetune/gepa/optimizer.py +70 -0
  58. package/finetune/gepa/score.py +84 -0
  59. package/finetune/jobs/eval.py +490 -0
  60. package/finetune/jobs/eval_common.py +354 -0
  61. package/finetune/jobs/eval_verbose.py +113 -0
  62. package/finetune/jobs/grpo.py +141 -0
  63. package/finetune/jobs/quantize.py +244 -0
  64. package/finetune/jobs/sft.py +121 -0
  65. package/finetune/pyproject.toml +23 -0
  66. package/finetune/reward.py +610 -0
  67. package/finetune/train.py +611 -0
  68. package/finetune/uv.lock +4070 -0
  69. package/flake.lock +61 -0
  70. package/flake.nix +83 -0
  71. package/migrate-schema.ts +162 -0
  72. package/package.json +56 -0
  73. package/skills/qmdr/SKILL.md +172 -0
  74. package/skills/qmdr/references/mcp-setup.md +88 -0
  75. package/src/app/commands/collection.ts +55 -0
  76. package/src/app/commands/context.ts +82 -0
  77. package/src/app/commands/document.ts +46 -0
  78. package/src/app/commands/maintenance.ts +60 -0
  79. package/src/app/commands/search.ts +45 -0
  80. package/src/app/ports/llm.ts +13 -0
  81. package/src/app/services/llm-service.ts +145 -0
  82. package/src/cli.test.ts +963 -0
  83. package/src/collections.ts +390 -0
  84. package/src/eval.test.ts +412 -0
  85. package/src/formatter.ts +427 -0
  86. package/src/llm.test.ts +559 -0
  87. package/src/llm.ts +1990 -0
  88. package/src/mcp.test.ts +889 -0
  89. package/src/mcp.ts +626 -0
  90. package/src/qmd.ts +3330 -0
  91. package/src/store/collections.ts +7 -0
  92. package/src/store/context.ts +10 -0
  93. package/src/store/db.ts +5 -0
  94. package/src/store/documents.ts +26 -0
  95. package/src/store/maintenance.ts +15 -0
  96. package/src/store/path.ts +13 -0
  97. package/src/store/search.ts +10 -0
  98. package/src/store-paths.test.ts +395 -0
  99. package/src/store.test.ts +2483 -0
  100. package/src/store.ts +2813 -0
  101. package/test/eval-harness.ts +223 -0
  102. package/tsconfig.json +29 -0
@@ -0,0 +1,559 @@
1
+ /**
2
+ * llm.test.ts - Unit tests for the LLM abstraction layer (node-llama-cpp)
3
+ *
4
+ * Run with: bun test src/llm.test.ts
5
+ *
6
+ * These tests require the actual models to be downloaded. Run the embed or
7
+ * rerank functions first to trigger model downloads.
8
+ */
9
+
10
+ import { describe, test, expect, beforeAll, afterAll } from "bun:test";
11
+ import {
12
+ LlamaCpp,
13
+ getDefaultLlamaCpp,
14
+ disposeDefaultLlamaCpp,
15
+ withLLMSession,
16
+ canUnloadLLM,
17
+ SessionReleasedError,
18
+ type RerankDocument,
19
+ type ILLMSession,
20
+ } from "./llm.js";
21
+
22
+ // =============================================================================
23
+ // Singleton Tests (no model loading required)
24
+ // =============================================================================
25
+
26
+ describe("Default LlamaCpp Singleton", () => {
27
+ // Test singleton behavior without resetting to avoid orphan instances
28
+ test("getDefaultLlamaCpp returns same instance on subsequent calls", () => {
29
+ const llm1 = getDefaultLlamaCpp();
30
+ const llm2 = getDefaultLlamaCpp();
31
+ expect(llm1).toBe(llm2);
32
+ expect(llm1).toBeInstanceOf(LlamaCpp);
33
+ });
34
+ });
35
+
36
+ // =============================================================================
37
+ // Model Existence Tests
38
+ // =============================================================================
39
+
40
+ describe("LlamaCpp.modelExists", () => {
41
+ test("returns exists:true for HuggingFace model URIs", async () => {
42
+ const llm = getDefaultLlamaCpp();
43
+ const result = await llm.modelExists("hf:org/repo/model.gguf");
44
+
45
+ expect(result.exists).toBe(true);
46
+ expect(result.name).toBe("hf:org/repo/model.gguf");
47
+ });
48
+
49
+ test("returns exists:false for non-existent local paths", async () => {
50
+ const llm = getDefaultLlamaCpp();
51
+ const result = await llm.modelExists("/nonexistent/path/model.gguf");
52
+
53
+ expect(result.exists).toBe(false);
54
+ expect(result.name).toBe("/nonexistent/path/model.gguf");
55
+ });
56
+ });
57
+
58
+ // =============================================================================
59
+ // Integration Tests (require actual models)
60
+ // =============================================================================
61
+
62
+ describe("LlamaCpp Integration", () => {
63
+ // Use the singleton to avoid multiple Metal contexts
64
+ const llm = getDefaultLlamaCpp();
65
+
66
+ afterAll(async () => {
67
+ // Ensure native resources are released to avoid ggml-metal asserts on process exit.
68
+ await disposeDefaultLlamaCpp();
69
+ });
70
+
71
+ describe("embed", () => {
72
+ test("returns embedding with correct dimensions", async () => {
73
+ const result = await llm.embed("Hello world");
74
+
75
+ expect(result).not.toBeNull();
76
+ expect(result!.embedding).toBeInstanceOf(Array);
77
+ expect(result!.embedding.length).toBeGreaterThan(0);
78
+ // embeddinggemma outputs 768 dimensions
79
+ expect(result!.embedding.length).toBe(768);
80
+ });
81
+
82
+ test("returns consistent embeddings for same input", async () => {
83
+ const result1 = await llm.embed("test text");
84
+ const result2 = await llm.embed("test text");
85
+
86
+ expect(result1).not.toBeNull();
87
+ expect(result2).not.toBeNull();
88
+
89
+ // Embeddings should be identical for the same input
90
+ for (let i = 0; i < result1!.embedding.length; i++) {
91
+ expect(result1!.embedding[i]).toBeCloseTo(result2!.embedding[i]!, 5);
92
+ }
93
+ });
94
+
95
+ test("returns different embeddings for different inputs", async () => {
96
+ const result1 = await llm.embed("cats are great");
97
+ const result2 = await llm.embed("database optimization");
98
+
99
+ expect(result1).not.toBeNull();
100
+ expect(result2).not.toBeNull();
101
+
102
+ // Calculate cosine similarity - should be less than 1.0 (not identical)
103
+ let dotProduct = 0;
104
+ let norm1 = 0;
105
+ let norm2 = 0;
106
+ for (let i = 0; i < result1!.embedding.length; i++) {
107
+ const v1 = result1!.embedding[i]!;
108
+ const v2 = result2!.embedding[i]!;
109
+ dotProduct += v1 * v2;
110
+ norm1 += v1 ** 2;
111
+ norm2 += v2 ** 2;
112
+ }
113
+ const similarity = dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
114
+
115
+ expect(similarity).toBeLessThan(0.95); // Should be meaningfully different
116
+ });
117
+ });
118
+
119
+ describe("embedBatch", () => {
120
+ test("returns embeddings for multiple texts", async () => {
121
+ const texts = ["Hello world", "Test text", "Another document"];
122
+ const results = await llm.embedBatch(texts);
123
+
124
+ expect(results).toHaveLength(3);
125
+ for (const result of results) {
126
+ expect(result).not.toBeNull();
127
+ expect(result!.embedding.length).toBe(768);
128
+ }
129
+ });
130
+
131
+ test("returns same results as individual embed calls", async () => {
132
+ const texts = ["cats are great", "dogs are awesome"];
133
+
134
+ // Get batch embeddings
135
+ const batchResults = await llm.embedBatch(texts);
136
+
137
+ // Get individual embeddings
138
+ const individualResults = await Promise.all(texts.map(t => llm.embed(t)));
139
+
140
+ // Compare - should be identical
141
+ for (let i = 0; i < texts.length; i++) {
142
+ expect(batchResults[i]).not.toBeNull();
143
+ expect(individualResults[i]).not.toBeNull();
144
+ for (let j = 0; j < batchResults[i]!.embedding.length; j++) {
145
+ expect(batchResults[i]!.embedding[j]).toBeCloseTo(individualResults[i]!.embedding[j]!, 5);
146
+ }
147
+ }
148
+ });
149
+
150
+ test("handles empty array", async () => {
151
+ const results = await llm.embedBatch([]);
152
+ expect(results).toHaveLength(0);
153
+ });
154
+
155
+ test("batch is faster than sequential", async () => {
156
+ const texts = Array(10).fill(null).map((_, i) => `Document number ${i} with content`);
157
+
158
+ // Time batch
159
+ const batchStart = Date.now();
160
+ await llm.embedBatch(texts);
161
+ const batchTime = Date.now() - batchStart;
162
+
163
+ // Time sequential
164
+ const seqStart = Date.now();
165
+ for (const text of texts) {
166
+ await llm.embed(text);
167
+ }
168
+ const seqTime = Date.now() - seqStart;
169
+
170
+ console.log(`Batch: ${batchTime}ms, Sequential: ${seqTime}ms`);
171
+ // Performance is machine/load dependent. We only assert batch isn't drastically worse.
172
+ expect(batchTime).toBeLessThanOrEqual(seqTime * 3);
173
+ });
174
+
175
+ test("handles concurrent embedBatch calls on fresh instance without race condition", async () => {
176
+ // This test verifies the fix for a race condition where concurrent calls to
177
+ // ensureEmbedContext() could create multiple contexts. Without the promise guard,
178
+ // each concurrent embedBatch call sees embedContext === null and creates its own
179
+ // context, causing resource leaks and potential "Context is disposed" errors.
180
+ //
181
+ // See: https://github.com/tobi/qmd/pull/54
182
+ //
183
+ // The fix uses a promise guard to ensure only one context creation runs at a time.
184
+ // We verify this by instrumenting createEmbeddingContext to count invocations.
185
+
186
+ const freshLlm = new LlamaCpp({});
187
+ let contextCreateCount = 0;
188
+
189
+ // Instrument the model's createEmbeddingContext to count calls
190
+ const originalEnsureEmbedModel = (freshLlm as any).ensureEmbedModel.bind(freshLlm);
191
+ let modelInstrumented = false;
192
+ (freshLlm as any).ensureEmbedModel = async function() {
193
+ const model = await originalEnsureEmbedModel();
194
+ if (!modelInstrumented) {
195
+ modelInstrumented = true;
196
+ const originalCreate = model.createEmbeddingContext.bind(model);
197
+ model.createEmbeddingContext = async function(...args: any[]) {
198
+ contextCreateCount++;
199
+ return originalCreate(...args);
200
+ };
201
+ }
202
+ return model;
203
+ };
204
+
205
+ const texts = Array(10).fill(null).map((_, i) => `Document ${i}`);
206
+
207
+ // Call embedBatch 5 TIMES in parallel on fresh instance.
208
+ // Without the promise guard fix, this would create 5 contexts (one per call).
209
+ // With the fix, only 1 context should be created.
210
+ const batches = await Promise.all([
211
+ freshLlm.embedBatch(texts.slice(0, 2)),
212
+ freshLlm.embedBatch(texts.slice(2, 4)),
213
+ freshLlm.embedBatch(texts.slice(4, 6)),
214
+ freshLlm.embedBatch(texts.slice(6, 8)),
215
+ freshLlm.embedBatch(texts.slice(8, 10)),
216
+ ]);
217
+
218
+ const allResults = batches.flat();
219
+ expect(allResults).toHaveLength(10);
220
+
221
+ const successCount = allResults.filter(r => r !== null).length;
222
+ expect(successCount).toBe(10);
223
+
224
+ // THE KEY ASSERTION: Only 1 context should be created, not 5
225
+ // Without the fix, contextCreateCount would be 5 (one per concurrent embedBatch call)
226
+ console.log(`Context creation count: ${contextCreateCount} (expected: 1)`);
227
+ expect(contextCreateCount).toBe(1);
228
+
229
+ await freshLlm.dispose();
230
+ }, 60000);
231
+ });
232
+
233
+ describe("rerank", () => {
234
+ test("scores capital of France question correctly", async () => {
235
+ const query = "What is the capital of France?";
236
+ const documents: RerankDocument[] = [
237
+ { file: "butterflies.txt", text: "Butterflies indeed fly through the garden." },
238
+ { file: "france.txt", text: "The capital of France is Paris." },
239
+ { file: "canada.txt", text: "The capital of Canada is Ottawa." },
240
+ ];
241
+
242
+ const result = await llm.rerank(query, documents);
243
+
244
+ expect(result.results).toHaveLength(3);
245
+
246
+ // The France document should score highest
247
+ expect(result.results[0]!.file).toBe("france.txt");
248
+ expect(result.results[0]!.score).toBeGreaterThan(0.7);
249
+
250
+ // Canada should be somewhat relevant (also about capitals)
251
+ expect(result.results[1]!.file).toBe("canada.txt");
252
+
253
+ // Butterflies should score lowest
254
+ expect(result.results[2]!.file).toBe("butterflies.txt");
255
+ expect(result.results[2]!.score).toBeLessThan(0.6);
256
+ });
257
+
258
+ test("scores authentication query correctly", async () => {
259
+ const query = "How do I configure authentication?";
260
+ const documents: RerankDocument[] = [
261
+ { file: "weather.md", text: "The weather today is sunny with mild temperatures." },
262
+ { file: "auth.md", text: "Authentication can be configured by setting the AUTH_SECRET environment variable." },
263
+ { file: "pizza.md", text: "Our restaurant serves the best pizza in town." },
264
+ { file: "jwt.md", text: "JWT authentication requires a secret key and expiration time." },
265
+ ];
266
+
267
+ const result = await llm.rerank(query, documents);
268
+
269
+ expect(result.results).toHaveLength(4);
270
+
271
+ // Auth documents should score highest
272
+ const topTwo = result.results.slice(0, 2).map((r) => r.file);
273
+ expect(topTwo).toContain("auth.md");
274
+ expect(topTwo).toContain("jwt.md");
275
+
276
+ // Irrelevant documents should score lowest
277
+ const bottomTwo = result.results.slice(2).map((r) => r.file);
278
+ expect(bottomTwo).toContain("weather.md");
279
+ expect(bottomTwo).toContain("pizza.md");
280
+ });
281
+
282
+ test("handles programming queries correctly", async () => {
283
+ const query = "How do I handle errors in JavaScript?";
284
+ const documents: RerankDocument[] = [
285
+ { file: "cooking.md", text: "To make a good pasta, boil water and add salt." },
286
+ { file: "errors.md", text: "Use try-catch blocks to handle JavaScript errors gracefully." },
287
+ { file: "python.md", text: "Python uses try-except for exception handling." },
288
+ ];
289
+
290
+ const result = await llm.rerank(query, documents);
291
+
292
+ // JavaScript errors doc should score highest
293
+ expect(result.results[0]!.file).toBe("errors.md");
294
+ expect(result.results[0]!.score).toBeGreaterThan(0.7);
295
+
296
+ // Python doc might be somewhat relevant (same concept, different language)
297
+ // Cooking should be least relevant
298
+ expect(result.results[2]!.file).toBe("cooking.md");
299
+ });
300
+
301
+ test("handles empty document list", async () => {
302
+ const result = await llm.rerank("test query", []);
303
+ expect(result.results).toHaveLength(0);
304
+ });
305
+
306
+ test("handles single document", async () => {
307
+ const result = await llm.rerank("test", [{ file: "doc.md", text: "content" }]);
308
+ expect(result.results).toHaveLength(1);
309
+ expect(result.results[0]!.file).toBe("doc.md");
310
+ });
311
+
312
+ test("preserves original file paths", async () => {
313
+ const documents: RerankDocument[] = [
314
+ { file: "path/to/doc1.md", text: "content one" },
315
+ { file: "another/path/doc2.md", text: "content two" },
316
+ ];
317
+
318
+ const result = await llm.rerank("query", documents);
319
+
320
+ const files = result.results.map((r) => r.file).sort();
321
+ expect(files).toEqual(["another/path/doc2.md", "path/to/doc1.md"]);
322
+ });
323
+
324
+ test("returns scores between 0 and 1", async () => {
325
+ const documents: RerankDocument[] = [
326
+ { file: "a.md", text: "The quick brown fox jumps over the lazy dog." },
327
+ { file: "b.md", text: "Machine learning algorithms process data efficiently." },
328
+ { file: "c.md", text: "React components use JSX syntax for rendering." },
329
+ ];
330
+
331
+ const result = await llm.rerank("Tell me about animals", documents);
332
+
333
+ for (const doc of result.results) {
334
+ expect(doc.score).toBeGreaterThanOrEqual(0);
335
+ expect(doc.score).toBeLessThanOrEqual(1);
336
+ }
337
+ });
338
+
339
+ test("batch reranks multiple documents efficiently", async () => {
340
+ // Create 10 documents to verify batch processing works
341
+ const documents: RerankDocument[] = Array(10)
342
+ .fill(null)
343
+ .map((_, i) => ({
344
+ file: `doc${i}.md`,
345
+ text: `Document number ${i} with some content about topic ${i % 3}`,
346
+ }));
347
+
348
+ const start = Date.now();
349
+ const result = await llm.rerank("topic 1", documents);
350
+ const elapsed = Date.now() - start;
351
+
352
+ expect(result.results).toHaveLength(10);
353
+
354
+ // Verify all documents are returned with valid scores
355
+ for (const doc of result.results) {
356
+ expect(doc.score).toBeGreaterThanOrEqual(0);
357
+ expect(doc.score).toBeLessThanOrEqual(1);
358
+ }
359
+
360
+ // Log timing for monitoring batch performance
361
+ console.log(`Batch rerank of 10 docs took ${elapsed}ms`);
362
+ });
363
+ });
364
+
365
+ describe("expandQuery", () => {
366
+ test("returns query expansions with correct types", async () => {
367
+ const result = await llm.expandQuery("test query");
368
+
369
+ // Result is Queryable[] containing lex, vec, and/or hyde entries
370
+ expect(result.length).toBeGreaterThanOrEqual(1);
371
+
372
+ // Each result should have a valid type
373
+ for (const q of result) {
374
+ expect(["lex", "vec", "hyde"]).toContain(q.type);
375
+ expect(q.text.length).toBeGreaterThan(0);
376
+ }
377
+ }, 30000); // 30s timeout for model loading
378
+
379
+ test("can exclude lexical queries", async () => {
380
+ const result = await llm.expandQuery("authentication setup", { includeLexical: false });
381
+
382
+ // Should not contain any 'lex' type entries
383
+ const lexEntries = result.filter(q => q.type === "lex");
384
+ expect(lexEntries).toHaveLength(0);
385
+ });
386
+ });
387
+ });
388
+
389
+ // =============================================================================
390
+ // Session Management Tests
391
+ // =============================================================================
392
+
393
+ describe("LLM Session Management", () => {
394
+ describe("withLLMSession", () => {
395
+ test("session provides access to LLM operations", async () => {
396
+ const result = await withLLMSession(async (session) => {
397
+ expect(session.isValid).toBe(true);
398
+ const embedding = await session.embed("test text");
399
+ expect(embedding).not.toBeNull();
400
+ expect(embedding!.embedding.length).toBe(768);
401
+ return "success";
402
+ });
403
+ expect(result).toBe("success");
404
+ });
405
+
406
+ test("session is invalid after release", async () => {
407
+ let capturedSession: ILLMSession | null = null;
408
+
409
+ await withLLMSession(async (session) => {
410
+ capturedSession = session;
411
+ expect(session.isValid).toBe(true);
412
+ });
413
+
414
+ // Session should be invalid after withLLMSession returns
415
+ expect(capturedSession).not.toBeNull();
416
+ expect(capturedSession!.isValid).toBe(false);
417
+ });
418
+
419
+ test("session prevents idle unload during operations", async () => {
420
+ await withLLMSession(async (session) => {
421
+ // While inside a session, canUnloadLLM should return false
422
+ expect(canUnloadLLM()).toBe(false);
423
+
424
+ // Perform an operation
425
+ await session.embed("test");
426
+
427
+ // Still should not be able to unload
428
+ expect(canUnloadLLM()).toBe(false);
429
+ });
430
+
431
+ // After session ends, should be able to unload
432
+ expect(canUnloadLLM()).toBe(true);
433
+ });
434
+
435
+ test("nested sessions increment ref count", async () => {
436
+ await withLLMSession(async (outerSession) => {
437
+ expect(canUnloadLLM()).toBe(false);
438
+
439
+ await withLLMSession(async (innerSession) => {
440
+ expect(canUnloadLLM()).toBe(false);
441
+ expect(innerSession.isValid).toBe(true);
442
+ expect(outerSession.isValid).toBe(true);
443
+ });
444
+
445
+ // Inner session released, but outer still active
446
+ expect(canUnloadLLM()).toBe(false);
447
+ expect(outerSession.isValid).toBe(true);
448
+ });
449
+
450
+ // All sessions released
451
+ expect(canUnloadLLM()).toBe(true);
452
+ });
453
+
454
+ test("session embedBatch works correctly", async () => {
455
+ await withLLMSession(async (session) => {
456
+ const texts = ["Hello world", "Test text", "Another document"];
457
+ const results = await session.embedBatch(texts);
458
+
459
+ expect(results).toHaveLength(3);
460
+ for (const result of results) {
461
+ expect(result).not.toBeNull();
462
+ expect(result!.embedding.length).toBe(768);
463
+ }
464
+ });
465
+ });
466
+
467
+ test("session rerank works correctly", async () => {
468
+ await withLLMSession(async (session) => {
469
+ const documents: RerankDocument[] = [
470
+ { file: "a.txt", text: "The capital of France is Paris." },
471
+ { file: "b.txt", text: "Dogs are great pets." },
472
+ ];
473
+
474
+ const result = await session.rerank("What is the capital of France?", documents);
475
+
476
+ expect(result.results).toHaveLength(2);
477
+ expect(result.results[0]!.file).toBe("a.txt");
478
+ expect(result.results[0]!.score).toBeGreaterThan(result.results[1]!.score);
479
+ });
480
+ });
481
+
482
+ test("max duration aborts session after timeout", async () => {
483
+ let aborted = false;
484
+
485
+ try {
486
+ await withLLMSession(async (session) => {
487
+ // Wait longer than max duration
488
+ await new Promise(resolve => setTimeout(resolve, 150));
489
+
490
+ // This operation should throw because session was aborted
491
+ await session.embed("test");
492
+ }, { maxDuration: 50 }); // 50ms max
493
+ } catch (err) {
494
+ if (err instanceof SessionReleasedError) {
495
+ aborted = true;
496
+ } else {
497
+ throw err;
498
+ }
499
+ }
500
+
501
+ expect(aborted).toBe(true);
502
+ }, 5000);
503
+
504
+ test("external abort signal propagates to session", async () => {
505
+ const abortController = new AbortController();
506
+ let sessionAborted = false;
507
+
508
+ const promise = withLLMSession(async (session) => {
509
+ // Wait a bit then check if aborted
510
+ await new Promise(resolve => setTimeout(resolve, 100));
511
+
512
+ if (!session.isValid) {
513
+ sessionAborted = true;
514
+ throw new SessionReleasedError("Session aborted");
515
+ }
516
+
517
+ return "should not reach";
518
+ }, { signal: abortController.signal });
519
+
520
+ // Abort after 20ms
521
+ setTimeout(() => abortController.abort(), 20);
522
+
523
+ try {
524
+ await promise;
525
+ } catch (err) {
526
+ // Expected
527
+ }
528
+
529
+ expect(sessionAborted).toBe(true);
530
+ }, 5000);
531
+
532
+ test("session provides abort signal for monitoring", async () => {
533
+ await withLLMSession(async (session) => {
534
+ expect(session.signal).toBeInstanceOf(AbortSignal);
535
+ expect(session.signal.aborted).toBe(false);
536
+ });
537
+ });
538
+
539
+ test("returns value from callback", async () => {
540
+ const result = await withLLMSession(async (session) => {
541
+ await session.embed("test");
542
+ return { status: "complete", count: 42 };
543
+ });
544
+
545
+ expect(result).toEqual({ status: "complete", count: 42 });
546
+ });
547
+
548
+ test("propagates errors from callback", async () => {
549
+ const customError = new Error("Custom test error");
550
+
551
+ await expect(
552
+ withLLMSession(async () => {
553
+ throw customError;
554
+ })
555
+ ).rejects.toThrow("Custom test error");
556
+ });
557
+ });
558
+ });
559
+