@kernl-sdk/turbopuffer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/.turbo/turbo-build.log +4 -0
  2. package/.turbo/turbo-check-types.log +60 -0
  3. package/CHANGELOG.md +33 -0
  4. package/LICENSE +201 -0
  5. package/README.md +60 -0
  6. package/dist/__tests__/convert.test.d.ts +2 -0
  7. package/dist/__tests__/convert.test.d.ts.map +1 -0
  8. package/dist/__tests__/convert.test.js +346 -0
  9. package/dist/__tests__/filter.test.d.ts +8 -0
  10. package/dist/__tests__/filter.test.d.ts.map +1 -0
  11. package/dist/__tests__/filter.test.js +649 -0
  12. package/dist/__tests__/filters.integration.test.d.ts +8 -0
  13. package/dist/__tests__/filters.integration.test.d.ts.map +1 -0
  14. package/dist/__tests__/filters.integration.test.js +502 -0
  15. package/dist/__tests__/integration/filters.integration.test.d.ts +8 -0
  16. package/dist/__tests__/integration/filters.integration.test.d.ts.map +1 -0
  17. package/dist/__tests__/integration/filters.integration.test.js +475 -0
  18. package/dist/__tests__/integration/integration.test.d.ts +2 -0
  19. package/dist/__tests__/integration/integration.test.d.ts.map +1 -0
  20. package/dist/__tests__/integration/integration.test.js +329 -0
  21. package/dist/__tests__/integration/lifecycle.integration.test.d.ts +8 -0
  22. package/dist/__tests__/integration/lifecycle.integration.test.d.ts.map +1 -0
  23. package/dist/__tests__/integration/lifecycle.integration.test.js +370 -0
  24. package/dist/__tests__/integration/memory.integration.test.d.ts +2 -0
  25. package/dist/__tests__/integration/memory.integration.test.d.ts.map +1 -0
  26. package/dist/__tests__/integration/memory.integration.test.js +287 -0
  27. package/dist/__tests__/integration/query.integration.test.d.ts +8 -0
  28. package/dist/__tests__/integration/query.integration.test.d.ts.map +1 -0
  29. package/dist/__tests__/integration/query.integration.test.js +385 -0
  30. package/dist/__tests__/integration.test.d.ts +2 -0
  31. package/dist/__tests__/integration.test.d.ts.map +1 -0
  32. package/dist/__tests__/integration.test.js +343 -0
  33. package/dist/__tests__/lifecycle.integration.test.d.ts +8 -0
  34. package/dist/__tests__/lifecycle.integration.test.d.ts.map +1 -0
  35. package/dist/__tests__/lifecycle.integration.test.js +385 -0
  36. package/dist/__tests__/query.integration.test.d.ts +8 -0
  37. package/dist/__tests__/query.integration.test.d.ts.map +1 -0
  38. package/dist/__tests__/query.integration.test.js +423 -0
  39. package/dist/__tests__/query.test.d.ts +8 -0
  40. package/dist/__tests__/query.test.d.ts.map +1 -0
  41. package/dist/__tests__/query.test.js +472 -0
  42. package/dist/convert/document.d.ts +20 -0
  43. package/dist/convert/document.d.ts.map +1 -0
  44. package/dist/convert/document.js +72 -0
  45. package/dist/convert/filter.d.ts +15 -0
  46. package/dist/convert/filter.d.ts.map +1 -0
  47. package/dist/convert/filter.js +109 -0
  48. package/dist/convert/index.d.ts +8 -0
  49. package/dist/convert/index.d.ts.map +1 -0
  50. package/dist/convert/index.js +7 -0
  51. package/dist/convert/query.d.ts +22 -0
  52. package/dist/convert/query.d.ts.map +1 -0
  53. package/dist/convert/query.js +111 -0
  54. package/dist/convert/schema.d.ts +39 -0
  55. package/dist/convert/schema.d.ts.map +1 -0
  56. package/dist/convert/schema.js +124 -0
  57. package/dist/convert.d.ts +68 -0
  58. package/dist/convert.d.ts.map +1 -0
  59. package/dist/convert.js +333 -0
  60. package/dist/handle.d.ts +34 -0
  61. package/dist/handle.d.ts.map +1 -0
  62. package/dist/handle.js +72 -0
  63. package/dist/index.d.ts +27 -0
  64. package/dist/index.d.ts.map +1 -0
  65. package/dist/index.js +26 -0
  66. package/dist/search.d.ts +85 -0
  67. package/dist/search.d.ts.map +1 -0
  68. package/dist/search.js +167 -0
  69. package/dist/types.d.ts +14 -0
  70. package/dist/types.d.ts.map +1 -0
  71. package/dist/types.js +1 -0
  72. package/package.json +57 -0
  73. package/src/__tests__/convert.test.ts +425 -0
  74. package/src/__tests__/filter.test.ts +730 -0
  75. package/src/__tests__/integration/filters.integration.test.ts +558 -0
  76. package/src/__tests__/integration/integration.test.ts +399 -0
  77. package/src/__tests__/integration/lifecycle.integration.test.ts +464 -0
  78. package/src/__tests__/integration/memory.integration.test.ts +353 -0
  79. package/src/__tests__/integration/query.integration.test.ts +471 -0
  80. package/src/__tests__/query.test.ts +636 -0
  81. package/src/convert/document.ts +95 -0
  82. package/src/convert/filter.ts +123 -0
  83. package/src/convert/index.ts +8 -0
  84. package/src/convert/query.ts +151 -0
  85. package/src/convert/schema.ts +163 -0
  86. package/src/handle.ts +104 -0
  87. package/src/index.ts +31 -0
  88. package/src/search.ts +207 -0
  89. package/src/types.ts +14 -0
  90. package/tsconfig.json +13 -0
  91. package/vitest.config.ts +15 -0
@@ -0,0 +1,471 @@
1
+ /**
2
+ * Comprehensive query modes integration tests.
3
+ *
4
+ * Tests vector search, BM25 text search, hybrid queries, fusion modes,
5
+ * topK behavior, include semantics, and rank ordering against the real Turbopuffer API.
6
+ */
7
+
8
+ import { describe, it, expect, beforeAll, afterAll } from "vitest";
9
+
10
+ import { TurbopufferSearchIndex } from "../../search";
11
+ import type { IndexHandle } from "@kernl-sdk/retrieval";
12
+
13
+ const TURBOPUFFER_API_KEY = process.env.TURBOPUFFER_API_KEY; // when unset, the suite registers only a skipped placeholder test
14
+ const TURBOPUFFER_REGION = process.env.TURBOPUFFER_REGION ?? "api"; // falls back to "api" when the env var is not set
15
+
16
+ /**
17
+ * Shape of the fixture documents in TEST_DOCS; also the type parameter of the index handle the tests query.
18
+ */
19
+ interface TestDoc {
20
+ id: string; // document id; the tests compare it against hit.id
21
+ title: string; // declared with fts + filterable in the index schema
22
+ content: string; // declared with fts in the index schema
23
+ category: string; // declared filterable in the index schema
24
+ priority: number; // declared as a filterable "int" in the index schema
25
+ vector: number[]; // declared as a 4-dimensional vector in the index schema
26
+ }
27
+
28
+ /**
29
+ * Deterministic test dataset for query testing.
30
+ * Six documents: two per category ("ml", "db", "search"), with priorities 1 through 6.
31
+ * Documents are designed to have predictable vector similarity and text relevance:
32
+ * - vec-1..vec-4 use the four orthogonal basis vectors; vec-5 and vec-6 are equal mixes of two of them
33
+ * - Titles and content carry specific keywords (e.g. "neural", "transformer", "database") for BM25 testing
34
+ */
35
+ const TEST_DOCS: TestDoc[] = [
36
+ {
37
+ id: "vec-1",
38
+ title: "Machine Learning Basics",
39
+ content: "Introduction to neural networks and deep learning fundamentals",
40
+ category: "ml",
41
+ priority: 1,
42
+ vector: [1.0, 0.0, 0.0, 0.0], // Basis vector 1
43
+ },
44
+ {
45
+ id: "vec-2",
46
+ title: "Advanced Neural Networks",
47
+ content:
48
+ "Deep dive into transformer architectures and attention mechanisms",
49
+ category: "ml",
50
+ priority: 2,
51
+ vector: [0.0, 1.0, 0.0, 0.0], // Basis vector 2
52
+ },
53
+ {
54
+ id: "vec-3",
55
+ title: "Database Fundamentals",
56
+ content: "SQL queries and relational database design patterns",
57
+ category: "db",
58
+ priority: 3,
59
+ vector: [0.0, 0.0, 1.0, 0.0], // Basis vector 3
60
+ },
61
+ {
62
+ id: "vec-4",
63
+ title: "Vector Databases",
64
+ content: "Introduction to vector search and similarity matching",
65
+ category: "db",
66
+ priority: 4,
67
+ vector: [0.0, 0.0, 0.0, 1.0], // Basis vector 4
68
+ },
69
+ {
70
+ id: "vec-5",
71
+ title: "Search Engine Optimization",
72
+ content: "BM25 ranking and full text search algorithms",
73
+ category: "search",
74
+ priority: 5,
75
+ vector: [0.5, 0.5, 0.0, 0.0], // Equal mix of basis vectors 1 and 2
76
+ },
77
+ {
78
+ id: "vec-6",
79
+ title: "Hybrid Search Systems",
80
+ content: "Combining vector and keyword search for better results",
81
+ category: "search",
82
+ priority: 6,
83
+ vector: [0.0, 0.5, 0.5, 0.0], // Equal mix of basis vectors 2 and 3
84
+ },
85
+ ];
86
+
87
+ describe("Query modes integration tests", () => {
88
+ if (!TURBOPUFFER_API_KEY) {
89
+ it.skip("requires TURBOPUFFER_API_KEY to be set", () => {}); // registers one skipped placeholder so the missing key shows up in the report
90
+ return; // none of the hooks or tests below are registered without an API key
91
+ }
92
+
93
+ let tpuf: TurbopufferSearchIndex; // assigned in beforeAll
94
+ let index: IndexHandle<TestDoc>; // assigned in beforeAll; shared by every test below
95
+ const testIndexId = `kernl-query-test-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; // timestamp + random base-36 suffix; presumably keeps separate runs from colliding — confirm
96
+
97
+ beforeAll(async () => {
98
+ tpuf = new TurbopufferSearchIndex({
99
+ apiKey: TURBOPUFFER_API_KEY,
100
+ region: TURBOPUFFER_REGION,
101
+ });
102
+
103
+ // Create the index: title and content have fts enabled; title, category and priority are filterable; vector has 4 dimensions
104
+ await tpuf.createIndex({
105
+ id: testIndexId,
106
+ schema: {
107
+ title: { type: "string", fts: true, filterable: true },
108
+ content: { type: "string", fts: true },
109
+ category: { type: "string", filterable: true },
110
+ priority: { type: "int", filterable: true },
111
+ vector: { type: "vector", dimensions: 4 },
112
+ },
113
+ });
114
+
115
+ index = tpuf.index<TestDoc>(testIndexId);
116
+
117
+ // Insert all six TEST_DOCS fixtures in a single upsert
118
+ await index.upsert(TEST_DOCS);
119
+
120
+ // Fixed 2 s pause before the tests run. NOTE(review): presumably gives the backend time to index the upsert — confirm 2 s is enough
121
+ await new Promise((r) => setTimeout(r, 2000));
122
+ }, 30000); // second argument: hook timeout in milliseconds
123
+
124
+ afterAll(async () => {
125
+ try {
126
+ await tpuf.deleteIndex(testIndexId);
127
+ } catch {
128
+ // Best-effort cleanup: a failed delete is deliberately swallowed so it cannot fail the suite
129
+ }
130
+ });
131
+
132
+ // ============================================================
133
+ // VECTOR SEARCH
134
+ // ============================================================
135
+
136
+ describe("vector search", () => {
137
+ it("returns exact match as top result", async () => {
138
+ // Query equals vec-1's vector exactly, so vec-1 is expected to rank first
139
+ const hits = await index.query({
140
+ query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
141
+ topK: 10,
142
+ });
143
+
144
+ expect(hits.length).toBeGreaterThan(0);
145
+ expect(hits[0].id).toBe("vec-1");
146
+ });
147
+
148
+ it("returns results in similarity order", async () => {
149
+ // Query equals vec-2's vector exactly, so vec-2 is expected to rank first
150
+ const hits = await index.query({
151
+ query: [{ vector: [0.0, 1.0, 0.0, 0.0] }],
152
+ topK: 10,
153
+ });
154
+
155
+ expect(hits.length).toBeGreaterThan(0);
156
+ expect(hits[0].id).toBe("vec-2");
157
+
158
+ // Each score must be <= the previous one, i.e. hits are sorted by non-increasing score
159
+ for (let i = 1; i < hits.length; i++) {
160
+ expect(hits[i].score).toBeLessThanOrEqual(hits[i - 1].score);
161
+ }
162
+ });
163
+
164
+ it("mixed vector query finds composite matches", async () => {
165
+ // Query equals vec-5's vector [0.5, 0.5, 0, 0] exactly, so vec-5 is expected to rank first
166
+ const hits = await index.query({
167
+ query: [{ vector: [0.5, 0.5, 0.0, 0.0] }],
168
+ topK: 10,
169
+ });
170
+
171
+ expect(hits.length).toBeGreaterThan(0);
172
+ expect(hits[0].id).toBe("vec-5");
173
+ });
174
+ });
175
+
176
+ // ============================================================
177
+ // TOPK BEHAVIOR
178
+ // ============================================================
179
+
180
+ describe("topK behavior", () => {
181
+ it("topK smaller than doc count returns exactly topK", async () => {
182
+ const hits = await index.query({
183
+ query: [{ vector: [0.5, 0.5, 0.5, 0.5] }],
184
+ topK: 3, // fewer than the 6 fixtures in TEST_DOCS
185
+ });
186
+
187
+ expect(hits.length).toBe(3);
188
+ });
189
+
190
+ it("topK larger than doc count returns all docs", async () => {
191
+ const hits = await index.query({
192
+ query: [{ vector: [0.5, 0.5, 0.5, 0.5] }],
193
+ topK: 100,
194
+ });
195
+
196
+ // TEST_DOCS holds 6 documents, so all 6 are expected back even though topK is 100
197
+ expect(hits.length).toBe(6);
198
+ });
199
+
200
+ it("topK of 1 returns single best match", async () => {
201
+ const hits = await index.query({
202
+ query: [{ vector: [1.0, 0.0, 0.0, 0.0] }], // identical to vec-1's vector
203
+ topK: 1,
204
+ });
205
+
206
+ expect(hits.length).toBe(1);
207
+ expect(hits[0].id).toBe("vec-1");
208
+ });
209
+ });
210
+
211
+ // ============================================================
212
+ // TEXT SEARCH (BM25)
213
+ // ============================================================
214
+
215
+ describe("text search (BM25)", () => {
216
+ it("single field text query finds matching docs", async () => {
217
+ const hits = await index.query({
218
+ query: [{ title: "neural" }],
219
+ topK: 10,
220
+ include: ["title"], // title is needed on the hits for the assertion below
221
+ });
222
+
223
+ expect(hits.length).toBeGreaterThan(0);
224
+ // At least one returned title must contain "neural" (compared case-insensitively)
225
+ const titles = hits.map((h) => h.document?.title);
226
+ expect(titles.some((t) => t?.toLowerCase().includes("neural"))).toBe(
227
+ true,
228
+ );
229
+ });
230
+
231
+ it("content field text query finds matching docs", async () => {
232
+ const hits = await index.query({
233
+ query: [{ content: "transformer" }],
234
+ topK: 10,
235
+ include: ["content"],
236
+ });
237
+
238
+ expect(hits.length).toBeGreaterThan(0);
239
+ // vec-2 is the only fixture whose content mentions "transformer"
240
+ expect(hits.some((h) => h.id === "vec-2")).toBe(true);
241
+ });
242
+
243
+ it("multi-field text query searches both fields", async () => {
244
+ const hits = await index.query({
245
+ query: [{ title: "database" }, { content: "database" }],
246
+ topK: 10,
247
+ include: ["title", "content"],
248
+ });
249
+
250
+ expect(hits.length).toBeGreaterThan(0);
251
+ // vec-3 has "database" in title and content; vec-4's title says "Databases" — only one of the two has to appear
252
+ const ids = hits.map((h) => h.id);
253
+ expect(ids.some((id) => id === "vec-3" || id === "vec-4")).toBe(true);
254
+ });
255
+
256
+ it("text query with no matches returns empty", async () => {
257
+ const hits = await index.query({
258
+ query: [{ content: "xyznonexistentkeyword123" }], // token that appears in no fixture
259
+ topK: 10,
260
+ });
261
+
262
+ expect(hits.length).toBe(0);
263
+ });
264
+ });
265
+
266
+ // ============================================================
267
+ // HYBRID QUERIES - NOT SUPPORTED BY TURBOPUFFER
268
+ // ============================================================
269
+
270
+ describe("hybrid queries", () => {
271
+ it("throws error for vector + text hybrid fusion", async () => {
272
+ await expect(
273
+ index.query({
274
+ query: [{ vector: [1.0, 0.0, 0.0, 0.0] }, { content: "search" }], // one vector signal plus one text signal
275
+ topK: 10,
276
+ }),
277
+ ).rejects.toThrow(/does not support hybrid/); // the rejection message must match this pattern
278
+ });
279
+
280
+ it("throws error for multi-vector fusion", async () => {
281
+ await expect(
282
+ index.query({
283
+ query: [
284
+ { vector: [1.0, 0.0, 0.0, 0.0] },
285
+ { vector: [0.0, 1.0, 0.0, 0.0] },
286
+ ], // two vector signals in the same query
287
+ topK: 10,
288
+ }),
289
+ ).rejects.toThrow(/does not support multi-vector/); // the rejection message must match this pattern
290
+ });
291
+ });
292
+
293
+ // ============================================================
294
+ // FUSION MODES (TEXT ONLY)
295
+ // ============================================================
296
+
297
+ describe("fusion modes", () => {
298
+ it("Sum fusion combines multiple BM25 signals", async () => {
299
+ const hits = await index.query({
300
+ query: [{ title: "database" }, { content: "database" }], // NOTE(review): test name implies `query` with several signals means Sum fusion — confirm against the adapter
301
+ topK: 10,
302
+ include: ["title", "content"],
303
+ });
304
+
305
+ expect(hits.length).toBeGreaterThan(0);
306
+ // Passes if vec-3 or vec-4 appears; same request as the multi-field test in "text search (BM25)"
307
+ const ids = hits.map((h) => h.id);
308
+ expect(ids.some((id) => id === "vec-3" || id === "vec-4")).toBe(true);
309
+ });
310
+
311
+ it("Max fusion takes best BM25 signal per doc", async () => {
312
+ const hits = await index.query({
313
+ max: [{ title: "neural" }, { content: "neural" }], // signals passed under `max` instead of `query`
314
+ topK: 10,
315
+ include: ["title", "content"],
316
+ });
317
+
318
+ expect(hits.length).toBeGreaterThan(0);
319
+ // vec-1 has "neural" in content and vec-2 has "Neural" in title; either one satisfies the assertion
320
+ const ids = hits.map((h) => h.id);
321
+ expect(ids.some((id) => id === "vec-1" || id === "vec-2")).toBe(true);
322
+ });
323
+ });
324
+
325
+ // ============================================================
326
+ // INCLUDE SEMANTICS
327
+ // ============================================================
328
+
329
+ describe("include semantics", () => {
330
+ it("include: true returns all attributes", async () => {
331
+ const hits = await index.query({
332
+ query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
333
+ topK: 1,
334
+ include: true,
335
+ });
336
+
337
+ expect(hits.length).toBe(1);
338
+ expect(hits[0].document).toBeDefined();
339
+ expect(hits[0].document).toHaveProperty("title");
340
+ expect(hits[0].document).toHaveProperty("content");
341
+ expect(hits[0].document).toHaveProperty("category");
342
+ expect(hits[0].document).toHaveProperty("priority"); // the vector field itself is not asserted in this test
343
+ });
344
+
345
+ it("include: false returns no attributes", async () => {
346
+ const hits = await index.query({
347
+ query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
348
+ topK: 1,
349
+ include: false,
350
+ });
351
+
352
+ expect(hits.length).toBe(1);
353
+ // document must be undefined here; an empty object would fail this assertion
354
+ expect(hits[0].document).toBeUndefined();
355
+ });
356
+
357
+ it("include: [fields] returns only specified fields", async () => {
358
+ const hits = await index.query({
359
+ query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
360
+ topK: 1,
361
+ include: ["title", "category"],
362
+ });
363
+
364
+ expect(hits.length).toBe(1);
365
+ expect(hits[0].document).toBeDefined();
366
+ expect(hits[0].document).toHaveProperty("title");
367
+ expect(hits[0].document).toHaveProperty("category");
368
+ // Fields that were not requested (content, priority) must be absent
369
+ expect(hits[0].document).not.toHaveProperty("content");
370
+ expect(hits[0].document).not.toHaveProperty("priority");
371
+ });
372
+
373
+ it("include: [] returns no attributes", async () => {
374
+ const hits = await index.query({
375
+ query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
376
+ topK: 1,
377
+ include: [],
378
+ });
379
+
380
+ expect(hits.length).toBe(1);
381
+ // Empty include array: accept either no document at all or a document with no keys
382
+ expect(
383
+ hits[0].document === undefined ||
384
+ Object.keys(hits[0].document).length === 0,
385
+ ).toBe(true);
386
+ });
387
+ });
388
+
389
+ // ============================================================
390
+ // QUERY WITH FILTERS
391
+ // ============================================================
392
+
393
+ describe("query with filters", () => {
394
+ it("filter by category", async () => {
395
+ const hits = await index.query({
396
+ query: [{ vector: [0.5, 0.5, 0.5, 0.5] }],
397
+ topK: 10, // larger than the fixture count, so topK does not limit the filtered result
398
+ filter: { category: "ml" },
399
+ include: ["category"],
400
+ });
401
+
402
+ expect(hits.length).toBe(2); // vec-1, vec-2
403
+ for (const hit of hits) {
404
+ expect(hit.document?.category).toBe("ml");
405
+ }
406
+ });
407
+
408
+ it("filter by priority range", async () => {
409
+ const hits = await index.query({
410
+ query: [{ vector: [0.5, 0.5, 0.5, 0.5] }],
411
+ topK: 10,
412
+ filter: { priority: { $gte: 3, $lte: 5 } }, // both bounds are checked as inclusive by the assertions below
413
+ include: ["priority"],
414
+ });
415
+
416
+ expect(hits.length).toBe(3); // vec-3, vec-4, vec-5 (priority 3, 4, 5)
417
+ for (const hit of hits) {
418
+ expect(hit.document?.priority).toBeGreaterThanOrEqual(3);
419
+ expect(hit.document?.priority).toBeLessThanOrEqual(5);
420
+ }
421
+ });
422
+
423
+ it("filter with $or", async () => {
424
+ const hits = await index.query({
425
+ query: [{ vector: [0.5, 0.5, 0.5, 0.5] }],
426
+ topK: 10,
427
+ filter: {
428
+ $or: [{ category: "ml" }, { category: "search" }],
429
+ },
430
+ include: ["category"],
431
+ });
432
+
433
+ expect(hits.length).toBe(4); // ml: vec-1, vec-2; search: vec-5, vec-6
434
+ for (const hit of hits) {
435
+ expect(["ml", "search"]).toContain(hit.document?.category);
436
+ }
437
+ });
438
+ });
439
+
440
+ // ============================================================
441
+ // RESULT STRUCTURE
442
+ // ============================================================
443
+
444
+ describe("result structure", () => {
445
+ it("results have required fields", async () => {
446
+ const hits = await index.query({
447
+ query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
448
+ topK: 1,
449
+ });
450
+
451
+ expect(hits.length).toBe(1);
452
+ expect(hits[0]).toHaveProperty("id");
453
+ expect(hits[0]).toHaveProperty("index", testIndexId); // the hit must carry the id of the index that was queried
454
+ expect(hits[0]).toHaveProperty("score");
455
+ expect(typeof hits[0].id).toBe("string");
456
+ expect(typeof hits[0].score).toBe("number");
457
+ });
458
+
459
+ it("score is a valid number", async () => {
460
+ const hits = await index.query({
461
+ query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
462
+ topK: 5,
463
+ });
464
+
465
+ for (const hit of hits) { // NOTE(review): passes vacuously if no hits come back — there is no length assertion in this test
466
+ expect(typeof hit.score).toBe("number");
467
+ expect(Number.isFinite(hit.score)).toBe(true); // rules out NaN and +/-Infinity
468
+ }
469
+ });
470
+ });
471
+ });