@kernl-sdk/turbopuffer 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,423 +0,0 @@
1
- /**
2
- * Comprehensive query modes integration tests.
3
- *
4
- * Tests vector search, BM25 text search, hybrid queries, fusion modes,
5
- * topK behavior, include semantics, and rank ordering against real Turbopuffer API.
6
- */
7
- import { describe, it, expect, beforeAll, afterAll } from "vitest";
8
- import { TurbopufferSearchIndex } from "../search.js";
9
- const TURBOPUFFER_API_KEY = process.env.TURBOPUFFER_API_KEY;
10
- const TURBOPUFFER_REGION = process.env.TURBOPUFFER_REGION ?? "api";
11
- /**
12
- * Helper to create a DenseVector.
13
- */
14
- function vec(values) {
15
- return { kind: "vector", values };
16
- }
17
- /**
18
- * Deterministic test dataset for query testing.
19
- *
20
- * Documents are designed to have predictable vector similarity and text relevance:
21
- * - Vectors are orthogonal basis vectors for predictable ANN results
22
- * - Text content has specific keywords for BM25 testing
23
- */
24
- const TEST_DOCS = [
25
- {
26
- id: "vec-1",
27
- fields: {
28
- title: "Machine Learning Basics",
29
- content: "Introduction to neural networks and deep learning fundamentals",
30
- category: "ml",
31
- priority: 1,
32
- vector: vec([1.0, 0.0, 0.0, 0.0]), // Basis vector 1
33
- },
34
- },
35
- {
36
- id: "vec-2",
37
- fields: {
38
- title: "Advanced Neural Networks",
39
- content: "Deep dive into transformer architectures and attention mechanisms",
40
- category: "ml",
41
- priority: 2,
42
- vector: vec([0.0, 1.0, 0.0, 0.0]), // Basis vector 2
43
- },
44
- },
45
- {
46
- id: "vec-3",
47
- fields: {
48
- title: "Database Fundamentals",
49
- content: "SQL queries and relational database design patterns",
50
- category: "db",
51
- priority: 3,
52
- vector: vec([0.0, 0.0, 1.0, 0.0]), // Basis vector 3
53
- },
54
- },
55
- {
56
- id: "vec-4",
57
- fields: {
58
- title: "Vector Databases",
59
- content: "Introduction to vector search and similarity matching",
60
- category: "db",
61
- priority: 4,
62
- vector: vec([0.0, 0.0, 0.0, 1.0]), // Basis vector 4
63
- },
64
- },
65
- {
66
- id: "vec-5",
67
- fields: {
68
- title: "Search Engine Optimization",
69
- content: "BM25 ranking and full text search algorithms",
70
- category: "search",
71
- priority: 5,
72
- vector: vec([0.5, 0.5, 0.0, 0.0]), // Mix of 1 and 2
73
- },
74
- },
75
- {
76
- id: "vec-6",
77
- fields: {
78
- title: "Hybrid Search Systems",
79
- content: "Combining vector and keyword search for better results",
80
- category: "search",
81
- priority: 6,
82
- vector: vec([0.0, 0.5, 0.5, 0.0]), // Mix of 2 and 3
83
- },
84
- },
85
- ];
86
- describe("Query modes integration tests", () => {
87
- if (!TURBOPUFFER_API_KEY) {
88
- it.skip("requires TURBOPUFFER_API_KEY to be set", () => { });
89
- return;
90
- }
91
- let tpuf;
92
- let index;
93
- const testIndexId = `kernl-query-test-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
94
- beforeAll(async () => {
95
- tpuf = new TurbopufferSearchIndex({
96
- apiKey: TURBOPUFFER_API_KEY,
97
- region: TURBOPUFFER_REGION,
98
- });
99
- // Create index with FTS-enabled fields
100
- await tpuf.createIndex({
101
- id: testIndexId,
102
- schema: {
103
- title: { type: "string", fts: true, filterable: true },
104
- content: { type: "string", fts: true },
105
- category: { type: "string", filterable: true },
106
- priority: { type: "int", filterable: true },
107
- vector: { type: "vector", dimensions: 4 },
108
- },
109
- });
110
- index = tpuf.index(testIndexId);
111
- // Insert test documents
112
- await index.upsert(TEST_DOCS);
113
- // Wait for indexing
114
- await new Promise((r) => setTimeout(r, 2000));
115
- }, 30000);
116
- afterAll(async () => {
117
- try {
118
- await tpuf.deleteIndex(testIndexId);
119
- }
120
- catch {
121
- // Ignore cleanup errors
122
- }
123
- });
124
- // ============================================================
125
- // VECTOR SEARCH
126
- // ============================================================
127
- describe("vector search", () => {
128
- it("returns exact match as top result", async () => {
129
- // Query with basis vector 1 - should match vec-1 best
130
- const hits = await index.query({
131
- query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
132
- topK: 10,
133
- });
134
- expect(hits.length).toBeGreaterThan(0);
135
- expect(hits[0].id).toBe("vec-1");
136
- });
137
- it("returns results in similarity order", async () => {
138
- // Query with basis vector 2
139
- const hits = await index.query({
140
- query: [{ vector: [0.0, 1.0, 0.0, 0.0] }],
141
- topK: 10,
142
- });
143
- expect(hits.length).toBeGreaterThan(0);
144
- expect(hits[0].id).toBe("vec-2");
145
- // Scores should be in descending order (or ascending distance)
146
- for (let i = 1; i < hits.length; i++) {
147
- expect(hits[i].score).toBeLessThanOrEqual(hits[i - 1].score);
148
- }
149
- });
150
- it("mixed vector query finds composite matches", async () => {
151
- // Query with mix of basis 1 and 2 - should prefer vec-5 which has [0.5, 0.5, 0, 0]
152
- const hits = await index.query({
153
- query: [{ vector: [0.5, 0.5, 0.0, 0.0] }],
154
- topK: 10,
155
- });
156
- expect(hits.length).toBeGreaterThan(0);
157
- expect(hits[0].id).toBe("vec-5");
158
- });
159
- });
160
- // ============================================================
161
- // TOPK BEHAVIOR
162
- // ============================================================
163
- describe("topK behavior", () => {
164
- it("topK smaller than doc count returns exactly topK", async () => {
165
- const hits = await index.query({
166
- query: [{ vector: [0.5, 0.5, 0.5, 0.5] }],
167
- topK: 3,
168
- });
169
- expect(hits.length).toBe(3);
170
- });
171
- it("topK larger than doc count returns all docs", async () => {
172
- const hits = await index.query({
173
- query: [{ vector: [0.5, 0.5, 0.5, 0.5] }],
174
- topK: 100,
175
- });
176
- // We have 6 docs
177
- expect(hits.length).toBe(6);
178
- });
179
- it("topK of 1 returns single best match", async () => {
180
- const hits = await index.query({
181
- query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
182
- topK: 1,
183
- });
184
- expect(hits.length).toBe(1);
185
- expect(hits[0].id).toBe("vec-1");
186
- });
187
- });
188
- // ============================================================
189
- // TEXT SEARCH (BM25)
190
- // ============================================================
191
- describe("text search (BM25)", () => {
192
- it("single field text query finds matching docs", async () => {
193
- const hits = await index.query({
194
- query: [{ title: "neural" }],
195
- topK: 10,
196
- include: ["title"],
197
- });
198
- expect(hits.length).toBeGreaterThan(0);
199
- // Should find docs with "neural" in title
200
- const titles = hits.map((h) => h.document?.title);
201
- expect(titles.some((t) => t?.toLowerCase().includes("neural"))).toBe(true);
202
- });
203
- it("content field text query finds matching docs", async () => {
204
- const hits = await index.query({
205
- query: [{ content: "transformer" }],
206
- topK: 10,
207
- include: ["content"],
208
- });
209
- expect(hits.length).toBeGreaterThan(0);
210
- // vec-2 has "transformer" in content
211
- expect(hits.some((h) => h.id === "vec-2")).toBe(true);
212
- });
213
- it("multi-field text query searches both fields", async () => {
214
- const hits = await index.query({
215
- query: [{ title: "database" }, { content: "database" }],
216
- topK: 10,
217
- include: ["title", "content"],
218
- });
219
- expect(hits.length).toBeGreaterThan(0);
220
- // vec-3 and vec-4 have "database" related content
221
- const ids = hits.map((h) => h.id);
222
- expect(ids.some((id) => id === "vec-3" || id === "vec-4")).toBe(true);
223
- });
224
- it("text query with no matches returns empty", async () => {
225
- const hits = await index.query({
226
- query: [{ content: "xyznonexistentkeyword123" }],
227
- topK: 10,
228
- });
229
- expect(hits.length).toBe(0);
230
- });
231
- });
232
- // ============================================================
233
- // HYBRID QUERIES (VECTOR + TEXT)
234
- // ============================================================
235
- describe("hybrid queries", () => {
236
- it("combines vector and text signals", async () => {
237
- // Vector points to vec-1, text "search" appears in vec-5, vec-6
238
- const hits = await index.query({
239
- query: [
240
- { vector: [1.0, 0.0, 0.0, 0.0] },
241
- { content: "search" },
242
- ],
243
- topK: 10,
244
- });
245
- expect(hits.length).toBeGreaterThan(0);
246
- // Results should include docs matching either signal
247
- const ids = hits.map((h) => h.id);
248
- // vec-1 should be present (vector match)
249
- expect(ids).toContain("vec-1");
250
- });
251
- it("hybrid with filter narrows results", async () => {
252
- const hits = await index.query({
253
- query: [
254
- { vector: [0.5, 0.5, 0.5, 0.5] },
255
- { content: "vector" },
256
- ],
257
- topK: 10,
258
- filter: { category: "db" },
259
- include: ["category"],
260
- });
261
- expect(hits.length).toBeGreaterThan(0);
262
- for (const hit of hits) {
263
- expect(hit.document?.category).toBe("db");
264
- }
265
- });
266
- });
267
- // ============================================================
268
- // FUSION MODES
269
- // ============================================================
270
- describe("fusion modes", () => {
271
- it("Sum fusion combines multiple signals", async () => {
272
- const hits = await index.query({
273
- query: [
274
- { vector: [1.0, 0.0, 0.0, 0.0] },
275
- { vector: [0.0, 1.0, 0.0, 0.0] },
276
- ],
277
- topK: 10,
278
- });
279
- expect(hits.length).toBeGreaterThan(0);
280
- // With Sum fusion, vec-5 [0.5, 0.5, 0, 0] should score well
281
- // as it has some similarity to both query vectors
282
- });
283
- it("Max fusion takes best signal per doc", async () => {
284
- const hits = await index.query({
285
- max: [
286
- { vector: [1.0, 0.0, 0.0, 0.0] },
287
- { vector: [0.0, 1.0, 0.0, 0.0] },
288
- ],
289
- topK: 10,
290
- });
291
- expect(hits.length).toBeGreaterThan(0);
292
- // With Max fusion, vec-1 and vec-2 should be top as they
293
- // exactly match one of the query vectors
294
- const topIds = hits.slice(0, 2).map((h) => h.id);
295
- expect(topIds).toContain("vec-1");
296
- expect(topIds).toContain("vec-2");
297
- });
298
- });
299
- // ============================================================
300
- // INCLUDE SEMANTICS
301
- // ============================================================
302
- describe("include semantics", () => {
303
- it("include: true returns all attributes", async () => {
304
- const hits = await index.query({
305
- query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
306
- topK: 1,
307
- include: true,
308
- });
309
- expect(hits.length).toBe(1);
310
- expect(hits[0].document).toBeDefined();
311
- expect(hits[0].document).toHaveProperty("title");
312
- expect(hits[0].document).toHaveProperty("content");
313
- expect(hits[0].document).toHaveProperty("category");
314
- expect(hits[0].document).toHaveProperty("priority");
315
- });
316
- it("include: false returns no attributes", async () => {
317
- const hits = await index.query({
318
- query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
319
- topK: 1,
320
- include: false,
321
- });
322
- expect(hits.length).toBe(1);
323
- // document should be undefined or empty
324
- expect(hits[0].document).toBeUndefined();
325
- });
326
- it("include: [fields] returns only specified fields", async () => {
327
- const hits = await index.query({
328
- query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
329
- topK: 1,
330
- include: ["title", "category"],
331
- });
332
- expect(hits.length).toBe(1);
333
- expect(hits[0].document).toBeDefined();
334
- expect(hits[0].document).toHaveProperty("title");
335
- expect(hits[0].document).toHaveProperty("category");
336
- // Should NOT have content or priority
337
- expect(hits[0].document).not.toHaveProperty("content");
338
- expect(hits[0].document).not.toHaveProperty("priority");
339
- });
340
- it("include: [] returns no attributes", async () => {
341
- const hits = await index.query({
342
- query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
343
- topK: 1,
344
- include: [],
345
- });
346
- expect(hits.length).toBe(1);
347
- // Empty include array should return no fields
348
- expect(hits[0].document === undefined ||
349
- Object.keys(hits[0].document).length === 0).toBe(true);
350
- });
351
- });
352
- // ============================================================
353
- // QUERY WITH FILTERS
354
- // ============================================================
355
- describe("query with filters", () => {
356
- it("filter by category", async () => {
357
- const hits = await index.query({
358
- query: [{ vector: [0.5, 0.5, 0.5, 0.5] }],
359
- topK: 10,
360
- filter: { category: "ml" },
361
- include: ["category"],
362
- });
363
- expect(hits.length).toBe(2); // vec-1, vec-2
364
- for (const hit of hits) {
365
- expect(hit.document?.category).toBe("ml");
366
- }
367
- });
368
- it("filter by priority range", async () => {
369
- const hits = await index.query({
370
- query: [{ vector: [0.5, 0.5, 0.5, 0.5] }],
371
- topK: 10,
372
- filter: { priority: { $gte: 3, $lte: 5 } },
373
- include: ["priority"],
374
- });
375
- expect(hits.length).toBe(3); // priority 3, 4, 5
376
- for (const hit of hits) {
377
- expect(hit.document?.priority).toBeGreaterThanOrEqual(3);
378
- expect(hit.document?.priority).toBeLessThanOrEqual(5);
379
- }
380
- });
381
- it("filter with $or", async () => {
382
- const hits = await index.query({
383
- query: [{ vector: [0.5, 0.5, 0.5, 0.5] }],
384
- topK: 10,
385
- filter: {
386
- $or: [{ category: "ml" }, { category: "search" }],
387
- },
388
- include: ["category"],
389
- });
390
- expect(hits.length).toBe(4); // ml: 2, search: 2
391
- for (const hit of hits) {
392
- expect(["ml", "search"]).toContain(hit.document?.category);
393
- }
394
- });
395
- });
396
- // ============================================================
397
- // RESULT STRUCTURE
398
- // ============================================================
399
- describe("result structure", () => {
400
- it("results have required fields", async () => {
401
- const hits = await index.query({
402
- query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
403
- topK: 1,
404
- });
405
- expect(hits.length).toBe(1);
406
- expect(hits[0]).toHaveProperty("id");
407
- expect(hits[0]).toHaveProperty("index", testIndexId);
408
- expect(hits[0]).toHaveProperty("score");
409
- expect(typeof hits[0].id).toBe("string");
410
- expect(typeof hits[0].score).toBe("number");
411
- });
412
- it("score is a valid number", async () => {
413
- const hits = await index.query({
414
- query: [{ vector: [1.0, 0.0, 0.0, 0.0] }],
415
- topK: 5,
416
- });
417
- for (const hit of hits) {
418
- expect(typeof hit.score).toBe("number");
419
- expect(Number.isFinite(hit.score)).toBe(true);
420
- }
421
- });
422
- });
423
- });
package/dist/convert.d.ts DELETED
@@ -1,68 +0,0 @@
1
- /**
2
- * Codecs for converting between kernl and Turbopuffer types.
3
- */
4
- import type { Codec } from "@kernl-sdk/shared/lib";
5
- import type { FieldSchema, VectorFieldSchema, ScalarFieldSchema, SearchDocument, SearchQuery, SearchHit, FilterExpression } from "@kernl-sdk/retrieval";
6
- import type { AttributeSchema, DistanceMetric, Row, NamespaceQueryParams } from "@turbopuffer/turbopuffer/resources/namespaces";
7
- import type { Filter } from "@turbopuffer/turbopuffer/resources/custom";
8
- type Similarity = VectorFieldSchema["similarity"];
9
- type ScalarType = ScalarFieldSchema["type"];
10
- type TpufType = string;
11
- /**
12
- * Codec for converting kernl scalar types to Turbopuffer attribute types.
13
- */
14
- export declare const SCALAR_TYPE: Codec<ScalarType, TpufType>;
15
- /**
16
- * Codec for converting similarity metric to Turbopuffer distance metric.
17
- *
18
- * Turbopuffer supports: cosine_distance, euclidean_squared
19
- * We support: cosine, euclidean, dot_product
20
- */
21
- export declare const SIMILARITY: Codec<Similarity, DistanceMetric>;
22
- /**
23
- * Codec-like converter for FieldSchema to Turbopuffer AttributeSchema.
24
- *
25
- * Takes the field name as context since Turbopuffer requires `ann: true`
26
- * only on the special `vector` attribute.
27
- */
28
- export declare const FIELD_SCHEMA: {
29
- encode: (field: FieldSchema, name: string) => AttributeSchema;
30
- decode: () => never;
31
- };
32
- /**
33
- * Codec for converting a full schema record.
34
- *
35
- * Validates that vector fields are named `vector` since Turbopuffer only
36
- * supports ANN indexing on that specific attribute name.
37
- */
38
- export declare const INDEX_SCHEMA: Codec<Record<string, FieldSchema>, Record<string, AttributeSchema>>;
39
- /**
40
- * Codec for converting SearchDocument to Turbopuffer Row.
41
- */
42
- export declare const DOCUMENT: Codec<SearchDocument, Row>;
43
- /**
44
- * Codec for converting FilterExpression to Turbopuffer Filter.
45
- */
46
- export declare const FILTER: {
47
- encode: (filter: FilterExpression) => Filter;
48
- decode: (_filter: Filter) => FilterExpression;
49
- };
50
- /**
51
- * Codec for converting SearchQuery to Turbopuffer NamespaceQueryParams.
52
- *
53
- * Note: Hybrid search (vector + text) requires multi-query which is
54
- * handled separately.
55
- */
56
- export declare const QUERY: {
57
- encode: (query: SearchQuery) => NamespaceQueryParams;
58
- decode: (_params: NamespaceQueryParams) => SearchQuery;
59
- };
60
- /**
61
- * Codec for converting Turbopuffer Row to SearchHit.
62
- */
63
- export declare const SEARCH_HIT: {
64
- encode: (_hit: SearchHit) => Row;
65
- decode: (row: Row, index: string) => SearchHit;
66
- };
67
- export {};
68
- //# sourceMappingURL=convert.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"convert.d.ts","sourceRoot":"","sources":["../src/convert.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,KAAK,EACV,WAAW,EACX,iBAAiB,EACjB,iBAAiB,EAGjB,cAAc,EACd,WAAW,EACX,SAAS,EACT,gBAAgB,EAIjB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EACV,eAAe,EAEf,cAAc,EACd,GAAG,EACH,oBAAoB,EACrB,MAAM,+CAA+C,CAAC;AACvD,OAAO,KAAK,EAAE,MAAM,EAAU,MAAM,2CAA2C,CAAC;AAEhF,KAAK,UAAU,GAAG,iBAAiB,CAAC,YAAY,CAAC,CAAC;AAClD,KAAK,UAAU,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;AAC5C,KAAK,QAAQ,GAAG,MAAM,CAAC;AAEvB;;GAEG;AACH,eAAO,MAAM,WAAW,EAAE,KAAK,CAAC,UAAU,EAAE,QAAQ,CA4CnD,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,UAAU,EAAE,KAAK,CAAC,UAAU,EAAE,cAAc,CAqBxD,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,YAAY;oBACP,WAAW,QAAQ,MAAM,KAAG,eAAe;;CAiC5D,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,YAAY,EAAE,KAAK,CAC9B,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,EAC3B,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CA0BhC,CAAC;AAyBF;;GAEG;AACH,eAAO,MAAM,QAAQ,EAAE,KAAK,CAAC,cAAc,EAAE,GAAG,CAc/C,CAAC;AAkEF;;GAEG;AACH,eAAO,MAAM,MAAM;qBACA,gBAAgB,KAAG,MAAM;sBAsCxB,MAAM,KAAG,gBAAgB;CAG5C,CAAC;AA4BF;;;;;GAKG;AACH,eAAO,MAAM,KAAK;oBACA,WAAW,KAAG,oBAAoB;sBAsChC,oBAAoB,KAAG,WAAW;CAGrD,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,UAAU;mBACN,SAAS,KAAG,GAAG;kBAIhB,GAAG,SAAS,MAAM,KAAG,SAAS;CAqB7C,CAAC"}