@kernl-sdk/turbopuffer 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/convert.js DELETED
@@ -1,333 +0,0 @@
1
- /**
2
- * Codecs for converting between kernl and Turbopuffer types.
3
- */
4
/**
 * Codec for converting kernl scalar types to Turbopuffer attribute types.
 *
 * `encode` maps a kernl type name to the Turbopuffer attribute type string;
 * `decode` maps it back. Unrecognised names fall back to "string" in both
 * directions.
 */
export const SCALAR_TYPE = {
    /**
     * @param {string} type - kernl scalar type name (e.g. "int", "date[]").
     * @returns {string} Turbopuffer attribute type.
     */
    encode: (type) => {
        switch (type) {
            case "string":
                return "string";
            case "int":
                return "int";
            case "float":
                // Turbopuffer has a native float attribute type; mapping to
                // "int" would declare an integer schema for fractional data.
                return "float";
            case "boolean":
                return "bool";
            case "date":
                return "datetime";
            case "string[]":
                return "[]string";
            case "int[]":
                return "[]int";
            case "float[]":
                return "[]float";
            case "date[]":
                return "[]datetime";
            default:
                return "string";
        }
    },
    /**
     * @param {string} type - Turbopuffer attribute type.
     * @returns {string} kernl scalar type name.
     */
    decode: (type) => {
        switch (type) {
            case "string":
                return "string";
            case "int":
                return "int";
            case "float":
                return "float";
            case "bool":
                return "boolean";
            case "datetime":
                return "date";
            case "[]string":
                return "string[]";
            case "[]int":
                return "int[]";
            case "[]float":
                return "float[]";
            case "[]datetime":
                return "date[]";
            default:
                return "string";
        }
    },
};
51
/**
 * Codec between kernl similarity names and Turbopuffer distance metrics.
 *
 * Turbopuffer side: cosine_distance, euclidean_squared.
 * kernl side: cosine, euclidean, dot_product.
 *
 * Only the euclidean pair is matched explicitly; every other input
 * (including "dot_product" and unknown values) resolves to the cosine variant.
 */
export const SIMILARITY = {
    encode: (similarity) =>
        similarity === "euclidean" ? "euclidean_squared" : "cosine_distance",
    decode: (metric) =>
        metric === "euclidean_squared" ? "euclidean" : "cosine",
};
78
/**
 * Codec-like converter from a single FieldSchema to a Turbopuffer
 * AttributeSchema.
 *
 * The field name is passed alongside the field because `ann` is set to true
 * only when the attribute is called `vector`.
 */
export const FIELD_SCHEMA = {
    /**
     * @param {object} field - kernl field definition.
     * @param {string} name - attribute name the field is stored under.
     * @returns {object} Turbopuffer attribute configuration.
     */
    encode: (field, name) => {
        const kind = field.type;
        const isScalar = kind !== "vector" && kind !== "sparse-vector";

        if (isScalar) {
            const attribute = { type: SCALAR_TYPE.encode(kind) };
            if (field.filterable) {
                attribute.filterable = true;
            }
            if (field.fts) {
                // An object carries a language option; any other truthy value
                // simply switches full-text search on.
                let fullText = true;
                if (typeof field.fts === "object") {
                    fullText = { language: field.fts.language };
                }
                attribute.full_text_search = fullText;
            }
            return attribute;
        }

        // Vector fields: "[<dimensions>]f16" or "[<dimensions>]f32".
        let elementType = "f32";
        if (field.quantization === "f16") {
            elementType = "f16";
        }
        return {
            type: `[${field.dimensions}]${elementType}`,
            ann: name === "vector",
        };
    },
    decode: () => {
        throw new Error("FIELD_SCHEMA.decode: not implemented");
    },
};
114
/**
 * Codec for a whole schema record (field name -> field definition).
 *
 * Any field of type "vector" or "sparse-vector" must be stored under the
 * name `vector`; encoding throws on the first field that violates this.
 */
export const INDEX_SCHEMA = {
    /**
     * @param {object} schema - map of field name to kernl field definition.
     * @returns {object} map of field name to Turbopuffer attribute config.
     * @throws {Error} when a vector field is not named "vector".
     */
    encode: (schema) => {
        const encoded = {};
        Object.keys(schema).forEach((name) => {
            const field = schema[name];
            const vectorLike = ["vector", "sparse-vector"].includes(field.type);
            if (vectorLike && name !== "vector") {
                throw new Error(`Turbopuffer requires vector fields to be named "vector", got "${name}". Rename your field or use a different search provider.`);
            }
            encoded[name] = FIELD_SCHEMA.encode(field, name);
        });
        return encoded;
    },
    decode: () => {
        throw new Error("INDEX_SCHEMA.decode: not implemented");
    },
};
138
/**
 * Tell whether a value is a DenseVector wrapper, i.e. a non-null object
 * whose `kind` property equals "vector".
 *
 * @param {unknown} val - value to inspect.
 * @returns {boolean}
 */
function isDenseVector(val) {
    if (val === null || typeof val !== "object") {
        return false;
    }
    if (!("kind" in val)) {
        return false;
    }
    return val.kind === "vector";
}
147
/**
 * Turn a FieldValue into the value stored on a Turbopuffer row.
 *
 * A DenseVector wrapper is replaced by its `values` property; anything else
 * is passed through unchanged.
 *
 * @param {unknown} val - field value from a search document.
 * @returns {unknown} value to write to the row.
 */
function encodeFieldValue(val) {
    return isDenseVector(val) ? val.values : val;
}
157
/**
 * Codec from a SearchDocument to a Turbopuffer row.
 *
 * The row starts with the document id; each entry of `doc.fields` is then
 * copied onto it after passing through encodeFieldValue.
 */
export const DOCUMENT = {
    /**
     * @param {{ id: unknown, fields: object }} doc - document to encode.
     * @returns {object} flat row object.
     */
    encode: (doc) => {
        const encodedRow = { id: doc.id };
        const source = doc.fields;
        for (const attribute of Object.keys(source)) {
            encodedRow[attribute] = encodeFieldValue(source[attribute]);
        }
        return encodedRow;
    },
    decode: (_row) => {
        throw new Error("DOCUMENT.decode: not implemented");
    },
};
172
/**
 * One-to-one mapping from kernl comparison operators to Turbopuffer filter
 * operators.
 *
 * Operators whose value or shape has to be rewritten (`starts_with`,
 * `ends_with`, `contains_all`) are handled explicitly in FILTER.encode and are
 * intentionally absent from this table.
 */
const FILTER_OP_MAP = {
    eq: "Eq",
    neq: "NotEq",
    gt: "Gt",
    gte: "Gte",
    lt: "Lt",
    lte: "Lte",
    in: "In",
    nin: "NotIn",
    contains: "Contains",
    contains_any: "ContainsAny",
};
/**
 * Type guards for filter expressions.
 */
function isFieldFilter(f) {
    return "field" in f && "op" in f && "value" in f;
}
function isExistsFilter(f) {
    return ("field" in f && "op" in f && (f.op === "exists" || f.op === "not_exists"));
}
function isAndFilter(f) {
    return "and" in f;
}
function isOrFilter(f) {
    return "or" in f;
}
function isNotFilter(f) {
    return "not" in f;
}
/**
 * Codec for converting FilterExpression to Turbopuffer Filter.
 */
export const FILTER = {
    /**
     * Recursively encode a kernl filter expression.
     *
     * @param {object} filter - `{and}`, `{or}`, `{not}`, an exists filter
     *   (`{field, op: "exists" | "not_exists"}`) or a field filter
     *   (`{field, op, value}`).
     * @returns {Array} Turbopuffer filter tuple.
     * @throws {Error} when the expression shape or the operator is not
     *   recognised.
     */
    encode: (filter) => {
        if (isAndFilter(filter)) {
            return ["And", filter.and.map((sub) => FILTER.encode(sub))];
        }
        if (isOrFilter(filter)) {
            return ["Or", filter.or.map((sub) => FILTER.encode(sub))];
        }
        if (isNotFilter(filter)) {
            return ["Not", FILTER.encode(filter.not)];
        }
        // Must be tested before isFieldFilter: an exists filter may also
        // carry a `value` key.
        if (isExistsFilter(filter)) {
            // exists → NotEq null, not_exists → Eq null
            return filter.op === "exists"
                ? [filter.field, "NotEq", null]
                : [filter.field, "Eq", null];
        }
        if (isFieldFilter(filter)) {
            const { field, op, value } = filter;
            switch (op) {
                case "starts_with":
                    return [field, "Glob", `${value}*`];
                case "ends_with":
                    return [field, "Glob", `*${value}`];
                case "contains_all": {
                    // "ContainsAny" would match rows holding just one of the
                    // values, so require every value with its own "Contains"
                    // clause, combined with "And".
                    const required = Array.isArray(value) ? value : [value];
                    const clauses = required.map((item) => [field, "Contains", item]);
                    return clauses.length === 1 ? clauses[0] : ["And", clauses];
                }
                default: {
                    // Own-property check so inherited keys such as
                    // "constructor" are not mistaken for operators.
                    if (!Object.prototype.hasOwnProperty.call(FILTER_OP_MAP, op)) {
                        throw new Error(`Unsupported filter operator: ${String(op)}`);
                    }
                    return [field, FILTER_OP_MAP[op], value];
                }
            }
        }
        throw new Error(`Unknown filter type: ${JSON.stringify(filter)}`);
    },
    decode: (_filter) => {
        throw new Error("FILTER.decode: not implemented");
    },
};
243
/**
 * Produce the `rank_by` tuple for a vector query: the attribute name
 * "vector", the "ANN" operator, then the query vector itself.
 *
 * @param {unknown} vector - query vector, passed through untouched.
 * @returns {Array} `["vector", "ANN", vector]`.
 */
function buildVectorRankBy(vector) {
    const attribute = "vector";
    const operator = "ANN";
    return [attribute, operator, vector];
}
249
/**
 * Produce the `rank_by` expression for a full-text query.
 *
 * One field yields a single BM25 clause; several fields yield a "Sum" over
 * one BM25 clause per field.
 *
 * @param {string} text - query text.
 * @param {string[]} fields - attributes to search.
 * @returns {Array} BM25 clause or `["Sum", clauses]`.
 * @throws {Error} when `fields` is missing or empty.
 */
function buildTextRankBy(text, fields) {
    const toClause = (attribute) => [attribute, "BM25", text];
    if (!fields || fields.length === 0) {
        throw new Error("textFields required for full-text search");
    }
    return fields.length === 1
        ? toClause(fields[0])
        : ["Sum", fields.map(toClause)];
}
263
/**
 * Codec for converting SearchQuery to Turbopuffer NamespaceQueryParams.
 *
 * Note: Hybrid search (vector + text) requires multi-query which is
 * handled separately. When both `vector` and `text` are set, only the vector
 * ranking is emitted here.
 */
export const QUERY = {
    /**
     * @param {object} query - kernl search query. Reads `vector`, `text`,
     *   `textFields`, `topK`, `filter`, `includeFields` (boolean or list of
     *   attribute names) and `includeVectors`.
     * @returns {object} params with any of `rank_by`, `top_k`, `filters`,
     *   `include_attributes`.
     * @throws {Error} when `text` is given without `textFields`, or when the
     *   filter cannot be encoded.
     */
    encode: (query) => {
        const params = {};
        // Determine ranking method
        if (query.vector) {
            params.rank_by = buildVectorRankBy(query.vector);
        }
        else if (query.text) {
            params.rank_by = buildTextRankBy(query.text, query.textFields);
        }
        // Top K
        if (query.topK !== undefined) {
            params.top_k = query.topK;
        }
        // Filters
        if (query.filter) {
            params.filters = FILTER.encode(query.filter);
        }
        // Include attributes
        const { includeFields, includeVectors } = query;
        if (includeFields === undefined) {
            if (includeVectors) {
                params.include_attributes = true; // include all to get vector
            }
        }
        else if (typeof includeFields === "boolean") {
            // `false` alone means "no attributes", but an explicit request
            // for vectors must still bring back the vector attribute.
            params.include_attributes =
                !includeFields && includeVectors ? ["vector"] : includeFields;
        }
        else {
            // Copy so the caller's list is never mutated.
            const attrs = [...includeFields];
            if (includeVectors && !attrs.includes("vector")) {
                attrs.push("vector");
            }
            params.include_attributes = attrs;
        }
        return params;
    },
    decode: (_params) => {
        throw new Error("QUERY.decode: not implemented");
    },
};
309
/**
 * Codec from a Turbopuffer result row to a SearchHit.
 *
 * Only decoding is implemented.
 */
export const SEARCH_HIT = {
    encode: (_hit) => {
        throw new Error("SEARCH_HIT.encode: not implemented");
    },
    /**
     * @param {object} row - result row; `id`, `$dist` and `vector` are
     *   treated specially, every other property becomes a field.
     * @param {unknown} index - copied verbatim onto the hit.
     * @returns {object} hit with `id` (stringified), `index`, `score`
     *   (`$dist` when numeric, else 0) and, only when present, `vector`
     *   and a non-empty `fields` object.
     */
    decode: (row, index) => {
        const { id: rawId, $dist: distance, vector: embedding, ...attributes } = row;
        const hasNumericDistance = typeof distance === "number";
        const hasAttributes = Object.keys(attributes).length > 0;
        return {
            id: String(rawId),
            index,
            score: hasNumericDistance ? distance : 0,
            ...(embedding !== undefined ? { vector: embedding } : {}),
            ...(hasAttributes ? { fields: attributes } : {}),
        };
    },
};