@kernl-sdk/turbopuffer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -0
- package/.turbo/turbo-check-types.log +60 -0
- package/CHANGELOG.md +33 -0
- package/LICENSE +201 -0
- package/README.md +60 -0
- package/dist/__tests__/convert.test.d.ts +2 -0
- package/dist/__tests__/convert.test.d.ts.map +1 -0
- package/dist/__tests__/convert.test.js +346 -0
- package/dist/__tests__/filter.test.d.ts +8 -0
- package/dist/__tests__/filter.test.d.ts.map +1 -0
- package/dist/__tests__/filter.test.js +649 -0
- package/dist/__tests__/filters.integration.test.d.ts +8 -0
- package/dist/__tests__/filters.integration.test.d.ts.map +1 -0
- package/dist/__tests__/filters.integration.test.js +502 -0
- package/dist/__tests__/integration/filters.integration.test.d.ts +8 -0
- package/dist/__tests__/integration/filters.integration.test.d.ts.map +1 -0
- package/dist/__tests__/integration/filters.integration.test.js +475 -0
- package/dist/__tests__/integration/integration.test.d.ts +2 -0
- package/dist/__tests__/integration/integration.test.d.ts.map +1 -0
- package/dist/__tests__/integration/integration.test.js +329 -0
- package/dist/__tests__/integration/lifecycle.integration.test.d.ts +8 -0
- package/dist/__tests__/integration/lifecycle.integration.test.d.ts.map +1 -0
- package/dist/__tests__/integration/lifecycle.integration.test.js +370 -0
- package/dist/__tests__/integration/memory.integration.test.d.ts +2 -0
- package/dist/__tests__/integration/memory.integration.test.d.ts.map +1 -0
- package/dist/__tests__/integration/memory.integration.test.js +287 -0
- package/dist/__tests__/integration/query.integration.test.d.ts +8 -0
- package/dist/__tests__/integration/query.integration.test.d.ts.map +1 -0
- package/dist/__tests__/integration/query.integration.test.js +385 -0
- package/dist/__tests__/integration.test.d.ts +2 -0
- package/dist/__tests__/integration.test.d.ts.map +1 -0
- package/dist/__tests__/integration.test.js +343 -0
- package/dist/__tests__/lifecycle.integration.test.d.ts +8 -0
- package/dist/__tests__/lifecycle.integration.test.d.ts.map +1 -0
- package/dist/__tests__/lifecycle.integration.test.js +385 -0
- package/dist/__tests__/query.integration.test.d.ts +8 -0
- package/dist/__tests__/query.integration.test.d.ts.map +1 -0
- package/dist/__tests__/query.integration.test.js +423 -0
- package/dist/__tests__/query.test.d.ts +8 -0
- package/dist/__tests__/query.test.d.ts.map +1 -0
- package/dist/__tests__/query.test.js +472 -0
- package/dist/convert/document.d.ts +20 -0
- package/dist/convert/document.d.ts.map +1 -0
- package/dist/convert/document.js +72 -0
- package/dist/convert/filter.d.ts +15 -0
- package/dist/convert/filter.d.ts.map +1 -0
- package/dist/convert/filter.js +109 -0
- package/dist/convert/index.d.ts +8 -0
- package/dist/convert/index.d.ts.map +1 -0
- package/dist/convert/index.js +7 -0
- package/dist/convert/query.d.ts +22 -0
- package/dist/convert/query.d.ts.map +1 -0
- package/dist/convert/query.js +111 -0
- package/dist/convert/schema.d.ts +39 -0
- package/dist/convert/schema.d.ts.map +1 -0
- package/dist/convert/schema.js +124 -0
- package/dist/convert.d.ts +68 -0
- package/dist/convert.d.ts.map +1 -0
- package/dist/convert.js +333 -0
- package/dist/handle.d.ts +34 -0
- package/dist/handle.d.ts.map +1 -0
- package/dist/handle.js +72 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +26 -0
- package/dist/search.d.ts +85 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +167 -0
- package/dist/types.d.ts +14 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +1 -0
- package/package.json +57 -0
- package/src/__tests__/convert.test.ts +425 -0
- package/src/__tests__/filter.test.ts +730 -0
- package/src/__tests__/integration/filters.integration.test.ts +558 -0
- package/src/__tests__/integration/integration.test.ts +399 -0
- package/src/__tests__/integration/lifecycle.integration.test.ts +464 -0
- package/src/__tests__/integration/memory.integration.test.ts +353 -0
- package/src/__tests__/integration/query.integration.test.ts +471 -0
- package/src/__tests__/query.test.ts +636 -0
- package/src/convert/document.ts +95 -0
- package/src/convert/filter.ts +123 -0
- package/src/convert/index.ts +8 -0
- package/src/convert/query.ts +151 -0
- package/src/convert/schema.ts +163 -0
- package/src/handle.ts +104 -0
- package/src/index.ts +31 -0
- package/src/search.ts +207 -0
- package/src/types.ts +14 -0
- package/tsconfig.json +13 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Filter conversion codecs.
|
|
3
|
+
*
|
|
4
|
+
* Converts MongoDB-style filters to Turbopuffer filter format.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Codec that turns a MongoDB-style filter object into a Turbopuffer filter.
 *
 * `encode` walks the own enumerable entries of the filter:
 * - entries whose value is `undefined` are skipped;
 * - `$and` / `$or` with an array value encode every child recursively
 *   (a one-element `$and` is unwrapped to that single child);
 * - `$not` encodes its operand and wraps it in `["Not", ...]`;
 * - a value that `isFieldOps` recognises is expanded by `encodeFieldOps`;
 * - anything else becomes an equality test `[field, "Eq", value]`.
 *
 * One resulting clause is returned as-is, several are wrapped in
 * `["And", clauses]`, and zero clauses raise `Error("Empty filter")`.
 *
 * `decode` is not implemented and always throws.
 */
export const FILTER = {
    encode: (filter) => {
        const clauses = [];
        for (const [name, operand] of Object.entries(filter)) {
            if (operand === undefined) {
                continue;
            }
            const isList = Array.isArray(operand);
            if (name === "$and" && isList) {
                // Logical AND: a single child needs no wrapper.
                const children = operand.map((child) => FILTER.encode(child));
                clauses.push(children.length === 1 ? children[0] : ["And", children]);
            }
            else if (name === "$or" && isList) {
                // Logical OR: always wrapped, even with one child.
                clauses.push(["Or", operand.map((child) => FILTER.encode(child))]);
            }
            else if (name === "$not") {
                clauses.push(["Not", FILTER.encode(operand)]);
            }
            else if (isFieldOps(operand)) {
                // Field-level operators: { field: { $gt: 1, ... } }
                for (const clause of encodeFieldOps(name, operand)) {
                    clauses.push(clause);
                }
            }
            else {
                // Plain equality: { field: value }
                clauses.push([name, "Eq", operand]);
            }
        }
        switch (clauses.length) {
            case 0:
                throw new Error("Empty filter");
            case 1:
                return clauses[0];
            default:
                return ["And", clauses];
        }
    },
    decode: (_filter) => {
        throw new Error("FILTER.decode: not implemented");
    },
};
|
|
56
|
+
/**
 * Tell whether a filter value is an operator object such as `{ $gt: 3 }`.
 *
 * Returns true only for a non-null, non-array object that has at least one
 * own enumerable key starting with "$"; every other value yields false.
 */
function isFieldOps(value) {
    const isRecord = value !== null && typeof value === "object" && !Array.isArray(value);
    if (!isRecord) {
        return false;
    }
    for (const name of Object.keys(value)) {
        if (name.startsWith("$")) {
            return true;
        }
    }
    return false;
}
|
|
66
|
+
/**
 * Supported field operators, in the order their conditions are emitted.
 *
 * Each entry maps an operator key to a function building the Turbopuffer
 * condition tuple for `(field, operand)`.
 */
const FIELD_OP_ENCODERS = {
    $eq: (field, operand) => [field, "Eq", operand],
    $neq: (field, operand) => [field, "NotEq", operand],
    $gt: (field, operand) => [field, "Gt", operand],
    $gte: (field, operand) => [field, "Gte", operand],
    $lt: (field, operand) => [field, "Lt", operand],
    $lte: (field, operand) => [field, "Lte", operand],
    $in: (field, operand) => [field, "In", operand],
    $nin: (field, operand) => [field, "NotIn", operand],
    $contains: (field, operand) => [field, "Contains", operand],
    // Prefix / suffix matches are expressed as globs. The operand is
    // interpolated verbatim, so glob metacharacters in it are not escaped.
    $startsWith: (field, operand) => [field, "Glob", `${operand}*`],
    $endsWith: (field, operand) => [field, "Glob", `*${operand}`],
    // exists: true -> NotEq null, exists: false -> Eq null
    $exists: (field, operand) => [field, operand ? "NotEq" : "Eq", null],
};
/**
 * Encode field-level operators to Turbopuffer filters.
 *
 * @param field - attribute name the operators apply to.
 * @param ops - operator object, e.g. `{ $gte: 1, $lt: 10 }`. Operators whose
 *   value is `undefined` are ignored; keys that do not start with "$" are
 *   ignored as before.
 * @returns one condition tuple per supplied operator, ordered as in
 *   FIELD_OP_ENCODERS regardless of the key order in `ops`.
 * @throws Error when `ops` carries a "$"-prefixed key (with a defined value)
 *   that this codec does not support. Such operators used to be dropped
 *   silently, which sent a weaker filter than the caller asked for.
 */
function encodeFieldOps(field, ops) {
    for (const name of Object.keys(ops)) {
        const isUnknownOperator = name.startsWith("$") &&
            ops[name] !== undefined &&
            !Object.prototype.hasOwnProperty.call(FIELD_OP_ENCODERS, name);
        if (isUnknownOperator) {
            throw new Error(`Unsupported filter operator "${name}" on field "${field}"`);
        }
    }
    const conditions = [];
    for (const [name, encode] of Object.entries(FIELD_OP_ENCODERS)) {
        const operand = ops[name];
        if (operand !== undefined) {
            conditions.push(encode(field, operand));
        }
    }
    return conditions;
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type conversion codecs for Turbopuffer.
|
|
3
|
+
*/
|
|
4
|
+
export { SCALAR_TYPE, SIMILARITY, FIELD_SCHEMA, INDEX_SCHEMA } from "./schema.js";
|
|
5
|
+
export { DOCUMENT, PATCH } from "./document.js";
|
|
6
|
+
export { FILTER } from "./filter.js";
|
|
7
|
+
export { QUERY, SEARCH_HIT } from "./query.js";
|
|
8
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/convert/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAC/E,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AAClC,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type conversion codecs for Turbopuffer.
|
|
3
|
+
*/
|
|
4
|
+
export { SCALAR_TYPE, SIMILARITY, FIELD_SCHEMA, INDEX_SCHEMA } from "./schema.js";
|
|
5
|
+
export { DOCUMENT, PATCH } from "./document.js";
|
|
6
|
+
export { FILTER } from "./filter.js";
|
|
7
|
+
export { QUERY, SEARCH_HIT } from "./query.js";
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Query conversion codecs.
|
|
3
|
+
*
|
|
4
|
+
* Converts the new RankingSignal-based query format to Turbopuffer params.
|
|
5
|
+
*/
|
|
6
|
+
import type { SearchQuery, SearchHit, UnknownDocument } from "@kernl-sdk/retrieval";
|
|
7
|
+
import type { Row, NamespaceQueryParams } from "@turbopuffer/turbopuffer/resources/namespaces";
|
|
8
|
+
/**
 * Codec for converting SearchQuery to Turbopuffer NamespaceQueryParams.
 *
 * Per the compiled implementation (query.js), `encode` only sets the params
 * whose source field is present: ranking signals become `rank_by`, `topK`
 * becomes `top_k`, `filter` becomes `filters`, and `include` becomes
 * `include_attributes`. It throws when the signals mix vector and text
 * values or contain more than one vector.
 *
 * `decode` is declared for symmetry but always throws
 * "QUERY.decode: not implemented".
 */
|
|
11
|
+
export declare const QUERY: {
|
|
12
|
+
encode: (query: SearchQuery) => NamespaceQueryParams;
|
|
13
|
+
decode: (_params: NamespaceQueryParams) => SearchQuery;
|
|
14
|
+
};
|
|
15
|
+
/**
 * Codec for converting Turbopuffer Row to SearchHit.
 *
 * Per the compiled implementation (query.js), `decode` stringifies the row
 * id, negates a numeric `$dist` into `score` (a missing or zero distance
 * gives a score of 0), and stores the row without `$dist` as `document`.
 * The `index` argument is copied onto the hit unchanged.
 *
 * `encode` always throws "SEARCH_HIT.encode: not implemented".
 */
|
|
18
|
+
export declare const SEARCH_HIT: {
|
|
19
|
+
encode: <TDocument = UnknownDocument>(_hit: SearchHit<TDocument>) => Row;
|
|
20
|
+
decode: <TDocument = UnknownDocument>(row: Row, index: string) => SearchHit<TDocument>;
|
|
21
|
+
};
|
|
22
|
+
//# sourceMappingURL=query.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"query.d.ts","sourceRoot":"","sources":["../../src/convert/query.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EACV,WAAW,EACX,SAAS,EAET,eAAe,EAChB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EACV,GAAG,EACH,oBAAoB,EACrB,MAAM,+CAA+C,CAAC;AAKvD;;GAEG;AACH,eAAO,MAAM,KAAK;oBACA,WAAW,KAAG,oBAAoB;sBA+BhC,oBAAoB,KAAG,WAAW;CAGrD,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,UAAU;aACZ,SAAS,0BAA0B,SAAS,CAAC,SAAS,CAAC,KAAG,GAAG;aAI7D,SAAS,yBACX,GAAG,SACD,MAAM,KACZ,SAAS,CAAC,SAAS,CAAC;CAgBxB,CAAC"}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Query conversion codecs.
|
|
3
|
+
*
|
|
4
|
+
* Converts the new RankingSignal-based query format to Turbopuffer params.
|
|
5
|
+
*/
|
|
6
|
+
import { FILTER } from "./filter.js";
|
|
7
|
+
/**
 * Codec for converting SearchQuery to Turbopuffer NamespaceQueryParams.
 *
 * `encode` copies over only the parts of the query that were supplied:
 * - `query` / `max` ranking signals -> `rank_by` (built by buildRankBy,
 *   which throws on unsupported signal combinations)
 * - `topK` -> `top_k`
 * - `filter` -> `filters` (built by FILTER.encode)
 * - `include` -> `include_attributes` (booleans as-is, lists copied)
 *
 * `decode` is not implemented and always throws.
 */
export const QUERY = {
    encode: (query) => {
        const params = {};
        // Signals come from `query` when it is set, otherwise from `max`.
        // Max fusion is requested only when the signals really came from
        // `max`; deriving the flag from `max` alone made a query carrying
        // both fields rank the `query` signals with "Max".
        const useMax = query.query == null;
        const signals = useMax ? query.max : query.query;
        if (signals && signals.length > 0) {
            params.rank_by = buildRankBy(signals, useMax);
        }
        // top K
        if (query.topK !== undefined) {
            params.top_k = query.topK;
        }
        // filters
        if (query.filter) {
            params.filters = FILTER.encode(query.filter);
        }
        // include attributes: copy lists so the caller's array is not shared
        if (query.include !== undefined) {
            params.include_attributes =
                typeof query.include === "boolean" ? query.include : [...query.include];
        }
        return params;
    },
    decode: (_params) => {
        throw new Error("QUERY.decode: not implemented");
    },
};
|
|
41
|
+
/**
 * Codec between a Turbopuffer result row and a SearchHit.
 *
 * `decode(row, index)` produces `{ id, index, score, document }`:
 * - `id` is the row id converted with `String()`;
 * - `score` is the negated `$dist` so that a higher score is a better
 *   match; a non-numeric or zero `$dist` gives exactly 0;
 * - `document` holds the original id plus every row attribute except
 *   `$dist`.
 *
 * `encode` is not implemented and always throws.
 */
export const SEARCH_HIT = {
    encode: (_hit) => {
        throw new Error("SEARCH_HIT.encode: not implemented");
    },
    decode: (row, index) => {
        const { id, $dist: rawDistance, ...attributes } = row;
        // Only a non-zero numeric distance is negated, which keeps the
        // zero case at +0 instead of -0.
        let score = 0;
        if (typeof rawDistance === "number" && rawDistance !== 0) {
            score = -rawDistance;
        }
        return {
            id: String(id),
            index,
            score,
            // include document fields with id
            document: { id, ...attributes },
        };
    },
};
|
|
61
|
+
/**
 * Turn ranking signals into a Turbopuffer `rank_by` expression.
 *
 * Every signal is scanned field by field (its `weight` entry is skipped):
 * array values are treated as vectors and always rank on the `vector`
 * attribute with "ANN"; string values become BM25 clauses on their own
 * field; other value types are ignored.
 *
 * Turbopuffer constraints enforced here:
 * - Sum/Max fusion only works with BM25 (text) signals
 * - Vector search must be a single ANN query
 * - Hybrid (text + vector) fusion is not supported in a single query
 *
 * @param signals - iterable of ranking signal objects.
 * @param useMax - fuse several text clauses with "Max" instead of "Sum".
 * @returns the single clause, or `[fusion, clauses]` for several text clauses.
 * @throws Error when no usable signal is found, when text and vector
 *   signals are mixed, or when more than one vector is supplied.
 */
function buildRankBy(signals, useMax) {
    const textClauses = [];
    const vectorClauses = [];
    for (const signal of signals) {
        for (const [field, value] of Object.entries(signal)) {
            if (field === "weight" || value === undefined) {
                continue;
            }
            if (Array.isArray(value)) {
                vectorClauses.push(["vector", "ANN", value]);
            }
            else if (typeof value === "string") {
                textClauses.push([field, "BM25", value]);
            }
        }
    }
    const vectorCount = vectorClauses.length;
    const textCount = textClauses.length;
    if (vectorCount === 0 && textCount === 0) {
        throw new Error("No ranking signals provided");
    }
    // hybrid fusion not supported
    if (vectorCount > 0 && textCount > 0) {
        throw new Error("Turbopuffer does not support hybrid (vector + text) fusion in a single query. " +
            "Use separate queries and merge results client-side.");
    }
    // multi-vector fusion not supported
    if (vectorCount > 1) {
        throw new Error("Turbopuffer does not support multi-vector fusion. " +
            "Use separate queries and merge results client-side.");
    }
    // exactly one vector, or exactly one text clause: no fusion wrapper
    if (vectorCount === 1) {
        return vectorClauses[0];
    }
    if (textCount === 1) {
        return textClauses[0];
    }
    // several text clauses: fuse them
    return [useMax ? "Max" : "Sum", textClauses];
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Schema conversion codecs.
|
|
3
|
+
*/
|
|
4
|
+
import type { Codec } from "@kernl-sdk/shared/lib";
|
|
5
|
+
import type { FieldSchema, VectorFieldSchema, ScalarFieldSchema } from "@kernl-sdk/retrieval";
|
|
6
|
+
import type { AttributeSchema, DistanceMetric } from "@turbopuffer/turbopuffer/resources/namespaces";
|
|
7
|
+
type Similarity = VectorFieldSchema["similarity"];
|
|
8
|
+
type ScalarType = ScalarFieldSchema["type"];
|
|
9
|
+
type TpufType = string;
|
|
10
|
+
/**
 * Codec for converting kernl scalar types to Turbopuffer attribute types.
 *
 * Per the compiled implementation (schema.js), both directions fall back to
 * "string" for a type missing from the mapping tables, and `float` is
 * encoded as "int", so a float field decodes back as `int`.
 */
|
|
13
|
+
export declare const SCALAR_TYPE: Codec<ScalarType, TpufType>;
|
|
14
|
+
/**
 * Codec for converting similarity metric to Turbopuffer distance metric.
 *
 * Turbopuffer supports: cosine_distance, euclidean_squared
 * We support: cosine, euclidean, dot_product
 *
 * Per the compiled implementation (schema.js), "euclidean" encodes to
 * "euclidean_squared" and every other value, including "dot_product",
 * encodes to "cosine_distance". Decoding therefore yields only "euclidean"
 * or "cosine".
 */
|
|
20
|
+
export declare const SIMILARITY: Codec<Similarity, DistanceMetric>;
|
|
21
|
+
/**
 * Codec-like converter for FieldSchema to Turbopuffer AttributeSchema.
 *
 * Takes the field name as context since Turbopuffer requires `ann: true`
 * only on the special `vector` attribute.
 *
 * Per the compiled implementation (schema.js), `encode` maps "vector" and
 * "sparse-vector" fields to a `[dimensions]f16` or `[dimensions]f32` type
 * with `ann` set to `name === "vector"`. Other fields get their type from
 * SCALAR_TYPE, plus `filterable` and `full_text_search` when the field
 * enables them.
 *
 * `decode` always throws "FIELD_SCHEMA.decode: not implemented".
 */
|
|
27
|
+
export declare const FIELD_SCHEMA: {
|
|
28
|
+
encode: (field: FieldSchema, name: string) => AttributeSchema;
|
|
29
|
+
decode: () => never;
|
|
30
|
+
};
|
|
31
|
+
/**
 * Codec for converting a full schema record.
 *
 * Validates that vector fields are named `vector` since Turbopuffer only
 * supports ANN indexing on that specific attribute name.
 *
 * Per the compiled implementation (schema.js), `encode` throws for a
 * "vector" or "sparse-vector" field under any other name and otherwise
 * encodes each field with FIELD_SCHEMA. `decode` always throws
 * "INDEX_SCHEMA.decode: not implemented".
 */
|
|
37
|
+
export declare const INDEX_SCHEMA: Codec<Record<string, FieldSchema>, Record<string, AttributeSchema>>;
|
|
38
|
+
export {};
|
|
39
|
+
//# sourceMappingURL=schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/convert/schema.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,KAAK,EACV,WAAW,EACX,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EACV,eAAe,EAEf,cAAc,EACf,MAAM,+CAA+C,CAAC;AAEvD,KAAK,UAAU,GAAG,iBAAiB,CAAC,YAAY,CAAC,CAAC;AAClD,KAAK,UAAU,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;AAC5C,KAAK,QAAQ,GAAG,MAAM,CAAC;AA+BvB;;GAEG;AACH,eAAO,MAAM,WAAW,EAAE,KAAK,CAAC,UAAU,EAAE,QAAQ,CAGnD,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,UAAU,EAAE,KAAK,CAAC,UAAU,EAAE,cAAc,CAqBxD,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,YAAY;oBACP,WAAW,QAAQ,MAAM,KAAG,eAAe;;CAiC5D,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,YAAY,EAAE,KAAK,CAC9B,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,EAC3B,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CA0BhC,CAAC"}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Schema conversion codecs.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Mapping from kernl scalar types to Turbopuffer attribute types.
 */
const SCALAR_TO_TPUF = {
    string: "string",
    int: "int",
    bigint: "uint", // Unix epoch timestamps in ms
    // Original note: "tpuf doesn't have float". Floats are therefore declared
    // as "int" and decode back as `int`.
    // NOTE(review): confirm against the current Turbopuffer schema docs
    // whether a float attribute type is available.
    float: "int",
    boolean: "bool",
    date: "datetime",
    "string[]": "[]string",
    "int[]": "[]int",
    "date[]": "[]datetime",
};
/**
 * Mapping from Turbopuffer attribute types to kernl scalar types.
 */
const TPUF_TO_SCALAR = {
    string: "string",
    int: "int",
    uint: "bigint",
    bool: "boolean",
    datetime: "date",
    "[]string": "string[]",
    "[]int": "int[]",
    "[]datetime": "date[]",
};
/**
 * Look up `key` among the table's own properties only.
 *
 * A plain `table[key]` also resolves inherited members, so keys such as
 * "constructor" or "toString" would return Object.prototype values instead
 * of the fallback.
 */
function lookupOwn(table, key, fallback) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : fallback;
}
/**
 * Codec for converting kernl scalar types to Turbopuffer attribute types.
 *
 * Both directions return "string" for a type that is not listed in the
 * corresponding mapping table.
 */
export const SCALAR_TYPE = {
    encode: (type) => lookupOwn(SCALAR_TO_TPUF, type, "string"),
    decode: (type) => lookupOwn(TPUF_TO_SCALAR, type, "string"),
};
|
|
38
|
+
/**
 * Codec between kernl similarity names and Turbopuffer distance metrics.
 *
 * Turbopuffer supports: cosine_distance, euclidean_squared
 * We support: cosine, euclidean, dot_product
 *
 * Only "euclidean" has a dedicated counterpart ("euclidean_squared").
 * Every other input, in either direction, maps to the cosine variant, so
 * "dot_product" encodes to "cosine_distance" and decodes back as "cosine".
 */
export const SIMILARITY = {
    encode: (similarity) => similarity === "euclidean" ? "euclidean_squared" : "cosine_distance",
    decode: (metric) => metric === "euclidean_squared" ? "euclidean" : "cosine",
};
|
|
65
|
+
/**
 * Codec-like converter from a FieldSchema to a Turbopuffer AttributeSchema.
 *
 * The field name is passed along because Turbopuffer requires `ann: true`
 * only on the special `vector` attribute.
 *
 * `encode(field, name)`:
 * - "vector" / "sparse-vector" fields yield `{ type, ann }`, where `type` is
 *   `[dimensions]f16` when the field's quantization is "f16" and
 *   `[dimensions]f32` otherwise, and `ann` is true only for the name "vector";
 * - any other field yields `{ type }` from SCALAR_TYPE, plus
 *   `filterable: true` when the field is filterable and `full_text_search`
 *   when `fts` is set (an object contributes its `language`, any other
 *   truthy value becomes `true`).
 *
 * `decode` is not implemented and always throws.
 */
export const FIELD_SCHEMA = {
    encode: (field, name) => {
        const isVectorField = field.type === "vector" || field.type === "sparse-vector";
        if (!isVectorField) {
            // Scalar fields
            const attribute = {
                type: SCALAR_TYPE.encode(field.type),
            };
            if (field.filterable) {
                attribute.filterable = true;
            }
            if (field.fts) {
                if (typeof field.fts === "object") {
                    attribute.full_text_search = { language: field.fts.language };
                }
                else {
                    attribute.full_text_search = true;
                }
            }
            return attribute;
        }
        // Vector fields
        const elementType = field.quantization === "f16" ? "f16" : "f32";
        return {
            type: `[${field.dimensions}]${elementType}`,
            ann: name === "vector",
        };
    },
    decode: () => {
        throw new Error("FIELD_SCHEMA.decode: not implemented");
    },
};
|
|
101
|
+
/**
 * Codec for converting a whole schema record (field name -> FieldSchema).
 *
 * `encode` checks every "vector" / "sparse-vector" field and throws unless
 * it is named `vector`, since Turbopuffer only supports ANN indexing on
 * that attribute name. Each field is then encoded with FIELD_SCHEMA under
 * its own name.
 *
 * `decode` is not implemented and always throws.
 */
export const INDEX_SCHEMA = {
    encode: (schema) => {
        const attributes = {};
        Object.entries(schema).forEach(([name, field]) => {
            const vectorLike = field.type === "vector" || field.type === "sparse-vector";
            // Enforce vector field naming
            if (vectorLike && name !== "vector") {
                throw new Error(`Turbopuffer requires vector fields to be named "vector", got "${name}". ` +
                    `Rename your field or use a different search provider.`);
            }
            attributes[name] = FIELD_SCHEMA.encode(field, name);
        });
        return attributes;
    },
    decode: () => {
        throw new Error("INDEX_SCHEMA.decode: not implemented");
    },
};
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Codecs for converting between kernl and Turbopuffer types.
|
|
3
|
+
*/
|
|
4
|
+
import type { Codec } from "@kernl-sdk/shared/lib";
|
|
5
|
+
import type { FieldSchema, VectorFieldSchema, ScalarFieldSchema, SearchDocument, SearchQuery, SearchHit, FilterExpression } from "@kernl-sdk/retrieval";
|
|
6
|
+
import type { AttributeSchema, DistanceMetric, Row, NamespaceQueryParams } from "@turbopuffer/turbopuffer/resources/namespaces";
|
|
7
|
+
import type { Filter } from "@turbopuffer/turbopuffer/resources/custom";
|
|
8
|
+
type Similarity = VectorFieldSchema["similarity"];
|
|
9
|
+
type ScalarType = ScalarFieldSchema["type"];
|
|
10
|
+
type TpufType = string;
|
|
11
|
+
/**
|
|
12
|
+
* Codec for converting kernl scalar types to Turbopuffer attribute types.
|
|
13
|
+
*/
|
|
14
|
+
export declare const SCALAR_TYPE: Codec<ScalarType, TpufType>;
|
|
15
|
+
/**
|
|
16
|
+
* Codec for converting similarity metric to Turbopuffer distance metric.
|
|
17
|
+
*
|
|
18
|
+
* Turbopuffer supports: cosine_distance, euclidean_squared
|
|
19
|
+
* We support: cosine, euclidean, dot_product
|
|
20
|
+
*/
|
|
21
|
+
export declare const SIMILARITY: Codec<Similarity, DistanceMetric>;
|
|
22
|
+
/**
|
|
23
|
+
* Codec-like converter for FieldSchema to Turbopuffer AttributeSchema.
|
|
24
|
+
*
|
|
25
|
+
* Takes the field name as context since Turbopuffer requires `ann: true`
|
|
26
|
+
* only on the special `vector` attribute.
|
|
27
|
+
*/
|
|
28
|
+
export declare const FIELD_SCHEMA: {
|
|
29
|
+
encode: (field: FieldSchema, name: string) => AttributeSchema;
|
|
30
|
+
decode: () => never;
|
|
31
|
+
};
|
|
32
|
+
/**
|
|
33
|
+
* Codec for converting a full schema record.
|
|
34
|
+
*
|
|
35
|
+
* Validates that vector fields are named `vector` since Turbopuffer only
|
|
36
|
+
* supports ANN indexing on that specific attribute name.
|
|
37
|
+
*/
|
|
38
|
+
export declare const INDEX_SCHEMA: Codec<Record<string, FieldSchema>, Record<string, AttributeSchema>>;
|
|
39
|
+
/**
|
|
40
|
+
* Codec for converting SearchDocument to Turbopuffer Row.
|
|
41
|
+
*/
|
|
42
|
+
export declare const DOCUMENT: Codec<SearchDocument, Row>;
|
|
43
|
+
/**
|
|
44
|
+
* Codec for converting FilterExpression to Turbopuffer Filter.
|
|
45
|
+
*/
|
|
46
|
+
export declare const FILTER: {
|
|
47
|
+
encode: (filter: FilterExpression) => Filter;
|
|
48
|
+
decode: (_filter: Filter) => FilterExpression;
|
|
49
|
+
};
|
|
50
|
+
/**
|
|
51
|
+
* Codec for converting SearchQuery to Turbopuffer NamespaceQueryParams.
|
|
52
|
+
*
|
|
53
|
+
* Note: Hybrid search (vector + text) requires multi-query which is
|
|
54
|
+
* handled separately.
|
|
55
|
+
*/
|
|
56
|
+
export declare const QUERY: {
|
|
57
|
+
encode: (query: SearchQuery) => NamespaceQueryParams;
|
|
58
|
+
decode: (_params: NamespaceQueryParams) => SearchQuery;
|
|
59
|
+
};
|
|
60
|
+
/**
|
|
61
|
+
* Codec for converting Turbopuffer Row to SearchHit.
|
|
62
|
+
*/
|
|
63
|
+
export declare const SEARCH_HIT: {
|
|
64
|
+
encode: (_hit: SearchHit) => Row;
|
|
65
|
+
decode: (row: Row, index: string) => SearchHit;
|
|
66
|
+
};
|
|
67
|
+
export {};
|
|
68
|
+
//# sourceMappingURL=convert.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"convert.d.ts","sourceRoot":"","sources":["../src/convert.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,KAAK,EACV,WAAW,EACX,iBAAiB,EACjB,iBAAiB,EAGjB,cAAc,EACd,WAAW,EACX,SAAS,EACT,gBAAgB,EAIjB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EACV,eAAe,EAEf,cAAc,EACd,GAAG,EACH,oBAAoB,EACrB,MAAM,+CAA+C,CAAC;AACvD,OAAO,KAAK,EAAE,MAAM,EAAU,MAAM,2CAA2C,CAAC;AAEhF,KAAK,UAAU,GAAG,iBAAiB,CAAC,YAAY,CAAC,CAAC;AAClD,KAAK,UAAU,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;AAC5C,KAAK,QAAQ,GAAG,MAAM,CAAC;AAEvB;;GAEG;AACH,eAAO,MAAM,WAAW,EAAE,KAAK,CAAC,UAAU,EAAE,QAAQ,CA4CnD,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,UAAU,EAAE,KAAK,CAAC,UAAU,EAAE,cAAc,CAqBxD,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,YAAY;oBACP,WAAW,QAAQ,MAAM,KAAG,eAAe;;CAiC5D,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,YAAY,EAAE,KAAK,CAC9B,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,EAC3B,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CA0BhC,CAAC;AAyBF;;GAEG;AACH,eAAO,MAAM,QAAQ,EAAE,KAAK,CAAC,cAAc,EAAE,GAAG,CAc/C,CAAC;AAkEF;;GAEG;AACH,eAAO,MAAM,MAAM;qBACA,gBAAgB,KAAG,MAAM;sBAsCxB,MAAM,KAAG,gBAAgB;CAG5C,CAAC;AA4BF;;;;;GAKG;AACH,eAAO,MAAM,KAAK;oBACA,WAAW,KAAG,oBAAoB;sBAsChC,oBAAoB,KAAG,WAAW;CAGrD,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,UAAU;mBACN,SAAS,KAAG,GAAG;kBAIhB,GAAG,SAAS,MAAM,KAAG,SAAS;CAqB7C,CAAC"}
|