@kernl-sdk/pg 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -5
- package/.turbo/turbo-check-types.log +36 -0
- package/CHANGELOG.md +41 -0
- package/README.md +124 -0
- package/dist/__tests__/integration.test.js +81 -1
- package/dist/__tests__/memory-integration.test.d.ts +2 -0
- package/dist/__tests__/memory-integration.test.d.ts.map +1 -0
- package/dist/__tests__/memory-integration.test.js +287 -0
- package/dist/__tests__/memory.test.d.ts +2 -0
- package/dist/__tests__/memory.test.d.ts.map +1 -0
- package/dist/__tests__/memory.test.js +357 -0
- package/dist/index.d.ts +5 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -3
- package/dist/memory/sql.d.ts +30 -0
- package/dist/memory/sql.d.ts.map +1 -0
- package/dist/memory/sql.js +100 -0
- package/dist/memory/store.d.ts +41 -0
- package/dist/memory/store.d.ts.map +1 -0
- package/dist/memory/store.js +114 -0
- package/dist/migrations.d.ts +1 -1
- package/dist/migrations.d.ts.map +1 -1
- package/dist/migrations.js +9 -3
- package/dist/pgvector/__tests__/handle.test.d.ts +2 -0
- package/dist/pgvector/__tests__/handle.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/handle.test.js +277 -0
- package/dist/pgvector/__tests__/hit.test.d.ts +2 -0
- package/dist/pgvector/__tests__/hit.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/hit.test.js +134 -0
- package/dist/pgvector/__tests__/integration/document.integration.test.d.ts +7 -0
- package/dist/pgvector/__tests__/integration/document.integration.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/integration/document.integration.test.js +587 -0
- package/dist/pgvector/__tests__/integration/edge.integration.test.d.ts +8 -0
- package/dist/pgvector/__tests__/integration/edge.integration.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/integration/edge.integration.test.js +663 -0
- package/dist/pgvector/__tests__/integration/filters.integration.test.d.ts +8 -0
- package/dist/pgvector/__tests__/integration/filters.integration.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/integration/filters.integration.test.js +609 -0
- package/dist/pgvector/__tests__/integration/lifecycle.integration.test.d.ts +8 -0
- package/dist/pgvector/__tests__/integration/lifecycle.integration.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/integration/lifecycle.integration.test.js +449 -0
- package/dist/pgvector/__tests__/integration/query.integration.test.d.ts +8 -0
- package/dist/pgvector/__tests__/integration/query.integration.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/integration/query.integration.test.js +544 -0
- package/dist/pgvector/__tests__/search.test.d.ts +2 -0
- package/dist/pgvector/__tests__/search.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/search.test.js +279 -0
- package/dist/pgvector/handle.d.ts +60 -0
- package/dist/pgvector/handle.d.ts.map +1 -0
- package/dist/pgvector/handle.js +213 -0
- package/dist/pgvector/hit.d.ts +10 -0
- package/dist/pgvector/hit.d.ts.map +1 -0
- package/dist/pgvector/hit.js +44 -0
- package/dist/pgvector/index.d.ts +7 -0
- package/dist/pgvector/index.d.ts.map +1 -0
- package/dist/pgvector/index.js +5 -0
- package/dist/pgvector/search.d.ts +60 -0
- package/dist/pgvector/search.d.ts.map +1 -0
- package/dist/pgvector/search.js +227 -0
- package/dist/pgvector/sql/__tests__/limit.test.d.ts +2 -0
- package/dist/pgvector/sql/__tests__/limit.test.d.ts.map +1 -0
- package/dist/pgvector/sql/__tests__/limit.test.js +161 -0
- package/dist/pgvector/sql/__tests__/order.test.d.ts +2 -0
- package/dist/pgvector/sql/__tests__/order.test.d.ts.map +1 -0
- package/dist/pgvector/sql/__tests__/order.test.js +218 -0
- package/dist/pgvector/sql/__tests__/query.test.d.ts +2 -0
- package/dist/pgvector/sql/__tests__/query.test.d.ts.map +1 -0
- package/dist/pgvector/sql/__tests__/query.test.js +392 -0
- package/dist/pgvector/sql/__tests__/select.test.d.ts +2 -0
- package/dist/pgvector/sql/__tests__/select.test.d.ts.map +1 -0
- package/dist/pgvector/sql/__tests__/select.test.js +293 -0
- package/dist/pgvector/sql/__tests__/where.test.d.ts +2 -0
- package/dist/pgvector/sql/__tests__/where.test.d.ts.map +1 -0
- package/dist/pgvector/sql/__tests__/where.test.js +488 -0
- package/dist/pgvector/sql/index.d.ts +7 -0
- package/dist/pgvector/sql/index.d.ts.map +1 -0
- package/dist/pgvector/sql/index.js +6 -0
- package/dist/pgvector/sql/limit.d.ts +8 -0
- package/dist/pgvector/sql/limit.d.ts.map +1 -0
- package/dist/pgvector/sql/limit.js +20 -0
- package/dist/pgvector/sql/order.d.ts +9 -0
- package/dist/pgvector/sql/order.d.ts.map +1 -0
- package/dist/pgvector/sql/order.js +47 -0
- package/dist/pgvector/sql/query.d.ts +46 -0
- package/dist/pgvector/sql/query.d.ts.map +1 -0
- package/dist/pgvector/sql/query.js +54 -0
- package/dist/pgvector/sql/schema.d.ts +16 -0
- package/dist/pgvector/sql/schema.d.ts.map +1 -0
- package/dist/pgvector/sql/schema.js +47 -0
- package/dist/pgvector/sql/select.d.ts +11 -0
- package/dist/pgvector/sql/select.d.ts.map +1 -0
- package/dist/pgvector/sql/select.js +87 -0
- package/dist/pgvector/sql/where.d.ts +8 -0
- package/dist/pgvector/sql/where.d.ts.map +1 -0
- package/dist/pgvector/sql/where.js +137 -0
- package/dist/pgvector/types.d.ts +20 -0
- package/dist/pgvector/types.d.ts.map +1 -0
- package/dist/pgvector/types.js +1 -0
- package/dist/pgvector/utils.d.ts +18 -0
- package/dist/pgvector/utils.d.ts.map +1 -0
- package/dist/pgvector/utils.js +22 -0
- package/dist/postgres.d.ts +19 -26
- package/dist/postgres.d.ts.map +1 -1
- package/dist/postgres.js +15 -27
- package/dist/storage.d.ts +62 -0
- package/dist/storage.d.ts.map +1 -1
- package/dist/storage.js +55 -10
- package/dist/thread/sql.d.ts +38 -0
- package/dist/thread/sql.d.ts.map +1 -0
- package/dist/thread/sql.js +112 -0
- package/dist/thread/store.d.ts +7 -3
- package/dist/thread/store.d.ts.map +1 -1
- package/dist/thread/store.js +46 -105
- package/package.json +8 -5
- package/src/__tests__/integration.test.ts +114 -15
- package/src/__tests__/memory-integration.test.ts +355 -0
- package/src/__tests__/memory.test.ts +428 -0
- package/src/index.ts +19 -3
- package/src/memory/sql.ts +141 -0
- package/src/memory/store.ts +166 -0
- package/src/migrations.ts +13 -3
- package/src/pgvector/README.md +50 -0
- package/src/pgvector/__tests__/handle.test.ts +335 -0
- package/src/pgvector/__tests__/hit.test.ts +165 -0
- package/src/pgvector/__tests__/integration/document.integration.test.ts +717 -0
- package/src/pgvector/__tests__/integration/edge.integration.test.ts +835 -0
- package/src/pgvector/__tests__/integration/filters.integration.test.ts +721 -0
- package/src/pgvector/__tests__/integration/lifecycle.integration.test.ts +570 -0
- package/src/pgvector/__tests__/integration/query.integration.test.ts +667 -0
- package/src/pgvector/__tests__/search.test.ts +366 -0
- package/src/pgvector/handle.ts +285 -0
- package/src/pgvector/hit.ts +56 -0
- package/src/pgvector/index.ts +7 -0
- package/src/pgvector/search.ts +330 -0
- package/src/pgvector/sql/__tests__/limit.test.ts +180 -0
- package/src/pgvector/sql/__tests__/order.test.ts +248 -0
- package/src/pgvector/sql/__tests__/query.test.ts +548 -0
- package/src/pgvector/sql/__tests__/select.test.ts +367 -0
- package/src/pgvector/sql/__tests__/where.test.ts +554 -0
- package/src/pgvector/sql/index.ts +14 -0
- package/src/pgvector/sql/limit.ts +29 -0
- package/src/pgvector/sql/order.ts +55 -0
- package/src/pgvector/sql/query.ts +112 -0
- package/src/pgvector/sql/schema.ts +61 -0
- package/src/pgvector/sql/select.ts +100 -0
- package/src/pgvector/sql/where.ts +152 -0
- package/src/pgvector/types.ts +21 -0
- package/src/pgvector/utils.ts +24 -0
- package/src/postgres.ts +31 -33
- package/src/storage.ts +102 -11
- package/src/thread/sql.ts +159 -0
- package/src/thread/store.ts +58 -127
- package/tsconfig.tsbuildinfo +1 -0
|
// Integration tests for PGSearchIndex against a live Postgres with pgvector.
// The whole suite is skipped unless KERNL_PG_TEST_URL points at a database;
// tests run sequentially because they share one schema and one pool.
import { describe, it, expect, beforeAll, afterAll } from "vitest";
import { Pool } from "pg";
import { PGSearchIndex } from "../search";

const TEST_DB_URL = process.env.KERNL_PG_TEST_URL;
// Dedicated schema so teardown can DROP ... CASCADE without touching user data.
const SCHEMA = "kernl_search_idx_test";

describe.sequential("PGSearchIndex", () => {
  if (!TEST_DB_URL) {
    it.skip("requires KERNL_PG_TEST_URL environment variable", () => {});
    return;
  }

  let pool: Pool;
  let pgvec: PGSearchIndex;

  beforeAll(async () => {
    pool = new Pool({ connectionString: TEST_DB_URL });

    // Ensure pgvector extension exists
    await pool.query(`CREATE EXTENSION IF NOT EXISTS vector`);

    // Clean slate
    await pool.query(`DROP SCHEMA IF EXISTS "${SCHEMA}" CASCADE`);
    await pool.query(`CREATE SCHEMA "${SCHEMA}"`);

    pgvec = new PGSearchIndex({ pool });
  });

  afterAll(async () => {
    await pool.query(`DROP SCHEMA IF EXISTS "${SCHEMA}" CASCADE`);
    await pool.end();
  });

  describe("createIndex", () => {
    it("creates a table with correct column types", async () => {
      await pgvec.createIndex({
        id: "articles",
        schema: {
          id: { type: "string", pk: true },
          title: { type: "string" },
          views: { type: "int" },
          rating: { type: "float" },
          published: { type: "boolean" },
          created_at: { type: "date" },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Verify table exists with correct columns
      const result = await pool.query<{
        column_name: string;
        data_type: string;
      }>(
        `SELECT column_name, data_type
         FROM information_schema.columns
         WHERE table_schema = $1 AND table_name = $2
         ORDER BY ordinal_position`,
        [SCHEMA, "articles"],
      );

      const columns = Object.fromEntries(
        result.rows.map((r) => [r.column_name, r.data_type]),
      );

      expect(columns.id).toBe("text");
      expect(columns.title).toBe("text");
      expect(columns.views).toBe("integer");
      expect(columns.rating).toBe("double precision");
      expect(columns.published).toBe("boolean");
      expect(columns.created_at).toBe("timestamp with time zone");
    });

    it("creates vector columns with correct dimensions", async () => {
      await pgvec.createIndex({
        id: "embeddings",
        schema: {
          id: { type: "string", pk: true },
          embedding: { type: "vector", dimensions: 384 },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Check the vector column type via pg_attribute
      const result = await pool.query<{ typname: string; atttypmod: number }>(
        `SELECT t.typname, a.atttypmod
         FROM pg_attribute a
         JOIN pg_class c ON a.attrelid = c.oid
         JOIN pg_namespace n ON c.relnamespace = n.oid
         JOIN pg_type t ON a.atttypid = t.oid
         WHERE n.nspname = $1
           AND c.relname = $2
           AND a.attname = $3`,
        [SCHEMA, "embeddings", "embedding"],
      );

      expect(result.rows[0]?.typname).toBe("vector");
      // atttypmod encodes dimensions for vector type
      expect(result.rows[0]?.atttypmod).toBe(384);
    });

    it("creates HNSW index for vector fields", async () => {
      await pgvec.createIndex({
        id: "searchable",
        schema: {
          id: { type: "string", pk: true },
          embedding: { type: "vector", dimensions: 128, similarity: "cosine" },
        },
        providerOptions: { schema: SCHEMA },
      });

      const result = await pool.query<{ indexname: string; indexdef: string }>(
        `SELECT indexname, indexdef
         FROM pg_indexes
         WHERE schemaname = $1 AND tablename = $2`,
        [SCHEMA, "searchable"],
      );

      // Index name follows the <table>_<column>_idx convention.
      const hnswIndex = result.rows.find(
        (r) => r.indexname === "searchable_embedding_idx",
      );

      expect(hnswIndex).toBeDefined();
      expect(hnswIndex?.indexdef).toContain("hnsw");
      expect(hnswIndex?.indexdef).toContain("vector_cosine_ops");
    });

    it("uses correct operator class for each similarity metric", async () => {
      // Cosine
      await pgvec.createIndex({
        id: "cosine_idx",
        schema: {
          id: { type: "string", pk: true },
          vec: { type: "vector", dimensions: 8, similarity: "cosine" },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Euclidean
      await pgvec.createIndex({
        id: "euclidean_idx",
        schema: {
          id: { type: "string", pk: true },
          vec: { type: "vector", dimensions: 8, similarity: "euclidean" },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Dot product
      await pgvec.createIndex({
        id: "dot_idx",
        schema: {
          id: { type: "string", pk: true },
          vec: { type: "vector", dimensions: 8, similarity: "dot_product" },
        },
        providerOptions: { schema: SCHEMA },
      });

      const result = await pool.query<{ indexname: string; indexdef: string }>(
        `SELECT indexname, indexdef
         FROM pg_indexes
         WHERE schemaname = $1
         ORDER BY indexname`,
        [SCHEMA],
      );

      const indexes = Object.fromEntries(
        result.rows.map((r) => [r.indexname, r.indexdef]),
      );

      // pgvector maps cosine→vector_cosine_ops, L2→vector_l2_ops, inner product→vector_ip_ops
      expect(indexes["cosine_idx_vec_idx"]).toContain("vector_cosine_ops");
      expect(indexes["euclidean_idx_vec_idx"]).toContain("vector_l2_ops");
      expect(indexes["dot_idx_vec_idx"]).toContain("vector_ip_ops");
    });

    it("throws if schema has no pk field", async () => {
      await expect(
        pgvec.createIndex({
          id: "no_pk",
          schema: {
            title: { type: "string" },
            content: { type: "string" },
          },
          providerOptions: { schema: SCHEMA },
        }),
      ).rejects.toThrow("schema must have a field with pk: true");
    });

    it("auto-binds the created index for immediate use", async () => {
      await pgvec.createIndex({
        id: "auto_bound",
        schema: {
          id: { type: "string", pk: true },
          name: { type: "string" },
          embedding: { type: "vector", dimensions: 3, similarity: "cosine" },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Should be able to use the index immediately without bindIndex
      const handle = pgvec.index("auto_bound");
      expect(handle.id).toBe("auto_bound");

      // Insert and query should work
      await handle.upsert({
        id: "test-1",
        name: "Test Doc",
        embedding: [0.1, 0.2, 0.3],
      });

      const results = await handle.query({
        query: [{ embedding: [0.1, 0.2, 0.3] }],
        topK: 1,
      });

      expect(results).toHaveLength(1);
      expect(results[0].id).toBe("test-1");
    });
  });

  describe("deleteIndex", () => {
    it("drops the table and removes binding", async () => {
      await pgvec.createIndex({
        id: "to_delete",
        schema: {
          id: { type: "string", pk: true },
          name: { type: "string" },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Verify table exists
      const before = await pool.query(
        `SELECT 1 FROM information_schema.tables
         WHERE table_schema = $1 AND table_name = $2`,
        [SCHEMA, "to_delete"],
      );
      expect(before.rows).toHaveLength(1);

      await pgvec.deleteIndex("to_delete");

      // Verify table is gone
      const after = await pool.query(
        `SELECT 1 FROM information_schema.tables
         WHERE table_schema = $1 AND table_name = $2`,
        [SCHEMA, "to_delete"],
      );
      expect(after.rows).toHaveLength(0);
    });

    it("throws if index is not bound", async () => {
      await expect(pgvec.deleteIndex("nonexistent")).rejects.toThrow(
        'Index "nonexistent" not bound',
      );
    });
  });

  describe("listIndexes", () => {
    it("returns empty page when no indexes match prefix", async () => {
      const page = await pgvec.listIndexes({ prefix: "nonexistent_prefix_" });

      expect(page.data).toEqual([]);
      expect(page.last).toBe(true);
    });

    it("lists created indexes", async () => {
      await pgvec.createIndex({
        id: "list_test_a",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });
      await pgvec.createIndex({
        id: "list_test_b",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });

      const page = await pgvec.listIndexes();

      // Other tests create indexes too, so assert containment, not equality.
      const ids = page.data.map((s) => s.id);
      expect(ids).toContain("list_test_a");
      expect(ids).toContain("list_test_b");
    });

    it("filters by prefix", async () => {
      await pgvec.createIndex({
        id: "prefix_foo_1",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });
      await pgvec.createIndex({
        id: "prefix_bar_1",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });

      const page = await pgvec.listIndexes({ prefix: "prefix_foo" });

      expect(page.data).toHaveLength(1);
      expect(page.data[0].id).toBe("prefix_foo_1");
    });

    it("respects limit and provides cursor for pagination", async () => {
      await pgvec.createIndex({
        id: "page_1",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });
      await pgvec.createIndex({
        id: "page_2",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });
      await pgvec.createIndex({
        id: "page_3",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });

      const page1 = await pgvec.listIndexes({ prefix: "page_", limit: 2 });

      expect(page1.data).toHaveLength(2);
      expect(page1.last).toBe(false);

      const page2 = await page1.next();

      expect(page2).not.toBeNull();
      expect(page2!.data).toHaveLength(1);
      expect(page2!.last).toBe(true);
    });
  });

  describe("describeIndex", () => {
    it("returns stats for an index", async () => {
      await pgvec.createIndex({
        id: "describe_test",
        schema: {
          id: { type: "string", pk: true },
          content: { type: "string" },
          embedding: { type: "vector", dimensions: 128, similarity: "cosine" },
        },
        providerOptions: { schema: SCHEMA },
      });

      const handle = pgvec.index("describe_test");
      const vec = new Array(128).fill(0.1);
      await handle.upsert({ id: "doc-1", content: "hello", embedding: vec });
      await handle.upsert({ id: "doc-2", content: "world", embedding: vec });

      const stats = await pgvec.describeIndex("describe_test");

      expect(stats.id).toBe("describe_test");
      expect(stats.count).toBe(2);
      expect(stats.sizeb).toBeGreaterThan(0);
      expect(stats.dimensions).toBe(128);
      expect(stats.similarity).toBe("cosine");
      expect(stats.status).toBe("ready");
    });

    it("throws if index is not bound", async () => {
      await expect(pgvec.describeIndex("nonexistent")).rejects.toThrow(
        'Index "nonexistent" not bound',
      );
    });
  });
});
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
import type { Pool } from "pg";
|
|
2
|
+
|
|
3
|
+
import type {
|
|
4
|
+
IndexHandle,
|
|
5
|
+
DocumentPatch,
|
|
6
|
+
QueryInput,
|
|
7
|
+
SearchHit,
|
|
8
|
+
FieldSchema,
|
|
9
|
+
UnknownDocument,
|
|
10
|
+
UpsertResult,
|
|
11
|
+
PatchResult,
|
|
12
|
+
DeleteResult,
|
|
13
|
+
} from "@kernl-sdk/retrieval";
|
|
14
|
+
import { normalizeQuery } from "@kernl-sdk/retrieval";
|
|
15
|
+
|
|
16
|
+
import { SEARCH_HIT } from "./hit";
|
|
17
|
+
import { sqlize, SQL_SELECT, SQL_WHERE, SQL_ORDER, SQL_LIMIT } from "./sql";
|
|
18
|
+
import type { PGIndexConfig } from "./types";
|
|
19
|
+
import { parseIndexId, isVector } from "./utils";
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* pgvector-backed IndexHandle.
|
|
23
|
+
*/
|
|
24
|
+
export class PGIndexHandle<TDocument = UnknownDocument>
|
|
25
|
+
implements IndexHandle<TDocument>
|
|
26
|
+
{
|
|
27
|
+
readonly id: string;
|
|
28
|
+
|
|
29
|
+
private pool: Pool;
|
|
30
|
+
private ensureInit: () => Promise<void>;
|
|
31
|
+
private config?: PGIndexConfig;
|
|
32
|
+
|
|
33
|
+
constructor(
|
|
34
|
+
pool: Pool,
|
|
35
|
+
ensureInit: () => Promise<void>,
|
|
36
|
+
id: string,
|
|
37
|
+
config?: PGIndexConfig,
|
|
38
|
+
) {
|
|
39
|
+
this.id = id;
|
|
40
|
+
this.pool = pool;
|
|
41
|
+
this.ensureInit = ensureInit;
|
|
42
|
+
this.config = config;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Query the index using vector search, full-text search, or filters.
|
|
47
|
+
*/
|
|
48
|
+
async query(input: QueryInput): Promise<SearchHit<TDocument>[]> {
|
|
49
|
+
await this.ensureInit();
|
|
50
|
+
|
|
51
|
+
const q = normalizeQuery(input);
|
|
52
|
+
const { schema, table, pkey } = this.table;
|
|
53
|
+
|
|
54
|
+
const sqlized = sqlize(q, { pkey, schema, table, binding: this.config });
|
|
55
|
+
|
|
56
|
+
const select = SQL_SELECT.encode(sqlized.select);
|
|
57
|
+
const where = SQL_WHERE.encode({
|
|
58
|
+
...sqlized.where,
|
|
59
|
+
startIdx: select.params.length + 1,
|
|
60
|
+
});
|
|
61
|
+
const order = SQL_ORDER.encode(sqlized.order);
|
|
62
|
+
const limit = SQL_LIMIT.encode({
|
|
63
|
+
...sqlized.limit,
|
|
64
|
+
startIdx: select.params.length + where.params.length + 1,
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
const sql = `
|
|
68
|
+
SELECT ${select.sql}
|
|
69
|
+
FROM "${schema}"."${table}"
|
|
70
|
+
${where.sql ? `WHERE ${where.sql}` : ""}
|
|
71
|
+
ORDER BY ${order.sql}
|
|
72
|
+
${limit.sql}
|
|
73
|
+
`;
|
|
74
|
+
|
|
75
|
+
const params = [...select.params, ...where.params, ...limit.params];
|
|
76
|
+
const result = await this.pool.query(sql, params);
|
|
77
|
+
|
|
78
|
+
return result.rows.map((row) =>
|
|
79
|
+
SEARCH_HIT.decode<TDocument>(row, this.id, this.config),
|
|
80
|
+
);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/* ---- document ops ---- */
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Upsert one or more documents.
|
|
87
|
+
*
|
|
88
|
+
* Documents are flat objects. The pkey field (default "id") is used for
|
|
89
|
+
* conflict resolution. All other fields are written to matching columns.
|
|
90
|
+
*/
|
|
91
|
+
async upsert(docs: TDocument | TDocument[]): Promise<UpsertResult> {
|
|
92
|
+
const arr = Array.isArray(docs) ? docs : [docs];
|
|
93
|
+
if (arr.length === 0) return { count: 0, inserted: 0, updated: 0 };
|
|
94
|
+
|
|
95
|
+
await this.ensureInit();
|
|
96
|
+
const { schema, table, pkey, fields } = this.table;
|
|
97
|
+
|
|
98
|
+
// Find which logical field maps to the pkey column
|
|
99
|
+
let pkeyField = pkey; // default: same name
|
|
100
|
+
for (const [fieldName, fieldCfg] of Object.entries(fields)) {
|
|
101
|
+
if (fieldCfg.column === pkey) {
|
|
102
|
+
pkeyField = fieldName;
|
|
103
|
+
break;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// collect field names from first doc (excluding the pkey field)
|
|
108
|
+
const first = arr[0] as UnknownDocument;
|
|
109
|
+
const fieldNames = Object.keys(first).filter((k) => k !== pkeyField);
|
|
110
|
+
|
|
111
|
+
// map field name → column name (from binding or same name)
|
|
112
|
+
const colFor = (name: string) => fields[name]?.column ?? name;
|
|
113
|
+
const cols = [pkey, ...fieldNames.map(colFor)];
|
|
114
|
+
|
|
115
|
+
const params: unknown[] = [];
|
|
116
|
+
const rows: string[] = [];
|
|
117
|
+
|
|
118
|
+
for (const doc of arr) {
|
|
119
|
+
const d = doc as UnknownDocument;
|
|
120
|
+
const placeholders: string[] = [];
|
|
121
|
+
|
|
122
|
+
// pkey value - use the logical field name, not column name
|
|
123
|
+
const pkval = d[pkeyField];
|
|
124
|
+
if (typeof pkval !== "string") {
|
|
125
|
+
throw new Error(`Document missing string field "${pkeyField}"`);
|
|
126
|
+
}
|
|
127
|
+
params.push(pkval);
|
|
128
|
+
placeholders.push(`$${params.length}`);
|
|
129
|
+
|
|
130
|
+
// ...other fields
|
|
131
|
+
for (const field of fieldNames) {
|
|
132
|
+
const val = d[field] ?? null;
|
|
133
|
+
const binding = fields[field];
|
|
134
|
+
|
|
135
|
+
// detect vector: explicit binding or runtime check
|
|
136
|
+
const isVec = binding?.type === "vector" || isVector(val);
|
|
137
|
+
|
|
138
|
+
params.push(isVec ? JSON.stringify(val) : val);
|
|
139
|
+
placeholders.push(
|
|
140
|
+
isVec ? `$${params.length}::vector` : `$${params.length}`,
|
|
141
|
+
);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
rows.push(`(${placeholders.join(", ")})`);
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
// build SET clause for conflict (exclude pkey)
|
|
148
|
+
const sets = cols.slice(1).map((c) => `"${c}" = EXCLUDED."${c}"`);
|
|
149
|
+
|
|
150
|
+
// xmax = 0 means inserted, xmax != 0 means updated
|
|
151
|
+
const sql = `
|
|
152
|
+
INSERT INTO "${schema}"."${table}" (${cols.map((c) => `"${c}"`).join(", ")})
|
|
153
|
+
VALUES ${rows.join(", ")}
|
|
154
|
+
ON CONFLICT ("${pkey}") DO UPDATE SET ${sets.join(", ")}
|
|
155
|
+
RETURNING (xmax = 0) as inserted
|
|
156
|
+
`;
|
|
157
|
+
|
|
158
|
+
const result = await this.pool.query(sql, params);
|
|
159
|
+
const inserted = result.rows.filter((r) => r.inserted).length;
|
|
160
|
+
|
|
161
|
+
return {
|
|
162
|
+
count: result.rowCount ?? 0,
|
|
163
|
+
inserted,
|
|
164
|
+
updated: (result.rowCount ?? 0) - inserted,
|
|
165
|
+
};
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
/**
|
|
169
|
+
* Patch one or more documents.
|
|
170
|
+
*
|
|
171
|
+
* Only specified fields are updated. Set a field to `null` to unset it.
|
|
172
|
+
*/
|
|
173
|
+
async patch(
|
|
174
|
+
patches: DocumentPatch<TDocument> | DocumentPatch<TDocument>[],
|
|
175
|
+
): Promise<PatchResult> {
|
|
176
|
+
const arr = Array.isArray(patches) ? patches : [patches];
|
|
177
|
+
if (arr.length === 0) return { count: 0 };
|
|
178
|
+
|
|
179
|
+
await this.ensureInit();
|
|
180
|
+
const { schema, table, pkey, fields } = this.table;
|
|
181
|
+
|
|
182
|
+
let totalCount = 0;
|
|
183
|
+
|
|
184
|
+
// process each patch individually (different fields may be updated)
|
|
185
|
+
for (const patch of arr) {
|
|
186
|
+
const pkval = patch.id;
|
|
187
|
+
if (typeof pkval !== "string") {
|
|
188
|
+
throw new Error(`Patch missing string field "id"`);
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
// collect fields to update (excluding pkey)
|
|
192
|
+
const updates: string[] = [];
|
|
193
|
+
const params: unknown[] = [];
|
|
194
|
+
|
|
195
|
+
for (const [key, val] of Object.entries(patch)) {
|
|
196
|
+
if (key === "id" || key === pkey) continue;
|
|
197
|
+
if (val === undefined) continue;
|
|
198
|
+
|
|
199
|
+
const col = fields[key]?.column ?? key;
|
|
200
|
+
const binding = fields[key];
|
|
201
|
+
const isVec = binding?.type === "vector" || isVector(val);
|
|
202
|
+
|
|
203
|
+
params.push(isVec && val !== null ? JSON.stringify(val) : val);
|
|
204
|
+
updates.push(
|
|
205
|
+
`"${col}" = $${params.length}${isVec && val !== null ? "::vector" : ""}`,
|
|
206
|
+
);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
if (updates.length === 0) continue;
|
|
210
|
+
|
|
211
|
+
params.push(pkval);
|
|
212
|
+
const sql = `
|
|
213
|
+
UPDATE "${schema}"."${table}"
|
|
214
|
+
SET ${updates.join(", ")}
|
|
215
|
+
WHERE "${pkey}" = $${params.length}
|
|
216
|
+
`;
|
|
217
|
+
|
|
218
|
+
const result = await this.pool.query(sql, params);
|
|
219
|
+
totalCount += result.rowCount ?? 0;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
return { count: totalCount };
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* Delete one or more documents by ID.
|
|
227
|
+
*/
|
|
228
|
+
async delete(ids: string | string[]): Promise<DeleteResult> {
|
|
229
|
+
const arr = Array.isArray(ids) ? ids : [ids];
|
|
230
|
+
if (arr.length === 0) return { count: 0 };
|
|
231
|
+
|
|
232
|
+
await this.ensureInit();
|
|
233
|
+
const { schema, table, pkey } = this.table;
|
|
234
|
+
|
|
235
|
+
const placeholders = arr.map((_, i) => `$${i + 1}`);
|
|
236
|
+
const sql = `
|
|
237
|
+
DELETE FROM "${schema}"."${table}"
|
|
238
|
+
WHERE "${pkey}" IN (${placeholders.join(", ")})
|
|
239
|
+
`;
|
|
240
|
+
|
|
241
|
+
const result = await this.pool.query(sql, arr);
|
|
242
|
+
return { count: result.rowCount ?? 0 };
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
/* ---- schema ops ---- */
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* Add a field to the index schema.
|
|
249
|
+
*
|
|
250
|
+
* Not yet implemented - left as a stub until we have a concrete use case
|
|
251
|
+
* to inform the design (e.g. vector-only vs general columns, index creation).
|
|
252
|
+
*/
|
|
253
|
+
async addField(_field: string, _schema: FieldSchema): Promise<void> {
|
|
254
|
+
throw new Error("addField not yet implemented");
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
/* ---- internal utils ---- */
|
|
258
|
+
|
|
259
|
+
/**
|
|
260
|
+
* Resolve table config: use explicit binding or derive from conventions.
|
|
261
|
+
*
|
|
262
|
+
* Resolution order:
|
|
263
|
+
* - If a PGIndexConfig was provided or bound via PGSearchIndex.createIndex()
|
|
264
|
+
* / bindIndex(), that config is used (schema, table, pkey, fields).
|
|
265
|
+
* - Otherwise we fall back to convention-based parsing of the index id via
|
|
266
|
+
* parseIndexId(id):
|
|
267
|
+
* - "docs" → schema: "public", table: "docs"
|
|
268
|
+
* - "analytics.events" → schema: "analytics", table: "events"
|
|
269
|
+
*
|
|
270
|
+
* This allows callers to:
|
|
271
|
+
* - Point at existing tables without an explicit bindIndex call by using
|
|
272
|
+
* either "table" or "schema.table" ids, and
|
|
273
|
+
* - Rely on explicit configs (from createIndex/bindIndex) when using the
|
|
274
|
+
* higher-level SearchIndex API.
|
|
275
|
+
*/
|
|
276
|
+
private get table() {
|
|
277
|
+
if (this.config) {
|
|
278
|
+
return this.config;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// convention-based defaults
|
|
282
|
+
const { schema, table } = parseIndexId(this.id);
|
|
283
|
+
return { schema, table, pkey: "id", fields: {} };
|
|
284
|
+
}
|
|
285
|
+
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { FieldValue, SearchHit, UnknownDocument } from "@kernl-sdk/retrieval";
|
|
2
|
+
|
|
3
|
+
import type { PGIndexConfig } from "./types";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Codec for converting DB row ←→ SearchHit.
|
|
7
|
+
*/
|
|
8
|
+
export const SEARCH_HIT = {
|
|
9
|
+
encode: (_hit: SearchHit): Record<string, unknown> => {
|
|
10
|
+
throw new Error("SEARCH_HIT.encode: not implemented");
|
|
11
|
+
},
|
|
12
|
+
|
|
13
|
+
decode: <TDocument = UnknownDocument>(
|
|
14
|
+
row: Record<string, unknown>,
|
|
15
|
+
index: string,
|
|
16
|
+
config?: PGIndexConfig,
|
|
17
|
+
): SearchHit<TDocument> => {
|
|
18
|
+
const { id, score, ...rest } = row;
|
|
19
|
+
const doc: Record<string, FieldValue> = {};
|
|
20
|
+
|
|
21
|
+
// Helper to parse pgvector strings like "[0.1,0.2,0.3]" back to arrays
|
|
22
|
+
const parseValue = (val: unknown, isVector: boolean): FieldValue => {
|
|
23
|
+
if (isVector && typeof val === "string" && val.startsWith("[")) {
|
|
24
|
+
return JSON.parse(val);
|
|
25
|
+
}
|
|
26
|
+
return val as FieldValue;
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
if (config) {
|
|
30
|
+
// map columns back to logical field names
|
|
31
|
+
for (const [field, cfg] of Object.entries(config.fields)) {
|
|
32
|
+
const col = cfg.column;
|
|
33
|
+
if (col in rest) {
|
|
34
|
+
const isVector = cfg.type === "vector";
|
|
35
|
+
doc[field] = parseValue(rest[col], isVector);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
} else {
|
|
39
|
+
// no config - parse all values, detecting vectors by format
|
|
40
|
+
for (const [k, v] of Object.entries(rest)) {
|
|
41
|
+
const isVector = typeof v === "string" && v.startsWith("[") && v.endsWith("]");
|
|
42
|
+
doc[k] = parseValue(v, isVector);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// Always include id in document for consistency
|
|
47
|
+
doc.id = String(id);
|
|
48
|
+
|
|
49
|
+
return {
|
|
50
|
+
id: String(id),
|
|
51
|
+
index,
|
|
52
|
+
score: typeof score === "number" ? score : 0,
|
|
53
|
+
document: doc as unknown as Partial<TDocument>,
|
|
54
|
+
};
|
|
55
|
+
},
|
|
56
|
+
};
|