@kernl-sdk/pg 0.1.11 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. package/.turbo/turbo-build.log +1 -1
  2. package/.turbo/turbo-check-types.log +36 -0
  3. package/CHANGELOG.md +32 -0
  4. package/README.md +124 -0
  5. package/dist/__tests__/integration.test.js +2 -2
  6. package/dist/__tests__/memory-integration.test.d.ts +2 -0
  7. package/dist/__tests__/memory-integration.test.d.ts.map +1 -0
  8. package/dist/__tests__/memory-integration.test.js +287 -0
  9. package/dist/__tests__/memory.test.d.ts +2 -0
  10. package/dist/__tests__/memory.test.d.ts.map +1 -0
  11. package/dist/__tests__/memory.test.js +357 -0
  12. package/dist/index.d.ts +5 -3
  13. package/dist/index.d.ts.map +1 -1
  14. package/dist/index.js +5 -3
  15. package/dist/memory/sql.d.ts +30 -0
  16. package/dist/memory/sql.d.ts.map +1 -0
  17. package/dist/memory/sql.js +100 -0
  18. package/dist/memory/store.d.ts +41 -0
  19. package/dist/memory/store.d.ts.map +1 -0
  20. package/dist/memory/store.js +114 -0
  21. package/dist/migrations.d.ts +1 -1
  22. package/dist/migrations.d.ts.map +1 -1
  23. package/dist/migrations.js +9 -3
  24. package/dist/pgvector/__tests__/handle.test.d.ts +2 -0
  25. package/dist/pgvector/__tests__/handle.test.d.ts.map +1 -0
  26. package/dist/pgvector/__tests__/handle.test.js +277 -0
  27. package/dist/pgvector/__tests__/hit.test.d.ts +2 -0
  28. package/dist/pgvector/__tests__/hit.test.d.ts.map +1 -0
  29. package/dist/pgvector/__tests__/hit.test.js +134 -0
  30. package/dist/pgvector/__tests__/integration/document.integration.test.d.ts +7 -0
  31. package/dist/pgvector/__tests__/integration/document.integration.test.d.ts.map +1 -0
  32. package/dist/pgvector/__tests__/integration/document.integration.test.js +587 -0
  33. package/dist/pgvector/__tests__/integration/edge.integration.test.d.ts +8 -0
  34. package/dist/pgvector/__tests__/integration/edge.integration.test.d.ts.map +1 -0
  35. package/dist/pgvector/__tests__/integration/edge.integration.test.js +663 -0
  36. package/dist/pgvector/__tests__/integration/filters.integration.test.d.ts +8 -0
  37. package/dist/pgvector/__tests__/integration/filters.integration.test.d.ts.map +1 -0
  38. package/dist/pgvector/__tests__/integration/filters.integration.test.js +609 -0
  39. package/dist/pgvector/__tests__/integration/lifecycle.integration.test.d.ts +8 -0
  40. package/dist/pgvector/__tests__/integration/lifecycle.integration.test.d.ts.map +1 -0
  41. package/dist/pgvector/__tests__/integration/lifecycle.integration.test.js +449 -0
  42. package/dist/pgvector/__tests__/integration/query.integration.test.d.ts +8 -0
  43. package/dist/pgvector/__tests__/integration/query.integration.test.d.ts.map +1 -0
  44. package/dist/pgvector/__tests__/integration/query.integration.test.js +544 -0
  45. package/dist/pgvector/__tests__/search.test.d.ts +2 -0
  46. package/dist/pgvector/__tests__/search.test.d.ts.map +1 -0
  47. package/dist/pgvector/__tests__/search.test.js +279 -0
  48. package/dist/pgvector/handle.d.ts +60 -0
  49. package/dist/pgvector/handle.d.ts.map +1 -0
  50. package/dist/pgvector/handle.js +213 -0
  51. package/dist/pgvector/hit.d.ts +10 -0
  52. package/dist/pgvector/hit.d.ts.map +1 -0
  53. package/dist/pgvector/hit.js +44 -0
  54. package/dist/pgvector/index.d.ts +7 -0
  55. package/dist/pgvector/index.d.ts.map +1 -0
  56. package/dist/pgvector/index.js +5 -0
  57. package/dist/pgvector/search.d.ts +60 -0
  58. package/dist/pgvector/search.d.ts.map +1 -0
  59. package/dist/pgvector/search.js +227 -0
  60. package/dist/pgvector/sql/__tests__/limit.test.d.ts +2 -0
  61. package/dist/pgvector/sql/__tests__/limit.test.d.ts.map +1 -0
  62. package/dist/pgvector/sql/__tests__/limit.test.js +161 -0
  63. package/dist/pgvector/sql/__tests__/order.test.d.ts +2 -0
  64. package/dist/pgvector/sql/__tests__/order.test.d.ts.map +1 -0
  65. package/dist/pgvector/sql/__tests__/order.test.js +218 -0
  66. package/dist/pgvector/sql/__tests__/query.test.d.ts +2 -0
  67. package/dist/pgvector/sql/__tests__/query.test.d.ts.map +1 -0
  68. package/dist/pgvector/sql/__tests__/query.test.js +392 -0
  69. package/dist/pgvector/sql/__tests__/select.test.d.ts +2 -0
  70. package/dist/pgvector/sql/__tests__/select.test.d.ts.map +1 -0
  71. package/dist/pgvector/sql/__tests__/select.test.js +293 -0
  72. package/dist/pgvector/sql/__tests__/where.test.d.ts +2 -0
  73. package/dist/pgvector/sql/__tests__/where.test.d.ts.map +1 -0
  74. package/dist/pgvector/sql/__tests__/where.test.js +488 -0
  75. package/dist/pgvector/sql/index.d.ts +7 -0
  76. package/dist/pgvector/sql/index.d.ts.map +1 -0
  77. package/dist/pgvector/sql/index.js +6 -0
  78. package/dist/pgvector/sql/limit.d.ts +8 -0
  79. package/dist/pgvector/sql/limit.d.ts.map +1 -0
  80. package/dist/pgvector/sql/limit.js +20 -0
  81. package/dist/pgvector/sql/order.d.ts +9 -0
  82. package/dist/pgvector/sql/order.d.ts.map +1 -0
  83. package/dist/pgvector/sql/order.js +47 -0
  84. package/dist/pgvector/sql/query.d.ts +46 -0
  85. package/dist/pgvector/sql/query.d.ts.map +1 -0
  86. package/dist/pgvector/sql/query.js +54 -0
  87. package/dist/pgvector/sql/schema.d.ts +16 -0
  88. package/dist/pgvector/sql/schema.d.ts.map +1 -0
  89. package/dist/pgvector/sql/schema.js +47 -0
  90. package/dist/pgvector/sql/select.d.ts +11 -0
  91. package/dist/pgvector/sql/select.d.ts.map +1 -0
  92. package/dist/pgvector/sql/select.js +87 -0
  93. package/dist/pgvector/sql/where.d.ts +8 -0
  94. package/dist/pgvector/sql/where.d.ts.map +1 -0
  95. package/dist/pgvector/sql/where.js +137 -0
  96. package/dist/pgvector/types.d.ts +20 -0
  97. package/dist/pgvector/types.d.ts.map +1 -0
  98. package/dist/pgvector/types.js +1 -0
  99. package/dist/pgvector/utils.d.ts +18 -0
  100. package/dist/pgvector/utils.d.ts.map +1 -0
  101. package/dist/pgvector/utils.js +22 -0
  102. package/dist/postgres.d.ts +19 -26
  103. package/dist/postgres.d.ts.map +1 -1
  104. package/dist/postgres.js +15 -27
  105. package/dist/storage.d.ts +48 -0
  106. package/dist/storage.d.ts.map +1 -1
  107. package/dist/storage.js +32 -9
  108. package/dist/thread/sql.d.ts +38 -0
  109. package/dist/thread/sql.d.ts.map +1 -0
  110. package/dist/thread/sql.js +112 -0
  111. package/dist/thread/store.d.ts +2 -2
  112. package/dist/thread/store.d.ts.map +1 -1
  113. package/dist/thread/store.js +32 -102
  114. package/package.json +7 -4
  115. package/src/__tests__/integration.test.ts +15 -17
  116. package/src/__tests__/memory-integration.test.ts +355 -0
  117. package/src/__tests__/memory.test.ts +428 -0
  118. package/src/index.ts +19 -3
  119. package/src/memory/sql.ts +141 -0
  120. package/src/memory/store.ts +166 -0
  121. package/src/migrations.ts +13 -3
  122. package/src/pgvector/README.md +50 -0
  123. package/src/pgvector/__tests__/handle.test.ts +335 -0
  124. package/src/pgvector/__tests__/hit.test.ts +165 -0
  125. package/src/pgvector/__tests__/integration/document.integration.test.ts +717 -0
  126. package/src/pgvector/__tests__/integration/edge.integration.test.ts +835 -0
  127. package/src/pgvector/__tests__/integration/filters.integration.test.ts +721 -0
  128. package/src/pgvector/__tests__/integration/lifecycle.integration.test.ts +570 -0
  129. package/src/pgvector/__tests__/integration/query.integration.test.ts +667 -0
  130. package/src/pgvector/__tests__/search.test.ts +366 -0
  131. package/src/pgvector/handle.ts +285 -0
  132. package/src/pgvector/hit.ts +56 -0
  133. package/src/pgvector/index.ts +7 -0
  134. package/src/pgvector/search.ts +330 -0
  135. package/src/pgvector/sql/__tests__/limit.test.ts +180 -0
  136. package/src/pgvector/sql/__tests__/order.test.ts +248 -0
  137. package/src/pgvector/sql/__tests__/query.test.ts +548 -0
  138. package/src/pgvector/sql/__tests__/select.test.ts +367 -0
  139. package/src/pgvector/sql/__tests__/where.test.ts +554 -0
  140. package/src/pgvector/sql/index.ts +14 -0
  141. package/src/pgvector/sql/limit.ts +29 -0
  142. package/src/pgvector/sql/order.ts +55 -0
  143. package/src/pgvector/sql/query.ts +112 -0
  144. package/src/pgvector/sql/schema.ts +61 -0
  145. package/src/pgvector/sql/select.ts +100 -0
  146. package/src/pgvector/sql/where.ts +152 -0
  147. package/src/pgvector/types.ts +21 -0
  148. package/src/pgvector/utils.ts +24 -0
  149. package/src/postgres.ts +31 -33
  150. package/src/storage.ts +77 -9
  151. package/src/thread/sql.ts +159 -0
  152. package/src/thread/store.ts +40 -127
  153. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,366 @@
1
import { describe, it, expect, beforeAll, afterAll } from "vitest";
import { Pool } from "pg";

import { PGSearchIndex } from "../search";

// Integration suite: requires a live Postgres with the pgvector extension.
// Skipped entirely unless KERNL_PG_TEST_URL points at a test database.
const TEST_DB_URL = process.env.KERNL_PG_TEST_URL;
// Dedicated schema so the suite can drop everything it created on teardown.
const SCHEMA = "kernl_search_idx_test";

describe.sequential("PGSearchIndex", () => {
  if (!TEST_DB_URL) {
    it.skip("requires KERNL_PG_TEST_URL environment variable", () => {});
    return;
  }

  let pool: Pool;
  let pgvec: PGSearchIndex;

  beforeAll(async () => {
    pool = new Pool({ connectionString: TEST_DB_URL });

    // Ensure pgvector extension exists
    await pool.query(`CREATE EXTENSION IF NOT EXISTS vector`);

    // Clean slate
    await pool.query(`DROP SCHEMA IF EXISTS "${SCHEMA}" CASCADE`);
    await pool.query(`CREATE SCHEMA "${SCHEMA}"`);

    pgvec = new PGSearchIndex({ pool });
  });

  afterAll(async () => {
    await pool.query(`DROP SCHEMA IF EXISTS "${SCHEMA}" CASCADE`);
    await pool.end();
  });

  // createIndex: verifies DDL emitted for each logical field type and
  // the HNSW index / operator class chosen per similarity metric.
  describe("createIndex", () => {
    it("creates a table with correct column types", async () => {
      await pgvec.createIndex({
        id: "articles",
        schema: {
          id: { type: "string", pk: true },
          title: { type: "string" },
          views: { type: "int" },
          rating: { type: "float" },
          published: { type: "boolean" },
          created_at: { type: "date" },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Verify table exists with correct columns
      const result = await pool.query<{
        column_name: string;
        data_type: string;
      }>(
        `SELECT column_name, data_type
         FROM information_schema.columns
         WHERE table_schema = $1 AND table_name = $2
         ORDER BY ordinal_position`,
        [SCHEMA, "articles"],
      );

      const columns = Object.fromEntries(
        result.rows.map((r) => [r.column_name, r.data_type]),
      );

      // Expected mapping: string→text, int→integer, float→double precision,
      // boolean→boolean, date→timestamptz.
      expect(columns.id).toBe("text");
      expect(columns.title).toBe("text");
      expect(columns.views).toBe("integer");
      expect(columns.rating).toBe("double precision");
      expect(columns.published).toBe("boolean");
      expect(columns.created_at).toBe("timestamp with time zone");
    });

    it("creates vector columns with correct dimensions", async () => {
      await pgvec.createIndex({
        id: "embeddings",
        schema: {
          id: { type: "string", pk: true },
          embedding: { type: "vector", dimensions: 384 },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Check the vector column type via pg_attribute
      const result = await pool.query<{ typname: string; atttypmod: number }>(
        `SELECT t.typname, a.atttypmod
         FROM pg_attribute a
         JOIN pg_class c ON a.attrelid = c.oid
         JOIN pg_namespace n ON c.relnamespace = n.oid
         JOIN pg_type t ON a.atttypid = t.oid
         WHERE n.nspname = $1
           AND c.relname = $2
           AND a.attname = $3`,
        [SCHEMA, "embeddings", "embedding"],
      );

      expect(result.rows[0]?.typname).toBe("vector");
      // atttypmod encodes dimensions for vector type
      expect(result.rows[0]?.atttypmod).toBe(384);
    });

    it("creates HNSW index for vector fields", async () => {
      await pgvec.createIndex({
        id: "searchable",
        schema: {
          id: { type: "string", pk: true },
          embedding: { type: "vector", dimensions: 128, similarity: "cosine" },
        },
        providerOptions: { schema: SCHEMA },
      });

      const result = await pool.query<{ indexname: string; indexdef: string }>(
        `SELECT indexname, indexdef
         FROM pg_indexes
         WHERE schemaname = $1 AND tablename = $2`,
        [SCHEMA, "searchable"],
      );

      // Index naming convention under test: <table>_<column>_idx
      const hnswIndex = result.rows.find(
        (r) => r.indexname === "searchable_embedding_idx",
      );

      expect(hnswIndex).toBeDefined();
      expect(hnswIndex?.indexdef).toContain("hnsw");
      expect(hnswIndex?.indexdef).toContain("vector_cosine_ops");
    });

    it("uses correct operator class for each similarity metric", async () => {
      // Cosine
      await pgvec.createIndex({
        id: "cosine_idx",
        schema: {
          id: { type: "string", pk: true },
          vec: { type: "vector", dimensions: 8, similarity: "cosine" },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Euclidean
      await pgvec.createIndex({
        id: "euclidean_idx",
        schema: {
          id: { type: "string", pk: true },
          vec: { type: "vector", dimensions: 8, similarity: "euclidean" },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Dot product
      await pgvec.createIndex({
        id: "dot_idx",
        schema: {
          id: { type: "string", pk: true },
          vec: { type: "vector", dimensions: 8, similarity: "dot_product" },
        },
        providerOptions: { schema: SCHEMA },
      });

      const result = await pool.query<{ indexname: string; indexdef: string }>(
        `SELECT indexname, indexdef
         FROM pg_indexes
         WHERE schemaname = $1
         ORDER BY indexname`,
        [SCHEMA],
      );

      const indexes = Object.fromEntries(
        result.rows.map((r) => [r.indexname, r.indexdef]),
      );

      // pgvector operator classes: cosine→vector_cosine_ops,
      // euclidean (L2)→vector_l2_ops, dot product (inner)→vector_ip_ops.
      expect(indexes["cosine_idx_vec_idx"]).toContain("vector_cosine_ops");
      expect(indexes["euclidean_idx_vec_idx"]).toContain("vector_l2_ops");
      expect(indexes["dot_idx_vec_idx"]).toContain("vector_ip_ops");
    });

    it("throws if schema has no pk field", async () => {
      await expect(
        pgvec.createIndex({
          id: "no_pk",
          schema: {
            title: { type: "string" },
            content: { type: "string" },
          },
          providerOptions: { schema: SCHEMA },
        }),
      ).rejects.toThrow("schema must have a field with pk: true");
    });

    it("auto-binds the created index for immediate use", async () => {
      await pgvec.createIndex({
        id: "auto_bound",
        schema: {
          id: { type: "string", pk: true },
          name: { type: "string" },
          embedding: { type: "vector", dimensions: 3, similarity: "cosine" },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Should be able to use the index immediately without bindIndex
      const handle = pgvec.index("auto_bound");
      expect(handle.id).toBe("auto_bound");

      // Insert and query should work
      await handle.upsert({
        id: "test-1",
        name: "Test Doc",
        embedding: [0.1, 0.2, 0.3],
      });

      const results = await handle.query({
        query: [{ embedding: [0.1, 0.2, 0.3] }],
        topK: 1,
      });

      expect(results).toHaveLength(1);
      expect(results[0].id).toBe("test-1");
    });
  });

  // deleteIndex: drops the backing table and forgets the binding.
  describe("deleteIndex", () => {
    it("drops the table and removes binding", async () => {
      await pgvec.createIndex({
        id: "to_delete",
        schema: {
          id: { type: "string", pk: true },
          name: { type: "string" },
        },
        providerOptions: { schema: SCHEMA },
      });

      // Verify table exists
      const before = await pool.query(
        `SELECT 1 FROM information_schema.tables
         WHERE table_schema = $1 AND table_name = $2`,
        [SCHEMA, "to_delete"],
      );
      expect(before.rows).toHaveLength(1);

      await pgvec.deleteIndex("to_delete");

      // Verify table is gone
      const after = await pool.query(
        `SELECT 1 FROM information_schema.tables
         WHERE table_schema = $1 AND table_name = $2`,
        [SCHEMA, "to_delete"],
      );
      expect(after.rows).toHaveLength(0);
    });

    it("throws if index is not bound", async () => {
      await expect(pgvec.deleteIndex("nonexistent")).rejects.toThrow(
        'Index "nonexistent" not bound',
      );
    });
  });

  // listIndexes: prefix filtering + cursor pagination over bound indexes.
  describe("listIndexes", () => {
    it("returns empty page when no indexes match prefix", async () => {
      const page = await pgvec.listIndexes({ prefix: "nonexistent_prefix_" });

      expect(page.data).toEqual([]);
      expect(page.last).toBe(true);
    });

    it("lists created indexes", async () => {
      await pgvec.createIndex({
        id: "list_test_a",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });
      await pgvec.createIndex({
        id: "list_test_b",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });

      const page = await pgvec.listIndexes();

      // Other tests in this sequential suite also create indexes, so only
      // assert containment rather than an exact listing.
      const ids = page.data.map((s) => s.id);
      expect(ids).toContain("list_test_a");
      expect(ids).toContain("list_test_b");
    });

    it("filters by prefix", async () => {
      await pgvec.createIndex({
        id: "prefix_foo_1",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });
      await pgvec.createIndex({
        id: "prefix_bar_1",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });

      const page = await pgvec.listIndexes({ prefix: "prefix_foo" });

      expect(page.data).toHaveLength(1);
      expect(page.data[0].id).toBe("prefix_foo_1");
    });

    it("respects limit and provides cursor for pagination", async () => {
      await pgvec.createIndex({
        id: "page_1",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });
      await pgvec.createIndex({
        id: "page_2",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });
      await pgvec.createIndex({
        id: "page_3",
        schema: { id: { type: "string", pk: true } },
        providerOptions: { schema: SCHEMA },
      });

      const page1 = await pgvec.listIndexes({ prefix: "page_", limit: 2 });

      expect(page1.data).toHaveLength(2);
      expect(page1.last).toBe(false);

      // next() fetches the following page using the embedded cursor.
      const page2 = await page1.next();

      expect(page2).not.toBeNull();
      expect(page2!.data).toHaveLength(1);
      expect(page2!.last).toBe(true);
    });
  });

  // describeIndex: row count, on-disk size, and vector config for an index.
  describe("describeIndex", () => {
    it("returns stats for an index", async () => {
      await pgvec.createIndex({
        id: "describe_test",
        schema: {
          id: { type: "string", pk: true },
          content: { type: "string" },
          embedding: { type: "vector", dimensions: 128, similarity: "cosine" },
        },
        providerOptions: { schema: SCHEMA },
      });

      const handle = pgvec.index("describe_test");
      const vec = new Array(128).fill(0.1);
      await handle.upsert({ id: "doc-1", content: "hello", embedding: vec });
      await handle.upsert({ id: "doc-2", content: "world", embedding: vec });

      const stats = await pgvec.describeIndex("describe_test");

      expect(stats.id).toBe("describe_test");
      expect(stats.count).toBe(2);
      expect(stats.sizeb).toBeGreaterThan(0);
      expect(stats.dimensions).toBe(128);
      expect(stats.similarity).toBe("cosine");
      expect(stats.status).toBe("ready");
    });

    it("throws if index is not bound", async () => {
      await expect(pgvec.describeIndex("nonexistent")).rejects.toThrow(
        'Index "nonexistent" not bound',
      );
    });
  });
});
@@ -0,0 +1,285 @@
1
import type { Pool } from "pg";

import type {
  IndexHandle,
  DocumentPatch,
  QueryInput,
  SearchHit,
  FieldSchema,
  UnknownDocument,
  UpsertResult,
  PatchResult,
  DeleteResult,
} from "@kernl-sdk/retrieval";
import { normalizeQuery } from "@kernl-sdk/retrieval";

import { SEARCH_HIT } from "./hit";
import { sqlize, SQL_SELECT, SQL_WHERE, SQL_ORDER, SQL_LIMIT } from "./sql";
import type { PGIndexConfig } from "./types";
import { parseIndexId, isVector } from "./utils";

/**
 * pgvector-backed IndexHandle.
 *
 * All SQL is built with positional ($n) parameters; identifiers
 * (schema/table/column names) are double-quoted and interpolated directly,
 * so they must come from trusted config, not user input.
 */
export class PGIndexHandle<TDocument = UnknownDocument>
  implements IndexHandle<TDocument>
{
  readonly id: string;

  private pool: Pool;
  // Deferred initialization hook supplied by the owning PGSearchIndex;
  // awaited before every operation so migrations/bindings are in place.
  private ensureInit: () => Promise<void>;
  // Optional explicit binding (schema/table/pkey/field→column map);
  // absent for convention-based handles (see `table` getter).
  private config?: PGIndexConfig;

  constructor(
    pool: Pool,
    ensureInit: () => Promise<void>,
    id: string,
    config?: PGIndexConfig,
  ) {
    this.id = id;
    this.pool = pool;
    this.ensureInit = ensureInit;
    this.config = config;
  }

  /**
   * Query the index using vector search, full-text search, or filters.
   *
   * The normalized query is compiled into SELECT/WHERE/ORDER/LIMIT fragments
   * by the ./sql codecs; each fragment is told where its positional
   * parameters start (`startIdx`) so the concatenated statement's $n
   * placeholders stay contiguous.
   */
  async query(input: QueryInput): Promise<SearchHit<TDocument>[]> {
    await this.ensureInit();

    const q = normalizeQuery(input);
    const { schema, table, pkey } = this.table;

    const sqlized = sqlize(q, { pkey, schema, table, binding: this.config });

    const select = SQL_SELECT.encode(sqlized.select);
    const where = SQL_WHERE.encode({
      ...sqlized.where,
      startIdx: select.params.length + 1,
    });
    const order = SQL_ORDER.encode(sqlized.order);
    const limit = SQL_LIMIT.encode({
      ...sqlized.limit,
      // LIMIT params come after both SELECT and WHERE params.
      startIdx: select.params.length + where.params.length + 1,
    });

    const sql = `
      SELECT ${select.sql}
      FROM "${schema}"."${table}"
      ${where.sql ? `WHERE ${where.sql}` : ""}
      ORDER BY ${order.sql}
      ${limit.sql}
    `;

    // Param order must mirror the startIdx offsets computed above.
    const params = [...select.params, ...where.params, ...limit.params];
    const result = await this.pool.query(sql, params);

    return result.rows.map((row) =>
      SEARCH_HIT.decode<TDocument>(row, this.id, this.config),
    );
  }

  /* ---- document ops ---- */

  /**
   * Upsert one or more documents.
   *
   * Documents are flat objects. The pkey field (default "id") is used for
   * conflict resolution. All other fields are written to matching columns.
   *
   * NOTE(review): the column set is derived from the FIRST document only;
   * fields present only on later documents are silently dropped, and fields
   * missing from later documents are written as NULL — confirm callers
   * always pass homogeneous batches.
   */
  async upsert(docs: TDocument | TDocument[]): Promise<UpsertResult> {
    const arr = Array.isArray(docs) ? docs : [docs];
    if (arr.length === 0) return { count: 0, inserted: 0, updated: 0 };

    await this.ensureInit();
    const { schema, table, pkey, fields } = this.table;

    // Find which logical field maps to the pkey column
    let pkeyField = pkey; // default: same name
    for (const [fieldName, fieldCfg] of Object.entries(fields)) {
      if (fieldCfg.column === pkey) {
        pkeyField = fieldName;
        break;
      }
    }

    // collect field names from first doc (excluding the pkey field)
    const first = arr[0] as UnknownDocument;
    const fieldNames = Object.keys(first).filter((k) => k !== pkeyField);

    // map field name → column name (from binding or same name)
    const colFor = (name: string) => fields[name]?.column ?? name;
    const cols = [pkey, ...fieldNames.map(colFor)];

    const params: unknown[] = [];
    const rows: string[] = [];

    for (const doc of arr) {
      const d = doc as UnknownDocument;
      const placeholders: string[] = [];

      // pkey value - use the logical field name, not column name
      const pkval = d[pkeyField];
      if (typeof pkval !== "string") {
        throw new Error(`Document missing string field "${pkeyField}"`);
      }
      params.push(pkval);
      placeholders.push(`$${params.length}`);

      // ...other fields
      for (const field of fieldNames) {
        const val = d[field] ?? null;
        const binding = fields[field];

        // detect vector: explicit binding or runtime check
        const isVec = binding?.type === "vector" || isVector(val);

        // vectors are serialized as JSON text ("[0.1,0.2]") and cast with
        // ::vector so pgvector parses them server-side
        params.push(isVec ? JSON.stringify(val) : val);
        placeholders.push(
          isVec ? `$${params.length}::vector` : `$${params.length}`,
        );
      }

      rows.push(`(${placeholders.join(", ")})`);
    }

    // build SET clause for conflict (exclude pkey)
    const sets = cols.slice(1).map((c) => `"${c}" = EXCLUDED."${c}"`);

    // xmax = 0 means inserted, xmax != 0 means updated
    const sql = `
      INSERT INTO "${schema}"."${table}" (${cols.map((c) => `"${c}"`).join(", ")})
      VALUES ${rows.join(", ")}
      ON CONFLICT ("${pkey}") DO UPDATE SET ${sets.join(", ")}
      RETURNING (xmax = 0) as inserted
    `;

    const result = await this.pool.query(sql, params);
    const inserted = result.rows.filter((r) => r.inserted).length;

    return {
      count: result.rowCount ?? 0,
      inserted,
      updated: (result.rowCount ?? 0) - inserted,
    };
  }

  /**
   * Patch one or more documents.
   *
   * Only specified fields are updated. Set a field to `null` to unset it.
   * Each patch is executed as its own UPDATE statement (patches may touch
   * different field sets); the returned count is the sum of affected rows.
   */
  async patch(
    patches: DocumentPatch<TDocument> | DocumentPatch<TDocument>[],
  ): Promise<PatchResult> {
    const arr = Array.isArray(patches) ? patches : [patches];
    if (arr.length === 0) return { count: 0 };

    await this.ensureInit();
    const { schema, table, pkey, fields } = this.table;

    let totalCount = 0;

    // process each patch individually (different fields may be updated)
    for (const patch of arr) {
      const pkval = patch.id;
      if (typeof pkval !== "string") {
        throw new Error(`Patch missing string field "id"`);
      }

      // collect fields to update (excluding pkey)
      const updates: string[] = [];
      const params: unknown[] = [];

      for (const [key, val] of Object.entries(patch)) {
        if (key === "id" || key === pkey) continue;
        // undefined means "leave unchanged"; null means "unset"
        if (val === undefined) continue;

        const col = fields[key]?.column ?? key;
        const binding = fields[key];
        const isVec = binding?.type === "vector" || isVector(val);

        // null vector values skip both JSON serialization and the ::vector cast
        params.push(isVec && val !== null ? JSON.stringify(val) : val);
        updates.push(
          `"${col}" = $${params.length}${isVec && val !== null ? "::vector" : ""}`,
        );
      }

      if (updates.length === 0) continue;

      params.push(pkval);
      const sql = `
        UPDATE "${schema}"."${table}"
        SET ${updates.join(", ")}
        WHERE "${pkey}" = $${params.length}
      `;

      const result = await this.pool.query(sql, params);
      totalCount += result.rowCount ?? 0;
    }

    return { count: totalCount };
  }

  /**
   * Delete one or more documents by ID.
   *
   * Missing IDs are not an error; the returned count reflects rows
   * actually deleted.
   */
  async delete(ids: string | string[]): Promise<DeleteResult> {
    const arr = Array.isArray(ids) ? ids : [ids];
    if (arr.length === 0) return { count: 0 };

    await this.ensureInit();
    const { schema, table, pkey } = this.table;

    const placeholders = arr.map((_, i) => `$${i + 1}`);
    const sql = `
      DELETE FROM "${schema}"."${table}"
      WHERE "${pkey}" IN (${placeholders.join(", ")})
    `;

    const result = await this.pool.query(sql, arr);
    return { count: result.rowCount ?? 0 };
  }

  /* ---- schema ops ---- */

  /**
   * Add a field to the index schema.
   *
   * Not yet implemented - left as a stub until we have a concrete use case
   * to inform the design (e.g. vector-only vs general columns, index creation).
   *
   * @throws Error always.
   */
  async addField(_field: string, _schema: FieldSchema): Promise<void> {
    throw new Error("addField not yet implemented");
  }

  /* ---- internal utils ---- */

  /**
   * Resolve table config: use explicit binding or derive from conventions.
   *
   * Resolution order:
   * - If a PGIndexConfig was provided or bound via PGSearchIndex.createIndex()
   *   / bindIndex(), that config is used (schema, table, pkey, fields).
   * - Otherwise we fall back to convention-based parsing of the index id via
   *   parseIndexId(id):
   *   - "docs" → schema: "public", table: "docs"
   *   - "analytics.events" → schema: "analytics", table: "events"
   *
   * This allows callers to:
   * - Point at existing tables without an explicit bindIndex call by using
   *   either "table" or "schema.table" ids, and
   * - Rely on explicit configs (from createIndex/bindIndex) when using the
   *   higher-level SearchIndex API.
   */
  private get table() {
    if (this.config) {
      return this.config;
    }

    // convention-based defaults
    const { schema, table } = parseIndexId(this.id);
    return { schema, table, pkey: "id", fields: {} };
  }
}
@@ -0,0 +1,56 @@
1
+ import { FieldValue, SearchHit, UnknownDocument } from "@kernl-sdk/retrieval";
2
+
3
+ import type { PGIndexConfig } from "./types";
4
+
5
+ /**
6
+ * Codec for converting DB row ←→ SearchHit.
7
+ */
8
+ export const SEARCH_HIT = {
9
+ encode: (_hit: SearchHit): Record<string, unknown> => {
10
+ throw new Error("SEARCH_HIT.encode: not implemented");
11
+ },
12
+
13
+ decode: <TDocument = UnknownDocument>(
14
+ row: Record<string, unknown>,
15
+ index: string,
16
+ config?: PGIndexConfig,
17
+ ): SearchHit<TDocument> => {
18
+ const { id, score, ...rest } = row;
19
+ const doc: Record<string, FieldValue> = {};
20
+
21
+ // Helper to parse pgvector strings like "[0.1,0.2,0.3]" back to arrays
22
+ const parseValue = (val: unknown, isVector: boolean): FieldValue => {
23
+ if (isVector && typeof val === "string" && val.startsWith("[")) {
24
+ return JSON.parse(val);
25
+ }
26
+ return val as FieldValue;
27
+ };
28
+
29
+ if (config) {
30
+ // map columns back to logical field names
31
+ for (const [field, cfg] of Object.entries(config.fields)) {
32
+ const col = cfg.column;
33
+ if (col in rest) {
34
+ const isVector = cfg.type === "vector";
35
+ doc[field] = parseValue(rest[col], isVector);
36
+ }
37
+ }
38
+ } else {
39
+ // no config - parse all values, detecting vectors by format
40
+ for (const [k, v] of Object.entries(rest)) {
41
+ const isVector = typeof v === "string" && v.startsWith("[") && v.endsWith("]");
42
+ doc[k] = parseValue(v, isVector);
43
+ }
44
+ }
45
+
46
+ // Always include id in document for consistency
47
+ doc.id = String(id);
48
+
49
+ return {
50
+ id: String(id),
51
+ index,
52
+ score: typeof score === "number" ? score : 0,
53
+ document: doc as unknown as Partial<TDocument>,
54
+ };
55
+ },
56
+ };