@kernl-sdk/pg 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/.turbo/turbo-check-types.log +36 -0
- package/CHANGELOG.md +32 -0
- package/README.md +124 -0
- package/dist/__tests__/integration.test.js +2 -2
- package/dist/__tests__/memory-integration.test.d.ts +2 -0
- package/dist/__tests__/memory-integration.test.d.ts.map +1 -0
- package/dist/__tests__/memory-integration.test.js +287 -0
- package/dist/__tests__/memory.test.d.ts +2 -0
- package/dist/__tests__/memory.test.d.ts.map +1 -0
- package/dist/__tests__/memory.test.js +357 -0
- package/dist/index.d.ts +5 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -3
- package/dist/memory/sql.d.ts +30 -0
- package/dist/memory/sql.d.ts.map +1 -0
- package/dist/memory/sql.js +100 -0
- package/dist/memory/store.d.ts +41 -0
- package/dist/memory/store.d.ts.map +1 -0
- package/dist/memory/store.js +114 -0
- package/dist/migrations.d.ts +1 -1
- package/dist/migrations.d.ts.map +1 -1
- package/dist/migrations.js +9 -3
- package/dist/pgvector/__tests__/handle.test.d.ts +2 -0
- package/dist/pgvector/__tests__/handle.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/handle.test.js +277 -0
- package/dist/pgvector/__tests__/hit.test.d.ts +2 -0
- package/dist/pgvector/__tests__/hit.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/hit.test.js +134 -0
- package/dist/pgvector/__tests__/integration/document.integration.test.d.ts +7 -0
- package/dist/pgvector/__tests__/integration/document.integration.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/integration/document.integration.test.js +587 -0
- package/dist/pgvector/__tests__/integration/edge.integration.test.d.ts +8 -0
- package/dist/pgvector/__tests__/integration/edge.integration.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/integration/edge.integration.test.js +663 -0
- package/dist/pgvector/__tests__/integration/filters.integration.test.d.ts +8 -0
- package/dist/pgvector/__tests__/integration/filters.integration.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/integration/filters.integration.test.js +609 -0
- package/dist/pgvector/__tests__/integration/lifecycle.integration.test.d.ts +8 -0
- package/dist/pgvector/__tests__/integration/lifecycle.integration.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/integration/lifecycle.integration.test.js +449 -0
- package/dist/pgvector/__tests__/integration/query.integration.test.d.ts +8 -0
- package/dist/pgvector/__tests__/integration/query.integration.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/integration/query.integration.test.js +544 -0
- package/dist/pgvector/__tests__/search.test.d.ts +2 -0
- package/dist/pgvector/__tests__/search.test.d.ts.map +1 -0
- package/dist/pgvector/__tests__/search.test.js +279 -0
- package/dist/pgvector/handle.d.ts +60 -0
- package/dist/pgvector/handle.d.ts.map +1 -0
- package/dist/pgvector/handle.js +213 -0
- package/dist/pgvector/hit.d.ts +10 -0
- package/dist/pgvector/hit.d.ts.map +1 -0
- package/dist/pgvector/hit.js +44 -0
- package/dist/pgvector/index.d.ts +7 -0
- package/dist/pgvector/index.d.ts.map +1 -0
- package/dist/pgvector/index.js +5 -0
- package/dist/pgvector/search.d.ts +60 -0
- package/dist/pgvector/search.d.ts.map +1 -0
- package/dist/pgvector/search.js +227 -0
- package/dist/pgvector/sql/__tests__/limit.test.d.ts +2 -0
- package/dist/pgvector/sql/__tests__/limit.test.d.ts.map +1 -0
- package/dist/pgvector/sql/__tests__/limit.test.js +161 -0
- package/dist/pgvector/sql/__tests__/order.test.d.ts +2 -0
- package/dist/pgvector/sql/__tests__/order.test.d.ts.map +1 -0
- package/dist/pgvector/sql/__tests__/order.test.js +218 -0
- package/dist/pgvector/sql/__tests__/query.test.d.ts +2 -0
- package/dist/pgvector/sql/__tests__/query.test.d.ts.map +1 -0
- package/dist/pgvector/sql/__tests__/query.test.js +392 -0
- package/dist/pgvector/sql/__tests__/select.test.d.ts +2 -0
- package/dist/pgvector/sql/__tests__/select.test.d.ts.map +1 -0
- package/dist/pgvector/sql/__tests__/select.test.js +293 -0
- package/dist/pgvector/sql/__tests__/where.test.d.ts +2 -0
- package/dist/pgvector/sql/__tests__/where.test.d.ts.map +1 -0
- package/dist/pgvector/sql/__tests__/where.test.js +488 -0
- package/dist/pgvector/sql/index.d.ts +7 -0
- package/dist/pgvector/sql/index.d.ts.map +1 -0
- package/dist/pgvector/sql/index.js +6 -0
- package/dist/pgvector/sql/limit.d.ts +8 -0
- package/dist/pgvector/sql/limit.d.ts.map +1 -0
- package/dist/pgvector/sql/limit.js +20 -0
- package/dist/pgvector/sql/order.d.ts +9 -0
- package/dist/pgvector/sql/order.d.ts.map +1 -0
- package/dist/pgvector/sql/order.js +47 -0
- package/dist/pgvector/sql/query.d.ts +46 -0
- package/dist/pgvector/sql/query.d.ts.map +1 -0
- package/dist/pgvector/sql/query.js +54 -0
- package/dist/pgvector/sql/schema.d.ts +16 -0
- package/dist/pgvector/sql/schema.d.ts.map +1 -0
- package/dist/pgvector/sql/schema.js +47 -0
- package/dist/pgvector/sql/select.d.ts +11 -0
- package/dist/pgvector/sql/select.d.ts.map +1 -0
- package/dist/pgvector/sql/select.js +87 -0
- package/dist/pgvector/sql/where.d.ts +8 -0
- package/dist/pgvector/sql/where.d.ts.map +1 -0
- package/dist/pgvector/sql/where.js +137 -0
- package/dist/pgvector/types.d.ts +20 -0
- package/dist/pgvector/types.d.ts.map +1 -0
- package/dist/pgvector/types.js +1 -0
- package/dist/pgvector/utils.d.ts +18 -0
- package/dist/pgvector/utils.d.ts.map +1 -0
- package/dist/pgvector/utils.js +22 -0
- package/dist/postgres.d.ts +19 -26
- package/dist/postgres.d.ts.map +1 -1
- package/dist/postgres.js +15 -27
- package/dist/storage.d.ts +48 -0
- package/dist/storage.d.ts.map +1 -1
- package/dist/storage.js +32 -9
- package/dist/thread/sql.d.ts +38 -0
- package/dist/thread/sql.d.ts.map +1 -0
- package/dist/thread/sql.js +112 -0
- package/dist/thread/store.d.ts +2 -2
- package/dist/thread/store.d.ts.map +1 -1
- package/dist/thread/store.js +32 -102
- package/package.json +7 -4
- package/src/__tests__/integration.test.ts +15 -17
- package/src/__tests__/memory-integration.test.ts +355 -0
- package/src/__tests__/memory.test.ts +428 -0
- package/src/index.ts +19 -3
- package/src/memory/sql.ts +141 -0
- package/src/memory/store.ts +166 -0
- package/src/migrations.ts +13 -3
- package/src/pgvector/README.md +50 -0
- package/src/pgvector/__tests__/handle.test.ts +335 -0
- package/src/pgvector/__tests__/hit.test.ts +165 -0
- package/src/pgvector/__tests__/integration/document.integration.test.ts +717 -0
- package/src/pgvector/__tests__/integration/edge.integration.test.ts +835 -0
- package/src/pgvector/__tests__/integration/filters.integration.test.ts +721 -0
- package/src/pgvector/__tests__/integration/lifecycle.integration.test.ts +570 -0
- package/src/pgvector/__tests__/integration/query.integration.test.ts +667 -0
- package/src/pgvector/__tests__/search.test.ts +366 -0
- package/src/pgvector/handle.ts +285 -0
- package/src/pgvector/hit.ts +56 -0
- package/src/pgvector/index.ts +7 -0
- package/src/pgvector/search.ts +330 -0
- package/src/pgvector/sql/__tests__/limit.test.ts +180 -0
- package/src/pgvector/sql/__tests__/order.test.ts +248 -0
- package/src/pgvector/sql/__tests__/query.test.ts +548 -0
- package/src/pgvector/sql/__tests__/select.test.ts +367 -0
- package/src/pgvector/sql/__tests__/where.test.ts +554 -0
- package/src/pgvector/sql/index.ts +14 -0
- package/src/pgvector/sql/limit.ts +29 -0
- package/src/pgvector/sql/order.ts +55 -0
- package/src/pgvector/sql/query.ts +112 -0
- package/src/pgvector/sql/schema.ts +61 -0
- package/src/pgvector/sql/select.ts +100 -0
- package/src/pgvector/sql/where.ts +152 -0
- package/src/pgvector/types.ts +21 -0
- package/src/pgvector/utils.ts +24 -0
- package/src/postgres.ts +31 -33
- package/src/storage.ts +77 -9
- package/src/thread/sql.ts +159 -0
- package/src/thread/store.ts +40 -127
- package/tsconfig.tsbuildinfo +1 -0

package/dist/pgvector/search.d.ts
@@ -0,0 +1,60 @@
+import type { Pool } from "pg";
+import { CursorPage } from "@kernl-sdk/shared";
+import type { SearchIndex, IndexHandle, NewIndexParams, ListIndexesParams, IndexSummary, IndexStats, UnknownDocument, SearchCapabilities } from "@kernl-sdk/retrieval";
+import type { PGIndexConfig } from "./types.js";
+export interface PGSearchIndexConfig {
+    pool: Pool;
+    ensureInit?: () => Promise<void>;
+}
+/**
+ * pgvector-backed SearchIndex implementation.
+ */
+export declare class PGSearchIndex implements SearchIndex<PGIndexConfig> {
+    readonly id = "pgvector";
+    private pool;
+    private userInit;
+    private configs;
+    private ready;
+    constructor(config: PGSearchIndexConfig);
+    /**
+     * Create a new index table.
+     *
+     * @param params.id - Table name
+     * @param params.schema - Field definitions (one field must have pk: true)
+     * @param params.providerOptions.schema - Postgres schema (default: "public")
+     */
+    createIndex(params: NewIndexParams): Promise<void>;
+    /**
+     * List all indexes.
+     */
+    listIndexes(params?: ListIndexesParams): Promise<CursorPage<IndexSummary>>;
+    /**
+     * Get index statistics.
+     */
+    describeIndex(id: string): Promise<IndexStats>;
+    /**
+     * Delete an index and all its documents.
+     */
+    deleteIndex(id: string): Promise<void>;
+    /**
+     * No-op for pgvector.
+     */
+    warm(_id: string): Promise<void>;
+    /**
+     * pgvector capabilities.
+     */
+    capabilities(): SearchCapabilities;
+    /**
+     * Get a handle for operating on a specific index.
+     */
+    index<TDocument = UnknownDocument>(id: string): IndexHandle<TDocument>;
+    /**
+     * Bind an existing Postgres table as an index.
+     */
+    bindIndex(id: string, config: PGIndexConfig): Promise<void>;
+    /**
+     * Ensure metadata table exists and load configs.
+     */
+    private ensureInit;
+}
+//# sourceMappingURL=search.d.ts.map
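For orientation, consuming the new class would look roughly like the sketch below. This is not part of the diff; it is inferred from the declaration above and from the createIndex implementation later in this diff. The field names are illustrative, and the "text" field type is an assumption (the concrete FIELD_TYPE values live in dist/pgvector/sql/schema.js, which this page does not show).

    import { Pool } from "pg";
    import { PGSearchIndex } from "@kernl-sdk/pg"; // assumes the package root re-exports it (src/index.ts +19 -3)

    const pool = new Pool({ connectionString: process.env.DATABASE_URL });
    const search = new PGSearchIndex({ pool });

    // exactly one field must have pk: true; vector fields carry dimensions + similarity
    await search.createIndex({
        id: "docs",
        schema: {
            id: { type: "text", pk: true },  // "text" is an assumed field type
            embedding: { type: "vector", dimensions: 3, similarity: "cosine" },
        },
        providerOptions: { schema: "public" },  // Postgres schema, default "public"
    });
    const docs = search.index("docs");  // IndexHandle for document operations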

package/dist/pgvector/search.d.ts.map
@@ -0,0 +1 @@
+{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/pgvector/search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,IAAI,CAAC;AAC/B,OAAO,EAAE,UAAU,EAA2B,MAAM,mBAAmB,CAAC;AAExE,OAAO,KAAK,EACV,WAAW,EACX,WAAW,EACX,cAAc,EACd,iBAAiB,EACjB,YAAY,EACZ,UAAU,EACV,eAAe,EACf,kBAAkB,EACnB,MAAM,sBAAsB,CAAC;AAI9B,OAAO,KAAK,EAAE,aAAa,EAAkB,MAAM,SAAS,CAAC;AAI7D,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,IAAI,CAAC;IACX,UAAU,CAAC,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;CAClC;AAED;;GAEG;AACH,qBAAa,aAAc,YAAW,WAAW,CAAC,aAAa,CAAC;IAC9D,QAAQ,CAAC,EAAE,cAAc;IAEzB,OAAO,CAAC,IAAI,CAAO;IACnB,OAAO,CAAC,QAAQ,CAAsB;IACtC,OAAO,CAAC,OAAO,CAAoC;IACnD,OAAO,CAAC,KAAK,CAAS;gBAEV,MAAM,EAAE,mBAAmB;IAKvC;;;;;;OAMG;IACG,WAAW,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IA8ExD;;OAEG;IACG,WAAW,CACf,MAAM,CAAC,EAAE,iBAAiB,GACzB,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;IAwDpC;;OAEG;IACG,aAAa,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC;IAmCpD;;OAEG;IACG,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAkB5C;;OAEG;IACG,IAAI,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAEtC;;OAEG;IACH,YAAY,IAAI,kBAAkB;IAUlC;;OAEG;IACH,KAAK,CAAC,SAAS,GAAG,eAAe,EAAE,EAAE,EAAE,MAAM,GAAG,WAAW,CAAC,SAAS,CAAC;IAUtE;;OAEG;IACG,SAAS,CAAC,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;IAgBjE;;OAEG;YACW,UAAU;CA8BzB"}

package/dist/pgvector/search.js
@@ -0,0 +1,227 @@
+import { CursorPage } from "@kernl-sdk/shared";
+import { KERNL_SCHEMA_NAME } from "@kernl-sdk/storage";
+import { PGIndexHandle } from "./handle.js";
+import { FIELD_TYPE, SIMILARITY } from "./sql/index.js";
+const META_TABLE = "search_indexes";
+/**
+ * pgvector-backed SearchIndex implementation.
+ */
+export class PGSearchIndex {
+    id = "pgvector";
+    pool;
+    userInit;
+    configs = new Map();
+    ready = false;
+    constructor(config) {
+        this.pool = config.pool;
+        this.userInit = config.ensureInit ?? (() => Promise.resolve());
+    }
+    /**
+     * Create a new index table.
+     *
+     * @param params.id - Table name
+     * @param params.schema - Field definitions (one field must have pk: true)
+     * @param params.providerOptions.schema - Postgres schema (default: "public")
+     */
+    async createIndex(params) {
+        await this.ensureInit();
+        const schemaName = params.providerOptions?.schema ?? "public";
+        // find primary key field
+        const pkEntry = Object.entries(params.schema).find(([, f]) => f.pk);
+        if (!pkEntry) {
+            throw new Error("schema must have a field with pk: true");
+        }
+        const pkey = pkEntry[0];
+        const columns = [];
+        const vectorFields = [];
+        for (const [name, field] of Object.entries(params.schema)) {
+            const colDef = `"${name}" ${FIELD_TYPE.encode(field)}${field.pk ? " PRIMARY KEY" : ""}`;
+            columns.push(colDef);
+            if (field.type === "vector") {
+                vectorFields.push({
+                    name,
+                    dimensions: field.dimensions,
+                    similarity: field.similarity,
+                });
+            }
+        }
+        // create table
+        await this.pool.query(`
+      CREATE TABLE "${schemaName}"."${params.id}" (
+        ${columns.join(",\n        ")}
+      )
+    `);
+        // create HNSW indexes for vector fields
+        for (const vf of vectorFields) {
+            await this.pool.query(`
+        CREATE INDEX "${params.id}_${vf.name}_idx"
+        ON "${schemaName}"."${params.id}"
+        USING hnsw ("${vf.name}" ${SIMILARITY.encode(vf.similarity)})
+      `);
+        }
+        // auto-bind the created table
+        const fields = {};
+        for (const [name, field] of Object.entries(params.schema)) {
+            fields[name] = {
+                column: name,
+                type: field.type,
+                ...(field.type === "vector" && {
+                    dimensions: field.dimensions,
+                    similarity: field.similarity,
+                }),
+            };
+        }
+        const config = {
+            schema: schemaName,
+            table: params.id,
+            pkey,
+            fields,
+        };
+        // persist to metadata table
+        await this.pool.query(`INSERT INTO "${KERNL_SCHEMA_NAME}"."${META_TABLE}" (id, backend, config, created_at)
+       VALUES ($1, $2, $3, $4)`, [params.id, this.id, JSON.stringify(config), Date.now()]);
+        this.configs.set(params.id, config);
+    }
+    /**
+     * List all indexes.
+     */
+    async listIndexes(params) {
+        await this.ensureInit();
+        const loader = async (p) => {
+            const limit = p.limit ?? 100;
+            let sql = `
+      SELECT id FROM "${KERNL_SCHEMA_NAME}"."${META_TABLE}"
+      WHERE backend = $1
+    `;
+            const sqlParams = [this.id];
+            let idx = 2;
+            if (p.prefix) {
+                sql += ` AND id LIKE $${idx}`;
+                sqlParams.push(`${p.prefix}%`);
+                idx++;
+            }
+            if (p.cursor) {
+                sql += ` AND id > $${idx}`;
+                sqlParams.push(p.cursor);
+                idx++;
+            }
+            sql += ` ORDER BY id ASC LIMIT $${idx}`;
+            sqlParams.push(limit + 1);
+            const result = await this.pool.query(sql, sqlParams);
+            const hasMore = result.rows.length > limit;
+            const rows = hasMore ? result.rows.slice(0, -1) : result.rows;
+            const data = rows.map((row) => ({
+                id: row.id,
+                status: "ready",
+            }));
+            return {
+                data,
+                next: hasMore ? (rows[rows.length - 1]?.id ?? null) : null,
+                last: !hasMore,
+            };
+        };
+        const response = await loader(params ?? {});
+        return new CursorPage({
+            params: params ?? {},
+            response,
+            loader,
+        });
+    }
+    /**
+     * Get index statistics.
+     */
+    async describeIndex(id) {
+        await this.ensureInit();
+        const cfg = this.configs.get(id);
+        if (!cfg) {
+            throw new Error(`Index "${id}" not bound`);
+        }
+        // get row count
+        const countRes = await this.pool.query(`SELECT COUNT(*) as count FROM "${cfg.schema}"."${cfg.table}"`);
+        const count = parseInt(countRes.rows[0]?.count ?? "0", 10);
+        // get table size in bytes
+        const sizeRes = await this.pool.query(`SELECT pg_total_relation_size('"${cfg.schema}"."${cfg.table}"') as size`);
+        const sizeb = parseInt(sizeRes.rows[0]?.size ?? "0", 10);
+        // find vector field for dimensions/similarity
+        const vectorField = Object.values(cfg.fields).find((f) => f.type === "vector");
+        return {
+            id,
+            count,
+            sizeb,
+            dimensions: vectorField?.dimensions,
+            similarity: vectorField?.similarity,
+            status: "ready",
+        };
+    }
+    /**
+     * Delete an index and all its documents.
+     */
+    async deleteIndex(id) {
+        await this.ensureInit();
+        const cfg = this.configs.get(id);
+        if (!cfg) {
+            throw new Error(`Index "${id}" not bound`);
+        }
+        await this.pool.query(`DROP TABLE IF EXISTS "${cfg.schema}"."${cfg.table}"`);
+        await this.pool.query(`DELETE FROM "${KERNL_SCHEMA_NAME}"."${META_TABLE}" WHERE id = $1`, [id]);
+        this.configs.delete(id);
+    }
+    /**
+     * No-op for pgvector.
+     */
+    async warm(_id) { }
+    /**
+     * pgvector capabilities.
+     */
+    capabilities() {
+        return {
+            modes: new Set(["vector"]),
+            multiSignal: false,
+            multiVector: false,
+            filters: true,
+            orderBy: true,
+        };
+    }
+    /**
+     * Get a handle for operating on a specific index.
+     */
+    index(id) {
+        const cfg = this.configs.get(id);
+        return new PGIndexHandle(this.pool, () => this.ensureInit(), id, cfg);
+    }
+    /**
+     * Bind an existing Postgres table as an index.
+     */
+    async bindIndex(id, config) {
+        await this.ensureInit();
+        // upsert to metadata table
+        await this.pool.query(`INSERT INTO "${KERNL_SCHEMA_NAME}"."${META_TABLE}" (id, backend, config, created_at)
+       VALUES ($1, $2, $3, $4)
+       ON CONFLICT (id) DO UPDATE SET config = $3`, [id, this.id, JSON.stringify(config), Date.now()]);
+        this.configs.set(id, config);
+    }
+    /* --- internal utils --- */
+    /**
+     * Ensure metadata table exists and load configs.
+     */
+    async ensureInit() {
+        if (this.ready)
+            return;
+        await this.userInit();
+        // create metadata table
+        await this.pool.query(`
+      CREATE TABLE IF NOT EXISTS "${KERNL_SCHEMA_NAME}"."${META_TABLE}" (
+        id TEXT PRIMARY KEY,
+        backend TEXT NOT NULL,
+        config JSONB NOT NULL,
+        created_at BIGINT NOT NULL
+      )
+    `);
+        // load existing configs for this backend
+        const result = await this.pool.query(`SELECT id, config FROM "${KERNL_SCHEMA_NAME}"."${META_TABLE}" WHERE backend = $1`, [this.id]);
+        for (const row of result.rows) {
+            this.configs.set(row.id, row.config);
+        }
+        this.ready = true;
+    }
+}
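Note the metadata design: every index, whether created or bound, is a row in kernl's search_indexes table, and ensureInit lazily creates that table and hydrates the in-memory config map. For a pre-existing table you skip createIndex and bind directly. A minimal sketch, continuing the earlier example and assuming a pre-existing public.docs table with an id primary key and a vec_col vector(3) column (the config shape matches the binding objects used in the order tests below):

    await search.bindIndex("docs", {
        schema: "public",
        table: "docs",
        pkey: "id",
        fields: {
            // logical field name -> physical column mapping
            embedding: { column: "vec_col", type: "vector", dimensions: 3, similarity: "cosine" },
        },
    });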

package/dist/pgvector/sql/__tests__/limit.test.d.ts.map
@@ -0,0 +1 @@
+{"version":3,"file":"limit.test.d.ts","sourceRoot":"","sources":["../../../../src/pgvector/sql/__tests__/limit.test.ts"],"names":[],"mappings":""}

package/dist/pgvector/sql/__tests__/limit.test.js
@@ -0,0 +1,161 @@
+import { describe, it, expect } from "vitest";
+import { SQL_LIMIT } from "../limit.js";
+describe("SQL_LIMIT", () => {
+    describe("encode", () => {
+        it("builds LIMIT clause", () => {
+            const result = SQL_LIMIT.encode({
+                topK: 10,
+                offset: 0,
+                startIdx: 1,
+            });
+            expect(result.sql).toBe("LIMIT $1");
+            expect(result.params).toEqual([10]);
+        });
+        it("respects startIdx for parameter numbering", () => {
+            const result = SQL_LIMIT.encode({
+                topK: 10,
+                offset: 0,
+                startIdx: 5,
+            });
+            expect(result.sql).toBe("LIMIT $5");
+            expect(result.params).toEqual([10]);
+        });
+        it("includes OFFSET when offset > 0", () => {
+            const result = SQL_LIMIT.encode({
+                topK: 10,
+                offset: 20,
+                startIdx: 1,
+            });
+            expect(result.sql).toBe("LIMIT $1 OFFSET $2");
+            expect(result.params).toEqual([10, 20]);
+        });
+        it("skips OFFSET when offset is 0", () => {
+            const result = SQL_LIMIT.encode({
+                topK: 25,
+                offset: 0,
+                startIdx: 3,
+            });
+            expect(result.sql).toBe("LIMIT $3");
+            expect(result.params).toEqual([25]);
+        });
+        it("handles pagination correctly", () => {
+            // Page 1: offset 0
+            const page1 = SQL_LIMIT.encode({
+                topK: 20,
+                offset: 0,
+                startIdx: 1,
+            });
+            expect(page1.sql).toBe("LIMIT $1");
+            expect(page1.params).toEqual([20]);
+            // Page 2: offset 20
+            const page2 = SQL_LIMIT.encode({
+                topK: 20,
+                offset: 20,
+                startIdx: 1,
+            });
+            expect(page2.sql).toBe("LIMIT $1 OFFSET $2");
+            expect(page2.params).toEqual([20, 20]);
+            // Page 3: offset 40
+            const page3 = SQL_LIMIT.encode({
+                topK: 20,
+                offset: 40,
+                startIdx: 1,
+            });
+            expect(page3.sql).toBe("LIMIT $1 OFFSET $2");
+            expect(page3.params).toEqual([20, 40]);
+        });
+        it("correctly increments param index after SELECT and WHERE", () => {
+            // Simulating: SELECT uses $1, WHERE uses $2-$4
+            // LIMIT should start at $5
+            const result = SQL_LIMIT.encode({
+                topK: 10,
+                offset: 50,
+                startIdx: 5,
+            });
+            expect(result.sql).toBe("LIMIT $5 OFFSET $6");
+            expect(result.params).toEqual([10, 50]);
+        });
+        describe("edge values", () => {
+            it("handles topK: 0", () => {
+                const result = SQL_LIMIT.encode({
+                    topK: 0,
+                    offset: 0,
+                    startIdx: 1,
+                });
+                // LIMIT 0 is valid SQL - returns no rows
+                expect(result.sql).toBe("LIMIT $1");
+                expect(result.params).toEqual([0]);
+            });
+            it("handles topK: 1", () => {
+                const result = SQL_LIMIT.encode({
+                    topK: 1,
+                    offset: 0,
+                    startIdx: 1,
+                });
+                expect(result.sql).toBe("LIMIT $1");
+                expect(result.params).toEqual([1]);
+            });
+            it("handles very large topK", () => {
+                const result = SQL_LIMIT.encode({
+                    topK: 1000000,
+                    offset: 0,
+                    startIdx: 1,
+                });
+                expect(result.sql).toBe("LIMIT $1");
+                expect(result.params).toEqual([1000000]);
+            });
+            it("handles very large offset", () => {
+                const result = SQL_LIMIT.encode({
+                    topK: 10,
+                    offset: 999999,
+                    startIdx: 1,
+                });
+                expect(result.sql).toBe("LIMIT $1 OFFSET $2");
+                expect(result.params).toEqual([10, 999999]);
+            });
+            it("handles very large startIdx", () => {
+                const result = SQL_LIMIT.encode({
+                    topK: 10,
+                    offset: 20,
+                    startIdx: 50,
+                });
+                expect(result.sql).toBe("LIMIT $50 OFFSET $51");
+                expect(result.params).toEqual([10, 20]);
+            });
+            it("handles startIdx: 1 with both topK and offset", () => {
+                const result = SQL_LIMIT.encode({
+                    topK: 25,
+                    offset: 100,
+                    startIdx: 1,
+                });
+                expect(result.sql).toBe("LIMIT $1 OFFSET $2");
+                expect(result.params).toEqual([25, 100]);
+            });
+        });
+        describe("offset boundary", () => {
+            it("includes OFFSET when offset is exactly 1", () => {
+                const result = SQL_LIMIT.encode({
+                    topK: 10,
+                    offset: 1,
+                    startIdx: 1,
+                });
+                expect(result.sql).toBe("LIMIT $1 OFFSET $2");
+                expect(result.params).toEqual([10, 1]);
+            });
+            it("does not include OFFSET when offset is exactly 0", () => {
+                const result = SQL_LIMIT.encode({
+                    topK: 10,
+                    offset: 0,
+                    startIdx: 1,
+                });
+                expect(result.sql).toBe("LIMIT $1");
+                expect(result.params).toEqual([10]);
+            });
+        });
+    });
+    describe("decode", () => {
+        it("throws not implemented", () => {
+            expect(() => SQL_LIMIT.decode({})).toThrow("SQL_LIMIT.decode not implemented");
+        });
+    });
+});
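These tests pin down the fragment contract: encode returns a { sql, params } pair, and startIdx lets callers thread Postgres placeholder numbering across fragments produced by other codecs. A sketch of the composition the "correctly increments param index" test is simulating; the WHERE fragment and the pool here are illustrative stand-ins, not the package's actual SQL_WHERE output:

    const where = { sql: 'WHERE "status" = $1', params: ["ready"] };        // occupies $1
    const limit = SQL_LIMIT.encode({ topK: 10, offset: 20, startIdx: 2 });  // "LIMIT $2 OFFSET $3"
    const sql = `SELECT * FROM "public"."docs" ${where.sql} ${limit.sql}`;
    await pool.query(sql, [...where.params, ...limit.params]);              // ["ready", 10, 20]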

package/dist/pgvector/sql/__tests__/order.test.d.ts.map
@@ -0,0 +1 @@
+{"version":3,"file":"order.test.d.ts","sourceRoot":"","sources":["../../../../src/pgvector/sql/__tests__/order.test.ts"],"names":[],"mappings":""}

package/dist/pgvector/sql/__tests__/order.test.js
@@ -0,0 +1,218 @@
+import { describe, it, expect } from "vitest";
+import { SQL_ORDER } from "../order.js";
+describe("SQL_ORDER", () => {
+    describe("encode", () => {
+        it("returns score DESC when no signals or orderBy", () => {
+            const result = SQL_ORDER.encode({
+                signals: [],
+            });
+            expect(result.sql).toBe("score DESC");
+        });
+        describe("explicit orderBy", () => {
+            it("handles orderBy with default direction (desc)", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [],
+                    orderBy: { field: "created_at" },
+                });
+                expect(result.sql).toBe('"created_at" DESC');
+            });
+            it("handles orderBy with asc direction", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [],
+                    orderBy: { field: "name", direction: "asc" },
+                });
+                expect(result.sql).toBe('"name" ASC');
+            });
+            it("handles orderBy with desc direction", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [],
+                    orderBy: { field: "views", direction: "desc" },
+                });
+                expect(result.sql).toBe('"views" DESC');
+            });
+            it("orderBy takes precedence over vector signals", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ embedding: [0.1, 0.2, 0.3] }],
+                    orderBy: { field: "created_at", direction: "desc" },
+                });
+                expect(result.sql).toBe('"created_at" DESC');
+            });
+        });
+        describe("vector ordering", () => {
+            it("orders by cosine distance (default)", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ embedding: [0.1, 0.2, 0.3] }],
+                });
+                expect(result.sql).toBe('"embedding" <=> $1::vector');
+            });
+            it("uses binding to map field to column", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ embedding: [0.1, 0.2, 0.3] }],
+                    binding: {
+                        schema: "public",
+                        table: "docs",
+                        pkey: "id",
+                        fields: {
+                            embedding: {
+                                column: "vec_col",
+                                type: "vector",
+                                dimensions: 3,
+                                similarity: "cosine",
+                            },
+                        },
+                    },
+                });
+                expect(result.sql).toBe('"vec_col" <=> $1::vector');
+            });
+            it("orders by euclidean distance", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ embedding: [0.1, 0.2, 0.3] }],
+                    binding: {
+                        schema: "public",
+                        table: "docs",
+                        pkey: "id",
+                        fields: {
+                            embedding: {
+                                column: "embedding",
+                                type: "vector",
+                                dimensions: 3,
+                                similarity: "euclidean",
+                            },
+                        },
+                    },
+                });
+                expect(result.sql).toBe('"embedding" <-> $1::vector');
+            });
+            it("orders by dot product distance", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ embedding: [0.1, 0.2, 0.3] }],
+                    binding: {
+                        schema: "public",
+                        table: "docs",
+                        pkey: "id",
+                        fields: {
+                            embedding: {
+                                column: "embedding",
+                                type: "vector",
+                                dimensions: 3,
+                                similarity: "dot_product",
+                            },
+                        },
+                    },
+                });
+                expect(result.sql).toBe('"embedding" <#> $1::vector');
+            });
+            it("ignores non-vector signals", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ content: "search text" }],
+                });
+                expect(result.sql).toBe("score DESC");
+            });
+            it("finds vector signal among mixed signals", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [
+                        { content: "search text", weight: 0.3 },
+                        { embedding: [0.1, 0.2, 0.3], weight: 0.7 },
+                    ],
+                });
+                expect(result.sql).toBe('"embedding" <=> $1::vector');
+            });
+        });
+        describe("binding edge cases", () => {
+            it("falls back to field name when binding exists but field not in fields", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ embedding: [0.1, 0.2, 0.3] }],
+                    binding: {
+                        schema: "public",
+                        table: "docs",
+                        pkey: "id",
+                        fields: {
+                            other_field: {
+                                column: "other_col",
+                                type: "vector",
+                                dimensions: 3,
+                                similarity: "euclidean",
+                            },
+                        },
+                    },
+                });
+                // embedding not in binding.fields, uses field name and defaults to cosine
+                expect(result.sql).toBe('"embedding" <=> $1::vector');
+            });
+            it("defaults to cosine when binding field has no similarity", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ embedding: [0.1, 0.2, 0.3] }],
+                    binding: {
+                        schema: "public",
+                        table: "docs",
+                        fields: {
+                            embedding: {
+                                column: "vec_col",
+                                type: "vector",
+                                dimensions: 3,
+                                // similarity intentionally omitted
+                            },
+                        },
+                    }, // cast to bypass type check for test
+                });
+                expect(result.sql).toBe('"vec_col" <=> $1::vector');
+            });
+        });
+        describe("multiple vector signals", () => {
+            it("uses only the first vector signal for ordering", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [
+                        { embedding1: [0.1, 0.2, 0.3] },
+                        { embedding2: [0.4, 0.5, 0.6] },
+                    ],
+                });
+                // Should use embedding1, not embedding2
+                expect(result.sql).toBe('"embedding1" <=> $1::vector');
+            });
+            it("uses first vector field within a single signal object", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ embedding1: [0.1, 0.2], embedding2: [0.3, 0.4] }],
+                });
+                // Object iteration order - first encountered wins
+                expect(result.sql).toMatch(/\$1::vector/);
+            });
+        });
+        describe("consistency with SELECT", () => {
+            it("uses same $1 placeholder as SELECT for vector param", () => {
+                // ORDER BY always references $1::vector when there's a vector signal
+                // This must stay in sync with SELECT which puts vector at $1
+                const result = SQL_ORDER.encode({
+                    signals: [{ embedding: [0.1, 0.2, 0.3] }],
+                });
+                expect(result.sql).toBe('"embedding" <=> $1::vector');
+                // No params returned - ORDER BY reuses SELECT's $1
+            });
+        });
+        describe("malformed signals", () => {
+            it("treats empty signal object as no vector signal", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{}],
+                });
+                expect(result.sql).toBe("score DESC");
+            });
+            it("treats signal with only weight as no vector signal", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ weight: 0.5 }],
+                });
+                expect(result.sql).toBe("score DESC");
+            });
+            it("handles empty vector array", () => {
+                const result = SQL_ORDER.encode({
+                    signals: [{ embedding: [] }],
+                });
+                // Empty array is still detected as vector
+                expect(result.sql).toBe('"embedding" <=> $1::vector');
+            });
+        });
+    });
+    describe("decode", () => {
+        it("throws not implemented", () => {
+            expect(() => SQL_ORDER.decode({})).toThrow("SQL_ORDER.decode not implemented");
+        });
+    });
+});
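The assertions above match pgvector's documented distance operators, so the similarity-to-operator mapping the encoder must implement can be read straight off the expected strings. An illustrative reduction (not the package's actual SQL_ORDER internals):

    // similarity name -> pgvector distance operator, mirroring the test expectations
    const DISTANCE_OP: Record<string, string> = {
        cosine: "<=>",      // cosine distance (the default)
        euclidean: "<->",   // L2 distance
        dot_product: "<#>", // negative inner product
    };
    // the $1 placeholder is shared with the SELECT fragment, per the consistency test
    const orderBy = (column: string, similarity = "cosine") =>
        `"${column}" ${DISTANCE_OP[similarity]} $1::vector`;
    // orderBy("embedding")            -> '"embedding" <=> $1::vector'
    // orderBy("vec_col", "euclidean") -> '"vec_col" <-> $1::vector'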