@anvia/pgvector 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Indra Zulfi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,110 @@
1
+ # @anvia/pgvector
2
+
3
+ Postgres pgvector store adapter for Anvia.
4
+
5
+ Use this package when you want to store Anvia embedded documents in Postgres with the pgvector extension and query them through Anvia's vector search interfaces.
6
+
7
+ ## Installation
8
+
9
+ ```sh
10
+ pnpm add @anvia/pgvector @anvia/core pg pgvector
11
+ ```
12
+
13
+ In this monorepo, the package is available through the workspace:
14
+
15
+ ```sh
16
+ pnpm --filter @anvia/pgvector build
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ```ts
22
+ import { embedDocuments } from "@anvia/core";
23
+ import { OpenAIClient } from "@anvia/openai";
24
+ import { PgVectorStore } from "@anvia/pgvector";
25
+
26
+ const openai = new OpenAIClient({
27
+ apiKey: process.env.OPENAI_API_KEY,
28
+ });
29
+
30
+ const embeddings = openai.embeddingModel("text-embedding-3-small");
31
+
32
+ const documents = await embedDocuments(
33
+ embeddings,
34
+ [
35
+ {
36
+ id: "password-reset",
37
+ title: "Password reset policy",
38
+ body: "Password reset links expire after 30 minutes.",
39
+ product: "support",
40
+ },
41
+ {
42
+ id: "priority-support",
43
+ title: "Priority support",
44
+ body: "Enterprise customers receive priority support.",
45
+ product: "support",
46
+ },
47
+ ],
48
+ {
49
+ id: (document) => document.id,
50
+ content: (document) => `${document.title}\n${document.body}`,
51
+ metadata: (document) => ({
52
+ product: document.product,
53
+ title: document.title,
54
+ }),
55
+ },
56
+ );
57
+
58
+ const store = await PgVectorStore.connect({
59
+ tableName: "support_docs",
60
+ vectorSize: 1536,
61
+ });
62
+
63
+ await store.upsertDocuments(documents);
64
+
65
+ const index = store.index(embeddings);
66
+ const results = await index.search({
67
+ query: "How long does a password reset link last?",
68
+ topK: 3,
69
+ });
70
+
71
+ console.log(results);
72
+ ```
73
+
74
+ ## Postgres
75
+
76
+ By default, `PgVectorStore.connect` creates a `pg.Pool` from `connectionString` or the normal Postgres environment variables supported by `pg`. You can also pass a custom `pg` client or pool:
77
+
78
+ ```ts
79
+ import pg from "pg";
80
+
81
+ const pool = new pg.Pool({
82
+ connectionString: process.env.DATABASE_URL,
83
+ });
84
+
85
+ const store = await PgVectorStore.connect({
86
+ client: pool,
87
+ tableName: "support_docs",
88
+ vectorSize: 1536,
89
+ createIfMissing: true,
90
+ });
91
+ ```
92
+
93
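+ pgvector needs the vector dimension when the table is created, so `vectorSize` is required. `connect(...)` also rejects an existing table whose `embedding` column has a different dimension.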
94
+
95
+ `connect(...)` is async by design. It verifies or creates the pgvector extension and backing table before returning a store, so configuration and connection errors fail early instead of surfacing later from `upsertDocuments(...)` or `search(...)`. Constructors stay synchronous and side-effect free.
96
+
97
+ ## Exports
98
+
99
+ - `PgVectorStore`
100
+ - `PgVectorIndex`
101
+ - `filterToPgVectorWhere`
102
+ - `PgVectorStoreConnectOptions`, `PgVectorDistance`, and `PgVectorWhere` (types)
103
+
104
+ ## Development
105
+
106
+ ```sh
107
+ pnpm --filter @anvia/pgvector typecheck
108
+ pnpm --filter @anvia/pgvector test
109
+ pnpm --filter @anvia/pgvector build
110
+ ```
@@ -0,0 +1,48 @@
1
+ import { VectorMetadata, VectorSearchIndex, EmbeddingModel, VectorSearchRequest, VectorSearchResult, VectorSearchToolOptions, Tool, EmbeddedDocument, VectorFilter } from '@anvia/core';
2
+
3
/** Distance metric used to rank rows when searching. */
type PgVectorDistance =
    | "cosine"
    | "l2"
    | "innerProduct";
4
/** A parameterized SQL predicate together with the bind values it refers to. */
type PgVectorWhere = {
    /** Predicate text containing `$n` placeholders. */
    sql: string;
    /** Bind values for the placeholders. */
    values: Array<unknown>;
};
8
/**
 * Minimal query surface the store needs from a Postgres client or pool:
 * a single promise-returning `query` method that resolves to the result rows.
 */
type PgClientLike = {
    query(
        text: string,
        values?: readonly unknown[],
    ): Promise<{ rows: Array<Record<string, unknown>> }>;
};
13
/** Options accepted by `PgVectorStore.connect`. */
type PgVectorStoreConnectOptions = {
    /** Existing client or pool to use; when omitted a `pg.Pool` is created. */
    client?: PgClientLike | undefined;
    /** Connection string for the pool that is created when `client` is omitted. */
    connectionString?: string | undefined;
    /** Table name, optionally schema-qualified with dots (`schema.table`). */
    tableName: string;
    /** Dimension of the `embedding` vector column. */
    vectorSize: number;
    /** Set to `false` to skip creating the extension and table; any other value creates them. */
    createIfMissing?: boolean | undefined;
    /** Distance metric used for search; `"cosine"` when omitted. */
    distance?: PgVectorDistance | undefined;
};
21
/**
 * Document store backed by a Postgres table with a pgvector `embedding` column.
 * Instances are obtained through {@link PgVectorStore.connect}; the constructor is private.
 */
declare class PgVectorStore<T, Metadata extends VectorMetadata = VectorMetadata> {
    private readonly client;
    private readonly tableName;
    private readonly distance;
    private constructor();
    /** Prepares (unless disabled) and validates the backing table, then resolves to a store. */
    static connect<T, Metadata extends VectorMetadata = VectorMetadata>(
        options: PgVectorStoreConnectOptions,
    ): Promise<PgVectorStore<T, Metadata>>;
    /** Inserts or updates one row per embedding of every given document. */
    upsertDocuments(documents: EmbeddedDocument<T, Metadata>[]): Promise<void>;
    /** Creates a search index over this store's table that embeds queries with `model`. */
    index(model: EmbeddingModel): PgVectorIndex<T, Metadata>;
}
30
/** Vector search index over a pgvector table; implements Anvia's `VectorSearchIndex`. */
declare class PgVectorIndex<T, Metadata extends VectorMetadata = VectorMetadata>
    implements VectorSearchIndex<T, Metadata>
{
    private readonly model;
    private readonly client;
    private readonly tableName;
    private readonly distance;
    constructor(
        model: EmbeddingModel,
        client: PgClientLike,
        tableName: string,
        distance: PgVectorDistance,
    );
    /** Embeds the request query and returns the matching documents with their scores. */
    search(request: VectorSearchRequest): Promise<VectorSearchResult<T, Metadata>[]>;
    /** Same as {@link PgVectorIndex.search}, reduced to `score` and `id`. */
    searchIds(request: VectorSearchRequest): Promise<{ score: number; id: string }[]>;
    /** Wraps this index as a tool. */
    asTool(options: VectorSearchToolOptions): Tool<{ query: string; topK?: number }, unknown>;
}
46
/**
 * Translates a vector filter into a parameterized SQL predicate.
 *
 * @param filter - Filter to translate; `undefined` yields `undefined`.
 * @param startIndex - Number of the first `$n` placeholder to use.
 * @returns The predicate and its bind values, or `undefined` when there is no filter.
 */
declare function filterToPgVectorWhere(
    filter: VectorFilter | undefined,
    startIndex?: number,
): PgVectorWhere | undefined;
47
+
48
+ export { type PgVectorDistance, PgVectorIndex, PgVectorStore, type PgVectorStoreConnectOptions, type PgVectorWhere, filterToPgVectorWhere };
package/dist/index.js ADDED
@@ -0,0 +1,275 @@
1
+ // src/index.ts
2
+ import { createHash } from "crypto";
3
+ import {
4
+ createVectorSearchTool,
5
+ embedText
6
+ } from "@anvia/core";
7
+ import pgvector from "pgvector";
8
+ var reservedMetadataPrefix = "__anvia_";
9
/**
 * Document store backed by a Postgres table with a pgvector `embedding` column.
 *
 * Instances are created through `PgVectorStore.connect`, which prepares and
 * validates the table before handing out a store.
 */
var PgVectorStore = class _PgVectorStore {
  /**
   * @param client Object exposing `query(text, values)`.
   * @param tableName Already-quoted table identifier, spliced into SQL as-is.
   * @param distance Distance metric used by indexes created from this store.
   */
  constructor(client, tableName, distance) {
    this.client = client;
    this.tableName = tableName;
    this.distance = distance;
  }
  client;
  tableName;
  distance;
  /**
   * Validates the options, optionally creates the `vector` extension and the
   * backing table, checks the table's embedding dimension, and returns a store.
   *
   * @param options Connection options (`client`, `connectionString`,
   *   `tableName`, `vectorSize`, `createIfMissing`, `distance`).
   * @returns A store bound to the validated table.
   * @throws Error when `tableName` is not a valid identifier, when
   *   `vectorSize` is not a positive integer, or when table validation fails.
   */
  static async connect(options) {
    // Everything interpolated into SQL text is validated before a pool is
    // opened, so a bad option neither reaches the database nor leaks a pool.
    const tableName = quoteQualifiedIdentifier(options.tableName);
    const { vectorSize } = options;
    if (!Number.isInteger(vectorSize) || vectorSize <= 0) {
      throw new Error(`Invalid PgVector vector size: ${String(vectorSize)}`);
    }
    const distance = options.distance ?? "cosine";
    const client = options.client ?? await defaultPgClient(options.connectionString);
    // Creation is the default; only an explicit `false` disables it.
    if (options.createIfMissing !== false) {
      await client.query("CREATE EXTENSION IF NOT EXISTS vector");
      await client.query(`CREATE TABLE IF NOT EXISTS ${tableName} (
  id text PRIMARY KEY,
  document_id text NOT NULL,
  document jsonb NOT NULL,
  metadata jsonb,
  embedding vector(${vectorSize}) NOT NULL
)`);
    }
    await validateTable(client, tableName, vectorSize);
    return new _PgVectorStore(client, tableName, distance);
  }
  /**
   * Inserts or updates one row per embedding of every document.
   *
   * Rows are sent in batches so that a statement never exceeds the 65535
   * bind-parameter limit of the Postgres protocol. Up to 13107 rows go out as
   * a single statement; larger inputs are split into several statements that
   * are not wrapped in a transaction.
   *
   * @param documents Embedded documents to store; an empty array is a no-op.
   * @returns A promise that resolves once every batch has been written.
   */
  async upsertDocuments(documents) {
    const rows = documents.flatMap((document) => pgVectorRows(document));
    const columnsPerRow = 5;
    const maxRowsPerStatement = Math.floor(65535 / columnsPerRow);
    for (let start = 0; start < rows.length; start += maxRowsPerStatement) {
      const batch = rows.slice(start, start + maxRowsPerStatement);
      const values = batch.flatMap((row) => [
        row.id,
        row.documentId,
        JSON.stringify(row.document),
        row.metadata === void 0 ? null : JSON.stringify(row.metadata),
        pgvector.toSql(row.embedding)
      ]);
      const placeholders = batch.map((_, index) => {
        const offset = index * columnsPerRow;
        return `($${offset + 1}, $${offset + 2}, $${offset + 3}::jsonb, $${offset + 4}::jsonb, $${offset + 5}::vector)`;
      });
      await this.client.query(
        `INSERT INTO ${this.tableName} (id, document_id, document, metadata, embedding)
VALUES ${placeholders.join(", ")}
ON CONFLICT (id) DO UPDATE SET
  document_id = EXCLUDED.document_id,
  document = EXCLUDED.document,
  metadata = EXCLUDED.metadata,
  embedding = EXCLUDED.embedding`,
        values
      );
    }
  }
  /**
   * Creates a search index over this store's table.
   *
   * @param model Embedding model used to embed search queries.
   * @returns An index sharing this store's client, table and distance metric.
   */
  index(model) {
    return new PgVectorIndex(model, this.client, this.tableName, this.distance);
  }
};
66
/**
 * Search index over a pgvector table: embeds the query text, ranks rows with
 * the configured distance operator, and maps the rows to search results.
 */
var PgVectorIndex = class {
  /**
   * @param model Embedding model used to embed search queries.
   * @param client Object exposing `query(text, values)`.
   * @param tableName Already-quoted table identifier, spliced into SQL as-is.
   * @param distance Distance metric: "cosine", "l2" or "innerProduct".
   */
  constructor(model, client, tableName, distance) {
    this.model = model;
    this.client = client;
    this.tableName = tableName;
    this.distance = distance;
  }
  model;
  client;
  tableName;
  distance;
  /**
   * Runs a similarity search.
   *
   * Bind parameters are laid out as: `$1` the query vector, then the filter
   * values (if any), then the row limit.
   *
   * @param request Search request (`query`, `topK`, optional `filter` and `threshold`).
   * @returns Results parsed from the returned rows.
   */
  async search(request) {
    const queryEmbedding = await embedText(this.model, request.query);
    const operator = distanceOperator(this.distance);
    // Filter placeholders start at $2 because $1 is the query vector.
    const where = filterToPgVectorWhere(request.filter, 2);
    const filterValues = where?.values ?? [];
    const limitParameter = 2 + filterValues.length;
    const whereClause = where === void 0 ? "" : `WHERE ${where.sql}`;
    const sql = `SELECT id, document_id, document, metadata, embedding ${operator} $1::vector AS distance
FROM ${this.tableName}
${whereClause}
ORDER BY embedding ${operator} $1::vector
LIMIT $${limitParameter}`;
    const parameters = [
      pgvector.toSql(queryEmbedding.vector),
      ...filterValues,
      normalizedTopK(request.topK)
    ];
    const { rows } = await this.client.query(sql, parameters);
    return parseSearchRows(rows, request.threshold, this.distance);
  }
  /**
   * Runs `search` and keeps only the score and id of each result.
   *
   * @param request Search request forwarded to `search`.
   * @returns Objects of the form `{ score, id }`.
   */
  async searchIds(request) {
    const hits = await this.search(request);
    return hits.map((hit) => ({ score: hit.score, id: hit.id }));
  }
  /**
   * Wraps this index as a tool by delegating to `createVectorSearchTool`.
   *
   * @param options Options forwarded to `createVectorSearchTool`.
   * @returns The tool produced by `createVectorSearchTool`.
   */
  asTool(options) {
    return createVectorSearchTool(this, options);
  }
};
107
/**
 * Translates a vector filter into a parameterized SQL predicate.
 *
 * @param filter Filter to translate; `undefined` means "no filter".
 * @param startIndex Number of the first `$n` placeholder to use (default 1).
 * @returns `{ sql, values }`, or `undefined` when no filter was given.
 */
function filterToPgVectorWhere(filter, startIndex = 1) {
  if (filter !== undefined) {
    // The cursor is advanced by buildFilterSql as it hands out placeholders.
    const cursor = { nextIndex: startIndex };
    const sql = buildFilterSql(filter, cursor);
    const values = stateValues(filter);
    return { sql, values };
  }
  return undefined;
}
115
/**
 * Creates the default client: a `pg` pool, loaded lazily so `pg` is only
 * imported when the caller did not supply a client.
 *
 * @param connectionString Connection string, or `undefined` to construct the
 *   pool with an empty configuration object.
 * @returns A new `pg.Pool`.
 */
async function defaultPgClient(connectionString) {
  const { Pool } = await import("pg");
  const poolConfig = connectionString === undefined ? {} : { connectionString };
  return new Pool(poolConfig);
}
119
/**
 * Checks that the table has an `embedding` column whose type modifier equals
 * the expected vector size.
 *
 * @param client Object exposing `query(text, values)`.
 * @param tableName Table identifier, resolved by Postgres via `::regclass`.
 * @param vectorSize Expected value of the column's `atttypmod`.
 * @throws Error when no `embedding` column is found or its size differs.
 */
async function validateTable(client, tableName, vectorSize) {
  const lookupSql = `SELECT a.atttypmod AS vector_size
FROM pg_attribute a
WHERE a.attrelid = $1::regclass
  AND a.attname = 'embedding'
  AND NOT a.attisdropped`;
  const { rows } = await client.query(lookupSql, [tableName]);
  const firstRow = rows[0];
  const rawSize = firstRow?.vector_size;
  if (rawSize === undefined) {
    throw new Error(`PgVector table ${tableName} is missing an embedding vector column`);
  }
  // The driver may hand the value back as a string, so compare numerically.
  const actualSize = Number(rawSize);
  if (actualSize === vectorSize) {
    return;
  }
  throw new Error(
    `PgVector table ${tableName} has vector size ${actualSize}; expected ${vectorSize}`
  );
}
139
/**
 * Maps a distance metric to the SQL operator used to compare vectors.
 *
 * @param distance "cosine", "l2" or "innerProduct".
 * @returns "<=>" for cosine, "<->" for l2, "<#>" for innerProduct.
 * @throws Error for any other value. Without this the function would return
 *   `undefined`, which callers would splice into SQL as the text "undefined".
 */
function distanceOperator(distance) {
  switch (distance) {
    case "cosine":
      return "<=>";
    case "l2":
      return "<->";
    case "innerProduct":
      return "<#>";
    default:
      throw new Error(`Unsupported PgVector distance: ${String(distance)}`);
  }
}
149
/**
 * Converts a distance into a score where larger means more similar.
 *
 * @param distance Distance value computed by the database.
 * @param strategy Distance metric that produced `distance`.
 * @returns `1 - distance` for "cosine", otherwise the negated distance.
 */
function scoreFromDistance(distance, strategy) {
  if (strategy === "cosine") {
    return 1 - distance;
  }
  return -distance;
}
152
/**
 * Turns database rows into search results, keeping one result per document.
 *
 * Rows scoring below `threshold` are dropped. When several rows share a
 * `document_id`, only the highest-scoring one is kept.
 *
 * @param rows Rows with `document_id`, `document`, `metadata` and `distance`.
 * @param threshold Minimum score to keep, or `undefined` for no cut-off.
 * @param distanceStrategy Distance metric that produced `distance`.
 * @returns One `{ id, score, document, metadata? }` per document id.
 */
function parseSearchRows(rows, threshold, distanceStrategy) {
  const bestByDocument = /* @__PURE__ */ new Map();
  const hasThreshold = threshold !== undefined;
  for (const { distance, document_id: documentId, document, metadata } of rows) {
    const score = scoreFromDistance(Number(distance), distanceStrategy);
    if (hasThreshold && score < threshold) {
      continue;
    }
    const previous = bestByDocument.get(documentId);
    if (previous !== undefined && !(score > previous.score)) {
      continue;
    }
    bestByDocument.set(documentId, {
      id: documentId,
      score,
      document,
      // A null metadata column is omitted from the result entirely.
      ...(metadata === null ? {} : { metadata })
    });
  }
  return Array.from(bestByDocument.values());
}
172
/**
 * Expands an embedded document into one table row per embedding.
 *
 * A document with a single embedding is keyed by its own id; with several
 * embeddings each row is keyed by `<id>#embedding:<index>`. The key is hashed
 * by `pointId` to form the row's primary key.
 *
 * @param document Embedded document (`id`, `document`, `metadata`, `embeddings`).
 * @returns Rows of the form `{ id, documentId, document, metadata, embedding }`.
 * @throws Error when the document has no embeddings or uses a reserved metadata key.
 */
function pgVectorRows(document) {
  const { embeddings } = document;
  if (embeddings.length === 0) {
    throw new Error(`Document ${document.id} has no embeddings`);
  }
  assertNoReservedMetadata(document.metadata);
  const hasSingleEmbedding = embeddings.length === 1;
  return embeddings.map((embedding, position) => {
    const logicalId = hasSingleEmbedding
      ? document.id
      : `${document.id}#embedding:${position}`;
    return {
      id: pointId(logicalId),
      documentId: document.id,
      document: document.document,
      metadata: document.metadata,
      embedding: embedding.vector
    };
  });
}
188
/**
 * Rejects metadata that uses a key starting with the reserved prefix.
 *
 * @param metadata Metadata object, or `undefined`/`null` for none.
 * @throws Error naming the first offending key.
 */
function assertNoReservedMetadata(metadata) {
  const reservedKey = Object.keys(metadata ?? {}).find(
    (key) => key.startsWith(reservedMetadataPrefix)
  );
  if (reservedKey !== undefined) {
    throw new Error(`Metadata key ${reservedKey} is reserved for Anvia pgvector metadata`);
  }
}
195
/**
 * Derives a deterministic row id from a logical id.
 *
 * The first 32 hex characters of the SHA-256 digest are grouped 8-4-4-4-12
 * and joined with dashes.
 *
 * @param id Logical id to hash.
 * @returns The dashed 36-character id.
 */
function pointId(id) {
  const digest = createHash("sha256").update(id).digest("hex");
  const groupBounds = [
    [0, 8],
    [8, 12],
    [12, 16],
    [16, 20],
    [20, 32]
  ];
  return groupBounds.map(([from, to]) => digest.slice(from, to)).join("-");
}
202
/**
 * Quotes a possibly dot-qualified identifier such as `schema.table`.
 *
 * @param identifier Identifier whose dot-separated parts are quoted one by one.
 * @returns The quoted parts joined with dots.
 * @throws Error when any part is empty or is rejected by `quoteIdentifier`.
 */
function quoteQualifiedIdentifier(identifier) {
  const segments = identifier.split(".");
  const hasEmptySegment = segments.some((segment) => segment.length === 0);
  if (segments.length === 0 || hasEmptySegment) {
    throw new Error(`Invalid Postgres identifier: ${identifier}`);
  }
  return segments.map((segment) => quoteIdentifier(segment)).join(".");
}
209
/**
 * Validates and double-quotes a single identifier.
 *
 * Only ASCII letters, digits and underscores are accepted, and the first
 * character must not be a digit.
 *
 * @param identifier Unqualified identifier.
 * @returns The identifier wrapped in double quotes.
 * @throws Error when the identifier contains any other character or is empty.
 */
function quoteIdentifier(identifier) {
  const isPlainIdentifier = /^[A-Za-z_][A-Za-z0-9_]*$/.test(identifier);
  if (isPlainIdentifier) {
    // Embedded double quotes are doubled before wrapping.
    return '"' + identifier.split('"').join('""') + '"';
  }
  throw new Error(`Invalid Postgres identifier: ${identifier}`);
}
215
/**
 * Normalizes a requested result count for use as a SQL `LIMIT`.
 *
 * @param topK Requested number of results.
 * @returns `topK` with its fraction dropped, raised to 0 when negative.
 */
function normalizedTopK(topK) {
  const wholeCount = Math.trunc(topK);
  return Math.max(0, wholeCount);
}
218
/**
 * Renders a filter tree as a SQL predicate over the `metadata` column.
 *
 * Each leaf consumes two placeholders from `state.nextIndex`: one for the
 * metadata key and one for the comparison value. Placeholders are handed out
 * left to right.
 *
 * @param filter Filter node: `{ type: "eq" | "gt" | "lt", key, value }` or
 *   `{ type: "and" | "or", filters: [left, right] }`.
 * @param state Mutable cursor; `nextIndex` is advanced for every placeholder.
 * @returns The SQL predicate text.
 * @throws Error when a "gt"/"lt" value is not a number or the filter type is unknown.
 */
function buildFilterSql(filter, state) {
  switch (filter.type) {
    case "eq": {
      const keyIndex = state.nextIndex++;
      const valueIndex = state.nextIndex++;
      // `x = NULL` is never true in SQL, so an equality filter on null would
      // match nothing. IS NOT DISTINCT FROM treats NULL as comparable; since
      // `->>` yields NULL for both a JSON null and an absent key, either matches.
      const comparison = filter.value === null ? "IS NOT DISTINCT FROM" : "=";
      return `(metadata ->> $${keyIndex}) ${comparison} $${valueIndex}`;
    }
    case "gt":
    case "lt": {
      assertNumericFilterValue(filter.value, filter.type);
      const keyIndex = state.nextIndex++;
      const valueIndex = state.nextIndex++;
      const comparison = filter.type === "gt" ? ">" : "<";
      return `(metadata ->> $${keyIndex})::numeric ${comparison} $${valueIndex}`;
    }
    case "and":
    case "or": {
      // Left is rendered before right so placeholder numbers follow value order.
      const left = buildFilterSql(filter.filters[0], state);
      const right = buildFilterSql(filter.filters[1], state);
      const connective = filter.type === "and" ? "AND" : "OR";
      return `(${left} ${connective} ${right})`;
    }
    default:
      throw new Error(`Unsupported PgVector filter type: ${String(filter.type)}`);
  }
}
249
/**
 * Collects the bind values of a filter tree, two per leaf (key, then value),
 * with the left branch before the right one.
 *
 * @param filter Filter node: `{ type: "eq" | "gt" | "lt", key, value }` or
 *   `{ type: "and" | "or", filters: [left, right] }`.
 * @returns Flat array of bind values.
 * @throws Error when a "gt"/"lt" value is not a number.
 */
function stateValues(filter) {
  const { type } = filter;
  if (type === "eq") {
    return [filter.key, serializeMetadataValue(filter.value)];
  }
  if (type === "gt" || type === "lt") {
    assertNumericFilterValue(filter.value, type);
    return [filter.key, filter.value];
  }
  if (type === "and" || type === "or") {
    const leftValues = stateValues(filter.filters[0]);
    const rightValues = stateValues(filter.filters[1]);
    return [...leftValues, ...rightValues];
  }
}
262
/**
 * Converts a metadata value to the text form used for equality comparison.
 *
 * @param value Metadata value.
 * @returns `null` for `null`, otherwise `String(value)`.
 */
function serializeMetadataValue(value) {
  if (value === null) {
    return null;
  }
  return String(value);
}
265
/**
 * Ensures the value of a range filter is a number.
 *
 * @param value Filter value to check.
 * @param operator Filter type, used in the error message.
 * @throws Error when `typeof value` is not "number".
 */
function assertNumericFilterValue(value, operator) {
  const isNumber = typeof value === "number";
  if (isNumber) {
    return;
  }
  throw new Error(`PgVector ${operator} filters require numeric metadata values`);
}
270
+ export {
271
+ PgVectorIndex,
272
+ PgVectorStore,
273
+ filterToPgVectorWhere
274
+ };
275
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["import { createHash } from \"node:crypto\";\nimport {\n createVectorSearchTool,\n type EmbeddedDocument,\n type EmbeddingModel,\n embedText,\n type Tool,\n type VectorFilter,\n type VectorMetadata,\n type VectorMetadataValue,\n type VectorSearchIndex,\n type VectorSearchRequest,\n type VectorSearchResult,\n type VectorSearchToolOptions,\n} from \"@anvia/core\";\nimport pgvector from \"pgvector\";\n\nconst reservedMetadataPrefix = \"__anvia_\";\n\nexport type PgVectorDistance = \"cosine\" | \"l2\" | \"innerProduct\";\n\nexport type PgVectorWhere = {\n sql: string;\n values: unknown[];\n};\n\ntype PgClientLike = {\n query(text: string, values?: readonly unknown[]): Promise<{ rows: Record<string, unknown>[] }>;\n};\n\nexport type PgVectorStoreConnectOptions = {\n client?: PgClientLike | undefined;\n connectionString?: string | undefined;\n tableName: string;\n vectorSize: number;\n createIfMissing?: boolean | undefined;\n distance?: PgVectorDistance | undefined;\n};\n\nexport class PgVectorStore<T, Metadata extends VectorMetadata = VectorMetadata> {\n private constructor(\n private readonly client: PgClientLike,\n private readonly tableName: string,\n private readonly distance: PgVectorDistance,\n ) {}\n\n static async connect<T, Metadata extends VectorMetadata = VectorMetadata>(\n options: PgVectorStoreConnectOptions,\n ): Promise<PgVectorStore<T, Metadata>> {\n const client = options.client ?? (await defaultPgClient(options.connectionString));\n const tableName = quoteQualifiedIdentifier(options.tableName);\n const distance = options.distance ?? 
\"cosine\";\n\n if (options.createIfMissing !== false) {\n await client.query(\"CREATE EXTENSION IF NOT EXISTS vector\");\n await client.query(`CREATE TABLE IF NOT EXISTS ${tableName} (\n id text PRIMARY KEY,\n document_id text NOT NULL,\n document jsonb NOT NULL,\n metadata jsonb,\n embedding vector(${options.vectorSize}) NOT NULL\n)`);\n }\n\n await validateTable(client, tableName, options.vectorSize);\n return new PgVectorStore<T, Metadata>(client, tableName, distance);\n }\n\n async upsertDocuments(documents: Array<EmbeddedDocument<T, Metadata>>): Promise<void> {\n const rows = documents.flatMap((document) => pgVectorRows(document));\n if (rows.length === 0) {\n return;\n }\n\n const values = rows.flatMap((row) => [\n row.id,\n row.documentId,\n JSON.stringify(row.document),\n row.metadata === undefined ? null : JSON.stringify(row.metadata),\n pgvector.toSql(row.embedding),\n ]);\n const placeholders = rows.map((_, index) => {\n const offset = index * 5;\n return `($${offset + 1}, $${offset + 2}, $${offset + 3}::jsonb, $${offset + 4}::jsonb, $${\n offset + 5\n }::vector)`;\n });\n\n await this.client.query(\n `INSERT INTO ${this.tableName} (id, document_id, document, metadata, embedding)\nVALUES ${placeholders.join(\", \")}\nON CONFLICT (id) DO UPDATE SET\n document_id = EXCLUDED.document_id,\n document = EXCLUDED.document,\n metadata = EXCLUDED.metadata,\n embedding = EXCLUDED.embedding`,\n values,\n );\n }\n\n index(model: EmbeddingModel): PgVectorIndex<T, Metadata> {\n return new PgVectorIndex(model, this.client, this.tableName, this.distance);\n }\n}\n\nexport class PgVectorIndex<T, Metadata extends VectorMetadata = VectorMetadata>\n implements VectorSearchIndex<T, Metadata>\n{\n constructor(\n private readonly model: EmbeddingModel,\n private readonly client: PgClientLike,\n private readonly tableName: string,\n private readonly distance: PgVectorDistance,\n ) {}\n\n async search(request: VectorSearchRequest): Promise<Array<VectorSearchResult<T, 
Metadata>>> {\n const queryEmbedding = await embedText(this.model, request.query);\n const operator = distanceOperator(this.distance);\n const where = filterToPgVectorWhere(request.filter, 2);\n const limitParameter = 2 + (where?.values.length ?? 0);\n const response = await this.client.query(\n `SELECT id, document_id, document, metadata, embedding ${operator} $1::vector AS distance\nFROM ${this.tableName}\n${where === undefined ? \"\" : `WHERE ${where.sql}`}\nORDER BY embedding ${operator} $1::vector\nLIMIT $${limitParameter}`,\n [\n pgvector.toSql(queryEmbedding.vector),\n ...(where?.values ?? []),\n normalizedTopK(request.topK),\n ],\n );\n\n return parseSearchRows<T, Metadata>(\n response.rows as Array<{\n id: string;\n document_id: string;\n document: unknown;\n metadata: Metadata | null;\n distance: number | string;\n }>,\n request.threshold,\n this.distance,\n );\n }\n\n async searchIds(request: VectorSearchRequest): Promise<Array<{ score: number; id: string }>> {\n return (await this.search(request)).map(({ score, id }) => ({ score, id }));\n }\n\n asTool(options: VectorSearchToolOptions): Tool<{ query: string; topK?: number }, unknown> {\n return createVectorSearchTool(this, options);\n }\n}\n\nexport function filterToPgVectorWhere(\n filter: VectorFilter | undefined,\n startIndex = 1,\n): PgVectorWhere | undefined {\n if (filter === undefined) {\n return undefined;\n }\n const state = { nextIndex: startIndex };\n const sql = buildFilterSql(filter, state);\n return { sql, values: stateValues(filter) };\n}\n\nasync function defaultPgClient(connectionString: string | undefined): Promise<PgClientLike> {\n const pg = await import(\"pg\");\n return new pg.Pool(connectionString === undefined ? 
{} : { connectionString }) as PgClientLike;\n}\n\nasync function validateTable(\n client: PgClientLike,\n tableName: string,\n vectorSize: number,\n): Promise<void> {\n const result = await client.query(\n `SELECT a.atttypmod AS vector_size\nFROM pg_attribute a\nWHERE a.attrelid = $1::regclass\n AND a.attname = 'embedding'\n AND NOT a.attisdropped`,\n [tableName],\n );\n const rawSize = result.rows[0]?.vector_size;\n if (rawSize === undefined) {\n throw new Error(`PgVector table ${tableName} is missing an embedding vector column`);\n }\n const actualSize = Number(rawSize);\n if (actualSize !== vectorSize) {\n throw new Error(\n `PgVector table ${tableName} has vector size ${actualSize}; expected ${vectorSize}`,\n );\n }\n}\n\nfunction distanceOperator(distance: PgVectorDistance): \"<=>\" | \"<->\" | \"<#>\" {\n switch (distance) {\n case \"cosine\":\n return \"<=>\";\n case \"l2\":\n return \"<->\";\n case \"innerProduct\":\n return \"<#>\";\n }\n}\n\nfunction scoreFromDistance(distance: number, strategy: PgVectorDistance): number {\n return strategy === \"cosine\" ? 1 - distance : -distance;\n}\n\nfunction parseSearchRows<T, Metadata extends VectorMetadata>(\n rows: Array<{\n id: string;\n document_id: string;\n document: unknown;\n metadata: Metadata | null;\n distance: number | string;\n }>,\n threshold: number | undefined,\n distanceStrategy: PgVectorDistance,\n): Array<VectorSearchResult<T, Metadata>> {\n const byId = new Map<string, VectorSearchResult<T, Metadata>>();\n\n for (const row of rows) {\n const score = scoreFromDistance(Number(row.distance), distanceStrategy);\n if (threshold !== undefined && score < threshold) {\n continue;\n }\n const result = {\n id: row.document_id,\n score,\n document: row.document as T,\n ...(row.metadata === null ? 
{} : { metadata: row.metadata }),\n } as VectorSearchResult<T, Metadata>;\n const current = byId.get(result.id);\n if (current === undefined || result.score > current.score) {\n byId.set(result.id, result);\n }\n }\n\n return [...byId.values()];\n}\n\nfunction pgVectorRows<T, Metadata extends VectorMetadata>(\n document: EmbeddedDocument<T, Metadata>,\n): Array<{\n id: string;\n documentId: string;\n document: T;\n metadata: Metadata | undefined;\n embedding: number[];\n}> {\n if (document.embeddings.length === 0) {\n throw new Error(`Document ${document.id} has no embeddings`);\n }\n assertNoReservedMetadata(document.metadata);\n\n return document.embeddings.map((embedding, index) => {\n const logicalId =\n document.embeddings.length === 1 ? document.id : `${document.id}#embedding:${index}`;\n return {\n id: pointId(logicalId),\n documentId: document.id,\n document: document.document,\n metadata: document.metadata,\n embedding: embedding.vector,\n };\n });\n}\n\nfunction assertNoReservedMetadata(metadata: VectorMetadata | undefined): void {\n for (const key of Object.keys(metadata ?? 
{})) {\n if (key.startsWith(reservedMetadataPrefix)) {\n throw new Error(`Metadata key ${key} is reserved for Anvia pgvector metadata`);\n }\n }\n}\n\nfunction pointId(id: string): string {\n const hex = createHash(\"sha256\").update(id).digest(\"hex\").slice(0, 32);\n return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(\n 16,\n 20,\n )}-${hex.slice(20)}`;\n}\n\nfunction quoteQualifiedIdentifier(identifier: string): string {\n const parts = identifier.split(\".\");\n if (parts.length === 0 || parts.some((part) => part.length === 0)) {\n throw new Error(`Invalid Postgres identifier: ${identifier}`);\n }\n return parts.map(quoteIdentifier).join(\".\");\n}\n\nfunction quoteIdentifier(identifier: string): string {\n if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(identifier)) {\n throw new Error(`Invalid Postgres identifier: ${identifier}`);\n }\n return `\"${identifier.replaceAll('\"', '\"\"')}\"`;\n}\n\nfunction normalizedTopK(topK: number): number {\n return Math.max(0, Math.trunc(topK));\n}\n\nfunction buildFilterSql(filter: VectorFilter, state: { nextIndex: number }): string {\n switch (filter.type) {\n case \"eq\": {\n const keyIndex = state.nextIndex++;\n const valueIndex = state.nextIndex++;\n return `(metadata ->> $${keyIndex}) = $${valueIndex}`;\n }\n case \"gt\": {\n assertNumericFilterValue(filter.value, filter.type);\n const keyIndex = state.nextIndex++;\n const valueIndex = state.nextIndex++;\n return `(metadata ->> $${keyIndex})::numeric > $${valueIndex}`;\n }\n case \"lt\": {\n assertNumericFilterValue(filter.value, filter.type);\n const keyIndex = state.nextIndex++;\n const valueIndex = state.nextIndex++;\n return `(metadata ->> $${keyIndex})::numeric < $${valueIndex}`;\n }\n case \"and\":\n return `(${buildFilterSql(filter.filters[0], state)} AND ${buildFilterSql(\n filter.filters[1],\n state,\n )})`;\n case \"or\":\n return `(${buildFilterSql(filter.filters[0], state)} OR ${buildFilterSql(\n filter.filters[1],\n state,\n )})`;\n 
}\n}\n\nfunction stateValues(filter: VectorFilter): unknown[] {\n switch (filter.type) {\n case \"eq\":\n return [filter.key, serializeMetadataValue(filter.value)];\n case \"gt\":\n case \"lt\":\n assertNumericFilterValue(filter.value, filter.type);\n return [filter.key, filter.value];\n case \"and\":\n case \"or\":\n return [...stateValues(filter.filters[0]), ...stateValues(filter.filters[1])];\n }\n}\n\nfunction serializeMetadataValue(value: VectorMetadataValue): string | null {\n return value === null ? null : String(value);\n}\n\nfunction assertNumericFilterValue(value: VectorMetadataValue, operator: \"gt\" | \"lt\"): void {\n if (typeof value !== \"number\") {\n throw new Error(`PgVector ${operator} filters require numeric metadata values`);\n }\n}\n"],"mappings":";AAAA,SAAS,kBAAkB;AAC3B;AAAA,EACE;AAAA,EAGA;AAAA,OASK;AACP,OAAO,cAAc;AAErB,IAAM,yBAAyB;AAsBxB,IAAM,gBAAN,MAAM,eAAmE;AAAA,EACtE,YACW,QACA,WACA,UACjB;AAHiB;AACA;AACA;AAAA,EAChB;AAAA,EAHgB;AAAA,EACA;AAAA,EACA;AAAA,EAGnB,aAAa,QACX,SACqC;AACrC,UAAM,SAAS,QAAQ,UAAW,MAAM,gBAAgB,QAAQ,gBAAgB;AAChF,UAAM,YAAY,yBAAyB,QAAQ,SAAS;AAC5D,UAAM,WAAW,QAAQ,YAAY;AAErC,QAAI,QAAQ,oBAAoB,OAAO;AACrC,YAAM,OAAO,MAAM,uCAAuC;AAC1D,YAAM,OAAO,MAAM,8BAA8B,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA,qBAK3C,QAAQ,UAAU;AAAA,EACrC;AAAA,IACE;AAEA,UAAM,cAAc,QAAQ,WAAW,QAAQ,UAAU;AACzD,WAAO,IAAI,eAA2B,QAAQ,WAAW,QAAQ;AAAA,EACnE;AAAA,EAEA,MAAM,gBAAgB,WAAgE;AACpF,UAAM,OAAO,UAAU,QAAQ,CAAC,aAAa,aAAa,QAAQ,CAAC;AACnE,QAAI,KAAK,WAAW,GAAG;AACrB;AAAA,IACF;AAEA,UAAM,SAAS,KAAK,QAAQ,CAAC,QAAQ;AAAA,MACnC,IAAI;AAAA,MACJ,IAAI;AAAA,MACJ,KAAK,UAAU,IAAI,QAAQ;AAAA,MAC3B,IAAI,aAAa,SAAY,OAAO,KAAK,UAAU,IAAI,QAAQ;AAAA,MAC/D,SAAS,MAAM,IAAI,SAAS;AAAA,IAC9B,CAAC;AACD,UAAM,eAAe,KAAK,IAAI,CAAC,GAAG,UAAU;AAC1C,YAAM,SAAS,QAAQ;AACvB,aAAO,KAAK,SAAS,CAAC,MAAM,SAAS,CAAC,MAAM,SAAS,CAAC,aAAa,SAAS,CAAC,aAC3E,SAAS,CACX;AAAA,IACF,CAAC;AAED,UAAM,KAAK,OAAO;AAAA,MAChB,eAAe,KAAK,SAAS;AAAA,SAC1B,aAAa,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,MAM1B;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,OAAmD;AACvD,WAAO,
IAAI,cAAc,OAAO,KAAK,QAAQ,KAAK,WAAW,KAAK,QAAQ;AAAA,EAC5E;AACF;AAEO,IAAM,gBAAN,MAEP;AAAA,EACE,YACmB,OACA,QACA,WACA,UACjB;AAJiB;AACA;AACA;AACA;AAAA,EAChB;AAAA,EAJgB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAGnB,MAAM,OAAO,SAA+E;AAC1F,UAAM,iBAAiB,MAAM,UAAU,KAAK,OAAO,QAAQ,KAAK;AAChE,UAAM,WAAW,iBAAiB,KAAK,QAAQ;AAC/C,UAAM,QAAQ,sBAAsB,QAAQ,QAAQ,CAAC;AACrD,UAAM,iBAAiB,KAAK,OAAO,OAAO,UAAU;AACpD,UAAM,WAAW,MAAM,KAAK,OAAO;AAAA,MACjC,yDAAyD,QAAQ;AAAA,OAChE,KAAK,SAAS;AAAA,EACnB,UAAU,SAAY,KAAK,SAAS,MAAM,GAAG,EAAE;AAAA,qBAC5B,QAAQ;AAAA,SACpB,cAAc;AAAA,MACjB;AAAA,QACE,SAAS,MAAM,eAAe,MAAM;AAAA,QACpC,GAAI,OAAO,UAAU,CAAC;AAAA,QACtB,eAAe,QAAQ,IAAI;AAAA,MAC7B;AAAA,IACF;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MAOT,QAAQ;AAAA,MACR,KAAK;AAAA,IACP;AAAA,EACF;AAAA,EAEA,MAAM,UAAU,SAA6E;AAC3F,YAAQ,MAAM,KAAK,OAAO,OAAO,GAAG,IAAI,CAAC,EAAE,OAAO,GAAG,OAAO,EAAE,OAAO,GAAG,EAAE;AAAA,EAC5E;AAAA,EAEA,OAAO,SAAmF;AACxF,WAAO,uBAAuB,MAAM,OAAO;AAAA,EAC7C;AACF;AAEO,SAAS,sBACd,QACA,aAAa,GACc;AAC3B,MAAI,WAAW,QAAW;AACxB,WAAO;AAAA,EACT;AACA,QAAM,QAAQ,EAAE,WAAW,WAAW;AACtC,QAAM,MAAM,eAAe,QAAQ,KAAK;AACxC,SAAO,EAAE,KAAK,QAAQ,YAAY,MAAM,EAAE;AAC5C;AAEA,eAAe,gBAAgB,kBAA6D;AAC1F,QAAM,KAAK,MAAM,OAAO,IAAI;AAC5B,SAAO,IAAI,GAAG,KAAK,qBAAqB,SAAY,CAAC,IAAI,EAAE,iBAAiB,CAAC;AAC/E;AAEA,eAAe,cACb,QACA,WACA,YACe;AACf,QAAM,SAAS,MAAM,OAAO;AAAA,IAC1B;AAAA;AAAA;AAAA;AAAA;AAAA,IAKA,CAAC,SAAS;AAAA,EACZ;AACA,QAAM,UAAU,OAAO,KAAK,CAAC,GAAG;AAChC,MAAI,YAAY,QAAW;AACzB,UAAM,IAAI,MAAM,kBAAkB,SAAS,wCAAwC;AAAA,EACrF;AACA,QAAM,aAAa,OAAO,OAAO;AACjC,MAAI,eAAe,YAAY;AAC7B,UAAM,IAAI;AAAA,MACR,kBAAkB,SAAS,oBAAoB,UAAU,cAAc,UAAU;AAAA,IACnF;AAAA,EACF;AACF;AAEA,SAAS,iBAAiB,UAAmD;AAC3E,UAAQ,UAAU;AAAA,IAChB,KAAK;AACH,aAAO;AAAA,IACT,KAAK;AACH,aAAO;AAAA,IACT,KAAK;AACH,aAAO;AAAA,EACX;AACF;AAEA,SAAS,kBAAkB,UAAkB,UAAoC;AAC/E,SAAO,aAAa,WAAW,IAAI,WAAW,CAAC;AACjD;AAEA,SAAS,gBACP,MAOA,WACA,kBACwC;AACxC,QAAM,OAAO,oBAAI,IAA6C;AAE9D,aAAW,OAAO,MAAM;AACtB,UAAM,QAAQ,kBAAkB,OAAO,IAAI,QAAQ,GAAG,gBAAgB;AACtE,QAAI,cAAc,UAAa,QAAQ,WAAW;AAChD;AAAA,IACF;AACA,UAAM,SAAS;AAAA,MACb,IAAI,IAAI;AAAA,MACR;A
AAA,MACA,UAAU,IAAI;AAAA,MACd,GAAI,IAAI,aAAa,OAAO,CAAC,IAAI,EAAE,UAAU,IAAI,SAAS;AAAA,IAC5D;AACA,UAAM,UAAU,KAAK,IAAI,OAAO,EAAE;AAClC,QAAI,YAAY,UAAa,OAAO,QAAQ,QAAQ,OAAO;AACzD,WAAK,IAAI,OAAO,IAAI,MAAM;AAAA,IAC5B;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,KAAK,OAAO,CAAC;AAC1B;AAEA,SAAS,aACP,UAOC;AACD,MAAI,SAAS,WAAW,WAAW,GAAG;AACpC,UAAM,IAAI,MAAM,YAAY,SAAS,EAAE,oBAAoB;AAAA,EAC7D;AACA,2BAAyB,SAAS,QAAQ;AAE1C,SAAO,SAAS,WAAW,IAAI,CAAC,WAAW,UAAU;AACnD,UAAM,YACJ,SAAS,WAAW,WAAW,IAAI,SAAS,KAAK,GAAG,SAAS,EAAE,cAAc,KAAK;AACpF,WAAO;AAAA,MACL,IAAI,QAAQ,SAAS;AAAA,MACrB,YAAY,SAAS;AAAA,MACrB,UAAU,SAAS;AAAA,MACnB,UAAU,SAAS;AAAA,MACnB,WAAW,UAAU;AAAA,IACvB;AAAA,EACF,CAAC;AACH;AAEA,SAAS,yBAAyB,UAA4C;AAC5E,aAAW,OAAO,OAAO,KAAK,YAAY,CAAC,CAAC,GAAG;AAC7C,QAAI,IAAI,WAAW,sBAAsB,GAAG;AAC1C,YAAM,IAAI,MAAM,gBAAgB,GAAG,0CAA0C;AAAA,IAC/E;AAAA,EACF;AACF;AAEA,SAAS,QAAQ,IAAoB;AACnC,QAAM,MAAM,WAAW,QAAQ,EAAE,OAAO,EAAE,EAAE,OAAO,KAAK,EAAE,MAAM,GAAG,EAAE;AACrE,SAAO,GAAG,IAAI,MAAM,GAAG,CAAC,CAAC,IAAI,IAAI,MAAM,GAAG,EAAE,CAAC,IAAI,IAAI,MAAM,IAAI,EAAE,CAAC,IAAI,IAAI;AAAA,IACxE;AAAA,IACA;AAAA,EACF,CAAC,IAAI,IAAI,MAAM,EAAE,CAAC;AACpB;AAEA,SAAS,yBAAyB,YAA4B;AAC5D,QAAM,QAAQ,WAAW,MAAM,GAAG;AAClC,MAAI,MAAM,WAAW,KAAK,MAAM,KAAK,CAAC,SAAS,KAAK,WAAW,CAAC,GAAG;AACjE,UAAM,IAAI,MAAM,gCAAgC,UAAU,EAAE;AAAA,EAC9D;AACA,SAAO,MAAM,IAAI,eAAe,EAAE,KAAK,GAAG;AAC5C;AAEA,SAAS,gBAAgB,YAA4B;AACnD,MAAI,CAAC,2BAA2B,KAAK,UAAU,GAAG;AAChD,UAAM,IAAI,MAAM,gCAAgC,UAAU,EAAE;AAAA,EAC9D;AACA,SAAO,IAAI,WAAW,WAAW,KAAK,IAAI,CAAC;AAC7C;AAEA,SAAS,eAAe,MAAsB;AAC5C,SAAO,KAAK,IAAI,GAAG,KAAK,MAAM,IAAI,CAAC;AACrC;AAEA,SAAS,eAAe,QAAsB,OAAsC;AAClF,UAAQ,OAAO,MAAM;AAAA,IACnB,KAAK,MAAM;AACT,YAAM,WAAW,MAAM;AACvB,YAAM,aAAa,MAAM;AACzB,aAAO,kBAAkB,QAAQ,QAAQ,UAAU;AAAA,IACrD;AAAA,IACA,KAAK,MAAM;AACT,+BAAyB,OAAO,OAAO,OAAO,IAAI;AAClD,YAAM,WAAW,MAAM;AACvB,YAAM,aAAa,MAAM;AACzB,aAAO,kBAAkB,QAAQ,iBAAiB,UAAU;AAAA,IAC9D;AAAA,IACA,KAAK,MAAM;AACT,+BAAyB,OAAO,OAAO,OAAO,IAAI;AAClD,YAAM,WAAW,MAAM;AACvB,YAAM,aAAa,MAAM;AACzB,aAAO,kBAAkB,QAAQ,iBAAiB,UAAU;AAAA,IAC9D;AAAA,IACA,KAAK;AACH,aAAO,IAAI,e
AAe,OAAO,QAAQ,CAAC,GAAG,KAAK,CAAC,QAAQ;AAAA,QACzD,OAAO,QAAQ,CAAC;AAAA,QAChB;AAAA,MACF,CAAC;AAAA,IACH,KAAK;AACH,aAAO,IAAI,eAAe,OAAO,QAAQ,CAAC,GAAG,KAAK,CAAC,OAAO;AAAA,QACxD,OAAO,QAAQ,CAAC;AAAA,QAChB;AAAA,MACF,CAAC;AAAA,EACL;AACF;AAEA,SAAS,YAAY,QAAiC;AACpD,UAAQ,OAAO,MAAM;AAAA,IACnB,KAAK;AACH,aAAO,CAAC,OAAO,KAAK,uBAAuB,OAAO,KAAK,CAAC;AAAA,IAC1D,KAAK;AAAA,IACL,KAAK;AACH,+BAAyB,OAAO,OAAO,OAAO,IAAI;AAClD,aAAO,CAAC,OAAO,KAAK,OAAO,KAAK;AAAA,IAClC,KAAK;AAAA,IACL,KAAK;AACH,aAAO,CAAC,GAAG,YAAY,OAAO,QAAQ,CAAC,CAAC,GAAG,GAAG,YAAY,OAAO,QAAQ,CAAC,CAAC,CAAC;AAAA,EAChF;AACF;AAEA,SAAS,uBAAuB,OAA2C;AACzE,SAAO,UAAU,OAAO,OAAO,OAAO,KAAK;AAC7C;AAEA,SAAS,yBAAyB,OAA4B,UAA6B;AACzF,MAAI,OAAO,UAAU,UAAU;AAC7B,UAAM,IAAI,MAAM,YAAY,QAAQ,0CAA0C;AAAA,EAChF;AACF;","names":[]}
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "@anvia/pgvector",
3
+ "version": "0.1.0",
4
+ "description": "Postgres pgvector store adapter for Anvia.",
5
+ "author": "anvia",
6
+ "maintainer": "Indra Zulfi",
7
+ "license": "MIT",
8
+ "files": [
9
+ "dist"
10
+ ],
11
+ "type": "module",
12
+ "main": "./dist/index.js",
13
+ "types": "./dist/index.d.ts",
14
+ "exports": {
15
+ ".": {
16
+ "types": "./dist/index.d.ts",
17
+ "import": "./dist/index.js"
18
+ }
19
+ },
20
+ "dependencies": {
21
+ "pg": "^8.20.0",
22
+ "pgvector": "^0.2.1",
23
+ "@anvia/core": "0.1.0"
24
+ },
25
+ "devDependencies": {
26
+ "@types/node": "^24.9.1",
27
+ "@types/pg": "^8.15.6",
28
+ "tsup": "^8.5.0",
29
+ "typescript": "^5.9.3",
30
+ "vitest": "^4.0.8"
31
+ },
32
+ "scripts": {
33
+ "build": "tsup src/index.ts --format esm --dts --sourcemap --clean",
34
+ "test": "vitest run",
35
+ "typecheck": "tsc --noEmit"
36
+ }
37
+ }