@absolutejs/rag-postgres 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,622 +1,956 @@
1
1
  // @bun
2
- var __require = import.meta.require;
3
-
4
2
  // src/index.ts
3
+ import { createRAGCollection, ragPlugin } from "@absolutejs/rag/adapter-kit";
4
+
5
+ // src/createPostgresRAGStore.ts
5
6
  import {
6
- createRAGCollection,
7
7
  createRAGVector,
8
- normalizeVector
9
- } from "@absolutejs/rag";
10
- var ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME = "@absolutejs/rag-postgres";
11
- var POSTGRESQL_RAG_IMPLEMENTATIONS = ["pgvector"];
12
- var PGVECTOR_DISTANCE_METRICS = [
13
- "cosine",
14
- "l2",
15
- "inner_product"
16
- ];
17
- var PGVECTOR_INDEX_TYPES = ["none", "hnsw", "ivfflat"];
18
- var DEFAULT_SCHEMA_NAME = "absolute_rag";
19
- var DEFAULT_CHUNK_TABLE_NAME = "chunks";
20
- var DEFAULT_MIGRATION_TABLE_NAME = "migrations";
21
- var DEFAULT_DIMENSIONS = 1536;
22
- var IDENTIFIER_RE = /^[A-Za-z_][A-Za-z0-9_]*$/;
23
- var assertIdentifier = (value, label) => {
24
- if (typeof value !== "string" || !IDENTIFIER_RE.test(value)) {
25
- throw new Error(`${ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME}: invalid ${label} "${String(value)}"`);
26
- }
27
- };
28
- var quoteIdentifier = (value) => {
29
- assertIdentifier(value, "identifier");
30
- return `"${value}"`;
31
- };
32
- var qualifiedTable = (schemaName, tableName) => `${quoteIdentifier(schemaName)}.${quoteIdentifier(tableName)}`;
33
- var escapeLiteral = (value) => value.replace(/'/g, "''");
34
- var vectorLiteral = (vector) => {
35
- if (!Array.isArray(vector) || vector.length === 0) {
36
- throw new Error(`${ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME}: vector values must be a non-empty array`);
37
- }
38
- return `[${vector.map((value) => {
39
- if (typeof value !== "number" || !Number.isFinite(value)) {
40
- throw new Error(`${ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME}: vector values must be finite numbers`);
8
+ matchesMetadataFilterRecord,
9
+ normalizeVector,
10
+ planNativeCandidateSearchBackfillK,
11
+ planNativeCandidateSearchK,
12
+ RAG_NATIVE_QUERY_CANDIDATE_LIMIT,
13
+ RAG_VECTOR_DIMENSIONS_DEFAULT,
14
+ rankRAGLexicalMatches,
15
+ resolveAdaptiveNativeCandidateLimit,
16
+ summarizeSQLiteCandidateCoverage
17
+ } from "@absolutejs/rag/adapter-kit";
18
+ var DEFAULT_DIMENSIONS = RAG_VECTOR_DIMENSIONS_DEFAULT;
19
+ var DEFAULT_TABLE_NAME = "rag_chunks";
20
+ var DEFAULT_SCHEMA_NAME = "public";
21
+ var DEFAULT_QUERY_MULTIPLIER = 4;
22
+ var MAX_QUERY_MULTIPLIER = 16;
23
+ var DEFAULT_POSTGRES_INDEX_TYPE = "hnsw";
24
+ var DEFAULT_POSTGRES_IVFFLAT_LISTS = 100;
25
+ var DEFAULT_POSTGRES_HNSW_M = 16;
26
+ var DEFAULT_POSTGRES_HNSW_EF_CONSTRUCTION = 64;
27
+ var IDENTIFIER_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
28
+ var FILTER_PATH_SEGMENT_RE = /^[a-zA-Z0-9_]+$/;
29
+ var isObjectFilterRecord = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
30
+ var isNestedFilterArray = (value) => Array.isArray(value) && value.every((entry) => isObjectFilterRecord(entry));
31
+ var isOperatorFilterRecord = (value) => isObjectFilterRecord(value) && Object.keys(value).some((key) => key.startsWith("$"));
32
+ var countFilterClauses = (filter) => {
33
+ if (!filter) {
34
+ return 0;
35
+ }
36
+ let count = 0;
37
+ for (const [key, value] of Object.entries(filter)) {
38
+ if (key === "$and" || key === "$or") {
39
+ if (isNestedFilterArray(value)) {
40
+ count += value.reduce((total, entry) => total + countFilterClauses(entry), 0);
41
+ }
42
+ continue;
41
43
  }
42
- return String(value);
43
- }).join(",")}]`;
44
- };
45
- var makePlaceholder = (params, value, cast = "") => {
46
- params.push(value);
47
- const suffix = cast ? `::${cast}` : "";
48
- return `$${params.length}${suffix}`;
49
- };
50
- var normalizeMetric = (metric) => {
51
- if (metric === "l2" || metric === "inner_product") {
52
- return metric;
44
+ if (key === "$not") {
45
+ if (isObjectFilterRecord(value)) {
46
+ count += countFilterClauses(value);
47
+ }
48
+ continue;
49
+ }
50
+ count += 1;
53
51
  }
54
- return "cosine";
52
+ return count;
55
53
  };
56
- var normalizeIndex = (index) => {
57
- if (!index || index.type === undefined) {
58
- return { type: "none" };
54
+ var toPostgresJsonPath = (key) => {
55
+ const segments = key.split(".").filter(Boolean);
56
+ if (segments.length === 0 || !segments.every((segment) => FILTER_PATH_SEGMENT_RE.test(segment))) {
57
+ return null;
59
58
  }
60
- if (index.type === "hnsw" || index.type === "ivfflat" || index.type === "none") {
61
- return index;
59
+ return segments;
60
+ };
61
+ var toPostgresFilterBinding = (value) => {
62
+ if (typeof value === "string" || typeof value === "number" || typeof value === "boolean" || value === null) {
63
+ return value;
62
64
  }
63
- throw new Error(`${ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME}: unsupported pgvector index type "${String(index.type)}"`);
65
+ return;
64
66
  };
65
- var resolveSchemaConfig = (options) => {
66
- const schemaName = options.schema?.schemaName ?? DEFAULT_SCHEMA_NAME;
67
- const chunkTableName = options.schema?.chunkTableName ?? DEFAULT_CHUNK_TABLE_NAME;
68
- const migrationTableName = options.schema?.migrationTableName ?? DEFAULT_MIGRATION_TABLE_NAME;
69
- assertIdentifier(schemaName, "schema name");
70
- assertIdentifier(chunkTableName, "chunk table name");
71
- assertIdentifier(migrationTableName, "migration table name");
72
- return {
73
- schemaName,
74
- chunkTableName,
75
- migrationTableName
76
- };
67
+ var buildPostgresJsonbScalarEquality = (input) => {
68
+ const comparison = input.comparison ?? "=";
69
+ return comparison === "=" ? `jsonb_typeof(${input.valueSql}) = 'null'` : `coalesce(jsonb_typeof(${input.valueSql}), 'missing') <> 'null'`;
77
70
  };
78
- var resolveVectorConfig = (options) => {
79
- const vector = options?.vector;
80
- if (!vector || vector.provider !== "pgvector") {
81
- throw new Error(`${ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME}: PostgreSQL RAG currently requires vector.provider = "pgvector"`);
82
- }
83
- const dimensions = vector.dimensions ?? DEFAULT_DIMENSIONS;
84
- if (!Number.isInteger(dimensions) || dimensions <= 0) {
85
- throw new Error(`${ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME}: dimensions must be a positive integer`);
71
+ var buildPostgresMetadataScalarEquality = (input) => input.value === null ? buildPostgresJsonbScalarEquality({
72
+ comparison: input.comparison,
73
+ valueSql: input.valueSql
74
+ }) : `${input.actualSql} ${input.comparison ?? "="} ${input.bind(String(input.value))}`;
75
+ var buildPostgresFilterPlan = (filter, startIndex = 0) => {
76
+ if (!filter) {
77
+ return { clause: "", params: [] };
86
78
  }
87
- const distanceMetric = normalizeMetric(vector.distanceMetric);
88
- const index = normalizeIndex(vector.index);
89
- return {
90
- ...vector,
91
- dimensions,
92
- distanceMetric,
93
- extensionName: vector.extensionName ?? "vector",
94
- index
79
+ const params = [];
80
+ const bind = (value) => {
81
+ params.push(value);
82
+ return `$${params.length + startIndex}`;
95
83
  };
96
- };
97
- var operatorForMetric = (distanceMetric) => {
98
- switch (distanceMetric) {
99
- case "l2":
100
- return "<->";
101
- case "inner_product":
102
- return "<#>";
103
- case "cosine":
104
- default:
105
- return "<=>";
106
- }
107
- };
108
- var operatorClassForMetric = (distanceMetric) => {
109
- switch (distanceMetric) {
110
- case "l2":
111
- return "vector_l2_ops";
112
- case "inner_product":
113
- return "vector_ip_ops";
114
- case "cosine":
115
- default:
116
- return "vector_cosine_ops";
117
- }
118
- };
119
- var scoreFromDistance = (distance, distanceMetric) => {
120
- if (typeof distance !== "number" || !Number.isFinite(distance)) {
121
- return 0;
122
- }
123
- switch (distanceMetric) {
124
- case "inner_product":
125
- return -distance;
126
- case "l2":
127
- return 1 / (1 + Math.abs(distance));
128
- case "cosine":
129
- default:
130
- return 1 - distance;
131
- }
132
- };
133
- var createIndexSql = ({
134
- schemaName,
135
- chunkTableName,
136
- distanceMetric,
137
- index
138
- }) => {
139
- if (!index || index.type === "none") {
140
- return [];
141
- }
142
- const qualifiedChunkTable = qualifiedTable(schemaName, chunkTableName);
143
- const opClass = operatorClassForMetric(distanceMetric);
144
- const indexName = `${chunkTableName}_embedding_${index.type}_${distanceMetric}_idx`;
145
- const withParts = [];
146
- if (index.type === "hnsw") {
147
- if (Number.isInteger(index.m) && (index.m ?? 0) > 0) {
148
- withParts.push(`m = ${index.m}`);
149
- }
150
- if (Number.isInteger(index.efConstruction) && (index.efConstruction ?? 0) > 0) {
151
- withParts.push(`ef_construction = ${index.efConstruction}`);
84
+ const build = (entry) => {
85
+ const clauses = [];
86
+ for (const [key, value] of Object.entries(entry)) {
87
+ if (key === "$and" || key === "$or") {
88
+ if (!isNestedFilterArray(value) || value.length === 0) {
89
+ return null;
90
+ }
91
+ const nested = value.map((item) => build(item));
92
+ if (nested.some((item) => item === null)) {
93
+ return null;
94
+ }
95
+ clauses.push(`(${nested.filter((item) => Boolean(item)).join(key === "$and" ? " AND " : " OR ")})`);
96
+ continue;
97
+ }
98
+ if (key === "$not") {
99
+ if (!isObjectFilterRecord(value)) {
100
+ return null;
101
+ }
102
+ const nested = build(value);
103
+ if (!nested) {
104
+ return null;
105
+ }
106
+ clauses.push(`NOT (${nested})`);
107
+ continue;
108
+ }
109
+ const isScalarField = key === "chunkId" || key === "source" || key === "title";
110
+ const jsonPath = isScalarField ? null : toPostgresJsonPath(key);
111
+ if (!isScalarField && !jsonPath) {
112
+ return null;
113
+ }
114
+ let actualSql;
115
+ let metadataPathSegments = [];
116
+ let metadataValueSql;
117
+ if (isScalarField) {
118
+ actualSql = key === "chunkId" ? "chunk_id" : key;
119
+ } else {
120
+ metadataPathSegments = jsonPath ?? [];
121
+ actualSql = `jsonb_extract_path_text(metadata, ${metadataPathSegments.map((segment) => `'${segment}'`).join(", ")})`;
122
+ metadataValueSql = `metadata #> '{${metadataPathSegments.join(",")}}'`;
123
+ }
124
+ if (!isOperatorFilterRecord(value)) {
125
+ const binding = toPostgresFilterBinding(value);
126
+ if (binding === undefined) {
127
+ return null;
128
+ }
129
+ clauses.push(isScalarField ? `${actualSql} = ${bind(String(binding))}` : buildPostgresMetadataScalarEquality({
130
+ actualSql,
131
+ bind,
132
+ value: binding,
133
+ valueSql: metadataValueSql
134
+ }));
135
+ continue;
136
+ }
137
+ const operatorClauses = Object.entries(value).map(([operator, expected]) => {
138
+ switch (operator) {
139
+ case "$exists":
140
+ return isScalarField ? expected ? `${actualSql} IS NOT NULL` : `${actualSql} IS NULL` : expected ? `${metadataValueSql} IS NOT NULL` : `${metadataValueSql} IS NULL`;
141
+ case "$in": {
142
+ if (!Array.isArray(expected) || expected.length === 0) {
143
+ return null;
144
+ }
145
+ const bindings = expected.map((entry2) => toPostgresFilterBinding(entry2)).filter((entry2) => entry2 !== undefined);
146
+ if (bindings.length !== expected.length) {
147
+ return null;
148
+ }
149
+ return isScalarField ? `${actualSql} IN (${bindings.map((entry2) => bind(String(entry2))).join(", ")})` : `(${bindings.map((entry2) => buildPostgresMetadataScalarEquality({
150
+ actualSql,
151
+ bind,
152
+ value: entry2,
153
+ valueSql: metadataValueSql
154
+ })).join(" OR ")})`;
155
+ }
156
+ case "$ne": {
157
+ const binding = toPostgresFilterBinding(expected);
158
+ return binding === undefined ? null : isScalarField ? `${actualSql} <> ${bind(String(binding))}` : buildPostgresMetadataScalarEquality({
159
+ actualSql,
160
+ bind,
161
+ comparison: "<>",
162
+ value: binding,
163
+ valueSql: metadataValueSql
164
+ });
165
+ }
166
+ case "$gt":
167
+ case "$gte":
168
+ case "$lt":
169
+ case "$lte": {
170
+ if (typeof expected !== "number" || !Number.isFinite(expected)) {
171
+ return null;
172
+ }
173
+ const comparison = operator === "$gt" ? ">" : operator === "$gte" ? ">=" : operator === "$lt" ? "<" : "<=";
174
+ return `((${actualSql}) ~ '^-?[0-9]+(\\.[0-9]+)?$' AND (${actualSql})::double precision ${comparison} ${bind(expected)})`;
175
+ }
176
+ case "$contains":
177
+ if (isScalarField) {
178
+ return null;
179
+ }
180
+ if (toPostgresFilterBinding(expected) === undefined) {
181
+ return null;
182
+ }
183
+ return `(${metadataValueSql} IS NOT NULL AND ${metadataValueSql} ? ${bind(String(expected))})`;
184
+ case "$containsAny":
185
+ case "$containsAll": {
186
+ if (isScalarField || !Array.isArray(expected)) {
187
+ return null;
188
+ }
189
+ const values = expected.map((entry2) => toPostgresFilterBinding(entry2)).filter((entry2) => entry2 !== undefined);
190
+ if (values.length === 0 || values.length !== expected.length) {
191
+ return null;
192
+ }
193
+ const sqlArray = `ARRAY[${values.map((value2) => bind(String(value2))).join(", ")}]::text[]`;
194
+ return `(${metadataValueSql} IS NOT NULL AND ${metadataValueSql} ${operator === "$containsAny" ? "?|" : "?&"} ${sqlArray})`;
195
+ }
196
+ default:
197
+ return null;
198
+ }
199
+ });
200
+ if (operatorClauses.some((clause2) => clause2 === null)) {
201
+ return null;
202
+ }
203
+ clauses.push(operatorClauses.filter((clause2) => Boolean(clause2)).map((clause2) => `(${clause2})`).join(" AND "));
152
204
  }
153
- }
154
- if (index.type === "ivfflat" && Number.isInteger(index.lists) && (index.lists ?? 0) > 0) {
155
- withParts.push(`lists = ${index.lists}`);
156
- }
157
- const withClause = withParts.length > 0 ? ` WITH (${withParts.join(", ")})` : "";
158
- return [
159
- `CREATE INDEX IF NOT EXISTS ${quoteIdentifier(indexName)} ON ${qualifiedChunkTable} USING ${index.type} (embedding ${opClass})${withClause}`
160
- ];
205
+ return clauses.length > 0 ? clauses.map((clause2) => `(${clause2})`).join(" AND ") : "";
206
+ };
207
+ const clause = build(filter);
208
+ return clause === null || clause.trim().length === 0 ? null : { clause, params };
161
209
  };
162
- var createQuerySessionSql = ({
163
- index
164
- }) => {
165
- if (!index || index.type === "none") {
166
- return [];
210
+ var buildPostgresPushdownFilter = (filter) => {
211
+ if (!filter) {
212
+ return;
167
213
  }
168
- const sql = [];
169
- if (index.type === "hnsw") {
170
- if (Number.isInteger(index.efSearch) && (index.efSearch ?? 0) > 0) {
171
- sql.push(`SET LOCAL hnsw.ef_search = ${index.efSearch}`);
214
+ const hasPushdownFilterPlan = (entry) => {
215
+ const plan = buildPostgresFilterPlan(entry);
216
+ return plan !== null && Boolean(plan.clause) && plan.clause.trim().length > 0;
217
+ };
218
+ const hasPushdownFilterPlanEntry = (entry) => {
219
+ if (!isObjectFilterRecord(entry)) {
220
+ return false;
172
221
  }
173
- if (index.iterativeScan && index.iterativeScan !== "off") {
174
- sql.push(`SET LOCAL hnsw.iterative_scan = '${escapeLiteral(index.iterativeScan)}'`);
222
+ return hasPushdownFilterPlan(entry);
223
+ };
224
+ const nextEntries = [];
225
+ for (const [key, value] of Object.entries(filter)) {
226
+ if (key === "$and" || key === "$or") {
227
+ if (!isNestedFilterArray(value)) {
228
+ continue;
229
+ }
230
+ const nested = value.map((entry) => buildPostgresPushdownFilter(entry)).filter((entry) => hasPushdownFilterPlanEntry(entry));
231
+ if (nested.length > 0) {
232
+ nextEntries.push([key, nested]);
233
+ }
234
+ continue;
175
235
  }
176
- }
177
- if (index.type === "ivfflat") {
178
- if (Number.isInteger(index.probes) && (index.probes ?? 0) > 0) {
179
- sql.push(`SET LOCAL ivfflat.probes = ${index.probes}`);
236
+ if (key === "$not") {
237
+ if (!isObjectFilterRecord(value)) {
238
+ continue;
239
+ }
240
+ const nested = buildPostgresPushdownFilter(value);
241
+ if (hasPushdownFilterPlanEntry(nested)) {
242
+ nextEntries.push([key, nested]);
243
+ }
244
+ continue;
180
245
  }
181
- if (Number.isInteger(index.maxProbes) && (index.maxProbes ?? 0) > 0) {
182
- sql.push(`SET LOCAL ivfflat.max_probes = ${index.maxProbes}`);
246
+ if (Array.isArray(value) || isOperatorFilterRecord(value) && Object.keys(value).some((operator) => !(operator === "$exists" || operator === "$in" || operator === "$contains" || operator === "$containsAny" || operator === "$containsAll" || operator === "$ne" || operator === "$gt" || operator === "$gte" || operator === "$lt" || operator === "$lte"))) {
247
+ continue;
183
248
  }
184
- if (index.iterativeScan && index.iterativeScan !== "off") {
185
- sql.push(`SET LOCAL ivfflat.iterative_scan = '${escapeLiteral(index.iterativeScan)}'`);
249
+ const isScalarColumnKey = ["chunkId", "source", "title"].includes(key);
250
+ const jsonPath = isScalarColumnKey ? null : toPostgresJsonPath(key);
251
+ if (!isScalarColumnKey && !jsonPath) {
252
+ continue;
186
253
  }
254
+ if (!hasPushdownFilterPlan({ [key]: value })) {
255
+ continue;
256
+ }
257
+ nextEntries.push([key, value]);
187
258
  }
188
- return sql;
189
- };
190
- var stageOrder = [
191
- "extension",
192
- "schema",
193
- "table",
194
- "index"
195
- ];
196
- var buildMigrationName = (stage, stageIndex, sql) => {
197
- const normalized = sql.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_+|_+$/g, "").slice(0, 48) || "statement";
198
- const globalOrder = String(stageOrder.indexOf(stage) + 1).padStart(2, "0");
199
- const localOrder = String(stageIndex + 1).padStart(3, "0");
200
- return `${globalOrder}_${stage}_${localOrder}_${normalized}`;
201
- };
202
- var createMigrationTableSql = (schemaName, migrationTableName) => `CREATE TABLE IF NOT EXISTS ${qualifiedTable(schemaName, migrationTableName)} (name TEXT PRIMARY KEY, applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW())`;
203
- var filterTrackedTableSql = (tableSql, schemaName, migrationTableName) => {
204
- const migrationTableTarget = qualifiedTable(schemaName, migrationTableName);
205
- return tableSql.filter((sql) => !sql.includes(migrationTableTarget));
206
- };
207
- var createPostgresSchemaPlan = (options) => {
208
- const schema = resolveSchemaConfig(options ?? {});
209
- const vector = resolveVectorConfig(options ?? {});
210
- const qualifiedChunkTable = qualifiedTable(schema.schemaName, schema.chunkTableName);
211
- const qualifiedMigrationTable = qualifiedTable(schema.schemaName, schema.migrationTableName);
212
- const extensionSql = vector.autoCreateExtension === false ? [] : [
213
- `CREATE EXTENSION IF NOT EXISTS ${quoteIdentifier(vector.extensionName)}`
214
- ];
215
- const schemaSql = vector.autoCreateSchema === false ? [] : [
216
- `CREATE SCHEMA IF NOT EXISTS ${quoteIdentifier(schema.schemaName)}`
217
- ];
218
- const tableSql = vector.autoCreateTables === false ? [] : [
219
- `CREATE TABLE IF NOT EXISTS ${qualifiedChunkTable} (id BIGSERIAL PRIMARY KEY, chunk_id TEXT NOT NULL UNIQUE, text TEXT NOT NULL, title TEXT, source TEXT, metadata JSONB NOT NULL DEFAULT '{}'::jsonb, embedding VECTOR(${vector.dimensions}) NOT NULL, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW())`,
220
- `CREATE INDEX IF NOT EXISTS ${quoteIdentifier(`${schema.chunkTableName}_chunk_id_idx`)} ON ${qualifiedChunkTable} (chunk_id)`,
221
- `CREATE INDEX IF NOT EXISTS ${quoteIdentifier(`${schema.chunkTableName}_source_idx`)} ON ${qualifiedChunkTable} (source)`,
222
- `CREATE INDEX IF NOT EXISTS ${quoteIdentifier(`${schema.chunkTableName}_metadata_idx`)} ON ${qualifiedChunkTable} USING GIN (metadata)`,
223
- createMigrationTableSql(schema.schemaName, schema.migrationTableName)
224
- ];
225
- const indexSql = vector.autoCreateIndex === false ? [] : createIndexSql({
226
- schemaName: schema.schemaName,
227
- chunkTableName: schema.chunkTableName,
228
- distanceMetric: vector.distanceMetric,
229
- index: vector.index
230
- });
231
- return {
232
- implementation: "pgvector",
233
- extensionSql,
234
- schemaSql,
235
- tableSql,
236
- indexSql,
237
- querySessionSql: createQuerySessionSql({ index: vector.index }),
238
- migrationTableQualifiedName: qualifiedMigrationTable
239
- };
259
+ return nextEntries.length > 0 ? Object.fromEntries(nextEntries) : undefined;
240
260
  };
241
- var createPostgresMigrationPlan = (options) => {
242
- const schema = resolveSchemaConfig(options ?? {});
243
- const schemaPlan = createPostgresSchemaPlan(options ?? {});
244
- const bootstrapSql = [];
245
- if (schemaPlan.schemaSql.length > 0) {
246
- bootstrapSql.push(...schemaPlan.schemaSql);
247
- }
248
- const migrationTableSql = createMigrationTableSql(schema.schemaName, schema.migrationTableName);
249
- if (!bootstrapSql.includes(migrationTableSql)) {
250
- bootstrapSql.push(migrationTableSql);
251
- }
252
- const migrations = [
253
- ...schemaPlan.extensionSql.map((sql, index) => ({
254
- stage: "extension",
255
- sql,
256
- stageIndex: index
257
- })),
258
- ...filterTrackedTableSql(schemaPlan.tableSql, schema.schemaName, schema.migrationTableName).map((sql, index) => ({
259
- stage: "table",
260
- sql,
261
- stageIndex: index
262
- })),
263
- ...schemaPlan.indexSql.map((sql, index) => ({
264
- stage: "index",
265
- sql,
266
- stageIndex: index
267
- }))
268
- ].map((entry) => ({
269
- name: buildMigrationName(entry.stage, entry.stageIndex, entry.sql),
270
- stage: entry.stage,
271
- sql: entry.sql
272
- }));
261
+ var resolvePostgresPushdownMode = (input) => {
262
+ const totalFilterClauseCount = countFilterClauses(input.filter);
263
+ const pushdownClauseCount = countFilterClauses(input.pushdownFilter);
264
+ const jsRemainderClauseCount = Math.max(0, totalFilterClauseCount - pushdownClauseCount);
265
+ const pushdownMode = pushdownClauseCount === 0 ? "none" : pushdownClauseCount >= totalFilterClauseCount ? "full" : "partial";
273
266
  return {
274
- implementation: schemaPlan.implementation,
275
- schemaName: schema.schemaName,
276
- migrationTableName: schema.migrationTableName,
277
- migrationTableQualifiedName: qualifiedTable(schema.schemaName, schema.migrationTableName),
278
- bootstrapSql,
279
- migrations,
280
- schemaPlan
267
+ jsRemainderClauseCount,
268
+ jsRemainderRatio: totalFilterClauseCount > 0 ? jsRemainderClauseCount / totalFilterClauseCount : undefined,
269
+ pushdownClauseCount,
270
+ pushdownCoverageRatio: totalFilterClauseCount > 0 ? pushdownClauseCount / totalFilterClauseCount : undefined,
271
+ pushdownMode,
272
+ totalFilterClauseCount
281
273
  };
282
274
  };
283
- var createWrappedPostgresClient = (sql, rootSql = sql) => ({
284
- query: async (queryText, params = []) => {
285
- const rows = await sql.unsafe(queryText, params);
286
- return {
287
- rows,
288
- rowCount: typeof rows.count === "number" ? rows.count : rows.length
289
- };
290
- },
291
- transaction: async (run) => rootSql.begin(async (transactionSql) => run(createWrappedPostgresClient(transactionSql, transactionSql))),
292
- close: async () => {
293
- if (typeof rootSql.end === "function") {
294
- await rootSql.end({ timeout: 5 });
295
- }
275
+ var assertSupportedIdentifier = (name) => {
276
+ if (!IDENTIFIER_RE.test(name)) {
277
+ throw new Error(`Invalid identifier "${name}". Only alphanumeric and underscore names are allowed.`);
296
278
  }
297
- });
298
- var createDefaultPostgresClientFactory = (options) => {
299
- const connectionString = typeof options.connectionString === "string" ? options.connectionString.trim() : "";
300
- if (connectionString.length === 0) {
301
- return;
302
- }
303
- let clientPromise;
304
- return async () => {
305
- if (!clientPromise) {
306
- clientPromise = (async () => {
307
- const postgresModule = await import("postgres");
308
- const postgres = postgresModule.default;
309
- const sql = postgres(connectionString, {
310
- onnotice: () => {},
311
- ...options.driver ?? {}
312
- });
313
- return createWrappedPostgresClient(sql, sql);
314
- })();
315
- }
316
- return clientPromise;
317
- };
318
279
  };
319
- var resolveClientFactory = (options) => {
320
- if (typeof options.clientFactory === "function") {
321
- const { clientFactory } = options;
322
- return async () => clientFactory();
280
+ var normalizePostgresIndexType = (value) => {
281
+ if (value === undefined) {
282
+ return DEFAULT_POSTGRES_INDEX_TYPE;
323
283
  }
324
- if (options.client) {
325
- const { client } = options;
326
- return async () => client;
284
+ if (value === "none" || value === "hnsw" || value === "ivfflat") {
285
+ return value;
327
286
  }
328
- const defaultFactory = createDefaultPostgresClientFactory(options);
329
- if (defaultFactory) {
330
- return defaultFactory;
287
+ throw new Error(`Invalid postgres index type "${String(value)}". Expected "none", "hnsw", or "ivfflat".`);
288
+ };
289
+ var normalizePositiveInteger = (value, fallback) => {
290
+ if (value === undefined || !Number.isFinite(value)) {
291
+ return fallback;
331
292
  }
332
- return async () => {
333
- throw new Error(`${ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME}: createPostgresRAG requires connectionString, client, or clientFactory.`);
334
- };
293
+ return Math.max(1, Math.floor(value));
335
294
  };
336
- var buildMetadataFilter = (filter) => {
337
- if (!filter) {
295
+ var getPostgresIndexOperatorClass = (metric) => metric === "cosine" ? "vector_cosine_ops" : metric === "inner_product" ? "vector_ip_ops" : "vector_l2_ops";
296
+ var getPostgresIndexName = (qualifiedTableName, indexType) => indexType === "none" ? undefined : `${qualifiedTableName.replace(".", "_")}_embedding_${indexType}_idx`;
297
+ var buildPostgresIndexSql = (input) => {
298
+ if (input.indexType === "none") {
338
299
  return;
339
300
  }
340
- const metadataEntries = Object.entries(filter).filter(([key]) => key !== "chunkId" && key !== "title" && key !== "source");
341
- if (metadataEntries.length === 0) {
342
- return;
301
+ const opclass = getPostgresIndexOperatorClass(input.distanceMetric);
302
+ const indexName = getPostgresIndexName(input.qualifiedTableName, input.indexType);
303
+ const optionsSql = input.indexType === "hnsw" ? ` with (m = ${input.hnswM}, ef_construction = ${input.hnswEfConstruction})` : ` with (lists = ${input.indexLists})`;
304
+ const createPrefix = input.ifNotExists === false ? "create index" : "create index if not exists";
305
+ return `${createPrefix} ${indexName} on ${input.qualifiedTableName} using ${input.indexType} (embedding ${opclass})${optionsSql}`;
306
+ };
307
+ var normalizeQueryMultiplier = (value) => {
308
+ if (value === undefined || !Number.isFinite(value)) {
309
+ return DEFAULT_QUERY_MULTIPLIER;
343
310
  }
344
- return Object.fromEntries(metadataEntries);
311
+ return Math.min(MAX_QUERY_MULTIPLIER, Math.max(1, Math.floor(value)));
345
312
  };
346
- var parseMetadataValue = (value) => {
347
- if (value === null || value === undefined) {
313
+ var normalizeMaxBackfills = (value) => {
314
+ if (value === undefined || !Number.isFinite(value)) {
348
315
  return;
349
316
  }
317
+ return Math.max(0, Math.floor(value));
318
+ };
319
+ var normalizeMinResults = (value, topK) => {
320
+ if (value === undefined || !Number.isFinite(value)) {
321
+ return topK;
322
+ }
323
+ return Math.min(topK, Math.max(1, Math.floor(value)));
324
+ };
325
+ var resolveFillTarget = (input) => {
326
+ const fillPolicy = input.fillPolicy ?? "satisfy_min_results";
327
+ return {
328
+ fillPolicy,
329
+ targetResults: fillPolicy === "strict_topk" ? input.topK : input.minResults
330
+ };
331
+ };
332
+ var toQualifiedTableName = (schemaName, tableName) => `${schemaName}.${tableName}`;
333
+ var toVectorLiteral = (vector) => `[${vector.join(",")}]`;
334
+ var parseMetadata = (value) => {
350
335
  if (typeof value === "string") {
351
336
  try {
352
337
  const parsed = JSON.parse(value);
353
- if (parsed && typeof parsed === "object") {
338
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
354
339
  return parsed;
355
340
  }
356
341
  } catch {
357
342
  return;
358
343
  }
359
344
  }
360
- if (typeof value === "object") {
345
+ if (!value || typeof value !== "object" || Array.isArray(value)) {
346
+ return;
347
+ }
348
+ return value;
349
+ };
350
+ var parseVectorText = (value) => {
351
+ if (!value) {
352
+ return [];
353
+ }
354
+ const normalized = value.trim();
355
+ const wrapped = normalized.startsWith("[") ? normalized : `[${normalized.replace(/[()]/g, "")}]`;
356
+ try {
357
+ const parsed = JSON.parse(wrapped);
358
+ return Array.isArray(parsed) ? parsed.filter((entry) => typeof entry === "number" && Number.isFinite(entry)) : [];
359
+ } catch {
360
+ return [];
361
+ }
362
+ };
363
+ var parseCountValue = (value) => {
364
+ if (typeof value === "number" && Number.isFinite(value)) {
361
365
  return value;
362
366
  }
363
- return;
367
+ if (typeof value === "bigint") {
368
+ return Number(value);
369
+ }
370
+ if (typeof value === "string") {
371
+ const parsed = Number(value);
372
+ return Number.isFinite(parsed) ? parsed : 0;
373
+ }
374
+ return 0;
364
375
  };
365
- var createPgvectorStoreStatus = ({
366
- vector,
367
- schema,
368
- diagnostics,
369
- initialized
370
- }) => ({
371
- backend: "postgres",
372
- vectorMode: "native_pgvector",
373
- dimensions: vector.dimensions,
374
- native: {
375
- requested: true,
376
- available: initialized && !diagnostics.lastInitError,
377
- active: initialized && !diagnostics.lastInitError,
378
- mode: "pgvector",
379
- extensionName: vector.extensionName,
380
- schemaName: schema.schemaName,
381
- tableName: schema.chunkTableName,
382
- distanceMetric: vector.distanceMetric,
383
- indexType: vector.index.type,
384
- fallbackReason: diagnostics.fallbackReason,
385
- lastInitError: diagnostics.lastInitError,
386
- lastQueryError: diagnostics.lastQueryError,
387
- lastUpsertError: diagnostics.lastUpsertError,
388
- lastMigrationError: diagnostics.lastMigrationError
376
+ var parseBooleanValue = (value) => {
377
+ if (typeof value === "boolean") {
378
+ return value;
389
379
  }
390
- });
391
- var getAppliedMigrationNames = async (client, migrationPlan) => {
392
- const result = await client.query(`SELECT name FROM ${migrationPlan.migrationTableQualifiedName} ORDER BY name ASC`);
393
- return new Set(result.rows.map((row) => String(row.name)));
394
- };
395
- var insertAppliedMigration = async (client, migrationPlan, name) => {
396
- await client.query(`INSERT INTO ${migrationPlan.migrationTableQualifiedName} (name) VALUES ($1) ON CONFLICT (name) DO NOTHING`, [name]);
397
- };
398
- var executeMigrationSequence = async (client, migrationPlan, migrations) => {
399
- const appliedNames = [];
400
- for (const migration of migrations) {
401
- await client.query(migration.sql);
402
- await insertAppliedMigration(client, migrationPlan, migration.name);
403
- appliedNames.push(migration.name);
404
- }
405
- return appliedNames;
406
- };
407
- var applyPostgresMigrations = async (options, applyOptions = {}) => {
408
- const migrationPlan = createPostgresMigrationPlan(options ?? {});
409
- const injectedClient = applyOptions.client;
410
- const getClient = injectedClient ? async () => injectedClient : resolveClientFactory(options ?? {});
411
- const client = await getClient();
412
- for (const sql of migrationPlan.bootstrapSql) {
413
- await client.query(sql);
414
- }
415
- const alreadyApplied = await getAppliedMigrationNames(client, migrationPlan);
416
- const pendingMigrations = migrationPlan.migrations.filter((migration) => !alreadyApplied.has(migration.name));
417
- const skippedNames = migrationPlan.migrations.filter((migration) => alreadyApplied.has(migration.name)).map((migration) => migration.name);
418
- if (applyOptions.dryRun === true) {
419
- return {
420
- migrationPlan,
421
- appliedNames: [],
422
- skippedNames,
423
- pendingNames: pendingMigrations.map((migration) => migration.name),
424
- appliedCount: 0,
425
- pendingCount: pendingMigrations.length,
426
- dryRun: true
427
- };
380
+ if (typeof value === "number") {
381
+ return value !== 0;
428
382
  }
429
- const run = async (activeClient) => {
430
- const names = await executeMigrationSequence(activeClient, migrationPlan, pendingMigrations);
431
- return {
432
- migrationPlan,
433
- appliedNames: names,
434
- skippedNames,
435
- pendingNames: pendingMigrations.map((migration) => migration.name),
436
- appliedCount: names.length,
437
- pendingCount: pendingMigrations.length,
438
- dryRun: false
439
- };
440
- };
441
- if (typeof client.transaction === "function" && pendingMigrations.length > 0) {
442
- return client.transaction(async (transactionClient) => run(transactionClient));
443
- }
444
- return run(client);
445
- };
446
- var applyPostgresSchemaPlan = applyPostgresMigrations;
447
- var createPgvectorStore = (options) => {
448
- const vector = resolveVectorConfig(options ?? {});
449
- const schema = resolveSchemaConfig(options ?? {});
450
- const plan = createPostgresSchemaPlan(options ?? {});
451
- const getClient = resolveClientFactory(options ?? {});
452
- const diagnostics = {
453
- fallbackReason: undefined,
454
- lastInitError: undefined,
455
- lastQueryError: undefined,
456
- lastUpsertError: undefined,
457
- lastMigrationError: undefined
458
- };
459
- let initialized = false;
460
- let initPromise;
461
- const ensureInitialized = async () => {
462
- if (initialized) {
463
- return;
383
+ if (typeof value === "bigint") {
384
+ return value !== 0n;
385
+ }
386
+ if (typeof value === "string") {
387
+ const normalized = value.trim().toLowerCase();
388
+ return normalized === "true" || normalized === "t" || normalized === "1";
389
+ }
390
+ return false;
391
+ };
392
+ var refreshPostgresRuntimeDiagnostics = async (db, nativeDiagnostics, input) => {
393
+ try {
394
+ const rows = await db.unsafe(`select
395
+ c.reltuples::bigint as estimated_row_count,
396
+ pg_relation_size($1::regclass) as table_bytes,
397
+ pg_indexes_size($1::regclass) as index_bytes,
398
+ pg_total_relation_size($1::regclass) as total_bytes,
399
+ exists(
400
+ select 1
401
+ from pg_indexes
402
+ where schemaname = $2
403
+ and tablename = $3
404
+ and indexname = $4
405
+ ) as index_present
406
+ from pg_class c
407
+ join pg_namespace n on n.oid = c.relnamespace
408
+ where n.nspname = $2
409
+ and c.relname = $3
410
+ limit 1`, [
411
+ input.qualifiedTableName,
412
+ input.schemaName,
413
+ input.tableName,
414
+ input.indexName ?? ""
415
+ ]);
416
+ const row = rows[0];
417
+ nativeDiagnostics.indexName = input.indexName;
418
+ nativeDiagnostics.indexPresent = input.indexName ? parseBooleanValue(row?.index_present) : undefined;
419
+ nativeDiagnostics.estimatedRowCount = parseCountValue(row?.estimated_row_count);
420
+ nativeDiagnostics.tableBytes = parseCountValue(row?.table_bytes);
421
+ nativeDiagnostics.indexBytes = parseCountValue(row?.index_bytes);
422
+ nativeDiagnostics.totalBytes = parseCountValue(row?.total_bytes);
423
+ nativeDiagnostics.lastHealthCheckAt = Date.now();
424
+ nativeDiagnostics.lastHealthError = undefined;
425
+ } catch (error) {
426
+ nativeDiagnostics.lastHealthCheckAt = Date.now();
427
+ nativeDiagnostics.lastHealthError = error instanceof Error ? error.message : String(error);
428
+ }
429
+ };
430
+ var analyzePostgresTable = async (db, nativeDiagnostics, input) => {
431
+ try {
432
+ await db.unsafe(`analyze ${input.qualifiedTableName}`);
433
+ nativeDiagnostics.lastAnalyzeAt = Date.now();
434
+ nativeDiagnostics.lastAnalyzeError = undefined;
435
+ await refreshPostgresRuntimeDiagnostics(db, nativeDiagnostics, input);
436
+ } catch (error) {
437
+ nativeDiagnostics.lastAnalyzeAt = Date.now();
438
+ nativeDiagnostics.lastAnalyzeError = error instanceof Error ? error.message : String(error);
439
+ throw error;
440
+ }
441
+ };
442
+ var rebuildPostgresNativeIndex = async (db, nativeDiagnostics, input) => {
443
+ if (!input.indexName || input.indexType === "none") {
444
+ throw new Error("Postgres native index rebuild is not configured");
445
+ }
446
+ try {
447
+ await db.unsafe(`drop index if exists ${input.indexName}`);
448
+ await db.unsafe(buildPostgresIndexSql({
449
+ distanceMetric: input.distanceMetric,
450
+ hnswEfConstruction: input.hnswEfConstruction,
451
+ hnswM: input.hnswM,
452
+ ifNotExists: false,
453
+ indexLists: input.indexLists,
454
+ indexType: input.indexType,
455
+ qualifiedTableName: input.qualifiedTableName
456
+ }));
457
+ nativeDiagnostics.lastReindexAt = Date.now();
458
+ nativeDiagnostics.lastReindexError = undefined;
459
+ await analyzePostgresTable(db, nativeDiagnostics, input);
460
+ } catch (error) {
461
+ nativeDiagnostics.lastReindexAt = Date.now();
462
+ nativeDiagnostics.lastReindexError = error instanceof Error ? error.message : String(error);
463
+ throw error;
464
+ }
465
+ };
466
+ var getPostgresChunkIdsByChunkIds = async (db, qualifiedTableName, chunkIds) => {
467
+ const normalized = [...new Set(chunkIds)].filter((chunkId) => chunkId.length > 0);
468
+ if (normalized.length === 0) {
469
+ return [];
470
+ }
471
+ const placeholders = normalized.map((_, index) => `$${index + 1}`).join(", ");
472
+ const rows = await db.unsafe(`select chunk_id from ${qualifiedTableName} where chunk_id in (${placeholders})`, normalized);
473
+ return rows.map((row) => row.chunk_id).filter((chunkId) => typeof chunkId === "string");
474
+ };
475
+ var getPostgresCandidateChunkIdsByFilter = async (db, qualifiedTableName, filter) => {
476
+ if (!filter || Object.keys(filter).length === 0) {
477
+ return [];
478
+ }
479
+ const pushdownFilter = buildPostgresPushdownFilter(filter);
480
+ const filterPlan = buildPostgresFilterPlan(pushdownFilter);
481
+ const rowsSql = filterPlan?.clause ? `select chunk_id, text, title, source, metadata from ${qualifiedTableName} where ${filterPlan.clause}` : `select chunk_id, text, title, source, metadata from ${qualifiedTableName}`;
482
+ const rows = await db.unsafe(rowsSql, filterPlan?.clause ? filterPlan.params ?? [] : []);
483
+ const chunks = rows.map((row) => mapRowToChunk(row)).filter((chunk) => matchesFilter(chunk, filter));
484
+ return chunks.map((chunk) => chunk.chunkId);
485
+ };
486
+ var getPostgresCandidateChunkIds = async (db, qualifiedTableName, input) => {
487
+ const chunkIdSet = new Set;
488
+ if (input.filter && Object.keys(input.filter).length > 0) {
489
+ for (const chunkId of await getPostgresCandidateChunkIdsByFilter(db, qualifiedTableName, input.filter)) {
490
+ chunkIdSet.add(chunkId);
464
491
  }
465
- if (!initPromise) {
466
- initPromise = (async () => {
467
- try {
468
- const client = await getClient();
469
- await applyPostgresMigrations(options ?? {}, { client });
470
- initialized = true;
471
- diagnostics.lastInitError = undefined;
472
- diagnostics.lastMigrationError = undefined;
473
- diagnostics.fallbackReason = undefined;
474
- } catch (error) {
475
- initialized = false;
476
- const message = error instanceof Error ? error.message : String(error);
477
- diagnostics.lastInitError = message;
478
- diagnostics.lastMigrationError = message;
479
- diagnostics.fallbackReason = message;
480
- throw error;
481
- }
482
- })();
492
+ }
493
+ if (input.chunkIds && input.chunkIds.length > 0) {
494
+ for (const chunkId of await getPostgresChunkIdsByChunkIds(db, qualifiedTableName, input.chunkIds)) {
495
+ chunkIdSet.add(chunkId);
483
496
  }
484
- return initPromise;
497
+ }
498
+ return [...chunkIdSet];
499
+ };
500
+ var normalizeDistance = (distance, metric) => {
501
+ if (!Number.isFinite(distance)) {
502
+ return 0;
503
+ }
504
+ if (metric === "cosine") {
505
+ return Math.min(1, Math.max(0, 1 - distance));
506
+ }
507
+ if (metric === "inner_product") {
508
+ return Math.max(0, -distance);
509
+ }
510
+ return Math.max(0, 1 / (1 + Math.abs(distance)));
511
+ };
512
+ var getDistanceOperator = (metric) => metric === "cosine" ? "<=>" : metric === "inner_product" ? "<#>" : "<->";
513
+ var createPostgresStatus = (dimensions, nativeDiagnostics) => ({
514
+ backend: "postgres",
515
+ dimensions,
516
+ native: nativeDiagnostics,
517
+ vectorMode: "native_pgvector"
518
+ });
519
+ var createPostgresCapabilities = () => ({
520
+ backend: "postgres",
521
+ nativeVectorSearch: true,
522
+ persistence: "external",
523
+ serverSideFiltering: true,
524
+ streamingIngestStatus: false
525
+ });
526
+ var updatePostgresLastQueryPlan = (input) => {
527
+ const pushdown = resolvePostgresPushdownMode({
528
+ filter: input.filter,
529
+ pushdownFilter: input.pushdownFilter
530
+ });
531
+ input.nativeDiagnostics.lastQueryPlan = {
532
+ backfillCount: input.backfillCount,
533
+ candidateBudgetExhausted: input.candidateBudgetExhausted,
534
+ candidateCoverage: summarizeSQLiteCandidateCoverage({
535
+ filteredCandidateCount: input.filteredCandidateCount,
536
+ returnedCount: input.returnedCount,
537
+ topK: input.topK
538
+ }),
539
+ filteredCandidateCount: input.filteredCandidateCount,
540
+ finalSearchK: input.finalSearchK,
541
+ initialSearchK: input.initialSearchK,
542
+ searchExpansionRatio: typeof input.initialSearchK === "number" && typeof input.finalSearchK === "number" && input.initialSearchK > 0 ? input.finalSearchK / input.initialSearchK : undefined,
543
+ candidateLimitUsed: input.candidateLimitUsed,
544
+ maxBackfillsUsed: input.maxBackfillsUsed,
545
+ minResultsUsed: input.minResultsUsed,
546
+ fillPolicyUsed: input.fillPolicyUsed,
547
+ plannerProfileUsed: input.plannerProfileUsed,
548
+ jsRemainderClauseCount: pushdown.jsRemainderClauseCount,
549
+ queryMultiplierUsed: input.queryMultiplierUsed,
550
+ jsRemainderRatio: pushdown.jsRemainderRatio,
551
+ pushdownApplied: pushdown.pushdownClauseCount > 0,
552
+ pushdownClauseCount: pushdown.pushdownClauseCount,
553
+ pushdownCoverageRatio: pushdown.pushdownCoverageRatio,
554
+ pushdownMode: pushdown.pushdownMode,
555
+ queryMode: "native_pgvector",
556
+ candidateYieldRatio: typeof input.returnedCount === "number" && typeof input.finalSearchK === "number" && input.finalSearchK > 0 ? input.returnedCount / input.finalSearchK : undefined,
557
+ returnedCount: input.returnedCount,
558
+ backfillLimitReached: input.backfillLimitReached,
559
+ minResultsSatisfied: input.minResultsSatisfied,
560
+ topKFillRatio: typeof input.returnedCount === "number" && input.topK > 0 ? input.returnedCount / input.topK : undefined,
561
+ totalFilterClauseCount: pushdown.totalFilterClauseCount,
562
+ underfilledTopK: input.underfilledTopK
563
+ };
564
+ };
565
+ var matchesFilter = (record, filter) => matchesMetadataFilterRecord({
566
+ chunkId: record.chunkId,
567
+ metadata: record.metadata,
568
+ source: record.source,
569
+ title: record.title,
570
+ ...record.metadata ?? {}
571
+ }, filter);
572
+ var mapRowToChunk = (row) => ({
573
+ chunkId: row.chunk_id,
574
+ metadata: parseMetadata(row.metadata),
575
+ source: row.source ?? undefined,
576
+ text: row.text,
577
+ title: row.title ?? undefined,
578
+ vector: parseVectorText(row.embedding)
579
+ });
580
+ var ensurePostgresSchema = async (db, input) => {
581
+ await db.unsafe("create extension if not exists vector");
582
+ const [schemaName] = input.qualifiedTableName.split(".");
583
+ if (schemaName) {
584
+ await db.unsafe(`create schema if not exists ${schemaName}`);
585
+ }
586
+ await db.unsafe(`
587
+ create table if not exists ${input.qualifiedTableName} (
588
+ chunk_id text primary key,
589
+ text text not null,
590
+ title text,
591
+ source text,
592
+ metadata jsonb,
593
+ embedding vector(${input.dimensions}) not null
594
+ )
595
+ `);
596
+ const indexSql = buildPostgresIndexSql(input);
597
+ if (indexSql) {
598
+ await db.unsafe(indexSql);
599
+ }
600
+ };
601
+ var createPostgresRAGStore = (options = {}) => {
602
+ const dimensions = options.dimensions ?? DEFAULT_DIMENSIONS;
603
+ const distanceMetric = options.distanceMetric ?? "cosine";
604
+ const queryMultiplier = normalizeQueryMultiplier(options.queryMultiplier);
605
+ const indexType = normalizePostgresIndexType(options.indexType);
606
+ const indexLists = normalizePositiveInteger(options.indexLists, DEFAULT_POSTGRES_IVFFLAT_LISTS);
607
+ const hnswM = normalizePositiveInteger(options.hnswM, DEFAULT_POSTGRES_HNSW_M);
608
+ const hnswEfConstruction = normalizePositiveInteger(options.hnswEfConstruction, DEFAULT_POSTGRES_HNSW_EF_CONSTRUCTION);
609
+ const tableName = options.tableName ?? DEFAULT_TABLE_NAME;
610
+ const schemaName = options.schemaName ?? DEFAULT_SCHEMA_NAME;
611
+ assertSupportedIdentifier(tableName);
612
+ assertSupportedIdentifier(schemaName);
613
+ const qualifiedTableName = toQualifiedTableName(schemaName, tableName);
614
+ const indexName = getPostgresIndexName(qualifiedTableName, indexType);
615
+ const db = options.sql ?? new Bun.SQL(options.connectionString ?? process.env.RAG_POSTGRES_URL ?? process.env.DATABASE_URL ?? "postgres://postgres:postgres@localhost:55433/absolute_rag_demo");
616
+ const nativeDiagnostics = {
617
+ active: true,
618
+ available: true,
619
+ distanceMetric,
620
+ extensionName: "vector",
621
+ indexName,
622
+ indexType,
623
+ mode: "pgvector",
624
+ requested: true,
625
+ schemaName,
626
+ tableName
627
+ };
628
+ const capabilities = createPostgresCapabilities();
629
+ const distanceOperator = getDistanceOperator(distanceMetric);
630
+ let initialized;
631
+ const init = () => {
632
+ initialized ??= ensurePostgresSchema(db, {
633
+ dimensions,
634
+ distanceMetric,
635
+ hnswEfConstruction,
636
+ hnswM,
637
+ indexLists,
638
+ indexType,
639
+ qualifiedTableName
640
+ }).then(() => refreshPostgresRuntimeDiagnostics(db, nativeDiagnostics, {
641
+ indexName,
642
+ qualifiedTableName,
643
+ schemaName,
644
+ tableName
645
+ })).catch((error) => {
646
+ nativeDiagnostics.active = false;
647
+ nativeDiagnostics.available = false;
648
+ nativeDiagnostics.lastInitError = error instanceof Error ? error.message : String(error);
649
+ nativeDiagnostics.lastMigrationError = error instanceof Error ? error.message : String(error);
650
+ nativeDiagnostics.fallbackReason = nativeDiagnostics.lastInitError;
651
+ throw error;
652
+ });
653
+ return initialized;
485
654
  };
486
655
  const embed = async (input) => {
487
- if (typeof options.embedding === "function") {
488
- const result = await options.embedding(input);
489
- return normalizeVector(result);
656
+ input.model;
657
+ input.signal;
658
+ if (options.mockEmbedding) {
659
+ return options.mockEmbedding(input.text);
490
660
  }
491
- return normalizeVector([
492
- ...createRAGVector(input.text, vector.dimensions)
493
- ]);
661
+ return normalizeVector(createRAGVector(input.text, dimensions));
494
662
  };
495
663
  const query = async (input) => {
496
- await ensureInitialized();
497
- const client = await getClient();
498
- const params = [];
499
- const qualifiedChunkTable = qualifiedTable(schema.schemaName, schema.chunkTableName);
500
- const operator = operatorForMetric(vector.distanceMetric);
501
- const vectorPlaceholder = makePlaceholder(params, vectorLiteral(normalizeVector(input.queryVector)), "vector");
502
- const limitPlaceholder = makePlaceholder(params, input.topK);
503
- const whereParts = [];
504
- const filter = input.filter;
505
- if (filter?.chunkId !== undefined) {
506
- whereParts.push(`chunk_id = ${makePlaceholder(params, filter.chunkId)}`);
507
- }
508
- if (filter?.title !== undefined) {
509
- whereParts.push(`title = ${makePlaceholder(params, filter.title)}`);
510
- }
511
- if (filter?.source !== undefined) {
512
- whereParts.push(`source = ${makePlaceholder(params, filter.source)}`);
513
- }
514
- const metadataFilter = buildMetadataFilter(filter);
515
- if (metadataFilter) {
516
- whereParts.push(`metadata @> ${makePlaceholder(params, JSON.stringify(metadataFilter), "jsonb")}`);
664
+ await init();
665
+ const queryVector = normalizeVector(input.queryVector);
666
+ const queryMultiplier2 = normalizeQueryMultiplier(input.queryMultiplier ?? options.queryMultiplier);
667
+ const maxBackfills = normalizeMaxBackfills(input.maxBackfills);
668
+ const minResults = normalizeMinResults(input.minResults, input.topK);
669
+ const fillTarget = resolveFillTarget({
670
+ fillPolicy: input.fillPolicy,
671
+ minResults,
672
+ topK: input.topK
673
+ });
674
+ const queryVectorLiteral = toVectorLiteral(queryVector);
675
+ const pushdownFilter = buildPostgresPushdownFilter(input.filter);
676
+ const queryFilterPlan = buildPostgresFilterPlan(pushdownFilter);
677
+ const effectivePushdownFilter = queryFilterPlan ? pushdownFilter : undefined;
678
+ const countFilterPlan = queryFilterPlan;
679
+ const countSql = countFilterPlan?.clause ? `select count(*)::int as count from ${qualifiedTableName} where ${countFilterPlan.clause}` : `select count(*)::int as count from ${qualifiedTableName}`;
680
+ const totalRowsResult = await db.unsafe(countSql, countFilterPlan?.params ?? []);
681
+ nativeDiagnostics.lastFilterDebug = {
682
+ countParams: countFilterPlan?.params ?? [],
683
+ countResultRaw: totalRowsResult?.[0],
684
+ countSql,
685
+ filter: input.filter,
686
+ pushdownFilter: effectivePushdownFilter
687
+ };
688
+ const totalRows = parseCountValue(totalRowsResult?.[0]?.count);
689
+ const candidateLimit = resolveAdaptiveNativeCandidateLimit({
690
+ defaultCandidateLimit: RAG_NATIVE_QUERY_CANDIDATE_LIMIT,
691
+ explicitCandidateLimit: input.candidateLimit,
692
+ filteredCandidateCount: totalRows,
693
+ plannerProfile: input.plannerProfile,
694
+ queryMultiplier: queryMultiplier2,
695
+ topK: input.topK
696
+ });
697
+ const hasPushdownFilter = Boolean(effectivePushdownFilter);
698
+ const plannedFilteredCandidateCount = hasPushdownFilter && totalRows === 0 ? undefined : totalRows;
699
+ const initialSearchK = planNativeCandidateSearchK({
700
+ candidateLimit,
701
+ filteredCandidateCount: plannedFilteredCandidateCount,
702
+ queryMultiplier: queryMultiplier2,
703
+ topK: input.topK
704
+ });
705
+ if (initialSearchK === 0) {
706
+ return [];
517
707
  }
518
- const whereSql = whereParts.length > 0 ? `WHERE ${whereParts.join(" AND ")}` : "";
519
- const sessionSql = plan.querySessionSql;
520
- const selectSql = `SELECT chunk_id, text, title, source, metadata, embedding ${operator} ${vectorPlaceholder} AS distance FROM ${qualifiedChunkTable} ${whereSql} ORDER BY distance ASC LIMIT ${limitPlaceholder}`;
521
- try {
522
- for (const sql of sessionSql) {
523
- await client.query(sql);
708
+ let currentSearchK = initialSearchK;
709
+ let backfillCount = 0;
710
+ let candidateBudgetExhausted = false;
711
+ let backfillLimitReached = false;
712
+ let effectiveFilteredCandidateCount = plannedFilteredCandidateCount;
713
+ let mapped = [];
714
+ for (;; ) {
715
+ const rowsSql = queryFilterPlan?.clause ? `select chunk_id, text, title, source, metadata, embedding::text as embedding, embedding ${distanceOperator} '${queryVectorLiteral}'::vector as distance from ${qualifiedTableName} where ${queryFilterPlan.clause} order by embedding ${distanceOperator} '${queryVectorLiteral}'::vector limit $${queryFilterPlan.params.length + 1}` : `select chunk_id, text, title, source, metadata, embedding::text as embedding, embedding ${distanceOperator} '${queryVectorLiteral}'::vector as distance from ${qualifiedTableName} order by embedding ${distanceOperator} '${queryVectorLiteral}'::vector limit $1`;
716
+ const rows = await db.unsafe(rowsSql, queryFilterPlan?.clause ? [...queryFilterPlan.params ?? [], currentSearchK] : [currentSearchK]);
717
+ nativeDiagnostics.lastFilterDebug = {
718
+ ...nativeDiagnostics.lastFilterDebug,
719
+ queryParams: queryFilterPlan?.clause ? [...queryFilterPlan.params ?? [], currentSearchK] : [currentSearchK],
720
+ queryRowCount: rows.length,
721
+ querySql: rowsSql
722
+ };
723
+ if (hasPushdownFilter && effectiveFilteredCandidateCount === undefined && rows.length <= currentSearchK) {
724
+ effectiveFilteredCandidateCount = rows.length;
524
725
  }
525
- const result = await client.query(selectSql, params);
526
- return result.rows.map((row) => ({
527
- chunkId: row.chunk_id,
528
- chunkText: row.text,
529
- title: row.title ?? undefined,
530
- source: row.source ?? undefined,
531
- metadata: parseMetadataValue(row.metadata),
532
- score: scoreFromDistance(Number(row.distance), vector.distanceMetric)
533
- }));
534
- } catch (error) {
535
- diagnostics.lastQueryError = error instanceof Error ? error.message : String(error);
536
- throw error;
726
+ mapped = rows.map((row) => {
727
+ const chunk = mapRowToChunk(row);
728
+ return {
729
+ chunk,
730
+ score: normalizeDistance(Number(row.distance ?? 0), distanceMetric)
731
+ };
732
+ }).filter(({ chunk }) => matchesFilter(chunk, input.filter)).map((entry) => ({
733
+ chunkId: entry.chunk.chunkId,
734
+ chunkText: entry.chunk.text,
735
+ embedding: entry.chunk.vector,
736
+ metadata: entry.chunk.metadata,
737
+ score: entry.score,
738
+ source: entry.chunk.source,
739
+ title: entry.chunk.title
740
+ })).sort((left, right) => right.score - left.score);
741
+ if (mapped.length >= fillTarget.targetResults) {
742
+ break;
743
+ }
744
+ const nextSearchK = planNativeCandidateSearchBackfillK({
745
+ backfillCount,
746
+ candidateLimit,
747
+ currentSearchK,
748
+ filteredCandidateCount: effectiveFilteredCandidateCount,
749
+ maxBackfills
750
+ });
751
+ if (nextSearchK <= currentSearchK) {
752
+ backfillLimitReached = typeof maxBackfills === "number" && backfillCount >= maxBackfills && mapped.length < fillTarget.targetResults;
753
+ candidateBudgetExhausted = mapped.length < fillTarget.targetResults;
754
+ break;
755
+ }
756
+ currentSearchK = nextSearchK;
757
+ backfillCount += 1;
537
758
  }
759
+ nativeDiagnostics.lastQueryError = undefined;
760
+ const returned = mapped.slice(0, input.topK);
761
+ updatePostgresLastQueryPlan({
762
+ backfillCount,
763
+ backfillLimitReached,
764
+ candidateBudgetExhausted,
765
+ candidateLimitUsed: candidateLimit,
766
+ maxBackfillsUsed: maxBackfills,
767
+ minResultsUsed: minResults,
768
+ fillPolicyUsed: fillTarget.fillPolicy,
769
+ plannerProfileUsed: input.plannerProfile,
770
+ filter: input.filter,
771
+ pushdownFilter: effectivePushdownFilter,
772
+ queryMultiplierUsed: queryMultiplier2,
773
+ filteredCandidateCount: effectiveFilteredCandidateCount,
774
+ finalSearchK: currentSearchK,
775
+ initialSearchK,
776
+ nativeDiagnostics,
777
+ minResultsSatisfied: returned.length >= minResults,
778
+ returnedCount: returned.length,
779
+ topK: input.topK,
780
+ underfilledTopK: returned.length < input.topK
781
+ });
782
+ return returned;
783
+ };
784
+ const queryLexical = async (input) => {
785
+ await init();
786
+ const pushdownFilter = buildPostgresPushdownFilter(input.filter);
787
+ const lexicalFilterPlan = buildPostgresFilterPlan(pushdownFilter);
788
+ const rowsSql = lexicalFilterPlan?.clause ? `select chunk_id, text, title, source, metadata from ${qualifiedTableName} where ${lexicalFilterPlan.clause}` : `select chunk_id, text, title, source, metadata from ${qualifiedTableName}`;
789
+ const rows = await db.unsafe(rowsSql, lexicalFilterPlan?.params ?? []);
790
+ const chunks = rows.map((row) => mapRowToChunk(row)).filter((chunk) => matchesFilter(chunk, input.filter));
791
+ const ranked = rankRAGLexicalMatches(input.query, chunks);
792
+ return ranked.slice(0, input.topK).map(({ result, score }) => ({
793
+ chunkId: result.chunkId,
794
+ chunkText: result.text,
795
+ metadata: result.metadata,
796
+ score,
797
+ source: result.source,
798
+ title: result.title
799
+ }));
538
800
  };
539
801
  const upsert = async (input) => {
540
- await ensureInitialized();
541
- const client = await getClient();
542
- const qualifiedChunkTable = qualifiedTable(schema.schemaName, schema.chunkTableName);
543
- const sql = `INSERT INTO ${qualifiedChunkTable} (chunk_id, text, title, source, metadata, embedding, updated_at) VALUES ($1, $2, $3, $4, $5::jsonb, $6::vector, NOW()) ON CONFLICT (chunk_id) DO UPDATE SET text = EXCLUDED.text, title = EXCLUDED.title, source = EXCLUDED.source, metadata = EXCLUDED.metadata, embedding = EXCLUDED.embedding, updated_at = NOW()`;
544
- try {
545
- for (const chunk of input.chunks) {
546
- const vectorValue = Array.isArray(chunk.embedding) && chunk.embedding.length > 0 ? normalizeVector(chunk.embedding) : await embed({ text: chunk.text });
547
- await client.query(sql, [
548
- chunk.chunkId,
549
- chunk.text,
550
- chunk.title ?? null,
551
- chunk.source ?? null,
552
- JSON.stringify(chunk.metadata ?? {}),
553
- vectorLiteral(vectorValue)
554
- ]);
555
- }
556
- } catch (error) {
557
- diagnostics.lastUpsertError = error instanceof Error ? error.message : String(error);
558
- throw error;
802
+ await init();
803
+ const chunks = input.chunks.length > 0 ? await Promise.all(input.chunks.map(async (chunk) => ({
804
+ chunkId: chunk.chunkId,
805
+ metadata: chunk.metadata,
806
+ source: chunk.source,
807
+ text: chunk.text,
808
+ title: chunk.title,
809
+ vector: chunk.embedding ? normalizeVector(chunk.embedding) : normalizeVector(await embed({ text: chunk.text }))
810
+ }))) : [];
811
+ for (const chunk of chunks) {
812
+ await db.unsafe(`insert into ${qualifiedTableName} (chunk_id, text, title, source, metadata, embedding)
813
+ values ($1, $2, $3, $4, $5::jsonb, $6::vector)
814
+ on conflict (chunk_id) do update set
815
+ text = excluded.text,
816
+ title = excluded.title,
817
+ source = excluded.source,
818
+ metadata = excluded.metadata,
819
+ embedding = excluded.embedding`, [
820
+ chunk.chunkId,
821
+ chunk.text,
822
+ chunk.title ?? null,
823
+ chunk.source ?? null,
824
+ chunk.metadata ?? null,
825
+ toVectorLiteral(chunk.vector)
826
+ ]);
827
+ }
828
+ await refreshPostgresRuntimeDiagnostics(db, nativeDiagnostics, {
829
+ indexName,
830
+ qualifiedTableName,
831
+ schemaName,
832
+ tableName
833
+ });
834
+ };
835
+ const count = async (input = {}) => {
836
+ await init();
837
+ const filter = input.filter;
838
+ const chunkIds = input.chunkIds;
839
+ const hasFilter = Boolean(filter && Object.keys(filter).length > 0);
840
+ const hasChunkIds = Boolean(chunkIds && chunkIds.length > 0);
841
+ if (!hasFilter && !hasChunkIds) {
842
+ const countResult = await db.unsafe(`select count(*)::int as count from ${qualifiedTableName}`);
843
+ return parseCountValue(countResult[0]?.count);
559
844
  }
845
+ return (await getPostgresCandidateChunkIds(db, qualifiedTableName, {
846
+ filter,
847
+ chunkIds
848
+ })).length;
849
+ };
850
+ const remove = async (input = {}) => {
851
+ await init();
852
+ const filter = input.filter;
853
+ const chunkIds = input.chunkIds;
854
+ const hasFilter = Boolean(filter && Object.keys(filter).length > 0);
855
+ const hasChunkIds = Boolean(chunkIds && chunkIds.length > 0);
856
+ if (!hasFilter && !hasChunkIds) {
857
+ return 0;
858
+ }
859
+ const ids = await getPostgresCandidateChunkIds(db, qualifiedTableName, {
860
+ filter,
861
+ chunkIds
862
+ });
863
+ if (ids.length === 0) {
864
+ return 0;
865
+ }
866
+ const placeholders = ids.map((_, index) => `$${index + 1}`).join(", ");
867
+ await db.unsafe(`delete from ${qualifiedTableName} where chunk_id in (${placeholders})`, ids);
868
+ await refreshPostgresRuntimeDiagnostics(db, nativeDiagnostics, {
869
+ indexName,
870
+ qualifiedTableName,
871
+ schemaName,
872
+ tableName
873
+ });
874
+ return ids.length;
560
875
  };
561
876
  const clear = async () => {
562
- await ensureInitialized();
563
- const client = await getClient();
564
- const qualifiedChunkTable = qualifiedTable(schema.schemaName, schema.chunkTableName);
565
- await client.query(`DELETE FROM ${qualifiedChunkTable}`);
877
+ await init();
878
+ await db.unsafe(`truncate table ${qualifiedTableName}`);
879
+ await refreshPostgresRuntimeDiagnostics(db, nativeDiagnostics, {
880
+ indexName,
881
+ qualifiedTableName,
882
+ schemaName,
883
+ tableName
884
+ });
885
+ };
886
+ const analyze = async () => {
887
+ await init();
888
+ await analyzePostgresTable(db, nativeDiagnostics, {
889
+ indexName,
890
+ qualifiedTableName,
891
+ schemaName,
892
+ tableName
893
+ });
894
+ };
895
+ const rebuildNativeIndex = async () => {
896
+ await init();
897
+ await rebuildPostgresNativeIndex(db, nativeDiagnostics, {
898
+ distanceMetric,
899
+ hnswEfConstruction,
900
+ hnswM,
901
+ indexLists,
902
+ indexName,
903
+ indexType,
904
+ qualifiedTableName,
905
+ schemaName,
906
+ tableName
907
+ });
908
+ };
909
+ const close = async () => {
910
+ await db.close?.();
566
911
  };
567
912
  return {
913
+ analyze,
914
+ clear,
915
+ close,
568
916
  embed,
917
+ getCapabilities: () => capabilities,
918
+ getStatus: () => createPostgresStatus(dimensions, nativeDiagnostics),
569
919
  query,
570
- upsert,
571
- clear,
572
- getCapabilities: () => ({
573
- backend: "postgres",
574
- persistence: "external",
575
- nativeVectorSearch: true,
576
- serverSideFiltering: true,
577
- streamingIngestStatus: false
578
- }),
579
- getStatus: () => createPgvectorStoreStatus({
580
- vector,
581
- schema,
582
- diagnostics,
583
- initialized
584
- })
920
+ queryLexical,
921
+ rebuildNativeIndex: indexName ? rebuildNativeIndex : undefined,
922
+ count,
923
+ delete: remove,
924
+ upsert
585
925
  };
586
926
  };
587
- var createPostgresRAGCollection = (options) => createRAGCollection({
588
- store: createPgvectorStore(options)
589
- });
590
- var createPostgresRAG = (options) => {
591
- const store = createPgvectorStore(options);
592
- const collection = createRAGCollection({ store });
593
- const schemaPlan = createPostgresSchemaPlan(options);
594
- const migrationPlan = createPostgresMigrationPlan(options);
927
+
928
+ // src/index.ts
929
+ var ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME = "@absolutejs/rag-postgres";
930
+ var createPostgresRAGCollection = (options = {}) => {
931
+ const store = options.store ?? createPostgresRAGStore(options.storeOptions ?? {});
932
+ return createRAGCollection({ store });
933
+ };
934
+ var createPostgresRAG = (options = {}) => {
935
+ const store = options.store ?? createPostgresRAGStore(options.storeOptions ?? {});
936
+ const collection = options.collection ?? createRAGCollection({ store });
595
937
  return {
596
938
  store,
597
939
  collection,
598
- getStatus: () => store.getStatus?.(),
599
- getCapabilities: () => store.getCapabilities?.(),
600
- getSchemaPlan: () => schemaPlan,
601
- getMigrationPlan: () => migrationPlan,
602
- applyMigrations: (applyOptions) => applyPostgresMigrations(options, applyOptions)
940
+ getStatus: () => collection.getStatus?.() ?? store.getStatus?.(),
941
+ getCapabilities: () => collection.getCapabilities?.() ?? store.getCapabilities?.()
603
942
  };
604
943
  };
605
944
  var createPostgreSQLRAG = createPostgresRAG;
606
945
  export {
607
- createPostgresSchemaPlan,
946
+ ragPlugin,
947
+ createRAGCollection,
948
+ createPostgresRAGStore,
608
949
  createPostgresRAGCollection,
609
950
  createPostgresRAG,
610
- createPostgresMigrationPlan,
611
951
  createPostgreSQLRAG,
612
- createPgvectorStore,
613
- applyPostgresSchemaPlan,
614
- applyPostgresMigrations,
615
- POSTGRESQL_RAG_IMPLEMENTATIONS,
616
- PGVECTOR_INDEX_TYPES,
617
- PGVECTOR_DISTANCE_METRICS,
618
952
  ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME
619
953
  };
620
954
 
621
- //# debugId=22931C2F3EFA23C264756E2164756E21
955
+ //# debugId=705798FA59AD060364756E2164756E21
622
956
  //# sourceMappingURL=index.js.map