@absolutejs/rag-postgres 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/createPostgresRAGStore.d.ts +3 -0
- package/dist/index.d.ts +16 -113
- package/dist/index.js +869 -535
- package/dist/index.js.map +5 -4
- package/dist/types.d.ts +14 -0
- package/package.json +5 -8
package/dist/index.js
CHANGED
|
@@ -1,622 +1,956 @@
|
|
|
1
1
|
// @bun
|
|
2
|
-
var __require = import.meta.require;
|
|
3
|
-
|
|
4
2
|
// src/index.ts
|
|
3
|
+
import { createRAGCollection, ragPlugin } from "@absolutejs/rag/adapter-kit";
|
|
4
|
+
|
|
5
|
+
// src/createPostgresRAGStore.ts
|
|
5
6
|
import {
|
|
6
|
-
createRAGCollection,
|
|
7
7
|
createRAGVector,
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
var
|
|
19
|
-
var
|
|
20
|
-
var
|
|
21
|
-
var
|
|
22
|
-
var
|
|
23
|
-
var
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
var
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
var
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
8
|
+
matchesMetadataFilterRecord,
|
|
9
|
+
normalizeVector,
|
|
10
|
+
planNativeCandidateSearchBackfillK,
|
|
11
|
+
planNativeCandidateSearchK,
|
|
12
|
+
RAG_NATIVE_QUERY_CANDIDATE_LIMIT,
|
|
13
|
+
RAG_VECTOR_DIMENSIONS_DEFAULT,
|
|
14
|
+
rankRAGLexicalMatches,
|
|
15
|
+
resolveAdaptiveNativeCandidateLimit,
|
|
16
|
+
summarizeSQLiteCandidateCoverage
|
|
17
|
+
} from "@absolutejs/rag/adapter-kit";
|
|
18
|
+
var DEFAULT_DIMENSIONS = RAG_VECTOR_DIMENSIONS_DEFAULT;
|
|
19
|
+
var DEFAULT_TABLE_NAME = "rag_chunks";
|
|
20
|
+
var DEFAULT_SCHEMA_NAME = "public";
|
|
21
|
+
var DEFAULT_QUERY_MULTIPLIER = 4;
|
|
22
|
+
var MAX_QUERY_MULTIPLIER = 16;
|
|
23
|
+
var DEFAULT_POSTGRES_INDEX_TYPE = "hnsw";
|
|
24
|
+
var DEFAULT_POSTGRES_IVFFLAT_LISTS = 100;
|
|
25
|
+
var DEFAULT_POSTGRES_HNSW_M = 16;
|
|
26
|
+
var DEFAULT_POSTGRES_HNSW_EF_CONSTRUCTION = 64;
|
|
27
|
+
var IDENTIFIER_RE = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
|
|
28
|
+
var FILTER_PATH_SEGMENT_RE = /^[a-zA-Z0-9_]+$/;
|
|
29
|
+
/**
 * Tells whether `value` is a plain record-like filter node.
 *
 * @param {unknown} value - Candidate filter value.
 * @returns {boolean} `true` for any truthy, non-array value whose `typeof` is
 *   "object"; `false` for primitives, `null`, `undefined`, functions and arrays.
 */
var isObjectFilterRecord = (value) => {
  // Falsy values (null, undefined, 0, "") and non-objects can never be records.
  if (!value || typeof value !== "object") {
    return false;
  }
  // Arrays are objects too, but they represent lists rather than records.
  return !Array.isArray(value);
};
|
|
30
|
+
/**
 * Tells whether `value` is an array made up solely of object filter records,
 * i.e. the shape expected as the operand of `$and` / `$or`.
 *
 * @param {unknown} value - Candidate operand.
 * @returns {boolean} `true` for an array (including an empty one) in which no
 *   entry fails `isObjectFilterRecord`; `false` for anything that is not an array.
 */
var isNestedFilterArray = (value) => {
  if (!Array.isArray(value)) {
    return false;
  }
  // The array qualifies as long as it holds no entry that is not a record.
  const hasInvalidEntry = value.some((candidate) => !isObjectFilterRecord(candidate));
  return !hasInvalidEntry;
};
|
|
31
|
+
/**
 * Tells whether `value` is an operator record such as `{ $in: [...] }`.
 *
 * @param {unknown} value - Candidate filter value.
 * @returns {boolean} `true` when `value` passes `isObjectFilterRecord` and at
 *   least one of its own enumerable keys begins with "$"; otherwise `false`.
 */
var isOperatorFilterRecord = (value) => {
  if (!isObjectFilterRecord(value)) {
    return false;
  }
  // A single "$"-prefixed key is enough to treat the record as operators.
  return Object.keys(value).some((name) => name.startsWith("$"));
};
|
|
32
|
+
var countFilterClauses = (filter) => {
|
|
33
|
+
if (!filter) {
|
|
34
|
+
return 0;
|
|
35
|
+
}
|
|
36
|
+
let count = 0;
|
|
37
|
+
for (const [key, value] of Object.entries(filter)) {
|
|
38
|
+
if (key === "$and" || key === "$or") {
|
|
39
|
+
if (isNestedFilterArray(value)) {
|
|
40
|
+
count += value.reduce((total, entry) => total + countFilterClauses(entry), 0);
|
|
41
|
+
}
|
|
42
|
+
continue;
|
|
41
43
|
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
};
|
|
50
|
-
var normalizeMetric = (metric) => {
|
|
51
|
-
if (metric === "l2" || metric === "inner_product") {
|
|
52
|
-
return metric;
|
|
44
|
+
if (key === "$not") {
|
|
45
|
+
if (isObjectFilterRecord(value)) {
|
|
46
|
+
count += countFilterClauses(value);
|
|
47
|
+
}
|
|
48
|
+
continue;
|
|
49
|
+
}
|
|
50
|
+
count += 1;
|
|
53
51
|
}
|
|
54
|
-
return
|
|
52
|
+
return count;
|
|
55
53
|
};
|
|
56
|
-
var
|
|
57
|
-
|
|
58
|
-
|
|
54
|
+
var toPostgresJsonPath = (key) => {
|
|
55
|
+
const segments = key.split(".").filter(Boolean);
|
|
56
|
+
if (segments.length === 0 || !segments.every((segment) => FILTER_PATH_SEGMENT_RE.test(segment))) {
|
|
57
|
+
return null;
|
|
59
58
|
}
|
|
60
|
-
|
|
61
|
-
|
|
59
|
+
return segments;
|
|
60
|
+
};
|
|
61
|
+
var toPostgresFilterBinding = (value) => {
|
|
62
|
+
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean" || value === null) {
|
|
63
|
+
return value;
|
|
62
64
|
}
|
|
63
|
-
|
|
65
|
+
return;
|
|
64
66
|
};
|
|
65
|
-
var
|
|
66
|
-
const
|
|
67
|
-
|
|
68
|
-
const migrationTableName = options.schema?.migrationTableName ?? DEFAULT_MIGRATION_TABLE_NAME;
|
|
69
|
-
assertIdentifier(schemaName, "schema name");
|
|
70
|
-
assertIdentifier(chunkTableName, "chunk table name");
|
|
71
|
-
assertIdentifier(migrationTableName, "migration table name");
|
|
72
|
-
return {
|
|
73
|
-
schemaName,
|
|
74
|
-
chunkTableName,
|
|
75
|
-
migrationTableName
|
|
76
|
-
};
|
|
67
|
+
var buildPostgresJsonbScalarEquality = (input) => {
|
|
68
|
+
const comparison = input.comparison ?? "=";
|
|
69
|
+
return comparison === "=" ? `jsonb_typeof(${input.valueSql}) = 'null'` : `coalesce(jsonb_typeof(${input.valueSql}), 'missing') <> 'null'`;
|
|
77
70
|
};
|
|
78
|
-
var
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
throw new Error(`${ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME}: dimensions must be a positive integer`);
|
|
71
|
+
var buildPostgresMetadataScalarEquality = (input) => input.value === null ? buildPostgresJsonbScalarEquality({
|
|
72
|
+
comparison: input.comparison,
|
|
73
|
+
valueSql: input.valueSql
|
|
74
|
+
}) : `${input.actualSql} ${input.comparison ?? "="} ${input.bind(String(input.value))}`;
|
|
75
|
+
var buildPostgresFilterPlan = (filter, startIndex = 0) => {
|
|
76
|
+
if (!filter) {
|
|
77
|
+
return { clause: "", params: [] };
|
|
86
78
|
}
|
|
87
|
-
const
|
|
88
|
-
const
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
dimensions,
|
|
92
|
-
distanceMetric,
|
|
93
|
-
extensionName: vector.extensionName ?? "vector",
|
|
94
|
-
index
|
|
79
|
+
const params = [];
|
|
80
|
+
const bind = (value) => {
|
|
81
|
+
params.push(value);
|
|
82
|
+
return `$${params.length + startIndex}`;
|
|
95
83
|
};
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
};
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
};
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
84
|
+
const build = (entry) => {
|
|
85
|
+
const clauses = [];
|
|
86
|
+
for (const [key, value] of Object.entries(entry)) {
|
|
87
|
+
if (key === "$and" || key === "$or") {
|
|
88
|
+
if (!isNestedFilterArray(value) || value.length === 0) {
|
|
89
|
+
return null;
|
|
90
|
+
}
|
|
91
|
+
const nested = value.map((item) => build(item));
|
|
92
|
+
if (nested.some((item) => item === null)) {
|
|
93
|
+
return null;
|
|
94
|
+
}
|
|
95
|
+
clauses.push(`(${nested.filter((item) => Boolean(item)).join(key === "$and" ? " AND " : " OR ")})`);
|
|
96
|
+
continue;
|
|
97
|
+
}
|
|
98
|
+
if (key === "$not") {
|
|
99
|
+
if (!isObjectFilterRecord(value)) {
|
|
100
|
+
return null;
|
|
101
|
+
}
|
|
102
|
+
const nested = build(value);
|
|
103
|
+
if (!nested) {
|
|
104
|
+
return null;
|
|
105
|
+
}
|
|
106
|
+
clauses.push(`NOT (${nested})`);
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
109
|
+
const isScalarField = key === "chunkId" || key === "source" || key === "title";
|
|
110
|
+
const jsonPath = isScalarField ? null : toPostgresJsonPath(key);
|
|
111
|
+
if (!isScalarField && !jsonPath) {
|
|
112
|
+
return null;
|
|
113
|
+
}
|
|
114
|
+
let actualSql;
|
|
115
|
+
let metadataPathSegments = [];
|
|
116
|
+
let metadataValueSql;
|
|
117
|
+
if (isScalarField) {
|
|
118
|
+
actualSql = key === "chunkId" ? "chunk_id" : key;
|
|
119
|
+
} else {
|
|
120
|
+
metadataPathSegments = jsonPath ?? [];
|
|
121
|
+
actualSql = `jsonb_extract_path_text(metadata, ${metadataPathSegments.map((segment) => `'${segment}'`).join(", ")})`;
|
|
122
|
+
metadataValueSql = `metadata #> '{${metadataPathSegments.join(",")}}'`;
|
|
123
|
+
}
|
|
124
|
+
if (!isOperatorFilterRecord(value)) {
|
|
125
|
+
const binding = toPostgresFilterBinding(value);
|
|
126
|
+
if (binding === undefined) {
|
|
127
|
+
return null;
|
|
128
|
+
}
|
|
129
|
+
clauses.push(isScalarField ? `${actualSql} = ${bind(String(binding))}` : buildPostgresMetadataScalarEquality({
|
|
130
|
+
actualSql,
|
|
131
|
+
bind,
|
|
132
|
+
value: binding,
|
|
133
|
+
valueSql: metadataValueSql
|
|
134
|
+
}));
|
|
135
|
+
continue;
|
|
136
|
+
}
|
|
137
|
+
const operatorClauses = Object.entries(value).map(([operator, expected]) => {
|
|
138
|
+
switch (operator) {
|
|
139
|
+
case "$exists":
|
|
140
|
+
return isScalarField ? expected ? `${actualSql} IS NOT NULL` : `${actualSql} IS NULL` : expected ? `${metadataValueSql} IS NOT NULL` : `${metadataValueSql} IS NULL`;
|
|
141
|
+
case "$in": {
|
|
142
|
+
if (!Array.isArray(expected) || expected.length === 0) {
|
|
143
|
+
return null;
|
|
144
|
+
}
|
|
145
|
+
const bindings = expected.map((entry2) => toPostgresFilterBinding(entry2)).filter((entry2) => entry2 !== undefined);
|
|
146
|
+
if (bindings.length !== expected.length) {
|
|
147
|
+
return null;
|
|
148
|
+
}
|
|
149
|
+
return isScalarField ? `${actualSql} IN (${bindings.map((entry2) => bind(String(entry2))).join(", ")})` : `(${bindings.map((entry2) => buildPostgresMetadataScalarEquality({
|
|
150
|
+
actualSql,
|
|
151
|
+
bind,
|
|
152
|
+
value: entry2,
|
|
153
|
+
valueSql: metadataValueSql
|
|
154
|
+
})).join(" OR ")})`;
|
|
155
|
+
}
|
|
156
|
+
case "$ne": {
|
|
157
|
+
const binding = toPostgresFilterBinding(expected);
|
|
158
|
+
return binding === undefined ? null : isScalarField ? `${actualSql} <> ${bind(String(binding))}` : buildPostgresMetadataScalarEquality({
|
|
159
|
+
actualSql,
|
|
160
|
+
bind,
|
|
161
|
+
comparison: "<>",
|
|
162
|
+
value: binding,
|
|
163
|
+
valueSql: metadataValueSql
|
|
164
|
+
});
|
|
165
|
+
}
|
|
166
|
+
case "$gt":
|
|
167
|
+
case "$gte":
|
|
168
|
+
case "$lt":
|
|
169
|
+
case "$lte": {
|
|
170
|
+
if (typeof expected !== "number" || !Number.isFinite(expected)) {
|
|
171
|
+
return null;
|
|
172
|
+
}
|
|
173
|
+
const comparison = operator === "$gt" ? ">" : operator === "$gte" ? ">=" : operator === "$lt" ? "<" : "<=";
|
|
174
|
+
return `((${actualSql}) ~ '^-?[0-9]+(\\.[0-9]+)?$' AND (${actualSql})::double precision ${comparison} ${bind(expected)})`;
|
|
175
|
+
}
|
|
176
|
+
case "$contains":
|
|
177
|
+
if (isScalarField) {
|
|
178
|
+
return null;
|
|
179
|
+
}
|
|
180
|
+
if (toPostgresFilterBinding(expected) === undefined) {
|
|
181
|
+
return null;
|
|
182
|
+
}
|
|
183
|
+
return `(${metadataValueSql} IS NOT NULL AND ${metadataValueSql} ? ${bind(String(expected))})`;
|
|
184
|
+
case "$containsAny":
|
|
185
|
+
case "$containsAll": {
|
|
186
|
+
if (isScalarField || !Array.isArray(expected)) {
|
|
187
|
+
return null;
|
|
188
|
+
}
|
|
189
|
+
const values = expected.map((entry2) => toPostgresFilterBinding(entry2)).filter((entry2) => entry2 !== undefined);
|
|
190
|
+
if (values.length === 0 || values.length !== expected.length) {
|
|
191
|
+
return null;
|
|
192
|
+
}
|
|
193
|
+
const sqlArray = `ARRAY[${values.map((value2) => bind(String(value2))).join(", ")}]::text[]`;
|
|
194
|
+
return `(${metadataValueSql} IS NOT NULL AND ${metadataValueSql} ${operator === "$containsAny" ? "?|" : "?&"} ${sqlArray})`;
|
|
195
|
+
}
|
|
196
|
+
default:
|
|
197
|
+
return null;
|
|
198
|
+
}
|
|
199
|
+
});
|
|
200
|
+
if (operatorClauses.some((clause2) => clause2 === null)) {
|
|
201
|
+
return null;
|
|
202
|
+
}
|
|
203
|
+
clauses.push(operatorClauses.filter((clause2) => Boolean(clause2)).map((clause2) => `(${clause2})`).join(" AND "));
|
|
152
204
|
}
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
}
|
|
157
|
-
const withClause = withParts.length > 0 ? ` WITH (${withParts.join(", ")})` : "";
|
|
158
|
-
return [
|
|
159
|
-
`CREATE INDEX IF NOT EXISTS ${quoteIdentifier(indexName)} ON ${qualifiedChunkTable} USING ${index.type} (embedding ${opClass})${withClause}`
|
|
160
|
-
];
|
|
205
|
+
return clauses.length > 0 ? clauses.map((clause2) => `(${clause2})`).join(" AND ") : "";
|
|
206
|
+
};
|
|
207
|
+
const clause = build(filter);
|
|
208
|
+
return clause === null || clause.trim().length === 0 ? null : { clause, params };
|
|
161
209
|
};
|
|
162
|
-
var
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
if (!index || index.type === "none") {
|
|
166
|
-
return [];
|
|
210
|
+
var buildPostgresPushdownFilter = (filter) => {
|
|
211
|
+
if (!filter) {
|
|
212
|
+
return;
|
|
167
213
|
}
|
|
168
|
-
const
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
214
|
+
const hasPushdownFilterPlan = (entry) => {
|
|
215
|
+
const plan = buildPostgresFilterPlan(entry);
|
|
216
|
+
return plan !== null && Boolean(plan.clause) && plan.clause.trim().length > 0;
|
|
217
|
+
};
|
|
218
|
+
const hasPushdownFilterPlanEntry = (entry) => {
|
|
219
|
+
if (!isObjectFilterRecord(entry)) {
|
|
220
|
+
return false;
|
|
172
221
|
}
|
|
173
|
-
|
|
174
|
-
|
|
222
|
+
return hasPushdownFilterPlan(entry);
|
|
223
|
+
};
|
|
224
|
+
const nextEntries = [];
|
|
225
|
+
for (const [key, value] of Object.entries(filter)) {
|
|
226
|
+
if (key === "$and" || key === "$or") {
|
|
227
|
+
if (!isNestedFilterArray(value)) {
|
|
228
|
+
continue;
|
|
229
|
+
}
|
|
230
|
+
const nested = value.map((entry) => buildPostgresPushdownFilter(entry)).filter((entry) => hasPushdownFilterPlanEntry(entry));
|
|
231
|
+
if (nested.length > 0) {
|
|
232
|
+
nextEntries.push([key, nested]);
|
|
233
|
+
}
|
|
234
|
+
continue;
|
|
175
235
|
}
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
236
|
+
if (key === "$not") {
|
|
237
|
+
if (!isObjectFilterRecord(value)) {
|
|
238
|
+
continue;
|
|
239
|
+
}
|
|
240
|
+
const nested = buildPostgresPushdownFilter(value);
|
|
241
|
+
if (hasPushdownFilterPlanEntry(nested)) {
|
|
242
|
+
nextEntries.push([key, nested]);
|
|
243
|
+
}
|
|
244
|
+
continue;
|
|
180
245
|
}
|
|
181
|
-
if (
|
|
182
|
-
|
|
246
|
+
if (Array.isArray(value) || isOperatorFilterRecord(value) && Object.keys(value).some((operator) => !(operator === "$exists" || operator === "$in" || operator === "$contains" || operator === "$containsAny" || operator === "$containsAll" || operator === "$ne" || operator === "$gt" || operator === "$gte" || operator === "$lt" || operator === "$lte"))) {
|
|
247
|
+
continue;
|
|
183
248
|
}
|
|
184
|
-
|
|
185
|
-
|
|
249
|
+
const isScalarColumnKey = ["chunkId", "source", "title"].includes(key);
|
|
250
|
+
const jsonPath = isScalarColumnKey ? null : toPostgresJsonPath(key);
|
|
251
|
+
if (!isScalarColumnKey && !jsonPath) {
|
|
252
|
+
continue;
|
|
186
253
|
}
|
|
254
|
+
if (!hasPushdownFilterPlan({ [key]: value })) {
|
|
255
|
+
continue;
|
|
256
|
+
}
|
|
257
|
+
nextEntries.push([key, value]);
|
|
187
258
|
}
|
|
188
|
-
return
|
|
189
|
-
};
|
|
190
|
-
var stageOrder = [
|
|
191
|
-
"extension",
|
|
192
|
-
"schema",
|
|
193
|
-
"table",
|
|
194
|
-
"index"
|
|
195
|
-
];
|
|
196
|
-
var buildMigrationName = (stage, stageIndex, sql) => {
|
|
197
|
-
const normalized = sql.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_+|_+$/g, "").slice(0, 48) || "statement";
|
|
198
|
-
const globalOrder = String(stageOrder.indexOf(stage) + 1).padStart(2, "0");
|
|
199
|
-
const localOrder = String(stageIndex + 1).padStart(3, "0");
|
|
200
|
-
return `${globalOrder}_${stage}_${localOrder}_${normalized}`;
|
|
201
|
-
};
|
|
202
|
-
var createMigrationTableSql = (schemaName, migrationTableName) => `CREATE TABLE IF NOT EXISTS ${qualifiedTable(schemaName, migrationTableName)} (name TEXT PRIMARY KEY, applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW())`;
|
|
203
|
-
var filterTrackedTableSql = (tableSql, schemaName, migrationTableName) => {
|
|
204
|
-
const migrationTableTarget = qualifiedTable(schemaName, migrationTableName);
|
|
205
|
-
return tableSql.filter((sql) => !sql.includes(migrationTableTarget));
|
|
206
|
-
};
|
|
207
|
-
var createPostgresSchemaPlan = (options) => {
|
|
208
|
-
const schema = resolveSchemaConfig(options ?? {});
|
|
209
|
-
const vector = resolveVectorConfig(options ?? {});
|
|
210
|
-
const qualifiedChunkTable = qualifiedTable(schema.schemaName, schema.chunkTableName);
|
|
211
|
-
const qualifiedMigrationTable = qualifiedTable(schema.schemaName, schema.migrationTableName);
|
|
212
|
-
const extensionSql = vector.autoCreateExtension === false ? [] : [
|
|
213
|
-
`CREATE EXTENSION IF NOT EXISTS ${quoteIdentifier(vector.extensionName)}`
|
|
214
|
-
];
|
|
215
|
-
const schemaSql = vector.autoCreateSchema === false ? [] : [
|
|
216
|
-
`CREATE SCHEMA IF NOT EXISTS ${quoteIdentifier(schema.schemaName)}`
|
|
217
|
-
];
|
|
218
|
-
const tableSql = vector.autoCreateTables === false ? [] : [
|
|
219
|
-
`CREATE TABLE IF NOT EXISTS ${qualifiedChunkTable} (id BIGSERIAL PRIMARY KEY, chunk_id TEXT NOT NULL UNIQUE, text TEXT NOT NULL, title TEXT, source TEXT, metadata JSONB NOT NULL DEFAULT '{}'::jsonb, embedding VECTOR(${vector.dimensions}) NOT NULL, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW())`,
|
|
220
|
-
`CREATE INDEX IF NOT EXISTS ${quoteIdentifier(`${schema.chunkTableName}_chunk_id_idx`)} ON ${qualifiedChunkTable} (chunk_id)`,
|
|
221
|
-
`CREATE INDEX IF NOT EXISTS ${quoteIdentifier(`${schema.chunkTableName}_source_idx`)} ON ${qualifiedChunkTable} (source)`,
|
|
222
|
-
`CREATE INDEX IF NOT EXISTS ${quoteIdentifier(`${schema.chunkTableName}_metadata_idx`)} ON ${qualifiedChunkTable} USING GIN (metadata)`,
|
|
223
|
-
createMigrationTableSql(schema.schemaName, schema.migrationTableName)
|
|
224
|
-
];
|
|
225
|
-
const indexSql = vector.autoCreateIndex === false ? [] : createIndexSql({
|
|
226
|
-
schemaName: schema.schemaName,
|
|
227
|
-
chunkTableName: schema.chunkTableName,
|
|
228
|
-
distanceMetric: vector.distanceMetric,
|
|
229
|
-
index: vector.index
|
|
230
|
-
});
|
|
231
|
-
return {
|
|
232
|
-
implementation: "pgvector",
|
|
233
|
-
extensionSql,
|
|
234
|
-
schemaSql,
|
|
235
|
-
tableSql,
|
|
236
|
-
indexSql,
|
|
237
|
-
querySessionSql: createQuerySessionSql({ index: vector.index }),
|
|
238
|
-
migrationTableQualifiedName: qualifiedMigrationTable
|
|
239
|
-
};
|
|
259
|
+
return nextEntries.length > 0 ? Object.fromEntries(nextEntries) : undefined;
|
|
240
260
|
};
|
|
241
|
-
var
|
|
242
|
-
const
|
|
243
|
-
const
|
|
244
|
-
const
|
|
245
|
-
|
|
246
|
-
bootstrapSql.push(...schemaPlan.schemaSql);
|
|
247
|
-
}
|
|
248
|
-
const migrationTableSql = createMigrationTableSql(schema.schemaName, schema.migrationTableName);
|
|
249
|
-
if (!bootstrapSql.includes(migrationTableSql)) {
|
|
250
|
-
bootstrapSql.push(migrationTableSql);
|
|
251
|
-
}
|
|
252
|
-
const migrations = [
|
|
253
|
-
...schemaPlan.extensionSql.map((sql, index) => ({
|
|
254
|
-
stage: "extension",
|
|
255
|
-
sql,
|
|
256
|
-
stageIndex: index
|
|
257
|
-
})),
|
|
258
|
-
...filterTrackedTableSql(schemaPlan.tableSql, schema.schemaName, schema.migrationTableName).map((sql, index) => ({
|
|
259
|
-
stage: "table",
|
|
260
|
-
sql,
|
|
261
|
-
stageIndex: index
|
|
262
|
-
})),
|
|
263
|
-
...schemaPlan.indexSql.map((sql, index) => ({
|
|
264
|
-
stage: "index",
|
|
265
|
-
sql,
|
|
266
|
-
stageIndex: index
|
|
267
|
-
}))
|
|
268
|
-
].map((entry) => ({
|
|
269
|
-
name: buildMigrationName(entry.stage, entry.stageIndex, entry.sql),
|
|
270
|
-
stage: entry.stage,
|
|
271
|
-
sql: entry.sql
|
|
272
|
-
}));
|
|
261
|
+
var resolvePostgresPushdownMode = (input) => {
|
|
262
|
+
const totalFilterClauseCount = countFilterClauses(input.filter);
|
|
263
|
+
const pushdownClauseCount = countFilterClauses(input.pushdownFilter);
|
|
264
|
+
const jsRemainderClauseCount = Math.max(0, totalFilterClauseCount - pushdownClauseCount);
|
|
265
|
+
const pushdownMode = pushdownClauseCount === 0 ? "none" : pushdownClauseCount >= totalFilterClauseCount ? "full" : "partial";
|
|
273
266
|
return {
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
schemaPlan
|
|
267
|
+
jsRemainderClauseCount,
|
|
268
|
+
jsRemainderRatio: totalFilterClauseCount > 0 ? jsRemainderClauseCount / totalFilterClauseCount : undefined,
|
|
269
|
+
pushdownClauseCount,
|
|
270
|
+
pushdownCoverageRatio: totalFilterClauseCount > 0 ? pushdownClauseCount / totalFilterClauseCount : undefined,
|
|
271
|
+
pushdownMode,
|
|
272
|
+
totalFilterClauseCount
|
|
281
273
|
};
|
|
282
274
|
};
|
|
283
|
-
var
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
return {
|
|
287
|
-
rows,
|
|
288
|
-
rowCount: typeof rows.count === "number" ? rows.count : rows.length
|
|
289
|
-
};
|
|
290
|
-
},
|
|
291
|
-
transaction: async (run) => rootSql.begin(async (transactionSql) => run(createWrappedPostgresClient(transactionSql, transactionSql))),
|
|
292
|
-
close: async () => {
|
|
293
|
-
if (typeof rootSql.end === "function") {
|
|
294
|
-
await rootSql.end({ timeout: 5 });
|
|
295
|
-
}
|
|
275
|
+
var assertSupportedIdentifier = (name) => {
|
|
276
|
+
if (!IDENTIFIER_RE.test(name)) {
|
|
277
|
+
throw new Error(`Invalid identifier "${name}". Only alphanumeric and underscore names are allowed.`);
|
|
296
278
|
}
|
|
297
|
-
});
|
|
298
|
-
var createDefaultPostgresClientFactory = (options) => {
|
|
299
|
-
const connectionString = typeof options.connectionString === "string" ? options.connectionString.trim() : "";
|
|
300
|
-
if (connectionString.length === 0) {
|
|
301
|
-
return;
|
|
302
|
-
}
|
|
303
|
-
let clientPromise;
|
|
304
|
-
return async () => {
|
|
305
|
-
if (!clientPromise) {
|
|
306
|
-
clientPromise = (async () => {
|
|
307
|
-
const postgresModule = await import("postgres");
|
|
308
|
-
const postgres = postgresModule.default;
|
|
309
|
-
const sql = postgres(connectionString, {
|
|
310
|
-
onnotice: () => {},
|
|
311
|
-
...options.driver ?? {}
|
|
312
|
-
});
|
|
313
|
-
return createWrappedPostgresClient(sql, sql);
|
|
314
|
-
})();
|
|
315
|
-
}
|
|
316
|
-
return clientPromise;
|
|
317
|
-
};
|
|
318
279
|
};
|
|
319
|
-
var
|
|
320
|
-
if (
|
|
321
|
-
|
|
322
|
-
return async () => clientFactory();
|
|
280
|
+
var normalizePostgresIndexType = (value) => {
|
|
281
|
+
if (value === undefined) {
|
|
282
|
+
return DEFAULT_POSTGRES_INDEX_TYPE;
|
|
323
283
|
}
|
|
324
|
-
if (
|
|
325
|
-
|
|
326
|
-
return async () => client;
|
|
284
|
+
if (value === "none" || value === "hnsw" || value === "ivfflat") {
|
|
285
|
+
return value;
|
|
327
286
|
}
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
287
|
+
throw new Error(`Invalid postgres index type "${String(value)}". Expected "none", "hnsw", or "ivfflat".`);
|
|
288
|
+
};
|
|
289
|
+
var normalizePositiveInteger = (value, fallback) => {
|
|
290
|
+
if (value === undefined || !Number.isFinite(value)) {
|
|
291
|
+
return fallback;
|
|
331
292
|
}
|
|
332
|
-
return
|
|
333
|
-
throw new Error(`${ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME}: createPostgresRAG requires connectionString, client, or clientFactory.`);
|
|
334
|
-
};
|
|
293
|
+
return Math.max(1, Math.floor(value));
|
|
335
294
|
};
|
|
336
|
-
var
|
|
337
|
-
|
|
295
|
+
/**
 * Maps a distance metric name to the operator class used in the vector
 * index definition.
 *
 * @param {string} metric - Distance metric ("cosine", "inner_product", or anything else).
 * @returns {string} "vector_cosine_ops" for "cosine", "vector_ip_ops" for
 *   "inner_product", and "vector_l2_ops" for every other value.
 */
var getPostgresIndexOperatorClass = (metric) => {
  switch (metric) {
    case "cosine":
      return "vector_cosine_ops";
    case "inner_product":
      return "vector_ip_ops";
    default:
      // Any unrecognised metric falls back to the L2 operator class.
      return "vector_l2_ops";
  }
};
|
|
296
|
+
/**
 * Derives the name of the embedding index for a table.
 *
 * @param {string} qualifiedTableName - Table name in `schema.table` form.
 * @param {string} indexType - Index type; "none" means no index is configured.
 * @returns {string | undefined} `undefined` when `indexType` is "none";
 *   otherwise `<schema>_<table>_embedding_<indexType>_idx`, clamped to 63
 *   characters.
 */
var getPostgresIndexName = (qualifiedTableName, indexType) => {
  if (indexType === "none") {
    return undefined;
  }
  // PostgreSQL keeps only the first 63 bytes of an identifier and silently
  // truncates anything longer. Clamping here keeps the name we hand out equal
  // to the name the server actually stores, so exact-match lookups by index
  // name (e.g. against pg_indexes.indexname) still find the index.
  // NOTE(review): assumes ASCII names, so characters == bytes — confirm that
  // every caller validates the schema/table names against IDENTIFIER_RE.
  const maxIdentifierLength = 63;
  // Only the first "." is replaced: the schema/table separator.
  const fullName = `${qualifiedTableName.replace(".", "_")}_embedding_${indexType}_idx`;
  return fullName.slice(0, maxIdentifierLength);
};
|
|
297
|
+
var buildPostgresIndexSql = (input) => {
|
|
298
|
+
if (input.indexType === "none") {
|
|
338
299
|
return;
|
|
339
300
|
}
|
|
340
|
-
const
|
|
341
|
-
|
|
342
|
-
|
|
301
|
+
const opclass = getPostgresIndexOperatorClass(input.distanceMetric);
|
|
302
|
+
const indexName = getPostgresIndexName(input.qualifiedTableName, input.indexType);
|
|
303
|
+
const optionsSql = input.indexType === "hnsw" ? ` with (m = ${input.hnswM}, ef_construction = ${input.hnswEfConstruction})` : ` with (lists = ${input.indexLists})`;
|
|
304
|
+
const createPrefix = input.ifNotExists === false ? "create index" : "create index if not exists";
|
|
305
|
+
return `${createPrefix} ${indexName} on ${input.qualifiedTableName} using ${input.indexType} (embedding ${opclass})${optionsSql}`;
|
|
306
|
+
};
|
|
307
|
+
var normalizeQueryMultiplier = (value) => {
|
|
308
|
+
if (value === undefined || !Number.isFinite(value)) {
|
|
309
|
+
return DEFAULT_QUERY_MULTIPLIER;
|
|
343
310
|
}
|
|
344
|
-
return
|
|
311
|
+
return Math.min(MAX_QUERY_MULTIPLIER, Math.max(1, Math.floor(value)));
|
|
345
312
|
};
|
|
346
|
-
var
|
|
347
|
-
if (value ===
|
|
313
|
+
var normalizeMaxBackfills = (value) => {
|
|
314
|
+
if (value === undefined || !Number.isFinite(value)) {
|
|
348
315
|
return;
|
|
349
316
|
}
|
|
317
|
+
return Math.max(0, Math.floor(value));
|
|
318
|
+
};
|
|
319
|
+
var normalizeMinResults = (value, topK) => {
|
|
320
|
+
if (value === undefined || !Number.isFinite(value)) {
|
|
321
|
+
return topK;
|
|
322
|
+
}
|
|
323
|
+
return Math.min(topK, Math.max(1, Math.floor(value)));
|
|
324
|
+
};
|
|
325
|
+
var resolveFillTarget = (input) => {
|
|
326
|
+
const fillPolicy = input.fillPolicy ?? "satisfy_min_results";
|
|
327
|
+
return {
|
|
328
|
+
fillPolicy,
|
|
329
|
+
targetResults: fillPolicy === "strict_topk" ? input.topK : input.minResults
|
|
330
|
+
};
|
|
331
|
+
};
|
|
332
|
+
/**
 * Joins a schema name and a table name into the dotted `schema.table` form.
 *
 * @param {string} schemaName - Schema part.
 * @param {string} tableName - Table part.
 * @returns {string} The two names separated by a single ".". No quoting or
 *   validation is performed here.
 */
var toQualifiedTableName = (schemaName, tableName) => {
  const qualifiedName = `${schemaName}.${tableName}`;
  return qualifiedName;
};
|
|
333
|
+
/**
 * Serialises a numeric vector into bracketed, comma-separated text.
 *
 * @param {number[]} vector - Vector components.
 * @returns {string} Text such as "[1,2.5,-3]"; an empty array yields "[]".
 */
var toVectorLiteral = (vector) => {
  // Components are joined with a bare comma, with no whitespace added.
  const components = vector.join(",");
  return `[${components}]`;
};
|
|
334
|
+
var parseMetadata = (value) => {
|
|
350
335
|
if (typeof value === "string") {
|
|
351
336
|
try {
|
|
352
337
|
const parsed = JSON.parse(value);
|
|
353
|
-
if (parsed && typeof parsed === "object") {
|
|
338
|
+
if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
|
|
354
339
|
return parsed;
|
|
355
340
|
}
|
|
356
341
|
} catch {
|
|
357
342
|
return;
|
|
358
343
|
}
|
|
359
344
|
}
|
|
360
|
-
if (typeof value
|
|
345
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
|
346
|
+
return;
|
|
347
|
+
}
|
|
348
|
+
return value;
|
|
349
|
+
};
|
|
350
|
+
var parseVectorText = (value) => {
|
|
351
|
+
if (!value) {
|
|
352
|
+
return [];
|
|
353
|
+
}
|
|
354
|
+
const normalized = value.trim();
|
|
355
|
+
const wrapped = normalized.startsWith("[") ? normalized : `[${normalized.replace(/[()]/g, "")}]`;
|
|
356
|
+
try {
|
|
357
|
+
const parsed = JSON.parse(wrapped);
|
|
358
|
+
return Array.isArray(parsed) ? parsed.filter((entry) => typeof entry === "number" && Number.isFinite(entry)) : [];
|
|
359
|
+
} catch {
|
|
360
|
+
return [];
|
|
361
|
+
}
|
|
362
|
+
};
|
|
363
|
+
var parseCountValue = (value) => {
|
|
364
|
+
if (typeof value === "number" && Number.isFinite(value)) {
|
|
361
365
|
return value;
|
|
362
366
|
}
|
|
363
|
-
|
|
367
|
+
if (typeof value === "bigint") {
|
|
368
|
+
return Number(value);
|
|
369
|
+
}
|
|
370
|
+
if (typeof value === "string") {
|
|
371
|
+
const parsed = Number(value);
|
|
372
|
+
return Number.isFinite(parsed) ? parsed : 0;
|
|
373
|
+
}
|
|
374
|
+
return 0;
|
|
364
375
|
};
|
|
365
|
-
var
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
diagnostics,
|
|
369
|
-
initialized
|
|
370
|
-
}) => ({
|
|
371
|
-
backend: "postgres",
|
|
372
|
-
vectorMode: "native_pgvector",
|
|
373
|
-
dimensions: vector.dimensions,
|
|
374
|
-
native: {
|
|
375
|
-
requested: true,
|
|
376
|
-
available: initialized && !diagnostics.lastInitError,
|
|
377
|
-
active: initialized && !diagnostics.lastInitError,
|
|
378
|
-
mode: "pgvector",
|
|
379
|
-
extensionName: vector.extensionName,
|
|
380
|
-
schemaName: schema.schemaName,
|
|
381
|
-
tableName: schema.chunkTableName,
|
|
382
|
-
distanceMetric: vector.distanceMetric,
|
|
383
|
-
indexType: vector.index.type,
|
|
384
|
-
fallbackReason: diagnostics.fallbackReason,
|
|
385
|
-
lastInitError: diagnostics.lastInitError,
|
|
386
|
-
lastQueryError: diagnostics.lastQueryError,
|
|
387
|
-
lastUpsertError: diagnostics.lastUpsertError,
|
|
388
|
-
lastMigrationError: diagnostics.lastMigrationError
|
|
376
|
+
var parseBooleanValue = (value) => {
|
|
377
|
+
if (typeof value === "boolean") {
|
|
378
|
+
return value;
|
|
389
379
|
}
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
const result = await client.query(`SELECT name FROM ${migrationPlan.migrationTableQualifiedName} ORDER BY name ASC`);
|
|
393
|
-
return new Set(result.rows.map((row) => String(row.name)));
|
|
394
|
-
};
|
|
395
|
-
var insertAppliedMigration = async (client, migrationPlan, name) => {
|
|
396
|
-
await client.query(`INSERT INTO ${migrationPlan.migrationTableQualifiedName} (name) VALUES ($1) ON CONFLICT (name) DO NOTHING`, [name]);
|
|
397
|
-
};
|
|
398
|
-
var executeMigrationSequence = async (client, migrationPlan, migrations) => {
|
|
399
|
-
const appliedNames = [];
|
|
400
|
-
for (const migration of migrations) {
|
|
401
|
-
await client.query(migration.sql);
|
|
402
|
-
await insertAppliedMigration(client, migrationPlan, migration.name);
|
|
403
|
-
appliedNames.push(migration.name);
|
|
404
|
-
}
|
|
405
|
-
return appliedNames;
|
|
406
|
-
};
|
|
407
|
-
var applyPostgresMigrations = async (options, applyOptions = {}) => {
|
|
408
|
-
const migrationPlan = createPostgresMigrationPlan(options ?? {});
|
|
409
|
-
const injectedClient = applyOptions.client;
|
|
410
|
-
const getClient = injectedClient ? async () => injectedClient : resolveClientFactory(options ?? {});
|
|
411
|
-
const client = await getClient();
|
|
412
|
-
for (const sql of migrationPlan.bootstrapSql) {
|
|
413
|
-
await client.query(sql);
|
|
414
|
-
}
|
|
415
|
-
const alreadyApplied = await getAppliedMigrationNames(client, migrationPlan);
|
|
416
|
-
const pendingMigrations = migrationPlan.migrations.filter((migration) => !alreadyApplied.has(migration.name));
|
|
417
|
-
const skippedNames = migrationPlan.migrations.filter((migration) => alreadyApplied.has(migration.name)).map((migration) => migration.name);
|
|
418
|
-
if (applyOptions.dryRun === true) {
|
|
419
|
-
return {
|
|
420
|
-
migrationPlan,
|
|
421
|
-
appliedNames: [],
|
|
422
|
-
skippedNames,
|
|
423
|
-
pendingNames: pendingMigrations.map((migration) => migration.name),
|
|
424
|
-
appliedCount: 0,
|
|
425
|
-
pendingCount: pendingMigrations.length,
|
|
426
|
-
dryRun: true
|
|
427
|
-
};
|
|
380
|
+
if (typeof value === "number") {
|
|
381
|
+
return value !== 0;
|
|
428
382
|
}
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
383
|
+
if (typeof value === "bigint") {
|
|
384
|
+
return value !== 0n;
|
|
385
|
+
}
|
|
386
|
+
if (typeof value === "string") {
|
|
387
|
+
const normalized = value.trim().toLowerCase();
|
|
388
|
+
return normalized === "true" || normalized === "t" || normalized === "1";
|
|
389
|
+
}
|
|
390
|
+
return false;
|
|
391
|
+
};
|
|
392
|
+
/**
 * Refreshes the table-size and index-health fields on `nativeDiagnostics`.
 *
 * Issues a single catalog query (pg_class / pg_namespace / pg_indexes plus the
 * pg_*_size functions) and copies the first returned row onto the diagnostics
 * object. This function never rejects: any failure is recorded in
 * `lastHealthError` together with a `lastHealthCheckAt` timestamp.
 *
 * @param db SQL client exposing `unsafe(sql, params)`.
 * @param nativeDiagnostics Diagnostics object, mutated in place.
 * @param input `{ qualifiedTableName, schemaName, tableName, indexName? }`.
 * @returns {Promise<void>}
 */
var refreshPostgresRuntimeDiagnostics = async (db, nativeDiagnostics, input) => {
  const describeFailure = (failure) => failure instanceof Error ? failure.message : String(failure);
  const healthSql = [
    "select",
    "c.reltuples::bigint as estimated_row_count,",
    "pg_relation_size($1::regclass) as table_bytes,",
    "pg_indexes_size($1::regclass) as index_bytes,",
    "pg_total_relation_size($1::regclass) as total_bytes,",
    "exists(",
    "select 1",
    "from pg_indexes",
    "where schemaname = $2",
    "and tablename = $3",
    "and indexname = $4",
    ") as index_present",
    "from pg_class c",
    "join pg_namespace n on n.oid = c.relnamespace",
    "where n.nspname = $2",
    "and c.relname = $3",
    "limit 1"
  ].join("\n");
  // Diagnostics field -> result column, copied in this order.
  const sizeColumns = [
    ["estimatedRowCount", "estimated_row_count"],
    ["tableBytes", "table_bytes"],
    ["indexBytes", "index_bytes"],
    ["totalBytes", "total_bytes"]
  ];
  try {
    const bindings = [
      input.qualifiedTableName,
      input.schemaName,
      input.tableName,
      // $4 must always be bound; an empty name simply matches no index.
      input.indexName ?? ""
    ];
    const stats = (await db.unsafe(healthSql, bindings))[0];
    nativeDiagnostics.indexName = input.indexName;
    nativeDiagnostics.indexPresent = input.indexName ? parseBooleanValue(stats?.index_present) : undefined;
    for (const [field, column] of sizeColumns) {
      nativeDiagnostics[field] = parseCountValue(stats?.[column]);
    }
    nativeDiagnostics.lastHealthCheckAt = Date.now();
    nativeDiagnostics.lastHealthError = undefined;
  } catch (failure) {
    nativeDiagnostics.lastHealthCheckAt = Date.now();
    nativeDiagnostics.lastHealthError = describeFailure(failure);
  }
};
|
|
430
|
+
/**
 * Runs `analyze` on the chunk table and then refreshes runtime diagnostics.
 *
 * `lastAnalyzeAt` is stamped on both success and failure. On failure the
 * message is stored in `lastAnalyzeError` and the original error is rethrown.
 *
 * @param db SQL client exposing `unsafe(sql)`.
 * @param nativeDiagnostics Diagnostics object, mutated in place.
 * @param input Must carry `qualifiedTableName`; forwarded unchanged to
 *   `refreshPostgresRuntimeDiagnostics`.
 * @returns {Promise<void>}
 */
var analyzePostgresTable = async (db, nativeDiagnostics, input) => {
  const markAnalyzed = (errorMessage) => {
    nativeDiagnostics.lastAnalyzeAt = Date.now();
    nativeDiagnostics.lastAnalyzeError = errorMessage;
  };
  try {
    const statement = `analyze ${input.qualifiedTableName}`;
    await db.unsafe(statement);
    markAnalyzed(undefined);
    await refreshPostgresRuntimeDiagnostics(db, nativeDiagnostics, input);
  } catch (failure) {
    markAnalyzed(failure instanceof Error ? failure.message : String(failure));
    throw failure;
  }
};
|
|
442
|
+
/**
 * Drops and recreates the native pgvector index, then re-analyzes the table.
 *
 * The drop target is schema-qualified with the schema part of
 * `input.qualifiedTableName`. Postgres always creates an index in its table's
 * schema, while an unqualified `drop index` is resolved through `search_path`;
 * without qualification a table outside `search_path` would keep its old
 * index and the following `create index` would fail with "already exists".
 *
 * `lastReindexAt` is stamped on both success and failure. On failure the
 * message is stored in `lastReindexError` and the original error is rethrown.
 *
 * @param db SQL client exposing `unsafe(sql)`.
 * @param nativeDiagnostics Diagnostics object, mutated in place.
 * @param input Index settings (`indexName`, `indexType`, `distanceMetric`,
 *   `hnswM`, `hnswEfConstruction`, `indexLists`, `qualifiedTableName`, ...).
 * @throws {Error} When no index is configured (`indexName` empty or
 *   `indexType === "none"`), or when any statement fails.
 * @returns {Promise<void>}
 */
var rebuildPostgresNativeIndex = async (db, nativeDiagnostics, input) => {
  if (!input.indexName || input.indexType === "none") {
    throw new Error("Postgres native index rebuild is not configured");
  }
  try {
    const tableNameParts = String(input.qualifiedTableName).split(".");
    const indexIsQualified = String(input.indexName).includes(".");
    // Reuse the table's own schema token so any quoting applied to it carries over.
    const dropTarget = !indexIsQualified && tableNameParts.length > 1 && tableNameParts[0]
      ? `${tableNameParts[0]}.${input.indexName}`
      : input.indexName;
    await db.unsafe(`drop index if exists ${dropTarget}`);
    await db.unsafe(buildPostgresIndexSql({
      distanceMetric: input.distanceMetric,
      hnswEfConstruction: input.hnswEfConstruction,
      hnswM: input.hnswM,
      // The index was just dropped, so a leftover index must surface as an error.
      ifNotExists: false,
      indexLists: input.indexLists,
      indexType: input.indexType,
      qualifiedTableName: input.qualifiedTableName
    }));
    nativeDiagnostics.lastReindexAt = Date.now();
    nativeDiagnostics.lastReindexError = undefined;
    await analyzePostgresTable(db, nativeDiagnostics, input);
  } catch (error) {
    nativeDiagnostics.lastReindexAt = Date.now();
    nativeDiagnostics.lastReindexError = error instanceof Error ? error.message : String(error);
    throw error;
  }
};
|
|
466
|
+
/**
 * Returns the subset of `chunkIds` that actually exist in the table.
 *
 * Ids are de-duplicated, and anything that is not a non-empty string is
 * ignored (previously a `null`/`undefined` entry threw on `.length`). Lookups
 * are issued in batches so a very large id list cannot exceed the limit of
 * 65535 bind parameters per statement.
 *
 * @param db SQL client exposing `unsafe(sql, params)`.
 * @param qualifiedTableName Trusted, already validated table name (it is
 *   interpolated into the SQL text).
 * @param chunkIds Candidate chunk ids.
 * @returns {Promise<string[]>} Existing chunk ids, in result order.
 */
var getPostgresChunkIdsByChunkIds = async (db, qualifiedTableName, chunkIds) => {
  const MAX_IDS_PER_STATEMENT = 10000;
  const normalized = [...new Set(chunkIds ?? [])].filter((chunkId) => typeof chunkId === "string" && chunkId.length > 0);
  if (normalized.length === 0) {
    return [];
  }
  const existing = [];
  for (let offset = 0; offset < normalized.length; offset += MAX_IDS_PER_STATEMENT) {
    const batch = normalized.slice(offset, offset + MAX_IDS_PER_STATEMENT);
    const placeholders = batch.map((_, index) => `$${index + 1}`).join(", ");
    const rows = await db.unsafe(`select chunk_id from ${qualifiedTableName} where chunk_id in (${placeholders})`, batch);
    for (const row of rows) {
      if (typeof row.chunk_id === "string") {
        existing.push(row.chunk_id);
      }
    }
  }
  return existing;
};
|
|
475
|
+
/**
 * Lists the chunk ids whose rows satisfy `filter`.
 *
 * Whatever part of the filter `buildPostgresFilterPlan` can express is pushed
 * into the `where` clause; every fetched row is then re-checked in JavaScript
 * with `matchesFilter` against the full filter.
 *
 * @param db SQL client exposing `unsafe(sql, params)`.
 * @param qualifiedTableName Table name interpolated into the SQL text.
 * @param filter Metadata filter; a missing or empty filter yields `[]`.
 * @returns {Promise<string[]>} Matching chunk ids.
 */
var getPostgresCandidateChunkIdsByFilter = async (db, qualifiedTableName, filter) => {
  const isEmptyFilter = !filter || Object.keys(filter).length === 0;
  if (isEmptyFilter) {
    return [];
  }
  const plan = buildPostgresFilterPlan(buildPostgresPushdownFilter(filter));
  let statement = `select chunk_id, text, title, source, metadata from ${qualifiedTableName}`;
  let bindings = [];
  if (plan?.clause) {
    statement += ` where ${plan.clause}`;
    bindings = plan.params ?? [];
  }
  const rows = await db.unsafe(statement, bindings);
  return rows
    .map((row) => mapRowToChunk(row))
    .filter((chunk) => matchesFilter(chunk, filter))
    .map((chunk) => chunk.chunkId);
};
|
|
486
|
+
/**
 * Collects the chunk ids selected by a filter and/or an explicit id list.
 *
 * When both `input.filter` and `input.chunkIds` are supplied the result is
 * the de-duplicated union of the two lookups, filter matches first.
 *
 * @param db SQL client forwarded to the lookup helpers.
 * @param qualifiedTableName Table name forwarded to the lookup helpers.
 * @param input `{ filter?, chunkIds? }`.
 * @returns {Promise<string[]>} Unique chunk ids in first-seen order.
 */
var getPostgresCandidateChunkIds = async (db, qualifiedTableName, input) => {
  const collected = new Set();
  const addAll = (ids) => {
    for (const id of ids) {
      collected.add(id);
    }
  };
  const hasFilterClauses = input.filter && Object.keys(input.filter).length > 0;
  if (hasFilterClauses) {
    addAll(await getPostgresCandidateChunkIdsByFilter(db, qualifiedTableName, input.filter));
  }
  const hasExplicitIds = input.chunkIds && input.chunkIds.length > 0;
  if (hasExplicitIds) {
    addAll(await getPostgresChunkIdsByChunkIds(db, qualifiedTableName, input.chunkIds));
  }
  return Array.from(collected);
};
|
|
500
|
+
/**
 * Converts a raw distance value into a non-negative relevance score.
 *
 * - non-finite input        -> 0
 * - `"cosine"`              -> `1 - distance`, clamped to [0, 1]
 * - `"inner_product"`       -> `-distance`, floored at 0
 * - any other metric        -> `1 / (1 + |distance|)`
 *
 * @param distance Raw distance reported by the database.
 * @param metric Distance metric name.
 * @returns {number} Score where larger means more similar.
 */
var normalizeDistance = (distance, metric) => {
  if (!Number.isFinite(distance)) {
    return 0;
  }
  switch (metric) {
    case "cosine": {
      const similarity = 1 - distance;
      return Math.min(1, Math.max(0, similarity));
    }
    case "inner_product":
      return Math.max(0, -distance);
    default:
      return Math.max(0, 1 / (1 + Math.abs(distance)));
  }
};
|
|
512
|
+
/**
 * Maps a distance metric name to the SQL distance operator used in queries:
 * `"cosine"` -> `<=>`, `"inner_product"` -> `<#>`, anything else -> `<->`.
 *
 * @param metric Distance metric name.
 * @returns {string} SQL operator token.
 */
var getDistanceOperator = (metric) => {
  switch (metric) {
    case "cosine":
      return "<=>";
    case "inner_product":
      return "<#>";
    default:
      return "<->";
  }
};
|
|
513
|
+
/**
 * Builds the status snapshot reported by the store.
 *
 * `nativeDiagnostics` is attached by reference (not copied) under `native`.
 *
 * @param dimensions Configured embedding dimensionality.
 * @param nativeDiagnostics Live diagnostics object of the store.
 * @returns Status object with `backend`, `dimensions`, `native`, `vectorMode`.
 */
var createPostgresStatus = (dimensions, nativeDiagnostics) => {
  const status = {
    backend: "postgres",
    dimensions,
    native: nativeDiagnostics,
    vectorMode: "native_pgvector"
  };
  return status;
};
|
|
519
|
+
/**
 * Returns a fresh capability descriptor for the Postgres backend.
 *
 * @returns Object with `backend`, `nativeVectorSearch`, `persistence`,
 *   `serverSideFiltering` and `streamingIngestStatus`.
 */
var createPostgresCapabilities = () => {
  const capabilities = {
    backend: "postgres",
    nativeVectorSearch: true,
    persistence: "external",
    serverSideFiltering: true,
    streamingIngestStatus: false
  };
  return capabilities;
};
|
|
526
|
+
/**
 * Records a summary of the most recent vector query on
 * `input.nativeDiagnostics.lastQueryPlan`.
 *
 * Besides copying the planner inputs and outcomes, it derives:
 * - `searchExpansionRatio` = finalSearchK / initialSearchK
 * - `candidateYieldRatio`  = returnedCount / finalSearchK
 * - `topKFillRatio`        = returnedCount / topK
 * Each ratio is `undefined` when its operands are missing or the divisor is
 * not positive. Pushdown statistics come from `resolvePostgresPushdownMode`.
 *
 * @param input Query outcome; must include `nativeDiagnostics`.
 * @returns {void}
 */
var updatePostgresLastQueryPlan = (input) => {
  const { finalSearchK, initialSearchK, returnedCount, topK, filteredCandidateCount } = input;
  const pushdown = resolvePostgresPushdownMode({
    filter: input.filter,
    pushdownFilter: input.pushdownFilter
  });
  const candidateCoverage = summarizeSQLiteCandidateCoverage({
    filteredCandidateCount,
    returnedCount,
    topK
  });
  // Ratio of two counts, only when both are numbers and the divisor is positive.
  const countRatio = (numerator, denominator) => {
    if (typeof numerator === "number" && typeof denominator === "number" && denominator > 0) {
      return numerator / denominator;
    }
    return undefined;
  };
  let topKFillRatio;
  if (typeof returnedCount === "number" && topK > 0) {
    topKFillRatio = returnedCount / topK;
  }
  input.nativeDiagnostics.lastQueryPlan = {
    backfillCount: input.backfillCount,
    candidateBudgetExhausted: input.candidateBudgetExhausted,
    candidateCoverage,
    filteredCandidateCount,
    finalSearchK,
    initialSearchK,
    searchExpansionRatio: countRatio(finalSearchK, initialSearchK),
    candidateLimitUsed: input.candidateLimitUsed,
    maxBackfillsUsed: input.maxBackfillsUsed,
    minResultsUsed: input.minResultsUsed,
    fillPolicyUsed: input.fillPolicyUsed,
    plannerProfileUsed: input.plannerProfileUsed,
    jsRemainderClauseCount: pushdown.jsRemainderClauseCount,
    queryMultiplierUsed: input.queryMultiplierUsed,
    jsRemainderRatio: pushdown.jsRemainderRatio,
    pushdownApplied: pushdown.pushdownClauseCount > 0,
    pushdownClauseCount: pushdown.pushdownClauseCount,
    pushdownCoverageRatio: pushdown.pushdownCoverageRatio,
    pushdownMode: pushdown.pushdownMode,
    queryMode: "native_pgvector",
    candidateYieldRatio: countRatio(returnedCount, finalSearchK),
    returnedCount,
    backfillLimitReached: input.backfillLimitReached,
    minResultsSatisfied: input.minResultsSatisfied,
    topKFillRatio,
    totalFilterClauseCount: pushdown.totalFilterClauseCount,
    underfilledTopK: input.underfilledTopK
  };
};
|
|
565
|
+
/**
 * Checks a chunk record against a metadata filter.
 *
 * The record handed to `matchesMetadataFilterRecord` contains `chunkId`,
 * `metadata`, `source` and `title`, followed by the metadata entries spread
 * at the top level. Because the spread comes last, a metadata key with one
 * of those four names takes precedence over the record's own field.
 *
 * @param record Chunk-like object (`chunkId`, `metadata`, `source`, `title`).
 * @param filter Metadata filter.
 * @returns Whatever `matchesMetadataFilterRecord` returns.
 */
var matchesFilter = (record, filter) => {
  const { chunkId, metadata, source, title } = record;
  const candidate = {
    chunkId,
    metadata,
    source,
    title,
    ...(metadata ?? {})
  };
  return matchesMetadataFilterRecord(candidate, filter);
};
|
|
572
|
+
/**
 * Converts a database row into the in-memory chunk shape.
 *
 * `metadata` goes through `parseMetadata` and `embedding` through
 * `parseVectorText`; a `null` `source` or `title` becomes `undefined`.
 *
 * @param row Row with `chunk_id`, `text`, `title`, `source`, `metadata` and
 *   optionally `embedding`.
 * @returns `{ chunkId, metadata, source, text, title, vector }`.
 */
var mapRowToChunk = (row) => {
  const metadata = parseMetadata(row.metadata);
  const vector = parseVectorText(row.embedding);
  return {
    chunkId: row.chunk_id,
    metadata,
    source: row.source ?? undefined,
    text: row.text,
    title: row.title ?? undefined,
    vector
  };
};
|
|
580
|
+
/**
 * Creates everything the store needs if it does not exist yet: the `vector`
 * extension, the schema, the chunk table and (when configured) the index.
 *
 * `input.dimensions` is validated before any statement runs because it is
 * interpolated into the `vector(N)` column type. A schema is only created
 * when `input.qualifiedTableName` actually contains a schema part; an
 * unqualified name no longer creates a schema named after the table.
 *
 * @param db SQL client exposing `unsafe(sql)`.
 * @param input `{ qualifiedTableName, dimensions, ...index settings }`; the
 *   whole object is forwarded to `buildPostgresIndexSql`.
 * @throws {Error} When `dimensions` is not a positive integer, or when any
 *   statement fails.
 * @returns {Promise<void>}
 */
var ensurePostgresSchema = async (db, input) => {
  const dimensions = Number(input.dimensions);
  if (!Number.isInteger(dimensions) || dimensions <= 0) {
    throw new Error(`Postgres vector dimensions must be a positive integer, received ${String(input.dimensions)}`);
  }
  await db.unsafe("create extension if not exists vector");
  const nameParts = input.qualifiedTableName.split(".");
  const schemaName = nameParts.length > 1 ? nameParts[0] : undefined;
  if (schemaName) {
    await db.unsafe(`create schema if not exists ${schemaName}`);
  }
  await db.unsafe(`
create table if not exists ${input.qualifiedTableName} (
chunk_id text primary key,
text text not null,
title text,
source text,
metadata jsonb,
embedding vector(${dimensions}) not null
)
`);
  // A falsy result means no index should be created.
  const indexSql = buildPostgresIndexSql(input);
  if (indexSql) {
    await db.unsafe(indexSql);
  }
};
|
|
601
|
+
/**
 * Creates a RAG vector store backed by Postgres + pgvector.
 *
 * The schema is ensured lazily: the first operation triggers `init()`, and
 * concurrent callers share that attempt. A failed attempt is not cached, so
 * the next operation retries instead of replaying the old rejection forever.
 *
 * @param options Store options: `dimensions`, `distanceMetric`,
 *   `queryMultiplier`, `indexType`, `indexLists`, `hnswM`,
 *   `hnswEfConstruction`, `tableName`, `schemaName`, `sql` (an existing
 *   client), `connectionString`, `mockEmbedding`.
 * @returns Store object with `analyze`, `clear`, `close`, `embed`,
 *   `getCapabilities`, `getStatus`, `query`, `queryLexical`,
 *   `rebuildNativeIndex` (only when an index is configured), `count`,
 *   `delete` and `upsert`.
 * @throws Whatever the option normalizers / `assertSupportedIdentifier`
 *   throw for invalid options.
 */
var createPostgresRAGStore = (options = {}) => {
  const dimensions = options.dimensions ?? DEFAULT_DIMENSIONS;
  const distanceMetric = options.distanceMetric ?? "cosine";
  // Result unused; the call is kept so option normalization still runs at construction time.
  normalizeQueryMultiplier(options.queryMultiplier);
  const indexType = normalizePostgresIndexType(options.indexType);
  const indexLists = normalizePositiveInteger(options.indexLists, DEFAULT_POSTGRES_IVFFLAT_LISTS);
  const hnswM = normalizePositiveInteger(options.hnswM, DEFAULT_POSTGRES_HNSW_M);
  const hnswEfConstruction = normalizePositiveInteger(options.hnswEfConstruction, DEFAULT_POSTGRES_HNSW_EF_CONSTRUCTION);
  const tableName = options.tableName ?? DEFAULT_TABLE_NAME;
  const schemaName = options.schemaName ?? DEFAULT_SCHEMA_NAME;
  // Both names end up interpolated into SQL text, so they are validated first.
  assertSupportedIdentifier(tableName);
  assertSupportedIdentifier(schemaName);
  const qualifiedTableName = toQualifiedTableName(schemaName, tableName);
  const indexName = getPostgresIndexName(qualifiedTableName, indexType);
  // NOTE(review): the last fallback is a local demo URL with default credentials;
  // deployments should pass `sql`, `connectionString` or set one of the env vars.
  const db = options.sql ?? new Bun.SQL(options.connectionString ?? process.env.RAG_POSTGRES_URL ?? process.env.DATABASE_URL ?? "postgres://postgres:postgres@localhost:55433/absolute_rag_demo");
  const nativeDiagnostics = {
    active: true,
    available: true,
    distanceMetric,
    extensionName: "vector",
    indexName,
    indexType,
    mode: "pgvector",
    requested: true,
    schemaName,
    tableName
  };
  const capabilities = createPostgresCapabilities();
  const distanceOperator = getDistanceOperator(distanceMetric);
  // Identifies the table/index for the diagnostics helpers.
  const diagnosticsTarget = {
    indexName,
    qualifiedTableName,
    schemaName,
    tableName
  };
  const describeError = (error) => error instanceof Error ? error.message : String(error);
  let initialized;
  // Ensures the schema once; concurrent callers share the in-flight promise.
  const init = () => {
    initialized ??= ensurePostgresSchema(db, {
      dimensions,
      distanceMetric,
      hnswEfConstruction,
      hnswM,
      indexLists,
      indexType,
      qualifiedTableName
    }).then(() => refreshPostgresRuntimeDiagnostics(db, nativeDiagnostics, diagnosticsTarget)).then(() => {
      // A successful (re)initialization clears the state left by an earlier failure.
      nativeDiagnostics.active = true;
      nativeDiagnostics.available = true;
      nativeDiagnostics.lastInitError = undefined;
      nativeDiagnostics.lastMigrationError = undefined;
      nativeDiagnostics.fallbackReason = undefined;
    }).catch((error) => {
      // Drop the cached promise so the next call retries.
      initialized = undefined;
      const message = describeError(error);
      nativeDiagnostics.active = false;
      nativeDiagnostics.available = false;
      nativeDiagnostics.lastInitError = message;
      nativeDiagnostics.lastMigrationError = message;
      nativeDiagnostics.fallbackReason = message;
      throw error;
    });
    return initialized;
  };
  // Embeds text with `options.mockEmbedding` when given, else the built-in vectorizer.
  const embed = async (input) => {
    if (options.mockEmbedding) {
      return options.mockEmbedding(input.text);
    }
    return normalizeVector(createRAGVector(input.text, dimensions));
  };
  // Vector search: fetch nearest rows, re-check the filter in JS, widen the search while underfilled.
  const runQuery = async (input) => {
    const queryVector = normalizeVector(input.queryVector);
    const effectiveQueryMultiplier = normalizeQueryMultiplier(input.queryMultiplier ?? options.queryMultiplier);
    const maxBackfills = normalizeMaxBackfills(input.maxBackfills);
    const minResults = normalizeMinResults(input.minResults, input.topK);
    const fillTarget = resolveFillTarget({
      fillPolicy: input.fillPolicy,
      minResults,
      topK: input.topK
    });
    const queryVectorLiteral = toVectorLiteral(queryVector);
    const pushdownFilter = buildPostgresPushdownFilter(input.filter);
    const queryFilterPlan = buildPostgresFilterPlan(pushdownFilter);
    const effectivePushdownFilter = queryFilterPlan ? pushdownFilter : undefined;
    const countSql = queryFilterPlan?.clause ? `select count(*)::int as count from ${qualifiedTableName} where ${queryFilterPlan.clause}` : `select count(*)::int as count from ${qualifiedTableName}`;
    const countParams = queryFilterPlan?.params ?? [];
    const totalRowsResult = await db.unsafe(countSql, countParams);
    nativeDiagnostics.lastFilterDebug = {
      countParams,
      countResultRaw: totalRowsResult?.[0],
      countSql,
      filter: input.filter,
      pushdownFilter: effectivePushdownFilter
    };
    const totalRows = parseCountValue(totalRowsResult?.[0]?.count);
    const candidateLimit = resolveAdaptiveNativeCandidateLimit({
      defaultCandidateLimit: RAG_NATIVE_QUERY_CANDIDATE_LIMIT,
      explicitCandidateLimit: input.candidateLimit,
      filteredCandidateCount: totalRows,
      plannerProfile: input.plannerProfile,
      queryMultiplier: effectiveQueryMultiplier,
      topK: input.topK
    });
    const hasPushdownFilter = Boolean(effectivePushdownFilter);
    // With a pushdown filter, a zero count is treated as "unknown" rather than "no candidates".
    const plannedFilteredCandidateCount = hasPushdownFilter && totalRows === 0 ? undefined : totalRows;
    const initialSearchK = planNativeCandidateSearchK({
      candidateLimit,
      filteredCandidateCount: plannedFilteredCandidateCount,
      queryMultiplier: effectiveQueryMultiplier,
      topK: input.topK
    });
    if (initialSearchK === 0) {
      return [];
    }
    // The statement is identical on every backfill pass; only the limit value changes.
    const distanceExpression = `embedding ${distanceOperator} '${queryVectorLiteral}'::vector`;
    const filterParams = queryFilterPlan?.clause ? queryFilterPlan.params ?? [] : [];
    const whereSql = queryFilterPlan?.clause ? ` where ${queryFilterPlan.clause}` : "";
    const rowsSql = `select chunk_id, text, title, source, metadata, embedding::text as embedding, ${distanceExpression} as distance from ${qualifiedTableName}${whereSql} order by ${distanceExpression} limit $${filterParams.length + 1}`;
    let currentSearchK = initialSearchK;
    let backfillCount = 0;
    let candidateBudgetExhausted = false;
    let backfillLimitReached = false;
    let effectiveFilteredCandidateCount = plannedFilteredCandidateCount;
    let mapped = [];
    for (;;) {
      const queryParams = [...filterParams, currentSearchK];
      const rows = await db.unsafe(rowsSql, queryParams);
      nativeDiagnostics.lastFilterDebug = {
        ...nativeDiagnostics.lastFilterDebug,
        queryParams,
        queryRowCount: rows.length,
        querySql: rowsSql
      };
      if (hasPushdownFilter && effectiveFilteredCandidateCount === undefined && rows.length <= currentSearchK) {
        effectiveFilteredCandidateCount = rows.length;
      }
      mapped = rows.map((row) => {
        const chunk = mapRowToChunk(row);
        return {
          chunk,
          score: normalizeDistance(Number(row.distance ?? 0), distanceMetric)
        };
      }).filter(({ chunk }) => matchesFilter(chunk, input.filter)).map((entry) => ({
        chunkId: entry.chunk.chunkId,
        chunkText: entry.chunk.text,
        embedding: entry.chunk.vector,
        metadata: entry.chunk.metadata,
        score: entry.score,
        source: entry.chunk.source,
        title: entry.chunk.title
      })).sort((left, right) => right.score - left.score);
      if (mapped.length >= fillTarget.targetResults) {
        break;
      }
      const nextSearchK = planNativeCandidateSearchBackfillK({
        backfillCount,
        candidateLimit,
        currentSearchK,
        filteredCandidateCount: effectiveFilteredCandidateCount,
        maxBackfills
      });
      if (nextSearchK <= currentSearchK) {
        // The planner refused to widen the search: record why we stopped underfilled.
        backfillLimitReached = typeof maxBackfills === "number" && backfillCount >= maxBackfills && mapped.length < fillTarget.targetResults;
        candidateBudgetExhausted = mapped.length < fillTarget.targetResults;
        break;
      }
      currentSearchK = nextSearchK;
      backfillCount += 1;
    }
    nativeDiagnostics.lastQueryError = undefined;
    const returned = mapped.slice(0, input.topK);
    updatePostgresLastQueryPlan({
      backfillCount,
      backfillLimitReached,
      candidateBudgetExhausted,
      candidateLimitUsed: candidateLimit,
      maxBackfillsUsed: maxBackfills,
      minResultsUsed: minResults,
      fillPolicyUsed: fillTarget.fillPolicy,
      plannerProfileUsed: input.plannerProfile,
      filter: input.filter,
      pushdownFilter: effectivePushdownFilter,
      queryMultiplierUsed: effectiveQueryMultiplier,
      filteredCandidateCount: effectiveFilteredCandidateCount,
      finalSearchK: currentSearchK,
      initialSearchK,
      nativeDiagnostics,
      minResultsSatisfied: returned.length >= minResults,
      returnedCount: returned.length,
      topK: input.topK,
      underfilledTopK: returned.length < input.topK
    });
    return returned;
  };
  // Public vector query: records failures in `lastQueryError` and rethrows them.
  const query = async (input) => {
    await init();
    try {
      return await runQuery(input);
    } catch (error) {
      nativeDiagnostics.lastQueryError = describeError(error);
      throw error;
    }
  };
  // Lexical query: fetch (pre-filtered) rows and rank them in JS.
  const queryLexical = async (input) => {
    await init();
    const pushdownFilter = buildPostgresPushdownFilter(input.filter);
    const lexicalFilterPlan = buildPostgresFilterPlan(pushdownFilter);
    const rowsSql = lexicalFilterPlan?.clause ? `select chunk_id, text, title, source, metadata from ${qualifiedTableName} where ${lexicalFilterPlan.clause}` : `select chunk_id, text, title, source, metadata from ${qualifiedTableName}`;
    const rows = await db.unsafe(rowsSql, lexicalFilterPlan?.params ?? []);
    const chunks = rows.map((row) => mapRowToChunk(row)).filter((chunk) => matchesFilter(chunk, input.filter));
    const ranked = rankRAGLexicalMatches(input.query, chunks);
    return ranked.slice(0, input.topK).map(({ result, score }) => ({
      chunkId: result.chunkId,
      chunkText: result.text,
      metadata: result.metadata,
      score,
      source: result.source,
      title: result.title
    }));
  };
  // Inserts or updates chunks by `chunk_id`, embedding any chunk that lacks a vector.
  const upsert = async (input) => {
    await init();
    const chunks = input.chunks.length > 0 ? await Promise.all(input.chunks.map(async (chunk) => ({
      chunkId: chunk.chunkId,
      metadata: chunk.metadata,
      source: chunk.source,
      text: chunk.text,
      title: chunk.title,
      vector: chunk.embedding ? normalizeVector(chunk.embedding) : normalizeVector(await embed({ text: chunk.text }))
    }))) : [];
    for (const chunk of chunks) {
      await db.unsafe(`insert into ${qualifiedTableName} (chunk_id, text, title, source, metadata, embedding)
values ($1, $2, $3, $4, $5::jsonb, $6::vector)
on conflict (chunk_id) do update set
text = excluded.text,
title = excluded.title,
source = excluded.source,
metadata = excluded.metadata,
embedding = excluded.embedding`, [
        chunk.chunkId,
        chunk.text,
        chunk.title ?? null,
        chunk.source ?? null,
        chunk.metadata ?? null,
        toVectorLiteral(chunk.vector)
      ]);
    }
    await refreshPostgresRuntimeDiagnostics(db, nativeDiagnostics, diagnosticsTarget);
  };
  // Counts all rows, or the rows selected by `filter` and/or `chunkIds`.
  const count = async (input = {}) => {
    await init();
    const filter = input.filter;
    const chunkIds = input.chunkIds;
    const hasFilter = Boolean(filter && Object.keys(filter).length > 0);
    const hasChunkIds = Boolean(chunkIds && chunkIds.length > 0);
    if (!hasFilter && !hasChunkIds) {
      const countResult = await db.unsafe(`select count(*)::int as count from ${qualifiedTableName}`);
      return parseCountValue(countResult[0]?.count);
    }
    return (await getPostgresCandidateChunkIds(db, qualifiedTableName, {
      filter,
      chunkIds
    })).length;
  };
  // Deletes the rows selected by `filter` and/or `chunkIds`; with neither it deletes nothing.
  const remove = async (input = {}) => {
    await init();
    const filter = input.filter;
    const chunkIds = input.chunkIds;
    const hasFilter = Boolean(filter && Object.keys(filter).length > 0);
    const hasChunkIds = Boolean(chunkIds && chunkIds.length > 0);
    if (!hasFilter && !hasChunkIds) {
      return 0;
    }
    const ids = await getPostgresCandidateChunkIds(db, qualifiedTableName, {
      filter,
      chunkIds
    });
    if (ids.length === 0) {
      return 0;
    }
    const placeholders = ids.map((_, index) => `$${index + 1}`).join(", ");
    await db.unsafe(`delete from ${qualifiedTableName} where chunk_id in (${placeholders})`, ids);
    await refreshPostgresRuntimeDiagnostics(db, nativeDiagnostics, diagnosticsTarget);
    return ids.length;
  };
  // Removes every row from the table.
  const clear = async () => {
    await init();
    await db.unsafe(`truncate table ${qualifiedTableName}`);
    await refreshPostgresRuntimeDiagnostics(db, nativeDiagnostics, diagnosticsTarget);
  };
  // Runs `analyze` on the table and refreshes diagnostics.
  const analyze = async () => {
    await init();
    await analyzePostgresTable(db, nativeDiagnostics, diagnosticsTarget);
  };
  // Drops and recreates the native vector index.
  const rebuildNativeIndex = async () => {
    await init();
    await rebuildPostgresNativeIndex(db, nativeDiagnostics, {
      distanceMetric,
      hnswEfConstruction,
      hnswM,
      indexLists,
      indexName,
      indexType,
      qualifiedTableName,
      schemaName,
      tableName
    });
  };
  // Closes the underlying client when it supports `close()`.
  const close = async () => {
    await db.close?.();
  };
  return {
    analyze,
    clear,
    close,
    embed,
    getCapabilities: () => capabilities,
    getStatus: () => createPostgresStatus(dimensions, nativeDiagnostics),
    query,
    queryLexical,
    rebuildNativeIndex: indexName ? rebuildNativeIndex : undefined,
    count,
    delete: remove,
    upsert
  };
};
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
var
|
|
591
|
-
const store =
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
927
|
+
|
|
928
|
+
// src/index.ts
|
|
929
|
+
// npm package name of this adapter, exported as a constant.
var ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME = "@absolutejs/rag-postgres";
|
|
930
|
+
/**
 * Creates a RAG collection on top of a Postgres store.
 *
 * @param options `{ store?, storeOptions? }` - an existing store to wrap, or
 *   the options used to create one with `createPostgresRAGStore`.
 * @returns The collection returned by `createRAGCollection({ store })`.
 */
var createPostgresRAGCollection = (options = {}) => {
  const { store: providedStore, storeOptions } = options;
  const store = providedStore ?? createPostgresRAGStore(storeOptions ?? {});
  return createRAGCollection({ store });
};
|
|
934
|
+
/**
 * Creates a Postgres store together with its RAG collection.
 *
 * `getStatus` / `getCapabilities` ask the collection first and fall back to
 * the store when the collection lacks the method or returns a nullish value.
 *
 * @param options `{ store?, storeOptions?, collection? }`.
 * @returns `{ store, collection, getStatus, getCapabilities }`.
 */
var createPostgresRAG = (options = {}) => {
  const store = options.store ?? createPostgresRAGStore(options.storeOptions ?? {});
  const collection = options.collection ?? createRAGCollection({ store });
  const getStatus = () => {
    const fromCollection = collection.getStatus?.();
    return fromCollection ?? store.getStatus?.();
  };
  const getCapabilities = () => {
    const fromCollection = collection.getCapabilities?.();
    return fromCollection ?? store.getCapabilities?.();
  };
  return {
    store,
    collection,
    getStatus,
    getCapabilities
  };
};
|
|
605
944
|
// Alias of createPostgresRAG under the "PostgreSQL" spelling; both names refer to the same function.
var createPostgreSQLRAG = createPostgresRAG;
|
|
606
945
|
export {
|
|
607
|
-
|
|
946
|
+
ragPlugin,
|
|
947
|
+
createRAGCollection,
|
|
948
|
+
createPostgresRAGStore,
|
|
608
949
|
createPostgresRAGCollection,
|
|
609
950
|
createPostgresRAG,
|
|
610
|
-
createPostgresMigrationPlan,
|
|
611
951
|
createPostgreSQLRAG,
|
|
612
|
-
createPgvectorStore,
|
|
613
|
-
applyPostgresSchemaPlan,
|
|
614
|
-
applyPostgresMigrations,
|
|
615
|
-
POSTGRESQL_RAG_IMPLEMENTATIONS,
|
|
616
|
-
PGVECTOR_INDEX_TYPES,
|
|
617
|
-
PGVECTOR_DISTANCE_METRICS,
|
|
618
952
|
ABSOLUTE_POSTGRESQL_RAG_PACKAGE_NAME
|
|
619
953
|
};
|
|
620
954
|
|
|
621
|
-
//# debugId=
|
|
955
|
+
//# debugId=705798FA59AD060364756E2164756E21
|
|
622
956
|
//# sourceMappingURL=index.js.map
|