ydb-qdrant 6.0.0 → 8.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/env.d.ts +0 -3
- package/dist/config/env.js +0 -17
- package/dist/package/api.d.ts +3 -0
- package/dist/qdrant/QdrantRestTypes.d.ts +35 -0
- package/dist/qdrant/QdrantRestTypes.js +1 -0
- package/dist/repositories/collectionsRepo.one-table.js +37 -63
- package/dist/repositories/collectionsRepo.shared.js +8 -2
- package/dist/repositories/pointsRepo.d.ts +5 -11
- package/dist/repositories/pointsRepo.js +6 -3
- package/dist/repositories/pointsRepo.one-table/Delete.d.ts +2 -0
- package/dist/repositories/pointsRepo.one-table/Delete.js +166 -0
- package/dist/repositories/pointsRepo.one-table/PathSegmentsFilter.d.ts +14 -0
- package/dist/repositories/pointsRepo.one-table/PathSegmentsFilter.js +33 -0
- package/dist/repositories/pointsRepo.one-table/Search.d.ts +4 -0
- package/dist/repositories/pointsRepo.one-table/Search.js +208 -0
- package/dist/repositories/pointsRepo.one-table/Upsert.d.ts +2 -0
- package/dist/repositories/pointsRepo.one-table/Upsert.js +85 -0
- package/dist/repositories/pointsRepo.one-table.d.ts +3 -13
- package/dist/repositories/pointsRepo.one-table.js +3 -403
- package/dist/routes/points.js +17 -4
- package/dist/server.d.ts +1 -0
- package/dist/server.js +70 -2
- package/dist/services/CollectionService.d.ts +9 -0
- package/dist/services/CollectionService.js +9 -0
- package/dist/services/PointsService.d.ts +3 -10
- package/dist/services/PointsService.js +73 -3
- package/dist/types.d.ts +59 -5
- package/dist/types.js +27 -3
- package/dist/utils/normalization.d.ts +1 -0
- package/dist/utils/normalization.js +2 -1
- package/dist/utils/vectorBinary.js +94 -10
- package/dist/ydb/bootstrapMetaTable.d.ts +7 -0
- package/dist/ydb/bootstrapMetaTable.js +75 -0
- package/dist/ydb/client.d.ts +10 -3
- package/dist/ydb/client.js +26 -2
- package/dist/ydb/helpers.d.ts +0 -2
- package/dist/ydb/helpers.js +0 -7
- package/dist/ydb/schema.js +100 -66
- package/package.json +3 -6
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
import { Types, TypedValues, withSession, createExecuteQuerySettingsWithTimeout, } from "../../ydb/client.js";
|
|
2
|
+
import { buildVectorBinaryParams } from "../../ydb/helpers.js";
|
|
3
|
+
import { mapDistanceToKnnFn, mapDistanceToBitKnnFn, } from "../../utils/distance.js";
|
|
4
|
+
import { logger } from "../../logging/logger.js";
|
|
5
|
+
import { SearchMode, SEARCH_OPERATION_TIMEOUT_MS } from "../../config/env.js";
|
|
6
|
+
import { buildPathSegmentsFilter } from "./PathSegmentsFilter.js";
|
|
7
|
+
/**
 * Guard that rejects a vector whose length differs from the expected dimension.
 *
 * @param {{ length: number }} vector - Value whose `length` is compared.
 * @param {number} dimension - Required number of components.
 * @param {string} [messagePrefix="Vector dimension mismatch"] - Text placed in
 *   front of the "got/expected" details of the error message.
 * @throws {Error} When `vector.length !== dimension`.
 */
function assertVectorDimension(vector, dimension, messagePrefix = "Vector dimension mismatch") {
    const actual = vector.length;
    if (actual === dimension) {
        return;
    }
    throw new Error(`${messagePrefix}: got ${actual}, expected ${dimension}`);
}
|
|
12
|
+
/**
 * Wraps a binary value as a BYTES typed parameter using whichever factory the
 * loaded `TypedValues` object exposes.
 *
 * `TypedValues.bytes` is preferred; `TypedValues.fromNative(Types.BYTES, …)`
 * is used only when `bytes` is not a function.
 *
 * @param {*} value - Binary value to wrap (passed through to the factory untouched).
 * @returns {*} Whatever the selected `TypedValues` factory returns.
 * @throws {Error} When neither `bytes` nor `fromNative` is available.
 */
function typedBytesOrFallback(value) {
    const compat = TypedValues;
    if (typeof compat.bytes !== "function") {
        if (typeof compat.fromNative !== "function") {
            throw new Error("ydb-sdk does not support constructing BYTES typed parameters (TypedValues.bytes/fromNative missing); cannot execute vector search");
        }
        // Fallback path: generic factory with an explicit BYTES type.
        return compat.fromNative(Types.BYTES, value);
    }
    // Called as a method so the factory keeps its `this` binding.
    return compat.bytes(value);
}
|
|
22
|
+
/**
 * Converts raw result rows into `{ id, score, payload? }` search hits.
 *
 * Column layout read from each row's `items` array:
 *   - `items[0].textValue` is the point id (must be a string);
 *   - with `withPayload`, `items[1].textValue` is JSON payload text and the
 *     score sits at index 2; otherwise the score sits at index 1.
 *
 * The score is taken from `floatValue`, falling back to `textValue`, and
 * converted with `Number(...)` (so it is `NaN` when neither is present).
 * Empty or unparseable payload text is dropped silently: the hit is returned
 * without a `payload` key.
 *
 * @param {Array<{ items?: Array<object> }>} rows - Rows of the first result set.
 * @param {boolean|undefined} withPayload - Whether a payload column was selected.
 * @returns {Array<{ id: string, score: number, payload?: * }>} Parsed hits, in row order.
 * @throws {Error} When a row has no string point id.
 */
function parseSearchRows(rows, withPayload) {
    const scoreColumn = withPayload ? 2 : 1;
    // Best-effort JSON decoding: any failure yields `undefined`.
    const decodePayload = (text) => {
        if (!text) {
            return undefined;
        }
        try {
            return JSON.parse(text);
        }
        catch {
            return undefined;
        }
    };
    return rows.map(({ items }) => {
        const id = items?.[0]?.textValue;
        if (typeof id !== "string") {
            throw new Error("point_id is missing in YDB search result");
        }
        const payload = withPayload ? decodePayload(items?.[1]?.textValue) : undefined;
        const scoreCell = items?.[scoreColumn];
        const score = Number(scoreCell?.floatValue ?? scoreCell?.textValue);
        return payload ? { id, score, payload } : { id, score };
    });
}
|
|
46
|
+
/**
 * Builds the YQL text and typed parameters for an exact (single-pass) search.
 *
 * The query scores every row of `args.tableName` that matches `uid = $uid`
 * (plus the optional path-segments filter) with the distance function chosen
 * by `mapDistanceToKnnFn(args.distance)`, orders by that score and limits the
 * result to `$k` rows.
 *
 * @param {object} args
 * @param {string} args.tableName - Table interpolated into the FROM clause.
 * @param {number[]} args.queryVector - Vector serialized via `buildVectorBinaryParams`.
 * @param {number} args.top - Value bound to `$k`.
 * @param {boolean|undefined} args.withPayload - Adds the `payload` column when truthy.
 * @param {*} args.distance - Passed to `mapDistanceToKnnFn`.
 * @param {string} args.uid - Value bound to `$uid`.
 * @param {*} args.filterPaths - Passed to `buildPathSegmentsFilter`.
 * @returns {{ yql: string, params: object, modeLog: string }}
 */
function buildExactSearchQueryAndParams(args) {
    const { tableName, queryVector, top, withPayload, uid } = args;
    const knn = mapDistanceToKnnFn(args.distance);
    const pathFilter = buildPathSegmentsFilter(args.filterPaths);
    const extraWhere = pathFilter ? ` AND ${pathFilter.whereSql}` : "";
    const { float: floatBytes } = buildVectorBinaryParams(queryVector);
    const payloadColumn = withPayload ? "payload, " : "";
    // Leading/trailing empty entries reproduce the newline that follows the
    // opening backtick and precedes the closing one in a multi-line template.
    const yql = [
        "",
        "DECLARE $qbinf AS String;",
        "DECLARE $k AS Uint32;",
        "DECLARE $uid AS Utf8;",
        `${pathFilter?.whereParamDeclarations ?? ""}`,
        `SELECT point_id, ${payloadColumn}${knn.fn}(embedding, $qbinf) AS score`,
        `FROM ${tableName}`,
        `WHERE uid = $uid${extraWhere}`,
        `ORDER BY score ${knn.order}`,
        "LIMIT $k;",
        "",
    ].join("\n");
    const params = {
        // Filter parameters first, so the fixed keys below win on a name clash.
        ...(pathFilter?.whereParams ?? {}),
        $qbinf: typedBytesOrFallback(floatBytes),
        $k: TypedValues.uint32(top),
        $uid: TypedValues.utf8(uid),
    };
    return { yql, params, modeLog: "one_table_exact_client_side_serialization" };
}
|
|
70
|
+
/**
 * Builds the YQL text and typed parameters for a two-phase approximate search.
 *
 * Phase 1 (`$candidates`) ranks rows by the bit-vector distance function on
 * `embedding_quantized` and keeps `$candidateLimit` point ids. Phase 2
 * re-scores those candidates with the full distance function on `embedding`
 * and returns the best `$safeTop`.
 *
 * Limits:
 *   - `safeTop` is `args.top` when it is greater than 0, otherwise 1;
 *   - `candidateLimit` is `safeTop * args.overfetchMultiplier`, rounded up to
 *     an integer and never below `safeTop`. A missing, NaN or infinite
 *     multiplier falls back to `safeTop`. (`Math.max` alone would propagate
 *     NaN into the `Uint32` parameter.)
 *
 * @param {object} args
 * @param {string} args.tableName - Table interpolated into both FROM clauses.
 * @param {number[]} args.queryVector - Vector serialized via `buildVectorBinaryParams`.
 * @param {number} args.top - Requested number of results.
 * @param {boolean|undefined} args.withPayload - Adds the `payload` column when truthy.
 * @param {*} args.distance - Passed to both distance-function mappers.
 * @param {string} args.uid - Value bound to `$uid`.
 * @param {number} args.overfetchMultiplier - Candidate over-fetch factor.
 * @param {*} args.filterPaths - Passed to `buildPathSegmentsFilter`.
 * @returns {{ yql: string, params: object, safeTop: number, candidateLimit: number, modeLog: string }}
 */
function buildApproxSearchQueryAndParams(args) {
    const { fn, order } = mapDistanceToKnnFn(args.distance);
    const { fn: bitFn, order: bitOrder } = mapDistanceToBitKnnFn(args.distance);
    const safeTop = args.top > 0 ? args.top : 1;
    const rawCandidateLimit = safeTop * args.overfetchMultiplier;
    // Guard against NaN/Infinity and fractional products before they reach a
    // Uint32 parameter.
    const candidateLimit = Number.isFinite(rawCandidateLimit)
        ? Math.max(safeTop, Math.ceil(rawCandidateLimit))
        : safeTop;
    const filter = buildPathSegmentsFilter(args.filterPaths);
    const filterWhere = filter ? ` AND ${filter.whereSql}` : "";
    const binaries = buildVectorBinaryParams(args.queryVector);
    // The path filter is applied in both phases: once when picking candidates
    // and again on the re-scored rows.
    const yql = `
DECLARE $qbin_bit AS String;
DECLARE $qbinf AS String;
DECLARE $candidateLimit AS Uint32;
DECLARE $safeTop AS Uint32;
DECLARE $uid AS Utf8;
${filter?.whereParamDeclarations ?? ""}

$candidates = (
SELECT point_id
FROM ${args.tableName}
WHERE uid = $uid AND embedding_quantized IS NOT NULL
${filterWhere}
ORDER BY ${bitFn}(embedding_quantized, $qbin_bit) ${bitOrder}
LIMIT $candidateLimit
);

SELECT point_id, ${args.withPayload ? "payload, " : ""}${fn}(embedding, $qbinf) AS score
FROM ${args.tableName}
WHERE uid = $uid
AND point_id IN $candidates
${filterWhere}
ORDER BY score ${order}
LIMIT $safeTop;
`;
    const params = {
        ...(filter?.whereParams ?? {}),
        $qbin_bit: typedBytesOrFallback(binaries.bit),
        $qbinf: typedBytesOrFallback(binaries.float),
        $candidateLimit: TypedValues.uint32(candidateLimit),
        $safeTop: TypedValues.uint32(safeTop),
        $uid: TypedValues.utf8(args.uid),
    };
    return {
        yql,
        params,
        safeTop,
        candidateLimit,
        modeLog: "one_table_approximate_client_side_serialization",
    };
}
|
|
120
|
+
/**
 * Runs an exact vector search against a one-table collection.
 *
 * Validates the query vector length, then, inside a `withSession` callback,
 * builds the exact-search query, optionally logs it at debug level, executes
 * it with the search timeout settings and parses the first result set.
 *
 * @param {string} tableName - Table to query.
 * @param {number[]} queryVector - Query vector; its length must equal `dimension`.
 * @param {number} top - Maximum number of hits requested.
 * @param {boolean|undefined} withPayload - Whether to select and return payloads.
 * @param {*} distance - Distance kind forwarded to the query builder.
 * @param {number} dimension - Expected vector length.
 * @param {string} uid - Value the query matches against the `uid` column.
 * @param {*} filterPaths - Optional path-segments filter input.
 * @returns {Promise<Array<{ id: string, score: number, payload?: * }>>}
 * @throws {Error} When the query vector length does not match `dimension`.
 */
async function searchPointsOneTableExact(tableName, queryVector, top, withPayload, distance, dimension, uid, filterPaths) {
    assertVectorDimension(queryVector, dimension);
    return await withSession(async (session) => {
        const query = buildExactSearchQueryAndParams({
            tableName,
            queryVector,
            top,
            withPayload,
            distance,
            uid,
            filterPaths,
        });
        if (logger.isLevelEnabled("debug")) {
            // Only a three-element preview of the vector is logged, not the
            // serialized parameters themselves.
            const loggedParams = {
                uid,
                top,
                vectorLength: queryVector.length,
                vectorPreview: queryVector.slice(0, 3),
            };
            logger.debug({
                tableName,
                distance,
                top,
                withPayload,
                mode: query.modeLog,
                yql: query.yql,
                params: loggedParams,
            }, "one_table search (exact): executing YQL");
        }
        const querySettings = createExecuteQuerySettingsWithTimeout({
            keepInCache: true,
            idempotent: true,
            timeoutMs: SEARCH_OPERATION_TIMEOUT_MS,
        });
        const response = await session.executeQuery(query.yql, query.params, undefined, querySettings);
        // A missing result set is treated as "no hits".
        return parseSearchRows(response.resultSets?.[0]?.rows ?? [], withPayload);
    });
}
|
|
160
|
+
/**
 * Runs an approximate (two-phase) vector search against a one-table collection.
 *
 * Validates the query vector length, then, inside a `withSession` callback,
 * builds the approximate-search query, optionally logs it at debug level,
 * executes it with the search timeout settings and parses the first result set.
 *
 * @param {string} tableName - Table to query.
 * @param {number[]} queryVector - Query vector; its length must equal `dimension`.
 * @param {number} top - Maximum number of hits requested.
 * @param {boolean|undefined} withPayload - Whether to select and return payloads.
 * @param {*} distance - Distance kind forwarded to the query builder.
 * @param {number} dimension - Expected vector length.
 * @param {string} uid - Value the query matches against the `uid` column.
 * @param {number} overfetchMultiplier - Candidate over-fetch factor forwarded to the builder.
 * @param {*} filterPaths - Optional path-segments filter input.
 * @returns {Promise<Array<{ id: string, score: number, payload?: * }>>}
 * @throws {Error} When the query vector length does not match `dimension`.
 */
async function searchPointsOneTableApproximate(tableName, queryVector, top, withPayload, distance, dimension, uid, overfetchMultiplier, filterPaths) {
    assertVectorDimension(queryVector, dimension);
    return await withSession(async (session) => {
        const query = buildApproxSearchQueryAndParams({
            tableName,
            queryVector,
            top,
            withPayload,
            distance,
            uid,
            overfetchMultiplier,
            filterPaths,
        });
        const { safeTop, candidateLimit } = query;
        if (logger.isLevelEnabled("debug")) {
            // Only a three-element preview of the vector is logged, not the
            // serialized parameters themselves.
            const loggedParams = {
                uid,
                safeTop,
                candidateLimit,
                vectorLength: queryVector.length,
                vectorPreview: queryVector.slice(0, 3),
            };
            logger.debug({
                tableName,
                distance,
                top,
                safeTop,
                candidateLimit,
                mode: query.modeLog,
                yql: query.yql,
                params: loggedParams,
            }, "one_table search (approximate): executing YQL");
        }
        const querySettings = createExecuteQuerySettingsWithTimeout({
            keepInCache: true,
            idempotent: true,
            timeoutMs: SEARCH_OPERATION_TIMEOUT_MS,
        });
        const response = await session.executeQuery(query.yql, query.params, undefined, querySettings);
        // A missing result set is treated as "no hits".
        return parseSearchRows(response.resultSets?.[0]?.rows ?? [], withPayload);
    });
}
|
|
203
|
+
/**
 * Entry point for one-table vector search; picks the search strategy.
 *
 * `SearchMode.Exact` selects the exact search. Every other `mode` value
 * (including `undefined`) selects the approximate search, which is the only
 * path that uses `overfetchMultiplier`.
 *
 * @param {string} tableName - Table to query.
 * @param {number[]} queryVector - Query vector.
 * @param {number} top - Maximum number of hits requested.
 * @param {boolean|undefined} withPayload - Whether to return payloads.
 * @param {*} distance - Distance kind.
 * @param {number} dimension - Expected vector length.
 * @param {string} uid - Value the query matches against the `uid` column.
 * @param {*} mode - Search mode selector.
 * @param {number} overfetchMultiplier - Candidate over-fetch factor (approximate mode).
 * @param {*} filterPaths - Optional path-segments filter input.
 * @returns {Promise<Array<{ id: string, score: number, payload?: * }>>}
 */
export async function searchPointsOneTable(tableName, queryVector, top, withPayload, distance, dimension, uid, mode, overfetchMultiplier, filterPaths) {
    const exactRequested = mode === SearchMode.Exact;
    if (!exactRequested) {
        return await searchPointsOneTableApproximate(tableName, queryVector, top, withPayload, distance, dimension, uid, overfetchMultiplier, filterPaths);
    }
    return await searchPointsOneTableExact(tableName, queryVector, top, withPayload, distance, dimension, uid, filterPaths);
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { TypedValues, Types, withSession, Ydb as YdbRuntime, createBulkUpsertSettingsWithTimeout, } from "../../ydb/client.js";
|
|
2
|
+
import { buildVectorBinaryParams } from "../../ydb/helpers.js";
|
|
3
|
+
import { withRetry, isTransientYdbError } from "../../utils/retry.js";
|
|
4
|
+
import { UPSERT_BATCH_SIZE } from "../../ydb/schema.js";
|
|
5
|
+
import { UPSERT_OPERATION_TIMEOUT_MS } from "../../config/env.js";
|
|
6
|
+
import { logger } from "../../logging/logger.js";
|
|
7
|
+
/**
 * Verifies that every point's vector has the expected length.
 *
 * Points are checked in order. On the first mismatch a warning is logged
 * (with at most the first 16 vector components as a preview) and an error is
 * thrown; later points are not inspected.
 *
 * @param {object} args
 * @param {string} args.tableName - Included in the warning context.
 * @param {string} args.uid - Included in the warning context.
 * @param {Array<{ id: string|number, vector: number[] }>} args.points - Points to check.
 * @param {number} args.dimension - Required vector length.
 * @throws {Error} When a point's `vector.length` differs from `args.dimension`.
 */
function assertPointVectorsDimension(args) {
    const PREVIEW_MAX = 16;
    for (const point of args.points) {
        const pointId = String(point.id);
        const vectorLen = point.vector.length;
        if (vectorLen === args.dimension) {
            continue;
        }
        const previewLength = Math.min(PREVIEW_MAX, vectorLen);
        let vectorPreview = [];
        if (previewLength > 0) {
            vectorPreview = point.vector.slice(0, previewLength);
        }
        logger.warn({
            tableName: args.tableName,
            uid: args.uid,
            pointId,
            vectorLen,
            expectedDimension: args.dimension,
            vectorPreview,
        }, "upsertPointsOneTable: vector dimension mismatch");
        throw new Error(`Vector dimension mismatch for id=${pointId}: got ${vectorLen}, expected ${args.dimension}`);
    }
}
|
|
25
|
+
/**
 * Builds the typed list-of-structs value for one bulk-upsert batch.
 *
 * Each point becomes a struct with columns `uid`, `point_id`, `embedding`,
 * `embedding_quantized` and `payload`. The two embedding columns come from
 * `buildVectorBinaryParams(vector)` (`float` and `bit` respectively). The
 * payload is JSON text, and a missing payload is stored as `{}`.
 *
 * @param {object} args
 * @param {string} args.uid - Written to the `uid` column of every row.
 * @param {Array<{ id: string|number, vector: number[], payload?: object }>} args.batch - Points of this batch.
 * @returns {*} The value produced by `TypedValues.list(rowType, rows)`.
 */
function buildBulkUpsertRowsValue(args) {
    const { uid, batch } = args;
    const rowType = Types.struct({
        uid: Types.UTF8,
        point_id: Types.UTF8,
        embedding: Types.BYTES,
        embedding_quantized: Types.BYTES,
        payload: Types.JSON_DOCUMENT,
    });
    const toRow = (point) => {
        const { float: embedding, bit: embeddingQuantized } = buildVectorBinaryParams(point.vector);
        return {
            uid,
            point_id: String(point.id),
            embedding,
            embedding_quantized: embeddingQuantized,
            payload: JSON.stringify(point.payload ?? {}),
        };
    };
    const rows = batch.map((point) => toRow(point));
    return TypedValues.list(rowType, rows);
}
|
|
44
|
+
/**
 * Bulk-upserts points into a one-table collection in fixed-size batches.
 *
 * All vectors are validated against `dimension` before anything is written.
 * Inside a single `withSession` callback the points are then cut into slices
 * of `UPSERT_BATCH_SIZE`, and each slice is sent with `session.bulkUpsert`
 * wrapped in `withRetry` (transient errors as judged by `isTransientYdbError`).
 *
 * @param {string} tableName - Target table; must be non-empty.
 * @param {Array<{ id: string|number, vector: number[], payload?: object }>} points - Points to write.
 * @param {number} dimension - Required vector length.
 * @param {string} uid - Written to the `uid` column of every row.
 * @returns {Promise<number>} Sum of the sizes of the batches that were sent.
 * @throws {Error} When `tableName` is empty or a vector has the wrong length.
 */
export async function upsertPointsOneTable(tableName, points, dimension, uid) {
    if (!tableName) {
        throw new Error("bulkUpsert: tableName is empty");
    }
    assertPointVectorsDimension({ tableName, uid, points, dimension });
    let written = 0;
    await withSession(async (session) => {
        const settings = createBulkUpsertSettingsWithTimeout({
            timeoutMs: UPSERT_OPERATION_TIMEOUT_MS,
        });
        let offset = 0;
        while (offset < points.length) {
            const chunk = points.slice(offset, offset + UPSERT_BATCH_SIZE);
            const rowsValue = buildBulkUpsertRowsValue({ uid, batch: chunk });
            if (logger.isLevelEnabled("debug")) {
                // Log a readable summary of each row instead of the binary
                // typed value that is actually sent.
                const loggedRows = chunk.map((point) => ({
                    uid,
                    point_id: String(point.id),
                    vectorLength: point.vector.length,
                    vectorPreview: point.vector.slice(0, 3),
                    payload: point.payload ?? {},
                }));
                logger.debug({
                    tableName,
                    mode: "one_table_bulk_upsert_client_side_serialization",
                    batchSize: chunk.length,
                    params: { rows: loggedRows },
                }, "one_table upsert: executing BulkUpsert");
            }
            const typedRows = YdbRuntime.TypedValue.create(rowsValue);
            const retryOptions = {
                isTransient: isTransientYdbError,
                context: { tableName, batchSize: chunk.length, mode: "bulkUpsert" },
            };
            await withRetry(() => session.bulkUpsert(tableName, typedRows, settings), retryOptions);
            written += chunk.length;
            offset += UPSERT_BATCH_SIZE;
        }
    });
    return written;
}
|
|
@@ -1,13 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
export
|
|
4
|
-
id: string | number;
|
|
5
|
-
vector: number[];
|
|
6
|
-
payload?: Record<string, unknown>;
|
|
7
|
-
}>, dimension: number, uid: string): Promise<number>;
|
|
8
|
-
export declare function searchPointsOneTable(tableName: string, queryVector: number[], top: number, withPayload: boolean | undefined, distance: DistanceKind, dimension: number, uid: string, mode: SearchMode | undefined, overfetchMultiplier: number): Promise<Array<{
|
|
9
|
-
id: string;
|
|
10
|
-
score: number;
|
|
11
|
-
payload?: Record<string, unknown>;
|
|
12
|
-
}>>;
|
|
13
|
-
export declare function deletePointsOneTable(tableName: string, ids: Array<string | number>, uid: string): Promise<number>;
|
|
1
|
+
export { searchPointsOneTable } from "./pointsRepo.one-table/Search.js";
|
|
2
|
+
export { upsertPointsOneTable } from "./pointsRepo.one-table/Upsert.js";
|
|
3
|
+
export { deletePointsOneTable, deletePointsByPathSegmentsOneTable, } from "./pointsRepo.one-table/Delete.js";
|