ydb-qdrant 7.0.1 → 8.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/config/env.d.ts +0 -8
- package/dist/config/env.js +2 -29
- package/dist/package/api.d.ts +5 -2
- package/dist/package/api.js +2 -2
- package/dist/qdrant/QdrantRestTypes.d.ts +35 -0
- package/dist/repositories/collectionsRepo.d.ts +1 -2
- package/dist/repositories/collectionsRepo.js +62 -103
- package/dist/repositories/collectionsRepo.one-table.js +103 -47
- package/dist/repositories/collectionsRepo.shared.d.ts +2 -0
- package/dist/repositories/collectionsRepo.shared.js +32 -0
- package/dist/repositories/pointsRepo.d.ts +4 -8
- package/dist/repositories/pointsRepo.one-table/Delete.js +122 -67
- package/dist/repositories/pointsRepo.one-table/PathSegmentsFilter.d.ts +5 -2
- package/dist/repositories/pointsRepo.one-table/PathSegmentsFilter.js +7 -6
- package/dist/repositories/pointsRepo.one-table/Search.d.ts +4 -0
- package/dist/repositories/pointsRepo.one-table/Search.js +208 -0
- package/dist/repositories/pointsRepo.one-table/Upsert.d.ts +2 -2
- package/dist/repositories/pointsRepo.one-table/Upsert.js +51 -66
- package/dist/repositories/pointsRepo.one-table.d.ts +1 -1
- package/dist/repositories/pointsRepo.one-table.js +1 -1
- package/dist/routes/collections.js +7 -61
- package/dist/routes/points.js +15 -66
- package/dist/services/PointsService.d.ts +3 -8
- package/dist/services/PointsService.js +19 -23
- package/dist/types.d.ts +23 -33
- package/dist/types.js +18 -20
- package/dist/utils/normalization.js +13 -14
- package/dist/utils/retry.js +19 -29
- package/dist/utils/vectorBinary.js +10 -5
- package/dist/ydb/bootstrapMetaTable.d.ts +7 -0
- package/dist/ydb/bootstrapMetaTable.js +75 -0
- package/dist/ydb/client.d.ts +23 -17
- package/dist/ydb/client.js +82 -423
- package/dist/ydb/schema.js +88 -148
- package/package.json +2 -10
- package/dist/qdrant/QdrantTypes.d.ts +0 -19
- package/dist/repositories/pointsRepo.one-table/Search/Approximate.d.ts +0 -18
- package/dist/repositories/pointsRepo.one-table/Search/Approximate.js +0 -119
- package/dist/repositories/pointsRepo.one-table/Search/Exact.d.ts +0 -17
- package/dist/repositories/pointsRepo.one-table/Search/Exact.js +0 -101
- package/dist/repositories/pointsRepo.one-table/Search/index.d.ts +0 -8
- package/dist/repositories/pointsRepo.one-table/Search/index.js +0 -30
- package/dist/utils/typeGuards.d.ts +0 -1
- package/dist/utils/typeGuards.js +0 -3
- package/dist/ydb/QueryDiagnostics.d.ts +0 -6
- package/dist/ydb/QueryDiagnostics.js +0 -52
- package/dist/ydb/SessionPool.d.ts +0 -36
- package/dist/ydb/SessionPool.js +0 -248
- package/dist/ydb/bulkUpsert.d.ts +0 -6
- package/dist/ydb/bulkUpsert.js +0 -52
- /package/dist/qdrant/{QdrantTypes.js → QdrantRestTypes.js} +0 -0
|
@@ -1,53 +1,107 @@
|
|
|
1
|
-
import { withSession } from "../../ydb/client.js";
|
|
2
|
-
import { buildPathSegmentsWhereClause } from "./PathSegmentsFilter.js";
|
|
3
|
-
import { Uint32, Utf8 } from "@ydbjs/value/primitive";
|
|
1
|
+
import { TypedValues, withSession, createExecuteQuerySettings, } from "../../ydb/client.js";
|
|
4
2
|
import { withRetry, isTransientYdbError } from "../../utils/retry.js";
|
|
5
|
-
import {
|
|
6
|
-
import { attachQueryDiagnostics } from "../../ydb/QueryDiagnostics.js";
|
|
3
|
+
import { buildPathSegmentsWhereClause } from "./PathSegmentsFilter.js";
|
|
7
4
|
const DELETE_FILTER_SELECT_BATCH_SIZE = 1000;
|
|
8
5
|
export async function deletePointsOneTable(tableName, ids, uid) {
|
|
9
6
|
let deleted = 0;
|
|
10
|
-
await withSession(async (
|
|
7
|
+
await withSession(async (s) => {
|
|
8
|
+
const settings = createExecuteQuerySettings();
|
|
11
9
|
for (const id of ids) {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
.parameter("uid", new Utf8(uid))
|
|
23
|
-
.parameter("id", new Utf8(String(id)))
|
|
24
|
-
.idempotent(true)
|
|
25
|
-
.timeout(UPSERT_OPERATION_TIMEOUT_MS)
|
|
26
|
-
.signal(signal);
|
|
27
|
-
}, {
|
|
10
|
+
const yql = `
|
|
11
|
+
DECLARE $uid AS Utf8;
|
|
12
|
+
DECLARE $id AS Utf8;
|
|
13
|
+
DELETE FROM ${tableName} WHERE uid = $uid AND point_id = $id;
|
|
14
|
+
`;
|
|
15
|
+
const params = {
|
|
16
|
+
$uid: TypedValues.utf8(uid),
|
|
17
|
+
$id: TypedValues.utf8(String(id)),
|
|
18
|
+
};
|
|
19
|
+
await withRetry(() => s.executeQuery(yql, params, undefined, settings), {
|
|
28
20
|
isTransient: isTransientYdbError,
|
|
29
|
-
context: {
|
|
30
|
-
operation: "deletePointsOneTable",
|
|
31
|
-
tableName,
|
|
32
|
-
uid,
|
|
33
|
-
pointId: String(id),
|
|
34
|
-
},
|
|
21
|
+
context: { tableName, uid, pointId: String(id) },
|
|
35
22
|
});
|
|
36
23
|
deleted += 1;
|
|
37
24
|
}
|
|
38
25
|
});
|
|
39
26
|
return deleted;
|
|
40
27
|
}
|
|
41
|
-
|
|
28
|
+
const MAX_SAFE_BIGINT = BigInt(Number.MAX_SAFE_INTEGER);
|
|
29
|
+
function bigintToSafeNumberOrNull(value) {
|
|
30
|
+
if (value > MAX_SAFE_BIGINT || value < -MAX_SAFE_BIGINT) {
|
|
31
|
+
return null;
|
|
32
|
+
}
|
|
33
|
+
return Number(value);
|
|
34
|
+
}
|
|
35
|
+
function longLikeToBigInt(value) {
|
|
36
|
+
const low = BigInt(value.low >>> 0);
|
|
37
|
+
const high = BigInt(value.high >>> 0);
|
|
38
|
+
let n = low + (high << 32n);
|
|
39
|
+
// If this is a signed Long-like and the sign bit is set, interpret as a negative 64-bit integer.
|
|
40
|
+
const isUnsigned = value.unsigned === true;
|
|
41
|
+
const signBitSet = (value.high & 0x8000_0000) !== 0;
|
|
42
|
+
if (!isUnsigned && signBitSet) {
|
|
43
|
+
n -= 1n << 64n;
|
|
44
|
+
}
|
|
45
|
+
return n;
|
|
46
|
+
}
|
|
47
|
+
function toNumber(value) {
|
|
42
48
|
if (typeof value === "number" && Number.isFinite(value))
|
|
43
49
|
return value;
|
|
44
50
|
if (typeof value === "bigint") {
|
|
45
|
-
|
|
46
|
-
return Number.isFinite(n) ? n : 0;
|
|
51
|
+
return bigintToSafeNumberOrNull(value);
|
|
47
52
|
}
|
|
48
53
|
if (typeof value === "string") {
|
|
54
|
+
// Prefer exact parsing for integer strings to avoid silent precision loss.
|
|
55
|
+
if (/^-?\d+$/.test(value.trim())) {
|
|
56
|
+
try {
|
|
57
|
+
const b = BigInt(value.trim());
|
|
58
|
+
return bigintToSafeNumberOrNull(b);
|
|
59
|
+
}
|
|
60
|
+
catch {
|
|
61
|
+
return null;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
49
64
|
const n = Number(value);
|
|
50
|
-
return Number.isFinite(n) ? n :
|
|
65
|
+
return Number.isFinite(n) ? n : null;
|
|
66
|
+
}
|
|
67
|
+
if (value && typeof value === "object") {
|
|
68
|
+
// ydb-sdk may return Uint64/Int64 as protobufjs Long-like objects:
|
|
69
|
+
// { low: number, high: number, unsigned?: boolean }
|
|
70
|
+
const v = value;
|
|
71
|
+
if (typeof v.low === "number" && typeof v.high === "number") {
|
|
72
|
+
const b = longLikeToBigInt({
|
|
73
|
+
low: v.low,
|
|
74
|
+
high: v.high,
|
|
75
|
+
unsigned: v.unsigned === true,
|
|
76
|
+
});
|
|
77
|
+
return bigintToSafeNumberOrNull(b);
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
return null;
|
|
81
|
+
}
|
|
82
|
+
function readDeletedCountFromResult(rs) {
|
|
83
|
+
const sets = rs.resultSets ?? [];
|
|
84
|
+
for (let i = sets.length - 1; i >= 0; i -= 1) {
|
|
85
|
+
const rowset = sets[i];
|
|
86
|
+
const rows = rowset?.rows ?? [];
|
|
87
|
+
const cell = rows[0]?.items?.[0];
|
|
88
|
+
if (!cell)
|
|
89
|
+
continue;
|
|
90
|
+
const candidates = [
|
|
91
|
+
cell.uint64Value,
|
|
92
|
+
cell.int64Value,
|
|
93
|
+
cell.uint32Value,
|
|
94
|
+
cell.int32Value,
|
|
95
|
+
cell.textValue,
|
|
96
|
+
];
|
|
97
|
+
for (const c of candidates) {
|
|
98
|
+
const n = toNumber(c);
|
|
99
|
+
if (n !== null)
|
|
100
|
+
return n;
|
|
101
|
+
}
|
|
102
|
+
// We got a result cell but couldn't parse any of its known numeric representations.
|
|
103
|
+
// Returning 0 here would silently stop the delete loop, so fail loud.
|
|
104
|
+
throw new Error("Unable to parse deleted count from YDB result.");
|
|
51
105
|
}
|
|
52
106
|
return 0;
|
|
53
107
|
}
|
|
@@ -56,51 +110,52 @@ export async function deletePointsByPathSegmentsOneTable(tableName, uid, paths)
|
|
|
56
110
|
return 0;
|
|
57
111
|
}
|
|
58
112
|
const { whereSql, params: whereParams } = buildPathSegmentsWhereClause(paths);
|
|
113
|
+
const whereParamDeclarations = Object.keys(whereParams)
|
|
114
|
+
.sort()
|
|
115
|
+
.map((key) => `DECLARE ${key} AS Utf8;`)
|
|
116
|
+
.join("\n ");
|
|
117
|
+
const deleteBatchYql = `
|
|
118
|
+
DECLARE $uid AS Utf8;
|
|
119
|
+
DECLARE $limit AS Uint32;
|
|
120
|
+
${whereParamDeclarations}
|
|
121
|
+
|
|
122
|
+
$to_delete = (
|
|
123
|
+
SELECT uid, point_id
|
|
124
|
+
FROM ${tableName}
|
|
125
|
+
WHERE uid = $uid AND ${whereSql}
|
|
126
|
+
LIMIT $limit
|
|
127
|
+
);
|
|
128
|
+
|
|
129
|
+
DELETE FROM ${tableName} ON
|
|
130
|
+
SELECT uid, point_id FROM $to_delete;
|
|
131
|
+
|
|
132
|
+
SELECT CAST(COUNT(*) AS Uint32) AS deleted FROM $to_delete;
|
|
133
|
+
`;
|
|
59
134
|
let deleted = 0;
|
|
60
|
-
await withSession(async (
|
|
135
|
+
await withSession(async (s) => {
|
|
136
|
+
const settings = createExecuteQuerySettings();
|
|
61
137
|
// Best-effort loop: stop when there are no more matching rows.
|
|
62
138
|
// Use limited batches to avoid per-operation buffer limits.
|
|
63
139
|
while (true) {
|
|
64
|
-
const
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
WHERE uid = $uid AND ${sql.unsafe(whereSql)}
|
|
70
|
-
LIMIT $limit
|
|
71
|
-
);
|
|
72
|
-
|
|
73
|
-
DELETE FROM ${sql.identifier(tableName)} ON
|
|
74
|
-
SELECT uid, point_id FROM $to_delete;
|
|
75
|
-
|
|
76
|
-
SELECT COUNT(*) AS deleted FROM $to_delete;
|
|
77
|
-
`, {
|
|
78
|
-
operation: "deletePointsByPathSegmentsOneTable",
|
|
79
|
-
tableName,
|
|
80
|
-
uid,
|
|
81
|
-
batchLimit: DELETE_FILTER_SELECT_BATCH_SIZE,
|
|
82
|
-
pathsCount: paths.length,
|
|
83
|
-
})
|
|
84
|
-
.idempotent(true)
|
|
85
|
-
.timeout(UPSERT_OPERATION_TIMEOUT_MS)
|
|
86
|
-
.signal(signal)
|
|
87
|
-
.parameter("uid", new Utf8(uid))
|
|
88
|
-
.parameter("limit", new Uint32(DELETE_FILTER_SELECT_BATCH_SIZE));
|
|
89
|
-
for (const [key, value] of Object.entries(whereParams)) {
|
|
90
|
-
q = q.parameter(key, value);
|
|
91
|
-
}
|
|
92
|
-
return await q;
|
|
93
|
-
}, {
|
|
140
|
+
const rs = (await withRetry(() => s.executeQuery(deleteBatchYql, {
|
|
141
|
+
...whereParams,
|
|
142
|
+
$uid: TypedValues.utf8(uid),
|
|
143
|
+
$limit: TypedValues.uint32(DELETE_FILTER_SELECT_BATCH_SIZE),
|
|
144
|
+
}, undefined, settings), {
|
|
94
145
|
isTransient: isTransientYdbError,
|
|
95
146
|
context: {
|
|
96
|
-
operation: "deletePointsByPathSegmentsOneTable",
|
|
97
147
|
tableName,
|
|
98
148
|
uid,
|
|
149
|
+
filterPathsCount: paths.length,
|
|
99
150
|
batchLimit: DELETE_FILTER_SELECT_BATCH_SIZE,
|
|
100
|
-
pathsCount: paths.length,
|
|
101
151
|
},
|
|
102
|
-
});
|
|
103
|
-
const batchDeleted =
|
|
152
|
+
}));
|
|
153
|
+
const batchDeleted = readDeletedCountFromResult(rs);
|
|
154
|
+
if (!Number.isSafeInteger(batchDeleted) ||
|
|
155
|
+
batchDeleted < 0 ||
|
|
156
|
+
batchDeleted > DELETE_FILTER_SELECT_BATCH_SIZE) {
|
|
157
|
+
throw new Error(`Unexpected deleted count from YDB: ${String(batchDeleted)}. Expected an integer in [0, ${DELETE_FILTER_SELECT_BATCH_SIZE}].`);
|
|
158
|
+
}
|
|
104
159
|
if (batchDeleted <= 0) {
|
|
105
160
|
break;
|
|
106
161
|
}
|
|
@@ -1,11 +1,14 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
type QueryParams =
|
|
1
|
+
import type { Ydb } from "ydb-sdk";
|
|
2
|
+
type QueryParams = {
|
|
3
|
+
[key: string]: Ydb.ITypedValue;
|
|
4
|
+
};
|
|
3
5
|
export declare function buildPathSegmentsWhereClause(paths: Array<Array<string>>): {
|
|
4
6
|
whereSql: string;
|
|
5
7
|
params: QueryParams;
|
|
6
8
|
};
|
|
7
9
|
export declare function buildPathSegmentsFilter(paths: Array<Array<string>> | undefined): {
|
|
8
10
|
whereSql: string;
|
|
11
|
+
whereParamDeclarations: string;
|
|
9
12
|
whereParams: QueryParams;
|
|
10
13
|
} | undefined;
|
|
11
14
|
export {};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { TypedValues } from "../../ydb/client.js";
|
|
2
2
|
export function buildPathSegmentsWhereClause(paths) {
|
|
3
3
|
const params = {};
|
|
4
4
|
const orGroups = [];
|
|
@@ -11,11 +11,8 @@ export function buildPathSegmentsWhereClause(paths) {
|
|
|
11
11
|
for (let sIdx = 0; sIdx < segs.length; sIdx += 1) {
|
|
12
12
|
const paramName = `$p${pIdx}_${sIdx}`;
|
|
13
13
|
// payload is JsonDocument; JSON_VALUE supports JsonPath access.
|
|
14
|
-
// Security: path segment values are always bound as parameters (see `params[paramName]`)
|
|
15
|
-
// and MUST NOT be interpolated into `whereSql`. The only dynamic part in the SQL text
|
|
16
|
-
// below is the numeric segment index (sIdx) and the internal parameter name.
|
|
17
14
|
andParts.push(`JSON_VALUE(payload, '$.pathSegments."${sIdx}"') = ${paramName}`);
|
|
18
|
-
params[paramName] =
|
|
15
|
+
params[paramName] = TypedValues.utf8(segs[sIdx]);
|
|
19
16
|
}
|
|
20
17
|
orGroups.push(`(${andParts.join(" AND ")})`);
|
|
21
18
|
}
|
|
@@ -28,5 +25,9 @@ export function buildPathSegmentsFilter(paths) {
|
|
|
28
25
|
if (!paths || paths.length === 0)
|
|
29
26
|
return undefined;
|
|
30
27
|
const { whereSql, params: whereParams } = buildPathSegmentsWhereClause(paths);
|
|
31
|
-
|
|
28
|
+
const whereParamDeclarations = Object.keys(whereParams)
|
|
29
|
+
.sort()
|
|
30
|
+
.map((key) => `DECLARE ${key} AS Utf8;`)
|
|
31
|
+
.join("\n ");
|
|
32
|
+
return { whereSql, whereParamDeclarations, whereParams };
|
|
32
33
|
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { DistanceKind } from "../../types.js";
|
|
2
|
+
import { SearchMode } from "../../config/env.js";
|
|
3
|
+
import type { YdbQdrantScoredPoint } from "../../qdrant/QdrantRestTypes.js";
|
|
4
|
+
export declare function searchPointsOneTable(tableName: string, queryVector: number[], top: number, withPayload: boolean | undefined, distance: DistanceKind, dimension: number, uid: string, mode: SearchMode | undefined, overfetchMultiplier: number, filterPaths?: Array<Array<string>>): Promise<YdbQdrantScoredPoint[]>;
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
import { Types, TypedValues, withSession, createExecuteQuerySettingsWithTimeout, } from "../../ydb/client.js";
|
|
2
|
+
import { buildVectorBinaryParams } from "../../ydb/helpers.js";
|
|
3
|
+
import { mapDistanceToKnnFn, mapDistanceToBitKnnFn, } from "../../utils/distance.js";
|
|
4
|
+
import { logger } from "../../logging/logger.js";
|
|
5
|
+
import { SearchMode, SEARCH_OPERATION_TIMEOUT_MS } from "../../config/env.js";
|
|
6
|
+
import { buildPathSegmentsFilter } from "./PathSegmentsFilter.js";
|
|
7
|
+
function assertVectorDimension(vector, dimension, messagePrefix = "Vector dimension mismatch") {
|
|
8
|
+
if (vector.length !== dimension) {
|
|
9
|
+
throw new Error(`${messagePrefix}: got ${vector.length}, expected ${dimension}`);
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
function typedBytesOrFallback(value) {
|
|
13
|
+
const typedValuesCompat = TypedValues;
|
|
14
|
+
if (typeof typedValuesCompat.bytes === "function") {
|
|
15
|
+
return typedValuesCompat.bytes(value);
|
|
16
|
+
}
|
|
17
|
+
if (typeof typedValuesCompat.fromNative === "function") {
|
|
18
|
+
return typedValuesCompat.fromNative(Types.BYTES, value);
|
|
19
|
+
}
|
|
20
|
+
throw new Error("ydb-sdk does not support constructing BYTES typed parameters (TypedValues.bytes/fromNative missing); cannot execute vector search");
|
|
21
|
+
}
|
|
22
|
+
function parseSearchRows(rows, withPayload) {
|
|
23
|
+
return rows.map((row) => {
|
|
24
|
+
const id = row.items?.[0]?.textValue;
|
|
25
|
+
if (typeof id !== "string") {
|
|
26
|
+
throw new Error("point_id is missing in YDB search result");
|
|
27
|
+
}
|
|
28
|
+
let payload;
|
|
29
|
+
let scoreIdx = 1;
|
|
30
|
+
if (withPayload) {
|
|
31
|
+
const payloadText = row.items?.[1]?.textValue;
|
|
32
|
+
if (payloadText) {
|
|
33
|
+
try {
|
|
34
|
+
payload = JSON.parse(payloadText);
|
|
35
|
+
}
|
|
36
|
+
catch {
|
|
37
|
+
payload = undefined;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
scoreIdx = 2;
|
|
41
|
+
}
|
|
42
|
+
const score = Number(row.items?.[scoreIdx]?.floatValue ?? row.items?.[scoreIdx]?.textValue);
|
|
43
|
+
return { id, score, ...(payload ? { payload } : {}) };
|
|
44
|
+
});
|
|
45
|
+
}
|
|
46
|
+
function buildExactSearchQueryAndParams(args) {
|
|
47
|
+
const { fn, order } = mapDistanceToKnnFn(args.distance);
|
|
48
|
+
const filter = buildPathSegmentsFilter(args.filterPaths);
|
|
49
|
+
const filterWhere = filter ? ` AND ${filter.whereSql}` : "";
|
|
50
|
+
const binaries = buildVectorBinaryParams(args.queryVector);
|
|
51
|
+
const yql = `
|
|
52
|
+
DECLARE $qbinf AS String;
|
|
53
|
+
DECLARE $k AS Uint32;
|
|
54
|
+
DECLARE $uid AS Utf8;
|
|
55
|
+
${filter?.whereParamDeclarations ?? ""}
|
|
56
|
+
SELECT point_id, ${args.withPayload ? "payload, " : ""}${fn}(embedding, $qbinf) AS score
|
|
57
|
+
FROM ${args.tableName}
|
|
58
|
+
WHERE uid = $uid${filterWhere}
|
|
59
|
+
ORDER BY score ${order}
|
|
60
|
+
LIMIT $k;
|
|
61
|
+
`;
|
|
62
|
+
const params = {
|
|
63
|
+
...(filter?.whereParams ?? {}),
|
|
64
|
+
$qbinf: typedBytesOrFallback(binaries.float),
|
|
65
|
+
$k: TypedValues.uint32(args.top),
|
|
66
|
+
$uid: TypedValues.utf8(args.uid),
|
|
67
|
+
};
|
|
68
|
+
return { yql, params, modeLog: "one_table_exact_client_side_serialization" };
|
|
69
|
+
}
|
|
70
|
+
function buildApproxSearchQueryAndParams(args) {
|
|
71
|
+
const { fn, order } = mapDistanceToKnnFn(args.distance);
|
|
72
|
+
const { fn: bitFn, order: bitOrder } = mapDistanceToBitKnnFn(args.distance);
|
|
73
|
+
const safeTop = args.top > 0 ? args.top : 1;
|
|
74
|
+
const rawCandidateLimit = safeTop * args.overfetchMultiplier;
|
|
75
|
+
const candidateLimit = Math.max(safeTop, rawCandidateLimit);
|
|
76
|
+
const filter = buildPathSegmentsFilter(args.filterPaths);
|
|
77
|
+
const filterWhere = filter ? ` AND ${filter.whereSql}` : "";
|
|
78
|
+
const binaries = buildVectorBinaryParams(args.queryVector);
|
|
79
|
+
const yql = `
|
|
80
|
+
DECLARE $qbin_bit AS String;
|
|
81
|
+
DECLARE $qbinf AS String;
|
|
82
|
+
DECLARE $candidateLimit AS Uint32;
|
|
83
|
+
DECLARE $safeTop AS Uint32;
|
|
84
|
+
DECLARE $uid AS Utf8;
|
|
85
|
+
${filter?.whereParamDeclarations ?? ""}
|
|
86
|
+
|
|
87
|
+
$candidates = (
|
|
88
|
+
SELECT point_id
|
|
89
|
+
FROM ${args.tableName}
|
|
90
|
+
WHERE uid = $uid AND embedding_quantized IS NOT NULL
|
|
91
|
+
${filterWhere}
|
|
92
|
+
ORDER BY ${bitFn}(embedding_quantized, $qbin_bit) ${bitOrder}
|
|
93
|
+
LIMIT $candidateLimit
|
|
94
|
+
);
|
|
95
|
+
|
|
96
|
+
SELECT point_id, ${args.withPayload ? "payload, " : ""}${fn}(embedding, $qbinf) AS score
|
|
97
|
+
FROM ${args.tableName}
|
|
98
|
+
WHERE uid = $uid
|
|
99
|
+
AND point_id IN $candidates
|
|
100
|
+
${filterWhere}
|
|
101
|
+
ORDER BY score ${order}
|
|
102
|
+
LIMIT $safeTop;
|
|
103
|
+
`;
|
|
104
|
+
const params = {
|
|
105
|
+
...(filter?.whereParams ?? {}),
|
|
106
|
+
$qbin_bit: typedBytesOrFallback(binaries.bit),
|
|
107
|
+
$qbinf: typedBytesOrFallback(binaries.float),
|
|
108
|
+
$candidateLimit: TypedValues.uint32(candidateLimit),
|
|
109
|
+
$safeTop: TypedValues.uint32(safeTop),
|
|
110
|
+
$uid: TypedValues.utf8(args.uid),
|
|
111
|
+
};
|
|
112
|
+
return {
|
|
113
|
+
yql,
|
|
114
|
+
params,
|
|
115
|
+
safeTop,
|
|
116
|
+
candidateLimit,
|
|
117
|
+
modeLog: "one_table_approximate_client_side_serialization",
|
|
118
|
+
};
|
|
119
|
+
}
|
|
120
|
+
async function searchPointsOneTableExact(tableName, queryVector, top, withPayload, distance, dimension, uid, filterPaths) {
|
|
121
|
+
assertVectorDimension(queryVector, dimension);
|
|
122
|
+
const results = await withSession(async (s) => {
|
|
123
|
+
const { yql, params, modeLog } = buildExactSearchQueryAndParams({
|
|
124
|
+
tableName,
|
|
125
|
+
queryVector,
|
|
126
|
+
top,
|
|
127
|
+
withPayload,
|
|
128
|
+
distance,
|
|
129
|
+
uid,
|
|
130
|
+
filterPaths,
|
|
131
|
+
});
|
|
132
|
+
if (logger.isLevelEnabled("debug")) {
|
|
133
|
+
logger.debug({
|
|
134
|
+
tableName,
|
|
135
|
+
distance,
|
|
136
|
+
top,
|
|
137
|
+
withPayload,
|
|
138
|
+
mode: modeLog,
|
|
139
|
+
yql,
|
|
140
|
+
params: {
|
|
141
|
+
uid,
|
|
142
|
+
top,
|
|
143
|
+
vectorLength: queryVector.length,
|
|
144
|
+
vectorPreview: queryVector.slice(0, 3),
|
|
145
|
+
},
|
|
146
|
+
}, "one_table search (exact): executing YQL");
|
|
147
|
+
}
|
|
148
|
+
const settings = createExecuteQuerySettingsWithTimeout({
|
|
149
|
+
keepInCache: true,
|
|
150
|
+
idempotent: true,
|
|
151
|
+
timeoutMs: SEARCH_OPERATION_TIMEOUT_MS,
|
|
152
|
+
});
|
|
153
|
+
const rs = await s.executeQuery(yql, params, undefined, settings);
|
|
154
|
+
const rowset = rs.resultSets?.[0];
|
|
155
|
+
const rows = (rowset?.rows ?? []);
|
|
156
|
+
return parseSearchRows(rows, withPayload);
|
|
157
|
+
});
|
|
158
|
+
return results;
|
|
159
|
+
}
|
|
160
|
+
async function searchPointsOneTableApproximate(tableName, queryVector, top, withPayload, distance, dimension, uid, overfetchMultiplier, filterPaths) {
|
|
161
|
+
assertVectorDimension(queryVector, dimension);
|
|
162
|
+
const results = await withSession(async (s) => {
|
|
163
|
+
const { yql, params, safeTop, candidateLimit, modeLog } = buildApproxSearchQueryAndParams({
|
|
164
|
+
tableName,
|
|
165
|
+
queryVector,
|
|
166
|
+
top,
|
|
167
|
+
withPayload,
|
|
168
|
+
distance,
|
|
169
|
+
uid,
|
|
170
|
+
overfetchMultiplier,
|
|
171
|
+
filterPaths,
|
|
172
|
+
});
|
|
173
|
+
if (logger.isLevelEnabled("debug")) {
|
|
174
|
+
logger.debug({
|
|
175
|
+
tableName,
|
|
176
|
+
distance,
|
|
177
|
+
top,
|
|
178
|
+
safeTop,
|
|
179
|
+
candidateLimit,
|
|
180
|
+
mode: modeLog,
|
|
181
|
+
yql,
|
|
182
|
+
params: {
|
|
183
|
+
uid,
|
|
184
|
+
safeTop,
|
|
185
|
+
candidateLimit,
|
|
186
|
+
vectorLength: queryVector.length,
|
|
187
|
+
vectorPreview: queryVector.slice(0, 3),
|
|
188
|
+
},
|
|
189
|
+
}, "one_table search (approximate): executing YQL");
|
|
190
|
+
}
|
|
191
|
+
const settings = createExecuteQuerySettingsWithTimeout({
|
|
192
|
+
keepInCache: true,
|
|
193
|
+
idempotent: true,
|
|
194
|
+
timeoutMs: SEARCH_OPERATION_TIMEOUT_MS,
|
|
195
|
+
});
|
|
196
|
+
const rs = await s.executeQuery(yql, params, undefined, settings);
|
|
197
|
+
const rowset = rs.resultSets?.[0];
|
|
198
|
+
const rows = (rowset?.rows ?? []);
|
|
199
|
+
return parseSearchRows(rows, withPayload);
|
|
200
|
+
});
|
|
201
|
+
return results;
|
|
202
|
+
}
|
|
203
|
+
export async function searchPointsOneTable(tableName, queryVector, top, withPayload, distance, dimension, uid, mode, overfetchMultiplier, filterPaths) {
|
|
204
|
+
if (mode === SearchMode.Exact) {
|
|
205
|
+
return await searchPointsOneTableExact(tableName, queryVector, top, withPayload, distance, dimension, uid, filterPaths);
|
|
206
|
+
}
|
|
207
|
+
return await searchPointsOneTableApproximate(tableName, queryVector, top, withPayload, distance, dimension, uid, overfetchMultiplier, filterPaths);
|
|
208
|
+
}
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
export declare function upsertPointsOneTable(tableName: string, points:
|
|
1
|
+
import type { UpsertPoint } from "../../types.js";
|
|
2
|
+
export declare function upsertPointsOneTable(tableName: string, points: UpsertPoint[], dimension: number, uid: string): Promise<number>;
|
|
@@ -1,13 +1,9 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { TypedValues, Types, withSession, Ydb as YdbRuntime, createBulkUpsertSettingsWithTimeout, } from "../../ydb/client.js";
|
|
2
2
|
import { buildVectorBinaryParams } from "../../ydb/helpers.js";
|
|
3
|
+
import { withRetry, isTransientYdbError } from "../../utils/retry.js";
|
|
3
4
|
import { UPSERT_BATCH_SIZE } from "../../ydb/schema.js";
|
|
4
5
|
import { UPSERT_OPERATION_TIMEOUT_MS } from "../../config/env.js";
|
|
5
6
|
import { logger } from "../../logging/logger.js";
|
|
6
|
-
import { withRetry, isTransientYdbError } from "../../utils/retry.js";
|
|
7
|
-
import { bulkUpsertRowsOnce } from "../../ydb/bulkUpsert.js";
|
|
8
|
-
import { Bytes, JsonDocument, Utf8 } from "@ydbjs/value/primitive";
|
|
9
|
-
import { List } from "@ydbjs/value/list";
|
|
10
|
-
import { Struct } from "@ydbjs/value/struct";
|
|
11
7
|
function assertPointVectorsDimension(args) {
|
|
12
8
|
for (const p of args.points) {
|
|
13
9
|
const id = String(p.id);
|
|
@@ -26,75 +22,64 @@ function assertPointVectorsDimension(args) {
|
|
|
26
22
|
}
|
|
27
23
|
}
|
|
28
24
|
}
|
|
29
|
-
function
|
|
30
|
-
const
|
|
25
|
+
function buildBulkUpsertRowsValue(args) {
|
|
26
|
+
const rowType = Types.struct({
|
|
27
|
+
uid: Types.UTF8,
|
|
28
|
+
point_id: Types.UTF8,
|
|
29
|
+
embedding: Types.BYTES,
|
|
30
|
+
embedding_quantized: Types.BYTES,
|
|
31
|
+
payload: Types.JSON_DOCUMENT,
|
|
32
|
+
});
|
|
33
|
+
return TypedValues.list(rowType, args.batch.map((p) => {
|
|
31
34
|
const binaries = buildVectorBinaryParams(p.vector);
|
|
32
35
|
return {
|
|
33
|
-
uid:
|
|
34
|
-
point_id:
|
|
35
|
-
embedding:
|
|
36
|
-
embedding_quantized:
|
|
37
|
-
payload:
|
|
36
|
+
uid: args.uid,
|
|
37
|
+
point_id: String(p.id),
|
|
38
|
+
embedding: binaries.float,
|
|
39
|
+
embedding_quantized: binaries.bit,
|
|
40
|
+
payload: JSON.stringify(p.payload ?? {}),
|
|
38
41
|
};
|
|
39
|
-
});
|
|
40
|
-
const rowsValue = new List(...rows.map((row) => new Struct({
|
|
41
|
-
uid: row.uid,
|
|
42
|
-
point_id: row.point_id,
|
|
43
|
-
embedding: row.embedding,
|
|
44
|
-
embedding_quantized: row.embedding_quantized,
|
|
45
|
-
payload: row.payload,
|
|
46
|
-
})));
|
|
47
|
-
return {
|
|
48
|
-
rowsValue,
|
|
49
|
-
};
|
|
42
|
+
}));
|
|
50
43
|
}
|
|
51
44
|
export async function upsertPointsOneTable(tableName, points, dimension, uid) {
|
|
45
|
+
if (!tableName) {
|
|
46
|
+
throw new Error("bulkUpsert: tableName is empty");
|
|
47
|
+
}
|
|
52
48
|
assertPointVectorsDimension({ tableName, uid, points, dimension });
|
|
53
49
|
let upserted = 0;
|
|
54
|
-
|
|
55
|
-
const
|
|
56
|
-
|
|
57
|
-
tableName,
|
|
58
|
-
uid,
|
|
59
|
-
batch,
|
|
50
|
+
await withSession(async (s) => {
|
|
51
|
+
const bulkSettings = createBulkUpsertSettingsWithTimeout({
|
|
52
|
+
timeoutMs: UPSERT_OPERATION_TIMEOUT_MS,
|
|
60
53
|
});
|
|
61
|
-
|
|
62
|
-
const
|
|
63
|
-
|
|
64
|
-
|
|
54
|
+
for (let i = 0; i < points.length; i += UPSERT_BATCH_SIZE) {
|
|
55
|
+
const batch = points.slice(i, i + UPSERT_BATCH_SIZE);
|
|
56
|
+
const rowsValue = buildBulkUpsertRowsValue({
|
|
57
|
+
uid,
|
|
58
|
+
batch,
|
|
59
|
+
});
|
|
60
|
+
if (logger.isLevelEnabled("debug")) {
|
|
61
|
+
logger.debug({
|
|
65
62
|
tableName,
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
63
|
+
mode: "one_table_bulk_upsert_client_side_serialization",
|
|
64
|
+
batchSize: batch.length,
|
|
65
|
+
params: {
|
|
66
|
+
rows: batch.map((p) => ({
|
|
67
|
+
uid,
|
|
68
|
+
point_id: String(p.id),
|
|
69
|
+
vectorLength: p.vector.length,
|
|
70
|
+
vectorPreview: p.vector.slice(0, 3),
|
|
71
|
+
payload: p.payload ?? {},
|
|
72
|
+
})),
|
|
73
|
+
},
|
|
74
|
+
}, "one_table upsert: executing BulkUpsert");
|
|
69
75
|
}
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
timeoutMs: UPSERT_OPERATION_TIMEOUT_MS,
|
|
79
|
-
durationMs,
|
|
80
|
-
err,
|
|
81
|
-
errCause: getAbortErrorCause(err),
|
|
82
|
-
isTimeout: isTimeoutAbortError(err),
|
|
83
|
-
}, "upsertPointsOneTable: BulkUpsert aborted");
|
|
84
|
-
}
|
|
85
|
-
throw err;
|
|
86
|
-
}
|
|
87
|
-
}, {
|
|
88
|
-
isTransient: isTransientYdbError,
|
|
89
|
-
context: {
|
|
90
|
-
operation: "upsertPointsOneTable",
|
|
91
|
-
tableName,
|
|
92
|
-
uid,
|
|
93
|
-
batchStart: i,
|
|
94
|
-
batchSize: batch.length,
|
|
95
|
-
},
|
|
96
|
-
});
|
|
97
|
-
upserted += batch.length;
|
|
98
|
-
}
|
|
76
|
+
const typedRows = YdbRuntime.TypedValue.create(rowsValue);
|
|
77
|
+
await withRetry(() => s.bulkUpsert(tableName, typedRows, bulkSettings), {
|
|
78
|
+
isTransient: isTransientYdbError,
|
|
79
|
+
context: { tableName, batchSize: batch.length, mode: "bulkUpsert" },
|
|
80
|
+
});
|
|
81
|
+
upserted += batch.length;
|
|
82
|
+
}
|
|
83
|
+
});
|
|
99
84
|
return upserted;
|
|
100
85
|
}
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export { searchPointsOneTable } from "./pointsRepo.one-table/Search
|
|
1
|
+
export { searchPointsOneTable } from "./pointsRepo.one-table/Search.js";
|
|
2
2
|
export { upsertPointsOneTable } from "./pointsRepo.one-table/Upsert.js";
|
|
3
3
|
export { deletePointsOneTable, deletePointsByPathSegmentsOneTable, } from "./pointsRepo.one-table/Delete.js";
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export { searchPointsOneTable } from "./pointsRepo.one-table/Search
|
|
1
|
+
export { searchPointsOneTable } from "./pointsRepo.one-table/Search.js";
|
|
2
2
|
export { upsertPointsOneTable } from "./pointsRepo.one-table/Upsert.js";
|
|
3
3
|
export { deletePointsOneTable, deletePointsByPathSegmentsOneTable, } from "./pointsRepo.one-table/Delete.js";
|