ydb-qdrant 8.1.0 → 9.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -18
- package/dist/SmokeTest.js +2 -2
- package/dist/compute/ComputePool.d.ts +5 -0
- package/dist/compute/ComputePool.js +64 -0
- package/dist/compute/ComputeWorker.d.ts +36 -0
- package/dist/compute/ComputeWorker.js +84 -0
- package/dist/config/env.d.ts +24 -7
- package/dist/config/env.js +65 -35
- package/dist/index.d.ts +2 -0
- package/dist/index.js +92 -2
- package/dist/logging/DeployLogFormatter.d.ts +2 -0
- package/dist/logging/DeployLogFormatter.js +131 -0
- package/dist/logging/logger.js +13 -1
- package/dist/logging/requestContext.d.ts +17 -0
- package/dist/logging/requestContext.js +43 -0
- package/dist/middleware/requestLogger.js +134 -6
- package/dist/middleware/upsertBodyPhase.d.ts +6 -0
- package/dist/middleware/upsertBodyPhase.js +184 -0
- package/dist/middleware/upsertRequestTimeout.d.ts +16 -0
- package/dist/middleware/upsertRequestTimeout.js +158 -0
- package/dist/package/api.d.ts +20 -12
- package/dist/package/api.js +57 -28
- package/dist/qdrant/QdrantRestTypes.d.ts +4 -0
- package/dist/qdrant/Requests.d.ts +97 -0
- package/dist/qdrant/Requests.js +72 -0
- package/dist/repositories/collectionsRepo.d.ts +18 -2
- package/dist/repositories/collectionsRepo.js +103 -7
- package/dist/repositories/collectionsRepo.one-table.d.ts +4 -3
- package/dist/repositories/collectionsRepo.one-table.js +99 -36
- package/dist/repositories/collectionsRepo.shared.d.ts +2 -2
- package/dist/repositories/collectionsRepo.shared.js +9 -4
- package/dist/repositories/pointsRepo.d.ts +6 -4
- package/dist/repositories/pointsRepo.js +8 -7
- package/dist/repositories/pointsRepo.one-table/Delete.d.ts +2 -2
- package/dist/repositories/pointsRepo.one-table/Delete.js +157 -60
- package/dist/repositories/pointsRepo.one-table/PathSegmentsFilter.d.ts +7 -5
- package/dist/repositories/pointsRepo.one-table/PathSegmentsFilter.js +44 -13
- package/dist/repositories/pointsRepo.one-table/Retrieve.d.ts +6 -0
- package/dist/repositories/pointsRepo.one-table/Retrieve.js +69 -0
- package/dist/repositories/pointsRepo.one-table/Search.d.ts +2 -3
- package/dist/repositories/pointsRepo.one-table/Search.js +102 -124
- package/dist/repositories/pointsRepo.one-table/Upsert.d.ts +2 -2
- package/dist/repositories/pointsRepo.one-table/Upsert.js +244 -48
- package/dist/repositories/pointsRepo.one-table.d.ts +1 -0
- package/dist/repositories/pointsRepo.one-table.js +1 -0
- package/dist/routes/collections.js +45 -36
- package/dist/routes/points.js +145 -56
- package/dist/server.js +42 -6
- package/dist/services/CollectionService.d.ts +7 -5
- package/dist/services/CollectionService.js +12 -9
- package/dist/services/CollectionService.one-table.js +1 -2
- package/dist/services/CollectionService.shared.d.ts +6 -5
- package/dist/services/CollectionService.shared.js +28 -12
- package/dist/services/PointsService.d.ts +8 -0
- package/dist/services/PointsService.js +132 -15
- package/dist/types.d.ts +4 -94
- package/dist/types.js +1 -54
- package/dist/utils/EnvParsers.d.ts +5 -0
- package/dist/utils/EnvParsers.js +30 -0
- package/dist/utils/PayloadSign.d.ts +4 -0
- package/dist/utils/PayloadSign.js +18 -0
- package/dist/utils/distance.d.ts +1 -12
- package/dist/utils/distance.js +0 -21
- package/dist/utils/pathPrefix.d.ts +3 -0
- package/dist/utils/pathPrefix.js +47 -0
- package/dist/utils/prefixExpansion.d.ts +1 -0
- package/dist/utils/prefixExpansion.js +11 -0
- package/dist/utils/qdrantResponse.d.ts +13 -0
- package/dist/utils/qdrantResponse.js +12 -0
- package/dist/utils/requestIdentity.d.ts +8 -0
- package/dist/utils/requestIdentity.js +52 -0
- package/dist/utils/retry.d.ts +2 -0
- package/dist/utils/retry.js +55 -11
- package/dist/utils/tenant.d.ts +12 -6
- package/dist/utils/tenant.js +41 -32
- package/dist/utils/vectorBinary.d.ts +0 -1
- package/dist/utils/vectorBinary.js +0 -98
- package/dist/utils/ydbErrors.d.ts +1 -0
- package/dist/utils/ydbErrors.js +14 -0
- package/dist/ydb/bootstrapMetaTable.js +14 -2
- package/dist/ydb/client.d.ts +10 -2
- package/dist/ydb/client.js +83 -24
- package/dist/ydb/helpers.d.ts +0 -1
- package/dist/ydb/helpers.js +1 -2
- package/dist/ydb/schema.d.ts +2 -0
- package/dist/ydb/schema.js +84 -7
- package/package.json +10 -5
|
@@ -1,3 +1,4 @@
|
|
|
1
|
-
import type { DistanceKind, VectorType } from "../
|
|
2
|
-
export declare function createCollectionOneTable(metaKey: string, dim: number, distance: DistanceKind, vectorType: VectorType): Promise<void>;
|
|
3
|
-
export declare function
|
|
1
|
+
import type { DistanceKind, VectorType } from "../qdrant/QdrantRestTypes.js";
|
|
2
|
+
export declare function createCollectionOneTable(metaKey: string, dim: number, distance: DistanceKind, vectorType: VectorType, userUid?: string): Promise<void>;
|
|
3
|
+
export declare function deleteAllPointsForCollectionOneTable(collection: string): Promise<void>;
|
|
4
|
+
export declare function deleteCollectionOneTable(metaKey: string, collection: string): Promise<void>;
|
|
@@ -1,36 +1,24 @@
|
|
|
1
1
|
import { TypedValues, Types, withSession, withQuerySession, createExecuteQuerySettings, } from "../ydb/client.js";
|
|
2
|
-
import { GLOBAL_POINTS_TABLE, ensureGlobalPointsTable } from "../ydb/schema.js";
|
|
2
|
+
import { GLOBAL_POINTS_TABLE, POINTS_BY_FILE_LOOKUP_TABLE, ensureGlobalPointsTable, ensurePointsByFileTable, } from "../ydb/schema.js";
|
|
3
3
|
import { upsertCollectionMeta } from "./collectionsRepo.shared.js";
|
|
4
4
|
import { withRetry, isTransientYdbError } from "../utils/retry.js";
|
|
5
|
+
import { isOutOfBufferMemoryYdbError } from "../utils/ydbErrors.js";
|
|
6
|
+
import { logger } from "../logging/logger.js";
|
|
5
7
|
const DELETE_COLLECTION_BATCH_SIZE = 10000;
|
|
6
|
-
function
|
|
7
|
-
const msg = error instanceof Error ? error.message : String(error);
|
|
8
|
-
if (/Out of buffer memory/i.test(msg)) {
|
|
9
|
-
return true;
|
|
10
|
-
}
|
|
11
|
-
if (typeof error === "object" && error !== null) {
|
|
12
|
-
const issues = error.issues;
|
|
13
|
-
if (issues !== undefined) {
|
|
14
|
-
const issuesText = typeof issues === "string" ? issues : JSON.stringify(issues);
|
|
15
|
-
return /Out of buffer memory/i.test(issuesText);
|
|
16
|
-
}
|
|
17
|
-
}
|
|
18
|
-
return false;
|
|
19
|
-
}
|
|
20
|
-
async function deletePointsForUidInChunks(s, uid) {
|
|
8
|
+
async function deletePointsForCollectionInChunks(s, collection) {
|
|
21
9
|
const selectYql = `
|
|
22
|
-
DECLARE $
|
|
10
|
+
DECLARE $collection AS Utf8;
|
|
23
11
|
DECLARE $limit AS Uint32;
|
|
24
12
|
SELECT point_id
|
|
25
13
|
FROM ${GLOBAL_POINTS_TABLE}
|
|
26
|
-
WHERE
|
|
14
|
+
WHERE collection = $collection
|
|
27
15
|
LIMIT $limit;
|
|
28
16
|
`;
|
|
29
17
|
const deleteBatchYql = `
|
|
30
|
-
DECLARE $
|
|
18
|
+
DECLARE $collection AS Utf8;
|
|
31
19
|
DECLARE $ids AS List<Utf8>;
|
|
32
20
|
DELETE FROM ${GLOBAL_POINTS_TABLE}
|
|
33
|
-
WHERE
|
|
21
|
+
WHERE collection = $collection AND point_id IN $ids;
|
|
34
22
|
`;
|
|
35
23
|
// Best‑effort loop: stop when there are no more rows for this collection.
|
|
36
24
|
// Each iteration only touches a limited number of rows to avoid
|
|
@@ -40,7 +28,7 @@ async function deletePointsForUidInChunks(s, uid) {
|
|
|
40
28
|
const settings = createExecuteQuerySettings();
|
|
41
29
|
while (iterations++ < MAX_ITERATIONS) {
|
|
42
30
|
const rs = (await s.executeQuery(selectYql, {
|
|
43
|
-
$
|
|
31
|
+
$collection: TypedValues.utf8(collection),
|
|
44
32
|
$limit: TypedValues.uint32(DELETE_COLLECTION_BATCH_SIZE),
|
|
45
33
|
}, undefined, settings));
|
|
46
34
|
const rowset = rs.resultSets?.[0];
|
|
@@ -53,20 +41,76 @@ async function deletePointsForUidInChunks(s, uid) {
|
|
|
53
41
|
}
|
|
54
42
|
const idsValue = TypedValues.list(Types.UTF8, ids);
|
|
55
43
|
await s.executeQuery(deleteBatchYql, {
|
|
56
|
-
$
|
|
44
|
+
$collection: TypedValues.utf8(collection),
|
|
57
45
|
$ids: idsValue,
|
|
58
46
|
}, undefined, settings);
|
|
59
47
|
}
|
|
60
48
|
}
|
|
61
|
-
|
|
62
|
-
|
|
49
|
+
async function deleteLookupRowsForCollectionInChunks(s, collection) {
|
|
50
|
+
const selectYql = `
|
|
51
|
+
DECLARE $collection AS Utf8;
|
|
52
|
+
DECLARE $limit AS Uint32;
|
|
53
|
+
SELECT file_path, point_id
|
|
54
|
+
FROM ${POINTS_BY_FILE_LOOKUP_TABLE}
|
|
55
|
+
WHERE collection = $collection
|
|
56
|
+
LIMIT $limit;
|
|
57
|
+
`;
|
|
58
|
+
const deleteBatchYql = `
|
|
59
|
+
DECLARE $rows AS List<Struct<
|
|
60
|
+
collection: Utf8,
|
|
61
|
+
file_path: Utf8,
|
|
62
|
+
point_id: Utf8
|
|
63
|
+
>>;
|
|
64
|
+
|
|
65
|
+
DELETE FROM ${POINTS_BY_FILE_LOOKUP_TABLE} ON
|
|
66
|
+
SELECT collection, file_path, point_id FROM AS_TABLE($rows);
|
|
67
|
+
`;
|
|
68
|
+
let iterations = 0;
|
|
69
|
+
const MAX_ITERATIONS = 10000;
|
|
70
|
+
const settings = createExecuteQuerySettings();
|
|
71
|
+
const rowType = Types.struct({
|
|
72
|
+
collection: Types.UTF8,
|
|
73
|
+
file_path: Types.UTF8,
|
|
74
|
+
point_id: Types.UTF8,
|
|
75
|
+
});
|
|
76
|
+
while (iterations++ < MAX_ITERATIONS) {
|
|
77
|
+
const rs = (await s.executeQuery(selectYql, {
|
|
78
|
+
$collection: TypedValues.utf8(collection),
|
|
79
|
+
$limit: TypedValues.uint32(DELETE_COLLECTION_BATCH_SIZE),
|
|
80
|
+
}, undefined, settings));
|
|
81
|
+
const rowset = rs.resultSets?.[0];
|
|
82
|
+
const rows = rowset?.rows ?? [];
|
|
83
|
+
const lookupRows = rows
|
|
84
|
+
.map((row) => ({
|
|
85
|
+
collection,
|
|
86
|
+
file_path: row.items?.[0]?.textValue,
|
|
87
|
+
point_id: row.items?.[1]?.textValue,
|
|
88
|
+
}))
|
|
89
|
+
.filter((row) => typeof row.file_path === "string" &&
|
|
90
|
+
typeof row.point_id === "string");
|
|
91
|
+
if (lookupRows.length === 0) {
|
|
92
|
+
break;
|
|
93
|
+
}
|
|
94
|
+
await s.executeQuery(deleteBatchYql, {
|
|
95
|
+
$rows: TypedValues.list(rowType, lookupRows),
|
|
96
|
+
}, undefined, settings);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
export async function createCollectionOneTable(metaKey, dim, distance, vectorType, userUid) {
|
|
100
|
+
await upsertCollectionMeta(metaKey, dim, distance, vectorType, GLOBAL_POINTS_TABLE, userUid);
|
|
63
101
|
}
|
|
64
|
-
export async function
|
|
102
|
+
export async function deleteAllPointsForCollectionOneTable(collection) {
|
|
65
103
|
await ensureGlobalPointsTable();
|
|
104
|
+
await ensurePointsByFileTable();
|
|
66
105
|
const batchDeletePointsYql = `
|
|
67
|
-
DECLARE $
|
|
106
|
+
DECLARE $collection AS Utf8;
|
|
68
107
|
BATCH DELETE FROM ${GLOBAL_POINTS_TABLE}
|
|
69
|
-
WHERE
|
|
108
|
+
WHERE collection = $collection;
|
|
109
|
+
`;
|
|
110
|
+
const batchDeleteLookupYql = `
|
|
111
|
+
DECLARE $collection AS Utf8;
|
|
112
|
+
BATCH DELETE FROM ${POINTS_BY_FILE_LOOKUP_TABLE}
|
|
113
|
+
WHERE collection = $collection;
|
|
70
114
|
`;
|
|
71
115
|
await withRetry(async () => {
|
|
72
116
|
try {
|
|
@@ -74,7 +118,26 @@ export async function deleteCollectionOneTable(metaKey, uid) {
|
|
|
74
118
|
await qs.execute({
|
|
75
119
|
text: batchDeletePointsYql,
|
|
76
120
|
parameters: {
|
|
77
|
-
$
|
|
121
|
+
$collection: TypedValues.utf8(collection),
|
|
122
|
+
},
|
|
123
|
+
});
|
|
124
|
+
});
|
|
125
|
+
}
|
|
126
|
+
catch (err) {
|
|
127
|
+
if (!isOutOfBufferMemoryYdbError(err)) {
|
|
128
|
+
throw err;
|
|
129
|
+
}
|
|
130
|
+
logger.warn({ tableName: GLOBAL_POINTS_TABLE, collection }, "BATCH DELETE hit out-of-buffer-memory, falling back to chunked deletion");
|
|
131
|
+
await withSession(async (s) => {
|
|
132
|
+
await deletePointsForCollectionInChunks(s, collection);
|
|
133
|
+
});
|
|
134
|
+
}
|
|
135
|
+
try {
|
|
136
|
+
await withQuerySession(async (qs) => {
|
|
137
|
+
await qs.execute({
|
|
138
|
+
text: batchDeleteLookupYql,
|
|
139
|
+
parameters: {
|
|
140
|
+
$collection: TypedValues.utf8(collection),
|
|
78
141
|
},
|
|
79
142
|
});
|
|
80
143
|
});
|
|
@@ -83,23 +146,23 @@ export async function deleteCollectionOneTable(metaKey, uid) {
|
|
|
83
146
|
if (!isOutOfBufferMemoryYdbError(err)) {
|
|
84
147
|
throw err;
|
|
85
148
|
}
|
|
86
|
-
|
|
87
|
-
// still reports an out-of-buffer-memory condition, fall back to
|
|
88
|
-
// per-uid chunked deletion strategy to complete the deletion.
|
|
149
|
+
logger.warn({ tableName: POINTS_BY_FILE_LOOKUP_TABLE, collection }, "BATCH DELETE hit out-of-buffer-memory, falling back to chunked deletion");
|
|
89
150
|
await withSession(async (s) => {
|
|
90
|
-
await
|
|
151
|
+
await deleteLookupRowsForCollectionInChunks(s, collection);
|
|
91
152
|
});
|
|
92
153
|
}
|
|
93
154
|
}, {
|
|
94
155
|
isTransient: isTransientYdbError,
|
|
95
156
|
context: {
|
|
96
|
-
operation: "
|
|
157
|
+
operation: "deleteAllPointsForCollectionOneTable",
|
|
97
158
|
tableName: GLOBAL_POINTS_TABLE,
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
mode: "batch_delete",
|
|
159
|
+
collection,
|
|
160
|
+
mode: "batch_delete_with_lookup_cleanup",
|
|
101
161
|
},
|
|
102
162
|
});
|
|
163
|
+
}
|
|
164
|
+
export async function deleteCollectionOneTable(metaKey, collection) {
|
|
165
|
+
await deleteAllPointsForCollectionOneTable(collection);
|
|
103
166
|
const delMeta = `
|
|
104
167
|
DECLARE $collection AS Utf8;
|
|
105
168
|
DELETE FROM qdr__collections WHERE collection = $collection;
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import type { DistanceKind, VectorType } from "../
|
|
2
|
-
export declare function upsertCollectionMeta(metaKey: string, dim: number, distance: DistanceKind, vectorType: VectorType, tableName: string): Promise<void>;
|
|
1
|
+
import type { DistanceKind, VectorType } from "../qdrant/QdrantRestTypes.js";
|
|
2
|
+
export declare function upsertCollectionMeta(metaKey: string, dim: number, distance: DistanceKind, vectorType: VectorType, tableName: string, userUid?: string): Promise<void>;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { UPSERT_OPERATION_TIMEOUT_MS } from "../config/env.js";
|
|
2
|
-
import { TypedValues, withSession, createExecuteQuerySettingsWithTimeout, } from "../ydb/client.js";
|
|
3
|
-
export async function upsertCollectionMeta(metaKey, dim, distance, vectorType, tableName) {
|
|
2
|
+
import { TypedValues, Types, withSession, createExecuteQuerySettingsWithTimeout, } from "../ydb/client.js";
|
|
3
|
+
export async function upsertCollectionMeta(metaKey, dim, distance, vectorType, tableName, userUid) {
|
|
4
4
|
const now = new Date();
|
|
5
5
|
const upsertMeta = `
|
|
6
6
|
DECLARE $collection AS Utf8;
|
|
@@ -10,9 +10,13 @@ export async function upsertCollectionMeta(metaKey, dim, distance, vectorType, t
|
|
|
10
10
|
DECLARE $vtype AS Utf8;
|
|
11
11
|
DECLARE $created AS Timestamp;
|
|
12
12
|
DECLARE $last_accessed AS Timestamp;
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
DECLARE $user_uid AS Optional<Utf8>;
|
|
14
|
+
UPSERT INTO qdr__collections (collection, table_name, vector_dimension, distance, vector_type, created_at, last_accessed_at, user_uid)
|
|
15
|
+
VALUES ($collection, $table, $dim, $distance, $vtype, $created, $last_accessed, $user_uid);
|
|
15
16
|
`;
|
|
17
|
+
const userUidValue = userUid && userUid.trim().length > 0
|
|
18
|
+
? TypedValues.optional(TypedValues.utf8(userUid))
|
|
19
|
+
: TypedValues.optionalNull(Types.UTF8);
|
|
16
20
|
await withSession(async (s) => {
|
|
17
21
|
const settings = createExecuteQuerySettingsWithTimeout({
|
|
18
22
|
keepInCache: true,
|
|
@@ -27,6 +31,7 @@ export async function upsertCollectionMeta(metaKey, dim, distance, vectorType, t
|
|
|
27
31
|
$vtype: TypedValues.utf8(vectorType),
|
|
28
32
|
$created: TypedValues.timestamp(now),
|
|
29
33
|
$last_accessed: TypedValues.timestamp(now),
|
|
34
|
+
$user_uid: userUidValue,
|
|
30
35
|
}, undefined, settings);
|
|
31
36
|
});
|
|
32
37
|
}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
import type { YdbQdrantScoredPoint } from "../qdrant/QdrantRestTypes.js";
|
|
3
|
-
|
|
4
|
-
export declare function
|
|
1
|
+
import type { UpsertPoint } from "../qdrant/Requests.js";
|
|
2
|
+
import type { DistanceKind, YdbQdrantScoredPoint } from "../qdrant/QdrantRestTypes.js";
|
|
3
|
+
import type { RetrievedPoint } from "./pointsRepo.one-table/Retrieve.js";
|
|
4
|
+
export declare function upsertPoints(tableName: string, points: UpsertPoint[], dimension: number, uid: string, apiKey: string): Promise<number>;
|
|
5
|
+
export declare function searchPoints(tableName: string, queryVector: number[], top: number, withPayload: boolean | undefined, distance: DistanceKind, dimension: number, uid: string, apiKey: string, filterPaths?: Array<Array<string>>): Promise<YdbQdrantScoredPoint[]>;
|
|
5
6
|
export declare function deletePoints(tableName: string, ids: Array<string | number>, uid: string): Promise<number>;
|
|
6
7
|
export declare function deletePointsByPathSegments(tableName: string, uid: string, paths: Array<Array<string>>): Promise<number>;
|
|
8
|
+
export declare function retrievePointsByIds(tableName: string, ids: Array<string | number>, uid: string, apiKey: string, withPayload: boolean): Promise<RetrievedPoint[]>;
|
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
return await upsertPointsOneTable(tableName, points, dimension, uid);
|
|
1
|
+
import { upsertPointsOneTable, searchPointsOneTable, deletePointsOneTable, deletePointsByPathSegmentsOneTable, retrievePointsByIdsOneTable, } from "./pointsRepo.one-table.js";
|
|
2
|
+
export async function upsertPoints(tableName, points, dimension, uid, apiKey) {
|
|
3
|
+
return await upsertPointsOneTable(tableName, points, dimension, uid, apiKey);
|
|
5
4
|
}
|
|
6
|
-
export async function searchPoints(tableName, queryVector, top, withPayload, distance, dimension, uid, filterPaths) {
|
|
7
|
-
|
|
8
|
-
return await searchPointsOneTable(tableName, queryVector, top, withPayload, distance, dimension, uid, mode, OVERFETCH_MULTIPLIER, filterPaths);
|
|
5
|
+
export async function searchPoints(tableName, queryVector, top, withPayload, distance, dimension, uid, apiKey, filterPaths) {
|
|
6
|
+
return await searchPointsOneTable(tableName, queryVector, top, withPayload, distance, dimension, uid, apiKey, filterPaths);
|
|
9
7
|
}
|
|
10
8
|
export async function deletePoints(tableName, ids, uid) {
|
|
11
9
|
return await deletePointsOneTable(tableName, ids, uid);
|
|
@@ -13,3 +11,6 @@ export async function deletePoints(tableName, ids, uid) {
|
|
|
13
11
|
export async function deletePointsByPathSegments(tableName, uid, paths) {
|
|
14
12
|
return await deletePointsByPathSegmentsOneTable(tableName, uid, paths);
|
|
15
13
|
}
|
|
14
|
+
export async function retrievePointsByIds(tableName, ids, uid, apiKey, withPayload) {
|
|
15
|
+
return await retrievePointsByIdsOneTable(tableName, ids, uid, apiKey, withPayload);
|
|
16
|
+
}
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare function deletePointsOneTable(tableName: string, ids: Array<string | number>,
|
|
2
|
-
export declare function deletePointsByPathSegmentsOneTable(tableName: string,
|
|
1
|
+
export declare function deletePointsOneTable(tableName: string, ids: Array<string | number>, collection: string): Promise<number>;
|
|
2
|
+
export declare function deletePointsByPathSegmentsOneTable(tableName: string, collection: string, paths: Array<Array<string>>): Promise<number>;
|
|
@@ -1,29 +1,56 @@
|
|
|
1
|
-
import { TypedValues, withSession, createExecuteQuerySettings, } from "../../ydb/client.js";
|
|
2
|
-
import { withRetry, isTransientYdbError } from "../../utils/retry.js";
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
1
|
+
import { TypedValues, Types, withSession, createExecuteQuerySettings, } from "../../ydb/client.js";
|
|
2
|
+
import { withRetry, isTransientYdbError, isTransientYdbErrorInAcquiredSession, } from "../../utils/retry.js";
|
|
3
|
+
import { DELETE_FILTER_SELECT_BATCH_SIZE } from "../../config/env.js";
|
|
4
|
+
import { buildExactPathSegmentsFilter } from "./PathSegmentsFilter.js";
|
|
5
|
+
import { pathSegmentsToPrefix } from "../../utils/pathPrefix.js";
|
|
6
|
+
import { POINTS_BY_FILE_LOOKUP_TABLE, ensurePointsByFileTable, } from "../../ydb/schema.js";
|
|
7
|
+
const DELETE_FILTER_PATHS_CHUNK_SIZE = 250;
|
|
8
|
+
const DELETE_FILTER_PATHS_CHUNK_CONCURRENCY = 3;
|
|
9
|
+
// Delete is idempotent in our usage (DELETE by PK / DELETE by selected keys),
|
|
10
|
+
// so it is safe to use stronger retries for transient YDB states like Aborted/Undetermined.
|
|
11
|
+
// Keep backoff capped to avoid multi-minute request tails during outages.
|
|
12
|
+
const DELETE_BY_ID_RETRY_MAX_RETRIES = 10;
|
|
13
|
+
const DELETE_BY_ID_RETRY_BASE_DELAY_MS = 250;
|
|
14
|
+
const DELETE_BY_ID_RETRY_MAX_BACKOFF_MS = 1500;
|
|
15
|
+
const DELETE_BY_FILTER_RETRY_MAX_RETRIES = 12;
|
|
16
|
+
const DELETE_BY_FILTER_RETRY_BASE_DELAY_MS = 300;
|
|
17
|
+
const DELETE_BY_FILTER_RETRY_MAX_BACKOFF_MS = 2000;
|
|
18
|
+
export async function deletePointsOneTable(tableName, ids, collection) {
|
|
19
|
+
if (ids.length === 0) {
|
|
20
|
+
return 0;
|
|
21
|
+
}
|
|
22
|
+
const yql = `
|
|
23
|
+
DECLARE $collection AS Utf8;
|
|
24
|
+
DECLARE $ids AS List<Utf8>;
|
|
25
|
+
|
|
26
|
+
DELETE FROM ${tableName}
|
|
27
|
+
WHERE collection = $collection AND point_id IN $ids;
|
|
28
|
+
|
|
29
|
+
$lookup_rows = (
|
|
30
|
+
SELECT collection, file_path, point_id
|
|
31
|
+
FROM ${POINTS_BY_FILE_LOOKUP_TABLE}
|
|
32
|
+
WHERE collection = $collection AND point_id IN $ids
|
|
33
|
+
);
|
|
34
|
+
|
|
35
|
+
DELETE FROM ${POINTS_BY_FILE_LOOKUP_TABLE} ON
|
|
36
|
+
SELECT collection, file_path, point_id FROM $lookup_rows;
|
|
37
|
+
`;
|
|
38
|
+
const stringIds = ids.map((id) => String(id));
|
|
39
|
+
await ensurePointsByFileTable();
|
|
7
40
|
await withSession(async (s) => {
|
|
8
41
|
const settings = createExecuteQuerySettings();
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
await withRetry(() => s.executeQuery(yql, params, undefined, settings), {
|
|
20
|
-
isTransient: isTransientYdbError,
|
|
21
|
-
context: { tableName, uid, pointId: String(id) },
|
|
22
|
-
});
|
|
23
|
-
deleted += 1;
|
|
24
|
-
}
|
|
42
|
+
await withRetry(() => s.executeQuery(yql, {
|
|
43
|
+
$collection: TypedValues.utf8(collection),
|
|
44
|
+
$ids: TypedValues.list(Types.UTF8, stringIds),
|
|
45
|
+
}, undefined, settings), {
|
|
46
|
+
isTransient: isTransientYdbErrorInAcquiredSession,
|
|
47
|
+
maxRetries: DELETE_BY_ID_RETRY_MAX_RETRIES,
|
|
48
|
+
baseDelayMs: DELETE_BY_ID_RETRY_BASE_DELAY_MS,
|
|
49
|
+
maxBackoffMs: DELETE_BY_ID_RETRY_MAX_BACKOFF_MS,
|
|
50
|
+
context: { tableName, collection, idCount: ids.length },
|
|
51
|
+
});
|
|
25
52
|
});
|
|
26
|
-
return
|
|
53
|
+
return ids.length;
|
|
27
54
|
}
|
|
28
55
|
const MAX_SAFE_BIGINT = BigInt(Number.MAX_SAFE_INTEGER);
|
|
29
56
|
function bigintToSafeNumberOrNull(value) {
|
|
@@ -105,62 +132,132 @@ function readDeletedCountFromResult(rs) {
|
|
|
105
132
|
}
|
|
106
133
|
return 0;
|
|
107
134
|
}
|
|
108
|
-
|
|
109
|
-
if (paths.length === 0) {
|
|
110
|
-
return 0;
|
|
111
|
-
}
|
|
112
|
-
const { whereSql, params: whereParams } = buildPathSegmentsWhereClause(paths);
|
|
113
|
-
const whereParamDeclarations = Object.keys(whereParams)
|
|
114
|
-
.sort()
|
|
115
|
-
.map((key) => `DECLARE ${key} AS Utf8;`)
|
|
116
|
-
.join("\n ");
|
|
135
|
+
async function deletePointsByPathSegmentsChunked(tableName, collection, whereSql, whereParamDeclarations, whereParams, batchLimit) {
|
|
117
136
|
const deleteBatchYql = `
|
|
118
|
-
DECLARE $
|
|
137
|
+
DECLARE $collection AS Utf8;
|
|
119
138
|
DECLARE $limit AS Uint32;
|
|
120
139
|
${whereParamDeclarations}
|
|
121
140
|
|
|
122
141
|
$to_delete = (
|
|
123
|
-
SELECT
|
|
124
|
-
FROM ${
|
|
125
|
-
WHERE
|
|
142
|
+
SELECT collection, file_path, point_id
|
|
143
|
+
FROM ${POINTS_BY_FILE_LOOKUP_TABLE}
|
|
144
|
+
WHERE collection = $collection AND ${whereSql}
|
|
126
145
|
LIMIT $limit
|
|
127
146
|
);
|
|
128
147
|
|
|
129
148
|
DELETE FROM ${tableName} ON
|
|
130
|
-
SELECT
|
|
149
|
+
SELECT collection, point_id FROM $to_delete;
|
|
150
|
+
|
|
151
|
+
DELETE FROM ${POINTS_BY_FILE_LOOKUP_TABLE} ON
|
|
152
|
+
SELECT collection, file_path, point_id FROM $to_delete;
|
|
131
153
|
|
|
132
154
|
SELECT CAST(COUNT(*) AS Uint32) AS deleted FROM $to_delete;
|
|
133
155
|
`;
|
|
156
|
+
const settings = createExecuteQuerySettings();
|
|
134
157
|
let deleted = 0;
|
|
135
|
-
|
|
136
|
-
const
|
|
137
|
-
|
|
138
|
-
// Use limited batches to avoid per-operation buffer limits.
|
|
139
|
-
while (true) {
|
|
140
|
-
const rs = (await withRetry(() => s.executeQuery(deleteBatchYql, {
|
|
158
|
+
while (true) {
|
|
159
|
+
const rs = await withSession(async (s) => {
|
|
160
|
+
return await withRetry(() => s.executeQuery(deleteBatchYql, {
|
|
141
161
|
...whereParams,
|
|
142
|
-
$
|
|
143
|
-
$limit: TypedValues.uint32(
|
|
162
|
+
$collection: TypedValues.utf8(collection),
|
|
163
|
+
$limit: TypedValues.uint32(batchLimit),
|
|
144
164
|
}, undefined, settings), {
|
|
145
|
-
isTransient:
|
|
165
|
+
isTransient: isTransientYdbErrorInAcquiredSession,
|
|
166
|
+
maxRetries: DELETE_BY_FILTER_RETRY_MAX_RETRIES,
|
|
167
|
+
baseDelayMs: DELETE_BY_FILTER_RETRY_BASE_DELAY_MS,
|
|
168
|
+
maxBackoffMs: DELETE_BY_FILTER_RETRY_MAX_BACKOFF_MS,
|
|
146
169
|
context: {
|
|
147
170
|
tableName,
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
batchLimit
|
|
171
|
+
collection,
|
|
172
|
+
filterParamsCount: Object.keys(whereParams).length,
|
|
173
|
+
batchLimit,
|
|
151
174
|
},
|
|
152
|
-
})
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
deleted += batchDeleted;
|
|
175
|
+
});
|
|
176
|
+
});
|
|
177
|
+
const batchDeleted = readDeletedCountFromResult(rs);
|
|
178
|
+
if (!Number.isSafeInteger(batchDeleted) ||
|
|
179
|
+
batchDeleted < 0 ||
|
|
180
|
+
batchDeleted > batchLimit) {
|
|
181
|
+
throw new Error(`Unexpected deleted count from YDB: ${String(batchDeleted)}. Expected an integer in [0, ${batchLimit}].`);
|
|
182
|
+
}
|
|
183
|
+
if (batchDeleted <= 0) {
|
|
184
|
+
break;
|
|
163
185
|
}
|
|
186
|
+
deleted += batchDeleted;
|
|
187
|
+
}
|
|
188
|
+
return deleted;
|
|
189
|
+
}
|
|
190
|
+
function dedupePathSegmentsPaths(paths) {
|
|
191
|
+
const seenPrefixes = new Set();
|
|
192
|
+
const uniquePaths = [];
|
|
193
|
+
for (const pathSegments of paths) {
|
|
194
|
+
const prefix = pathSegmentsToPrefix(pathSegments);
|
|
195
|
+
if (seenPrefixes.has(prefix)) {
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
seenPrefixes.add(prefix);
|
|
199
|
+
uniquePaths.push(pathSegments);
|
|
200
|
+
}
|
|
201
|
+
return uniquePaths;
|
|
202
|
+
}
|
|
203
|
+
async function deletePointsByPathSegmentsChunkWithRetry(tableName, collection, chunk, totalInputPathsCount, chunkCount) {
|
|
204
|
+
const filter = buildExactPathSegmentsFilter(chunk.chunkPaths, "file_path");
|
|
205
|
+
if (!filter) {
|
|
206
|
+
return 0;
|
|
207
|
+
}
|
|
208
|
+
const { whereSql, whereParamDeclarations, whereParams } = filter;
|
|
209
|
+
return await withRetry(async () => {
|
|
210
|
+
return await deletePointsByPathSegmentsChunked(tableName, collection, whereSql, whereParamDeclarations, whereParams, DELETE_FILTER_SELECT_BATCH_SIZE);
|
|
211
|
+
}, {
|
|
212
|
+
isTransient: isTransientYdbError,
|
|
213
|
+
maxRetries: DELETE_BY_FILTER_RETRY_MAX_RETRIES,
|
|
214
|
+
baseDelayMs: DELETE_BY_FILTER_RETRY_BASE_DELAY_MS,
|
|
215
|
+
maxBackoffMs: DELETE_BY_FILTER_RETRY_MAX_BACKOFF_MS,
|
|
216
|
+
context: {
|
|
217
|
+
operation: "deletePointsByPathSegmentsOneTable",
|
|
218
|
+
tableName,
|
|
219
|
+
collection,
|
|
220
|
+
filterPathsCount: totalInputPathsCount,
|
|
221
|
+
chunkPathsCount: chunk.chunkPaths.length,
|
|
222
|
+
chunkIndex: chunk.chunkIndex,
|
|
223
|
+
chunkCount,
|
|
224
|
+
concurrency: DELETE_FILTER_PATHS_CHUNK_CONCURRENCY,
|
|
225
|
+
mode: "points_by_file_lookup_delete",
|
|
226
|
+
},
|
|
164
227
|
});
|
|
228
|
+
}
|
|
229
|
+
export async function deletePointsByPathSegmentsOneTable(tableName, collection, paths) {
|
|
230
|
+
if (paths.length === 0) {
|
|
231
|
+
return 0;
|
|
232
|
+
}
|
|
233
|
+
await ensurePointsByFileTable();
|
|
234
|
+
const uniquePaths = dedupePathSegmentsPaths(paths);
|
|
235
|
+
const chunks = [];
|
|
236
|
+
for (let chunkStart = 0; chunkStart < uniquePaths.length; chunkStart += DELETE_FILTER_PATHS_CHUNK_SIZE) {
|
|
237
|
+
chunks.push({
|
|
238
|
+
chunkIndex: Math.floor(chunkStart / DELETE_FILTER_PATHS_CHUNK_SIZE) + 1,
|
|
239
|
+
chunkPaths: uniquePaths.slice(chunkStart, chunkStart + DELETE_FILTER_PATHS_CHUNK_SIZE),
|
|
240
|
+
});
|
|
241
|
+
}
|
|
242
|
+
const chunkCount = chunks.length;
|
|
243
|
+
let deleted = 0;
|
|
244
|
+
for (let batchStart = 0; batchStart < chunkCount; batchStart += DELETE_FILTER_PATHS_CHUNK_CONCURRENCY) {
|
|
245
|
+
const chunkBatch = chunks.slice(batchStart, batchStart + DELETE_FILTER_PATHS_CHUNK_CONCURRENCY);
|
|
246
|
+
const batchResults = await Promise.allSettled(chunkBatch.map((chunk) => deletePointsByPathSegmentsChunkWithRetry(tableName, collection, chunk, uniquePaths.length, chunkCount)));
|
|
247
|
+
let firstError;
|
|
248
|
+
for (const result of batchResults) {
|
|
249
|
+
if (result.status === "fulfilled") {
|
|
250
|
+
deleted += result.value;
|
|
251
|
+
continue;
|
|
252
|
+
}
|
|
253
|
+
firstError ??=
|
|
254
|
+
result.reason instanceof Error
|
|
255
|
+
? result.reason
|
|
256
|
+
: new Error(String(result.reason));
|
|
257
|
+
}
|
|
258
|
+
if (firstError !== undefined) {
|
|
259
|
+
throw firstError;
|
|
260
|
+
}
|
|
261
|
+
}
|
|
165
262
|
return deleted;
|
|
166
263
|
}
|
|
@@ -2,13 +2,15 @@ import type { Ydb } from "ydb-sdk";
|
|
|
2
2
|
type QueryParams = {
|
|
3
3
|
[key: string]: Ydb.ITypedValue;
|
|
4
4
|
};
|
|
5
|
+
type BuiltPathSegmentsFilter = {
|
|
6
|
+
whereSql: string;
|
|
7
|
+
whereParamDeclarations: string;
|
|
8
|
+
whereParams: QueryParams;
|
|
9
|
+
};
|
|
5
10
|
export declare function buildPathSegmentsWhereClause(paths: Array<Array<string>>): {
|
|
6
11
|
whereSql: string;
|
|
7
12
|
params: QueryParams;
|
|
8
13
|
};
|
|
9
|
-
export declare function
|
|
10
|
-
|
|
11
|
-
whereParamDeclarations: string;
|
|
12
|
-
whereParams: QueryParams;
|
|
13
|
-
} | undefined;
|
|
14
|
+
export declare function buildPrefixPathSegmentsFilter(paths: Array<Array<string>> | undefined, columnName?: string): BuiltPathSegmentsFilter | undefined;
|
|
15
|
+
export declare function buildExactPathSegmentsFilter(paths: Array<Array<string>> | undefined, columnName?: string): BuiltPathSegmentsFilter | undefined;
|
|
14
16
|
export {};
|
|
@@ -1,33 +1,64 @@
|
|
|
1
1
|
import { TypedValues } from "../../ydb/client.js";
|
|
2
|
-
|
|
2
|
+
import { pathSegmentsToPrefix } from "../../utils/pathPrefix.js";
|
|
3
|
+
function buildPrefixPathSegmentsWhereClauseForColumn(paths, columnName) {
|
|
3
4
|
const params = {};
|
|
4
5
|
const orGroups = [];
|
|
5
6
|
for (let pIdx = 0; pIdx < paths.length; pIdx += 1) {
|
|
6
7
|
const segs = paths[pIdx];
|
|
7
8
|
if (segs.length === 0) {
|
|
8
|
-
throw new Error("
|
|
9
|
+
throw new Error("pathSegments filter: empty path segments");
|
|
9
10
|
}
|
|
10
|
-
const
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
11
|
+
const prefix = pathSegmentsToPrefix(segs);
|
|
12
|
+
const exactParam = `$ppfx${pIdx}`;
|
|
13
|
+
const descendantParam = `$ppfxd${pIdx}`;
|
|
14
|
+
params[exactParam] = TypedValues.utf8(prefix);
|
|
15
|
+
params[descendantParam] = TypedValues.utf8(`${prefix}/`);
|
|
16
|
+
orGroups.push(`(${columnName} = ${exactParam} OR StartsWith(${columnName}, ${descendantParam}))`);
|
|
17
|
+
}
|
|
18
|
+
return {
|
|
19
|
+
whereSql: orGroups.length === 1 ? orGroups[0] : `(${orGroups.join(" OR ")})`,
|
|
20
|
+
params,
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
function buildExactPathSegmentsWhereClauseForColumn(paths, columnName) {
|
|
24
|
+
const params = {};
|
|
25
|
+
const orGroups = [];
|
|
26
|
+
for (let pIdx = 0; pIdx < paths.length; pIdx += 1) {
|
|
27
|
+
const segs = paths[pIdx];
|
|
28
|
+
if (segs.length === 0) {
|
|
29
|
+
throw new Error("pathSegments filter: empty path segments");
|
|
16
30
|
}
|
|
17
|
-
|
|
31
|
+
const prefix = pathSegmentsToPrefix(segs);
|
|
32
|
+
const exactParam = `$ppfx${pIdx}`;
|
|
33
|
+
params[exactParam] = TypedValues.utf8(prefix);
|
|
34
|
+
orGroups.push(`${columnName} = ${exactParam}`);
|
|
18
35
|
}
|
|
19
36
|
return {
|
|
20
37
|
whereSql: orGroups.length === 1 ? orGroups[0] : `(${orGroups.join(" OR ")})`,
|
|
21
38
|
params,
|
|
22
39
|
};
|
|
23
40
|
}
|
|
24
|
-
|
|
25
|
-
if (!paths || paths.length === 0)
|
|
26
|
-
return undefined;
|
|
27
|
-
const { whereSql, params: whereParams } = buildPathSegmentsWhereClause(paths);
|
|
41
|
+
function buildPathSegmentsFilterFromWhereClause(whereSql, whereParams) {
|
|
28
42
|
const whereParamDeclarations = Object.keys(whereParams)
|
|
29
43
|
.sort()
|
|
30
44
|
.map((key) => `DECLARE ${key} AS Utf8;`)
|
|
31
45
|
.join("\n ");
|
|
32
46
|
return { whereSql, whereParamDeclarations, whereParams };
|
|
33
47
|
}
|
|
48
|
+
export function buildPathSegmentsWhereClause(paths) {
|
|
49
|
+
return buildPrefixPathSegmentsWhereClauseForColumn(paths, "path_prefix");
|
|
50
|
+
}
|
|
51
|
+
export function buildPrefixPathSegmentsFilter(paths, columnName = "path_prefix") {
|
|
52
|
+
if (!paths || paths.length === 0) {
|
|
53
|
+
return undefined;
|
|
54
|
+
}
|
|
55
|
+
const { whereSql, params: whereParams } = buildPrefixPathSegmentsWhereClauseForColumn(paths, columnName);
|
|
56
|
+
return buildPathSegmentsFilterFromWhereClause(whereSql, whereParams);
|
|
57
|
+
}
|
|
58
|
+
export function buildExactPathSegmentsFilter(paths, columnName = "file_path") {
|
|
59
|
+
if (!paths || paths.length === 0) {
|
|
60
|
+
return undefined;
|
|
61
|
+
}
|
|
62
|
+
const { whereSql, params: whereParams } = buildExactPathSegmentsWhereClauseForColumn(paths, columnName);
|
|
63
|
+
return buildPathSegmentsFilterFromWhereClause(whereSql, whereParams);
|
|
64
|
+
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { Payload } from "../../qdrant/QdrantRestTypes.js";
|
|
2
|
+
export interface RetrievedPoint {
|
|
3
|
+
id: string;
|
|
4
|
+
payload: Payload | null;
|
|
5
|
+
}
|
|
6
|
+
export declare function retrievePointsByIdsOneTable(tableName: string, ids: Array<string | number>, uid: string, apiKey: string, withPayload: boolean): Promise<RetrievedPoint[]>;
|