ydb-qdrant 8.1.0 → 9.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/README.md +20 -18
  2. package/dist/SmokeTest.js +2 -2
  3. package/dist/compute/ComputePool.d.ts +5 -0
  4. package/dist/compute/ComputePool.js +64 -0
  5. package/dist/compute/ComputeWorker.d.ts +36 -0
  6. package/dist/compute/ComputeWorker.js +84 -0
  7. package/dist/config/env.d.ts +24 -7
  8. package/dist/config/env.js +65 -35
  9. package/dist/index.d.ts +2 -0
  10. package/dist/index.js +92 -2
  11. package/dist/logging/DeployLogFormatter.d.ts +2 -0
  12. package/dist/logging/DeployLogFormatter.js +131 -0
  13. package/dist/logging/logger.js +13 -1
  14. package/dist/logging/requestContext.d.ts +17 -0
  15. package/dist/logging/requestContext.js +43 -0
  16. package/dist/middleware/requestLogger.js +134 -6
  17. package/dist/middleware/upsertBodyPhase.d.ts +6 -0
  18. package/dist/middleware/upsertBodyPhase.js +184 -0
  19. package/dist/middleware/upsertRequestTimeout.d.ts +16 -0
  20. package/dist/middleware/upsertRequestTimeout.js +158 -0
  21. package/dist/package/api.d.ts +20 -12
  22. package/dist/package/api.js +57 -28
  23. package/dist/qdrant/QdrantRestTypes.d.ts +4 -0
  24. package/dist/qdrant/Requests.d.ts +97 -0
  25. package/dist/qdrant/Requests.js +72 -0
  26. package/dist/repositories/collectionsRepo.d.ts +18 -2
  27. package/dist/repositories/collectionsRepo.js +103 -7
  28. package/dist/repositories/collectionsRepo.one-table.d.ts +4 -3
  29. package/dist/repositories/collectionsRepo.one-table.js +99 -36
  30. package/dist/repositories/collectionsRepo.shared.d.ts +2 -2
  31. package/dist/repositories/collectionsRepo.shared.js +9 -4
  32. package/dist/repositories/pointsRepo.d.ts +6 -4
  33. package/dist/repositories/pointsRepo.js +8 -7
  34. package/dist/repositories/pointsRepo.one-table/Delete.d.ts +2 -2
  35. package/dist/repositories/pointsRepo.one-table/Delete.js +157 -60
  36. package/dist/repositories/pointsRepo.one-table/PathSegmentsFilter.d.ts +7 -5
  37. package/dist/repositories/pointsRepo.one-table/PathSegmentsFilter.js +44 -13
  38. package/dist/repositories/pointsRepo.one-table/Retrieve.d.ts +6 -0
  39. package/dist/repositories/pointsRepo.one-table/Retrieve.js +69 -0
  40. package/dist/repositories/pointsRepo.one-table/Search.d.ts +2 -3
  41. package/dist/repositories/pointsRepo.one-table/Search.js +102 -124
  42. package/dist/repositories/pointsRepo.one-table/Upsert.d.ts +2 -2
  43. package/dist/repositories/pointsRepo.one-table/Upsert.js +244 -48
  44. package/dist/repositories/pointsRepo.one-table.d.ts +1 -0
  45. package/dist/repositories/pointsRepo.one-table.js +1 -0
  46. package/dist/routes/collections.js +45 -36
  47. package/dist/routes/points.js +145 -56
  48. package/dist/server.js +42 -6
  49. package/dist/services/CollectionService.d.ts +7 -5
  50. package/dist/services/CollectionService.js +12 -9
  51. package/dist/services/CollectionService.one-table.js +1 -2
  52. package/dist/services/CollectionService.shared.d.ts +6 -5
  53. package/dist/services/CollectionService.shared.js +28 -12
  54. package/dist/services/PointsService.d.ts +8 -0
  55. package/dist/services/PointsService.js +132 -15
  56. package/dist/types.d.ts +4 -94
  57. package/dist/types.js +1 -54
  58. package/dist/utils/EnvParsers.d.ts +5 -0
  59. package/dist/utils/EnvParsers.js +30 -0
  60. package/dist/utils/PayloadSign.d.ts +4 -0
  61. package/dist/utils/PayloadSign.js +18 -0
  62. package/dist/utils/distance.d.ts +1 -12
  63. package/dist/utils/distance.js +0 -21
  64. package/dist/utils/pathPrefix.d.ts +3 -0
  65. package/dist/utils/pathPrefix.js +47 -0
  66. package/dist/utils/prefixExpansion.d.ts +1 -0
  67. package/dist/utils/prefixExpansion.js +11 -0
  68. package/dist/utils/qdrantResponse.d.ts +13 -0
  69. package/dist/utils/qdrantResponse.js +12 -0
  70. package/dist/utils/requestIdentity.d.ts +8 -0
  71. package/dist/utils/requestIdentity.js +52 -0
  72. package/dist/utils/retry.d.ts +2 -0
  73. package/dist/utils/retry.js +55 -11
  74. package/dist/utils/tenant.d.ts +12 -6
  75. package/dist/utils/tenant.js +41 -32
  76. package/dist/utils/vectorBinary.d.ts +0 -1
  77. package/dist/utils/vectorBinary.js +0 -98
  78. package/dist/utils/ydbErrors.d.ts +1 -0
  79. package/dist/utils/ydbErrors.js +14 -0
  80. package/dist/ydb/bootstrapMetaTable.js +14 -2
  81. package/dist/ydb/client.d.ts +10 -2
  82. package/dist/ydb/client.js +83 -24
  83. package/dist/ydb/helpers.d.ts +0 -1
  84. package/dist/ydb/helpers.js +1 -2
  85. package/dist/ydb/schema.d.ts +2 -0
  86. package/dist/ydb/schema.js +84 -7
  87. package/package.json +10 -5
@@ -1,3 +1,4 @@
1
- import type { DistanceKind, VectorType } from "../types";
2
- export declare function createCollectionOneTable(metaKey: string, dim: number, distance: DistanceKind, vectorType: VectorType): Promise<void>;
3
- export declare function deleteCollectionOneTable(metaKey: string, uid: string): Promise<void>;
1
+ import type { DistanceKind, VectorType } from "../qdrant/QdrantRestTypes.js";
2
+ export declare function createCollectionOneTable(metaKey: string, dim: number, distance: DistanceKind, vectorType: VectorType, userUid?: string): Promise<void>;
3
+ export declare function deleteAllPointsForCollectionOneTable(collection: string): Promise<void>;
4
+ export declare function deleteCollectionOneTable(metaKey: string, collection: string): Promise<void>;
@@ -1,36 +1,24 @@
1
1
  import { TypedValues, Types, withSession, withQuerySession, createExecuteQuerySettings, } from "../ydb/client.js";
2
- import { GLOBAL_POINTS_TABLE, ensureGlobalPointsTable } from "../ydb/schema.js";
2
+ import { GLOBAL_POINTS_TABLE, POINTS_BY_FILE_LOOKUP_TABLE, ensureGlobalPointsTable, ensurePointsByFileTable, } from "../ydb/schema.js";
3
3
  import { upsertCollectionMeta } from "./collectionsRepo.shared.js";
4
4
  import { withRetry, isTransientYdbError } from "../utils/retry.js";
5
+ import { isOutOfBufferMemoryYdbError } from "../utils/ydbErrors.js";
6
+ import { logger } from "../logging/logger.js";
5
7
  const DELETE_COLLECTION_BATCH_SIZE = 10000;
6
- function isOutOfBufferMemoryYdbError(error) {
7
- const msg = error instanceof Error ? error.message : String(error);
8
- if (/Out of buffer memory/i.test(msg)) {
9
- return true;
10
- }
11
- if (typeof error === "object" && error !== null) {
12
- const issues = error.issues;
13
- if (issues !== undefined) {
14
- const issuesText = typeof issues === "string" ? issues : JSON.stringify(issues);
15
- return /Out of buffer memory/i.test(issuesText);
16
- }
17
- }
18
- return false;
19
- }
20
- async function deletePointsForUidInChunks(s, uid) {
8
+ async function deletePointsForCollectionInChunks(s, collection) {
21
9
  const selectYql = `
22
- DECLARE $uid AS Utf8;
10
+ DECLARE $collection AS Utf8;
23
11
  DECLARE $limit AS Uint32;
24
12
  SELECT point_id
25
13
  FROM ${GLOBAL_POINTS_TABLE}
26
- WHERE uid = $uid
14
+ WHERE collection = $collection
27
15
  LIMIT $limit;
28
16
  `;
29
17
  const deleteBatchYql = `
30
- DECLARE $uid AS Utf8;
18
+ DECLARE $collection AS Utf8;
31
19
  DECLARE $ids AS List<Utf8>;
32
20
  DELETE FROM ${GLOBAL_POINTS_TABLE}
33
- WHERE uid = $uid AND point_id IN $ids;
21
+ WHERE collection = $collection AND point_id IN $ids;
34
22
  `;
35
23
  // Best‑effort loop: stop when there are no more rows for this uid.
36
24
  // Each iteration only touches a limited number of rows to avoid
@@ -40,7 +28,7 @@ async function deletePointsForUidInChunks(s, uid) {
40
28
  const settings = createExecuteQuerySettings();
41
29
  while (iterations++ < MAX_ITERATIONS) {
42
30
  const rs = (await s.executeQuery(selectYql, {
43
- $uid: TypedValues.utf8(uid),
31
+ $collection: TypedValues.utf8(collection),
44
32
  $limit: TypedValues.uint32(DELETE_COLLECTION_BATCH_SIZE),
45
33
  }, undefined, settings));
46
34
  const rowset = rs.resultSets?.[0];
@@ -53,20 +41,76 @@ async function deletePointsForUidInChunks(s, uid) {
53
41
  }
54
42
  const idsValue = TypedValues.list(Types.UTF8, ids);
55
43
  await s.executeQuery(deleteBatchYql, {
56
- $uid: TypedValues.utf8(uid),
44
+ $collection: TypedValues.utf8(collection),
57
45
  $ids: idsValue,
58
46
  }, undefined, settings);
59
47
  }
60
48
  }
61
- export async function createCollectionOneTable(metaKey, dim, distance, vectorType) {
62
- await upsertCollectionMeta(metaKey, dim, distance, vectorType, GLOBAL_POINTS_TABLE);
49
+ async function deleteLookupRowsForCollectionInChunks(s, collection) {
50
+ const selectYql = `
51
+ DECLARE $collection AS Utf8;
52
+ DECLARE $limit AS Uint32;
53
+ SELECT file_path, point_id
54
+ FROM ${POINTS_BY_FILE_LOOKUP_TABLE}
55
+ WHERE collection = $collection
56
+ LIMIT $limit;
57
+ `;
58
+ const deleteBatchYql = `
59
+ DECLARE $rows AS List<Struct<
60
+ collection: Utf8,
61
+ file_path: Utf8,
62
+ point_id: Utf8
63
+ >>;
64
+
65
+ DELETE FROM ${POINTS_BY_FILE_LOOKUP_TABLE} ON
66
+ SELECT collection, file_path, point_id FROM AS_TABLE($rows);
67
+ `;
68
+ let iterations = 0;
69
+ const MAX_ITERATIONS = 10000;
70
+ const settings = createExecuteQuerySettings();
71
+ const rowType = Types.struct({
72
+ collection: Types.UTF8,
73
+ file_path: Types.UTF8,
74
+ point_id: Types.UTF8,
75
+ });
76
+ while (iterations++ < MAX_ITERATIONS) {
77
+ const rs = (await s.executeQuery(selectYql, {
78
+ $collection: TypedValues.utf8(collection),
79
+ $limit: TypedValues.uint32(DELETE_COLLECTION_BATCH_SIZE),
80
+ }, undefined, settings));
81
+ const rowset = rs.resultSets?.[0];
82
+ const rows = rowset?.rows ?? [];
83
+ const lookupRows = rows
84
+ .map((row) => ({
85
+ collection,
86
+ file_path: row.items?.[0]?.textValue,
87
+ point_id: row.items?.[1]?.textValue,
88
+ }))
89
+ .filter((row) => typeof row.file_path === "string" &&
90
+ typeof row.point_id === "string");
91
+ if (lookupRows.length === 0) {
92
+ break;
93
+ }
94
+ await s.executeQuery(deleteBatchYql, {
95
+ $rows: TypedValues.list(rowType, lookupRows),
96
+ }, undefined, settings);
97
+ }
98
+ }
99
+ export async function createCollectionOneTable(metaKey, dim, distance, vectorType, userUid) {
100
+ await upsertCollectionMeta(metaKey, dim, distance, vectorType, GLOBAL_POINTS_TABLE, userUid);
63
101
  }
64
- export async function deleteCollectionOneTable(metaKey, uid) {
102
+ export async function deleteAllPointsForCollectionOneTable(collection) {
65
103
  await ensureGlobalPointsTable();
104
+ await ensurePointsByFileTable();
66
105
  const batchDeletePointsYql = `
67
- DECLARE $uid AS Utf8;
106
+ DECLARE $collection AS Utf8;
68
107
  BATCH DELETE FROM ${GLOBAL_POINTS_TABLE}
69
- WHERE uid = $uid;
108
+ WHERE collection = $collection;
109
+ `;
110
+ const batchDeleteLookupYql = `
111
+ DECLARE $collection AS Utf8;
112
+ BATCH DELETE FROM ${POINTS_BY_FILE_LOOKUP_TABLE}
113
+ WHERE collection = $collection;
70
114
  `;
71
115
  await withRetry(async () => {
72
116
  try {
@@ -74,7 +118,26 @@ export async function deleteCollectionOneTable(metaKey, uid) {
74
118
  await qs.execute({
75
119
  text: batchDeletePointsYql,
76
120
  parameters: {
77
- $uid: TypedValues.utf8(uid),
121
+ $collection: TypedValues.utf8(collection),
122
+ },
123
+ });
124
+ });
125
+ }
126
+ catch (err) {
127
+ if (!isOutOfBufferMemoryYdbError(err)) {
128
+ throw err;
129
+ }
130
+ logger.warn({ tableName: GLOBAL_POINTS_TABLE, collection }, "BATCH DELETE hit out-of-buffer-memory, falling back to chunked deletion");
131
+ await withSession(async (s) => {
132
+ await deletePointsForCollectionInChunks(s, collection);
133
+ });
134
+ }
135
+ try {
136
+ await withQuerySession(async (qs) => {
137
+ await qs.execute({
138
+ text: batchDeleteLookupYql,
139
+ parameters: {
140
+ $collection: TypedValues.utf8(collection),
78
141
  },
79
142
  });
80
143
  });
@@ -83,23 +146,23 @@ export async function deleteCollectionOneTable(metaKey, uid) {
83
146
  if (!isOutOfBufferMemoryYdbError(err)) {
84
147
  throw err;
85
148
  }
86
- // BATCH DELETE already deletes in chunks per partition, but if YDB
87
- // still reports an out-of-buffer-memory condition, fall back to
88
- // per-uid chunked deletion strategy to complete the deletion.
149
+ logger.warn({ tableName: POINTS_BY_FILE_LOOKUP_TABLE, collection }, "BATCH DELETE hit out-of-buffer-memory, falling back to chunked deletion");
89
150
  await withSession(async (s) => {
90
- await deletePointsForUidInChunks(s, uid);
151
+ await deleteLookupRowsForCollectionInChunks(s, collection);
91
152
  });
92
153
  }
93
154
  }, {
94
155
  isTransient: isTransientYdbError,
95
156
  context: {
96
- operation: "deleteCollectionOneTable",
157
+ operation: "deleteAllPointsForCollectionOneTable",
97
158
  tableName: GLOBAL_POINTS_TABLE,
98
- metaKey,
99
- uid,
100
- mode: "batch_delete",
159
+ collection,
160
+ mode: "batch_delete_with_lookup_cleanup",
101
161
  },
102
162
  });
163
+ }
164
+ export async function deleteCollectionOneTable(metaKey, collection) {
165
+ await deleteAllPointsForCollectionOneTable(collection);
103
166
  const delMeta = `
104
167
  DECLARE $collection AS Utf8;
105
168
  DELETE FROM qdr__collections WHERE collection = $collection;
@@ -1,2 +1,2 @@
1
- import type { DistanceKind, VectorType } from "../types";
2
- export declare function upsertCollectionMeta(metaKey: string, dim: number, distance: DistanceKind, vectorType: VectorType, tableName: string): Promise<void>;
1
+ import type { DistanceKind, VectorType } from "../qdrant/QdrantRestTypes.js";
2
+ export declare function upsertCollectionMeta(metaKey: string, dim: number, distance: DistanceKind, vectorType: VectorType, tableName: string, userUid?: string): Promise<void>;
@@ -1,6 +1,6 @@
1
1
  import { UPSERT_OPERATION_TIMEOUT_MS } from "../config/env.js";
2
- import { TypedValues, withSession, createExecuteQuerySettingsWithTimeout, } from "../ydb/client.js";
3
- export async function upsertCollectionMeta(metaKey, dim, distance, vectorType, tableName) {
2
+ import { TypedValues, Types, withSession, createExecuteQuerySettingsWithTimeout, } from "../ydb/client.js";
3
+ export async function upsertCollectionMeta(metaKey, dim, distance, vectorType, tableName, userUid) {
4
4
  const now = new Date();
5
5
  const upsertMeta = `
6
6
  DECLARE $collection AS Utf8;
@@ -10,9 +10,13 @@ export async function upsertCollectionMeta(metaKey, dim, distance, vectorType, t
10
10
  DECLARE $vtype AS Utf8;
11
11
  DECLARE $created AS Timestamp;
12
12
  DECLARE $last_accessed AS Timestamp;
13
- UPSERT INTO qdr__collections (collection, table_name, vector_dimension, distance, vector_type, created_at, last_accessed_at)
14
- VALUES ($collection, $table, $dim, $distance, $vtype, $created, $last_accessed);
13
+ DECLARE $user_uid AS Optional<Utf8>;
14
+ UPSERT INTO qdr__collections (collection, table_name, vector_dimension, distance, vector_type, created_at, last_accessed_at, user_uid)
15
+ VALUES ($collection, $table, $dim, $distance, $vtype, $created, $last_accessed, $user_uid);
15
16
  `;
17
+ const userUidValue = userUid && userUid.trim().length > 0
18
+ ? TypedValues.optional(TypedValues.utf8(userUid))
19
+ : TypedValues.optionalNull(Types.UTF8);
16
20
  await withSession(async (s) => {
17
21
  const settings = createExecuteQuerySettingsWithTimeout({
18
22
  keepInCache: true,
@@ -27,6 +31,7 @@ export async function upsertCollectionMeta(metaKey, dim, distance, vectorType, t
27
31
  $vtype: TypedValues.utf8(vectorType),
28
32
  $created: TypedValues.timestamp(now),
29
33
  $last_accessed: TypedValues.timestamp(now),
34
+ $user_uid: userUidValue,
30
35
  }, undefined, settings);
31
36
  });
32
37
  }
@@ -1,6 +1,8 @@
1
- import type { DistanceKind, UpsertPoint } from "../types.js";
2
- import type { YdbQdrantScoredPoint } from "../qdrant/QdrantRestTypes.js";
3
- export declare function upsertPoints(tableName: string, points: UpsertPoint[], dimension: number, uid: string): Promise<number>;
4
- export declare function searchPoints(tableName: string, queryVector: number[], top: number, withPayload: boolean | undefined, distance: DistanceKind, dimension: number, uid: string, filterPaths?: Array<Array<string>>): Promise<YdbQdrantScoredPoint[]>;
1
+ import type { UpsertPoint } from "../qdrant/Requests.js";
2
+ import type { DistanceKind, YdbQdrantScoredPoint } from "../qdrant/QdrantRestTypes.js";
3
+ import type { RetrievedPoint } from "./pointsRepo.one-table/Retrieve.js";
4
+ export declare function upsertPoints(tableName: string, points: UpsertPoint[], dimension: number, uid: string, apiKey: string): Promise<number>;
5
+ export declare function searchPoints(tableName: string, queryVector: number[], top: number, withPayload: boolean | undefined, distance: DistanceKind, dimension: number, uid: string, apiKey: string, filterPaths?: Array<Array<string>>): Promise<YdbQdrantScoredPoint[]>;
5
6
  export declare function deletePoints(tableName: string, ids: Array<string | number>, uid: string): Promise<number>;
6
7
  export declare function deletePointsByPathSegments(tableName: string, uid: string, paths: Array<Array<string>>): Promise<number>;
8
+ export declare function retrievePointsByIds(tableName: string, ids: Array<string | number>, uid: string, apiKey: string, withPayload: boolean): Promise<RetrievedPoint[]>;
@@ -1,11 +1,9 @@
1
- import { SEARCH_MODE, OVERFETCH_MULTIPLIER, } from "../config/env.js";
2
- import { upsertPointsOneTable, searchPointsOneTable, deletePointsOneTable, deletePointsByPathSegmentsOneTable, } from "./pointsRepo.one-table.js";
3
- export async function upsertPoints(tableName, points, dimension, uid) {
4
- return await upsertPointsOneTable(tableName, points, dimension, uid);
1
+ import { upsertPointsOneTable, searchPointsOneTable, deletePointsOneTable, deletePointsByPathSegmentsOneTable, retrievePointsByIdsOneTable, } from "./pointsRepo.one-table.js";
2
+ export async function upsertPoints(tableName, points, dimension, uid, apiKey) {
3
+ return await upsertPointsOneTable(tableName, points, dimension, uid, apiKey);
5
4
  }
6
- export async function searchPoints(tableName, queryVector, top, withPayload, distance, dimension, uid, filterPaths) {
7
- const mode = SEARCH_MODE;
8
- return await searchPointsOneTable(tableName, queryVector, top, withPayload, distance, dimension, uid, mode, OVERFETCH_MULTIPLIER, filterPaths);
5
+ export async function searchPoints(tableName, queryVector, top, withPayload, distance, dimension, uid, apiKey, filterPaths) {
6
+ return await searchPointsOneTable(tableName, queryVector, top, withPayload, distance, dimension, uid, apiKey, filterPaths);
9
7
  }
10
8
  export async function deletePoints(tableName, ids, uid) {
11
9
  return await deletePointsOneTable(tableName, ids, uid);
@@ -13,3 +11,6 @@ export async function deletePoints(tableName, ids, uid) {
13
11
  export async function deletePointsByPathSegments(tableName, uid, paths) {
14
12
  return await deletePointsByPathSegmentsOneTable(tableName, uid, paths);
15
13
  }
14
+ export async function retrievePointsByIds(tableName, ids, uid, apiKey, withPayload) {
15
+ return await retrievePointsByIdsOneTable(tableName, ids, uid, apiKey, withPayload);
16
+ }
@@ -1,2 +1,2 @@
1
- export declare function deletePointsOneTable(tableName: string, ids: Array<string | number>, uid: string): Promise<number>;
2
- export declare function deletePointsByPathSegmentsOneTable(tableName: string, uid: string, paths: Array<Array<string>>): Promise<number>;
1
+ export declare function deletePointsOneTable(tableName: string, ids: Array<string | number>, collection: string): Promise<number>;
2
+ export declare function deletePointsByPathSegmentsOneTable(tableName: string, collection: string, paths: Array<Array<string>>): Promise<number>;
@@ -1,29 +1,56 @@
1
- import { TypedValues, withSession, createExecuteQuerySettings, } from "../../ydb/client.js";
2
- import { withRetry, isTransientYdbError } from "../../utils/retry.js";
3
- import { buildPathSegmentsWhereClause } from "./PathSegmentsFilter.js";
4
- const DELETE_FILTER_SELECT_BATCH_SIZE = 1000;
5
- export async function deletePointsOneTable(tableName, ids, uid) {
6
- let deleted = 0;
1
+ import { TypedValues, Types, withSession, createExecuteQuerySettings, } from "../../ydb/client.js";
2
+ import { withRetry, isTransientYdbError, isTransientYdbErrorInAcquiredSession, } from "../../utils/retry.js";
3
+ import { DELETE_FILTER_SELECT_BATCH_SIZE } from "../../config/env.js";
4
+ import { buildExactPathSegmentsFilter } from "./PathSegmentsFilter.js";
5
+ import { pathSegmentsToPrefix } from "../../utils/pathPrefix.js";
6
+ import { POINTS_BY_FILE_LOOKUP_TABLE, ensurePointsByFileTable, } from "../../ydb/schema.js";
7
+ const DELETE_FILTER_PATHS_CHUNK_SIZE = 250;
8
+ const DELETE_FILTER_PATHS_CHUNK_CONCURRENCY = 3;
9
+ // Delete is idempotent in our usage (DELETE by PK / DELETE by selected keys),
10
+ // so it is safe to use stronger retries for transient YDB states like Aborted/Undetermined.
11
+ // Keep backoff capped to avoid multi-minute request tails during outages.
12
+ const DELETE_BY_ID_RETRY_MAX_RETRIES = 10;
13
+ const DELETE_BY_ID_RETRY_BASE_DELAY_MS = 250;
14
+ const DELETE_BY_ID_RETRY_MAX_BACKOFF_MS = 1500;
15
+ const DELETE_BY_FILTER_RETRY_MAX_RETRIES = 12;
16
+ const DELETE_BY_FILTER_RETRY_BASE_DELAY_MS = 300;
17
+ const DELETE_BY_FILTER_RETRY_MAX_BACKOFF_MS = 2000;
18
+ export async function deletePointsOneTable(tableName, ids, collection) {
19
+ if (ids.length === 0) {
20
+ return 0;
21
+ }
22
+ const yql = `
23
+ DECLARE $collection AS Utf8;
24
+ DECLARE $ids AS List<Utf8>;
25
+
26
+ DELETE FROM ${tableName}
27
+ WHERE collection = $collection AND point_id IN $ids;
28
+
29
+ $lookup_rows = (
30
+ SELECT collection, file_path, point_id
31
+ FROM ${POINTS_BY_FILE_LOOKUP_TABLE}
32
+ WHERE collection = $collection AND point_id IN $ids
33
+ );
34
+
35
+ DELETE FROM ${POINTS_BY_FILE_LOOKUP_TABLE} ON
36
+ SELECT collection, file_path, point_id FROM $lookup_rows;
37
+ `;
38
+ const stringIds = ids.map((id) => String(id));
39
+ await ensurePointsByFileTable();
7
40
  await withSession(async (s) => {
8
41
  const settings = createExecuteQuerySettings();
9
- for (const id of ids) {
10
- const yql = `
11
- DECLARE $uid AS Utf8;
12
- DECLARE $id AS Utf8;
13
- DELETE FROM ${tableName} WHERE uid = $uid AND point_id = $id;
14
- `;
15
- const params = {
16
- $uid: TypedValues.utf8(uid),
17
- $id: TypedValues.utf8(String(id)),
18
- };
19
- await withRetry(() => s.executeQuery(yql, params, undefined, settings), {
20
- isTransient: isTransientYdbError,
21
- context: { tableName, uid, pointId: String(id) },
22
- });
23
- deleted += 1;
24
- }
42
+ await withRetry(() => s.executeQuery(yql, {
43
+ $collection: TypedValues.utf8(collection),
44
+ $ids: TypedValues.list(Types.UTF8, stringIds),
45
+ }, undefined, settings), {
46
+ isTransient: isTransientYdbErrorInAcquiredSession,
47
+ maxRetries: DELETE_BY_ID_RETRY_MAX_RETRIES,
48
+ baseDelayMs: DELETE_BY_ID_RETRY_BASE_DELAY_MS,
49
+ maxBackoffMs: DELETE_BY_ID_RETRY_MAX_BACKOFF_MS,
50
+ context: { tableName, collection, idCount: ids.length },
51
+ });
25
52
  });
26
- return deleted;
53
+ return ids.length;
27
54
  }
28
55
  const MAX_SAFE_BIGINT = BigInt(Number.MAX_SAFE_INTEGER);
29
56
  function bigintToSafeNumberOrNull(value) {
@@ -105,62 +132,132 @@ function readDeletedCountFromResult(rs) {
105
132
  }
106
133
  return 0;
107
134
  }
108
- export async function deletePointsByPathSegmentsOneTable(tableName, uid, paths) {
109
- if (paths.length === 0) {
110
- return 0;
111
- }
112
- const { whereSql, params: whereParams } = buildPathSegmentsWhereClause(paths);
113
- const whereParamDeclarations = Object.keys(whereParams)
114
- .sort()
115
- .map((key) => `DECLARE ${key} AS Utf8;`)
116
- .join("\n ");
135
+ async function deletePointsByPathSegmentsChunked(tableName, collection, whereSql, whereParamDeclarations, whereParams, batchLimit) {
117
136
  const deleteBatchYql = `
118
- DECLARE $uid AS Utf8;
137
+ DECLARE $collection AS Utf8;
119
138
  DECLARE $limit AS Uint32;
120
139
  ${whereParamDeclarations}
121
140
 
122
141
  $to_delete = (
123
- SELECT uid, point_id
124
- FROM ${tableName}
125
- WHERE uid = $uid AND ${whereSql}
142
+ SELECT collection, file_path, point_id
143
+ FROM ${POINTS_BY_FILE_LOOKUP_TABLE}
144
+ WHERE collection = $collection AND ${whereSql}
126
145
  LIMIT $limit
127
146
  );
128
147
 
129
148
  DELETE FROM ${tableName} ON
130
- SELECT uid, point_id FROM $to_delete;
149
+ SELECT collection, point_id FROM $to_delete;
150
+
151
+ DELETE FROM ${POINTS_BY_FILE_LOOKUP_TABLE} ON
152
+ SELECT collection, file_path, point_id FROM $to_delete;
131
153
 
132
154
  SELECT CAST(COUNT(*) AS Uint32) AS deleted FROM $to_delete;
133
155
  `;
156
+ const settings = createExecuteQuerySettings();
134
157
  let deleted = 0;
135
- await withSession(async (s) => {
136
- const settings = createExecuteQuerySettings();
137
- // Best-effort loop: stop when there are no more matching rows.
138
- // Use limited batches to avoid per-operation buffer limits.
139
- while (true) {
140
- const rs = (await withRetry(() => s.executeQuery(deleteBatchYql, {
158
+ while (true) {
159
+ const rs = await withSession(async (s) => {
160
+ return await withRetry(() => s.executeQuery(deleteBatchYql, {
141
161
  ...whereParams,
142
- $uid: TypedValues.utf8(uid),
143
- $limit: TypedValues.uint32(DELETE_FILTER_SELECT_BATCH_SIZE),
162
+ $collection: TypedValues.utf8(collection),
163
+ $limit: TypedValues.uint32(batchLimit),
144
164
  }, undefined, settings), {
145
- isTransient: isTransientYdbError,
165
+ isTransient: isTransientYdbErrorInAcquiredSession,
166
+ maxRetries: DELETE_BY_FILTER_RETRY_MAX_RETRIES,
167
+ baseDelayMs: DELETE_BY_FILTER_RETRY_BASE_DELAY_MS,
168
+ maxBackoffMs: DELETE_BY_FILTER_RETRY_MAX_BACKOFF_MS,
146
169
  context: {
147
170
  tableName,
148
- uid,
149
- filterPathsCount: paths.length,
150
- batchLimit: DELETE_FILTER_SELECT_BATCH_SIZE,
171
+ collection,
172
+ filterParamsCount: Object.keys(whereParams).length,
173
+ batchLimit,
151
174
  },
152
- }));
153
- const batchDeleted = readDeletedCountFromResult(rs);
154
- if (!Number.isSafeInteger(batchDeleted) ||
155
- batchDeleted < 0 ||
156
- batchDeleted > DELETE_FILTER_SELECT_BATCH_SIZE) {
157
- throw new Error(`Unexpected deleted count from YDB: ${String(batchDeleted)}. Expected an integer in [0, ${DELETE_FILTER_SELECT_BATCH_SIZE}].`);
158
- }
159
- if (batchDeleted <= 0) {
160
- break;
161
- }
162
- deleted += batchDeleted;
175
+ });
176
+ });
177
+ const batchDeleted = readDeletedCountFromResult(rs);
178
+ if (!Number.isSafeInteger(batchDeleted) ||
179
+ batchDeleted < 0 ||
180
+ batchDeleted > batchLimit) {
181
+ throw new Error(`Unexpected deleted count from YDB: ${String(batchDeleted)}. Expected an integer in [0, ${batchLimit}].`);
182
+ }
183
+ if (batchDeleted <= 0) {
184
+ break;
163
185
  }
186
+ deleted += batchDeleted;
187
+ }
188
+ return deleted;
189
+ }
190
+ function dedupePathSegmentsPaths(paths) {
191
+ const seenPrefixes = new Set();
192
+ const uniquePaths = [];
193
+ for (const pathSegments of paths) {
194
+ const prefix = pathSegmentsToPrefix(pathSegments);
195
+ if (seenPrefixes.has(prefix)) {
196
+ continue;
197
+ }
198
+ seenPrefixes.add(prefix);
199
+ uniquePaths.push(pathSegments);
200
+ }
201
+ return uniquePaths;
202
+ }
203
+ async function deletePointsByPathSegmentsChunkWithRetry(tableName, collection, chunk, totalInputPathsCount, chunkCount) {
204
+ const filter = buildExactPathSegmentsFilter(chunk.chunkPaths, "file_path");
205
+ if (!filter) {
206
+ return 0;
207
+ }
208
+ const { whereSql, whereParamDeclarations, whereParams } = filter;
209
+ return await withRetry(async () => {
210
+ return await deletePointsByPathSegmentsChunked(tableName, collection, whereSql, whereParamDeclarations, whereParams, DELETE_FILTER_SELECT_BATCH_SIZE);
211
+ }, {
212
+ isTransient: isTransientYdbError,
213
+ maxRetries: DELETE_BY_FILTER_RETRY_MAX_RETRIES,
214
+ baseDelayMs: DELETE_BY_FILTER_RETRY_BASE_DELAY_MS,
215
+ maxBackoffMs: DELETE_BY_FILTER_RETRY_MAX_BACKOFF_MS,
216
+ context: {
217
+ operation: "deletePointsByPathSegmentsOneTable",
218
+ tableName,
219
+ collection,
220
+ filterPathsCount: totalInputPathsCount,
221
+ chunkPathsCount: chunk.chunkPaths.length,
222
+ chunkIndex: chunk.chunkIndex,
223
+ chunkCount,
224
+ concurrency: DELETE_FILTER_PATHS_CHUNK_CONCURRENCY,
225
+ mode: "points_by_file_lookup_delete",
226
+ },
164
227
  });
228
+ }
229
+ export async function deletePointsByPathSegmentsOneTable(tableName, collection, paths) {
230
+ if (paths.length === 0) {
231
+ return 0;
232
+ }
233
+ await ensurePointsByFileTable();
234
+ const uniquePaths = dedupePathSegmentsPaths(paths);
235
+ const chunks = [];
236
+ for (let chunkStart = 0; chunkStart < uniquePaths.length; chunkStart += DELETE_FILTER_PATHS_CHUNK_SIZE) {
237
+ chunks.push({
238
+ chunkIndex: Math.floor(chunkStart / DELETE_FILTER_PATHS_CHUNK_SIZE) + 1,
239
+ chunkPaths: uniquePaths.slice(chunkStart, chunkStart + DELETE_FILTER_PATHS_CHUNK_SIZE),
240
+ });
241
+ }
242
+ const chunkCount = chunks.length;
243
+ let deleted = 0;
244
+ for (let batchStart = 0; batchStart < chunkCount; batchStart += DELETE_FILTER_PATHS_CHUNK_CONCURRENCY) {
245
+ const chunkBatch = chunks.slice(batchStart, batchStart + DELETE_FILTER_PATHS_CHUNK_CONCURRENCY);
246
+ const batchResults = await Promise.allSettled(chunkBatch.map((chunk) => deletePointsByPathSegmentsChunkWithRetry(tableName, collection, chunk, uniquePaths.length, chunkCount)));
247
+ let firstError;
248
+ for (const result of batchResults) {
249
+ if (result.status === "fulfilled") {
250
+ deleted += result.value;
251
+ continue;
252
+ }
253
+ firstError ??=
254
+ result.reason instanceof Error
255
+ ? result.reason
256
+ : new Error(String(result.reason));
257
+ }
258
+ if (firstError !== undefined) {
259
+ throw firstError;
260
+ }
261
+ }
165
262
  return deleted;
166
263
  }
@@ -2,13 +2,15 @@ import type { Ydb } from "ydb-sdk";
2
2
  type QueryParams = {
3
3
  [key: string]: Ydb.ITypedValue;
4
4
  };
5
+ type BuiltPathSegmentsFilter = {
6
+ whereSql: string;
7
+ whereParamDeclarations: string;
8
+ whereParams: QueryParams;
9
+ };
5
10
  export declare function buildPathSegmentsWhereClause(paths: Array<Array<string>>): {
6
11
  whereSql: string;
7
12
  params: QueryParams;
8
13
  };
9
- export declare function buildPathSegmentsFilter(paths: Array<Array<string>> | undefined): {
10
- whereSql: string;
11
- whereParamDeclarations: string;
12
- whereParams: QueryParams;
13
- } | undefined;
14
+ export declare function buildPrefixPathSegmentsFilter(paths: Array<Array<string>> | undefined, columnName?: string): BuiltPathSegmentsFilter | undefined;
15
+ export declare function buildExactPathSegmentsFilter(paths: Array<Array<string>> | undefined, columnName?: string): BuiltPathSegmentsFilter | undefined;
14
16
  export {};
@@ -1,33 +1,64 @@
1
1
  import { TypedValues } from "../../ydb/client.js";
2
- export function buildPathSegmentsWhereClause(paths) {
2
+ import { pathSegmentsToPrefix } from "../../utils/pathPrefix.js";
3
+ function buildPrefixPathSegmentsWhereClauseForColumn(paths, columnName) {
3
4
  const params = {};
4
5
  const orGroups = [];
5
6
  for (let pIdx = 0; pIdx < paths.length; pIdx += 1) {
6
7
  const segs = paths[pIdx];
7
8
  if (segs.length === 0) {
8
- throw new Error("delete-by-filter: empty path segments");
9
+ throw new Error("pathSegments filter: empty path segments");
9
10
  }
10
- const andParts = [];
11
- for (let sIdx = 0; sIdx < segs.length; sIdx += 1) {
12
- const paramName = `$p${pIdx}_${sIdx}`;
13
- // payload is JsonDocument; JSON_VALUE supports JsonPath access.
14
- andParts.push(`JSON_VALUE(payload, '$.pathSegments."${sIdx}"') = ${paramName}`);
15
- params[paramName] = TypedValues.utf8(segs[sIdx]);
11
+ const prefix = pathSegmentsToPrefix(segs);
12
+ const exactParam = `$ppfx${pIdx}`;
13
+ const descendantParam = `$ppfxd${pIdx}`;
14
+ params[exactParam] = TypedValues.utf8(prefix);
15
+ params[descendantParam] = TypedValues.utf8(`${prefix}/`);
16
+ orGroups.push(`(${columnName} = ${exactParam} OR StartsWith(${columnName}, ${descendantParam}))`);
17
+ }
18
+ return {
19
+ whereSql: orGroups.length === 1 ? orGroups[0] : `(${orGroups.join(" OR ")})`,
20
+ params,
21
+ };
22
+ }
23
+ function buildExactPathSegmentsWhereClauseForColumn(paths, columnName) {
24
+ const params = {};
25
+ const orGroups = [];
26
+ for (let pIdx = 0; pIdx < paths.length; pIdx += 1) {
27
+ const segs = paths[pIdx];
28
+ if (segs.length === 0) {
29
+ throw new Error("pathSegments filter: empty path segments");
16
30
  }
17
- orGroups.push(`(${andParts.join(" AND ")})`);
31
+ const prefix = pathSegmentsToPrefix(segs);
32
+ const exactParam = `$ppfx${pIdx}`;
33
+ params[exactParam] = TypedValues.utf8(prefix);
34
+ orGroups.push(`${columnName} = ${exactParam}`);
18
35
  }
19
36
  return {
20
37
  whereSql: orGroups.length === 1 ? orGroups[0] : `(${orGroups.join(" OR ")})`,
21
38
  params,
22
39
  };
23
40
  }
24
- export function buildPathSegmentsFilter(paths) {
25
- if (!paths || paths.length === 0)
26
- return undefined;
27
- const { whereSql, params: whereParams } = buildPathSegmentsWhereClause(paths);
41
+ function buildPathSegmentsFilterFromWhereClause(whereSql, whereParams) {
28
42
  const whereParamDeclarations = Object.keys(whereParams)
29
43
  .sort()
30
44
  .map((key) => `DECLARE ${key} AS Utf8;`)
31
45
  .join("\n ");
32
46
  return { whereSql, whereParamDeclarations, whereParams };
33
47
  }
48
+ export function buildPathSegmentsWhereClause(paths) {
49
+ return buildPrefixPathSegmentsWhereClauseForColumn(paths, "path_prefix");
50
+ }
51
+ export function buildPrefixPathSegmentsFilter(paths, columnName = "path_prefix") {
52
+ if (!paths || paths.length === 0) {
53
+ return undefined;
54
+ }
55
+ const { whereSql, params: whereParams } = buildPrefixPathSegmentsWhereClauseForColumn(paths, columnName);
56
+ return buildPathSegmentsFilterFromWhereClause(whereSql, whereParams);
57
+ }
58
+ export function buildExactPathSegmentsFilter(paths, columnName = "file_path") {
59
+ if (!paths || paths.length === 0) {
60
+ return undefined;
61
+ }
62
+ const { whereSql, params: whereParams } = buildExactPathSegmentsWhereClauseForColumn(paths, columnName);
63
+ return buildPathSegmentsFilterFromWhereClause(whereSql, whereParams);
64
+ }
@@ -0,0 +1,6 @@
1
+ import type { Payload } from "../../qdrant/QdrantRestTypes.js";
2
+ export interface RetrievedPoint {
3
+ id: string;
4
+ payload: Payload | null;
5
+ }
6
+ export declare function retrievePointsByIdsOneTable(tableName: string, ids: Array<string | number>, uid: string, apiKey: string, withPayload: boolean): Promise<RetrievedPoint[]>;