ydb-qdrant 4.1.1 → 4.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/dist/config/env.d.ts +1 -0
- package/dist/config/env.js +1 -0
- package/dist/services/PointsService.js +40 -3
- package/dist/services/errors.d.ts +1 -0
- package/dist/services/errors.js +3 -0
- package/dist/ydb/schema.js +48 -46
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -401,8 +401,9 @@ curl -X POST http://localhost:8080/collections/mycol/points/delete \
|
|
|
401
401
|
|
|
402
402
|
## Notes
|
|
403
403
|
- Storage layout:
|
|
404
|
-
|
|
405
|
-
|
|
404
|
+
- **multi_table** (default): one YDB table per collection; metadata is tracked in `qdr__collections`.
|
|
405
|
+
- **one_table**: a single global table `qdrant_all_points` with `(uid, point_id)` PK, where `uid` encodes tenant+collection. Columns: `uid Utf8`, `point_id Utf8`, `embedding String` (binary float), `embedding_bit String` (bit‑quantized), `payload JsonDocument`.
|
|
406
|
+
- **Schema migrations** (one_table mode): automatic schema/backfill steps for `qdrant_all_points` are disabled by default. To opt in, set `YDB_QDRANT_GLOBAL_POINTS_AUTOMIGRATE=true` after backing up data; otherwise the service will error if the `embedding_bit` column is missing or needs backfill.
|
|
406
407
|
- Per‑collection table schema (multi_table): `point_id Utf8` (PK), `embedding String` (binary), `payload JsonDocument`.
|
|
407
408
|
- Vectors are serialized with `Knn::ToBinaryStringFloat`.
|
|
408
409
|
- Search uses a single-phase top‑k over `embedding` with automatic YDB vector index (`emb_idx`) when available; falls back to table scan if missing.
|
package/dist/config/env.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ export declare const YDB_ENDPOINT: string;
|
|
|
3
3
|
export declare const YDB_DATABASE: string;
|
|
4
4
|
export declare const PORT: number;
|
|
5
5
|
export declare const LOG_LEVEL: string;
|
|
6
|
+
export declare const GLOBAL_POINTS_AUTOMIGRATE_ENABLED: boolean;
|
|
6
7
|
export declare const VECTOR_INDEX_BUILD_ENABLED: boolean;
|
|
7
8
|
export declare enum CollectionStorageMode {
|
|
8
9
|
MultiTable = "multi_table",
|
package/dist/config/env.js
CHANGED
|
@@ -3,6 +3,7 @@ export const YDB_ENDPOINT = process.env.YDB_ENDPOINT ?? "";
|
|
|
3
3
|
export const YDB_DATABASE = process.env.YDB_DATABASE ?? "";
|
|
4
4
|
export const PORT = process.env.PORT ? Number(process.env.PORT) : 8080;
|
|
5
5
|
export const LOG_LEVEL = process.env.LOG_LEVEL ?? "info";
|
|
6
|
+
export const GLOBAL_POINTS_AUTOMIGRATE_ENABLED = parseBooleanEnv(process.env.YDB_QDRANT_GLOBAL_POINTS_AUTOMIGRATE, false);
|
|
6
7
|
function parseBooleanEnv(value, defaultValue) {
|
|
7
8
|
if (value === undefined) {
|
|
8
9
|
return defaultValue;
|
package/dist/services/PointsService.js
CHANGED
|
@@ -5,7 +5,7 @@ import { deletePoints as repoDeletePoints, searchPoints as repoSearchPoints, ups
|
|
|
5
5
|
import { requestIndexBuild } from "../indexing/IndexScheduler.js";
|
|
6
6
|
import { logger } from "../logging/logger.js";
|
|
7
7
|
import { VECTOR_INDEX_BUILD_ENABLED } from "../config/env.js";
|
|
8
|
-
import { QdrantServiceError } from "./errors.js";
|
|
8
|
+
import { QdrantServiceError, isVectorDimensionMismatchError, } from "./errors.js";
|
|
9
9
|
import { normalizeCollectionContext, resolvePointsTableAndUid, } from "./CollectionService.js";
|
|
10
10
|
import { normalizeSearchBodyForSearch, normalizeSearchBodyForQuery, } from "../utils/normalization.js";
|
|
11
11
|
let loggedIndexBuildDisabled = false;
|
|
@@ -27,7 +27,25 @@ export async function upsertPoints(ctx, body) {
|
|
|
27
27
|
});
|
|
28
28
|
}
|
|
29
29
|
const { tableName, uid } = await resolvePointsTableAndUid(normalized, meta);
|
|
30
|
-
|
|
30
|
+
let upserted;
|
|
31
|
+
try {
|
|
32
|
+
upserted = await repoUpsertPoints(tableName, parsed.data.points, meta.dimension, uid);
|
|
33
|
+
}
|
|
34
|
+
catch (err) {
|
|
35
|
+
if (isVectorDimensionMismatchError(err)) {
|
|
36
|
+
logger.warn({
|
|
37
|
+
tenant: normalized.tenant,
|
|
38
|
+
collection: normalized.collection,
|
|
39
|
+
table: tableName,
|
|
40
|
+
dimension: meta.dimension,
|
|
41
|
+
}, "upsertPoints: vector dimension mismatch");
|
|
42
|
+
throw new QdrantServiceError(400, {
|
|
43
|
+
status: "error",
|
|
44
|
+
error: err.message,
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
throw err;
|
|
48
|
+
}
|
|
31
49
|
if (VECTOR_INDEX_BUILD_ENABLED) {
|
|
32
50
|
requestIndexBuild(tableName, meta.dimension, meta.distance, meta.vectorType);
|
|
33
51
|
}
|
|
@@ -79,7 +97,26 @@ async function executeSearch(ctx, normalizedSearch, source) {
|
|
|
79
97
|
distance: meta.distance,
|
|
80
98
|
vectorType: meta.vectorType,
|
|
81
99
|
}, `${source}: executing`);
|
|
82
|
-
|
|
100
|
+
let hits;
|
|
101
|
+
try {
|
|
102
|
+
hits = await repoSearchPoints(tableName, parsed.data.vector, parsed.data.top, parsed.data.with_payload, meta.distance, meta.dimension, uid);
|
|
103
|
+
}
|
|
104
|
+
catch (err) {
|
|
105
|
+
if (isVectorDimensionMismatchError(err)) {
|
|
106
|
+
logger.warn({
|
|
107
|
+
tenant: normalized.tenant,
|
|
108
|
+
collection: normalized.collection,
|
|
109
|
+
table: tableName,
|
|
110
|
+
dimension: meta.dimension,
|
|
111
|
+
queryVectorLen: parsed.data.vector.length,
|
|
112
|
+
}, `${source}: vector dimension mismatch`);
|
|
113
|
+
throw new QdrantServiceError(400, {
|
|
114
|
+
status: "error",
|
|
115
|
+
error: err.message,
|
|
116
|
+
});
|
|
117
|
+
}
|
|
118
|
+
throw err;
|
|
119
|
+
}
|
|
83
120
|
const threshold = normalizedSearch.scoreThreshold;
|
|
84
121
|
const filtered = threshold === undefined
|
|
85
122
|
? hits
|
package/dist/services/errors.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ export interface QdrantServiceErrorPayload {
|
|
|
2
2
|
status: "error";
|
|
3
3
|
error: unknown;
|
|
4
4
|
}
|
|
5
|
+
export declare function isVectorDimensionMismatchError(err: unknown): err is Error;
|
|
5
6
|
export declare class QdrantServiceError extends Error {
|
|
6
7
|
readonly statusCode: number;
|
|
7
8
|
readonly payload: QdrantServiceErrorPayload;
|
package/dist/services/errors.js
CHANGED
package/dist/ydb/schema.js
CHANGED
|
@@ -1,7 +1,12 @@
|
|
|
1
1
|
import { withSession, TableDescription, Column, Types } from "./client.js";
|
|
2
2
|
import { logger } from "../logging/logger.js";
|
|
3
|
+
import { GLOBAL_POINTS_AUTOMIGRATE_ENABLED } from "../config/env.js";
|
|
3
4
|
export const GLOBAL_POINTS_TABLE = "qdrant_all_points";
|
|
4
5
|
let globalPointsTableReady = false;
|
|
6
|
+
function throwMigrationRequired(message) {
|
|
7
|
+
logger.error(message);
|
|
8
|
+
throw new Error(message);
|
|
9
|
+
}
|
|
5
10
|
export async function ensureMetaTable() {
|
|
6
11
|
try {
|
|
7
12
|
await withSession(async (s) => {
|
|
@@ -28,64 +33,61 @@ export async function ensureGlobalPointsTable() {
|
|
|
28
33
|
if (globalPointsTableReady) {
|
|
29
34
|
return;
|
|
30
35
|
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
36
|
+
await withSession(async (s) => {
|
|
37
|
+
let tableDescription = null;
|
|
38
|
+
try {
|
|
39
|
+
tableDescription = await s.describeTable(GLOBAL_POINTS_TABLE);
|
|
40
|
+
}
|
|
41
|
+
catch {
|
|
42
|
+
// Table doesn't exist, create it with all columns
|
|
43
|
+
const desc = new TableDescription()
|
|
44
|
+
.withColumns(new Column("uid", Types.UTF8), new Column("point_id", Types.UTF8), new Column("embedding", Types.BYTES), new Column("embedding_bit", Types.BYTES), new Column("payload", Types.JSON_DOCUMENT))
|
|
45
|
+
.withPrimaryKeys("uid", "point_id");
|
|
46
|
+
await s.createTable(GLOBAL_POINTS_TABLE, desc);
|
|
47
|
+
globalPointsTableReady = true;
|
|
48
|
+
logger.info(`created global points table ${GLOBAL_POINTS_TABLE}`);
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
// Table exists, check if embedding_bit column is present
|
|
52
|
+
const columns = tableDescription.columns ?? [];
|
|
53
|
+
const hasEmbeddingBit = columns.some((col) => col.name === "embedding_bit");
|
|
54
|
+
let needsBackfill = false;
|
|
55
|
+
if (!hasEmbeddingBit) {
|
|
56
|
+
if (!GLOBAL_POINTS_AUTOMIGRATE_ENABLED) {
|
|
57
|
+
throwMigrationRequired(`Global points table ${GLOBAL_POINTS_TABLE} is missing required column embedding_bit; set YDB_QDRANT_GLOBAL_POINTS_AUTOMIGRATE=true after backup to apply the migration manually.`);
|
|
46
58
|
}
|
|
47
|
-
|
|
48
|
-
const columns = tableDescription.columns ?? [];
|
|
49
|
-
const hasEmbeddingBit = columns.some((col) => col.name === "embedding_bit");
|
|
50
|
-
let needsBackfill = false;
|
|
51
|
-
if (!hasEmbeddingBit) {
|
|
52
|
-
// Add the missing embedding_bit column
|
|
53
|
-
const alterDdl = `
|
|
59
|
+
const alterDdl = `
|
|
54
60
|
ALTER TABLE ${GLOBAL_POINTS_TABLE}
|
|
55
61
|
ADD COLUMN embedding_bit String;
|
|
56
62
|
`;
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
const checkNullsDdl = `
|
|
63
|
+
await s.executeQuery(alterDdl);
|
|
64
|
+
logger.info(`added embedding_bit column to existing table ${GLOBAL_POINTS_TABLE}`);
|
|
65
|
+
needsBackfill = true;
|
|
66
|
+
}
|
|
67
|
+
else {
|
|
68
|
+
const checkNullsDdl = `
|
|
64
69
|
SELECT 1 AS has_null
|
|
65
70
|
FROM ${GLOBAL_POINTS_TABLE}
|
|
66
71
|
WHERE embedding_bit IS NULL
|
|
67
72
|
LIMIT 1;
|
|
68
73
|
`;
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
74
|
+
const checkRes = await s.executeQuery(checkNullsDdl);
|
|
75
|
+
const rows = checkRes?.resultSets?.[0]?.rows ?? [];
|
|
76
|
+
needsBackfill = rows.length > 0;
|
|
77
|
+
}
|
|
78
|
+
if (needsBackfill) {
|
|
79
|
+
if (!GLOBAL_POINTS_AUTOMIGRATE_ENABLED) {
|
|
80
|
+
throwMigrationRequired(`Global points table ${GLOBAL_POINTS_TABLE} requires backfill for embedding_bit; set YDB_QDRANT_GLOBAL_POINTS_AUTOMIGRATE=true after backup to apply the migration manually.`);
|
|
73
81
|
}
|
|
74
|
-
|
|
75
|
-
// Backfill existing rows: convert embedding to bit representation
|
|
76
|
-
const backfillDdl = `
|
|
82
|
+
const backfillDdl = `
|
|
77
83
|
UPDATE ${GLOBAL_POINTS_TABLE}
|
|
78
84
|
SET embedding_bit = Untag(Knn::ToBinaryStringBit(Knn::FloatFromBinaryString(embedding)), "BitVector")
|
|
79
85
|
WHERE embedding_bit IS NULL;
|
|
80
86
|
`;
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
}
|
|
88
|
-
catch (err) {
|
|
89
|
-
logger.debug({ err }, "ensureGlobalPointsTable: ignored");
|
|
90
|
-
}
|
|
87
|
+
await s.executeQuery(backfillDdl);
|
|
88
|
+
logger.info(`backfilled embedding_bit column from embedding in ${GLOBAL_POINTS_TABLE}`);
|
|
89
|
+
}
|
|
90
|
+
// Mark table ready only after schema (and any required backfill) succeed
|
|
91
|
+
globalPointsTableReady = true;
|
|
92
|
+
});
|
|
91
93
|
}
|