ydb-qdrant 4.1.1 → 4.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -401,8 +401,9 @@ curl -X POST http://localhost:8080/collections/mycol/points/delete \
 
  ## Notes
  - Storage layout:
- - **multi_table** (default): one YDB table per collection; metadata is tracked in `qdr__collections`.
- - **one_table**: a single global table `qdrant_all_points` with `(uid, point_id)` PK, where `uid` encodes tenant+collection. Columns: `uid Utf8`, `point_id Utf8`, `embedding String` (binary float), `embedding_bit String` (bit‑quantized), `payload JsonDocument`.
+ - **multi_table** (default): one YDB table per collection; metadata is tracked in `qdr__collections`.
+ - **one_table**: a single global table `qdrant_all_points` with `(uid, point_id)` PK, where `uid` encodes tenant+collection. Columns: `uid Utf8`, `point_id Utf8`, `embedding String` (binary float), `embedding_bit String` (bit‑quantized), `payload JsonDocument`.
+ - **Schema migrations** (one_table mode): automatic schema/backfill steps for `qdrant_all_points` are disabled by default. To opt in, set `YDB_QDRANT_GLOBAL_POINTS_AUTOMIGRATE=true` after backing up data; otherwise the service will error if the `embedding_bit` column is missing or needs backfill.
  - Per‑collection table schema (multi_table): `point_id Utf8` (PK), `embedding String` (binary), `payload JsonDocument`.
  - Vectors are serialized with `Knn::ToBinaryStringFloat`.
  - Search uses a single-phase top‑k over `embedding` with automatic YDB vector index (`emb_idx`) when available; falls back to table scan if missing.
@@ -3,6 +3,7 @@ export declare const YDB_ENDPOINT: string;
  export declare const YDB_DATABASE: string;
  export declare const PORT: number;
  export declare const LOG_LEVEL: string;
+ export declare const GLOBAL_POINTS_AUTOMIGRATE_ENABLED: boolean;
  export declare const VECTOR_INDEX_BUILD_ENABLED: boolean;
  export declare enum CollectionStorageMode {
  MultiTable = "multi_table",
@@ -3,6 +3,7 @@ export const YDB_ENDPOINT = process.env.YDB_ENDPOINT ?? "";
  export const YDB_DATABASE = process.env.YDB_DATABASE ?? "";
  export const PORT = process.env.PORT ? Number(process.env.PORT) : 8080;
  export const LOG_LEVEL = process.env.LOG_LEVEL ?? "info";
+ export const GLOBAL_POINTS_AUTOMIGRATE_ENABLED = parseBooleanEnv(process.env.YDB_QDRANT_GLOBAL_POINTS_AUTOMIGRATE, false);
  function parseBooleanEnv(value, defaultValue) {
  if (value === undefined) {
  return defaultValue;
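
The hunk truncates here, so only the `undefined` branch of `parseBooleanEnv` is visible. For orientation, a minimal sketch of how such a parser commonly behaves; the accepted spellings and the helper name are assumptions for illustration, not the package's actual code:

```ts
// Hypothetical sketch of a boolean env parser matching the visible signature;
// the accepted spellings below are an assumption, not taken from the diff.
function parseBooleanEnvSketch(value: string | undefined, defaultValue: boolean): boolean {
    if (value === undefined) {
        return defaultValue; // matches the branch visible in the diff
    }
    const normalized = value.trim().toLowerCase();
    if (["true", "1", "yes", "on"].includes(normalized)) return true;
    if (["false", "0", "no", "off"].includes(normalized)) return false;
    return defaultValue; // unrecognized values fall back to the default
}
```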
@@ -5,7 +5,7 @@ import { deletePoints as repoDeletePoints, searchPoints as repoSearchPoints, ups
  import { requestIndexBuild } from "../indexing/IndexScheduler.js";
  import { logger } from "../logging/logger.js";
  import { VECTOR_INDEX_BUILD_ENABLED } from "../config/env.js";
- import { QdrantServiceError } from "./errors.js";
+ import { QdrantServiceError, isVectorDimensionMismatchError, } from "./errors.js";
  import { normalizeCollectionContext, resolvePointsTableAndUid, } from "./CollectionService.js";
  import { normalizeSearchBodyForSearch, normalizeSearchBodyForQuery, } from "../utils/normalization.js";
  let loggedIndexBuildDisabled = false;
@@ -27,7 +27,25 @@ export async function upsertPoints(ctx, body) {
  });
  }
  const { tableName, uid } = await resolvePointsTableAndUid(normalized, meta);
- const upserted = await repoUpsertPoints(tableName, parsed.data.points, meta.dimension, uid);
+ let upserted;
+ try {
+ upserted = await repoUpsertPoints(tableName, parsed.data.points, meta.dimension, uid);
+ }
+ catch (err) {
+ if (isVectorDimensionMismatchError(err)) {
+ logger.warn({
+ tenant: normalized.tenant,
+ collection: normalized.collection,
+ table: tableName,
+ dimension: meta.dimension,
+ }, "upsertPoints: vector dimension mismatch");
+ throw new QdrantServiceError(400, {
+ status: "error",
+ error: err.message,
+ });
+ }
+ throw err;
+ }
  if (VECTOR_INDEX_BUILD_ENABLED) {
  requestIndexBuild(tableName, meta.dimension, meta.distance, meta.vectorType);
  }
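
The effect of this hunk is that a vector whose length disagrees with the collection's configured dimension now surfaces to clients as an HTTP 400 with the service's standard error payload instead of an unhandled 500. A hedged sketch of what a caller would observe; the host, port, and Qdrant-style upsert route are assumptions based on the README examples, not verified against this version:

```ts
// Illustrative client call; the route and payload shape are assumptions.
// The collection is presumed to have been created with a dimension other than 2.
const res = await fetch("http://localhost:8080/collections/mycol/points", {
    method: "PUT",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ points: [{ id: "p1", vector: [0.1, 0.2], payload: {} }] }),
});
// Expected after this change: status 400 and a body shaped like
// { status: "error", error: "Vector dimension mismatch ..." }
console.log(res.status, await res.json());
```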
@@ -79,7 +97,26 @@ async function executeSearch(ctx, normalizedSearch, source) {
  distance: meta.distance,
  vectorType: meta.vectorType,
  }, `${source}: executing`);
- const hits = await repoSearchPoints(tableName, parsed.data.vector, parsed.data.top, parsed.data.with_payload, meta.distance, meta.dimension, uid);
+ let hits;
+ try {
+ hits = await repoSearchPoints(tableName, parsed.data.vector, parsed.data.top, parsed.data.with_payload, meta.distance, meta.dimension, uid);
+ }
+ catch (err) {
+ if (isVectorDimensionMismatchError(err)) {
+ logger.warn({
+ tenant: normalized.tenant,
+ collection: normalized.collection,
+ table: tableName,
+ dimension: meta.dimension,
+ queryVectorLen: parsed.data.vector.length,
+ }, `${source}: vector dimension mismatch`);
+ throw new QdrantServiceError(400, {
+ status: "error",
+ error: err.message,
+ });
+ }
+ throw err;
+ }
  const threshold = normalizedSearch.scoreThreshold;
  const filtered = threshold === undefined
  ? hits
@@ -2,6 +2,7 @@ export interface QdrantServiceErrorPayload {
  status: "error";
  error: unknown;
  }
+ export declare function isVectorDimensionMismatchError(err: unknown): err is Error;
  export declare class QdrantServiceError extends Error {
  readonly statusCode: number;
  readonly payload: QdrantServiceErrorPayload;
@@ -1,3 +1,6 @@
+ export function isVectorDimensionMismatchError(err) {
+ return (err instanceof Error && err.message.startsWith("Vector dimension mismatch"));
+ }
  export class QdrantServiceError extends Error {
  statusCode;
  payload;
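
`isVectorDimensionMismatchError` is a TypeScript type predicate (`err is Error` in the declaration above): inside a `catch`, it narrows `unknown` to `Error`, which is what lets the service code read `err.message` without a cast. A small usage sketch:

```ts
// The predicate narrows `unknown` to `Error`, so message access type-checks.
function classify(err: unknown): { statusCode: number; detail: string } {
    if (isVectorDimensionMismatchError(err)) {
        return { statusCode: 400, detail: err.message }; // err is Error here
    }
    return { statusCode: 500, detail: "unexpected error" };
}
```

Matching on the `"Vector dimension mismatch"` message prefix couples the guard to the repository's error text; a dedicated error class would be more robust, but the prefix check keeps the change to the compiled output minimal.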
@@ -1,7 +1,12 @@
  import { withSession, TableDescription, Column, Types } from "./client.js";
  import { logger } from "../logging/logger.js";
+ import { GLOBAL_POINTS_AUTOMIGRATE_ENABLED } from "../config/env.js";
  export const GLOBAL_POINTS_TABLE = "qdrant_all_points";
  let globalPointsTableReady = false;
+ function throwMigrationRequired(message) {
+ logger.error(message);
+ throw new Error(message);
+ }
  export async function ensureMetaTable() {
  try {
  await withSession(async (s) => {
@@ -28,64 +33,61 @@ export async function ensureGlobalPointsTable() {
  if (globalPointsTableReady) {
  return;
  }
- try {
- await withSession(async (s) => {
- let tableDescription = null;
- try {
- tableDescription = await s.describeTable(GLOBAL_POINTS_TABLE);
- }
- catch {
- // Table doesn't exist, create it with all columns
- const desc = new TableDescription()
- .withColumns(new Column("uid", Types.UTF8), new Column("point_id", Types.UTF8), new Column("embedding", Types.BYTES), new Column("embedding_bit", Types.BYTES), new Column("payload", Types.JSON_DOCUMENT))
- .withPrimaryKeys("uid", "point_id");
- await s.createTable(GLOBAL_POINTS_TABLE, desc);
- globalPointsTableReady = true;
- logger.info(`created global points table ${GLOBAL_POINTS_TABLE}`);
- return;
+ await withSession(async (s) => {
+ let tableDescription = null;
+ try {
+ tableDescription = await s.describeTable(GLOBAL_POINTS_TABLE);
+ }
+ catch {
+ // Table doesn't exist, create it with all columns
+ const desc = new TableDescription()
+ .withColumns(new Column("uid", Types.UTF8), new Column("point_id", Types.UTF8), new Column("embedding", Types.BYTES), new Column("embedding_bit", Types.BYTES), new Column("payload", Types.JSON_DOCUMENT))
+ .withPrimaryKeys("uid", "point_id");
+ await s.createTable(GLOBAL_POINTS_TABLE, desc);
+ globalPointsTableReady = true;
+ logger.info(`created global points table ${GLOBAL_POINTS_TABLE}`);
+ return;
+ }
+ // Table exists, check if embedding_bit column is present
+ const columns = tableDescription.columns ?? [];
+ const hasEmbeddingBit = columns.some((col) => col.name === "embedding_bit");
+ let needsBackfill = false;
+ if (!hasEmbeddingBit) {
+ if (!GLOBAL_POINTS_AUTOMIGRATE_ENABLED) {
+ throwMigrationRequired(`Global points table ${GLOBAL_POINTS_TABLE} is missing required column embedding_bit; back up the data and set YDB_QDRANT_GLOBAL_POINTS_AUTOMIGRATE=true to apply the migration, or apply it manually.`);
  }
- // Table exists, check if embedding_bit column is present
- const columns = tableDescription.columns ?? [];
- const hasEmbeddingBit = columns.some((col) => col.name === "embedding_bit");
- let needsBackfill = false;
- if (!hasEmbeddingBit) {
- // Add the missing embedding_bit column
- const alterDdl = `
+ const alterDdl = `
  ALTER TABLE ${GLOBAL_POINTS_TABLE}
  ADD COLUMN embedding_bit String;
  `;
- await s.executeQuery(alterDdl);
- logger.info(`added embedding_bit column to existing table ${GLOBAL_POINTS_TABLE}`);
- needsBackfill = true;
- }
- else {
- // Column exists; check if any legacy rows still have NULL embedding_bit
- const checkNullsDdl = `
+ await s.executeQuery(alterDdl);
+ logger.info(`added embedding_bit column to existing table ${GLOBAL_POINTS_TABLE}`);
+ needsBackfill = true;
+ }
+ else {
+ const checkNullsDdl = `
  SELECT 1 AS has_null
  FROM ${GLOBAL_POINTS_TABLE}
  WHERE embedding_bit IS NULL
  LIMIT 1;
  `;
- const checkRes = await s.executeQuery(checkNullsDdl);
- const hasNullRows = checkRes.resultSets?.[0]?.rows &&
- checkRes.resultSets[0].rows.length > 0;
- needsBackfill = Boolean(hasNullRows);
+ const checkRes = await s.executeQuery(checkNullsDdl);
+ const rows = checkRes?.resultSets?.[0]?.rows ?? [];
+ needsBackfill = rows.length > 0;
+ }
+ if (needsBackfill) {
+ if (!GLOBAL_POINTS_AUTOMIGRATE_ENABLED) {
+ throwMigrationRequired(`Global points table ${GLOBAL_POINTS_TABLE} requires backfill for embedding_bit; back up the data and set YDB_QDRANT_GLOBAL_POINTS_AUTOMIGRATE=true to apply the migration, or apply it manually.`);
  }
- if (needsBackfill) {
- // Backfill existing rows: convert embedding to bit representation
- const backfillDdl = `
+ const backfillDdl = `
  UPDATE ${GLOBAL_POINTS_TABLE}
  SET embedding_bit = Untag(Knn::ToBinaryStringBit(Knn::FloatFromBinaryString(embedding)), "BitVector")
  WHERE embedding_bit IS NULL;
  `;
- await s.executeQuery(backfillDdl);
- logger.info(`backfilled embedding_bit column from embedding in ${GLOBAL_POINTS_TABLE}`);
- }
- // Mark table ready only after schema (and any required backfill) succeed
- globalPointsTableReady = true;
- });
- }
- catch (err) {
- logger.debug({ err }, "ensureGlobalPointsTable: ignored");
- }
+ await s.executeQuery(backfillDdl);
+ logger.info(`backfilled embedding_bit column from embedding in ${GLOBAL_POINTS_TABLE}`);
+ }
+ // Mark table ready only after schema (and any required backfill) succeed
+ globalPointsTableReady = true;
+ });
  }
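
Operators who leave automigration disabled can apply the equivalent steps by hand. The two statements below are quoted verbatim from the migration code above; wrapping them in the package's own `withSession` helper from a one-off script is an assumption about how a deployment might run them, not documented behavior:

```ts
// One-off migration sketch: the YQL statements mirror the automigration above.
// Running them via withSession from an external script is an assumption; the
// import path is written as if the script sat next to the package's YDB module.
import { withSession } from "./client.js";

await withSession(async (s) => {
    // Step 1: add the missing column (skip if it already exists).
    await s.executeQuery(`
        ALTER TABLE qdrant_all_points
        ADD COLUMN embedding_bit String;
    `);
    // Step 2: backfill bit-quantized vectors for legacy rows.
    await s.executeQuery(`
        UPDATE qdrant_all_points
        SET embedding_bit = Untag(Knn::ToBinaryStringBit(Knn::FloatFromBinaryString(embedding)), "BitVector")
        WHERE embedding_bit IS NULL;
    `);
});
```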
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "ydb-qdrant",
- "version": "4.1.1",
+ "version": "4.1.3",
  "main": "dist/package/api.js",
  "types": "dist/package/api.d.ts",
  "exports": {