ydb-qdrant 2.3.0 → 3.0.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -400,7 +400,7 @@ curl -X POST http://localhost:8080/collections/mycol/points/delete \
400
400
  ## Notes
401
401
  - One YDB table is created per collection; metadata is tracked in table `qdr__collections`.
402
402
  - Each collection table schema: `point_id Utf8` (PK), `embedding String` (binary), `payload JsonDocument`.
403
- - Vectors are serialized with `Knn::ToBinaryStringFloat` (or `Knn::ToBinaryStringUint8` if collection uses uint8).
403
+ - Vectors are serialized with `Knn::ToBinaryStringFloat`.
404
404
  - Search uses a single-phase top‑k over `embedding` with automatic YDB vector index (`emb_idx`) when available; falls back to table scan if missing.
405
405
  - **Vector index auto-build**: After ≥100 points upserted + 5s quiet window, a `vector_kmeans_tree` index (levels=1, clusters=128) is built automatically. Incremental updates (<100 points) skip index rebuild.
406
406
  - **Concurrency**: During index rebuilds, YDB may return transient `Aborted`/schema metadata errors. Upserts include bounded retries with backoff to handle this automatically.
@@ -1,10 +1,10 @@
1
- import type { VectorType, DistanceKind } from "../types";
1
+ import type { DistanceKind } from "../types";
2
2
  export declare function upsertPoints(tableName: string, points: Array<{
3
3
  id: string | number;
4
4
  vector: number[];
5
5
  payload?: Record<string, unknown>;
6
- }>, vectorType: VectorType, dimension: number): Promise<number>;
7
- export declare function searchPoints(tableName: string, queryVector: number[], top: number, withPayload: boolean | undefined, distance: DistanceKind, vectorType: VectorType, dimension: number): Promise<Array<{
6
+ }>, dimension: number): Promise<number>;
7
+ export declare function searchPoints(tableName: string, queryVector: number[], top: number, withPayload: boolean | undefined, distance: DistanceKind, dimension: number): Promise<Array<{
8
8
  id: string;
9
9
  score: number;
10
10
  payload?: Record<string, unknown>;
@@ -3,7 +3,7 @@ import { buildJsonOrEmpty, buildVectorParam } from "../ydb/helpers.js";
3
3
  import { logger } from "../logging/logger.js";
4
4
  import { notifyUpsert } from "../indexing/IndexScheduler.js";
5
5
  import { VECTOR_INDEX_BUILD_ENABLED } from "../config/env.js";
6
- export async function upsertPoints(tableName, points, vectorType, dimension) {
6
+ export async function upsertPoints(tableName, points, dimension) {
7
7
  let upserted = 0;
8
8
  await withSession(async (s) => {
9
9
  for (const p of points) {
@@ -13,18 +13,18 @@ export async function upsertPoints(tableName, points, vectorType, dimension) {
13
13
  }
14
14
  const ddl = `
15
15
  DECLARE $id AS Utf8;
16
- DECLARE $vec AS List<${vectorType === "uint8" ? "Uint8" : "Float"}>;
16
+ DECLARE $vec AS List<Float>;
17
17
  DECLARE $payload AS JsonDocument;
18
18
  UPSERT INTO ${tableName} (point_id, embedding, payload)
19
19
  VALUES (
20
20
  $id,
21
- Untag(Knn::ToBinaryString${vectorType === "uint8" ? "Uint8" : "Float"}($vec), "${vectorType === "uint8" ? "Uint8Vector" : "FloatVector"}"),
21
+ Untag(Knn::ToBinaryStringFloat($vec), "FloatVector"),
22
22
  $payload
23
23
  );
24
24
  `;
25
25
  const params = {
26
26
  $id: TypedValues.utf8(id),
27
- $vec: buildVectorParam(p.vector, vectorType),
27
+ $vec: buildVectorParam(p.vector),
28
28
  $payload: buildJsonOrEmpty(p.payload),
29
29
  };
30
30
  // Retry on transient schema/metadata mismatches during index rebuild
@@ -56,21 +56,21 @@ export async function upsertPoints(tableName, points, vectorType, dimension) {
56
56
  return upserted;
57
57
  }
58
58
  // Removed legacy index backfill helper
59
- export async function searchPoints(tableName, queryVector, top, withPayload, distance, vectorType, dimension) {
59
+ export async function searchPoints(tableName, queryVector, top, withPayload, distance, dimension) {
60
60
  if (queryVector.length !== dimension) {
61
61
  throw new Error(`Vector dimension mismatch: got ${queryVector.length}, expected ${dimension}`);
62
62
  }
63
63
  const { fn, order } = mapDistanceToKnnFn(distance);
64
64
  // Single-phase search over embedding using vector index if present
65
- const qf = buildVectorParam(queryVector, vectorType);
65
+ const qf = buildVectorParam(queryVector);
66
66
  const params = {
67
67
  $qf: qf,
68
68
  $k2: TypedValues.uint32(top),
69
69
  };
70
70
  const buildQuery = (useIndex) => `
71
- DECLARE $qf AS List<${vectorType === "uint8" ? "Uint8" : "Float"}>;
71
+ DECLARE $qf AS List<Float>;
72
72
  DECLARE $k2 AS Uint32;
73
- $qbinf = Knn::ToBinaryString${vectorType === "uint8" ? "Uint8" : "Float"}($qf);
73
+ $qbinf = Knn::ToBinaryStringFloat($qf);
74
74
  SELECT point_id, ${withPayload ? "payload, " : ""}${fn}(embedding, $qbinf) AS score
75
75
  FROM ${tableName}${useIndex ? " VIEW emb_idx" : ""}
76
76
  ORDER BY score ${order}
@@ -214,7 +214,7 @@ export async function upsertPoints(ctx, body) {
214
214
  error: parsed.error.flatten(),
215
215
  });
216
216
  }
217
- const upserted = await repoUpsertPoints(meta.table, parsed.data.points, meta.vectorType, meta.dimension);
217
+ const upserted = await repoUpsertPoints(meta.table, parsed.data.points, meta.dimension);
218
218
  if (VECTOR_INDEX_BUILD_ENABLED) {
219
219
  requestIndexBuild(meta.table, meta.dimension, meta.distance, meta.vectorType);
220
220
  }
@@ -265,7 +265,7 @@ async function executeSearch(ctx, normalizedSearch, source) {
265
265
  distance: meta.distance,
266
266
  vectorType: meta.vectorType,
267
267
  }, `${source}: executing`);
268
- const hits = await repoSearchPoints(meta.table, parsed.data.vector, parsed.data.top, parsed.data.with_payload, meta.distance, meta.vectorType, meta.dimension);
268
+ const hits = await repoSearchPoints(meta.table, parsed.data.vector, parsed.data.top, parsed.data.with_payload, meta.distance, meta.dimension);
269
269
  const threshold = normalizedSearch.scoreThreshold;
270
270
  const filtered = threshold === undefined
271
271
  ? hits
package/dist/types.d.ts CHANGED
@@ -1,13 +1,12 @@
1
1
  import { z } from "zod";
2
2
  export type DistanceKind = "Cosine" | "Euclid" | "Dot" | "Manhattan";
3
- export type VectorType = "float" | "uint8";
3
+ export type VectorType = "float";
4
4
  export declare const CreateCollectionReq: z.ZodObject<{
5
5
  vectors: z.ZodObject<{
6
6
  size: z.ZodNumber;
7
7
  distance: z.ZodType<DistanceKind>;
8
8
  data_type: z.ZodOptional<z.ZodEnum<{
9
9
  float: "float";
10
- uint8: "uint8";
11
10
  }>>;
12
11
  }, z.core.$strip>;
13
12
  }, z.core.$strip>;
package/dist/types.js CHANGED
@@ -8,7 +8,7 @@ export const CreateCollectionReq = z.object({
8
8
  "Dot",
9
9
  "Manhattan",
10
10
  ]),
11
- data_type: z.enum(["float", "uint8"]).optional(),
11
+ data_type: z.enum(["float"]).optional(),
12
12
  }),
13
13
  });
14
14
  export const UpsertPointsReq = z.object({
@@ -1,2 +1,2 @@
1
- export declare function buildVectorParam(vector: number[], vectorType: "float" | "uint8"): import("ydb-sdk-proto").Ydb.ITypedValue;
1
+ export declare function buildVectorParam(vector: number[]): import("ydb-sdk-proto").Ydb.ITypedValue;
2
2
  export declare function buildJsonOrEmpty(payload?: Record<string, unknown>): import("ydb-sdk-proto").Ydb.ITypedValue;
@@ -1,44 +1,6 @@
1
1
  import { Types, TypedValues } from "./client.js";
2
- export function buildVectorParam(vector, vectorType) {
3
- let list;
4
- if (vectorType === "uint8") {
5
- // Check if vector is already quantized (integers in [0,255])
6
- const isAlreadyQuantized = vector.every(v => Number.isInteger(v) && v >= 0 && v <= 255);
7
- if (isAlreadyQuantized) {
8
- list = vector;
9
- }
10
- else {
11
- // Float embeddings need quantization. Per YDB docs (knn.md lines 282-294):
12
- // Formula: ((x - min) / (max - min)) * 255
13
- const min = Math.min(...vector);
14
- const max = Math.max(...vector);
15
- // Determine quantization strategy based on detected range
16
- if (min >= 0 && max <= 1.01) {
17
- // Normalized [0,1] embeddings (common for some models)
18
- list = vector.map(v => Math.round(Math.max(0, Math.min(1, v)) * 255));
19
- }
20
- else if (min >= -1.01 && max <= 1.01) {
21
- // Normalized [-1,1] embeddings (most common)
22
- // Map to [0,255]: ((x + 1) / 2) * 255 = (x + 1) * 127.5
23
- list = vector.map(v => Math.round((Math.max(-1, Math.min(1, v)) + 1) * 127.5));
24
- }
25
- else {
26
- // General case: linear scaling from [min,max] to [0,255]
27
- const range = max - min;
28
- if (range > 0) {
29
- list = vector.map(v => Math.round(((v - min) / range) * 255));
30
- }
31
- else {
32
- // All values identical; map to midpoint
33
- list = vector.map(() => 127);
34
- }
35
- }
36
- }
37
- }
38
- else {
39
- list = vector;
40
- }
41
- return TypedValues.list(vectorType === "uint8" ? Types.UINT8 : Types.FLOAT, list);
2
+ export function buildVectorParam(vector) {
3
+ return TypedValues.list(Types.FLOAT, vector);
42
4
  }
43
5
  export function buildJsonOrEmpty(payload) {
44
6
  return TypedValues.jsonDocument(JSON.stringify(payload ?? {}));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ydb-qdrant",
3
- "version": "2.3.0",
3
+ "version": "3.0.0",
4
4
  "main": "dist/package/Api.js",
5
5
  "types": "dist/package/Api.d.ts",
6
6
  "exports": {
@@ -46,7 +46,7 @@
46
46
  ],
47
47
  "author": "",
48
48
  "license": "ISC",
49
- "description": "Qdrant-compatible Node.js/TypeScript API that stores/searches embeddings in YDB using approximate coarse-to-fine vector search (quantized uint8 preselect + float refine).",
49
+ "description": "Qdrant-compatible Node.js/TypeScript API that stores/searches embeddings in YDB using single-phase top-k vector search with an automatic vector index and table-scan fallback.",
50
50
  "type": "module",
51
51
  "publishConfig": {
52
52
  "access": "public"