ydb-qdrant 7.0.1 → 8.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/config/env.d.ts +0 -8
- package/dist/config/env.js +2 -29
- package/dist/package/api.d.ts +5 -2
- package/dist/package/api.js +2 -2
- package/dist/qdrant/QdrantRestTypes.d.ts +35 -0
- package/dist/repositories/collectionsRepo.d.ts +1 -2
- package/dist/repositories/collectionsRepo.js +62 -103
- package/dist/repositories/collectionsRepo.one-table.js +103 -47
- package/dist/repositories/collectionsRepo.shared.d.ts +2 -0
- package/dist/repositories/collectionsRepo.shared.js +32 -0
- package/dist/repositories/pointsRepo.d.ts +4 -8
- package/dist/repositories/pointsRepo.one-table/Delete.js +122 -67
- package/dist/repositories/pointsRepo.one-table/PathSegmentsFilter.d.ts +5 -2
- package/dist/repositories/pointsRepo.one-table/PathSegmentsFilter.js +7 -6
- package/dist/repositories/pointsRepo.one-table/Search.d.ts +4 -0
- package/dist/repositories/pointsRepo.one-table/Search.js +208 -0
- package/dist/repositories/pointsRepo.one-table/Upsert.d.ts +2 -2
- package/dist/repositories/pointsRepo.one-table/Upsert.js +51 -66
- package/dist/repositories/pointsRepo.one-table.d.ts +1 -1
- package/dist/repositories/pointsRepo.one-table.js +1 -1
- package/dist/routes/collections.js +7 -61
- package/dist/routes/points.js +11 -66
- package/dist/services/PointsService.d.ts +3 -8
- package/dist/services/PointsService.js +19 -23
- package/dist/types.d.ts +23 -33
- package/dist/types.js +18 -20
- package/dist/utils/normalization.js +13 -14
- package/dist/utils/retry.js +19 -29
- package/dist/utils/vectorBinary.js +10 -5
- package/dist/ydb/bootstrapMetaTable.d.ts +7 -0
- package/dist/ydb/bootstrapMetaTable.js +75 -0
- package/dist/ydb/client.d.ts +23 -17
- package/dist/ydb/client.js +82 -423
- package/dist/ydb/schema.js +88 -148
- package/package.json +2 -10
- package/dist/qdrant/QdrantTypes.d.ts +0 -19
- package/dist/repositories/pointsRepo.one-table/Search/Approximate.d.ts +0 -18
- package/dist/repositories/pointsRepo.one-table/Search/Approximate.js +0 -119
- package/dist/repositories/pointsRepo.one-table/Search/Exact.d.ts +0 -17
- package/dist/repositories/pointsRepo.one-table/Search/Exact.js +0 -101
- package/dist/repositories/pointsRepo.one-table/Search/index.d.ts +0 -8
- package/dist/repositories/pointsRepo.one-table/Search/index.js +0 -30
- package/dist/utils/typeGuards.d.ts +0 -1
- package/dist/utils/typeGuards.js +0 -3
- package/dist/ydb/QueryDiagnostics.d.ts +0 -6
- package/dist/ydb/QueryDiagnostics.js +0 -52
- package/dist/ydb/SessionPool.d.ts +0 -36
- package/dist/ydb/SessionPool.js +0 -248
- package/dist/ydb/bulkUpsert.d.ts +0 -6
- package/dist/ydb/bulkUpsert.js +0 -52
- /package/dist/qdrant/{QdrantTypes.js → QdrantRestTypes.js} +0 -0
package/dist/ydb/schema.js
CHANGED
|
@@ -1,121 +1,61 @@
|
|
|
1
|
-
import { withSession } from "./client.js";
|
|
1
|
+
import { withSession, TableDescription, Column, Types, Ydb } from "./client.js";
|
|
2
2
|
import { logger } from "../logging/logger.js";
|
|
3
|
-
import { STARTUP_PROBE_SESSION_TIMEOUT_MS } from "../config/env.js";
|
|
4
3
|
export const GLOBAL_POINTS_TABLE = "qdrant_all_points";
|
|
5
4
|
// Shared YDB-related constants for repositories.
|
|
6
5
|
export { UPSERT_BATCH_SIZE } from "../config/env.js";
|
|
7
|
-
const SCHEMA_DDL_TIMEOUT_MS = 5000;
|
|
8
6
|
let metaTableReady = false;
|
|
9
7
|
let metaTableReadyInFlight = null;
|
|
10
8
|
let globalPointsTableReady = false;
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
const MAX_DEPTH = 8;
|
|
16
|
-
const MAX_NODES = 500;
|
|
17
|
-
const queue = [{ v: err, depth: 0 }];
|
|
18
|
-
let visited = 0;
|
|
19
|
-
while (queue.length > 0 && visited < MAX_NODES) {
|
|
20
|
-
const next = queue.shift();
|
|
21
|
-
if (!next)
|
|
22
|
-
break;
|
|
23
|
-
const { v, depth } = next;
|
|
24
|
-
if (depth > MAX_DEPTH)
|
|
25
|
-
continue;
|
|
26
|
-
if (v === null || typeof v !== "object")
|
|
27
|
-
continue;
|
|
28
|
-
if (seen.has(v))
|
|
29
|
-
continue;
|
|
30
|
-
seen.add(v);
|
|
31
|
-
visited++;
|
|
32
|
-
const maybeMessage = v.message;
|
|
33
|
-
if (typeof maybeMessage === "string" && maybeMessage.length > 0) {
|
|
34
|
-
out.push(maybeMessage);
|
|
35
|
-
}
|
|
36
|
-
const maybeIssues = v.issues;
|
|
37
|
-
if (Array.isArray(maybeIssues)) {
|
|
38
|
-
for (const child of maybeIssues) {
|
|
39
|
-
queue.push({ v: child, depth: depth + 1 });
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
return out;
|
|
9
|
+
let globalPointsTableReadyInFlight = null;
|
|
10
|
+
function throwMigrationRequired(message) {
|
|
11
|
+
logger.error(message);
|
|
12
|
+
throw new Error(message);
|
|
44
13
|
}
|
|
45
|
-
function
|
|
14
|
+
function isTableNotFoundError(err) {
|
|
46
15
|
const msg = err instanceof Error ? err.message : String(err);
|
|
47
|
-
|
|
16
|
+
const ctorName = err instanceof Error
|
|
17
|
+
? err.constructor?.name
|
|
18
|
+
: undefined;
|
|
19
|
+
const statusCodeMatch = /code\s+(\d{6})/i.exec(msg);
|
|
20
|
+
const statusCode = statusCodeMatch && statusCodeMatch[1]
|
|
21
|
+
? Number(statusCodeMatch[1])
|
|
22
|
+
: undefined;
|
|
23
|
+
// ydb-sdk exposes dedicated error classes with server status codes.
|
|
24
|
+
// In practice, table-not-found can surface as:
|
|
25
|
+
// - NotFound (code 400140)
|
|
26
|
+
// - SchemeError (code 400070) with empty issues (observed in CI logs for describeTable)
|
|
27
|
+
if (ctorName === "NotFound" || statusCode === 400140) {
|
|
48
28
|
return true;
|
|
49
29
|
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
const issueMsgs = collectIssueMessages(err).join("\n");
|
|
53
|
-
return (/already exists/i.test(issueMsgs) ||
|
|
54
|
-
/path exists/i.test(issueMsgs) ||
|
|
55
|
-
/table name conflict/i.test(issueMsgs));
|
|
56
|
-
}
|
|
57
|
-
function isUnknownColumnError(err) {
|
|
58
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
59
|
-
const re = /unknown column|cannot resolve|member not found/i;
|
|
60
|
-
if (re.test(msg)) {
|
|
30
|
+
if ((ctorName === "SchemeError" || statusCode === 400070) &&
|
|
31
|
+
/:\s*\[\s*\]\s*$/i.test(msg)) {
|
|
61
32
|
return true;
|
|
62
33
|
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
34
|
+
return (/table.*not found/i.test(msg) ||
|
|
35
|
+
/path.*not found/i.test(msg) ||
|
|
36
|
+
/does not exist/i.test(msg));
|
|
66
37
|
}
|
|
67
|
-
function
|
|
68
|
-
|
|
69
|
-
|
|
38
|
+
function isAlreadyExistsError(err) {
|
|
39
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
40
|
+
return /already exists/i.test(msg) || /path.*exists/i.test(msg);
|
|
70
41
|
}
|
|
71
42
|
async function ensureMetaTableOnce() {
|
|
72
|
-
await withSession(async (
|
|
43
|
+
await withSession(async (s) => {
|
|
44
|
+
let tableDescription = null;
|
|
73
45
|
try {
|
|
74
|
-
await
|
|
75
|
-
CREATE TABLE qdr__collections (
|
|
76
|
-
collection Utf8,
|
|
77
|
-
table_name Utf8,
|
|
78
|
-
vector_dimension Uint32,
|
|
79
|
-
distance Utf8,
|
|
80
|
-
vector_type Utf8,
|
|
81
|
-
created_at Timestamp,
|
|
82
|
-
last_accessed_at Timestamp,
|
|
83
|
-
PRIMARY KEY (collection)
|
|
84
|
-
);
|
|
85
|
-
`
|
|
86
|
-
.idempotent(true)
|
|
87
|
-
.timeout(SCHEMA_DDL_TIMEOUT_MS)
|
|
88
|
-
.signal(signal);
|
|
89
|
-
logger.info("created metadata table qdr__collections");
|
|
46
|
+
tableDescription = await s.describeTable("qdr__collections");
|
|
90
47
|
}
|
|
91
48
|
catch (err) {
|
|
92
|
-
if (
|
|
93
|
-
|
|
94
|
-
// or name resolution conflicts. Probe existence before failing startup.
|
|
95
|
-
try {
|
|
96
|
-
await sql `SELECT collection FROM qdr__collections LIMIT 0;`
|
|
97
|
-
.idempotent(true)
|
|
98
|
-
.timeout(STARTUP_PROBE_SESSION_TIMEOUT_MS)
|
|
99
|
-
.signal(signal);
|
|
100
|
-
logger.warn({ err }, "CREATE TABLE qdr__collections failed, but the table appears to exist; continuing");
|
|
101
|
-
}
|
|
102
|
-
catch {
|
|
103
|
-
throw err;
|
|
104
|
-
}
|
|
49
|
+
if (isTableNotFoundError(err)) {
|
|
50
|
+
throwMigrationRequired("Metadata table qdr__collections does not exist; please create it before starting the service");
|
|
105
51
|
}
|
|
52
|
+
throw err;
|
|
106
53
|
}
|
|
107
|
-
//
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
.signal(signal);
|
|
113
|
-
}
|
|
114
|
-
catch (err) {
|
|
115
|
-
if (!isUnknownColumnError(err)) {
|
|
116
|
-
throw err;
|
|
117
|
-
}
|
|
118
|
-
throwMigrationRequired("Metadata table qdr__collections is missing required column last_accessed_at; apply a manual migration (ALTER TABLE qdr__collections ADD COLUMN last_accessed_at Timestamp).");
|
|
54
|
+
// Table exists: validate required columns.
|
|
55
|
+
const columns = tableDescription.columns ?? [];
|
|
56
|
+
const hasLastAccessedAt = columns.some((col) => col.name === "last_accessed_at");
|
|
57
|
+
if (!hasLastAccessedAt) {
|
|
58
|
+
throwMigrationRequired("Metadata table qdr__collections is missing required column last_accessed_at; please recreate the table or apply a manual schema migration before starting the service");
|
|
119
59
|
}
|
|
120
60
|
});
|
|
121
61
|
metaTableReady = true;
|
|
@@ -136,63 +76,63 @@ export async function ensureMetaTable() {
|
|
|
136
76
|
metaTableReadyInFlight = null;
|
|
137
77
|
}
|
|
138
78
|
}
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
}
|
|
143
|
-
await withSession(async (sql, signal) => {
|
|
79
|
+
async function ensureGlobalPointsTableOnce() {
|
|
80
|
+
await withSession(async (s) => {
|
|
81
|
+
let tableDescription = null;
|
|
144
82
|
try {
|
|
145
|
-
await
|
|
146
|
-
CREATE TABLE ${sql.identifier(GLOBAL_POINTS_TABLE)} (
|
|
147
|
-
uid Utf8,
|
|
148
|
-
point_id Utf8,
|
|
149
|
-
embedding String,
|
|
150
|
-
embedding_quantized String,
|
|
151
|
-
payload JsonDocument,
|
|
152
|
-
PRIMARY KEY (uid, point_id)
|
|
153
|
-
)
|
|
154
|
-
WITH (
|
|
155
|
-
AUTO_PARTITIONING_BY_LOAD = ENABLED,
|
|
156
|
-
AUTO_PARTITIONING_BY_SIZE = ENABLED,
|
|
157
|
-
AUTO_PARTITIONING_PARTITION_SIZE_MB = 100
|
|
158
|
-
);
|
|
159
|
-
`
|
|
160
|
-
.idempotent(true)
|
|
161
|
-
.timeout(SCHEMA_DDL_TIMEOUT_MS)
|
|
162
|
-
.signal(signal);
|
|
163
|
-
logger.info(`created global points table ${GLOBAL_POINTS_TABLE}`);
|
|
83
|
+
tableDescription = await s.describeTable(GLOBAL_POINTS_TABLE);
|
|
164
84
|
}
|
|
165
85
|
catch (err) {
|
|
166
|
-
if (!
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
86
|
+
if (!isTableNotFoundError(err)) {
|
|
87
|
+
throw err;
|
|
88
|
+
}
|
|
89
|
+
// Table doesn't exist, create it with all columns using the new schema and
|
|
90
|
+
// auto-partitioning enabled.
|
|
91
|
+
const desc = new TableDescription()
|
|
92
|
+
.withColumns(new Column("uid", Types.UTF8), new Column("point_id", Types.UTF8), new Column("embedding", Types.BYTES), new Column("embedding_quantized", Types.BYTES), new Column("payload", Types.JSON_DOCUMENT))
|
|
93
|
+
.withPrimaryKeys("uid", "point_id");
|
|
94
|
+
desc.withPartitioningSettings({
|
|
95
|
+
partitioningByLoad: Ydb.FeatureFlag.Status.ENABLED,
|
|
96
|
+
partitioningBySize: Ydb.FeatureFlag.Status.ENABLED,
|
|
97
|
+
partitionSizeMb: 100,
|
|
98
|
+
});
|
|
99
|
+
try {
|
|
100
|
+
await s.createTable(GLOBAL_POINTS_TABLE, desc);
|
|
101
|
+
logger.info(`created global points table ${GLOBAL_POINTS_TABLE}`);
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
104
|
+
catch (createErr) {
|
|
105
|
+
// Race-safe: another concurrent caller might have created the table.
|
|
106
|
+
if (!isAlreadyExistsError(createErr)) {
|
|
107
|
+
throw createErr;
|
|
180
108
|
}
|
|
181
109
|
}
|
|
110
|
+
// If the table already exists (race), fall through to a fresh describe +
|
|
111
|
+
// schema validation.
|
|
112
|
+
tableDescription = await s.describeTable(GLOBAL_POINTS_TABLE);
|
|
182
113
|
}
|
|
183
|
-
//
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
.signal(signal);
|
|
189
|
-
}
|
|
190
|
-
catch (err) {
|
|
191
|
-
if (!isUnknownColumnError(err)) {
|
|
192
|
-
throw err;
|
|
193
|
-
}
|
|
194
|
-
throwMigrationRequired(`Global points table ${GLOBAL_POINTS_TABLE} is missing required column embedding_quantized; apply a manual migration (ALTER TABLE ${GLOBAL_POINTS_TABLE} ADD COLUMN embedding_quantized String). If your legacy schema used embedding_bit, rename it or recreate the table.`);
|
|
114
|
+
// Table exists, require the new embedding_quantized column.
|
|
115
|
+
const columns = tableDescription.columns ?? [];
|
|
116
|
+
const hasEmbeddingQuantized = columns.some((col) => col.name === "embedding_quantized");
|
|
117
|
+
if (!hasEmbeddingQuantized) {
|
|
118
|
+
throwMigrationRequired(`Global points table ${GLOBAL_POINTS_TABLE} is missing required column embedding_quantized; please recreate the table or apply a manual schema migration before starting the service`);
|
|
195
119
|
}
|
|
196
|
-
globalPointsTableReady = true;
|
|
197
120
|
});
|
|
198
121
|
}
|
|
122
|
+
export async function ensureGlobalPointsTable() {
|
|
123
|
+
if (globalPointsTableReady) {
|
|
124
|
+
return;
|
|
125
|
+
}
|
|
126
|
+
if (globalPointsTableReadyInFlight) {
|
|
127
|
+
await globalPointsTableReadyInFlight;
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
globalPointsTableReadyInFlight = ensureGlobalPointsTableOnce();
|
|
131
|
+
try {
|
|
132
|
+
await globalPointsTableReadyInFlight;
|
|
133
|
+
globalPointsTableReady = true;
|
|
134
|
+
}
|
|
135
|
+
finally {
|
|
136
|
+
globalPointsTableReadyInFlight = null;
|
|
137
|
+
}
|
|
138
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ydb-qdrant",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "8.1.0",
|
|
4
4
|
"main": "dist/package/api.js",
|
|
5
5
|
"types": "dist/package/api.d.ts",
|
|
6
6
|
"exports": {
|
|
@@ -67,16 +67,11 @@
|
|
|
67
67
|
"@grpc/grpc-js": "^1.14.0",
|
|
68
68
|
"@qdrant/js-client-rest": "^1.16.2",
|
|
69
69
|
"@yandex-cloud/nodejs-sdk": "^2.9.0",
|
|
70
|
-
"@ydbjs/api": "^6.0.5",
|
|
71
|
-
"@ydbjs/auth": "^6.0.5",
|
|
72
|
-
"@ydbjs/core": "^6.0.7",
|
|
73
|
-
"@ydbjs/query": "^6.0.7",
|
|
74
|
-
"@ydbjs/retry": "^6.0.5",
|
|
75
|
-
"@ydbjs/value": "^6.0.5",
|
|
76
70
|
"dotenv": "^17.2.3",
|
|
77
71
|
"express": "^5.1.0",
|
|
78
72
|
"nice-grpc": "^2.1.13",
|
|
79
73
|
"pino": "^10.1.0",
|
|
74
|
+
"ydb-sdk": "^5.11.1",
|
|
80
75
|
"zod": "^4.1.12"
|
|
81
76
|
},
|
|
82
77
|
"devDependencies": {
|
|
@@ -92,8 +87,5 @@
|
|
|
92
87
|
"typescript": "^5.9.3",
|
|
93
88
|
"typescript-eslint": "^8.47.0",
|
|
94
89
|
"vitest": "^4.0.12"
|
|
95
|
-
},
|
|
96
|
-
"engines": {
|
|
97
|
-
"node": ">=20.19.0"
|
|
98
90
|
}
|
|
99
91
|
}
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import type { Schemas } from "@qdrant/js-client-rest";
|
|
2
|
-
/**
|
|
3
|
-
* Centralized Qdrant OpenAPI-derived types (via @qdrant/js-client-rest), narrowed to the
|
|
4
|
-
* subset of shapes that ydb-qdrant currently supports.
|
|
5
|
-
*
|
|
6
|
-
* Important:
|
|
7
|
-
* - Qdrant's schema types are intentionally broad (named vectors, multi-vectors, sparse vectors, inference objects).
|
|
8
|
-
* - Internally we support dense vectors only (`number[]`), so we narrow types accordingly.
|
|
9
|
-
*/
|
|
10
|
-
export type QdrantDistance = Schemas["Distance"];
|
|
11
|
-
export type QdrantPointId = Schemas["ExtendedPointId"];
|
|
12
|
-
export type QdrantDenseVector = Extract<Schemas["VectorStruct"], number[]>;
|
|
13
|
-
export type QdrantPayload = Record<string, unknown>;
|
|
14
|
-
export type QdrantPointStructDense = Omit<Schemas["PointStruct"], "vector" | "payload"> & {
|
|
15
|
-
vector: QdrantDenseVector;
|
|
16
|
-
payload?: QdrantPayload;
|
|
17
|
-
};
|
|
18
|
-
export type QdrantScoredPoint = Schemas["ScoredPoint"];
|
|
19
|
-
export type QdrantQueryResponse = Schemas["QueryResponse"];
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import type { DistanceKind } from "../../../types";
|
|
2
|
-
import type { QdrantPayload } from "../../../qdrant/QdrantTypes.js";
|
|
3
|
-
export declare function searchPointsOneTableApproximate(args: {
|
|
4
|
-
tableName: string;
|
|
5
|
-
queryVector: number[];
|
|
6
|
-
top: number;
|
|
7
|
-
withPayload: boolean | undefined;
|
|
8
|
-
distance: DistanceKind;
|
|
9
|
-
dimension: number;
|
|
10
|
-
uid: string;
|
|
11
|
-
overfetchMultiplier: number;
|
|
12
|
-
timeoutMs: number;
|
|
13
|
-
filterPaths?: Array<Array<string>>;
|
|
14
|
-
}): Promise<Array<{
|
|
15
|
-
id: string;
|
|
16
|
-
score: number;
|
|
17
|
-
payload?: QdrantPayload;
|
|
18
|
-
}>>;
|
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
import { withSession } from "../../../ydb/client.js";
|
|
2
|
-
import { buildVectorBinaryParams } from "../../../ydb/helpers.js";
|
|
3
|
-
import { Bytes, Uint32, Utf8 } from "@ydbjs/value/primitive";
|
|
4
|
-
import { mapDistanceToBitKnnFn, mapDistanceToKnnFn, } from "../../../utils/distance.js";
|
|
5
|
-
import { buildPathSegmentsFilter } from "../PathSegmentsFilter.js";
|
|
6
|
-
import { attachQueryDiagnostics } from "../../../ydb/QueryDiagnostics.js";
|
|
7
|
-
import { isRecord } from "../../../utils/typeGuards.js";
|
|
8
|
-
function assertVectorDimension(vector, dimension, messagePrefix = "Vector dimension mismatch") {
|
|
9
|
-
if (vector.length !== dimension) {
|
|
10
|
-
throw new Error(`${messagePrefix}: got ${vector.length}, expected ${dimension}`);
|
|
11
|
-
}
|
|
12
|
-
}
|
|
13
|
-
function parsePayloadJson(payloadText) {
|
|
14
|
-
if (isRecord(payloadText)) {
|
|
15
|
-
return payloadText;
|
|
16
|
-
}
|
|
17
|
-
if (typeof payloadText !== "string" || payloadText.length === 0) {
|
|
18
|
-
return undefined;
|
|
19
|
-
}
|
|
20
|
-
try {
|
|
21
|
-
return JSON.parse(payloadText);
|
|
22
|
-
}
|
|
23
|
-
catch {
|
|
24
|
-
return undefined;
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
function buildApproxSearchParams(args) {
|
|
28
|
-
const safeTop = args.top > 0 ? args.top : 1;
|
|
29
|
-
const rawCandidateLimit = safeTop * args.overfetchMultiplier;
|
|
30
|
-
const candidateLimit = Math.max(safeTop, rawCandidateLimit);
|
|
31
|
-
const filter = buildPathSegmentsFilter(args.filterPaths);
|
|
32
|
-
const binaries = buildVectorBinaryParams(args.queryVector);
|
|
33
|
-
return {
|
|
34
|
-
params: {
|
|
35
|
-
...(filter?.whereParams ?? {}),
|
|
36
|
-
$qbin_bit: new Bytes(binaries.bit),
|
|
37
|
-
$qbinf: new Bytes(binaries.float),
|
|
38
|
-
$candidateLimit: new Uint32(candidateLimit),
|
|
39
|
-
$safeTop: new Uint32(safeTop),
|
|
40
|
-
$uid: new Utf8(args.uid),
|
|
41
|
-
},
|
|
42
|
-
filterWhereSql: filter?.whereSql,
|
|
43
|
-
};
|
|
44
|
-
}
|
|
45
|
-
export async function searchPointsOneTableApproximate(args) {
|
|
46
|
-
assertVectorDimension(args.queryVector, args.dimension);
|
|
47
|
-
return await withSession(async (sql, signal) => {
|
|
48
|
-
const { fn, order } = mapDistanceToKnnFn(args.distance);
|
|
49
|
-
const { fn: bitFn, order: bitOrder } = mapDistanceToBitKnnFn(args.distance);
|
|
50
|
-
const { params, filterWhereSql } = buildApproxSearchParams({
|
|
51
|
-
queryVector: args.queryVector,
|
|
52
|
-
top: args.top,
|
|
53
|
-
uid: args.uid,
|
|
54
|
-
overfetchMultiplier: args.overfetchMultiplier,
|
|
55
|
-
filterPaths: args.filterPaths,
|
|
56
|
-
});
|
|
57
|
-
let payloadColumn;
|
|
58
|
-
if (args.withPayload) {
|
|
59
|
-
payloadColumn = sql.unsafe(", payload");
|
|
60
|
-
}
|
|
61
|
-
else {
|
|
62
|
-
payloadColumn = sql.unsafe("");
|
|
63
|
-
}
|
|
64
|
-
let filterClause;
|
|
65
|
-
if (filterWhereSql) {
|
|
66
|
-
filterClause = sql.unsafe(` AND ${filterWhereSql}`);
|
|
67
|
-
}
|
|
68
|
-
else {
|
|
69
|
-
filterClause = sql.unsafe("");
|
|
70
|
-
}
|
|
71
|
-
const baseQuery = sql `
|
|
72
|
-
$candidates = (
|
|
73
|
-
SELECT point_id
|
|
74
|
-
FROM ${sql.identifier(args.tableName)}
|
|
75
|
-
WHERE uid = $uid
|
|
76
|
-
AND embedding_quantized IS NOT NULL${filterClause}
|
|
77
|
-
ORDER BY ${sql.unsafe(bitFn)}(embedding_quantized, $qbin_bit) ${sql.unsafe(bitOrder)}
|
|
78
|
-
LIMIT $candidateLimit
|
|
79
|
-
);
|
|
80
|
-
|
|
81
|
-
SELECT
|
|
82
|
-
point_id${payloadColumn},
|
|
83
|
-
${sql.unsafe(fn)}(embedding, $qbinf) AS score
|
|
84
|
-
FROM ${sql.identifier(args.tableName)}
|
|
85
|
-
WHERE uid = $uid
|
|
86
|
-
AND point_id IN $candidates${filterClause}
|
|
87
|
-
ORDER BY score ${sql.unsafe(order)}
|
|
88
|
-
LIMIT $safeTop;
|
|
89
|
-
`;
|
|
90
|
-
let q = attachQueryDiagnostics(baseQuery, {
|
|
91
|
-
operation: "searchPointsOneTableApproximate",
|
|
92
|
-
tableName: args.tableName,
|
|
93
|
-
uid: args.uid,
|
|
94
|
-
distance: args.distance,
|
|
95
|
-
withPayload: Boolean(args.withPayload),
|
|
96
|
-
overfetchMultiplier: args.overfetchMultiplier,
|
|
97
|
-
})
|
|
98
|
-
.idempotent(true)
|
|
99
|
-
.timeout(args.timeoutMs)
|
|
100
|
-
.signal(signal);
|
|
101
|
-
for (const [key, value] of Object.entries(params)) {
|
|
102
|
-
q = q.parameter(key, value);
|
|
103
|
-
}
|
|
104
|
-
const [rows] = await q;
|
|
105
|
-
return rows.map((r) => {
|
|
106
|
-
if (!r.point_id) {
|
|
107
|
-
throw new Error("point_id is missing in YDB search result");
|
|
108
|
-
}
|
|
109
|
-
const payload = args.withPayload
|
|
110
|
-
? parsePayloadJson(r.payload)
|
|
111
|
-
: undefined;
|
|
112
|
-
return {
|
|
113
|
-
id: r.point_id,
|
|
114
|
-
score: Number(r.score),
|
|
115
|
-
...(payload ? { payload } : {}),
|
|
116
|
-
};
|
|
117
|
-
});
|
|
118
|
-
});
|
|
119
|
-
}
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
import type { DistanceKind } from "../../../types";
|
|
2
|
-
import type { QdrantPayload } from "../../../qdrant/QdrantTypes.js";
|
|
3
|
-
export declare function searchPointsOneTableExact(args: {
|
|
4
|
-
tableName: string;
|
|
5
|
-
queryVector: number[];
|
|
6
|
-
top: number;
|
|
7
|
-
withPayload: boolean | undefined;
|
|
8
|
-
distance: DistanceKind;
|
|
9
|
-
dimension: number;
|
|
10
|
-
uid: string;
|
|
11
|
-
timeoutMs: number;
|
|
12
|
-
filterPaths?: Array<Array<string>>;
|
|
13
|
-
}): Promise<Array<{
|
|
14
|
-
id: string;
|
|
15
|
-
score: number;
|
|
16
|
-
payload?: QdrantPayload;
|
|
17
|
-
}>>;
|
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
import { withSession } from "../../../ydb/client.js";
|
|
2
|
-
import { buildVectorBinaryParams } from "../../../ydb/helpers.js";
|
|
3
|
-
import { Bytes, Uint32, Utf8 } from "@ydbjs/value/primitive";
|
|
4
|
-
import { mapDistanceToKnnFn } from "../../../utils/distance.js";
|
|
5
|
-
import { buildPathSegmentsFilter } from "../PathSegmentsFilter.js";
|
|
6
|
-
import { attachQueryDiagnostics } from "../../../ydb/QueryDiagnostics.js";
|
|
7
|
-
import { isRecord } from "../../../utils/typeGuards.js";
|
|
8
|
-
function assertVectorDimension(vector, dimension, messagePrefix = "Vector dimension mismatch") {
|
|
9
|
-
if (vector.length !== dimension) {
|
|
10
|
-
throw new Error(`${messagePrefix}: got ${vector.length}, expected ${dimension}`);
|
|
11
|
-
}
|
|
12
|
-
}
|
|
13
|
-
function parsePayloadJson(payloadText) {
|
|
14
|
-
if (isRecord(payloadText)) {
|
|
15
|
-
return payloadText;
|
|
16
|
-
}
|
|
17
|
-
if (typeof payloadText !== "string" || payloadText.length === 0) {
|
|
18
|
-
return undefined;
|
|
19
|
-
}
|
|
20
|
-
try {
|
|
21
|
-
return JSON.parse(payloadText);
|
|
22
|
-
}
|
|
23
|
-
catch {
|
|
24
|
-
return undefined;
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
function buildExactSearchParams(args) {
|
|
28
|
-
const filter = buildPathSegmentsFilter(args.filterPaths);
|
|
29
|
-
const binaries = buildVectorBinaryParams(args.queryVector);
|
|
30
|
-
return {
|
|
31
|
-
params: {
|
|
32
|
-
...(filter?.whereParams ?? {}),
|
|
33
|
-
$qbinf: new Bytes(binaries.float),
|
|
34
|
-
$k: new Uint32(args.top),
|
|
35
|
-
$uid: new Utf8(args.uid),
|
|
36
|
-
},
|
|
37
|
-
filterWhereSql: filter?.whereSql,
|
|
38
|
-
};
|
|
39
|
-
}
|
|
40
|
-
export async function searchPointsOneTableExact(args) {
|
|
41
|
-
assertVectorDimension(args.queryVector, args.dimension);
|
|
42
|
-
return await withSession(async (sql, signal) => {
|
|
43
|
-
const { fn, order } = mapDistanceToKnnFn(args.distance);
|
|
44
|
-
const { params, filterWhereSql } = buildExactSearchParams({
|
|
45
|
-
queryVector: args.queryVector,
|
|
46
|
-
top: args.top,
|
|
47
|
-
uid: args.uid,
|
|
48
|
-
filterPaths: args.filterPaths,
|
|
49
|
-
});
|
|
50
|
-
let payloadColumn;
|
|
51
|
-
if (args.withPayload) {
|
|
52
|
-
payloadColumn = sql.unsafe(", payload");
|
|
53
|
-
}
|
|
54
|
-
else {
|
|
55
|
-
payloadColumn = sql.unsafe("");
|
|
56
|
-
}
|
|
57
|
-
let filterClause;
|
|
58
|
-
if (filterWhereSql) {
|
|
59
|
-
filterClause = sql.unsafe(` AND ${filterWhereSql}`);
|
|
60
|
-
}
|
|
61
|
-
else {
|
|
62
|
-
filterClause = sql.unsafe("");
|
|
63
|
-
}
|
|
64
|
-
const baseQuery = sql `
|
|
65
|
-
SELECT
|
|
66
|
-
point_id${payloadColumn},
|
|
67
|
-
${sql.unsafe(fn)}(embedding, $qbinf) AS score
|
|
68
|
-
FROM ${sql.identifier(args.tableName)}
|
|
69
|
-
WHERE uid = $uid${filterClause}
|
|
70
|
-
ORDER BY score ${sql.unsafe(order)}
|
|
71
|
-
LIMIT $k;
|
|
72
|
-
`;
|
|
73
|
-
let q = attachQueryDiagnostics(baseQuery, {
|
|
74
|
-
operation: "searchPointsOneTableExact",
|
|
75
|
-
tableName: args.tableName,
|
|
76
|
-
uid: args.uid,
|
|
77
|
-
distance: args.distance,
|
|
78
|
-
withPayload: Boolean(args.withPayload),
|
|
79
|
-
})
|
|
80
|
-
.idempotent(true)
|
|
81
|
-
.timeout(args.timeoutMs)
|
|
82
|
-
.signal(signal);
|
|
83
|
-
for (const [key, value] of Object.entries(params)) {
|
|
84
|
-
q = q.parameter(key, value);
|
|
85
|
-
}
|
|
86
|
-
const [rows] = await q;
|
|
87
|
-
return rows.map((r) => {
|
|
88
|
-
if (!r.point_id) {
|
|
89
|
-
throw new Error("point_id is missing in YDB search result");
|
|
90
|
-
}
|
|
91
|
-
const payload = args.withPayload
|
|
92
|
-
? parsePayloadJson(r.payload)
|
|
93
|
-
: undefined;
|
|
94
|
-
return {
|
|
95
|
-
id: r.point_id,
|
|
96
|
-
score: Number(r.score),
|
|
97
|
-
...(payload ? { payload } : {}),
|
|
98
|
-
};
|
|
99
|
-
});
|
|
100
|
-
});
|
|
101
|
-
}
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
import type { DistanceKind } from "../../../types";
|
|
2
|
-
import { SearchMode } from "../../../config/env.js";
|
|
3
|
-
import type { QdrantPayload } from "../../../qdrant/QdrantTypes.js";
|
|
4
|
-
export declare function searchPointsOneTable(tableName: string, queryVector: number[], top: number, withPayload: boolean | undefined, distance: DistanceKind, dimension: number, uid: string, mode: SearchMode | undefined, overfetchMultiplier: number, filterPaths?: Array<Array<string>>): Promise<Array<{
|
|
5
|
-
id: string;
|
|
6
|
-
score: number;
|
|
7
|
-
payload?: QdrantPayload;
|
|
8
|
-
}>>;
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import { SearchMode, SEARCH_OPERATION_TIMEOUT_MS, } from "../../../config/env.js";
|
|
2
|
-
import { searchPointsOneTableExact } from "./Exact.js";
|
|
3
|
-
import { searchPointsOneTableApproximate } from "./Approximate.js";
|
|
4
|
-
export async function searchPointsOneTable(tableName, queryVector, top, withPayload, distance, dimension, uid, mode, overfetchMultiplier, filterPaths) {
|
|
5
|
-
if (mode === SearchMode.Exact) {
|
|
6
|
-
return await searchPointsOneTableExact({
|
|
7
|
-
tableName,
|
|
8
|
-
queryVector,
|
|
9
|
-
top,
|
|
10
|
-
withPayload,
|
|
11
|
-
distance,
|
|
12
|
-
dimension,
|
|
13
|
-
uid,
|
|
14
|
-
timeoutMs: SEARCH_OPERATION_TIMEOUT_MS,
|
|
15
|
-
filterPaths,
|
|
16
|
-
});
|
|
17
|
-
}
|
|
18
|
-
return await searchPointsOneTableApproximate({
|
|
19
|
-
tableName,
|
|
20
|
-
queryVector,
|
|
21
|
-
top,
|
|
22
|
-
withPayload,
|
|
23
|
-
distance,
|
|
24
|
-
dimension,
|
|
25
|
-
uid,
|
|
26
|
-
overfetchMultiplier,
|
|
27
|
-
timeoutMs: SEARCH_OPERATION_TIMEOUT_MS,
|
|
28
|
-
filterPaths,
|
|
29
|
-
});
|
|
30
|
-
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export declare function isRecord(value: unknown): value is Record<string, unknown>;
|
package/dist/utils/typeGuards.js
DELETED