@dofe/infra-vector 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/embedding.service.d.ts +54 -0
- package/dist/embedding.service.d.ts.map +1 -0
- package/dist/embedding.service.js +254 -0
- package/dist/embedding.service.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +27 -0
- package/dist/index.js.map +1 -0
- package/dist/knowledge-base-vikingdb.client.d.ts +29 -0
- package/dist/knowledge-base-vikingdb.client.d.ts.map +1 -0
- package/dist/knowledge-base-vikingdb.client.js +85 -0
- package/dist/knowledge-base-vikingdb.client.js.map +1 -0
- package/dist/vector.module.d.ts +3 -0
- package/dist/vector.module.d.ts.map +1 -0
- package/dist/vector.module.js +44 -0
- package/dist/vector.module.js.map +1 -0
- package/dist/vikingdb-client.service.d.ts +134 -0
- package/dist/vikingdb-client.service.d.ts.map +1 -0
- package/dist/vikingdb-client.service.js +900 -0
- package/dist/vikingdb-client.service.js.map +1 -0
- package/dist/vikingdb.types.d.ts +320 -0
- package/dist/vikingdb.types.d.ts.map +1 -0
- package/dist/vikingdb.types.js +129 -0
- package/dist/vikingdb.types.js.map +1 -0
- package/package.json +36 -0
|
@@ -0,0 +1,900 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Compiler-emitted decorator helper. Reuses `this.__decorate` when the
 * surrounding scope already provides one.
 *
 * The call shape is detected from the number of arguments actually passed:
 *   - fewer than 3: each decorator receives the running result (starting at `target`);
 *   - exactly 3:    each decorator receives `(target, key)`;
 *   - more than 3:  each decorator receives `(target, key, descriptor)`, and a
 *                   truthy final result is installed with Object.defineProperty.
 * Decorators run from last to first; a falsy return keeps the previous result.
 * When `Reflect.decorate` exists it is used instead of the manual loop.
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    var argCount = arguments.length;
    var result;
    if (argCount < 3) {
        result = target;
    }
    else {
        if (desc === null) {
            // A literal `null` descriptor means "look up the current one".
            desc = Object.getOwnPropertyDescriptor(target, key);
        }
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    }
    else {
        for (var index = decorators.length - 1; index >= 0; index--) {
            var decorator = decorators[index];
            if (!decorator) {
                continue;
            }
            var produced;
            if (argCount < 3) {
                produced = decorator(result);
            }
            else if (argCount > 3) {
                produced = decorator(target, key, result);
            }
            else {
                produced = decorator(target, key);
            }
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
|
8
|
+
/**
 * Compiler-emitted metadata helper. Delegates to `Reflect.metadata(k, v)` when
 * that function exists and otherwise returns `undefined`. Reuses
 * `this.__metadata` when the surrounding scope already provides one.
 */
var __metadata = (this && this.__metadata) || function (k, v) {
    var hasReflectMetadata = typeof Reflect === "object" && typeof Reflect.metadata === "function";
    return hasReflectMetadata ? Reflect.metadata(k, v) : undefined;
};
|
|
11
|
+
/**
 * Compiler-emitted parameter-decorator helper. Returns a function that, when
 * called with `(target, key)`, invokes `decorator(target, key, paramIndex)`.
 * Reuses `this.__param` when the surrounding scope already provides one.
 */
var __param = (this && this.__param) || function (paramIndex, decorator) {
    var applyToParameter = function (owner, memberName) {
        decorator(owner, memberName, paramIndex);
    };
    return applyToParameter;
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.VikingDbClientService = void 0;
|
|
16
|
+
/**
|
|
17
|
+
* VikingDB Client Service
|
|
18
|
+
* VikingDB 向量数据库客户端服务
|
|
19
|
+
*/
|
|
20
|
+
const common_1 = require("@nestjs/common");
|
|
21
|
+
const config_1 = require("@nestjs/config");
|
|
22
|
+
const nest_winston_1 = require("nest-winston");
|
|
23
|
+
const winston_1 = require("winston");
|
|
24
|
+
const axios_1 = require("@nestjs/axios");
|
|
25
|
+
const rxjs_1 = require("rxjs");
|
|
26
|
+
const infra_common_1 = require("@dofe/infra-common");
|
|
27
|
+
const vikingdb_types_1 = require("./vikingdb.types");
|
|
28
|
+
const openapi_1 = require("@volcengine/openapi");
|
|
29
|
+
let VikingDbClientService = class VikingDbClientService {
|
|
30
|
+
logger;
|
|
31
|
+
configService;
|
|
32
|
+
httpService;
|
|
33
|
+
config;
|
|
34
|
+
volcService;
|
|
35
|
+
constructor(logger, configService, httpService) {
|
|
36
|
+
this.logger = logger;
|
|
37
|
+
this.configService = configService;
|
|
38
|
+
this.httpService = httpService;
|
|
39
|
+
}
|
|
40
|
+
onModuleInit() {
|
|
41
|
+
const keysConfig = (0, infra_common_1.getKeysConfig)();
|
|
42
|
+
const vikingKeys = keysConfig?.vikingdb;
|
|
43
|
+
// keys/config.json 优先,其次才 fallback env(兼容旧配置)
|
|
44
|
+
const provider = vikingKeys?.provider ??
|
|
45
|
+
this.configService.get('VIKINGDB_PROVIDER') ??
|
|
46
|
+
'custom-http';
|
|
47
|
+
this.config = {
|
|
48
|
+
provider,
|
|
49
|
+
baseUrl: vikingKeys?.baseUrl ??
|
|
50
|
+
this.configService.get('VIKINGDB_BASE_URL', ''),
|
|
51
|
+
authToken: vikingKeys?.authToken ??
|
|
52
|
+
this.configService.get('VIKINGDB_AUTH_TOKEN'),
|
|
53
|
+
database: vikingKeys?.database ??
|
|
54
|
+
this.configService.get('VIKINGDB_DATABASE', ''),
|
|
55
|
+
collection: vikingKeys?.collection ??
|
|
56
|
+
this.configService.get('VIKINGDB_COLLECTION', ''),
|
|
57
|
+
ak: vikingKeys?.volcengine?.ak ??
|
|
58
|
+
this.configService.get('VIKINGDB_VOLCENGINE_AK'),
|
|
59
|
+
sk: vikingKeys?.volcengine?.sk ??
|
|
60
|
+
this.configService.get('VIKINGDB_VOLCENGINE_SK'),
|
|
61
|
+
region: vikingKeys?.volcengine?.region ??
|
|
62
|
+
this.configService.get('VIKINGDB_VOLCENGINE_REGION', 'cn-beijing'),
|
|
63
|
+
sessionToken: vikingKeys?.volcengine?.sessionToken ??
|
|
64
|
+
this.configService.get('VIKINGDB_VOLCENGINE_SESSION_TOKEN'),
|
|
65
|
+
indexName: vikingKeys?.volcengine?.indexName ??
|
|
66
|
+
this.configService.get('VIKINGDB_VOLCENGINE_INDEX_NAME'),
|
|
67
|
+
collectionName: vikingKeys?.volcengine?.collectionName ??
|
|
68
|
+
this.configService.get('VIKINGDB_VOLCENGINE_COLLECTION_NAME'),
|
|
69
|
+
collectionAlias: vikingKeys?.volcengine?.collectionAlias ??
|
|
70
|
+
this.configService.get('VIKINGDB_VOLCENGINE_COLLECTION_ALIAS'),
|
|
71
|
+
primaryKeyField: vikingKeys?.fields?.primaryKey ??
|
|
72
|
+
this.configService.get('VIKINGDB_FIELD_PRIMARY_KEY', 'id'),
|
|
73
|
+
textField: vikingKeys?.fields?.text ??
|
|
74
|
+
this.configService.get('VIKINGDB_FIELD_TEXT', 'text'),
|
|
75
|
+
vectorField: vikingKeys?.fields?.vector ??
|
|
76
|
+
this.configService.get('VIKINGDB_FIELD_VECTOR', 'embedding'),
|
|
77
|
+
summaryField: vikingKeys?.fields?.summary ??
|
|
78
|
+
this.configService.get('VIKINGDB_FIELD_SUMMARY', 'summary'),
|
|
79
|
+
metadataField: vikingKeys?.fields?.metadata ??
|
|
80
|
+
this.configService.get('VIKINGDB_FIELD_METADATA', 'metadata'),
|
|
81
|
+
tagsField: vikingKeys?.fields?.tags ??
|
|
82
|
+
this.configService.get('VIKINGDB_FIELD_TAGS', 'tags'),
|
|
83
|
+
taskTypeField: vikingKeys?.fields?.taskType ??
|
|
84
|
+
this.configService.get('VIKINGDB_FIELD_TASK_TYPE', 'taskType'),
|
|
85
|
+
createdAtField: vikingKeys?.fields?.createdAt ??
|
|
86
|
+
this.configService.get('VIKINGDB_FIELD_CREATED_AT', 'createdAt'),
|
|
87
|
+
updatedAtField: vikingKeys?.fields?.updatedAt ??
|
|
88
|
+
this.configService.get('VIKINGDB_FIELD_UPDATED_AT', 'updatedAt'),
|
|
89
|
+
// scope/source 字段映射(Volcengine 建议使用扁平化字段做过滤)
|
|
90
|
+
tenantIdField: vikingKeys?.fields?.tenantId ??
|
|
91
|
+
this.configService.get('VIKINGDB_FIELD_TENANT_ID', 'tenantId'),
|
|
92
|
+
botIdField: vikingKeys?.fields?.botId ??
|
|
93
|
+
this.configService.get('VIKINGDB_FIELD_BOT_ID', 'botId'),
|
|
94
|
+
scopeTypeField: vikingKeys?.fields?.scopeType ??
|
|
95
|
+
this.configService.get('VIKINGDB_FIELD_SCOPE_TYPE', 'scopeType'),
|
|
96
|
+
scopeIdField: vikingKeys?.fields?.scopeId ??
|
|
97
|
+
this.configService.get('VIKINGDB_FIELD_SCOPE_ID', 'scopeId'),
|
|
98
|
+
sourceTypeField: vikingKeys?.fields?.sourceType ??
|
|
99
|
+
this.configService.get('VIKINGDB_FIELD_SOURCE_TYPE', 'sourceType'),
|
|
100
|
+
createdByIdField: vikingKeys?.fields?.createdById ??
|
|
101
|
+
this.configService.get('VIKINGDB_FIELD_CREATED_BY_ID', 'createdById'),
|
|
102
|
+
timeoutMs: vikingKeys?.timeoutMs ??
|
|
103
|
+
this.configService.get('VIKINGDB_TIMEOUT_MS', 20000),
|
|
104
|
+
vectorSearchPath: vikingKeys?.paths?.vectorSearch ??
|
|
105
|
+
this.configService.get('VIKINGDB_VECTOR_SEARCH_PATH', '/vector/search'),
|
|
106
|
+
bm25SearchPath: vikingKeys?.paths?.bm25Search ??
|
|
107
|
+
this.configService.get('VIKINGDB_BM25_SEARCH_PATH', '/bm25/search'),
|
|
108
|
+
upsertPath: vikingKeys?.paths?.upsert ??
|
|
109
|
+
this.configService.get('VIKINGDB_UPSERT_PATH', '/documents/upsert'),
|
|
110
|
+
deletePath: vikingKeys?.paths?.delete ??
|
|
111
|
+
this.configService.get('VIKINGDB_DELETE_PATH', '/documents/delete'),
|
|
112
|
+
healthPath: vikingKeys?.paths?.health ??
|
|
113
|
+
this.configService.get('VIKINGDB_HEALTH_PATH', '/health'),
|
|
114
|
+
headers: vikingKeys?.headers ??
|
|
115
|
+
this.parseHeaders(this.configService.get('VIKINGDB_HEADERS', '{}')),
|
|
116
|
+
};
|
|
117
|
+
if (this.config.provider === 'volcengine') {
|
|
118
|
+
if (this.config.ak && this.config.sk) {
|
|
119
|
+
this.volcService = new openapi_1.vikingdb.VikingdbService({
|
|
120
|
+
ak: this.config.ak,
|
|
121
|
+
sk: this.config.sk,
|
|
122
|
+
region: this.config.region,
|
|
123
|
+
sessionToken: this.config.sessionToken,
|
|
124
|
+
});
|
|
125
|
+
}
|
|
126
|
+
this.logger.info('[VikingDB] Volcengine client initialized', {
|
|
127
|
+
region: this.config.region,
|
|
128
|
+
indexName: this.config.indexName,
|
|
129
|
+
collectionName: this.config.collectionName,
|
|
130
|
+
collectionAlias: this.config.collectionAlias,
|
|
131
|
+
});
|
|
132
|
+
return;
|
|
133
|
+
}
|
|
134
|
+
this.logger.info('[VikingDB] Client initialized', {
|
|
135
|
+
baseUrl: this.config.baseUrl,
|
|
136
|
+
database: this.config.database,
|
|
137
|
+
collection: this.config.collection,
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
// ============================================================================
|
|
141
|
+
// Public API
|
|
142
|
+
// ============================================================================
|
|
143
|
+
/**
|
|
144
|
+
* Volcengine: 获取 collection schema(字段列表)
|
|
145
|
+
*/
|
|
146
|
+
async getVolcengineCollectionSchema() {
|
|
147
|
+
if (this.config.provider !== 'volcengine') {
|
|
148
|
+
return null;
|
|
149
|
+
}
|
|
150
|
+
if (!this.volcService) {
|
|
151
|
+
throw new Error('Volcengine VikingDB client not configured (missing AK/SK)');
|
|
152
|
+
}
|
|
153
|
+
const collectionName = this.resolveCollectionName();
|
|
154
|
+
const resp = await this.volcService.collection.GetCollectionInfo({
|
|
155
|
+
CollectionName: collectionName,
|
|
156
|
+
});
|
|
157
|
+
const info = resp?.CollectionInfo;
|
|
158
|
+
const fields = Array.isArray(info?.Fields) ? info.Fields : [];
|
|
159
|
+
return {
|
|
160
|
+
collectionName,
|
|
161
|
+
fields: fields.map((f) => ({
|
|
162
|
+
FieldName: f.FieldName,
|
|
163
|
+
FieldType: f.FieldType,
|
|
164
|
+
...(f.Dim !== undefined ? { Dim: f.Dim } : {}),
|
|
165
|
+
})),
|
|
166
|
+
indexNames: Array.isArray(info?.IndexNames) ? info.IndexNames : [],
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
/**
|
|
170
|
+
* Volcengine: 连通性自检(不写入数据)
|
|
171
|
+
*/
|
|
172
|
+
async volcengineSelfCheck() {
|
|
173
|
+
if (this.config.provider !== 'volcengine') {
|
|
174
|
+
return null;
|
|
175
|
+
}
|
|
176
|
+
const start = Date.now();
|
|
177
|
+
const warnings = [];
|
|
178
|
+
try {
|
|
179
|
+
if (!this.volcService) {
|
|
180
|
+
throw new Error('Volcengine VikingDB client not configured (missing AK/SK)');
|
|
181
|
+
}
|
|
182
|
+
const collectionName = this.resolveCollectionName();
|
|
183
|
+
const indexName = this.resolveIndexName();
|
|
184
|
+
// 1) CollectionInfo
|
|
185
|
+
const collectionResp = await this.volcService.collection.GetCollectionInfo({
|
|
186
|
+
CollectionName: collectionName,
|
|
187
|
+
});
|
|
188
|
+
const info = collectionResp?.CollectionInfo;
|
|
189
|
+
const indexNames = Array.isArray(info?.IndexNames) ? info.IndexNames : [];
|
|
190
|
+
if (indexNames.length > 0 && !indexNames.includes(indexName)) {
|
|
191
|
+
warnings.push(`IndexName not found in collection.IndexNames: ${indexName} (available: ${indexNames.join(', ')})`);
|
|
192
|
+
}
|
|
193
|
+
// 2) IndexInfo
|
|
194
|
+
await this.volcService.index.GetIndexInfo({
|
|
195
|
+
CollectionName: collectionName,
|
|
196
|
+
IndexName: indexName,
|
|
197
|
+
});
|
|
198
|
+
// 3) Sample search (empty filter, tiny limit)
|
|
199
|
+
// 用 SearchByText 做一次最轻量的 API 可用性验证
|
|
200
|
+
await this.volcService.search.SearchByText({
|
|
201
|
+
IndexName: indexName,
|
|
202
|
+
Limit: 1,
|
|
203
|
+
Text: 'ping',
|
|
204
|
+
OutputFields: [this.config.primaryKeyField ?? 'id'],
|
|
205
|
+
...this.resolveCollectionNameOrAlias(),
|
|
206
|
+
});
|
|
207
|
+
return {
|
|
208
|
+
ok: true,
|
|
209
|
+
provider: 'volcengine',
|
|
210
|
+
region: this.config.region,
|
|
211
|
+
collectionName,
|
|
212
|
+
indexName,
|
|
213
|
+
latencyMs: Date.now() - start,
|
|
214
|
+
checks: {
|
|
215
|
+
collectionInfo: true,
|
|
216
|
+
indexInfo: true,
|
|
217
|
+
sampleSearch: true,
|
|
218
|
+
},
|
|
219
|
+
warnings,
|
|
220
|
+
};
|
|
221
|
+
}
|
|
222
|
+
catch (error) {
|
|
223
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
224
|
+
return {
|
|
225
|
+
ok: false,
|
|
226
|
+
provider: 'volcengine',
|
|
227
|
+
region: this.config.region,
|
|
228
|
+
collectionName: this.config.collectionName,
|
|
229
|
+
indexName: this.config.indexName,
|
|
230
|
+
latencyMs: Date.now() - start,
|
|
231
|
+
checks: {
|
|
232
|
+
collectionInfo: false,
|
|
233
|
+
indexInfo: false,
|
|
234
|
+
sampleSearch: false,
|
|
235
|
+
},
|
|
236
|
+
warnings,
|
|
237
|
+
error: msg,
|
|
238
|
+
};
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
/**
|
|
242
|
+
* Volcengine: 校验远程 collection schema 是否匹配预期
|
|
243
|
+
* 返回缺失字段/类型不匹配的警告列表
|
|
244
|
+
*/
|
|
245
|
+
async validateVolcengineSchema() {
|
|
246
|
+
if (this.config.provider !== 'volcengine') {
|
|
247
|
+
return null;
|
|
248
|
+
}
|
|
249
|
+
const warnings = [];
|
|
250
|
+
const errors = [];
|
|
251
|
+
const schema = await this.getVolcengineCollectionSchema();
|
|
252
|
+
if (!schema) {
|
|
253
|
+
return null;
|
|
254
|
+
}
|
|
255
|
+
const remoteFieldMap = new Map(schema.fields.map((f) => [f.FieldName, f]));
|
|
256
|
+
// 检查期望字段是否存在、类型是否匹配
|
|
257
|
+
for (const expected of vikingdb_types_1.VOLCENGINE_MEMORY_COLLECTION_SCHEMA) {
|
|
258
|
+
const remote = remoteFieldMap.get(expected.FieldName);
|
|
259
|
+
if (!remote) {
|
|
260
|
+
warnings.push(`Missing expected field: ${expected.FieldName} (type: ${expected.FieldType})`);
|
|
261
|
+
continue;
|
|
262
|
+
}
|
|
263
|
+
// 类型兼容校验(允许一定灵活性)
|
|
264
|
+
if (remote.FieldType !== expected.FieldType) {
|
|
265
|
+
// 特殊情况:string/text 可视为兼容
|
|
266
|
+
const isTextStringCompat = (expected.FieldType === 'text' && remote.FieldType === 'string') ||
|
|
267
|
+
(expected.FieldType === 'string' && remote.FieldType === 'text');
|
|
268
|
+
if (!isTextStringCompat) {
|
|
269
|
+
warnings.push(`Field type mismatch: ${expected.FieldName} expected ${expected.FieldType}, got ${remote.FieldType}`);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
// 向量维度检查
|
|
273
|
+
if (expected.FieldType === 'vector' && expected.Dim) {
|
|
274
|
+
if (remote.Dim && remote.Dim !== expected.Dim) {
|
|
275
|
+
warnings.push(`Vector dimension mismatch: ${expected.FieldName} expected ${expected.Dim}, got ${remote.Dim}`);
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
// 列出未知字段(不警告,只记录)
|
|
280
|
+
const expectedNames = new Set(vikingdb_types_1.VOLCENGINE_MEMORY_COLLECTION_SCHEMA.map((f) => f.FieldName));
|
|
281
|
+
const extraFields = schema.fields
|
|
282
|
+
.filter((f) => !expectedNames.has(f.FieldName))
|
|
283
|
+
.map((f) => f.FieldName);
|
|
284
|
+
if (extraFields.length > 0) {
|
|
285
|
+
this.logger.debug('[VikingDB] Extra fields in collection', {
|
|
286
|
+
extraFields,
|
|
287
|
+
});
|
|
288
|
+
}
|
|
289
|
+
return {
|
|
290
|
+
ok: errors.length === 0,
|
|
291
|
+
collectionName: schema.collectionName,
|
|
292
|
+
warnings,
|
|
293
|
+
errors,
|
|
294
|
+
remoteFields: schema.fields,
|
|
295
|
+
expectedFields: vikingdb_types_1.VOLCENGINE_MEMORY_COLLECTION_SCHEMA,
|
|
296
|
+
};
|
|
297
|
+
}
|
|
298
|
+
/**
|
|
299
|
+
* 健康检查
|
|
300
|
+
*/
|
|
301
|
+
async healthCheck() {
|
|
302
|
+
const startTime = Date.now();
|
|
303
|
+
try {
|
|
304
|
+
if (this.config.provider === 'volcengine') {
|
|
305
|
+
if (!this.volcService) {
|
|
306
|
+
throw new Error('Volcengine VikingDB client not configured (missing AK/SK)');
|
|
307
|
+
}
|
|
308
|
+
const collectionName = this.resolveCollectionName();
|
|
309
|
+
// 轻量级检查:拉取 CollectionInfo
|
|
310
|
+
await this.volcService.collection.GetCollectionInfo({
|
|
311
|
+
CollectionName: collectionName,
|
|
312
|
+
});
|
|
313
|
+
return {
|
|
314
|
+
healthy: true,
|
|
315
|
+
latency: Date.now() - startTime,
|
|
316
|
+
};
|
|
317
|
+
}
|
|
318
|
+
await this.request('GET', this.config.healthPath);
|
|
319
|
+
return {
|
|
320
|
+
healthy: true,
|
|
321
|
+
latency: Date.now() - startTime,
|
|
322
|
+
};
|
|
323
|
+
}
|
|
324
|
+
catch (error) {
|
|
325
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
326
|
+
this.logger.error('[VikingDB] Health check failed', {
|
|
327
|
+
error: errorMessage,
|
|
328
|
+
});
|
|
329
|
+
return {
|
|
330
|
+
healthy: false,
|
|
331
|
+
latency: Date.now() - startTime,
|
|
332
|
+
error: errorMessage,
|
|
333
|
+
};
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
/**
|
|
337
|
+
* 向量搜索
|
|
338
|
+
*/
|
|
339
|
+
async vectorSearch(vector, scope, options = {}) {
|
|
340
|
+
if (this.config.provider === 'volcengine') {
|
|
341
|
+
if (!this.volcService) {
|
|
342
|
+
throw new Error('Volcengine VikingDB client not configured (missing AK/SK)');
|
|
343
|
+
}
|
|
344
|
+
const indexName = this.resolveIndexName();
|
|
345
|
+
const limit = options.topK ?? 50;
|
|
346
|
+
const collectionNameOrAlias = this.resolveCollectionNameOrAlias();
|
|
347
|
+
const volcFilter = this.buildVolcFilter(scope, options.taskType);
|
|
348
|
+
const response = await this.volcService.search.SearchByVector({
|
|
349
|
+
IndexName: indexName,
|
|
350
|
+
Limit: limit,
|
|
351
|
+
DenseVectors: [vector],
|
|
352
|
+
Filter: volcFilter,
|
|
353
|
+
OutputFields: [
|
|
354
|
+
this.config.primaryKeyField ?? 'id',
|
|
355
|
+
this.config.textField ?? 'text',
|
|
356
|
+
this.config.summaryField ?? 'summary',
|
|
357
|
+
this.config.metadataField ?? 'metadata',
|
|
358
|
+
this.config.tagsField ?? 'tags',
|
|
359
|
+
this.config.taskTypeField ?? 'taskType',
|
|
360
|
+
this.config.createdAtField ?? 'createdAt',
|
|
361
|
+
this.config.updatedAtField ?? 'updatedAt',
|
|
362
|
+
],
|
|
363
|
+
...collectionNameOrAlias,
|
|
364
|
+
});
|
|
365
|
+
return this.normalizeVolcSearchResponse(response, scope);
|
|
366
|
+
}
|
|
367
|
+
const response = await this.request('POST', this.config.vectorSearchPath, {
|
|
368
|
+
vector,
|
|
369
|
+
topK: options.topK ?? 50,
|
|
370
|
+
scope,
|
|
371
|
+
taskType: options.taskType,
|
|
372
|
+
});
|
|
373
|
+
return this.normalizeCandidates(response);
|
|
374
|
+
}
|
|
375
|
+
/**
|
|
376
|
+
* BM25 搜索
|
|
377
|
+
*/
|
|
378
|
+
async bm25Search(query, scope, options = {}) {
|
|
379
|
+
if (this.config.provider === 'volcengine') {
|
|
380
|
+
if (!this.volcService) {
|
|
381
|
+
throw new Error('Volcengine VikingDB client not configured (missing AK/SK)');
|
|
382
|
+
}
|
|
383
|
+
const indexName = this.resolveIndexName();
|
|
384
|
+
const limit = options.topK ?? 50;
|
|
385
|
+
const collectionNameOrAlias = this.resolveCollectionNameOrAlias();
|
|
386
|
+
const volcFilter = this.buildVolcFilter(scope, options.taskType);
|
|
387
|
+
const response = await this.volcService.search.SearchByText({
|
|
388
|
+
IndexName: indexName,
|
|
389
|
+
Limit: limit,
|
|
390
|
+
Text: query,
|
|
391
|
+
Filter: volcFilter,
|
|
392
|
+
OutputFields: [
|
|
393
|
+
this.config.primaryKeyField ?? 'id',
|
|
394
|
+
this.config.textField ?? 'text',
|
|
395
|
+
this.config.summaryField ?? 'summary',
|
|
396
|
+
this.config.metadataField ?? 'metadata',
|
|
397
|
+
this.config.tagsField ?? 'tags',
|
|
398
|
+
this.config.taskTypeField ?? 'taskType',
|
|
399
|
+
this.config.createdAtField ?? 'createdAt',
|
|
400
|
+
this.config.updatedAtField ?? 'updatedAt',
|
|
401
|
+
],
|
|
402
|
+
...collectionNameOrAlias,
|
|
403
|
+
});
|
|
404
|
+
return this.normalizeVolcSearchResponse(response, scope);
|
|
405
|
+
}
|
|
406
|
+
const response = await this.request('POST', this.config.bm25SearchPath, {
|
|
407
|
+
query,
|
|
408
|
+
topK: options.topK ?? 50,
|
|
409
|
+
scope,
|
|
410
|
+
taskType: options.taskType,
|
|
411
|
+
});
|
|
412
|
+
return this.normalizeCandidates(response);
|
|
413
|
+
}
|
|
414
|
+
/**
|
|
415
|
+
* 混合搜索(向量 + BM25)
|
|
416
|
+
*/
|
|
417
|
+
async hybridSearch(vector, query, scope, options = {}) {
|
|
418
|
+
const candidateK = options.candidateK ?? 50;
|
|
419
|
+
// 并行执行向量搜索和 BM25 搜索
|
|
420
|
+
const [vectorResults, bm25Results] = await Promise.all([
|
|
421
|
+
this.vectorSearch(vector, scope, {
|
|
422
|
+
topK: candidateK,
|
|
423
|
+
taskType: options.taskType,
|
|
424
|
+
}),
|
|
425
|
+
this.bm25Search(query, scope, {
|
|
426
|
+
topK: candidateK,
|
|
427
|
+
taskType: options.taskType,
|
|
428
|
+
}),
|
|
429
|
+
]);
|
|
430
|
+
// 使用 Reciprocal Rank Fusion 合并结果
|
|
431
|
+
const fused = this.reciprocalRankFusion(vectorResults, bm25Results, options.vectorWeight ?? 0.7, options.bm25Weight ?? 0.3);
|
|
432
|
+
// 返回 topK 结果
|
|
433
|
+
return fused.slice(0, options.topK ?? 10);
|
|
434
|
+
}
|
|
435
|
+
/**
|
|
436
|
+
* Upsert 文档
|
|
437
|
+
*/
|
|
438
|
+
async upsert(documents) {
|
|
439
|
+
if (documents.length === 0) {
|
|
440
|
+
return;
|
|
441
|
+
}
|
|
442
|
+
if (this.config.provider === 'volcengine') {
|
|
443
|
+
if (!this.volcService) {
|
|
444
|
+
throw new Error('Volcengine VikingDB client not configured (missing AK/SK)');
|
|
445
|
+
}
|
|
446
|
+
const { primaryKeys, fields } = this.buildVolcUpsertFields(documents);
|
|
447
|
+
const collectionNameOrAlias = this.resolveCollectionNameOrAlias();
|
|
448
|
+
await this.volcService.data.UpsertData({
|
|
449
|
+
Fields: fields,
|
|
450
|
+
...collectionNameOrAlias,
|
|
451
|
+
});
|
|
452
|
+
this.logger.info('[VikingDB] Volcengine upserted', {
|
|
453
|
+
count: primaryKeys.length,
|
|
454
|
+
});
|
|
455
|
+
return;
|
|
456
|
+
}
|
|
457
|
+
await this.request('POST', this.config.upsertPath, {
|
|
458
|
+
documents: documents.map((doc) => ({
|
|
459
|
+
...doc,
|
|
460
|
+
timestamp: doc.timestamp ?? new Date().toISOString(),
|
|
461
|
+
createdAt: doc.createdAt ?? new Date().toISOString(),
|
|
462
|
+
updatedAt: new Date().toISOString(),
|
|
463
|
+
})),
|
|
464
|
+
});
|
|
465
|
+
this.logger.info('[VikingDB] Documents upserted', {
|
|
466
|
+
count: documents.length,
|
|
467
|
+
});
|
|
468
|
+
}
|
|
469
|
+
/**
|
|
470
|
+
* 更新文档(部分更新,仅更新指定字段)
|
|
471
|
+
*/
|
|
472
|
+
async update(documents) {
|
|
473
|
+
if (documents.length === 0) {
|
|
474
|
+
return;
|
|
475
|
+
}
|
|
476
|
+
if (this.config.provider === 'volcengine') {
|
|
477
|
+
if (!this.volcService) {
|
|
478
|
+
throw new Error('Volcengine VikingDB client not configured (missing AK/SK)');
|
|
479
|
+
}
|
|
480
|
+
const pkField = this.config.primaryKeyField ?? 'id';
|
|
481
|
+
const fields = documents.map((doc) => ({
|
|
482
|
+
[pkField]: doc.id,
|
|
483
|
+
...doc,
|
|
484
|
+
}));
|
|
485
|
+
const collectionNameOrAlias = this.resolveCollectionNameOrAlias();
|
|
486
|
+
await this.volcService.data.UpdateData({
|
|
487
|
+
Fields: fields,
|
|
488
|
+
...collectionNameOrAlias,
|
|
489
|
+
});
|
|
490
|
+
this.logger.info('[VikingDB] Volcengine updated', {
|
|
491
|
+
count: fields.length,
|
|
492
|
+
});
|
|
493
|
+
return;
|
|
494
|
+
}
|
|
495
|
+
// For custom-http provider, use a dedicated update endpoint if available
|
|
496
|
+
// Otherwise, we need to fetch existing documents and merge updates
|
|
497
|
+
throw new Error('Update operation is not supported for custom-http provider. Use upsert instead.');
|
|
498
|
+
}
|
|
499
|
+
/**
|
|
500
|
+
* 删除文档
|
|
501
|
+
*/
|
|
502
|
+
async delete(options) {
|
|
503
|
+
const ids = options.ids ?? (options.id ? [options.id] : []);
|
|
504
|
+
if (this.config.provider === 'volcengine') {
|
|
505
|
+
if (!this.volcService) {
|
|
506
|
+
throw new Error('Volcengine VikingDB client not configured (missing AK/SK)');
|
|
507
|
+
}
|
|
508
|
+
const collectionNameOrAlias = this.resolveCollectionNameOrAlias();
|
|
509
|
+
if (options.scope) {
|
|
510
|
+
// 火山云 SDK 仅支持按主键删除或全量删除;scope 删除需上层自行维护主键集合。
|
|
511
|
+
throw new Error('Volcengine VikingDB does not support delete by scope; delete by ids only');
|
|
512
|
+
}
|
|
513
|
+
if (ids.length === 0) {
|
|
514
|
+
return;
|
|
515
|
+
}
|
|
516
|
+
await this.volcService.data.DeleteData({
|
|
517
|
+
PrimaryKeys: ids,
|
|
518
|
+
...collectionNameOrAlias,
|
|
519
|
+
});
|
|
520
|
+
this.logger.info('[VikingDB] Volcengine deleted', {
|
|
521
|
+
ids: ids.length,
|
|
522
|
+
});
|
|
523
|
+
return;
|
|
524
|
+
}
|
|
525
|
+
if (ids.length === 0 && !options.scope) {
|
|
526
|
+
return;
|
|
527
|
+
}
|
|
528
|
+
await this.request('POST', this.config.deletePath, {
|
|
529
|
+
ids: ids.length > 0 ? ids : undefined,
|
|
530
|
+
scope: options.scope,
|
|
531
|
+
});
|
|
532
|
+
this.logger.info('[VikingDB] Documents deleted', {
|
|
533
|
+
ids: ids.length,
|
|
534
|
+
scope: options.scope,
|
|
535
|
+
});
|
|
536
|
+
}
|
|
537
|
+
/**
|
|
538
|
+
* 按作用域删除所有文档
|
|
539
|
+
*/
|
|
540
|
+
async deleteByScope(scope) {
|
|
541
|
+
await this.delete({ scope });
|
|
542
|
+
}
|
|
543
|
+
// ============================================================================
|
|
544
|
+
// Volcengine helpers
|
|
545
|
+
// ============================================================================
|
|
546
|
+
resolveIndexName() {
|
|
547
|
+
if (!this.config.indexName) {
|
|
548
|
+
throw new Error('VIKINGDB_VOLCENGINE_INDEX_NAME is required when VIKINGDB_PROVIDER=volcengine');
|
|
549
|
+
}
|
|
550
|
+
return this.config.indexName;
|
|
551
|
+
}
|
|
552
|
+
resolveCollectionName() {
|
|
553
|
+
if (this.config.collectionName) {
|
|
554
|
+
return this.config.collectionName;
|
|
555
|
+
}
|
|
556
|
+
if (this.config.collectionAlias) {
|
|
557
|
+
// healthCheck 需要 CollectionName;这里给出更明确的错误
|
|
558
|
+
throw new Error('VIKINGDB_VOLCENGINE_COLLECTION_NAME is required (collectionAlias is not enough for this operation)');
|
|
559
|
+
}
|
|
560
|
+
throw new Error('VIKINGDB_VOLCENGINE_COLLECTION_NAME or VIKINGDB_VOLCENGINE_COLLECTION_ALIAS is required');
|
|
561
|
+
}
|
|
562
|
+
resolveCollectionNameOrAlias() {
|
|
563
|
+
if (this.config.collectionName) {
|
|
564
|
+
return { CollectionName: this.config.collectionName };
|
|
565
|
+
}
|
|
566
|
+
if (this.config.collectionAlias) {
|
|
567
|
+
return { CollectionAlias: this.config.collectionAlias };
|
|
568
|
+
}
|
|
569
|
+
throw new Error('VIKINGDB_VOLCENGINE_COLLECTION_NAME or VIKINGDB_VOLCENGINE_COLLECTION_ALIAS is required when VIKINGDB_PROVIDER=volcengine');
|
|
570
|
+
}
|
|
571
|
+
buildVolcFilter(scope, taskType) {
|
|
572
|
+
const conditions = [];
|
|
573
|
+
// Volcengine SDK 的 filter 依赖 scalar / list scalar 字段。
|
|
574
|
+
// 因此这里优先使用扁平化字段(tenantId/botId/scopeType/scopeId)。
|
|
575
|
+
const pushEq = (fieldName, value) => {
|
|
576
|
+
if (!fieldName)
|
|
577
|
+
return;
|
|
578
|
+
if (value === undefined || value === null || String(value).length === 0)
|
|
579
|
+
return;
|
|
580
|
+
conditions.push({
|
|
581
|
+
Operation: 'must',
|
|
582
|
+
FieldName: fieldName,
|
|
583
|
+
Conditions: [String(value)],
|
|
584
|
+
});
|
|
585
|
+
};
|
|
586
|
+
pushEq(this.config.tenantIdField, scope.tenantId);
|
|
587
|
+
pushEq(this.config.botIdField, scope.botId);
|
|
588
|
+
pushEq(this.config.scopeTypeField, scope.scopeType);
|
|
589
|
+
pushEq(this.config.scopeIdField, scope.scopeId);
|
|
590
|
+
// 兜底:如果没有配置扁平化字段,则继续用 metadata.scope.xxx(需要 collection schema 支持)
|
|
591
|
+
if (conditions.length === 0) {
|
|
592
|
+
for (const [k, v] of Object.entries(scope ?? {})) {
|
|
593
|
+
if (v === undefined || v === null || String(v).length === 0)
|
|
594
|
+
continue;
|
|
595
|
+
conditions.push({
|
|
596
|
+
Operation: 'must',
|
|
597
|
+
FieldName: `${this.config.metadataField ?? 'metadata'}.scope.${k}`,
|
|
598
|
+
Conditions: [String(v)],
|
|
599
|
+
});
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
if (taskType) {
|
|
603
|
+
conditions.push({
|
|
604
|
+
Operation: 'must',
|
|
605
|
+
FieldName: this.config.taskTypeField ?? 'taskType',
|
|
606
|
+
Conditions: [String(taskType)],
|
|
607
|
+
});
|
|
608
|
+
}
|
|
609
|
+
if (conditions.length === 0)
|
|
610
|
+
return undefined;
|
|
611
|
+
if (conditions.length === 1)
|
|
612
|
+
return conditions[0];
|
|
613
|
+
return {
|
|
614
|
+
Operation: 'and',
|
|
615
|
+
Conditions: conditions,
|
|
616
|
+
};
|
|
617
|
+
}
|
|
618
|
+
normalizeVolcSearchResponse(response, fallbackScope) {
|
|
619
|
+
const data = response?.Data ?? [];
|
|
620
|
+
const rows = data[0] ?? [];
|
|
621
|
+
return rows.map((row) => {
|
|
622
|
+
const fields = row?.Fields ?? {};
|
|
623
|
+
const id = String(fields[this.config.primaryKeyField ?? 'id'] ??
|
|
624
|
+
row[this.config.primaryKeyField ?? 'id'] ??
|
|
625
|
+
row?.Id ??
|
|
626
|
+
row?.id ??
|
|
627
|
+
'');
|
|
628
|
+
const text = String(fields[this.config.textField ?? 'text'] ?? '');
|
|
629
|
+
const summary = fields[this.config.summaryField ?? 'summary'];
|
|
630
|
+
const metadataRaw = fields[this.config.metadataField ?? 'metadata'];
|
|
631
|
+
// metadata 存储为 JSON 字符串,需要解析
|
|
632
|
+
const metadata = this.parseMetadata(metadataRaw);
|
|
633
|
+
const tags = fields[this.config.tagsField ?? 'tags'] ?? [];
|
|
634
|
+
const taskType = fields[this.config.taskTypeField ?? 'taskType'];
|
|
635
|
+
const createdAt = fields[this.config.createdAtField ?? 'createdAt'];
|
|
636
|
+
const updatedAt = fields[this.config.updatedAtField ?? 'updatedAt'];
|
|
637
|
+
const scope = metadata?.scope ?? fallbackScope ?? {};
|
|
638
|
+
const scoreFinal = typeof row.Score === 'number' ? row.Score : 0;
|
|
639
|
+
return {
|
|
640
|
+
id: id || 'unknown',
|
|
641
|
+
text,
|
|
642
|
+
summary: summary ? String(summary) : undefined,
|
|
643
|
+
scope,
|
|
644
|
+
metadata: metadata ?? {},
|
|
645
|
+
tags: Array.isArray(tags) ? tags.map(String) : [],
|
|
646
|
+
taskType: taskType ? String(taskType) : undefined,
|
|
647
|
+
role: undefined,
|
|
648
|
+
pinned: false,
|
|
649
|
+
createdAt: createdAt ? String(createdAt) : undefined,
|
|
650
|
+
updatedAt: updatedAt ? String(updatedAt) : undefined,
|
|
651
|
+
scoreVector: scoreFinal,
|
|
652
|
+
scoreBm25: scoreFinal,
|
|
653
|
+
scoreHybrid: scoreFinal,
|
|
654
|
+
scoreRerank: undefined,
|
|
655
|
+
scoreFinal,
|
|
656
|
+
};
|
|
657
|
+
});
|
|
658
|
+
}
|
|
659
|
+
parseMetadata(raw) {
|
|
660
|
+
if (!raw)
|
|
661
|
+
return {};
|
|
662
|
+
if (typeof raw === 'object')
|
|
663
|
+
return raw;
|
|
664
|
+
if (typeof raw === 'string') {
|
|
665
|
+
try {
|
|
666
|
+
return JSON.parse(raw);
|
|
667
|
+
}
|
|
668
|
+
catch {
|
|
669
|
+
return {};
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
return {};
|
|
673
|
+
}
|
|
674
|
+
buildVolcUpsertFields(documents) {
|
|
675
|
+
const pkField = this.config.primaryKeyField ?? 'id';
|
|
676
|
+
const primaryKeys = [];
|
|
677
|
+
const fields = documents.map((doc) => {
|
|
678
|
+
const id = String(doc.id ?? '');
|
|
679
|
+
if (!id) {
|
|
680
|
+
throw new Error(`MemoryDocument.id is required for volcengine upsert (field: ${pkField})`);
|
|
681
|
+
}
|
|
682
|
+
primaryKeys.push(id);
|
|
683
|
+
const now = new Date().toISOString();
|
|
684
|
+
// 过滤掉 null/undefined 值的 metadata
|
|
685
|
+
const metadata = this.buildMetadata(doc.metadata, doc.scope);
|
|
686
|
+
const scope = doc.scope ?? {};
|
|
687
|
+
// 提取 bots 字段(如果在 metadata 中存在)
|
|
688
|
+
const bots = doc.metadata?.bots ?? [];
|
|
689
|
+
return {
|
|
690
|
+
[pkField]: id,
|
|
691
|
+
[this.config.textField ?? 'text']: doc.text,
|
|
692
|
+
...(doc.summary !== undefined
|
|
693
|
+
? { [this.config.summaryField ?? 'summary']: doc.summary }
|
|
694
|
+
: {}),
|
|
695
|
+
[this.config.vectorField ?? 'embedding']: doc.embedding ?? [],
|
|
696
|
+
[this.config.tagsField ?? 'tags']: doc.tags ?? [],
|
|
697
|
+
...(doc.taskType !== undefined
|
|
698
|
+
? { [this.config.taskTypeField ?? 'taskType']: doc.taskType }
|
|
699
|
+
: {}),
|
|
700
|
+
// 扁平化 scope/source 字段(用于 Volcengine filter)
|
|
701
|
+
...(this.config.tenantIdField
|
|
702
|
+
? { [this.config.tenantIdField]: scope.tenantId }
|
|
703
|
+
: {}),
|
|
704
|
+
...(this.config.botIdField
|
|
705
|
+
? { [this.config.botIdField]: scope.botId }
|
|
706
|
+
: {}),
|
|
707
|
+
...(this.config.scopeTypeField
|
|
708
|
+
? { [this.config.scopeTypeField]: scope.scopeType }
|
|
709
|
+
: {}),
|
|
710
|
+
...(this.config.scopeIdField
|
|
711
|
+
? { [this.config.scopeIdField]: scope.scopeId }
|
|
712
|
+
: {}),
|
|
713
|
+
...(this.config.sourceTypeField
|
|
714
|
+
? { [this.config.sourceTypeField]: doc.metadata?.sourceType }
|
|
715
|
+
: {}),
|
|
716
|
+
...(this.config.createdByIdField
|
|
717
|
+
? {
|
|
718
|
+
[this.config.createdByIdField]: doc.metadata
|
|
719
|
+
?.createdById,
|
|
720
|
+
}
|
|
721
|
+
: {}),
|
|
722
|
+
// bots 字段(知识库元数据使用)
|
|
723
|
+
...(bots.length > 0 ? { bots } : {}),
|
|
724
|
+
[this.config.createdAtField ?? 'createdAt']: doc.createdAt ?? now,
|
|
725
|
+
[this.config.updatedAtField ?? 'updatedAt']: doc.updatedAt ?? now,
|
|
726
|
+
// metadata 字段为 string 类型,需要 JSON 序列化
|
|
727
|
+
[this.config.metadataField ?? 'metadata']: JSON.stringify(metadata),
|
|
728
|
+
};
|
|
729
|
+
});
|
|
730
|
+
return { primaryKeys, fields };
|
|
731
|
+
}
|
|
732
|
+
buildMetadata(metadata, scope) {
|
|
733
|
+
const result = {};
|
|
734
|
+
// 添加 scope
|
|
735
|
+
if (scope) {
|
|
736
|
+
result.scope = scope;
|
|
737
|
+
}
|
|
738
|
+
// 过滤掉 null/undefined 值
|
|
739
|
+
if (metadata) {
|
|
740
|
+
for (const [key, value] of Object.entries(metadata)) {
|
|
741
|
+
if (value !== null && value !== undefined) {
|
|
742
|
+
result[key] = value;
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
return result;
|
|
747
|
+
}
|
|
748
|
+
async request(method, path, body) {
|
|
749
|
+
if (this.config.provider === 'volcengine') {
|
|
750
|
+
throw new Error('request() is not supported for volcengine provider; use SDK methods');
|
|
751
|
+
}
|
|
752
|
+
const url = this.buildUrl(path);
|
|
753
|
+
const headers = this.buildHeaders();
|
|
754
|
+
this.logger.debug('[VikingDB] Making request', {
|
|
755
|
+
method,
|
|
756
|
+
url,
|
|
757
|
+
hasBody: !!body,
|
|
758
|
+
});
|
|
759
|
+
try {
|
|
760
|
+
const observable$ = method === 'GET'
|
|
761
|
+
? this.httpService.get(url, { headers })
|
|
762
|
+
: this.httpService.post(url, body, { headers });
|
|
763
|
+
const response = await (0, rxjs_1.firstValueFrom)(observable$.pipe((0, rxjs_1.timeout)(this.config.timeoutMs), (0, rxjs_1.retry)({
|
|
764
|
+
count: 2,
|
|
765
|
+
delay: 1000,
|
|
766
|
+
})));
|
|
767
|
+
return response.data;
|
|
768
|
+
}
|
|
769
|
+
catch (error) {
|
|
770
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
771
|
+
this.logger.error('[VikingDB] Request failed', {
|
|
772
|
+
method,
|
|
773
|
+
url,
|
|
774
|
+
error: errorMessage,
|
|
775
|
+
});
|
|
776
|
+
throw new Error(`VikingDB request failed: ${errorMessage}`);
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
buildUrl(path) {
|
|
780
|
+
const baseUrl = this.config.baseUrl.endsWith('/')
|
|
781
|
+
? this.config.baseUrl.slice(0, -1)
|
|
782
|
+
: this.config.baseUrl;
|
|
783
|
+
const normalizedPath = path.startsWith('/') ? path : `/${path}`;
|
|
784
|
+
return `${baseUrl}${normalizedPath}`;
|
|
785
|
+
}
|
|
786
|
+
buildHeaders() {
|
|
787
|
+
const headers = {
|
|
788
|
+
'Content-Type': 'application/json',
|
|
789
|
+
...this.config.headers,
|
|
790
|
+
};
|
|
791
|
+
if (this.config.authToken) {
|
|
792
|
+
headers['Authorization'] = `Bearer ${this.config.authToken}`;
|
|
793
|
+
}
|
|
794
|
+
return headers;
|
|
795
|
+
}
|
|
796
|
+
parseHeaders(headersJson) {
|
|
797
|
+
try {
|
|
798
|
+
const parsed = JSON.parse(headersJson);
|
|
799
|
+
if (typeof parsed === 'object' && parsed !== null) {
|
|
800
|
+
return Object.fromEntries(Object.entries(parsed)
|
|
801
|
+
.filter(([, v]) => typeof v === 'string')
|
|
802
|
+
.map(([k, v]) => [k, String(v)]));
|
|
803
|
+
}
|
|
804
|
+
}
|
|
805
|
+
catch {
|
|
806
|
+
// Ignore parse errors
|
|
807
|
+
}
|
|
808
|
+
return {};
|
|
809
|
+
}
|
|
810
|
+
normalizeCandidates(response) {
|
|
811
|
+
if (!response) {
|
|
812
|
+
return [];
|
|
813
|
+
}
|
|
814
|
+
const candidates = response.candidates ?? response.results ?? response.data;
|
|
815
|
+
if (!Array.isArray(candidates)) {
|
|
816
|
+
return [];
|
|
817
|
+
}
|
|
818
|
+
return candidates.map((item, index) => ({
|
|
819
|
+
id: String(item.id ?? `unknown-${index}`),
|
|
820
|
+
text: String(item.text ?? ''),
|
|
821
|
+
summary: item.summary ? String(item.summary) : undefined,
|
|
822
|
+
scope: item.scope ?? {},
|
|
823
|
+
metadata: item.metadata ?? {},
|
|
824
|
+
tags: Array.isArray(item.tags) ? item.tags : [],
|
|
825
|
+
taskType: item.taskType ? String(item.taskType) : undefined,
|
|
826
|
+
role: item.role ? String(item.role) : undefined,
|
|
827
|
+
pinned: Boolean(item.pinned),
|
|
828
|
+
createdAt: item.createdAt ? String(item.createdAt) : undefined,
|
|
829
|
+
updatedAt: item.updatedAt ? String(item.updatedAt) : undefined,
|
|
830
|
+
scoreVector: item.scoreVector ?? item.score_vector ?? 0,
|
|
831
|
+
scoreBm25: item.scoreBm25 ?? item.score_bm25 ?? 0,
|
|
832
|
+
scoreHybrid: item.scoreHybrid ?? item.score_hybrid ?? 0,
|
|
833
|
+
scoreRerank: item.scoreRerank
|
|
834
|
+
? item.scoreRerank
|
|
835
|
+
: item.score_rerank
|
|
836
|
+
? item.score_rerank
|
|
837
|
+
: undefined,
|
|
838
|
+
scoreFinal: item.scoreFinal ??
|
|
839
|
+
item.score_final ??
|
|
840
|
+
item.score ??
|
|
841
|
+
0,
|
|
842
|
+
embedding: Array.isArray(item.embedding)
|
|
843
|
+
? item.embedding
|
|
844
|
+
: undefined,
|
|
845
|
+
}));
|
|
846
|
+
}
|
|
847
|
+
/**
|
|
848
|
+
* Reciprocal Rank Fusion 算法
|
|
849
|
+
* 合并向量搜索和 BM25 搜索的结果
|
|
850
|
+
*/
|
|
851
|
+
reciprocalRankFusion(vectorResults, bm25Results, vectorWeight, bm25Weight, k = 60) {
|
|
852
|
+
const scoreMap = new Map();
|
|
853
|
+
// 计算向量搜索的 RRF 分数
|
|
854
|
+
vectorResults.forEach((candidate, index) => {
|
|
855
|
+
const rrfScore = vectorWeight / (k + index + 1);
|
|
856
|
+
const existing = scoreMap.get(candidate.id);
|
|
857
|
+
if (existing) {
|
|
858
|
+
existing.score += rrfScore;
|
|
859
|
+
existing.candidate.scoreHybrid = existing.score;
|
|
860
|
+
}
|
|
861
|
+
else {
|
|
862
|
+
scoreMap.set(candidate.id, {
|
|
863
|
+
candidate: { ...candidate, scoreHybrid: rrfScore },
|
|
864
|
+
score: rrfScore,
|
|
865
|
+
});
|
|
866
|
+
}
|
|
867
|
+
});
|
|
868
|
+
// 计算 BM25 搜索的 RRF 分数
|
|
869
|
+
bm25Results.forEach((candidate, index) => {
|
|
870
|
+
const rrfScore = bm25Weight / (k + index + 1);
|
|
871
|
+
const existing = scoreMap.get(candidate.id);
|
|
872
|
+
if (existing) {
|
|
873
|
+
existing.score += rrfScore;
|
|
874
|
+
existing.candidate.scoreHybrid = existing.score;
|
|
875
|
+
}
|
|
876
|
+
else {
|
|
877
|
+
scoreMap.set(candidate.id, {
|
|
878
|
+
candidate: { ...candidate, scoreHybrid: rrfScore },
|
|
879
|
+
score: rrfScore,
|
|
880
|
+
});
|
|
881
|
+
}
|
|
882
|
+
});
|
|
883
|
+
// 按分数排序
|
|
884
|
+
return Array.from(scoreMap.values())
|
|
885
|
+
.sort((a, b) => b.score - a.score)
|
|
886
|
+
.map((item) => ({
|
|
887
|
+
...item.candidate,
|
|
888
|
+
scoreFinal: item.candidate.scoreRerank ?? item.candidate.scoreHybrid,
|
|
889
|
+
}));
|
|
890
|
+
}
|
|
891
|
+
};
|
|
892
|
+
// Export the class, then replace both the export and the local binding with
// the result of applying the decorators below.
exports.VikingDbClientService = VikingDbClientService;
exports.VikingDbClientService = VikingDbClientService = __decorate([
    // Applies the Injectable() class decorator from common_1.
    (0, common_1.Injectable)(),
    // Constructor parameter 0 is injected via the WINSTON_MODULE_PROVIDER token.
    __param(0, (0, common_1.Inject)(nest_winston_1.WINSTON_MODULE_PROVIDER)),
    // Records the constructor parameter types, in order, as "design:paramtypes" metadata.
    __metadata("design:paramtypes", [winston_1.Logger,
        config_1.ConfigService,
        axios_1.HttpService])
], VikingDbClientService);
|
|
900
|
+
//# sourceMappingURL=vikingdb-client.service.js.map
|