@chiway/contextweaver 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/SearchService-MYPOCM3B.js +886 -0
- package/dist/chunk-6QMYML5V.js +1760 -0
- package/dist/{chunk-B6OWNBOD.js → chunk-6Z4JEEVJ.js} +32 -22
- package/dist/{chunk-EZG4H4MN.js → chunk-7G5V7YT5.js} +4 -4
- package/dist/{chunk-2CY5SYBI.js → chunk-NQR4CGQ6.js} +6 -4
- package/dist/{codebaseRetrieval-IC44RHCL.js → codebaseRetrieval-NLAMGOA2.js} +3 -3
- package/dist/index.js +5 -5
- package/dist/scanner-RFG4YWYI.js +11 -0
- package/dist/{server-PPQUHCUB.js → server-27HI7WZO.js} +3 -3
- package/package.json +5 -3
- package/dist/SearchService-533KL2HP.js +0 -1654
- package/dist/chunk-HR5KUQSM.js +0 -906
- package/dist/scanner-SZ2BDYDS.js +0 -11
|
@@ -0,0 +1,1760 @@
|
|
|
1
|
+
import {
|
|
2
|
+
batchDeleteFileChunksFts,
|
|
3
|
+
batchUpdateVectorIndexHash,
|
|
4
|
+
batchUpsertChunkFts,
|
|
5
|
+
clearVectorIndexHash,
|
|
6
|
+
initDb,
|
|
7
|
+
isChunksFtsInitialized
|
|
8
|
+
} from "./chunk-6Z4JEEVJ.js";
|
|
9
|
+
import {
|
|
10
|
+
logger
|
|
11
|
+
} from "./chunk-AMQQK4P7.js";
|
|
12
|
+
import {
|
|
13
|
+
getEmbeddingConfig
|
|
14
|
+
} from "./chunk-RJURH22T.js";
|
|
15
|
+
|
|
16
|
+
// src/vectorStore/index.ts
|
|
17
|
+
import fs from "fs";
|
|
18
|
+
import os from "os";
|
|
19
|
+
import path from "path";
|
|
20
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
21
|
+
var BASE_DIR = path.join(os.homedir(), ".contextweaver");
|
|
22
|
+
/**
 * Per-project wrapper around a LanceDB database holding one "chunks" table.
 *
 * The table is created lazily on the first insert (its schema is taken from
 * the first records written), so `table` stays null for an empty project and
 * every read method treats "no table" as "no data".
 */
var VectorStore = class {
  /** LanceDB connection; null until init() and again after close(). */
  db = null;
  /** Handle of the "chunks" table; null while the table does not exist. */
  table = null;
  projectId;
  dbPath;
  vectorDim;

  /**
   * @param {string} projectId Directory name under BASE_DIR that owns the database.
   * @param {number} [vectorDim=1024] Embedding dimension reported by getVectorDim().
   */
  constructor(projectId, vectorDim = 1024) {
    this.projectId = projectId;
    this.dbPath = path.join(BASE_DIR, projectId, "vectors.lance");
    this.vectorDim = vectorDim;
  }

  /**
   * Open the connection (idempotent) and attach to the "chunks" table if it
   * already exists on disk.
   */
  async init() {
    if (this.db) return;
    const projectDir = path.join(BASE_DIR, this.projectId);
    if (!fs.existsSync(projectDir)) {
      fs.mkdirSync(projectDir, { recursive: true });
    }
    this.db = await lancedb.connect(this.dbPath);
    const tableNames = await this.db.tableNames();
    if (tableNames.includes("chunks")) {
      this.table = await this.db.openTable("chunks");
    }
  }

  /**
   * Create the "chunks" table from `records` when it does not exist yet.
   * Called on the first insert; a no-op for an empty record list.
   *
   * @param {object[]} records Rows used both as schema sample and initial data.
   * @throws {Error} When init() has not been called.
   */
  async ensureTable(records) {
    if (this.table) return;
    if (!this.db) throw new Error("VectorStore not initialized");
    if (records.length === 0) return;
    this.table = await this.db.createTable("chunks", records);
  }

  /**
   * Monotonic version update for one file: insert the new version first, then
   * delete rows of the same path carrying a different hash. If the process
   * dies in between, old and new rows coexist instead of rows going missing.
   *
   * @param {string} filePath
   * @param {string} newHash Hash of the version being written.
   * @param {object[]} records New rows; an empty list removes the file.
   * @throws {Error} When init() has not been called.
   */
  async upsertFile(filePath, newHash, records) {
    if (!this.db) throw new Error("VectorStore not initialized");
    if (records.length === 0) {
      await this.deleteFile(filePath);
      return;
    }
    if (!this.table) {
      // createTable() inserts the records as part of table creation.
      await this.ensureTable(records);
    } else {
      await this.table.add(records);
    }
    if (this.table) {
      await this.table.delete(
        `file_path = '${this.escapeString(filePath)}' AND file_hash != '${this.escapeString(newHash)}'`
      );
    }
  }

  /**
   * Upsert many files with the same insert-then-delete-old ordering as
   * upsertFile(), split into sub-batches of at most 50 files / 5000 records
   * (the original author notes the LanceDB native module may crash on very
   * large writes).
   *
   * @param {{path: string, hash: string, records: object[]}[]} files
   * @throws {Error} When init() has not been called.
   */
  async batchUpsertFiles(files) {
    if (!this.db) throw new Error("VectorStore not initialized");
    if (files.length === 0) return;
    const BATCH_FILES = 50;
    const BATCH_RECORDS = 5e3;
    const batches = [];
    let currentBatch = [];
    let currentRecordCount = 0;
    for (const file of files) {
      const batchIsFull =
        currentBatch.length >= BATCH_FILES ||
        currentRecordCount + file.records.length > BATCH_RECORDS;
      if (batchIsFull) {
        if (currentBatch.length > 0) {
          batches.push(currentBatch);
        }
        currentBatch = [];
        currentRecordCount = 0;
      }
      currentBatch.push(file);
      currentRecordCount += file.records.length;
    }
    if (currentBatch.length > 0) {
      batches.push(currentBatch);
    }
    for (const batch of batches) {
      // Append one by one: spreading a very large array into push() can
      // exceed the engine's argument limit (a single oversized file still
      // lands in a batch of its own).
      const batchRecords = [];
      for (const file of batch) {
        for (const record of file.records) {
          batchRecords.push(record);
        }
      }
      if (batchRecords.length === 0) {
        // Nothing to insert for any file in this batch: plain removal.
        await this.deleteFiles(batch.map((f) => f.path));
        continue;
      }
      if (!this.table) {
        await this.ensureTable(batchRecords);
      } else {
        await this.table.add(batchRecords);
      }
      if (this.table && batch.length > 0) {
        const deleteConditions = batch.map(
          (f) => `(file_path = '${this.escapeString(f.path)}' AND file_hash != '${this.escapeString(f.hash)}')`
        ).join(" OR ");
        await this.table.delete(deleteConditions);
      }
    }
  }

  /**
   * Delete every chunk of one file. No-op when the table does not exist.
   * @param {string} filePath
   */
  async deleteFile(filePath) {
    if (!this.table) return;
    await this.table.delete(`file_path = '${this.escapeString(filePath)}'`);
  }

  /**
   * Delete every chunk of the given files, 500 paths per DELETE so the filter
   * string stays bounded.
   * @param {string[]} filePaths
   */
  async deleteFiles(filePaths) {
    if (!this.table || filePaths.length === 0) return;
    const BATCH_SIZE = 500;
    for (let i = 0; i < filePaths.length; i += BATCH_SIZE) {
      const batch = filePaths.slice(i, i + BATCH_SIZE);
      await this.table.delete(this.buildPathFilter(batch));
    }
  }

  /**
   * Nearest-neighbour search.
   *
   * @param {number[]} queryVector
   * @param {number} [limit=10]
   * @param {string} [filter] Optional filter expression passed to where() verbatim.
   * @returns {Promise<object[]>} Matching rows; empty when the table does not exist.
   */
  async search(queryVector, limit = 10, filter) {
    if (!this.table) return [];
    let query = this.table.vectorSearch(queryVector).limit(limit);
    if (filter) {
      query = query.where(filter);
    }
    return await query.toArray();
  }

  /**
   * All chunks of one file, ordered by chunk_index.
   * @param {string} filePath
   * @returns {Promise<object[]>}
   */
  async getFileChunks(filePath) {
    if (!this.table) return [];
    const chunks = await this.table.query().where(`file_path = '${this.escapeString(filePath)}'`).toArray();
    return chunks.sort((a, b) => a.chunk_index - b.chunk_index);
  }

  /**
   * Chunks of many files in as few queries as possible (500 paths per query).
   *
   * @param {string[]} filePaths
   * @returns {Promise<Map<string, object[]>>} file path -> chunks sorted by
   *   chunk_index. Paths without rows have no entry.
   */
  async getFilesChunks(filePaths) {
    const result = /* @__PURE__ */ new Map();
    if (!this.table || filePaths.length === 0) return result;
    const BATCH_SIZE = 500;
    for (let i = 0; i < filePaths.length; i += BATCH_SIZE) {
      const batch = filePaths.slice(i, i + BATCH_SIZE);
      const rows = await this.table.query().where(this.buildPathFilter(batch)).toArray();
      for (const row of rows) {
        let arr = result.get(row.file_path);
        if (!arr) {
          arr = [];
          result.set(row.file_path, arr);
        }
        arr.push(row);
      }
    }
    for (const arr of result.values()) {
      arr.sort((a, b) => a.chunk_index - b.chunk_index);
    }
    return result;
  }

  /**
   * Total number of rows; 0 when the table does not exist.
   * @returns {Promise<number>}
   */
  async count() {
    if (!this.table) return 0;
    return await this.table.countRows();
  }

  /**
   * Drop the "chunks" table. Best effort: a failure is logged at debug level
   * and otherwise ignored, and the table handle is kept in that case.
   */
  async clear() {
    if (!this.db) return;
    try {
      await this.db.dropTable("chunks");
      this.table = null;
    } catch (err) {
      logger.debug({ error: err?.message }, "VectorStore.clear: dropTable failed (ignored)");
    }
  }

  /** @returns {number} The vector dimension given to the constructor. */
  getVectorDim() {
    return this.vectorDim;
  }

  /**
   * Escape a value for use inside a single-quoted SQL string literal by
   * doubling single quotes.
   * @param {string} str
   * @returns {string}
   */
  escapeString(str) {
    return str.replaceAll("'", "''");
  }

  /**
   * Build `file_path = '..' OR file_path = '..'` for a list of paths.
   * @param {string[]} filePaths
   * @returns {string}
   */
  buildPathFilter(filePaths) {
    return filePaths.map((p) => `file_path = '${this.escapeString(p)}'`).join(" OR ");
  }

  /**
   * Release the table and connection handles and reset the store to its
   * un-initialised state. Safe to call repeatedly.
   */
  async close() {
    const { table, db } = this;
    // Detach first so a failing close() cannot leave stale handles behind.
    this.table = null;
    this.db = null;
    try {
      // Optional call: tolerate handle objects that expose no close().
      table?.close?.();
      db?.close?.();
    } catch (err) {
      logger.debug({ error: err?.message }, "VectorStore.close: releasing LanceDB handles failed (ignored)");
    }
  }
};
|
|
253
|
+
/** Initialised stores, keyed by project id. */
var vectorStores = /* @__PURE__ */ new Map();
/** In-flight initialisations, keyed by project id, so concurrent callers share one. */
var vectorStoreInits = /* @__PURE__ */ new Map();

/**
 * Return the shared, initialised VectorStore of a project, creating it on
 * first use.
 *
 * Concurrent calls for the same project await a single initialisation instead
 * of each constructing and initialising their own store. A failed
 * initialisation is not cached, so the next call retries.
 *
 * @param {string} projectId
 * @param {number} [vectorDim=1024] Only used when the store is first created.
 * @returns {Promise<VectorStore>}
 */
async function getVectorStore(projectId, vectorDim = 1024) {
  const cached = vectorStores.get(projectId);
  if (cached) return cached;

  let pending = vectorStoreInits.get(projectId);
  if (!pending) {
    pending = (async () => {
      try {
        const store = new VectorStore(projectId, vectorDim);
        await store.init();
        vectorStores.set(projectId, store);
        return store;
      } finally {
        vectorStoreInits.delete(projectId);
      }
    })();
    vectorStoreInits.set(projectId, pending);
  }
  return pending;
}
|
|
263
|
+
/**
 * Close every cached VectorStore, one after another, then empty the cache.
 * @returns {Promise<void>}
 */
async function closeAllVectorStores() {
  for (const [, openStore] of vectorStores) {
    await openStore.close();
  }
  vectorStores.clear();
}
|
|
269
|
+
|
|
270
|
+
// src/api/embedding.ts
|
|
271
|
+
/**
 * Counts finished embedding batches and consumed tokens, forwards progress
 * to an optional callback and emits a throttled progress log.
 */
var ProgressTracker = class {
  completed = 0;
  total;
  totalTokens = 0;
  startTime;
  lastLogTime = 0;
  // Minimum gap between two progress log lines: 2 seconds.
  logIntervalMs = 2e3;
  onProgress;
  /** True for single-batch runs, whose logs would only duplicate the indexer's own. */
  skipLogs;

  /**
   * @param {number} total Number of batches expected.
   * @param {(completed: number, total: number) => void} [onProgress]
   */
  constructor(total, onProgress) {
    this.total = total;
    this.onProgress = onProgress;
    this.skipLogs = total <= 1;
    this.startTime = Date.now();
  }

  /**
   * Register one finished batch.
   * @param {number} tokens Tokens consumed by that batch.
   */
  recordBatch(tokens) {
    this.completed += 1;
    this.totalTokens += tokens;
    if (this.onProgress != null) {
      this.onProgress(this.completed, this.total);
    }
    const timestamp = Date.now();
    const sinceLastLog = timestamp - this.lastLogTime;
    if (!(sinceLastLog >= this.logIntervalMs)) return;
    this.logProgress();
    this.lastLogTime = timestamp;
  }

  /** Log batches done, percentage, tokens, elapsed time and an ETA estimate. */
  logProgress() {
    if (this.skipLogs) return;
    const { completed, total, totalTokens } = this;
    const elapsedSec = (Date.now() - this.startTime) / 1e3;
    const percent = Math.round(completed / total * 100);
    const batchesPerSec = completed / elapsedSec;
    let etaSec = 0;
    if (batchesPerSec > 0) {
      etaSec = Math.round((total - completed) / batchesPerSec);
    }
    const payload = {
      progress: `${completed}/${total}`,
      percent: `${percent}%`,
      tokens: totalTokens,
      elapsed: `${elapsedSec.toFixed(1)}s`,
      eta: `${etaSec}s`
    };
    logger.info(payload, "Embedding \u8FDB\u5EA6");
  }

  /** Log the final summary once every batch has finished. */
  complete() {
    if (this.skipLogs) return;
    const elapsedSec = (Date.now() - this.startTime) / 1e3;
    const summary = {
      batches: this.total,
      tokens: this.totalTokens,
      elapsed: `${elapsedSec.toFixed(1)}s`,
      avgTokensPerBatch: Math.round(this.totalTokens / this.total)
    };
    logger.info(summary, "Embedding \u5B8C\u6210");
  }
};
|
|
332
|
+
/**
 * Adaptive concurrency gate for embedding requests.
 *
 * Callers take a slot with acquire() and give it back with one of the
 * release*() methods. A 429 reported through triggerRateLimit() pauses all
 * callers for the current back-off, drops concurrency to 1 and doubles the
 * back-off; consecutive successes raise concurrency again one step at a time.
 */
var RateLimitController = class {
  /** Whether a rate-limit pause is currently in progress. */
  isPaused = false;
  /** Promise every caller awaits while paused; null otherwise. */
  pausePromise = null;
  /** Concurrency currently allowed. */
  currentConcurrency;
  /** Configured upper bound for concurrency. */
  maxConcurrency;
  /** Slots currently held. */
  activeRequests = 0;
  /** Successes since the last reset; drives concurrency recovery. */
  consecutiveSuccesses = 0;
  /** Back-off applied to the next pause, in milliseconds. */
  backoffMs = 5e3;
  /** Consecutive successes needed to raise concurrency by one. */
  successesPerConcurrencyIncrease = 3;
  /** Lower bound for backoffMs. */
  minBackoffMs = 5e3;
  /** Upper bound for backoffMs. */
  maxBackoffMs = 6e4;

  /** @param {number} maxConcurrency Initial and maximum number of concurrent slots. */
  constructor(maxConcurrency) {
    this.maxConcurrency = maxConcurrency;
    this.currentConcurrency = maxConcurrency;
  }

  /**
   * Take an execution slot, waiting while a pause is active or all slots are
   * in use (polled every 50 ms).
   */
  async acquire() {
    if (this.pausePromise) {
      await this.pausePromise;
    }
    while (this.activeRequests >= this.currentConcurrency) {
      await sleep(50);
      if (this.pausePromise) {
        await this.pausePromise;
      }
    }
    this.activeRequests++;
  }

  /**
   * Give a slot back after a successful request. Every
   * `successesPerConcurrencyIncrease` successes raise concurrency by one
   * (resetting the counter); while the counter keeps growing, every tenth
   * success halves the back-off, never below `minBackoffMs`.
   */
  releaseSuccess() {
    this.activeRequests = Math.max(0, this.activeRequests - 1);
    this.consecutiveSuccesses++;
    if (this.currentConcurrency < this.maxConcurrency && this.consecutiveSuccesses >= this.successesPerConcurrencyIncrease) {
      this.currentConcurrency++;
      this.consecutiveSuccesses = 0;
    }
    if (this.consecutiveSuccesses > 0 && this.consecutiveSuccesses % 10 === 0) {
      this.backoffMs = Math.max(this.minBackoffMs, this.backoffMs / 2);
    }
  }

  /** Give a slot back after a failure that is not going to be retried. */
  releaseFailure() {
    this.activeRequests = Math.max(0, this.activeRequests - 1);
  }

  /** Give a slot back before a retry and reset the success streak. */
  releaseForRetry() {
    this.activeRequests = Math.max(0, this.activeRequests - 1);
    this.consecutiveSuccesses = 0;
  }

  /**
   * React to a rate-limit response: pause every caller for `backoffMs`, drop
   * concurrency to 1 and double the back-off (capped at `maxBackoffMs`).
   * When a pause is already running, just wait for it to finish.
   */
  async triggerRateLimit() {
    if (this.isPaused && this.pausePromise) {
      logger.debug("\u901F\u7387\u9650\u5236\uFF1A\u7B49\u5F85\u73B0\u6709\u6682\u505C\u7ED3\u675F");
      await this.pausePromise;
      return;
    }
    this.isPaused = true;
    this.consecutiveSuccesses = 0;
    const previousConcurrency = this.currentConcurrency;
    this.currentConcurrency = 1;
    logger.warn(
      {
        backoffMs: this.backoffMs,
        previousConcurrency,
        newConcurrency: this.currentConcurrency,
        activeRequests: this.activeRequests
      },
      "\u901F\u7387\u9650\u5236\uFF1A\u89E6\u53D1 429\uFF0C\u6682\u505C\u6240\u6709\u8BF7\u6C42"
    );
    let resumeResolve = () => {
    };
    this.pausePromise = new Promise((resolve) => {
      resumeResolve = resolve;
    });
    // Remember the wait actually applied; backoffMs is doubled below, before
    // the resume message is logged.
    const waitedMs = this.backoffMs;
    try {
      await sleep(waitedMs);
    } finally {
      // Always lift the pause, otherwise every waiter would hang forever.
      this.backoffMs = Math.min(this.maxBackoffMs, this.backoffMs * 2);
      this.isPaused = false;
      this.pausePromise = null;
      resumeResolve();
    }
    logger.info({ waitMs: waitedMs, nextBackoffMs: this.backoffMs }, "\u901F\u7387\u9650\u5236\uFF1A\u6062\u590D\u8BF7\u6C42");
  }

  /**
   * Snapshot of the controller state, for debugging.
   * @returns {{isPaused: boolean, currentConcurrency: number, maxConcurrency: number, activeRequests: number, backoffMs: number}}
   */
  getStatus() {
    return {
      isPaused: this.isPaused,
      currentConcurrency: this.currentConcurrency,
      maxConcurrency: this.maxConcurrency,
      activeRequests: this.activeRequests,
      backoffMs: this.backoffMs
    };
  }
};
|
|
449
|
+
/** Process-wide controller instance; created on first request. */
var globalRateLimitController = null;

/**
 * Return the shared RateLimitController, creating it on the first call.
 *
 * @param {number} maxConcurrency Used only when the controller is created;
 *   later calls return the existing instance whatever value they pass.
 * @returns {RateLimitController}
 */
function getRateLimitController(maxConcurrency) {
  globalRateLimitController ??= new RateLimitController(maxConcurrency);
  return globalRateLimitController;
}
|
|
456
|
+
/**
 * Client for an HTTP embedding endpoint: POSTs `{model, input,
 * encoding_format}` and reads `data[]` / `usage` from the JSON reply.
 * Requests go through the shared RateLimitController, network failures are
 * retried a bounded number of times, rate-limit responses are retried after
 * the controller's pause.
 */
var EmbeddingClient = class {
  config;
  rateLimiter;

  /** @param {object} [config] Embedding configuration; defaults to getEmbeddingConfig(). */
  constructor(config) {
    this.config = config || getEmbeddingConfig();
    this.rateLimiter = getRateLimitController(this.config.maxConcurrency);
  }

  /**
   * Embed a single text.
   * @param {string} text
   * @returns {Promise<number[]>} The embedding vector.
   */
  async embed(text) {
    const results = await this.embedBatch([text]);
    return results[0].embedding;
  }

  /**
   * Embed many texts, `batchSize` texts per request. Requests are started
   * together and throttled by the rate limiter.
   *
   * @param {string[]} texts
   * @param {number} [batchSize=20] Texts per request; must be at least 1.
   * @param {(completed: number, total: number) => void} [onProgress] Called per finished request.
   * @returns {Promise<{text: string, embedding: number[], index: number}[]>}
   *   One entry per input text, in input order.
   * @throws {RangeError} When batchSize is not at least 1.
   */
  async embedBatch(texts, batchSize = 20, onProgress) {
    if (texts.length === 0) {
      return [];
    }
    if (!(batchSize >= 1)) {
      // A zero or negative step would never advance the slicing loop below.
      throw new RangeError(`batchSize must be >= 1, got ${batchSize}`);
    }
    const batches = [];
    for (let i = 0; i < texts.length; i += batchSize) {
      batches.push(texts.slice(i, i + batchSize));
    }
    const progress = new ProgressTracker(batches.length, onProgress);
    const batchResults = await Promise.all(
      batches.map(
        (batch, batchIndex) => this.processWithRateLimit(batch, batchIndex * batchSize, progress)
      )
    );
    progress.complete();
    return batchResults.flat();
  }

  /**
   * Run one request under the rate limiter, retrying as needed.
   * Implemented as a loop so each attempt takes and returns exactly one slot.
   *
   * - rate-limit error: release, wait for the controller's pause, retry;
   * - network error: up to 3 retries with 1s/2s/4s delays;
   * - anything else: release and rethrow.
   */
  async processWithRateLimit(texts, startIndex, progress) {
    const MAX_NETWORK_RETRIES = 3;
    const INITIAL_RETRY_DELAY_MS = 1e3;
    let networkRetries = 0;
    while (true) {
      await this.rateLimiter.acquire();
      try {
        const result = await this.processBatch(texts, startIndex, progress);
        this.rateLimiter.releaseSuccess();
        return result;
      } catch (err) {
        const errorMessage = String(err?.message ?? "");
        const isRateLimited = this.isRateLimitError(err);
        const isNetworkError = this.isNetworkError(err);
        if (isRateLimited) {
          this.rateLimiter.releaseForRetry();
          await this.rateLimiter.triggerRateLimit();
          networkRetries = 0;
        } else if (isNetworkError && networkRetries < MAX_NETWORK_RETRIES) {
          networkRetries++;
          const delayMs = INITIAL_RETRY_DELAY_MS * 2 ** (networkRetries - 1);
          logger.warn(
            {
              error: errorMessage,
              retry: networkRetries,
              maxRetries: MAX_NETWORK_RETRIES,
              delayMs
            },
            "\u7F51\u7EDC\u9519\u8BEF\uFF0C\u51C6\u5907\u91CD\u8BD5"
          );
          this.rateLimiter.releaseForRetry();
          await sleep(delayMs);
        } else {
          this.rateLimiter.releaseFailure();
          if (isNetworkError) {
            logger.error({ error: errorMessage, retries: networkRetries }, "\u7F51\u7EDC\u9519\u8BEF\u91CD\u8BD5\u6B21\u6570\u8017\u5C3D");
          }
          throw err;
        }
      }
    }
  }

  /**
   * Whether an error means "rate limited". Rate-limit retries are unbounded,
   * so this must not match unrelated messages: a bare substring test for
   * "rate" also matches e.g. "generate" and would retry a permanent error
   * forever.
   *
   * @param {unknown} err
   * @returns {boolean}
   */
  isRateLimitError(err) {
    // processBatch() attaches the HTTP status to the errors it throws.
    if (err?.status === 429) return true;
    const message = String(err?.message ?? "");
    return /\b429\b/.test(message) || /\brate\b|rate.?limit|too many requests/i.test(message);
  }

  /**
   * Whether an error looks like a transient network failure, judged by its
   * message (case-insensitive substring) or its `code`.
   *
   * Messages: terminated, econnreset, etimedout, enotfound, econnrefused,
   * "fetch failed", "socket hang up", network, aborted.
   * Codes: ECONNRESET, ETIMEDOUT, ENOTFOUND, ECONNREFUSED, EPIPE.
   *
   * @param {unknown} err
   * @returns {boolean}
   */
  isNetworkError(err) {
    const message = String(err?.message ?? "").toLowerCase();
    const code = err?.code || "";
    const networkErrorPatterns = [
      "terminated",
      "econnreset",
      "etimedout",
      "enotfound",
      "econnrefused",
      "fetch failed",
      "socket hang up",
      "network",
      "aborted"
    ];
    if (networkErrorPatterns.some((pattern) => message.includes(pattern))) {
      return true;
    }
    const networkErrorCodes = ["ECONNRESET", "ETIMEDOUT", "ENOTFOUND", "ECONNREFUSED", "EPIPE"];
    return networkErrorCodes.includes(code);
  }

  /**
   * Send one request (no retry logic) and map the reply to result objects.
   *
   * @param {string[]} texts Texts of this request.
   * @param {number} startIndex Offset of `texts[0]` within the overall input.
   * @param {ProgressTracker} progress Receives the token usage on success.
   * @returns {Promise<{text: string, embedding: number[], index: number}[]>}
   *   Sorted by `index`, so positional consumers stay aligned with `texts`.
   * @throws {Error} On HTTP errors, API-level errors or malformed replies;
   *   errors built here carry the HTTP status in `status`.
   */
  async processBatch(texts, startIndex, progress) {
    const requestBody = {
      model: this.config.model,
      input: texts,
      encoding_format: "float"
    };
    const response = await fetch(this.config.baseUrl, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.config.apiKey}`
      },
      body: JSON.stringify(requestBody)
    });
    let data;
    try {
      data = await response.json();
    } catch (parseErr) {
      // Stream/connection failures must keep their identity so the retry
      // logic can classify them; only a body that is not JSON is rewritten.
      if (!(parseErr instanceof SyntaxError)) throw parseErr;
      const failure = new Error(
        `Embedding API \u9519\u8BEF: HTTP ${response.status} (non-JSON response body)`,
        { cause: parseErr }
      );
      failure.status = response.status;
      throw failure;
    }
    if (!response.ok || data?.error) {
      const errorMsg = data?.error?.message || `HTTP ${response.status}`;
      const failure = new Error(`Embedding API \u9519\u8BEF: ${errorMsg}`);
      failure.status = response.status;
      throw failure;
    }
    if (!Array.isArray(data?.data)) {
      const failure = new Error("Embedding API \u9519\u8BEF: malformed response (missing data array)");
      failure.status = response.status;
      throw failure;
    }
    const results = data.data.map((item) => ({
      text: texts[item.index],
      embedding: item.embedding,
      index: startIndex + item.index
    }));
    // Array.prototype.sort is stable, so replies already in order are untouched.
    results.sort((a, b) => a.index - b.index);
    progress.recordBatch(data.usage?.total_tokens || 0);
    return results;
  }

  /** @returns {object} A shallow copy of the active configuration. */
  getConfig() {
    return { ...this.config };
  }

  /** @returns {object} The rate limiter's status snapshot, for debugging. */
  getRateLimiterStatus() {
    return this.rateLimiter.getStatus();
  }
};
|
|
620
|
+
/** Shared client instance; created on first request. */
var defaultClient = null;

/**
 * Return the shared EmbeddingClient, constructing it with the default
 * configuration on first use.
 * @returns {EmbeddingClient}
 */
function getEmbeddingClient() {
  defaultClient ??= new EmbeddingClient();
  return defaultClient;
}
|
|
627
|
+
/**
 * Resolve (with no value) after roughly `ms` milliseconds.
 * @param {number} ms Delay handed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
630
|
+
|
|
631
|
+
// src/indexer/index.ts
|
|
632
|
+
var Indexer = class {
|
|
633
|
+
projectId;
|
|
634
|
+
vectorStore = null;
|
|
635
|
+
embeddingClient;
|
|
636
|
+
vectorDim;
|
|
637
|
+
constructor(projectId, vectorDim = 1024) {
|
|
638
|
+
this.projectId = projectId;
|
|
639
|
+
this.vectorDim = vectorDim;
|
|
640
|
+
this.embeddingClient = getEmbeddingClient();
|
|
641
|
+
}
|
|
642
|
+
/**
|
|
643
|
+
* 初始化
|
|
644
|
+
*/
|
|
645
|
+
async init() {
|
|
646
|
+
this.vectorStore = await getVectorStore(this.projectId, this.vectorDim);
|
|
647
|
+
}
|
|
648
|
+
/**
|
|
649
|
+
* 处理扫描结果,更新向量索引
|
|
650
|
+
*
|
|
651
|
+
* @param db SQLite 数据库实例
|
|
652
|
+
* @param results 文件处理结果
|
|
653
|
+
* @param onProgress 可选的进度回调 (indexed, total) => void
|
|
654
|
+
*/
|
|
655
|
+
async indexFiles(db, results, onProgress) {
|
|
656
|
+
if (!this.vectorStore) {
|
|
657
|
+
await this.init();
|
|
658
|
+
}
|
|
659
|
+
const stats = {
|
|
660
|
+
indexed: 0,
|
|
661
|
+
deleted: 0,
|
|
662
|
+
errors: 0,
|
|
663
|
+
skipped: 0
|
|
664
|
+
};
|
|
665
|
+
const toIndex = [];
|
|
666
|
+
const toDelete = [];
|
|
667
|
+
const noChunkSettled = [];
|
|
668
|
+
for (const result of results) {
|
|
669
|
+
switch (result.status) {
|
|
670
|
+
case "added":
|
|
671
|
+
case "modified":
|
|
672
|
+
if (result.chunks.length > 0) {
|
|
673
|
+
toIndex.push({
|
|
674
|
+
path: result.relPath,
|
|
675
|
+
hash: result.hash,
|
|
676
|
+
chunks: result.chunks
|
|
677
|
+
});
|
|
678
|
+
} else {
|
|
679
|
+
if (result.status === "modified") {
|
|
680
|
+
toDelete.push(result.relPath);
|
|
681
|
+
}
|
|
682
|
+
noChunkSettled.push({
|
|
683
|
+
path: result.relPath,
|
|
684
|
+
hash: result.hash
|
|
685
|
+
});
|
|
686
|
+
stats.skipped++;
|
|
687
|
+
}
|
|
688
|
+
break;
|
|
689
|
+
case "deleted":
|
|
690
|
+
toDelete.push(result.relPath);
|
|
691
|
+
break;
|
|
692
|
+
case "unchanged":
|
|
693
|
+
stats.skipped++;
|
|
694
|
+
break;
|
|
695
|
+
case "skipped":
|
|
696
|
+
case "error":
|
|
697
|
+
stats.skipped++;
|
|
698
|
+
break;
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
if (toDelete.length > 0) {
|
|
702
|
+
await this.deleteFiles(db, toDelete);
|
|
703
|
+
stats.deleted = toDelete.length;
|
|
704
|
+
}
|
|
705
|
+
if (noChunkSettled.length > 0) {
|
|
706
|
+
batchUpdateVectorIndexHash(db, noChunkSettled);
|
|
707
|
+
logger.debug({ count: noChunkSettled.length }, "\u65E0\u53EF\u7D22\u5F15 chunk\uFF0C\u6807\u8BB0\u5411\u91CF\u7D22\u5F15\u72B6\u6001\u4E3A\u5DF2\u6536\u655B");
|
|
708
|
+
}
|
|
709
|
+
if (toIndex.length > 0) {
|
|
710
|
+
const indexResult = await this.batchIndex(db, toIndex, onProgress);
|
|
711
|
+
stats.indexed = indexResult.success;
|
|
712
|
+
stats.errors = indexResult.errors;
|
|
713
|
+
}
|
|
714
|
+
logger.info(
|
|
715
|
+
{
|
|
716
|
+
indexed: stats.indexed,
|
|
717
|
+
vectorRecordsDeleted: stats.deleted,
|
|
718
|
+
errors: stats.errors,
|
|
719
|
+
skipped: stats.skipped
|
|
720
|
+
},
|
|
721
|
+
"\u5411\u91CF\u7D22\u5F15\u5B8C\u6210"
|
|
722
|
+
);
|
|
723
|
+
return stats;
|
|
724
|
+
}
|
|
725
|
+
/**
|
|
726
|
+
* 批量索引文件(内存优化版)
|
|
727
|
+
*
|
|
728
|
+
* 优化策略:
|
|
729
|
+
* 1. 文件按批次处理(每批 100 个文件),避免一次性加载所有 embedding 到内存
|
|
730
|
+
* 2. 每批独立完成:collect texts → embedBatch → write LanceDB → write FTS → update SQLite
|
|
731
|
+
* 3. 批次间释放中间数据引用,让 GC 回收内存
|
|
732
|
+
* 4. ProgressTracker 跨批次累计,总数基于所有文件
|
|
733
|
+
*/
|
|
734
|
+
async batchIndex(db, files, onProgress) {
|
|
735
|
+
if (files.length === 0) {
|
|
736
|
+
return { success: 0, errors: 0 };
|
|
737
|
+
}
|
|
738
|
+
const FILE_BATCH_SIZE = 100;
|
|
739
|
+
let totalSuccess = 0;
|
|
740
|
+
let totalErrors = 0;
|
|
741
|
+
const totalChunks = files.reduce((sum, f) => sum + f.chunks.length, 0);
|
|
742
|
+
if (totalChunks === 0) {
|
|
743
|
+
return { success: 0, errors: 0 };
|
|
744
|
+
}
|
|
745
|
+
let completedChunks = 0;
|
|
746
|
+
logger.info(
|
|
747
|
+
{ totalFiles: files.length, totalChunks, batches: Math.ceil(files.length / FILE_BATCH_SIZE) },
|
|
748
|
+
"\u5F00\u59CB\u5206\u6279\u7D22\u5F15"
|
|
749
|
+
);
|
|
750
|
+
for (let batchStart = 0; batchStart < files.length; batchStart += FILE_BATCH_SIZE) {
|
|
751
|
+
const batchFiles = files.slice(batchStart, batchStart + FILE_BATCH_SIZE);
|
|
752
|
+
const batchNum = Math.floor(batchStart / FILE_BATCH_SIZE) + 1;
|
|
753
|
+
const totalBatches = Math.ceil(files.length / FILE_BATCH_SIZE);
|
|
754
|
+
const batchTexts = [];
|
|
755
|
+
const indexByFileChunk = [];
|
|
756
|
+
for (let fileIdx = 0; fileIdx < batchFiles.length; fileIdx++) {
|
|
757
|
+
const file = batchFiles[fileIdx];
|
|
758
|
+
indexByFileChunk[fileIdx] = [];
|
|
759
|
+
for (let chunkIdx = 0; chunkIdx < file.chunks.length; chunkIdx++) {
|
|
760
|
+
const idx = batchTexts.length;
|
|
761
|
+
batchTexts.push(file.chunks[chunkIdx].vectorText);
|
|
762
|
+
indexByFileChunk[fileIdx][chunkIdx] = idx;
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
if (batchTexts.length === 0) {
|
|
766
|
+
totalSuccess += batchFiles.length;
|
|
767
|
+
continue;
|
|
768
|
+
}
|
|
769
|
+
logger.info(
|
|
770
|
+
{ batch: `${batchNum}/${totalBatches}`, texts: batchTexts.length, files: batchFiles.length },
|
|
771
|
+
"\u6279\u6B21 Embedding \u5F00\u59CB"
|
|
772
|
+
);
|
|
773
|
+
let embeddings;
|
|
774
|
+
const EMBED_BATCH_SIZE = 10;
|
|
775
|
+
try {
|
|
776
|
+
const batchOnProgress = onProgress ? (_completed, _total) => {
|
|
777
|
+
onProgress(completedChunks + Math.min(_completed * EMBED_BATCH_SIZE, batchTexts.length), totalChunks);
|
|
778
|
+
} : void 0;
|
|
779
|
+
const results = await this.embeddingClient.embedBatch(batchTexts, EMBED_BATCH_SIZE, batchOnProgress);
|
|
780
|
+
embeddings = results.map((r) => r.embedding);
|
|
781
|
+
} catch (err) {
|
|
782
|
+
const error = err;
|
|
783
|
+
logger.error(
|
|
784
|
+
{ error: error.message, stack: error.stack, batch: `${batchNum}/${totalBatches}` },
|
|
785
|
+
"\u6279\u6B21 Embedding \u5931\u8D25"
|
|
786
|
+
);
|
|
787
|
+
clearVectorIndexHash(
|
|
788
|
+
db,
|
|
789
|
+
batchFiles.map((f) => f.path)
|
|
790
|
+
);
|
|
791
|
+
totalErrors += batchFiles.length;
|
|
792
|
+
completedChunks += batchTexts.length;
|
|
793
|
+
continue;
|
|
794
|
+
}
|
|
795
|
+
const filesToUpsert = [];
|
|
796
|
+
const ftsChunks = [];
|
|
797
|
+
const successFiles = [];
|
|
798
|
+
const errorFiles = [];
|
|
799
|
+
for (let fileIdx = 0; fileIdx < batchFiles.length; fileIdx++) {
|
|
800
|
+
const file = batchFiles[fileIdx];
|
|
801
|
+
try {
|
|
802
|
+
const records = [];
|
|
803
|
+
for (let chunkIdx = 0; chunkIdx < file.chunks.length; chunkIdx++) {
|
|
804
|
+
const chunk = file.chunks[chunkIdx];
|
|
805
|
+
const embIdx = indexByFileChunk[fileIdx][chunkIdx];
|
|
806
|
+
if (embIdx === void 0) {
|
|
807
|
+
throw new Error(`\u627E\u4E0D\u5230 chunk \u7684 embedding: ${file.path}#${chunkIdx}`);
|
|
808
|
+
}
|
|
809
|
+
const record = {
|
|
810
|
+
chunk_id: `${file.path}#${file.hash}#${chunkIdx}`,
|
|
811
|
+
file_path: file.path,
|
|
812
|
+
file_hash: file.hash,
|
|
813
|
+
chunk_index: chunkIdx,
|
|
814
|
+
vector: embeddings[embIdx],
|
|
815
|
+
display_code: chunk.displayCode,
|
|
816
|
+
vector_text: chunk.vectorText,
|
|
817
|
+
language: chunk.metadata.language,
|
|
818
|
+
breadcrumb: chunk.metadata.contextPath.join(" > "),
|
|
819
|
+
start_index: chunk.metadata.startIndex,
|
|
820
|
+
end_index: chunk.metadata.endIndex,
|
|
821
|
+
raw_start: chunk.metadata.rawSpan.start,
|
|
822
|
+
raw_end: chunk.metadata.rawSpan.end,
|
|
823
|
+
vec_start: chunk.metadata.vectorSpan.start,
|
|
824
|
+
vec_end: chunk.metadata.vectorSpan.end
|
|
825
|
+
};
|
|
826
|
+
records.push(record);
|
|
827
|
+
ftsChunks.push({
|
|
828
|
+
chunkId: record.chunk_id,
|
|
829
|
+
filePath: record.file_path,
|
|
830
|
+
chunkIndex: record.chunk_index,
|
|
831
|
+
breadcrumb: record.breadcrumb,
|
|
832
|
+
content: `${record.breadcrumb}
|
|
833
|
+
${record.display_code}`
|
|
834
|
+
});
|
|
835
|
+
}
|
|
836
|
+
filesToUpsert.push({ path: file.path, hash: file.hash, records });
|
|
837
|
+
successFiles.push({ path: file.path, hash: file.hash });
|
|
838
|
+
} catch (err) {
|
|
839
|
+
const error = err;
|
|
840
|
+
logger.error(
|
|
841
|
+
{ path: file.path, error: error.message, stack: error.stack },
|
|
842
|
+
"\u7EC4\u88C5 ChunkRecord \u5931\u8D25"
|
|
843
|
+
);
|
|
844
|
+
errorFiles.push(file.path);
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
if (filesToUpsert.length > 0) {
|
|
848
|
+
try {
|
|
849
|
+
await this.vectorStore?.batchUpsertFiles(filesToUpsert);
|
|
850
|
+
} catch (err) {
|
|
851
|
+
const error = err;
|
|
852
|
+
logger.error({ error: error.message, stack: error.stack }, "LanceDB \u6279\u91CF\u5199\u5165\u5931\u8D25");
|
|
853
|
+
clearVectorIndexHash(
|
|
854
|
+
db,
|
|
855
|
+
batchFiles.map((f) => f.path)
|
|
856
|
+
);
|
|
857
|
+
totalErrors += batchFiles.length;
|
|
858
|
+
completedChunks += batchTexts.length;
|
|
859
|
+
continue;
|
|
860
|
+
}
|
|
861
|
+
}
|
|
862
|
+
if (isChunksFtsInitialized(db) && ftsChunks.length > 0) {
|
|
863
|
+
try {
|
|
864
|
+
const pathsToDelete = filesToUpsert.map((f) => f.path);
|
|
865
|
+
batchDeleteFileChunksFts(db, pathsToDelete);
|
|
866
|
+
batchUpsertChunkFts(db, ftsChunks);
|
|
867
|
+
} catch (err) {
|
|
868
|
+
const error = err;
|
|
869
|
+
logger.warn({ error: error.message }, "FTS \u6279\u91CF\u66F4\u65B0\u5931\u8D25\uFF08\u5411\u91CF\u7D22\u5F15\u5DF2\u6210\u529F\uFF09");
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
if (successFiles.length > 0) {
|
|
873
|
+
batchUpdateVectorIndexHash(db, successFiles);
|
|
874
|
+
}
|
|
875
|
+
totalSuccess += successFiles.length;
|
|
876
|
+
totalErrors += errorFiles.length;
|
|
877
|
+
completedChunks += batchTexts.length;
|
|
878
|
+
logger.info(
|
|
879
|
+
{
|
|
880
|
+
batch: `${batchNum}/${totalBatches}`,
|
|
881
|
+
success: successFiles.length,
|
|
882
|
+
errors: errorFiles.length
|
|
883
|
+
},
|
|
884
|
+
"\u6279\u6B21\u7D22\u5F15\u5B8C\u6210"
|
|
885
|
+
);
|
|
886
|
+
}
|
|
887
|
+
logger.info({ success: totalSuccess, errors: totalErrors }, "\u5168\u90E8\u6279\u6B21\u7D22\u5F15\u5B8C\u6210");
|
|
888
|
+
return { success: totalSuccess, errors: totalErrors };
|
|
889
|
+
}
|
|
890
|
+
/**
|
|
891
|
+
* 删除文件的向量和 FTS 索引
|
|
892
|
+
*/
|
|
893
|
+
async deleteFiles(db, paths) {
|
|
894
|
+
if (!this.vectorStore) return;
|
|
895
|
+
await this.vectorStore.deleteFiles(paths);
|
|
896
|
+
if (isChunksFtsInitialized(db)) {
|
|
897
|
+
batchDeleteFileChunksFts(db, paths);
|
|
898
|
+
}
|
|
899
|
+
logger.debug({ count: paths.length }, "\u5220\u9664\u6587\u4EF6\u7D22\u5F15");
|
|
900
|
+
}
|
|
901
|
+
/**
|
|
902
|
+
* 向量搜索
|
|
903
|
+
*/
|
|
904
|
+
async search(queryVector, limit = 10, filter) {
|
|
905
|
+
if (!this.vectorStore) {
|
|
906
|
+
await this.init();
|
|
907
|
+
}
|
|
908
|
+
return this.vectorStore?.search(queryVector, limit, filter);
|
|
909
|
+
}
|
|
910
|
+
/**
|
|
911
|
+
* 文本搜索(先 embedding 再向量搜索)
|
|
912
|
+
*/
|
|
913
|
+
async textSearch(query, limit = 10, filter) {
|
|
914
|
+
const queryVector = await this.embeddingClient.embed(query);
|
|
915
|
+
return this.search(queryVector, limit, filter);
|
|
916
|
+
}
|
|
917
|
+
/**
|
|
918
|
+
* 清空索引
|
|
919
|
+
*/
|
|
920
|
+
async clear() {
|
|
921
|
+
if (!this.vectorStore) {
|
|
922
|
+
await this.init();
|
|
923
|
+
}
|
|
924
|
+
await this.vectorStore?.clear();
|
|
925
|
+
}
|
|
926
|
+
/**
|
|
927
|
+
* 获取索引统计
|
|
928
|
+
*/
|
|
929
|
+
async getStats() {
|
|
930
|
+
if (!this.vectorStore) {
|
|
931
|
+
await this.init();
|
|
932
|
+
}
|
|
933
|
+
const count = await this.vectorStore?.count() ?? 0;
|
|
934
|
+
return { totalChunks: count };
|
|
935
|
+
}
|
|
936
|
+
};
|
|
937
|
+
/** Fully initialised indexers, keyed by project id. */
var indexers = /* @__PURE__ */ new Map();
/** In-flight initialisations, keyed by project id, so concurrent callers share one instance. */
var pendingIndexers = /* @__PURE__ */ new Map();
/**
 * Returns the cached indexer for a project, creating and initialising it on
 * first use.
 *
 * Concurrent calls for the same project share a single initialisation instead
 * of each constructing (and initialising) their own Indexer. A failed
 * initialisation is not cached, so a later call can retry.
 *
 * @param {string} projectId - Cache key; also passed to the Indexer constructor.
 * @param {number} [vectorDim=1024] - Passed to the Indexer constructor; ignored
 *   when an indexer for the project already exists or is being created.
 * @returns {Promise<object>} The initialised indexer.
 */
async function getIndexer(projectId, vectorDim = 1024) {
  const ready = indexers.get(projectId);
  if (ready) {
    return ready;
  }
  let pending = pendingIndexers.get(projectId);
  if (!pending) {
    pending = (async () => {
      const indexer = new Indexer(projectId, vectorDim);
      await indexer.init();
      indexers.set(projectId, indexer);
      return indexer;
    })().finally(() => {
      // Drop the in-flight marker on success and on failure alike.
      pendingIndexers.delete(projectId);
    });
    pendingIndexers.set(projectId, pending);
  }
  return pending;
}
|
|
947
|
+
/**
 * Empties the indexer cache so that the next lookup builds fresh instances.
 * Only the cache entries are dropped; nothing is called on the indexers.
 */
function closeAllIndexers() {
  indexers.clear();
}
|
|
950
|
+
|
|
951
|
+
// src/search/utils.ts
|
|
952
|
+
/** Compiled word-boundary patterns, keyed by the raw token. */
var tokenBoundaryRegexCache = /* @__PURE__ */ new Map();
/**
 * Returns a cached RegExp that matches `token` as a whole word (`\b...\b`),
 * with regex metacharacters in the token escaped.
 *
 * @param {string} token - Literal text to match.
 * @returns {RegExp} The same instance on every call for a given token.
 */
function getTokenBoundaryRegex(token) {
  const cached = tokenBoundaryRegexCache.get(token);
  if (cached) {
    return cached;
  }
  const literal = token.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const compiled = new RegExp(`\\b${literal}\\b`);
  tokenBoundaryRegexCache.set(token, compiled);
  return compiled;
}
|
|
962
|
+
/**
 * Scores how many query tokens occur in a chunk's breadcrumb and code.
 *
 * The text is lower-cased before matching. A token that appears as a whole
 * word adds 1, a token that appears only as a substring adds 0.5, and a
 * missing token adds nothing.
 *
 * @param {{breadcrumb: string, display_code: string}} chunk
 * @param {Iterable<string>} queryTokens
 * @returns {number} Sum of the per-token scores.
 */
function scoreChunkTokenOverlap(chunk, queryTokens) {
  const haystack = `${chunk.breadcrumb} ${chunk.display_code}`.toLowerCase();
  let total = 0;
  for (const token of queryTokens) {
    if (!haystack.includes(token)) {
      continue;
    }
    const wholeWord = getTokenBoundaryRegex(token).test(haystack);
    total += wholeWord ? 1 : 0.5;
  }
  return total;
}
|
|
977
|
+
|
|
978
|
+
// src/search/resolvers/types.ts
|
|
979
|
+
/**
 * Counts how many leading "/"-separated segments two paths share.
 *
 * @param {string} path1
 * @param {string} path2
 * @returns {number} Number of identical segments before the first difference.
 */
function commonPrefixLength(path1, path2) {
  const left = path1.split("/");
  const right = path2.split("/");
  const limit = Math.min(left.length, right.length);
  let shared = 0;
  while (shared < limit && left[shared] === right[shared]) {
    shared += 1;
  }
  return shared;
}
|
|
992
|
+
|
|
993
|
+
// src/search/resolvers/CppResolver.ts
|
|
994
|
+
/** File extensions handled by CppResolver (matched case-sensitively). */
var CPP_EXTENSIONS = /* @__PURE__ */ new Set([".c", ".cpp", ".cc", ".cxx", ".h", ".hpp", ".hh", ".hxx"]);
/**
 * Import resolver for C / C++ sources: maps `#include "..."` directives onto
 * indexed file paths.
 */
var CppResolver = class {
  /**
   * @param {string} filePath
   * @returns {boolean} True when the text from the last "." onward is a known C/C++ extension.
   */
  supports(filePath) {
    const ext = filePath.slice(filePath.lastIndexOf("."));
    return CPP_EXTENSIONS.has(ext);
  }
  /**
   * Collects the targets of quoted includes. Angle-bracket includes are ignored.
   *
   * @param {string} content - Source text.
   * @returns {string[]} Include targets in order of appearance.
   */
  extract(content) {
    const includePattern = /^\s*#\s*include\s+"([^"]+)"/gm;
    return Array.from(content.matchAll(includePattern), (match) => match[1]);
  }
  /**
   * Resolves an include target to an indexed file.
   *
   * Lookup order:
   *   1. the include joined literally onto the including file's directory;
   *   2. the same path with "." and ".." segments collapsed, so that
   *      `#include "../inc/a.h"` and `#include "./a.h"` can resolve;
   *   3. any indexed file whose path ends with the include (leading dot
   *      segments dropped); ties go to the candidate sharing the longest
   *      directory prefix with the including file.
   *
   * @param {string} importStr - Include target as written in the source.
   * @param {string} currentFile - "/"-separated path of the including file.
   * @param {Set<string>} allFiles - All indexed file paths.
   * @returns {string|null} The resolved path, or null when nothing matches.
   */
  resolve(importStr, currentFile, allFiles) {
    const dirParts = currentFile.split("/").slice(0, -1);
    const currentDir = dirParts.join("/");
    const literalPath = currentDir ? `${currentDir}/${importStr}` : importStr;
    if (allFiles.has(literalPath)) {
      return literalPath;
    }
    const normalizedPath = this.collapseDotSegments([...dirParts, ...importStr.split("/")]);
    if (normalizedPath && allFiles.has(normalizedPath)) {
      return normalizedPath;
    }
    // Suffix lookup uses the include without "./" or "../" noise.
    const lookup = this.collapseDotSegments(importStr.split("/"));
    if (!lookup) {
      return null;
    }
    const candidates = [];
    for (const file of allFiles) {
      if (file === lookup || file.endsWith(`/${lookup}`)) {
        candidates.push(file);
      }
    }
    if (candidates.length === 0) {
      return null;
    }
    if (candidates.length === 1) {
      return candidates[0];
    }
    let bestCandidate = candidates[0];
    let bestPrefixLen = commonPrefixLength(currentFile, bestCandidate);
    for (const candidate of candidates.slice(1)) {
      const prefixLen = commonPrefixLength(currentFile, candidate);
      if (prefixLen > bestPrefixLen) {
        bestPrefixLen = prefixLen;
        bestCandidate = candidate;
      }
    }
    return bestCandidate;
  }
  /**
   * Joins path segments, dropping "" and "." and letting ".." remove the
   * previous segment (a ".." with nothing left to remove is ignored).
   *
   * @param {string[]} parts
   * @returns {string} The collapsed "/"-joined path (possibly empty).
   */
  collapseDotSegments(parts) {
    const stack = [];
    for (const part of parts) {
      if (part === "" || part === ".") {
        continue;
      }
      if (part === "..") {
        stack.pop();
      } else {
        stack.push(part);
      }
    }
    return stack.join("/");
  }
};
|
|
1038
|
+
|
|
1039
|
+
// src/search/resolvers/CSharpResolver.ts
|
|
1040
|
+
/**
 * Import resolver for C# sources: maps `using` directives onto indexed
 * `.cs` files by treating the namespace as a directory path.
 */
var CSharpResolver = class {
  /**
   * @param {string} filePath
   * @returns {boolean} True for paths ending in `.cs`.
   */
  supports(filePath) {
    return filePath.endsWith(".cs");
  }
  /**
   * Collects the namespace of every `using X.Y;` and `using Alias = X.Y;`
   * directive. `using static ...` directives are skipped.
   *
   * @param {string} content - Source text.
   * @returns {string[]} Dotted names in order of appearance.
   */
  extract(content) {
    const usingPattern = /^\s*using\s+(?!static\s)(?!global\s)(?:\w+\s*=\s*)?([\w.]+);/gm;
    return Array.from(content.matchAll(usingPattern), (match) => match[1]);
  }
  /**
   * Resolves a dotted name to an indexed file.
   *
   * Files ending in `/<Name/As/Path>.cs` are tried first; if none exist,
   * files ending in `/<LastSegment>.cs` are used. Among several matches the
   * one sharing the longest directory prefix with `currentFile` wins, and
   * the earliest match wins a tie.
   *
   * @param {string} importStr - Dotted name from a `using` directive.
   * @param {string} currentFile - Path of the importing file.
   * @param {Iterable<string>} allFiles - All indexed file paths.
   * @returns {string|null} The resolved path, or null when nothing matches.
   */
  resolve(importStr, currentFile, allFiles) {
    const endingWith = (ending) => {
      const found = [];
      for (const filePath of allFiles) {
        if (filePath.endsWith(ending)) {
          found.push(filePath);
        }
      }
      return found;
    };
    let matches = endingWith(`/${importStr.replace(/\./g, "/")}.cs`);
    if (matches.length === 0) {
      const segments = importStr.split(".");
      matches = endingWith(`/${segments[segments.length - 1]}.cs`);
    }
    if (matches.length === 0) {
      return null;
    }
    if (matches.length === 1) {
      return matches[0];
    }
    let winner = matches[0];
    let winnerOverlap = commonPrefixLength(currentFile, winner);
    for (const candidate of matches.slice(1)) {
      const overlap = commonPrefixLength(currentFile, candidate);
      if (overlap > winnerOverlap) {
        winnerOverlap = overlap;
        winner = candidate;
      }
    }
    return winner;
  }
};
|
|
1089
|
+
|
|
1090
|
+
// src/search/resolvers/GoResolver.ts
|
|
1091
|
+
/**
 * Import resolver for Go sources: maps import paths onto indexed `.go`
 * files by looking for the import path as a directory inside file paths.
 */
var GoResolver = class {
  /**
   * @param {string} filePath
   * @returns {boolean} True for paths ending in `.go`.
   */
  supports(filePath) {
    return filePath.endsWith(".go");
  }
  /**
   * Collects import paths from single-line imports and `import ( ... )` blocks.
   *
   * Single-line imports may carry a package name, `_` or `.` before the
   * path (`import str "strings"`); those were previously missed.
   *
   * @param {string} content - Source text.
   * @returns {string[]} Single-line imports first, then block imports.
   */
  extract(content) {
    const imports = [];
    const singlePattern = /^\s*import\s+(?:[\w.]+\s+)?"([^"]+)"/gm;
    for (const match of content.matchAll(singlePattern)) {
      imports.push(match[1]);
    }
    const blockPattern = /import\s*\(\s*([\s\S]*?)\s*\)/g;
    for (const match of content.matchAll(blockPattern)) {
      // Every quoted string inside the parentheses counts as an import path.
      for (const lineMatch of match[1].matchAll(/"([^"]+)"/g)) {
        imports.push(lineMatch[1]);
      }
    }
    return imports;
  }
  /**
   * Resolves an import path to one `.go` file of the imported package.
   *
   * Imports with neither "/" nor "." (for example "fmt") are skipped. A file
   * matches when its path contains the import path as a complete directory
   * sequence; the path is tested with a leading "/" so that a package
   * directory at the very start of the path matches too. Non-test files are
   * preferred over `_test.go` files.
   *
   * @param {string} importStr - Import path.
   * @param {string} _currentFile - Unused.
   * @param {Iterable<string>} allFiles - All indexed file paths.
   * @returns {string|null} A file of the package, or null when none is indexed.
   */
  resolve(importStr, _currentFile, allFiles) {
    if (!importStr.includes("/") && !importStr.includes(".")) {
      return null;
    }
    const needle = `/${importStr}/`;
    const candidates = [];
    for (const filePath of allFiles) {
      if (filePath.endsWith(".go") && `/${filePath}`.includes(needle)) {
        candidates.push(filePath);
      }
    }
    if (candidates.length === 0) {
      return null;
    }
    const nonTest = candidates.find((filePath) => !filePath.endsWith("_test.go"));
    return nonTest || candidates[0];
  }
};
|
|
1127
|
+
|
|
1128
|
+
// src/search/resolvers/JavaResolver.ts
|
|
1129
|
+
/**
 * Import resolver for Java sources: maps `import` declarations onto indexed
 * `.java` files by treating the qualified name as a directory path.
 */
var JavaResolver = class {
  /**
   * @param {string} filePath
   * @returns {boolean} True for paths ending in `.java`.
   */
  supports(filePath) {
    return filePath.endsWith(".java");
  }
  /**
   * Collects the qualified name of every import declaration, including
   * on-demand imports (`import a.b.*;`). The previous pattern could not
   * match "*", so on-demand imports were silently dropped and the wildcard
   * branch of resolve() was unreachable.
   *
   * @param {string} content - Source text.
   * @returns {string[]} Qualified names (a trailing ".*" is kept) in order of appearance.
   */
  extract(content) {
    const pattern = /^\s*import\s+(?:static\s+)?([\w.]+(?:\.\*)?)\s*;/gm;
    return Array.from(content.matchAll(pattern), (match) => match[1]);
  }
  /**
   * Resolves an imported name to an indexed file.
   *
   * - `a.b.*`: the first `.java` file inside a directory `a/b/`; failing that,
   *   `a/b.java` (covers `import static a.B.*;`, where `a.B` is a type).
   * - `a.b.C`: the first file ending in `/a/b/C.java`; failing that, the file
   *   for the enclosing name `a/b.java` (static member or nested-class import).
   *
   * @param {string} importStr - Qualified name as returned by extract().
   * @param {string} _currentFile - Unused.
   * @param {Iterable<string>} allFiles - All indexed file paths.
   * @returns {string|null} The resolved path, or null when nothing matches.
   */
  resolve(importStr, _currentFile, allFiles) {
    if (importStr.endsWith(".*")) {
      const pkgPath = importStr.slice(0, -2).replace(/\./g, "/");
      const dirMarker = `/${pkgPath}/`;
      for (const filePath of allFiles) {
        if (filePath.endsWith(".java") && filePath.includes(dirMarker)) {
          return filePath;
        }
      }
      return this.findBySuffix(`/${pkgPath}.java`, allFiles);
    }
    const segments = importStr.split(".");
    const exact = this.findBySuffix(`/${segments.join("/")}.java`, allFiles);
    if (exact !== null) {
      return exact;
    }
    // Needs at least package + type + member to be worth trimming.
    if (segments.length > 2) {
      return this.findBySuffix(`/${segments.slice(0, -1).join("/")}.java`, allFiles);
    }
    return null;
  }
  /**
   * Returns the first indexed path that ends with `suffix`.
   *
   * @param {string} suffix
   * @param {Iterable<string>} allFiles
   * @returns {string|null}
   */
  findBySuffix(suffix, allFiles) {
    for (const filePath of allFiles) {
      if (filePath.endsWith(suffix)) {
        return filePath;
      }
    }
    return null;
  }
};
|
|
1162
|
+
|
|
1163
|
+
// src/search/resolvers/JsTsResolver.ts
|
|
1164
|
+
/**
 * Import resolver for JavaScript / TypeScript sources. Only relative
 * specifiers (those starting with ".") are resolved.
 */
var JsTsResolver = class {
  /** Extensions tried, in priority order. */
  exts = [".ts", ".tsx", ".js", ".jsx", ".mts", ".mjs", ".cts", ".cjs"];
  // TypeScript ESM projects import with a .js extension while the source file is .ts.
  extMapping = {
    ".js": [".ts", ".tsx", ".js", ".jsx"],
    ".jsx": [".tsx", ".jsx"],
    ".mjs": [".mts", ".mjs"],
    ".cjs": [".cts", ".cjs"]
  };
  /**
   * @param {string} filePath
   * @returns {boolean} True when the text after the last "." (lower-cased) is a known extension.
   */
  supports(filePath) {
    const lastSegment = filePath.split(".").pop()?.toLowerCase();
    return this.exts.includes(`.${lastSegment}`);
  }
  /**
   * Collects module specifiers from static import/export statements, then
   * from dynamic `import()` and `require()` calls.
   *
   * @param {string} content - Source text.
   * @returns {string[]} Specifiers; all static matches precede all call matches.
   */
  extract(content) {
    const patterns = [
      // import xxx from './foo'  or  import { xxx } from './foo'
      /(?:import|export)\s+(?:[\w\s{},*]+\s+from\s+)?['"]([^'"]+)['"]/g,
      // import('./foo')  or  require('./foo')
      /(?:import|require)\s*\(\s*['"]([^'"]+)['"]\s*\)/g
    ];
    return patterns.flatMap((pattern) => Array.from(content.matchAll(pattern), (match) => match[1]));
  }
  /**
   * Resolves a relative specifier against the importing file.
   *
   * A specifier that already ends in a known extension is tried with the
   * mapped source extensions only. Otherwise `<path><ext>` is tried for
   * every extension, followed by `<path>/index<ext>`.
   *
   * @param {string} importStr - Module specifier.
   * @param {string} currentFile - "/"-separated path of the importing file.
   * @param {Set<string>} allFiles - All indexed file paths.
   * @returns {string|null} The resolved path, or null.
   */
  resolve(importStr, currentFile, allFiles) {
    if (!importStr.startsWith(".")) {
      return null;
    }
    const dirSegments = currentFile.split("/").slice(0, -1);
    const stack = [];
    for (const segment of [...dirSegments, ...importStr.split("/")]) {
      if (segment === "" || segment === ".") {
        continue;
      }
      if (segment === "..") {
        stack.pop();
      } else {
        stack.push(segment);
      }
    }
    const basePath = stack.join("/");
    const firstIndexed = (paths) => {
      const hit = paths.find((candidate) => allFiles.has(candidate));
      return hit === undefined ? null : hit;
    };
    const knownExt = this.exts.find((ext) => basePath.endsWith(ext));
    if (knownExt) {
      const stem = basePath.slice(0, -knownExt.length);
      const alternatives = this.extMapping[knownExt] || [knownExt];
      return firstIndexed(alternatives.map((ext) => stem + ext));
    }
    const asFile = firstIndexed(this.exts.map((ext) => basePath + ext));
    if (asFile !== null) {
      return asFile;
    }
    return firstIndexed(this.exts.map((ext) => `${basePath}/index${ext}`));
  }
};
|
|
1224
|
+
|
|
1225
|
+
// src/search/resolvers/PythonResolver.ts
|
|
1226
|
+
/**
 * Import resolver for Python sources: maps `import` / `from ... import`
 * statements onto indexed `.py` files.
 */
var PythonResolver = class {
  /**
   * @param {string} filePath
   * @returns {boolean} True for paths ending in `.py`.
   */
  supports(filePath) {
    return filePath.endsWith(".py");
  }
  /**
   * Collects the module part of every `from X import ...` and `import X`
   * statement. Relative modules keep their leading dots (".", "..pkg");
   * the imported names themselves are not returned.
   *
   * @param {string} content - Source text.
   * @returns {string[]} Module strings in order of appearance.
   */
  extract(content) {
    const pattern = /^\s*(?:from\s+(\.{0,3}[\w.]*)\s+import|import\s+([\w.]+))/gm;
    const imports = [];
    for (const match of content.matchAll(pattern)) {
      const importStr = match[1] || match[2];
      if (importStr) {
        imports.push(importStr);
      }
    }
    return imports;
  }
  /**
   * Resolves a module string, dispatching on whether it is relative.
   *
   * @param {string} importStr - Module string as returned by extract().
   * @param {string} currentFile - "/"-separated path of the importing file.
   * @param {Set<string>} allFiles - All indexed file paths.
   * @returns {string|null} The resolved path, or null.
   */
  resolve(importStr, currentFile, allFiles) {
    if (importStr.startsWith(".")) {
      return this.resolveRelativeImport(importStr, currentFile, allFiles);
    }
    return this.resolveAbsoluteImport(importStr, currentFile, allFiles);
  }
  /**
   * Resolves a relative import.
   * - "."     -> `__init__.py` of the current directory
   * - ".foo"  -> `foo.py` or `foo/__init__.py` in the current directory
   * - "..foo" -> the same, one directory up (one extra level per extra dot)
   *
   * Returns null when the dots climb above the top of the path. Candidates
   * are built without a leading "/" when the target directory is the root,
   * so imports from top-level files can match.
   */
  resolveRelativeImport(importStr, currentFile, allFiles) {
    const dotMatch = importStr.match(/^(\.+)/);
    if (!dotMatch) {
      return null;
    }
    const dotCount = dotMatch[1].length;
    const modulePath = importStr.slice(dotCount).replace(/\./g, "/");
    const dirParts = currentFile.split("/").slice(0, -1);
    const levelsUp = dotCount - 1;
    if (levelsUp > dirParts.length) {
      return null;
    }
    const targetDirParts = dirParts.slice(0, dirParts.length - levelsUp);
    const prefix = targetDirParts.length > 0 ? `${targetDirParts.join("/")}/` : "";
    const candidates = modulePath
      ? [`${prefix}${modulePath}.py`, `${prefix}${modulePath}/__init__.py`]
      : [`${prefix}__init__.py`];
    for (const candidate of candidates) {
      if (allFiles.has(candidate)) {
        return candidate;
      }
    }
    return null;
  }
  /**
   * Resolves an absolute import by suffix matching.
   * - "my.pkg" matches files whose path is, or ends at a "/" boundary with,
   *   `my/pkg.py` or `my/pkg/__init__.py`.
   * - With several matches, the one sharing the longest directory prefix
   *   with the importing file's directory wins.
   *
   * The suffixes deliberately carry no leading "/": the explicit boundary
   * check already rejects partial names ("notpkg.py" for "pkg"). With a
   * "/"-prefixed suffix that check required a second "/" in front, so
   * ordinary paths never matched.
   */
  resolveAbsoluteImport(importStr, currentFile, allFiles) {
    const modulePath = importStr.replace(/\./g, "/");
    const suffixes = [`${modulePath}.py`, `${modulePath}/__init__.py`];
    const candidates = [];
    for (const filePath of allFiles) {
      const matches = suffixes.some((suffix) => {
        if (!filePath.endsWith(suffix)) {
          return false;
        }
        const boundaryIndex = filePath.length - suffix.length;
        return boundaryIndex === 0 || filePath[boundaryIndex - 1] === "/";
      });
      if (matches) {
        candidates.push(filePath);
      }
    }
    if (candidates.length === 0) {
      return null;
    }
    if (candidates.length === 1) {
      return candidates[0];
    }
    const currentDir = currentFile.split("/").slice(0, -1).join("/");
    candidates.sort((a, b) => commonPrefixLength(b, currentDir) - commonPrefixLength(a, currentDir));
    return candidates[0];
  }
};
|
|
1308
|
+
|
|
1309
|
+
// src/search/resolvers/RustResolver.ts
|
|
1310
|
+
/**
 * Import resolver for Rust sources: maps `mod x;` declarations and
 * `use crate:: / super:: / self::` paths onto indexed `.rs` files.
 */
var RustResolver = class {
  /**
   * @param {string} filePath
   * @returns {boolean} True for paths ending in `.rs`.
   */
  supports(filePath) {
    return filePath.endsWith(".rs");
  }
  /**
   * Collects module references, tagged by kind:
   * - `mod:<name>` for `mod name;` / `pub mod name;` (inline `mod x { }` is skipped);
   * - `use:<path>` for `use` paths that start with `crate`, `super` or `self`.
   *
   * @param {string} content - Source text.
   * @returns {string[]} All `mod:` entries first, then all `use:` entries.
   */
  extract(content) {
    const imports = [];
    const modPattern = /^\s*(?:pub\s+)?mod\s+(\w+)\s*;/gm;
    for (const match of content.matchAll(modPattern)) {
      imports.push(`mod:${match[1]}`);
    }
    const usePattern = /^\s*(?:pub\s+)?use\s+((?:crate|super|self)(?:::\w+)+)/gm;
    for (const match of content.matchAll(usePattern)) {
      imports.push(`use:${match[1]}`);
    }
    return imports;
  }
  /**
   * Resolves a tagged reference from extract() to `<module>.rs` or
   * `<module>/mod.rs`.
   *
   * Compared with a plain "join and look up":
   * - trailing path segments are dropped one at a time, so
   *   `use crate::foo::Bar` falls back to the file of module `foo`;
   * - children of a file other than mod.rs / lib.rs / main.rs are looked up
   *   in the directory named after that file first (`src/bar.rs` ->
   *   `src/bar/`), then next to it as before;
   * - the crate root is also recognised when the path starts with `src/`;
   * - no leading "/" is produced for files at the top of the tree;
   * - repeated `super::super::` climbs one directory per `super`.
   *
   * @param {string} importStr - `mod:<name>` or `use:<path>`.
   * @param {string} currentFile - "/"-separated path of the importing file.
   * @param {Set<string>} allFiles - All indexed file paths.
   * @returns {string|null} The resolved path, or null.
   */
  resolve(importStr, currentFile, allFiles) {
    const pathParts = currentFile.split("/");
    const fileName = pathParts[pathParts.length - 1];
    const dirParts = pathParts.slice(0, -1);
    const stem = fileName.endsWith(".rs") ? fileName.slice(0, -3) : fileName;
    // mod.rs / lib.rs / main.rs keep their child modules in their own directory.
    const ownsDirectory = stem === "mod" || stem === "lib" || stem === "main";
    const childBases = ownsDirectory ? [dirParts] : [[...dirParts, stem], dirParts];
    if (importStr.startsWith("mod:")) {
      return this.findModuleFile(childBases, [importStr.slice(4)], allFiles, false);
    }
    if (!importStr.startsWith("use:")) {
      return null;
    }
    const parts = importStr.slice(4).split("::");
    let moduleParts = parts.slice(1);
    let bases;
    if (parts[0] === "crate") {
      bases = this.crateRoots(currentFile, dirParts);
    } else if (parts[0] === "super") {
      bases = ownsDirectory ? [dirParts.slice(0, -1)] : [dirParts, dirParts.slice(0, -1)];
      while (moduleParts[0] === "super") {
        bases = bases.map((base) => base.slice(0, -1));
        moduleParts = moduleParts.slice(1);
      }
    } else if (parts[0] === "self") {
      bases = childBases;
    } else {
      return null;
    }
    return this.findModuleFile(bases, moduleParts, allFiles, true);
  }
  /**
   * Candidate crate-root directories for a file: the directory up to the
   * first "/src/" in the path (or the file's own directory when there is
   * none), followed by the nearest enclosing `src` directory if different.
   *
   * @param {string} currentFile
   * @param {string[]} dirParts - Directory segments of `currentFile`.
   * @returns {string[][]} Distinct roots, in lookup order.
   */
  crateRoots(currentFile, dirParts) {
    const roots = [];
    const srcIndex = currentFile.indexOf("/src/");
    roots.push(srcIndex !== -1 ? currentFile.slice(0, srcIndex + 4).split("/") : dirParts);
    const nearestSrc = dirParts.lastIndexOf("src");
    if (nearestSrc !== -1) {
      roots.push(dirParts.slice(0, nearestSrc + 1));
    }
    const seen = new Set();
    return roots.filter((root) => {
      const key = root.join("/");
      if (seen.has(key)) {
        return false;
      }
      seen.add(key);
      return true;
    });
  }
  /**
   * Looks for `<base>/<moduleParts...>.rs` or `.../mod.rs` under each base.
   * When `allowTrim` is set, trailing segments are dropped one at a time
   * (longest path first) until a file is found.
   *
   * @param {string[][]} bases - Base directories, in priority order.
   * @param {string[]} moduleParts - Module path segments.
   * @param {Set<string>} allFiles
   * @param {boolean} allowTrim
   * @returns {string|null}
   */
  findModuleFile(bases, moduleParts, allFiles, allowTrim) {
    const shortest = allowTrim ? 1 : moduleParts.length;
    for (let length = moduleParts.length; length >= shortest && length > 0; length--) {
      for (const base of bases) {
        const modulePath = [...base, ...moduleParts.slice(0, length)].join("/");
        for (const candidate of [`${modulePath}.rs`, `${modulePath}/mod.rs`]) {
          if (allFiles.has(candidate)) {
            return candidate;
          }
        }
      }
    }
    return null;
  }
};
|
|
1373
|
+
|
|
1374
|
+
// src/search/resolvers/index.ts
|
|
1375
|
+
/**
 * Builds one fresh instance of every language resolver.
 *
 * @returns {object[]} Resolvers in lookup-priority order: JS/TS, Python, Go,
 *   Java, Rust, C/C++, C#.
 */
function createResolvers() {
  const resolverClasses = [
    JsTsResolver,
    PythonResolver,
    GoResolver,
    JavaResolver,
    RustResolver,
    CppResolver,
    CSharpResolver
  ];
  return resolverClasses.map((ResolverClass) => new ResolverClass());
}
|
|
1386
|
+
|
|
1387
|
+
// src/search/GraphExpander.ts
|
|
1388
|
+
var GraphExpander = class {
|
|
1389
|
+
projectId;
|
|
1390
|
+
config;
|
|
1391
|
+
vectorStore = null;
|
|
1392
|
+
db = null;
|
|
1393
|
+
// 缓存所有文件路径 (用于快速查找和模糊匹配)
|
|
1394
|
+
allFilePaths = null;
|
|
1395
|
+
// 注册解析器(按优先级排列)
|
|
1396
|
+
resolvers = createResolvers();
|
|
1397
|
+
constructor(projectId, config) {
|
|
1398
|
+
this.projectId = projectId;
|
|
1399
|
+
this.config = config;
|
|
1400
|
+
}
|
|
1401
|
+
async init() {
|
|
1402
|
+
const embeddingConfig = getEmbeddingConfig();
|
|
1403
|
+
this.vectorStore = await getVectorStore(this.projectId, embeddingConfig.dimensions);
|
|
1404
|
+
this.db = initDb(this.projectId);
|
|
1405
|
+
}
|
|
1406
|
+
/**
|
|
1407
|
+
* 加载文件索引 (Lazy Load)
|
|
1408
|
+
* 相比反复查 DB,一次性加载所有路径到 Set 内存占用极低且速度极快
|
|
1409
|
+
*/
|
|
1410
|
+
loadFileIndex() {
|
|
1411
|
+
if (this.allFilePaths) return;
|
|
1412
|
+
if (!this.db) this.db = initDb(this.projectId);
|
|
1413
|
+
const rows = this.db.prepare("SELECT path FROM files").all();
|
|
1414
|
+
this.allFilePaths = new Set(rows.map((r) => r.path));
|
|
1415
|
+
logger.debug({ count: this.allFilePaths.size }, "GraphExpander: \u6587\u4EF6\u7D22\u5F15\u5DF2\u52A0\u8F7D");
|
|
1416
|
+
}
|
|
1417
|
+
/**
|
|
1418
|
+
* 使文件索引失效(用于增量索引后刷新)
|
|
1419
|
+
*/
|
|
1420
|
+
invalidateFileIndex() {
|
|
1421
|
+
this.allFilePaths = null;
|
|
1422
|
+
}
|
|
1423
|
+
/**
|
|
1424
|
+
* 扩展 seed chunks
|
|
1425
|
+
*/
|
|
1426
|
+
async expand(seeds, queryTokens) {
|
|
1427
|
+
if (!this.vectorStore || !this.db) {
|
|
1428
|
+
await this.init();
|
|
1429
|
+
}
|
|
1430
|
+
this.loadFileIndex();
|
|
1431
|
+
const stats = {
|
|
1432
|
+
neighborCount: 0,
|
|
1433
|
+
breadcrumbCount: 0,
|
|
1434
|
+
importCount: 0,
|
|
1435
|
+
importDepth1Count: 0
|
|
1436
|
+
};
|
|
1437
|
+
if (seeds.length === 0) {
|
|
1438
|
+
return { chunks: [], stats };
|
|
1439
|
+
}
|
|
1440
|
+
const existingKeys = new Set(seeds.map((s) => this.getChunkKey(s)));
|
|
1441
|
+
const expandedChunks = [];
|
|
1442
|
+
const seedsByFile = this.groupByFile(seeds);
|
|
1443
|
+
const neighborChunks = await this.expandNeighbors(seedsByFile, existingKeys);
|
|
1444
|
+
this.addChunks(neighborChunks, expandedChunks, existingKeys);
|
|
1445
|
+
stats.neighborCount = neighborChunks.length;
|
|
1446
|
+
const breadcrumbChunks = await this.expandBreadcrumb(seeds, existingKeys);
|
|
1447
|
+
this.addChunks(breadcrumbChunks, expandedChunks, existingKeys);
|
|
1448
|
+
stats.breadcrumbCount = breadcrumbChunks.length;
|
|
1449
|
+
const importChunks = await this.expandImports(seeds, existingKeys, queryTokens, stats);
|
|
1450
|
+
this.addChunks(importChunks, expandedChunks, existingKeys);
|
|
1451
|
+
stats.importCount = importChunks.length;
|
|
1452
|
+
logger.debug(stats, "\u4E0A\u4E0B\u6587\u6269\u5C55\u5B8C\u6210");
|
|
1453
|
+
return { chunks: expandedChunks, stats };
|
|
1454
|
+
}
|
|
1455
|
+
/**
|
|
1456
|
+
* 添加 chunks 并更新去重集合
|
|
1457
|
+
*/
|
|
1458
|
+
addChunks(newChunks, target, keys) {
|
|
1459
|
+
for (const chunk of newChunks) {
|
|
1460
|
+
const key = this.getChunkKey(chunk);
|
|
1461
|
+
if (!keys.has(key)) {
|
|
1462
|
+
keys.add(key);
|
|
1463
|
+
target.push(chunk);
|
|
1464
|
+
}
|
|
1465
|
+
}
|
|
1466
|
+
}
|
|
1467
|
+
// =========================================
|
|
1468
|
+
// E1: 同文件邻居扩展
|
|
1469
|
+
// =========================================
|
|
1470
|
+
/**
|
|
1471
|
+
* 扩展同文件邻居
|
|
1472
|
+
*
|
|
1473
|
+
* 对于每个 seed,获取其前后 ±neighborHops 个 chunks
|
|
1474
|
+
*/
|
|
1475
|
+
async expandNeighbors(seedsByFile, existingKeys) {
|
|
1476
|
+
const result = [];
|
|
1477
|
+
const { neighborHops, decayNeighbor } = this.config;
|
|
1478
|
+
const allFilePaths = Array.from(seedsByFile.keys());
|
|
1479
|
+
const allChunksMap = await this.vectorStore?.getFilesChunks(allFilePaths);
|
|
1480
|
+
if (!allChunksMap) return result;
|
|
1481
|
+
for (const [filePath, fileSeeds] of seedsByFile) {
|
|
1482
|
+
const allChunks = allChunksMap.get(filePath) ?? [];
|
|
1483
|
+
if (allChunks.length === 0) continue;
|
|
1484
|
+
const sortedChunks = allChunks.sort((a, b) => a.chunk_index - b.chunk_index);
|
|
1485
|
+
const chunkMap = new Map(sortedChunks.map((c) => [c.chunk_index, c]));
|
|
1486
|
+
const seedIndices = new Set(fileSeeds.map((s) => s.chunkIndex));
|
|
1487
|
+
const neighborIndices = /* @__PURE__ */ new Set();
|
|
1488
|
+
for (const seed of fileSeeds) {
|
|
1489
|
+
const baseIndex = seed.chunkIndex;
|
|
1490
|
+
for (let delta = -neighborHops; delta <= neighborHops; delta++) {
|
|
1491
|
+
if (delta === 0) continue;
|
|
1492
|
+
const neighborIndex = baseIndex + delta;
|
|
1493
|
+
if (!seedIndices.has(neighborIndex) && chunkMap.has(neighborIndex)) {
|
|
1494
|
+
neighborIndices.add(neighborIndex);
|
|
1495
|
+
}
|
|
1496
|
+
}
|
|
1497
|
+
}
|
|
1498
|
+
for (const neighborIndex of neighborIndices) {
|
|
1499
|
+
const chunk = chunkMap.get(neighborIndex);
|
|
1500
|
+
if (!chunk) continue;
|
|
1501
|
+
const key = `${filePath}#${neighborIndex}`;
|
|
1502
|
+
if (existingKeys.has(key)) continue;
|
|
1503
|
+
let minDistance = Infinity;
|
|
1504
|
+
let maxSeedScore = 0;
|
|
1505
|
+
for (const seed of fileSeeds) {
|
|
1506
|
+
const distance = Math.abs(neighborIndex - seed.chunkIndex);
|
|
1507
|
+
if (distance < minDistance) {
|
|
1508
|
+
minDistance = distance;
|
|
1509
|
+
maxSeedScore = seed.score;
|
|
1510
|
+
} else if (distance === minDistance && seed.score > maxSeedScore) {
|
|
1511
|
+
maxSeedScore = seed.score;
|
|
1512
|
+
}
|
|
1513
|
+
}
|
|
1514
|
+
const decayedScore = maxSeedScore * decayNeighbor ** minDistance;
|
|
1515
|
+
result.push({
|
|
1516
|
+
filePath,
|
|
1517
|
+
chunkIndex: neighborIndex,
|
|
1518
|
+
score: decayedScore,
|
|
1519
|
+
source: "neighbor",
|
|
1520
|
+
record: { ...chunk, _distance: 0 }
|
|
1521
|
+
});
|
|
1522
|
+
}
|
|
1523
|
+
}
|
|
1524
|
+
return result;
|
|
1525
|
+
}
|
|
1526
|
+
// =========================================
|
|
1527
|
+
// E2: breadcrumb 补段
|
|
1528
|
+
// =========================================
|
|
1529
|
+
/**
|
|
1530
|
+
* 扩展 breadcrumb 补段
|
|
1531
|
+
*
|
|
1532
|
+
* 对于每个 seed,找到具有相同 breadcrumb 前缀的其他 chunks
|
|
1533
|
+
* 例如:如果 seed 的 breadcrumb 是 "src/foo.ts > class Foo > method bar"
|
|
1534
|
+
* 则会找到 "src/foo.ts > class Foo > ..." 的其他 chunks
|
|
1535
|
+
*/
|
|
1536
|
+
async expandBreadcrumb(seeds, existingKeys) {
|
|
1537
|
+
const result = [];
|
|
1538
|
+
const { breadcrumbExpandLimit, decayBreadcrumb } = this.config;
|
|
1539
|
+
const prefixGroups = /* @__PURE__ */ new Map();
|
|
1540
|
+
for (const seed of seeds) {
|
|
1541
|
+
const prefix = this.extractBreadcrumbPrefix(seed.record.breadcrumb);
|
|
1542
|
+
if (!prefix) continue;
|
|
1543
|
+
if (!prefixGroups.has(prefix)) {
|
|
1544
|
+
prefixGroups.set(prefix, []);
|
|
1545
|
+
}
|
|
1546
|
+
prefixGroups.get(prefix)?.push(seed);
|
|
1547
|
+
}
|
|
1548
|
+
const uniqueFilePaths = /* @__PURE__ */ new Set();
|
|
1549
|
+
for (const prefixSeeds of prefixGroups.values()) {
|
|
1550
|
+
uniqueFilePaths.add(prefixSeeds[0].filePath);
|
|
1551
|
+
}
|
|
1552
|
+
const allChunksMap = await this.vectorStore?.getFilesChunks(Array.from(uniqueFilePaths));
|
|
1553
|
+
if (!allChunksMap) return result;
|
|
1554
|
+
for (const [prefix, prefixSeeds] of prefixGroups) {
|
|
1555
|
+
const filePath = prefixSeeds[0].filePath;
|
|
1556
|
+
const allChunks = allChunksMap.get(filePath) ?? [];
|
|
1557
|
+
const chunkPrefixCache = /* @__PURE__ */ new Map();
|
|
1558
|
+
for (const chunk of allChunks) {
|
|
1559
|
+
chunkPrefixCache.set(chunk.chunk_index, this.extractBreadcrumbPrefix(chunk.breadcrumb));
|
|
1560
|
+
}
|
|
1561
|
+
const matchingChunks = allChunks.filter((chunk) => {
|
|
1562
|
+
return chunkPrefixCache.get(chunk.chunk_index) === prefix;
|
|
1563
|
+
});
|
|
1564
|
+
const seedIndices = new Set(prefixSeeds.map((s) => s.chunkIndex));
|
|
1565
|
+
const newChunks = matchingChunks.filter((chunk) => !seedIndices.has(chunk.chunk_index)).filter((chunk) => !existingKeys.has(`${filePath}#${chunk.chunk_index}`)).slice(0, breadcrumbExpandLimit);
|
|
1566
|
+
const maxSeedScore = Math.max(...prefixSeeds.map((s) => s.score));
|
|
1567
|
+
for (const chunk of newChunks) {
|
|
1568
|
+
result.push({
|
|
1569
|
+
filePath,
|
|
1570
|
+
chunkIndex: chunk.chunk_index,
|
|
1571
|
+
score: maxSeedScore * decayBreadcrumb,
|
|
1572
|
+
source: "breadcrumb",
|
|
1573
|
+
record: { ...chunk, _distance: 0 }
|
|
1574
|
+
});
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
return result;
|
|
1578
|
+
}
|
|
1579
|
+
/**
|
|
1580
|
+
* 提取 breadcrumb 的父级前缀
|
|
1581
|
+
*
|
|
1582
|
+
* 例如:
|
|
1583
|
+
* - "src/foo.ts > class Foo > method bar" → "src/foo.ts > class Foo"
|
|
1584
|
+
* - "src/foo.ts > function baz" → "src/foo.ts"
|
|
1585
|
+
* - "src/foo.ts" → null (没有父级)
|
|
1586
|
+
*/
|
|
1587
|
+
extractBreadcrumbPrefix(breadcrumb) {
|
|
1588
|
+
const parts = breadcrumb.split(" > ");
|
|
1589
|
+
if (parts.length <= 1) return null;
|
|
1590
|
+
return parts.slice(0, -1).join(" > ");
|
|
1591
|
+
}
|
|
1592
|
+
// =========================================
|
|
1593
|
+
// E3: 跨文件引用解析(多语言支持)
|
|
1594
|
+
// =========================================
|
|
1595
|
+
/**
|
|
1596
|
+
* 扩展 import 关系
|
|
1597
|
+
*
|
|
1598
|
+
* 解析 seed 文件中的 import 语句,获取被导入文件的 chunks
|
|
1599
|
+
* 支持多语言:TypeScript/JavaScript, Python, Go, Java, Rust
|
|
1600
|
+
*/
|
|
1601
|
+
async expandImports(seeds, existingKeys, queryTokens, stats) {
|
|
1602
|
+
const result = [];
|
|
1603
|
+
const { importFilesPerSeed, chunksPerImportFile, decayImport, decayDepth } = this.config;
|
|
1604
|
+
const seedScoreByFile = this.buildSeedScoreByFile(seeds);
|
|
1605
|
+
const queue = [];
|
|
1606
|
+
const visited = /* @__PURE__ */ new Set();
|
|
1607
|
+
for (const [filePath, seedScore] of seedScoreByFile.entries()) {
|
|
1608
|
+
queue.push({ filePath, depth: 0, seedScore });
|
|
1609
|
+
}
|
|
1610
|
+
const resolvedImports = [];
|
|
1611
|
+
const allTargetPaths = /* @__PURE__ */ new Set();
|
|
1612
|
+
while (queue.length > 0) {
|
|
1613
|
+
const item = queue.shift();
|
|
1614
|
+
if (!item) break;
|
|
1615
|
+
const { filePath, depth, seedScore } = item;
|
|
1616
|
+
if (visited.has(filePath)) continue;
|
|
1617
|
+
visited.add(filePath);
|
|
1618
|
+
if (depth > 0 && !this.isBarrelFile(filePath)) continue;
|
|
1619
|
+
const resolver = this.resolvers.find((r) => r.supports(filePath));
|
|
1620
|
+
if (!resolver) continue;
|
|
1621
|
+
const row = this.db?.prepare("SELECT content FROM files WHERE path = ?").get(filePath);
|
|
1622
|
+
if (!row?.content) continue;
|
|
1623
|
+
const importStrs = resolver.extract(row.content);
|
|
1624
|
+
if (importStrs.length === 0) continue;
|
|
1625
|
+
const perFileLimit = depth === 0 ? importFilesPerSeed : Math.min(importFilesPerSeed, 2);
|
|
1626
|
+
let importCount = 0;
|
|
1627
|
+
const processedImports = /* @__PURE__ */ new Set();
|
|
1628
|
+
for (const importStr of importStrs) {
|
|
1629
|
+
if (importCount >= perFileLimit) break;
|
|
1630
|
+
if (processedImports.has(importStr)) continue;
|
|
1631
|
+
processedImports.add(importStr);
|
|
1632
|
+
const targetPath = resolver.resolve(importStr, filePath, this.allFilePaths);
|
|
1633
|
+
if (!targetPath || targetPath === filePath) continue;
|
|
1634
|
+
allTargetPaths.add(targetPath);
|
|
1635
|
+
resolvedImports.push({ targetPath, depth, seedScore, sourceFilePath: filePath });
|
|
1636
|
+
importCount++;
|
|
1637
|
+
if (depth === 0 && this.isBarrelFile(targetPath)) {
|
|
1638
|
+
if (stats) stats.importDepth1Count++;
|
|
1639
|
+
queue.push({ filePath: targetPath, depth: 1, seedScore });
|
|
1640
|
+
}
|
|
1641
|
+
}
|
|
1642
|
+
}
|
|
1643
|
+
if (allTargetPaths.size === 0) return result;
|
|
1644
|
+
const importChunksMap = await this.vectorStore?.getFilesChunks(Array.from(allTargetPaths));
|
|
1645
|
+
if (!importChunksMap) return result;
|
|
1646
|
+
const bestByKey = /* @__PURE__ */ new Map();
|
|
1647
|
+
for (const { targetPath, depth, seedScore } of resolvedImports) {
|
|
1648
|
+
const importChunks = importChunksMap.get(targetPath);
|
|
1649
|
+
if (!importChunks || importChunks.length === 0) continue;
|
|
1650
|
+
const selectedChunks = this.selectImportChunks(
|
|
1651
|
+
importChunks,
|
|
1652
|
+
chunksPerImportFile,
|
|
1653
|
+
queryTokens
|
|
1654
|
+
);
|
|
1655
|
+
const depthDecay = depth === 0 ? 1 : decayDepth;
|
|
1656
|
+
for (const chunk of selectedChunks) {
|
|
1657
|
+
const key = `${targetPath}#${chunk.chunk_index}`;
|
|
1658
|
+
if (existingKeys.has(key)) continue;
|
|
1659
|
+
const score = seedScore * decayImport * depthDecay;
|
|
1660
|
+
const existing = bestByKey.get(key);
|
|
1661
|
+
if (!existing || score > existing.score) {
|
|
1662
|
+
bestByKey.set(key, {
|
|
1663
|
+
filePath: targetPath,
|
|
1664
|
+
chunkIndex: chunk.chunk_index,
|
|
1665
|
+
score,
|
|
1666
|
+
source: "import",
|
|
1667
|
+
record: { ...chunk, _distance: 0 }
|
|
1668
|
+
});
|
|
1669
|
+
}
|
|
1670
|
+
}
|
|
1671
|
+
}
|
|
1672
|
+
return Array.from(bestByKey.values());
|
|
1673
|
+
}
|
|
1674
|
+
// =========================================
|
|
1675
|
+
// 工具方法
|
|
1676
|
+
// =========================================
|
|
1677
|
+
/**
|
|
1678
|
+
* 生成 chunk 唯一键
|
|
1679
|
+
*/
|
|
1680
|
+
getChunkKey(chunk) {
|
|
1681
|
+
return `${chunk.filePath}#${chunk.chunkIndex}`;
|
|
1682
|
+
}
|
|
1683
|
+
/**
|
|
1684
|
+
* 按文件分组
|
|
1685
|
+
*/
|
|
1686
|
+
groupByFile(chunks) {
|
|
1687
|
+
const groups = /* @__PURE__ */ new Map();
|
|
1688
|
+
for (const chunk of chunks) {
|
|
1689
|
+
if (!groups.has(chunk.filePath)) {
|
|
1690
|
+
groups.set(chunk.filePath, []);
|
|
1691
|
+
}
|
|
1692
|
+
groups.get(chunk.filePath)?.push(chunk);
|
|
1693
|
+
}
|
|
1694
|
+
return groups;
|
|
1695
|
+
}
|
|
1696
|
+
/**
|
|
1697
|
+
* 按文件汇总 seed 最大得分
|
|
1698
|
+
*/
|
|
1699
|
+
buildSeedScoreByFile(seeds) {
|
|
1700
|
+
const map = /* @__PURE__ */ new Map();
|
|
1701
|
+
for (const seed of seeds) {
|
|
1702
|
+
const current = map.get(seed.filePath);
|
|
1703
|
+
if (current === void 0 || seed.score > current) {
|
|
1704
|
+
map.set(seed.filePath, seed.score);
|
|
1705
|
+
}
|
|
1706
|
+
}
|
|
1707
|
+
return map;
|
|
1708
|
+
}
|
|
1709
|
+
/**
|
|
1710
|
+
* 选择导入文件的 chunks(优先 query overlap)
|
|
1711
|
+
*/
|
|
1712
|
+
selectImportChunks(chunks, limit, queryTokens) {
|
|
1713
|
+
if (limit <= 0) return [];
|
|
1714
|
+
const sortedByIndex = chunks.slice().sort((a, b) => a.chunk_index - b.chunk_index);
|
|
1715
|
+
if (!queryTokens || queryTokens.size === 0) {
|
|
1716
|
+
return sortedByIndex.slice(0, limit);
|
|
1717
|
+
}
|
|
1718
|
+
const scored = sortedByIndex.map((chunk) => ({
|
|
1719
|
+
chunk,
|
|
1720
|
+
score: scoreChunkTokenOverlap(chunk, queryTokens)
|
|
1721
|
+
}));
|
|
1722
|
+
const overlapped = scored.filter((s) => s.score > 0).sort((a, b) => b.score - a.score).slice(0, limit).map((s) => s.chunk);
|
|
1723
|
+
return overlapped.length > 0 ? overlapped : sortedByIndex.slice(0, limit);
|
|
1724
|
+
}
|
|
1725
|
+
/**
|
|
1726
|
+
* 判断是否为 barrel/index 文件
|
|
1727
|
+
*/
|
|
1728
|
+
isBarrelFile(filePath) {
|
|
1729
|
+
const lower = filePath.toLowerCase();
|
|
1730
|
+
if (lower.endsWith("/__init__.py")) return true;
|
|
1731
|
+
if (lower.endsWith("/mod.rs")) return true;
|
|
1732
|
+
return /\/index\.(ts|tsx|js|jsx|mts|mjs|cts|cjs)$/.test(lower);
|
|
1733
|
+
}
|
|
1734
|
+
};
|
|
1735
|
+
// Registry of expander instances keyed by project id.
var expanders = /* @__PURE__ */ new Map();

/**
 * Call `invalidateFileIndex()` on every expander currently in the registry.
 */
function invalidateAllExpanderCaches() {
  expanders.forEach((expander) => {
    expander.invalidateFileIndex();
  });
}
|
|
1741
|
+
/**
 * Return the expander registered for a project, creating and initialising one
 * on first use.
 *
 * @param projectId - Key used to look up / register the expander.
 * @param config - Passed to the `GraphExpander` constructor when a new
 *   instance is created; not used when a registered instance is returned.
 * @returns Promise of the expander for `projectId`.
 */
async function getGraphExpander(projectId, config) {
  const registered = expanders.get(projectId);
  if (registered) {
    return registered;
  }

  // NOTE(review): the instance is registered only after `init()` resolves, so
  // overlapping first calls for the same project each build their own
  // instance and the last one to finish is the one kept - confirm that is
  // acceptable.
  const created = new GraphExpander(projectId, config);
  await created.init();
  expanders.set(projectId, created);
  return created;
}
|
|
1750
|
+
|
|
1751
|
+
export {
|
|
1752
|
+
getVectorStore,
|
|
1753
|
+
closeAllVectorStores,
|
|
1754
|
+
getIndexer,
|
|
1755
|
+
closeAllIndexers,
|
|
1756
|
+
scoreChunkTokenOverlap,
|
|
1757
|
+
invalidateAllExpanderCaches,
|
|
1758
|
+
getGraphExpander
|
|
1759
|
+
};
|
|
1760
|
+
//# sourceMappingURL=chunk-6QMYML5V.js.map
|