@huyooo/ai-search 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +431 -0
- package/dist/bridge/electron.d.ts +51 -0
- package/dist/bridge/electron.js +10 -0
- package/dist/bridge/electron.js.map +1 -0
- package/dist/chunk-GAT4F5NK.js +176 -0
- package/dist/chunk-GAT4F5NK.js.map +1 -0
- package/dist/chunk-YJIIX54F.js +4239 -0
- package/dist/chunk-YJIIX54F.js.map +1 -0
- package/dist/index-B6UR8lRu.d.ts +576 -0
- package/dist/index.d.ts +508 -0
- package/dist/index.js +99 -0
- package/dist/index.js.map +1 -0
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +9 -0
- package/dist/tools/index.js.map +1 -0
- package/package.json +89 -0
|
@@ -0,0 +1,4239 @@
|
|
|
1
|
+
// src/core/search.ts
|
|
2
|
+
import * as fs7 from "fs/promises";
|
|
3
|
+
import * as path10 from "path";
|
|
4
|
+
|
|
5
|
+
// src/types.ts
|
|
6
|
+
/**
 * String tags used to label the kind of an indexed entry.
 * Keys are the symbolic names used in code; values are the lowercase
 * strings that are stored and compared at runtime.
 */
var FileType = {
  // Containers and generic files
  FOLDER: "folder",
  FILE: "file",

  // Media
  IMAGE: "image",
  VIDEO: "video",
  MUSIC: "music",

  // Textual content
  DOCUMENT: "document",
  CODE: "code",
  TEXT: "text",
  PDF: "pdf",

  // Everything else
  ARCHIVE: "archive",
  APPLICATION: "application",
  UNKNOWN: "unknown"
};
|
|
20
|
+
/**
 * Default indexing configuration.
 */
var DEFAULT_CONFIG = {
  // Directory names to skip (merged into the rule manager's excludedPaths).
  excludeDirs: [
    "node_modules",
    ".git",
    ".svn",
    "__pycache__",
    ".cache",
    ".Trash",
    "Library",
    "AppData",
    "$RECYCLE.BIN"
  ],

  // Supported file extensions (merged into the rule manager's allowed extensions).
  extensions: [
    ".docx",
    ".doc",
    ".pdf",
    ".xlsx",
    ".xls",
    ".pptx",
    ".ppt",
    ".txt",
    ".md",
    ".rtf"
  ],

  // Upper bound on file size: 50 MB expressed in bytes.
  maxFileSize: 50 * 1024 * 1024,

  // Embedding model identifier and the dimension of the vectors it produces.
  embeddingModel: "doubao-embedding-vision-250615",
  embeddingDimension: 1024,

  // Number of concurrent workers used for parallel indexing.
  indexConcurrency: 5,

  // Parallel indexing is enabled by default.
  enableParallelIndexing: true
};
|
|
56
|
+
|
|
57
|
+
// src/storage/vector.ts
|
|
58
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
59
|
+
/**
 * Wrapper around a single LanceDB table holding `{ id, vector }` rows.
 *
 * `init()` must be awaited before any other method; until then every data
 * method throws `Error("VectorStore not initialized")`.
 */
var VectorStore = class {
  db = null;
  table = null;
  dbPath;
  tableName;
  dimension;

  /**
   * @param {string} dbPath - Location handed to `lancedb.connect`.
   * @param {string} [tableName="documents"] - Table to open or create.
   * @param {number} [dimension=384] - Length of the zero vector used to seed
   *   a newly created table.
   *   NOTE(review): DEFAULT_CONFIG.embeddingDimension is 1024; callers
   *   presumably pass the dimension explicitly — confirm.
   */
  constructor(dbPath, tableName = "documents", dimension = 384) {
    this.dbPath = dbPath;
    this.tableName = tableName;
    this.dimension = dimension;
  }

  /**
   * Build an `id = '<value>'` filter with the value as a quoted SQL string
   * literal. Embedded single quotes are doubled so an id containing a quote
   * cannot terminate the literal early and change the predicate.
   */
  #idFilter(id) {
    return `id = '${String(id).replaceAll("'", "''")}'`;
  }

  /** Return the open table, or throw when `init()` has not completed. */
  #requireTable() {
    if (!this.table) {
      throw new Error("VectorStore not initialized");
    }
    return this.table;
  }

  /**
   * Connect to the database and open the table, creating it when opening fails.
   * A new table is created from one placeholder row (so the table has an
   * `id` and a `vector` column of the configured length), which is then deleted.
   */
  async init() {
    this.db = await lancedb.connect(this.dbPath);
    try {
      this.table = await this.db.openTable(this.tableName);
    } catch {
      // Opening failed (typically: table does not exist yet) — create it.
      const seedRows = [{ id: "__init__", vector: new Array(this.dimension).fill(0) }];
      this.table = await this.db.createTable(this.tableName, seedRows);
      await this.table.delete(this.#idFilter("__init__"));
    }
  }

  /**
   * Append records to the table. An empty array is a no-op.
   * @param {Array<{id: string, vector: number[]}>} records
   */
  async add(records) {
    const table = this.#requireTable();
    if (records.length === 0) return;
    await table.add(records);
  }

  /**
   * Replace a record: delete any row with the same id, then add the new one.
   * The two steps are not atomic.
   */
  async update(record) {
    this.#requireTable();
    await this.delete(record.id);
    await this.add([record]);
  }

  /**
   * Fetch the stored vector for an id.
   * Uses a plain filtered scan (`table.query()`), since no query vector is involved.
   * @returns {Promise<*|null>} The row's `vector` value, or null when the id
   *   is absent or the lookup fails.
   */
  async getById(id) {
    const table = this.#requireTable();
    try {
      const rows = await table.query().where(this.#idFilter(id)).limit(1).toArray();
      return rows.length === 0 ? null : rows[0].vector;
    } catch {
      // Best-effort lookup: callers treat failure the same as "not found".
      return null;
    }
  }

  /** Delete every row whose id equals `id`. */
  async delete(id) {
    const table = this.#requireTable();
    await table.delete(this.#idFilter(id));
  }

  /**
   * Nearest-neighbour search.
   * @param {number[]} queryVector
   * @param {number} [limit=10] - Maximum number of hits.
   * @returns {Promise<Array<{id: string, distance: number}>>} Hits with the
   *   `_distance` value reported by LanceDB.
   */
  async search(queryVector, limit = 10) {
    const table = this.#requireTable();
    const hits = await table.vectorSearch(queryVector).limit(limit).toArray();
    return hits.map((hit) => ({
      id: hit.id,
      distance: hit._distance
    }));
  }

  /** @returns {Promise<number>} Number of rows in the table. */
  async count() {
    const table = this.#requireTable();
    return await table.countRows();
  }

  /**
   * Check whether a row with the given id exists.
   * Unlike `getById`, lookup errors propagate to the caller.
   */
  async exists(id) {
    const table = this.#requireTable();
    const rows = await table.query().where(this.#idFilter(id)).limit(1).toArray();
    return rows.length > 0;
  }

  /**
   * Drop the references to the table and connection. The instance must be
   * re-initialised with `init()` before it can be used again.
   */
  close() {
    this.table = null;
    this.db = null;
  }
};
|
|
170
|
+
|
|
171
|
+
// src/storage/fulltext.ts
|
|
172
|
+
import FlexSearch from "flexsearch";
|
|
173
|
+
import nodejieba from "nodejieba";
|
|
174
|
+
import * as path from "path";
|
|
175
|
+
/**
 * In-memory full-text index over `{ id, title, content }` records, built on
 * a FlexSearch document index with nodejieba word segmentation.
 *
 * In the code visible here `init`, `save` and `load` are empty, so nothing
 * is read from or written to `indexPath`.
 */
var FullTextIndex = class {
  index;
  indexPath;
  docCount = 0;
  // Ids currently in the index. Tracked so docCount stays exact when an
  // existing id is added again or an unknown id is removed.
  knownIds = new Set();

  /**
   * @param {string} dataDir - Directory used to derive `indexPath`.
   */
  constructor(dataDir) {
    this.indexPath = path.join(dataDir, "fulltext-index.json");
    this.index = this.createIndex();
  }

  /**
   * Create an empty FlexSearch document index over `title` and `content`,
   * storing the documents themselves so `getContent` can read them back.
   */
  createIndex() {
    return new FlexSearch.Document({
      document: {
        id: "id",
        index: ["title", "content"],
        store: true
      },
      // Tokenise with jieba, then drop tokens that are empty or consist only
      // of whitespace / punctuation.
      encode: (str) => {
        const tokens = nodejieba.cut(str);
        return tokens.filter((t) => t.trim().length > 0 && !/^[\s\p{P}]+$/u.test(t));
      },
      cache: 100
    });
  }

  /** Initialise the index. Currently a no-op. */
  async init() {
  }

  /**
   * Add a record to the index.
   * @param {{id: string, title?: string, content?: string}} record
   */
  add(record) {
    this.index.add(record);
    this.knownIds.add(record.id);
    this.docCount = this.knownIds.size;
  }

  /** Replace a record: remove the old entry (if any), then add the new one. */
  update(record) {
    this.remove(record.id);
    this.add(record);
  }

  /** Remove the record with the given id. Unknown ids leave the count unchanged. */
  remove(id) {
    this.index.remove(id);
    this.knownIds.delete(id);
    this.docCount = this.knownIds.size;
  }

  /**
   * Search both fields and merge the per-field rankings into one list.
   *
   * Each hit contributes `weight / (rank + 1)`, where title hits weigh 2 and
   * content hits weigh 1. Totals are divided by `max(best total, 1)`, so
   * returned scores never exceed 1.
   *
   * @param {string} query
   * @param {number} [limit=10] - Maximum number of results.
   * @returns {Array<{id: string, score: number}>} Best match first.
   */
  search(query, limit = 10) {
    if (!query.trim()) {
      return [];
    }
    // Over-fetch per field so merging still leaves `limit` candidates.
    const fieldResults = this.index.search(query, {
      limit: limit * 2,
      index: ["title", "content"],
      enrich: true
    });
    const idScores = /* @__PURE__ */ new Map();
    for (const fieldResult of fieldResults) {
      if (!Array.isArray(fieldResult.result)) continue;
      const weight = fieldResult.field === "title" ? 2 : 1;
      fieldResult.result.forEach((item, rank) => {
        // Enriched hits are objects carrying `id`; plain hits are the id itself.
        const id = typeof item === "object" && item !== null ? item.id ?? item : item;
        if (typeof id !== "string") return;
        idScores.set(id, (idScores.get(id) || 0) + weight / (rank + 1));
      });
    }
    const maxScore = Math.max(...idScores.values(), 1);
    return Array.from(idScores.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, limit)
      .map(([id, score]) => ({
        id,
        // Normalise to the 0-1 range.
        score: score / maxScore
      }));
  }

  /** @returns {number} Number of distinct ids currently indexed. */
  getDocCount() {
    return this.docCount;
  }

  /**
   * Look up the stored `content` of a document.
   * @returns {string|null} The content, or null when the document is
   *   missing, has empty content, or the lookup throws.
   */
  getContent(id) {
    try {
      const doc = this.index.get(id);
      if (doc && typeof doc === "object") {
        return doc.content || null;
      }
      return null;
    } catch {
      return null;
    }
  }

  /** Persist the index. Currently a no-op. */
  async save() {
  }

  /** Load a persisted index. Currently a no-op. */
  async load() {
  }

  /** Drop every document by replacing the index with a fresh, empty one. */
  clear() {
    this.index = this.createIndex();
    this.knownIds.clear();
    this.docCount = 0;
  }
};
|
|
295
|
+
|
|
296
|
+
// src/storage/meta.ts
|
|
297
|
+
import Database from "better-sqlite3";
|
|
298
|
+
import * as path2 from "path";
|
|
299
|
+
import * as fs from "fs";
|
|
300
|
+
/**
 * SQLite-backed store for document metadata (one row per indexed file),
 * kept in `<dataDir>/meta.db`.
 *
 * Document objects use camelCase fields with `Date` timestamps; rows use
 * snake_case columns with ISO-8601 strings. `rowToDocument` converts between them.
 */
var MetaStore = class {
  db;
  dbPath;

  /**
   * Open (or create) the database file and make sure the schema exists.
   * @param {string} dataDir - Directory for `meta.db`; created if missing.
   */
  constructor(dataDir) {
    this.dbPath = path2.join(dataDir, "meta.db");
    fs.mkdirSync(dataDir, { recursive: true });
    this.db = new Database(this.dbPath);
    this.init();
  }

  /** Create the `documents` table and its indexes when they do not exist. */
  init() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS documents (
        id TEXT PRIMARY KEY,
        path TEXT UNIQUE NOT NULL,
        name TEXT NOT NULL,
        file_type TEXT NOT NULL,
        extension TEXT NOT NULL,
        title TEXT,
        content TEXT,
        file_size INTEGER NOT NULL,
        created_at TEXT NOT NULL,
        modified_at TEXT NOT NULL,
        indexed_at TEXT NOT NULL,
        content_hash TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path);
      CREATE INDEX IF NOT EXISTS idx_documents_file_type ON documents(file_type);
      CREATE INDEX IF NOT EXISTS idx_documents_modified_at ON documents(modified_at);
      CREATE INDEX IF NOT EXISTS idx_documents_content_hash ON documents(content_hash);
    `);
  }

  /**
   * Insert a document, replacing any existing row that conflicts on `id` or `path`.
   * An empty or missing title is stored as NULL.
   */
  upsert(doc) {
    const stmt = this.db.prepare(`
      INSERT OR REPLACE INTO documents
      (id, path, name, file_type, extension, title, content, file_size, created_at, modified_at, indexed_at, content_hash)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    stmt.run(
      doc.id,
      doc.path,
      doc.name,
      doc.fileType,
      doc.extension,
      doc.title || null,
      doc.content,
      doc.fileSize,
      doc.createdAt.toISOString(),
      doc.modifiedAt.toISOString(),
      doc.indexedAt.toISOString(),
      doc.contentHash
    );
  }

  /** @returns {object|null} The document with this id, or null. */
  getById(id) {
    const row = this.db.prepare("SELECT * FROM documents WHERE id = ?").get(id);
    return row ? this.rowToDocument(row) : null;
  }

  /** @returns {object|null} The document stored for this path, or null. */
  getByPath(filePath) {
    const row = this.db.prepare("SELECT * FROM documents WHERE path = ?").get(filePath);
    return row ? this.rowToDocument(row) : null;
  }

  /**
   * Look up a document by path AND exact modification time (a stricter
   * "is this version already indexed" check than path alone).
   * @param {string} filePath
   * @param {Date} modifiedTime - Compared via its ISO string.
   */
  getByPathAndTime(filePath, modifiedTime) {
    const stmt = this.db.prepare(
      "SELECT * FROM documents WHERE path = ? AND modified_at = ?"
    );
    const row = stmt.get(filePath, modifiedTime.toISOString());
    return row ? this.rowToDocument(row) : null;
  }

  /** @returns {object|null} One document with this content hash (used to detect duplicate content), or null. */
  getByHash(contentHash) {
    const row = this.db.prepare("SELECT * FROM documents WHERE content_hash = ? LIMIT 1").get(contentHash);
    return row ? this.rowToDocument(row) : null;
  }

  /** @returns {string[]} Paths of every document sharing this content hash. */
  getPathsByHash(contentHash) {
    const rows = this.db.prepare("SELECT path FROM documents WHERE content_hash = ?").all(contentHash);
    return rows.map((row) => row.path);
  }

  /**
   * Fetch several documents by id.
   *
   * Ids are queried in chunks so the number of bound parameters per
   * statement stays far below SQLite's host-parameter limit; a single
   * `IN (...)` with one placeholder per id fails once the list is large enough.
   * Unknown ids are skipped. Result order follows the database, not `ids`.
   *
   * @param {string[]} ids
   * @returns {object[]}
   */
  getByIds(ids) {
    if (ids.length === 0) return [];
    const CHUNK_SIZE = 500;
    const documents = [];
    for (let start = 0; start < ids.length; start += CHUNK_SIZE) {
      const chunk = ids.slice(start, start + CHUNK_SIZE);
      const placeholders = chunk.map(() => "?").join(",");
      const stmt = this.db.prepare(`SELECT * FROM documents WHERE id IN (${placeholders})`);
      for (const row of stmt.all(...chunk)) {
        documents.push(this.rowToDocument(row));
      }
    }
    return documents;
  }

  /** Delete the document with this id (no-op when absent). */
  delete(id) {
    this.db.prepare("DELETE FROM documents WHERE id = ?").run(id);
  }

  /** Delete the document stored for this path (no-op when absent). */
  deleteByPath(filePath) {
    this.db.prepare("DELETE FROM documents WHERE path = ?").run(filePath);
  }

  /** @returns {object[]} Every stored document. */
  getAll() {
    const rows = this.db.prepare("SELECT * FROM documents").all();
    return rows.map((row) => this.rowToDocument(row));
  }

  /**
   * Snapshot used for incremental updates.
   * @returns {Map<string, {id: string, hash: string, modifiedAt: Date}>}
   *   Keyed by document path.
   */
  getAllPathsAndHashes() {
    const rows = this.db.prepare("SELECT id, path, content_hash, modified_at FROM documents").all();
    const result = /* @__PURE__ */ new Map();
    for (const row of rows) {
      result.set(row.path, {
        id: row.id,
        hash: row.content_hash,
        modifiedAt: new Date(row.modified_at)
      });
    }
    return result;
  }

  /**
   * Aggregate statistics about the store.
   * @returns {{totalDocuments: number, byType: Object<string, number>,
   *   directories: Array, lastUpdated: (Date|undefined), indexSize: number}}
   *   `directories` is always empty here (it has to come from configuration);
   *   `indexSize` is the size of the database file in bytes, or 0 when it
   *   cannot be read.
   */
  getStats() {
    const total = this.db.prepare("SELECT COUNT(*) as count FROM documents").get().count;
    const byTypeRows = this.db.prepare(
      "SELECT file_type, COUNT(*) as count FROM documents GROUP BY file_type"
    ).all();
    const byType = {};
    for (const row of byTypeRows) {
      byType[row.file_type] = row.count;
    }
    const lastRow = this.db.prepare(
      "SELECT MAX(indexed_at) as last FROM documents"
    ).get();
    const lastUpdated = lastRow.last ? new Date(lastRow.last) : void 0;
    let indexSize = 0;
    try {
      indexSize = fs.statSync(this.dbPath).size;
    } catch {
      // Best effort: an unreadable database file is reported as size 0.
    }
    return {
      totalDocuments: total,
      byType,
      directories: [],
      lastUpdated,
      indexSize
    };
  }

  /** Delete every row (the schema is kept). */
  clear() {
    this.db.exec("DELETE FROM documents");
  }

  /** Close the database connection. */
  close() {
    this.db.close();
  }

  /** Convert a snake_case database row into a camelCase document object. */
  rowToDocument(row) {
    return {
      id: row.id,
      path: row.path,
      name: row.name,
      fileType: row.file_type,
      extension: row.extension,
      title: row.title,
      content: row.content,
      fileSize: row.file_size,
      createdAt: new Date(row.created_at),
      modifiedAt: new Date(row.modified_at),
      indexedAt: new Date(row.indexed_at),
      contentHash: row.content_hash
    };
  }
};
|
|
519
|
+
|
|
520
|
+
// src/parsers/index.ts
|
|
521
|
+
import mammoth from "mammoth";
|
|
522
|
+
import pdfParse from "pdf-parse";
|
|
523
|
+
import xlsx from "xlsx";
|
|
524
|
+
import JSZip from "jszip";
|
|
525
|
+
import { parseString } from "xml2js";
|
|
526
|
+
import * as fs2 from "fs/promises";
|
|
527
|
+
import * as path3 from "path";
|
|
528
|
+
/**
 * Extract plain text from a Word document.
 *
 * - Legacy `.doc` files are not parsed; an empty result flagged
 *   `metadata.unsupported` is returned.
 * - Other files are read and passed to mammoth's raw-text extraction.
 * - Read/parse failures never throw; they yield an empty result flagged
 *   `metadata.error` with the error message in `metadata.reason`.
 *
 * The title is the first non-blank line (at most 100 characters), falling
 * back to the file name without its extension when there is no text.
 *
 * @param {string} filePath
 * @returns {Promise<{content: string, title: string, metadata?: object}>}
 */
async function parseWord(filePath) {
  const rawExt = path3.extname(filePath);
  const ext = rawExt.toLowerCase();
  // Strip the extension exactly as written: basename()'s suffix match is
  // case-sensitive, so the lower-cased form would leave "X.DOC" untouched.
  const fallbackTitle = path3.basename(filePath, rawExt);
  if (ext === ".doc") {
    return {
      content: "",
      title: fallbackTitle,
      metadata: { unsupported: true, reason: "\u65E7\u7248 .doc \u683C\u5F0F\u6682\u4E0D\u652F\u6301\uFF0C\u8BF7\u8F6C\u6362\u4E3A .docx" }
    };
  }
  try {
    const buffer = await fs2.readFile(filePath);
    const result = await mammoth.extractRawText({ buffer });
    const content = result.value.trim();
    const firstLine = content.split("\n").find((line) => line.trim());
    // An empty document has no first line; use the file name instead of undefined.
    const title = firstLine?.slice(0, 100) || fallbackTitle;
    return { content, title };
  } catch (error) {
    return {
      content: "",
      title: fallbackTitle,
      metadata: {
        error: true,
        reason: error instanceof Error ? error.message : String(error)
      }
    };
  }
}
|
|
557
|
+
// Shared state for the console.warn filter installed while PDFs are parsed.
// Overlapping parsePDF calls must share ONE patch: if each call saved and
// restored console.warn independently, a call that started while another
// was running would capture the patched function as "original" and put it
// back on exit, leaving the filter installed permanently.
let pdfWarnFilterDepth = 0;
let pdfWarnOriginal = null;

/** True for the noisy warnings that are suppressed while a PDF is parsed. */
function isPdfNoiseWarning(message) {
  return message.includes("Ignoring invalid character") || message.includes("Warning:") && message.includes("hex string");
}

/** Install the warning filter on first use; later overlapping calls only bump the counter. */
function acquirePdfWarnFilter() {
  if (pdfWarnFilterDepth === 0) {
    const original = console.warn;
    pdfWarnOriginal = original;
    console.warn = (...args) => {
      if (isPdfNoiseWarning(args.join(" "))) {
        return;
      }
      original.apply(console, args);
    };
  }
  pdfWarnFilterDepth++;
}

/** Undo one acquire; the original console.warn is restored when the last holder releases. */
function releasePdfWarnFilter() {
  pdfWarnFilterDepth--;
  if (pdfWarnFilterDepth === 0) {
    console.warn = pdfWarnOriginal;
    pdfWarnOriginal = null;
  }
}

/**
 * Extract text from a PDF file.
 *
 * - Paths containing `.xcassets`, `.imageset` or `.appiconset` are treated
 *   as asset-catalog resources and skipped (`metadata.skipped`).
 * - A PDF with no extractable text yields `metadata.empty`.
 * - Read/parse failures never throw; they yield an empty result flagged
 *   `metadata.error` with the error message in `metadata.reason`.
 *
 * The title is the PDF's `info.Title` when present, otherwise the first
 * line of text (at most 100 characters). Fallback titles are the file name
 * without its extension, whatever the extension's letter case.
 *
 * @param {string} filePath
 * @returns {Promise<{content: string, title: string, metadata?: object}>}
 */
async function parsePDF(filePath) {
  const fallbackTitle = path3.basename(filePath, path3.extname(filePath));
  const pathLower = filePath.toLowerCase();
  if (pathLower.includes(".xcassets") || pathLower.includes(".imageset") || pathLower.includes(".appiconset")) {
    return {
      content: "",
      title: fallbackTitle,
      metadata: { skipped: true, reason: "\u8D44\u6E90\u6587\u4EF6\uFF0C\u8DF3\u8FC7\u89E3\u6790" }
    };
  }
  try {
    acquirePdfWarnFilter();
    try {
      const buffer = await fs2.readFile(filePath);
      const data = await pdfParse(buffer);
      const content = data.text.trim();
      if (!content) {
        return {
          content: "",
          title: fallbackTitle,
          metadata: { empty: true }
        };
      }
      const title = data.info?.Title || content.split("\n")[0]?.slice(0, 100);
      return {
        content,
        title,
        metadata: {
          pages: data.numpages,
          info: data.info
        }
      };
    } finally {
      releasePdfWarnFilter();
    }
  } catch (error) {
    return {
      content: "",
      title: fallbackTitle,
      metadata: {
        error: true,
        reason: error instanceof Error ? error.message : String(error)
      }
    };
  }
}
|
|
612
|
+
/**
 * Extract text from a spreadsheet workbook.
 *
 * Every sheet is converted to text; sheets whose text is blank are left
 * out. Each remaining sheet becomes a section headed by `[sheet name]`, and
 * sections are separated by a blank line. The title is the name of the
 * first sheet.
 *
 * Failures never throw; they yield an empty result flagged `metadata.error`
 * with the file name (minus extension) as the title.
 *
 * @param {string} filePath
 * @returns {Promise<{content: string, title: string, metadata: object}>}
 */
async function parseExcel(filePath) {
  try {
    const workbook = xlsx.readFile(filePath);
    const sheetNames = workbook.SheetNames;
    const sections = sheetNames
      .map((name) => ({ name, text: xlsx.utils.sheet_to_txt(workbook.Sheets[name]) }))
      .filter(({ text }) => text.trim())
      .map(({ name, text }) => `[${name}]\n${text}`);
    return {
      content: sections.join("\n\n"),
      title: sheetNames[0],
      metadata: {
        sheetCount: sheetNames.length,
        sheetNames
      }
    };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return {
      content: "",
      title: path3.basename(filePath, path3.extname(filePath)),
      metadata: { error: true, reason }
    };
  }
}
|
|
646
|
+
/**
 * Read a UTF-8 text file.
 *
 * The title is the first non-blank line, cut to 100 characters; when that
 * line starts with `#` (a Markdown heading) the leading `#` characters and
 * following whitespace are removed. Read errors propagate to the caller.
 *
 * @param {string} filePath
 * @returns {Promise<{content: string, title: (string|undefined)}>}
 *   `content` is the trimmed file text; `title` is undefined for a file
 *   with no non-blank line.
 */
async function parseText(filePath) {
  const raw = await fs2.readFile(filePath, "utf-8");
  const firstLine = raw.split("\n").find((line) => line.trim());
  const heading = firstLine?.slice(0, 100);
  const title = heading?.startsWith("#") ? heading.replace(/^#+\s*/, "") : heading;
  return { content: raw.trim(), title };
}
|
|
655
|
+
// Substrings that mark an extracted string as layout/format residue rather
// than slide copy. Built once instead of once per extracted string.
// NOTE(review): matching is by substring and case-sensitive, so short
// entries such as "lt", "ctr" or "Text" also discard real sentences that
// merely contain them (e.g. "Results"). Kept as-is to preserve behaviour —
// confirm whether exact/prefix matching was intended.
const PPT_FORMAT_KEYWORDS = [
  "ctrTitle",
  "zh-CN",
  "en-US",
  "body",
  "title",
  "subtitle",
  "connsite",
  "islide",
  "\u7EC4\u5408",
  "\u77E9\u5F62",
  "\u4EFB\u610F\u591A\u8FB9\u5F62",
  "\u6587\u672C\u6846",
  "\u6587\u672C\u5360\u4F4D\u7B26",
  "\u56FE\u6587\u6846",
  "\u76F4\u63A5\u8FDE\u63A5\u7B26",
  "\u692D\u5706",
  "rId",
  "accent",
  "minor",
  "lt",
  "ctr",
  "rect",
  "square",
  "roundRect",
  "ellipse",
  "none",
  "solid",
  "horz",
  "Bullet",
  "noStrike",
  "Arial",
  "\u65B9\u6B63",
  "Component",
  "Icon",
  "Group",
  "Presenter",
  "subTitle",
  "quarter",
  "Shape",
  "Number",
  "Text",
  "TextBox",
  "Picture",
  "QR",
  "adj",
  "Thank you",
  "IMPORTANT NOTE",
  "Template file",
  "20XX.XX.XX"
];

/** Slide number taken from a `slideN.xml` entry name; 0 when the name has no number. */
function pptSlideNumber(entryPath) {
  const match = entryPath.match(/slide(\d+)\.xml/);
  return match ? Number.parseInt(match[1], 10) : 0;
}

/**
 * Walk a parsed slide XML tree and collect the trimmed, non-empty strings
 * found underneath `a:t` (text run) nodes, in document order.
 */
function collectPptTexts(node, inTextNode = false) {
  const texts = [];
  if (typeof node === "string") {
    if (inTextNode) {
      const trimmed = node.trim();
      if (trimmed) {
        texts.push(trimmed);
      }
    }
  } else if (Array.isArray(node)) {
    for (const item of node) {
      texts.push(...collectPptTexts(item, inTextNode));
    }
  } else if (node && typeof node === "object") {
    if (node["a:t"]) {
      texts.push(...collectPptTexts(node["a:t"], true));
    }
    // Descend into every child, "a:t" included: outside a text node a plain
    // string child is ignored, so this does not duplicate the run above.
    for (const value of Object.values(node)) {
      texts.push(...collectPptTexts(value, inTextNode));
    }
  }
  return texts;
}

/** Heuristic filter: true when an extracted string looks like real slide copy. */
function isPptContentText(text) {
  const trimmed = text.trim();
  if (!trimmed) return false;
  if (/^-?\d+$/.test(trimmed)) return false; // bare integer
  if (/^\{[0-9A-F-]+\}$/i.test(trimmed)) return false; // {GUID}
  if (trimmed.startsWith("http://") || trimmed.startsWith("https://")) return false;
  if (trimmed.includes("xmlns") || trimmed.includes("schemas.openxmlformats")) return false;
  if (PPT_FORMAT_KEYWORDS.some((keyword) => trimmed.includes(keyword))) return false;
  if (/^\*\/.*[wh]$/.test(trimmed)) return false; // strings like "*/ ... w" or "*/ ... h"
  if (/^[0-9A-F]{6}$/i.test(trimmed)) return false; // 6-digit hex, e.g. a colour
  if (trimmed.length === 1) return false;
  const chineseCount = (trimmed.match(/[\u4e00-\u9fa5]/g) || []).length;
  // With Chinese: need at least two Chinese characters.
  // Without: need at least three characters overall.
  return chineseCount > 0 ? chineseCount >= 2 : trimmed.length >= 3;
}

/**
 * De-duplicate fragments, then glue a fragment onto the previous one when
 * the previous one contains Chinese and the fragment is either a short
 * (<= 3 chars) Chinese piece or a number such as "50%" or "3.14".
 */
function mergePptFragments(texts) {
  const merged = [];
  for (const fragment of new Set(texts)) {
    const current = fragment.trim();
    if (!current) continue;
    const isShortChinese = current.length <= 3 && /[\u4e00-\u9fa5]/.test(current);
    const isNumeric = /^\d+%?$/.test(current) || /^\d+\.\d+$/.test(current);
    const lastIndex = merged.length - 1;
    if ((isShortChinese || isNumeric) && lastIndex >= 0 && /[\u4e00-\u9fa5]/.test(merged[lastIndex])) {
      merged[lastIndex] += current;
      continue;
    }
    merged.push(current);
  }
  return merged;
}

/**
 * Extract slide text from a PowerPoint file by reading it as a zip archive
 * and parsing each `ppt/slides/slideN.xml` entry.
 *
 * The result content is Markdown: one `##` section per slide that has
 * text, in slide-number order, separated by `---`. The title is the first
 * line of the first such slide, falling back to the file name.
 *
 * When no slide yields text, or when anything fails (the failure is logged
 * with console.warn, never thrown), the content is a placeholder built from
 * the file name; failures additionally set `metadata.error` / `metadata.reason`.
 *
 * @param {string} filePath
 * @returns {Promise<{content: string, title: string, metadata: object}>}
 */
async function parsePPT(filePath) {
  const title = path3.basename(filePath, path3.extname(filePath));
  try {
    const absolutePath = path3.isAbsolute(filePath) ? filePath : path3.resolve(filePath);
    const fileBuffer = await fs2.readFile(absolutePath);
    const zip = await JSZip.loadAsync(fileBuffer);

    const slideFiles = [];
    zip.forEach((relativePath) => {
      if (relativePath.startsWith("ppt/slides/slide") && relativePath.endsWith(".xml")) {
        slideFiles.push(relativePath);
      }
    });
    slideFiles.sort((a, b) => pptSlideNumber(a) - pptSlideNumber(b));

    const slides = [];
    for (const slidePath of slideFiles) {
      const slideFile = zip.file(slidePath);
      if (!slideFile) continue;
      const slideXml = await slideFile.async("string");
      if (!slideXml) continue;
      // xml2js exposes a callback API; adapt it to a promise.
      const slideJson = await new Promise((resolve2, reject) => {
        parseString(slideXml, {
          explicitArray: false,
          mergeAttrs: true,
          trim: true,
          normalize: true
        }, (err, result) => {
          if (err) reject(err);
          else resolve2(result);
        });
      });
      const slideTexts = collectPptTexts(slideJson).filter(isPptContentText);
      if (slideTexts.length === 0) continue;
      const slideContent = mergePptFragments(slideTexts).join("\n\n").trim();
      if (slideContent) {
        slides.push({
          number: pptSlideNumber(slidePath),
          content: slideContent
        });
      }
    }

    if (slides.length === 0) {
      return {
        content: `[PPT\u6587\u6863] ${title}`,
        title,
        metadata: {
          error: false,
          slides: []
        }
      };
    }

    const markdownContent = slides.map((slide, index) => {
      // The first slide that has text is labelled as the cover.
      const slideHeader = `## \u5E7B\u706F\u7247 ${slide.number}${index === 0 ? "\uFF08\u5C01\u9762\uFF09" : ""}\n\n`;
      return slideHeader + slide.content;
    }).join("\n\n---\n\n");
    const extractedTitle = slides[0]?.content.split("\n")[0]?.slice(0, 100) || title;
    return {
      content: markdownContent,
      title: extractedTitle,
      metadata: {
        slides: slides.map((s) => ({
          number: s.number,
          content: s.content
        })),
        totalSlides: slides.length
      }
    };
  } catch (error) {
    const errorMsg = error instanceof Error ? error.message : String(error);
    console.warn(`PPT \u89E3\u6790\u5931\u8D25: ${filePath} - ${errorMsg}`);
    return {
      content: `[PPT\u6587\u6863] ${title}`,
      title,
      metadata: {
        error: true,
        reason: errorMsg
      }
    };
  }
}
|
|
866
|
+
/**
 * Parse a file with the parser that matches its (case-insensitive) extension.
 *
 * Known extensions are handed to their parser and any error that parser
 * raises propagates. Any other extension is attempted as plain text; if
 * that fails, an empty result titled with the file name is returned instead
 * of throwing.
 *
 * @param {string} filePath
 * @returns {Promise<{content: string, title: (string|undefined), metadata?: object}>}
 */
async function parseDocument(filePath) {
  const ext = path3.extname(filePath).toLowerCase();

  if (ext === ".docx" || ext === ".doc") {
    return parseWord(filePath);
  }
  if (ext === ".pdf") {
    return parsePDF(filePath);
  }
  if (ext === ".xlsx" || ext === ".xls") {
    return parseExcel(filePath);
  }
  if (ext === ".pptx" || ext === ".ppt") {
    return parsePPT(filePath);
  }
  if (ext === ".txt" || ext === ".md" || ext === ".rtf" || ext === ".text") {
    return parseText(filePath);
  }

  // Unknown extension: best-effort attempt to read it as text.
  try {
    return await parseText(filePath);
  } catch {
    return { content: "", title: path3.basename(filePath) };
  }
}
|
|
893
|
+
/**
 * Tell whether a path has one of the supported document extensions
 * (compared case-insensitively).
 *
 * @param {string} filePath
 * @returns {boolean}
 */
function isSupportedDocument(filePath) {
  const supported = new Set([
    ".docx", ".doc",
    ".pdf",
    ".xlsx", ".xls",
    ".pptx", ".ppt",
    ".txt", ".md", ".rtf"
  ]);
  return supported.has(path3.extname(filePath).toLowerCase());
}
|
|
897
|
+
/**
 * Map a path's extension (case-insensitive) to a document family name.
 *
 * @param {string} filePath
 * @returns {"word"|"pdf"|"excel"|"powerpoint"|"text"|"unknown"}
 */
function getDocumentType(filePath) {
  const familyByExtension = new Map([
    [".docx", "word"],
    [".doc", "word"],
    [".pdf", "pdf"],
    [".xlsx", "excel"],
    [".xls", "excel"],
    [".pptx", "powerpoint"],
    [".ppt", "powerpoint"],
    [".txt", "text"],
    [".md", "text"],
    [".rtf", "text"]
  ]);
  const ext = path3.extname(filePath).toLowerCase();
  return familyByExtension.get(ext) ?? "unknown";
}
|
|
906
|
+
|
|
907
|
+
// src/embeddings/index.ts
|
|
908
|
+
// Module-level embedder state. initEmbedder() assigns the key, model,
// dimension and the initialized flag.

// API key; null until the embedder has been configured.
var arkApiKey = null;

// Identifier of the embedding model in use.
var currentModel = "doubao-embedding-vision-250615";

// True once initEmbedder() has completed successfully.
var initialized = false;

// Embeddings endpoint URL.
var ARK_EMBEDDING_URL = "https://ark.cn-beijing.volces.com/api/v3/embeddings/multimodal";

// Requested embedding vector length.
var embeddingDimension = 1024;

/**
 * Default instruction texts, one per embedding purpose.
 */
var DEFAULT_INSTRUCTIONS = {
  /** Instruction used when embedding documents for indexing. */
  document: "\u5C06\u4EE5\u4E0B\u6587\u6863\u5185\u5BB9\u8F6C\u6362\u4E3A\u8BED\u4E49\u5411\u91CF\uFF0C\u7528\u4E8E\u540E\u7EED\u7684\u76F8\u4F3C\u5EA6\u68C0\u7D22",
  /** Instruction used when embedding search queries for retrieval. */
  query: "\u5C06\u4EE5\u4E0B\u641C\u7D22\u67E5\u8BE2\u8F6C\u6362\u4E3A\u8BED\u4E49\u5411\u91CF\uFF0C\u7528\u4E8E\u68C0\u7D22\u76F8\u5173\u6587\u6863"
};
|
|
919
|
+
/**
 * Configure the embedding client: API key, model name and vector dimension.
 *
 * The key is taken from `apiKey`, falling back to the `ARK_API_KEY`
 * environment variable when `apiKey` is falsy.
 *
 * @param {string} [apiKey] - Ark API key.
 * @param {string} [modelName="doubao-embedding-vision-250615"] - Model to request.
 * @param {number} [dimensions=1024] - Embedding dimension to request.
 * @returns {Promise<void>}
 * @throws {Error} When no API key is available from either source.
 */
async function initEmbedder(apiKey, modelName = "doubao-embedding-vision-250615", dimensions = 1024) {
  const resolvedKey = apiKey || process.env.ARK_API_KEY;
  if (!resolvedKey) {
    throw new Error("\u7F3A\u5C11 ARK_API_KEY\uFF0C\u8BF7\u901A\u8FC7\u53C2\u6570\u6216\u73AF\u5883\u53D8\u91CF\u63D0\u4F9B");
  }

  // Publish the new configuration to the module-level state.
  embeddingDimension = dimensions;
  currentModel = modelName;
  arkApiKey = resolvedKey;
  initialized = true;

  console.log(`\u8C46\u5305 Embedding \u6A21\u578B\u5DF2\u914D\u7F6E: ${modelName}, \u7EF4\u5EA6: ${dimensions}`);
}
|
|
930
|
+
/**
 * Return a promise that fulfils (with `undefined`) after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
933
|
+
/** Retry policy read by `embed` when a request fails. */
var RETRY_CONFIG = {
  // Maximum number of retries after the first attempt.
  maxRetries: 3,
  // Base backoff delay: 1 second (doubled on each attempt).
  baseDelay: 1e3,
  // Upper bound for a single backoff delay: 10 seconds.
  maxDelay: 1e4
};
|
|
941
|
+
/**
 * Request a text embedding from the Ark multimodal embedding endpoint.
 *
 * The text is truncated to `options.maxLength` characters (default 8192).
 * Failed attempts are retried up to `RETRY_CONFIG.maxRetries` times with
 * exponential backoff capped at `RETRY_CONFIG.maxDelay`.
 *
 * Only failures that can plausibly succeed on retry are retried: transport
 * errors, unparsable or malformed responses, HTTP 429 and HTTP 5xx. Other HTTP
 * errors (e.g. 400/401/403) are thrown immediately, because repeating the same
 * request cannot fix them and would only delay the error by several seconds.
 *
 * @param {string} text - Text to embed.
 * @param {{ maxLength?: number, instructions?: string }} [options]
 * @returns {Promise<*>} The `data.embedding` value of the API response.
 * @throws {Error} The last error seen once retries are exhausted, or the HTTP
 *   error itself for non-retryable statuses.
 */
async function embed(text, options) {
  if (!initialized || !arkApiKey) {
    await initEmbedder();
  }
  const maxLength = options?.maxLength ?? 8192;
  const instructions = options?.instructions;
  const requestBody = {
    model: currentModel,
    encoding_format: "float",
    dimensions: embeddingDimension,
    input: [
      {
        type: "text",
        text: text.slice(0, maxLength)
      }
    ]
  };
  if (instructions) {
    requestBody.instructions = instructions;
  }

  let lastError = null;
  for (let attempt = 0; attempt <= RETRY_CONFIG.maxRetries; attempt++) {
    // Everything is retryable unless the server answers with a client error.
    let retryable = true;
    try {
      const response = await fetch(ARK_EMBEDDING_URL, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Authorization": `Bearer ${arkApiKey}`
        },
        body: JSON.stringify(requestBody)
      });
      if (response.ok) {
        const result = await response.json();
        if (!result.data?.embedding) {
          throw new Error(`\u8C46\u5305 Embedding API \u8FD4\u56DE\u683C\u5F0F\u9519\u8BEF: ${JSON.stringify(result)}`);
        }
        return result.data.embedding;
      }
      retryable = response.status === 429 || response.status >= 500;
      const errorText = await response.text();
      throw new Error(`\u8C46\u5305 Embedding API \u9519\u8BEF (${response.status}): ${errorText}`);
    } catch (error) {
      lastError = error instanceof Error ? error : new Error(String(error));
      if (!retryable || attempt >= RETRY_CONFIG.maxRetries) {
        break;
      }
      const retryDelay = Math.min(
        RETRY_CONFIG.baseDelay * Math.pow(2, attempt),
        RETRY_CONFIG.maxDelay
      );
      await delay(retryDelay);
    }
  }
  throw lastError || new Error("Embedding \u8BF7\u6C42\u5931\u8D25");
}
|
|
1004
|
+
/**
 * Embed document text using the default document-indexing instruction.
 *
 * @param {string} text - Document content to embed.
 * @param {number} [maxLength=8192] - Truncation length forwarded to `embed`.
 * @returns {Promise<*>} Whatever `embed` resolves to.
 */
async function embedDocument(text, maxLength = 8192) {
  const embedOptions = { maxLength, instructions: DEFAULT_INSTRUCTIONS.document };
  return embed(text, embedOptions);
}
|
|
1010
|
+
/**
 * Embed a search query using the default query-retrieval instruction.
 *
 * @param {string} text - Query text to embed.
 * @param {number} [maxLength=8192] - Truncation length forwarded to `embed`.
 * @returns {Promise<*>} Whatever `embed` resolves to.
 */
async function embedQuery(text, maxLength = 8192) {
  const embedOptions = { maxLength, instructions: DEFAULT_INSTRUCTIONS.query };
  return embed(text, embedOptions);
}
|
|
1016
|
+
/**
 * Embed several texts one after another with `embedDocument`.
 *
 * Requests are issued strictly sequentially; the first failure rejects the
 * whole call.
 *
 * @param {Iterable<string>} texts - Texts to embed.
 * @param {number} [maxLength=8192] - Truncation length for each text.
 * @returns {Promise<Array>} Embeddings in the same order as `texts`.
 */
async function embedBatch(texts, maxLength = 8192) {
  const needsInit = !initialized || !arkApiKey;
  if (needsInit) {
    await initEmbedder();
  }
  const vectors = [];
  for (const entry of texts) {
    vectors.push(await embedDocument(entry, maxLength));
  }
  return vectors;
}
|
|
1027
|
+
/**
 * Embed several texts with at most `concurrency` requests in flight.
 *
 * A fixed pool of workers pulls the next unprocessed index until the input is
 * exhausted, so the limit is enforced without inspecting promise internals
 * (native promises expose no readable settled state).
 *
 * @param {string[]} texts - Texts to embed.
 * @param {number} [concurrency=5] - Maximum simultaneous requests; values
 *   below 1 or non-numeric values are treated as 1.
 * @param {number} [maxLength=8192] - Truncation length for each text.
 * @returns {Promise<Array>} Embeddings positioned at the index of their text.
 * @throws Rejects with the first embedding error; once an error occurs no
 *   further texts are started.
 */
async function embedBatchConcurrent(texts, concurrency = 5, maxLength = 8192) {
  if (!initialized || !arkApiKey) {
    await initEmbedder();
  }
  const results = new Array(texts.length);
  const limit = Math.max(1, Math.floor(Number(concurrency)) || 1);
  const workerCount = Math.min(limit, texts.length);

  let nextIndex = 0;
  let failed = false;
  const runWorker = async () => {
    while (!failed && nextIndex < texts.length) {
      // Claim the index synchronously so no two workers process the same text.
      const index = nextIndex++;
      try {
        results[index] = await embedDocument(texts[index], maxLength);
      } catch (error) {
        failed = true;
        throw error;
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return results;
}
|
|
1053
|
+
/**
 * Request an embedding for an image referenced by URL.
 *
 * Sends a single POST (no retry) to the embedding endpoint.
 *
 * @param {string} imageUrl - Value placed in `input[0].image_url.url`.
 * @returns {Promise<*>} The `data.embedding` value of the API response.
 * @throws {Error} On a non-OK HTTP status or a response without `data.embedding`.
 */
async function embedImage(imageUrl) {
  if (!initialized || !arkApiKey) {
    await initEmbedder();
  }
  const payload = {
    model: currentModel,
    encoding_format: "float",
    dimensions: embeddingDimension,
    input: [{ type: "image_url", image_url: { url: imageUrl } }]
  };
  const headers = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${arkApiKey}`
  };
  const response = await fetch(ARK_EMBEDDING_URL, {
    method: "POST",
    headers,
    body: JSON.stringify(payload)
  });
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`\u8C46\u5305 Embedding API \u9519\u8BEF (${response.status}): ${errorText}`);
  }
  const result = await response.json();
  if (!result.data?.embedding) {
    throw new Error(`\u8C46\u5305 Embedding API \u8FD4\u56DE\u683C\u5F0F\u9519\u8BEF: ${JSON.stringify(result)}`);
  }
  return result.data.embedding;
}
|
|
1087
|
+
/**
 * Request an embedding for a video referenced by URL.
 *
 * Sends a single POST (no retry) to the embedding endpoint.
 *
 * @param {string} videoUrl - Value placed in `input[0].video_url.url`.
 * @returns {Promise<*>} The `data.embedding` value of the API response.
 * @throws {Error} On a non-OK HTTP status or a response without `data.embedding`.
 */
async function embedVideo(videoUrl) {
  if (!initialized || !arkApiKey) {
    await initEmbedder();
  }
  const payload = {
    model: currentModel,
    encoding_format: "float",
    dimensions: embeddingDimension,
    input: [{ type: "video_url", video_url: { url: videoUrl } }]
  };
  const headers = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${arkApiKey}`
  };
  const response = await fetch(ARK_EMBEDDING_URL, {
    method: "POST",
    headers,
    body: JSON.stringify(payload)
  });
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`\u8C46\u5305 Embedding API \u9519\u8BEF (${response.status}): ${errorText}`);
  }
  const result = await response.json();
  if (!result.data?.embedding) {
    throw new Error(`\u8C46\u5305 Embedding API \u8FD4\u56DE\u683C\u5F0F\u9519\u8BEF: ${JSON.stringify(result)}`);
  }
  return result.data.embedding;
}
|
|
1121
|
+
/**
 * Request an embedding for a caller-supplied list of multimodal inputs.
 *
 * `inputs` is sent verbatim as the request's `input` field. Sends a single
 * POST (no retry) to the embedding endpoint.
 *
 * @param {Array<object>} inputs - Input items in the API's own format.
 * @returns {Promise<*>} The `data.embedding` value of the API response.
 * @throws {Error} On a non-OK HTTP status or a response without `data.embedding`.
 */
async function embedMultimodal(inputs) {
  if (!initialized || !arkApiKey) {
    await initEmbedder();
  }
  const payload = {
    model: currentModel,
    encoding_format: "float",
    dimensions: embeddingDimension,
    input: inputs
  };
  const headers = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${arkApiKey}`
  };
  const response = await fetch(ARK_EMBEDDING_URL, {
    method: "POST",
    headers,
    body: JSON.stringify(payload)
  });
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`\u8C46\u5305 Embedding API \u9519\u8BEF (${response.status}): ${errorText}`);
  }
  const result = await response.json();
  if (!result.data?.embedding) {
    throw new Error(`\u8C46\u5305 Embedding API \u8FD4\u56DE\u683C\u5F0F\u9519\u8BEF: ${JSON.stringify(result)}`);
  }
  return result.data.embedding;
}
|
|
1148
|
+
/**
 * Read the embedding dimension currently configured for this module.
 *
 * @returns {number} The value sent as `dimensions` in embedding requests.
 */
function getEmbeddingDimension() {
  const dimension = embeddingDimension;
  return dimension;
}
|
|
1151
|
+
/**
 * Override the embedding dimension used by subsequent embedding requests.
 *
 * The value is stored as-is; no validation is performed.
 *
 * @param {number} dimension - New value for the `dimensions` request field.
 * @returns {void}
 */
function setEmbeddingDimension(dimension) {
  embeddingDimension = dimension;
}
|
|
1154
|
+
/**
 * Forget the stored API key and mark the embedder as uninitialised.
 *
 * The model name and dimension are left untouched.
 *
 * @returns {void}
 */
function disposeEmbedder() {
  initialized = false;
  arkApiKey = null;
}
|
|
1158
|
+
/**
 * Report whether the embedder has been initialised and still holds an API key.
 *
 * @returns {boolean} `true` only when `initialized` is set and `arkApiKey` is
 *   not `null`.
 */
function isEmbedderInitialized() {
  const hasKey = arkApiKey !== null;
  return initialized && hasKey;
}
|
|
1161
|
+
|
|
1162
|
+
// src/core/scanner.ts
|
|
1163
|
+
import { fdir } from "fdir";
|
|
1164
|
+
import * as path5 from "path";
|
|
1165
|
+
import * as os2 from "os";
|
|
1166
|
+
|
|
1167
|
+
// src/core/rules.ts
|
|
1168
|
+
import * as path4 from "path";
|
|
1169
|
+
import * as os from "os";
|
|
1170
|
+
/**
 * Default extension rules. An extension listed in `excluded` is rejected even
 * when it also appears in `allowed` (see `ScanRulesManager.isExtensionAllowed`).
 */
var DEFAULT_EXTENSION_RULES = {
  allowed: [".docx", ".doc", ".pdf", ".xlsx", ".xls", ".pptx", ".ppt", ".txt", ".md", ".rtf"],
  // The legacy .doc format is explicitly unsupported (hard to parse).
  excluded: [".doc"]
};

/** Default directory rules. */
var DEFAULT_DIRECTORY_RULES = {
  // Non-hidden directory names that are always skipped.
  excludedNames: ["node_modules"],
  // Non-hidden cache and build-output directory names.
  buildDirs: ["__pycache__", "build", "dist", "out", "target"],
  // Non-hidden resource directory names.
  resourceDirs: ["Assets.xcassets"],
  // System directory names (platform specific).
  windowsSystemDirs: [
    "Windows",
    "Program Files",
    "Program Files (x86)",
    "ProgramData",
    "System Volume Information",
    "AppData",
    "Library",
    "$RECYCLE.BIN"
  ],
  // User-configured excluded paths (empty by default).
  excludedPaths: []
};

/** Default file rules. */
var DEFAULT_FILE_RULES = {
  // Exclude every hidden entry (name starting with ".") by default.
  excludeHidden: true,
  excludedPatterns: [],
  // Resource path fragments: a file is excluded when its path contains one of
  // these, even if the file name itself is not hidden.
  excludedPathContains: [".imageset", ".xcassets", ".appiconset"]
};

/** Default platform-specific path rules. */
var DEFAULT_PATH_RULES = {
  windows: {
    onlyUserDirs: true,
    excludedRootDirs: [
      "Windows",
      "Program Files",
      "Program Files (x86)",
      "ProgramData",
      "System Volume Information"
    ]
  }
};

/** The complete default rule set, assembled from the groups above. */
var DEFAULT_SCAN_RULES = {
  extensions: DEFAULT_EXTENSION_RULES,
  directories: DEFAULT_DIRECTORY_RULES,
  files: DEFAULT_FILE_RULES,
  paths: DEFAULT_PATH_RULES
};
|
|
1248
|
+
/**
 * Evaluates scan rules (extensions, directory names, path patterns) against
 * files and directories met during a directory crawl.
 *
 * Lower-cased lookup sets are derived from the rules so that name and
 * extension checks are case-insensitive.
 */
var ScanRulesManager = class {
  rules;
  excludedDirNamesSet;
  allowedExtensionsSet;
  excludedExtensionsSet;

  /**
   * @param {object} [rules=DEFAULT_SCAN_RULES] - Rule set with `extensions`,
   *   `directories`, `files` and `paths` groups.
   */
  constructor(rules = DEFAULT_SCAN_RULES) {
    this.rules = rules;
    this.rebuildLookupSets();
  }

  /**
   * Recompute the lower-cased lookup sets from `this.rules`.
   * Called whenever the rules object is (re)assigned.
   */
  rebuildLookupSets() {
    const { directories, extensions } = this.rules;
    this.excludedDirNamesSet = new Set(
      [
        ...directories.excludedNames,
        ...directories.buildDirs,
        ...directories.resourceDirs
      ].map((name) => name.toLowerCase())
    );
    this.allowedExtensionsSet = new Set(
      extensions.allowed.map((ext) => ext.toLowerCase())
    );
    this.excludedExtensionsSet = new Set(
      extensions.excluded.map((ext) => ext.toLowerCase())
    );
  }

  /**
   * Check whether a file extension is allowed.
   * An excluded extension is rejected even if it is also listed as allowed.
   *
   * @param {string} ext - Extension including the leading dot.
   * @returns {boolean}
   */
  isExtensionAllowed(ext) {
    const extLower = ext.toLowerCase();
    if (this.excludedExtensionsSet.has(extLower)) {
      return false;
    }
    return this.allowedExtensionsSet.has(extLower);
  }

  /**
   * Check whether a directory should be skipped (used as the crawler's
   * directory-exclusion predicate). Pruning here avoids visiting every file
   * below the directory.
   *
   * @param {string} dirName - Directory name (not a full path).
   * @returns {boolean}
   */
  shouldExcludeDirectory(dirName) {
    if (this.rules.files.excludeHidden && dirName.startsWith(".")) {
      return true;
    }
    return this.excludedDirNamesSet.has(dirName.toLowerCase());
  }

  /**
   * Check whether a file should be excluded: hidden file names, path
   * fragments from `files.excludedPathContains`, path segments matching
   * `directories.excludedPaths`, and (on win32 only) the Windows path rules.
   *
   * @param {string} filePath - Full path of the file.
   * @param {string} fileName - Base name of the file.
   * @returns {boolean}
   */
  shouldExcludeFile(filePath, fileName) {
    if (this.rules.files.excludeHidden && fileName.startsWith(".")) {
      return true;
    }
    const normalizedPath = filePath.replace(/[/\\]/g, path4.sep);
    const pathLower = normalizedPath.toLowerCase();
    for (const pattern of this.rules.files.excludedPathContains) {
      if (pathLower.includes(pattern.toLowerCase())) {
        return true;
      }
    }
    const parts = normalizedPath.split(path4.sep).filter((p) => p.length > 0);
    for (const excludedPath of this.rules.directories.excludedPaths) {
      const normalizedExclude = excludedPath.replace(/[/\\]/g, path4.sep);
      if (parts.includes(normalizedExclude) || parts.some((p) => p === path4.basename(normalizedExclude))) {
        return true;
      }
    }
    if (os.platform() === "win32") {
      return this.shouldExcludeWindowsPath(normalizedPath);
    }
    return false;
  }

  /**
   * Windows-specific exclusion based on the drive and the first path segment.
   *
   * Drive letters and directory names are compared case-insensitively, since
   * Windows paths are case-insensitive and Node may report a lower-case drive
   * letter. Only `directories.windowsSystemDirs` and
   * `paths.windows.onlyUserDirs` are consulted here.
   *
   * @param {string} normalizedPath - Path using backslash separators.
   * @returns {boolean}
   */
  shouldExcludeWindowsPath(normalizedPath) {
    const winRules = this.rules.paths.windows;
    const winRootMatch = normalizedPath.match(/^([A-Za-z]:)\\([^\\]+)/);
    if (!winRootMatch) {
      return false;
    }
    const drive = winRootMatch[1].toUpperCase();
    const firstDir = winRootMatch[2].toLowerCase();
    const isSystemDir = this.rules.directories.windowsSystemDirs.some(
      (name) => name.toLowerCase() === firstDir
    );
    if (isSystemDir) {
      return true;
    }
    if (winRules.onlyUserDirs && drive === "C:" && firstDir !== "users") {
      return true;
    }
    return false;
  }

  /**
   * Get every excluded directory name with its original casing.
   *
   * @returns {Set<string>}
   */
  getExcludedDirectoryNames() {
    return /* @__PURE__ */ new Set([
      ...this.rules.directories.excludedNames,
      ...this.rules.directories.buildDirs,
      ...this.rules.directories.resourceDirs
    ]);
  }

  /**
   * Merge new rules into the current ones at runtime. Each rule group is
   * merged shallowly, so a provided array replaces the existing array.
   *
   * @param {object} newRules - Partial rule set.
   */
  updateRules(newRules) {
    this.rules = {
      ...this.rules,
      ...newRules,
      extensions: { ...this.rules.extensions, ...newRules.extensions },
      directories: { ...this.rules.directories, ...newRules.directories },
      files: { ...this.rules.files, ...newRules.files },
      paths: { ...this.rules.paths, ...newRules.paths }
    };
    this.rebuildLookupSets();
  }

  /**
   * Get the current rule configuration.
   *
   * @returns {object}
   */
  getRules() {
    return this.rules;
  }
};
|
|
1378
|
+
/**
 * Build a `ScanRulesManager`, layering optional custom rules over the defaults.
 *
 * Each rule group is merged shallowly: a property present in the custom group
 * replaces the default property of the same name.
 *
 * @param {object} [customRules] - Partial rule set; when falsy the shared
 *   `DEFAULT_SCAN_RULES` object is used as-is.
 * @returns {ScanRulesManager}
 */
function createRulesManager(customRules) {
  if (!customRules) {
    return new ScanRulesManager(DEFAULT_SCAN_RULES);
  }
  const mergedRules = {
    extensions: { ...DEFAULT_EXTENSION_RULES, ...customRules.extensions },
    directories: { ...DEFAULT_DIRECTORY_RULES, ...customRules.directories },
    files: { ...DEFAULT_FILE_RULES, ...customRules.files },
    paths: { ...DEFAULT_PATH_RULES, ...customRules.paths }
  };
  return new ScanRulesManager(mergedRules);
}
|
|
1387
|
+
|
|
1388
|
+
// src/core/scanner.ts
|
|
1389
|
+
/**
 * Expand a leading `~` to the current user's home directory.
 *
 * Only a bare `~` or a `~` followed by a path separator is expanded. Names
 * that merely begin with a tilde (e.g. `~backup/x` or `~other/x`) are returned
 * unchanged instead of being rewritten to a path inside the home directory.
 *
 * @param {string} p - Path that may start with `~`.
 * @returns {string} The expanded path, or `p` unchanged.
 */
function expandPath(p) {
  const isHomeReference = p === "~" || p.startsWith("~/") || p.startsWith("~\\");
  if (isHomeReference) {
    return path5.join(os2.homedir(), p.slice(1));
  }
  return p;
}
|
|
1395
|
+
/**
 * Crawl the given directories and return the files that pass the scan rules.
 *
 * `config.excludeDirs` is appended to the rules' `directories.excludedPaths`
 * and `config.extensions` to `extensions.allowed`. A directory that fails to
 * scan is logged with `console.warn` and skipped.
 *
 * @param {string[]} directories - Directories to crawl (`~` is expanded).
 * @param {object} [config]
 * @param {(progress: { scanned: number, currentDir: string }) => void} [config.onProgress]
 * @param {object} [config.customRules] - Partial scan rules; never modified.
 * @param {string[]} [config.excludeDirs]
 * @param {string[]} [config.extensions]
 * @returns {Promise<string[]>} Full paths of all matching files.
 */
async function scanDirectories(directories, config) {
  const onProgress = config?.onProgress;
  // Shallow copy: the rule groups below are reassigned on this copy, so the
  // caller's `config.customRules` is left untouched and repeated calls with
  // the same config do not accumulate entries.
  const customRules = { ...config?.customRules };
  if (config?.excludeDirs && config.excludeDirs.length > 0) {
    const existingPaths = customRules.directories?.excludedPaths || [];
    // NOTE(review): the `|| []` fallbacks below replace the default
    // excludedNames/buildDirs/resourceDirs/windowsSystemDirs with empty lists
    // whenever `excludeDirs` is supplied. Confirm this is intended.
    customRules.directories = {
      ...customRules.directories,
      excludedNames: customRules.directories?.excludedNames || [],
      excludedPaths: [...existingPaths, ...config.excludeDirs],
      windowsSystemDirs: customRules.directories?.windowsSystemDirs || [],
      buildDirs: customRules.directories?.buildDirs || [],
      resourceDirs: customRules.directories?.resourceDirs || []
    };
  }
  if (config?.extensions && config.extensions.length > 0) {
    const existingAllowed = customRules.extensions?.allowed || [];
    // NOTE(review): likewise, `excluded` falls back to [] here, which drops
    // the default exclusion list. Confirm this is intended.
    customRules.extensions = {
      ...customRules.extensions,
      allowed: [...existingAllowed, ...config.extensions],
      excluded: customRules.extensions?.excluded || []
    };
  }
  const rulesManager = createRulesManager(customRules);
  const allFiles = [];
  let totalScanned = 0;
  for (const dir of directories) {
    const expandedDir = expandPath(dir);
    onProgress?.({ scanned: totalScanned, currentDir: expandedDir });
    try {
      let fileCount = 0;
      let lastLogTime = Date.now();
      const logInterval = 2e3;
      const crawler = new fdir().withFullPaths().exclude((dirName) => {
        return rulesManager.shouldExcludeDirectory(dirName);
      }).filter((filePath, isDirectory) => {
        if (isDirectory) {
          return false;
        }
        fileCount++;
        totalScanned++;
        // Report progress every 5000 files or every 2 seconds.
        const now = Date.now();
        if (fileCount % 5e3 === 0 || now - lastLogTime >= logInterval) {
          onProgress?.({ scanned: totalScanned, currentDir: expandedDir });
          console.log(` \u{1F50D} \u5DF2\u626B\u63CF ${totalScanned.toLocaleString()} \u4E2A\u6587\u4EF6...`);
          lastLogTime = now;
        }
        const fileName = path5.basename(filePath);
        if (rulesManager.shouldExcludeFile(filePath, fileName)) {
          return false;
        }
        const ext = path5.extname(filePath);
        return rulesManager.isExtensionAllowed(ext);
      });
      const files = await crawler.crawl(expandedDir).withPromise();
      // Append one by one: spreading a very large array into push() can exceed
      // the engine's argument limit and throw a RangeError.
      for (const file of files) {
        allFiles.push(file);
      }
      onProgress?.({ scanned: totalScanned, currentDir: expandedDir });
    } catch (err) {
      console.warn(` \u26A0\uFE0F \u626B\u63CF\u76EE\u5F55\u5931\u8D25: ${dir}`, err);
    }
  }
  console.log(` \u{1F4CA} \u626B\u63CF\u5B8C\u6210\uFF0C\u5171\u627E\u5230 ${allFiles.length} \u4E2A\u6587\u4EF6`);
  return allFiles;
}
|
|
1458
|
+
/**
 * List the standard user folders under the home directory.
 *
 * @returns {string[]} Documents, Desktop, Downloads, Pictures and Music,
 *   followed by "Movies" on macOS or "Videos" on every other platform.
 */
function getDefaultDirectories() {
  const homeDir = os2.homedir();
  const videoFolder = os2.platform() === "darwin" ? "Movies" : "Videos";
  const folderNames = ["Documents", "Desktop", "Downloads", "Pictures", "Music", videoFolder];
  return folderNames.map((name) => path5.join(homeDir, name));
}
|
|
1475
|
+
|
|
1476
|
+
// src/core/utils.ts
|
|
1477
|
+
import * as crypto from "crypto";
|
|
1478
|
+
import * as fs3 from "fs/promises";
|
|
1479
|
+
import * as path6 from "path";
|
|
1480
|
+
/**
 * Compute a short fingerprint for a file from its metadata.
 *
 * The fingerprint is the first 16 hex characters of the MD5 of
 * `"<path>:<size>:<mtime in ms>"`; the file's contents are not read.
 *
 * @param {string} filePath - File to fingerprint.
 * @returns {Promise<string>} 16-character hexadecimal string.
 * @throws Rejects when the file cannot be stat'ed.
 */
async function hashFile(filePath) {
  const { size, mtime } = await fs3.stat(filePath);
  const fingerprint = [filePath, size, mtime.getTime()].join(":");
  const digest = crypto.createHash("md5").update(fingerprint).digest("hex");
  return digest.slice(0, 16);
}
|
|
1485
|
+
/**
 * Generate a new random identifier.
 *
 * @returns {string} A UUID produced by `crypto.randomUUID()`.
 */
function generateId() {
  const id = crypto.randomUUID();
  return id;
}
|
|
1488
|
+
/**
 * Format a byte count as a human-readable size using 1024-based units.
 *
 * Values below 1024 are printed as whole bytes; larger values are printed
 * with one decimal place in KB, MB or GB.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} For example "512 B", "1.5 KB", "3.0 MB".
 */
function formatSize(bytes) {
  const KB = 1024;
  const MB = KB * 1024;
  const GB = MB * 1024;
  const scaled = (divisor, unit) => `${(bytes / divisor).toFixed(1)} ${unit}`;

  if (bytes < KB) return `${bytes} B`;
  if (bytes < MB) return scaled(KB, "KB");
  if (bytes < GB) return scaled(MB, "MB");
  return scaled(GB, "GB");
}
|
|
1494
|
+
/**
 * Format a date as the calendar-date part of its ISO-8601 (UTC) string.
 *
 * @param {Date} date - Date to format.
 * @returns {string} The text before the "T" of `date.toISOString()`,
 *   e.g. "2024-03-05".
 */
function formatDate(date) {
  const iso = date.toISOString();
  return iso.slice(0, iso.indexOf("T"));
}
|
|
1497
|
+
/**
 * Classify a file by extension into one of the `FileType` values.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {string} `FileType.DOCUMENT` for Word/RTF/Excel/PowerPoint files,
 *   `FileType.PDF` for .pdf, `FileType.TEXT` for .txt/.md, otherwise
 *   `FileType.FILE`.
 */
function getFileType(filePath) {
  switch (path6.extname(filePath).toLowerCase()) {
    case ".docx":
    case ".doc":
    case ".rtf":
    case ".xlsx":
    case ".xls":
    case ".pptx":
    case ".ppt":
      return FileType.DOCUMENT;
    case ".pdf":
      return FileType.PDF;
    case ".txt":
    case ".md":
      return FileType.TEXT;
    default:
      return FileType.FILE;
  }
}
|
|
1506
|
+
function extractSnippet(content, query, maxLength = 200) {
|
|
1507
|
+
const cleanContent = content.replace(/\s+/g, " ").trim();
|
|
1508
|
+
const queryTokens = query.toLowerCase().split(/\s+/).filter((t) => t.length > 1);
|
|
1509
|
+
const lowerContent = cleanContent.toLowerCase();
|
|
1510
|
+
let bestIndex = -1;
|
|
1511
|
+
for (const token of queryTokens) {
|
|
1512
|
+
const idx = lowerContent.indexOf(token);
|
|
1513
|
+
if (idx !== -1 && (bestIndex === -1 || idx < bestIndex)) {
|
|
1514
|
+
bestIndex = idx;
|
|
1515
|
+
}
|
|
1516
|
+
}
|
|
1517
|
+
if (bestIndex === -1) {
|
|
1518
|
+
const snippet2 = cleanContent.slice(0, maxLength);
|
|
1519
|
+
return snippet2 + (cleanContent.length > maxLength ? "..." : "");
|
|
1520
|
+
}
|
|
1521
|
+
const contextBefore = Math.floor(maxLength * 0.3);
|
|
1522
|
+
const contextAfter = Math.floor(maxLength * 0.7);
|
|
1523
|
+
const start = Math.max(0, bestIndex - contextBefore);
|
|
1524
|
+
const end = Math.min(cleanContent.length, bestIndex + contextAfter);
|
|
1525
|
+
let snippet = cleanContent.slice(start, end);
|
|
1526
|
+
if (start > 0) snippet = "..." + snippet;
|
|
1527
|
+
if (end < cleanContent.length) snippet = snippet + "...";
|
|
1528
|
+
return snippet;
|
|
1529
|
+
}
|
|
1530
|
+
/**
 * Merge several ranked result lists with Reciprocal Rank Fusion.
 *
 * Each item contributes `weight / (k + rank + 1)` (rank is 0-based) for every
 * list it appears in. The summed scores are sorted in descending order and
 * divided by the top score, so the best item scores 1.
 *
 * @param {Array<{ results: Array<{ id: * }>, weight?: number }>} lists -
 *   Ranked lists; `weight` defaults to 1.
 * @param {number} [k=60] - Rank-damping constant.
 * @returns {Array<{ id: *, score: number }>} Fused ranking, best first; an
 *   empty array when no list contains any result.
 */
function reciprocalRankFusion(lists, k = 60) {
  const totals = /* @__PURE__ */ new Map();
  for (const list of lists) {
    const { results, weight = 1 } = list;
    for (let position = 0; position < results.length; position++) {
      const { id } = results[position];
      const contribution = weight / (k + position + 1);
      totals.set(id, (totals.get(id) || 0) + contribution);
    }
  }

  const ranked = [...totals.entries()].sort((left, right) => right[1] - left[1]);
  if (ranked.length === 0) {
    return [];
  }

  const [, topScore] = ranked[0];
  return ranked.map(([id, score]) => {
    const normalized = topScore > 0 ? score / topScore : 0;
    return { id, score: normalized };
  });
}
|
|
1549
|
+
/**
 * Return a promise that fulfils (with `undefined`) after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
1552
|
+
|
|
1553
|
+
// src/core/progress.ts
|
|
1554
|
+
/** Registry-wide symbol under which the listener set is stored on `globalThis`. */
var LISTENERS_KEY = /* @__PURE__ */ Symbol.for("ai-search-progress-listeners");

/**
 * Get the process-wide set of progress listeners, creating it on first use.
 *
 * The set lives on `globalThis` under a `Symbol.for` key, so every caller that
 * resolves the same symbol sees the same set.
 *
 * @returns {Set<Function>}
 */
function getGlobalProgressListeners() {
  const registry = globalThis;
  let listeners = registry[LISTENERS_KEY];
  if (!listeners) {
    listeners = /* @__PURE__ */ new Set();
    registry[LISTENERS_KEY] = listeners;
  }
  return listeners;
}
|
|
1562
|
+
/**
 * Register a listener in the global progress-listener set.
 *
 * @param {Function} listener - Callback to add.
 * @returns {void}
 */
function addGlobalProgressListener(listener) {
  const listeners = getGlobalProgressListeners();
  listeners.add(listener);
}
|
|
1565
|
+
/**
 * Remove a listener from the global progress-listener set.
 *
 * @param {Function} listener - Callback to remove; unknown listeners are ignored.
 * @returns {void}
 */
function removeGlobalProgressListener(listener) {
  const listeners = getGlobalProgressListeners();
  listeners.delete(listener);
}
|
|
1568
|
+
/**
 * Deliver a progress update to every registered global listener.
 *
 * A listener that throws is reported with `console.error` and does not stop
 * the remaining listeners from being called.
 *
 * @param {*} progress - Payload passed to each listener.
 * @returns {void}
 */
function notifyGlobalProgress(progress) {
  for (const listener of getGlobalProgressListeners()) {
    try {
      listener(progress);
    } catch (error) {
      console.error("[DocumentSearch] \u5168\u5C40\u8FDB\u5EA6\u76D1\u542C\u5668\u9519\u8BEF:", error);
    }
  }
}
|
|
1578
|
+
|
|
1579
|
+
// src/core/watch.ts
|
|
1580
|
+
import * as path7 from "path";
|
|
1581
|
+
import chokidar from "chokidar";
|
|
1582
|
+
/**
 * Watches directories with chokidar and keeps the search index in sync.
 *
 * File events are debounced per `<event type>:<path>`; added or changed files
 * are re-indexed and removed files are dropped from the index through the
 * injected `deps` (`config`, `indexFile`, `removeFile`, `fullTextIndex`).
 */
var DirectoryWatcher = class {
  /**
   * @param {object} deps - Collaborators: `config`, `indexFile(path)`,
   *   `removeFile(path)` and `fullTextIndex.save()`.
   */
  constructor(deps) {
    this.deps = deps;
  }
  /** Active chokidar watchers keyed by watched directory. */
  watchers = /* @__PURE__ */ new Map();
  /** Pending debounce timers keyed by `<event type>:<path>`. */
  debounceTimers = /* @__PURE__ */ new Map();
  /** For each watched directory, the debounce-timer keys it currently owns. */
  timerKeysByDirectory = /* @__PURE__ */ new Map();

  /**
   * Check whether the file's extension is in the configured extension list.
   *
   * @param {string} filePath
   * @returns {boolean}
   */
  isSupportedFile(filePath) {
    const ext = path7.extname(filePath).toLowerCase();
    const extensions = this.deps.config.extensions || DEFAULT_CONFIG.extensions || [];
    return extensions.includes(ext);
  }

  /**
   * Check whether a path lies inside one of the excluded directories.
   *
   * Matching is done on whole path segments, so excluding "Library" does not
   * also exclude "MyLibraryNotes". An entry containing separators matches a
   * run of consecutive segments.
   *
   * @param {string} filePath
   * @returns {boolean}
   */
  shouldExclude(filePath) {
    const excludeDirs = this.deps.config.excludeDirs || DEFAULT_CONFIG.excludeDirs || [];
    const splitSegments = (value) => value.split(/[/\\]+/).filter((part) => part.length > 0);
    const pathSegments = splitSegments(filePath);
    return excludeDirs.some((excludeDir) => {
      const wanted = splitSegments(excludeDir);
      if (wanted.length === 0) {
        return false;
      }
      for (let start = 0; start + wanted.length <= pathSegments.length; start++) {
        if (wanted.every((segment, offset) => pathSegments[start + offset] === segment)) {
          return true;
        }
      }
      return false;
    });
  }

  /**
   * Watch a directory and update the index automatically on changes.
   *
   * @param {string} directory - Directory to watch; an existing watcher for
   *   the same directory is replaced.
   * @param {object} [options]
   * @param {boolean} [options.ignoreInitial=true] - Passed to chokidar.
   * @param {number} [options.debounce=1000] - Debounce delay in milliseconds.
   * @param {(event: { type: string, path: string, timestamp: Date }) => void} [options.onEvent]
   */
  watch(directory, options) {
    const {
      ignoreInitial = true,
      debounce = 1e3,
      onEvent
    } = options || {};
    if (this.watchers.has(directory)) {
      this.unwatch(directory);
    }
    // Timer keys owned by this watch, so unwatch() cancels exactly these.
    const timerKeys = /* @__PURE__ */ new Set();
    this.timerKeysByDirectory.set(directory, timerKeys);
    const debouncedHandler = (eventType, filePath) => {
      const timerKey = `${eventType}:${filePath}`;
      const existingTimer = this.debounceTimers.get(timerKey);
      if (existingTimer) {
        clearTimeout(existingTimer);
      }
      const timer = setTimeout(async () => {
        this.debounceTimers.delete(timerKey);
        timerKeys.delete(timerKey);
        const event = {
          type: eventType,
          path: filePath,
          timestamp: /* @__PURE__ */ new Date()
        };
        onEvent?.(event);
        try {
          if (eventType === "add" || eventType === "change") {
            if (this.isSupportedFile(filePath) && !this.shouldExclude(filePath)) {
              await this.deps.indexFile(filePath);
              await this.deps.fullTextIndex.save();
            }
          } else if (eventType === "unlink") {
            await this.deps.removeFile(filePath);
          }
        } catch (error) {
          console.warn(`\u6587\u4EF6\u76D1\u542C\u5904\u7406\u5931\u8D25: ${filePath}`, error);
        }
      }, debounce);
      this.debounceTimers.set(timerKey, timer);
      timerKeys.add(timerKey);
    };
    const watcher = chokidar.watch(directory, {
      ignored: (filePath) => {
        if (this.shouldExclude(filePath)) {
          return true;
        }
        // Paths without an extension are kept (not ignored) at this stage.
        if (path7.extname(filePath)) {
          return !this.isSupportedFile(filePath);
        }
        return false;
      },
      ignoreInitial,
      persistent: true,
      awaitWriteFinish: {
        stabilityThreshold: 500,
        pollInterval: 100
      }
    });
    watcher.on("add", (filePath) => debouncedHandler("add", filePath));
    watcher.on("change", (filePath) => debouncedHandler("change", filePath));
    watcher.on("unlink", (filePath) => debouncedHandler("unlink", filePath));
    watcher.on("unlinkDir", (dirPath) => {
      // Directory removals are only reported to the caller; no index update here.
      const event = {
        type: "unlinkDir",
        path: dirPath,
        timestamp: /* @__PURE__ */ new Date()
      };
      onEvent?.(event);
    });
    this.watchers.set(directory, watcher);
    console.log(`\u{1F4C1} \u5F00\u59CB\u76D1\u542C\u76EE\u5F55: ${directory}`);
  }

  /**
   * Stop watching a directory and cancel the debounce timers it scheduled.
   * Timers belonging to other watched directories are left alone.
   *
   * @param {string} directory
   */
  unwatch(directory) {
    const watcher = this.watchers.get(directory);
    if (watcher) {
      // close() may return a promise; report a failure rather than leaving an
      // unhandled rejection behind.
      Promise.resolve(watcher.close()).catch((error) => {
        console.warn(`\u505C\u6B62\u76D1\u542C\u76EE\u5F55\u5931\u8D25: ${directory}`, error);
      });
      this.watchers.delete(directory);
      console.log(`\u{1F4C1} \u505C\u6B62\u76D1\u542C\u76EE\u5F55: ${directory}`);
    }
    const timerKeys = this.timerKeysByDirectory.get(directory);
    if (timerKeys) {
      for (const key of timerKeys) {
        clearTimeout(this.debounceTimers.get(key));
        this.debounceTimers.delete(key);
      }
      this.timerKeysByDirectory.delete(directory);
    }
  }

  /**
   * Stop every active watcher.
   */
  unwatchAll() {
    for (const directory of [...this.watchers.keys()]) {
      this.unwatch(directory);
    }
  }

  /**
   * List the directories currently being watched.
   *
   * @returns {string[]}
   */
  getWatchedDirectories() {
    return Array.from(this.watchers.keys());
  }

  /**
   * Cancel every pending debounce timer.
   */
  clearTimers() {
    for (const timer of this.debounceTimers.values()) {
      clearTimeout(timer);
    }
    this.debounceTimers.clear();
    for (const timerKeys of this.timerKeysByDirectory.values()) {
      timerKeys.clear();
    }
  }
};
|
|
1712
|
+
|
|
1713
|
+
// src/core/maintenance.ts
|
|
1714
|
+
import * as fs4 from "fs/promises";
|
|
1715
|
+
/**
 * Reconcile the index with the file system.
 *
 * For every indexed document: if the file can no longer be stat'ed or hashed
 * it is removed from the index; if its hash or modification time differs from
 * the stored values it is re-indexed. The full-text index is saved at the end.
 *
 * A failure while re-indexing is logged and the existing entry is kept. It is
 * deliberately NOT treated as "file missing", so a transient indexing error
 * cannot delete a document that still exists on disk.
 *
 * @param {object} deps - `metaStore.getAll()`, `indexFile(path)`,
 *   `removeFile(path)` and `fullTextIndex.save()`.
 * @returns {Promise<{ removed: number, updated: number }>} Counts of removed
 *   and successfully re-indexed documents.
 */
async function cleanup(deps) {
  const allDocs = deps.metaStore.getAll();
  let removed = 0;
  let updated = 0;
  for (const doc of allDocs) {
    let fileStat;
    let contentHash;
    try {
      fileStat = await fs4.stat(doc.path);
      contentHash = await hashFile(doc.path);
    } catch {
      // The file is gone or unreadable: drop it from the index.
      await deps.removeFile(doc.path);
      removed++;
      continue;
    }
    // Accept a Date as well as a serialised timestamp for the stored mtime.
    const indexedMtime = new Date(doc.modifiedAt).getTime();
    if (doc.contentHash !== contentHash || indexedMtime !== fileStat.mtime.getTime()) {
      try {
        await deps.indexFile(doc.path);
        updated++;
      } catch (error) {
        console.warn(`\u91CD\u65B0\u7D22\u5F15\u5931\u8D25: ${doc.path}`, error);
      }
    }
  }
  await deps.fullTextIndex.save();
  return { removed, updated };
}
|
|
1736
|
+
/**
 * Run `cleanup(deps)`, save the full-text index once more, then log completion.
 *
 * @param {object} deps - Forwarded to `cleanup`; must also expose `fullTextIndex.save()`.
 * @returns {Promise<void>}
 */
async function optimize(deps) {
  await cleanup(deps);
  // The index is read from `deps` only after cleanup has finished.
  const index = deps.fullTextIndex;
  await index.save();
  const doneMessage = "\u7D22\u5F15\u4F18\u5316\u5B8C\u6210";
  console.log(doneMessage);
}
|
|
1741
|
+
/**
 * Produce a health report for the index.
 *
 * Steps, in order:
 *  1. Probe each store: `metaStore.getStats()`, a zero-vector
 *     `vectorStore.search`, and `fullTextIndex.search("test", 1)`. A probe that
 *     throws marks that component as not intact.
 *  2. Check the first (at most 100) documents on disk: a document whose check
 *     throws counts as invalid, one whose hash or mtime differs counts as stale.
 *  3. If there are more documents than were sampled, scale both counts up to
 *     the full document count (rounded down).
 *
 * @param {object} deps - Provides `metaStore`, `vectorStore`, `fullTextIndex`
 *   and an `indexErrors` collection with a `size`.
 * @returns {Promise<object>} `{ healthy, totalDocuments, invalidIndexes,
 *   staleIndexes, errorCount, integrity }`.
 */
async function healthCheck(deps) {
  const documents = deps.metaStore.getAll();
  let invalidIndexes = 0;
  let staleIndexes = 0;
  const integrity = { meta: true, vectors: true, fulltext: true };

  try {
    integrity.meta = deps.metaStore.getStats().totalDocuments >= 0;
  } catch {
    integrity.meta = false;
  }

  try {
    const zeroVector = new Array(getEmbeddingDimension()).fill(0);
    await deps.vectorStore.search(zeroVector, 1);
    integrity.vectors = true;
  } catch {
    integrity.vectors = false;
  }

  try {
    deps.fullTextIndex.search("test", 1);
    integrity.fulltext = true;
  } catch {
    integrity.fulltext = false;
  }

  const sampleSize = Math.min(100, documents.length);
  for (const doc of documents.slice(0, sampleSize)) {
    try {
      await fs4.access(doc.path);
      const fileStat = await fs4.stat(doc.path);
      const currentHash = await hashFile(doc.path);
      const upToDate = doc.contentHash === currentHash && doc.modifiedAt.getTime() === fileStat.mtime.getTime();
      if (!upToDate) {
        staleIndexes++;
      }
    } catch {
      invalidIndexes++;
    }
  }

  if (documents.length > sampleSize) {
    // Extrapolate the sampled ratios to the whole collection.
    invalidIndexes = Math.floor((invalidIndexes / sampleSize) * documents.length);
    staleIndexes = Math.floor((staleIndexes / sampleSize) * documents.length);
  }

  const storesIntact = integrity.meta && integrity.vectors && integrity.fulltext;
  const healthy = storesIntact && invalidIndexes === 0 && staleIndexes === 0 && deps.indexErrors.size === 0;
  return {
    healthy,
    totalDocuments: documents.length,
    invalidIndexes,
    staleIndexes,
    errorCount: deps.indexErrors.size,
    integrity
  };
}
|
|
1798
|
+
/**
 * Snapshot the recorded indexing errors.
 *
 * @param {object} deps - Must expose `indexErrors` with a `values()` iterator.
 * @returns {Array} A new array holding the values of `deps.indexErrors`.
 */
function getIndexErrors(deps) {
  return [...deps.indexErrors.values()];
}
|
|
1801
|
+
/**
 * Re-index every file that currently has a recorded indexing error.
 *
 * The error collection is cleared before the retry starts, so it afterwards
 * holds only what the retry itself records.
 *
 * @param {object} deps - Provides `indexErrors` and `indexFiles(paths)`.
 * @returns {Promise<*>} Whatever `deps.indexFiles` resolves to.
 */
async function retryFailedIndexes(deps) {
  const failedPaths = getIndexErrors(deps).map(({ filePath }) => filePath);
  deps.indexErrors.clear();
  return deps.indexFiles(failedPaths);
}
|
|
1807
|
+
|
|
1808
|
+
// src/core/backup.ts
|
|
1809
|
+
import * as fs5 from "fs/promises";
|
|
1810
|
+
import * as path8 from "path";
|
|
1811
|
+
/**
 * Recursively copy a directory tree.
 *
 * Creates `target` (and missing parents), then copies every entry of
 * `source`: directories recurse, anything else goes through `copyFile`.
 * Entries are processed one at a time.
 *
 * @param {string} source - Directory to copy from.
 * @param {string} target - Directory to copy into.
 * @returns {Promise<void>}
 */
async function copyDirectory(source, target) {
  await fs5.mkdir(target, { recursive: true });
  const children = await fs5.readdir(source, { withFileTypes: true });
  for (const child of children) {
    const from = path8.join(source, child.name);
    const to = path8.join(target, child.name);
    await (child.isDirectory() ? copyDirectory(from, to) : fs5.copyFile(from, to));
  }
}
|
|
1824
|
+
/**
 * Sum the sizes of all files beneath a directory.
 *
 * Directories recurse; every other entry contributes the `size` reported by
 * `stat`.
 *
 * @param {string} dir - Directory to measure.
 * @returns {Promise<number>} Total size in bytes.
 */
async function calculateDirSize(dir) {
  const children = await fs5.readdir(dir, { withFileTypes: true });
  let totalBytes = 0;
  for (const child of children) {
    const childPath = path8.join(dir, child.name);
    if (child.isDirectory()) {
      totalBytes += await calculateDirSize(childPath);
      continue;
    }
    const { size } = await fs5.stat(childPath);
    totalBytes += size;
  }
  return totalBytes;
}
|
|
1838
|
+
/**
 * Export the index data directory into a backup directory.
 *
 * The backup directory is `<dirname(outputPath)>/<basename of outputPath
 * without its extension>`. Three components are copied independently from
 * `deps.config.dataDir`: `meta.db`, the `vectors` directory and
 * `fulltext.json`. A component that fails to copy is logged with a warning and
 * reported as `false` in `components`; the export itself still completes.
 *
 * `fulltext.json` is optional: if it does not exist it is skipped silently.
 * If it exists but cannot be copied, a warning is logged (previously that
 * failure was swallowed without any trace).
 *
 * @param {object} deps - Provides `config.dataDir` and `getStats()`.
 * @param {string} outputPath - Path whose directory and base name determine
 *   the backup directory.
 * @returns {Promise<object>} `{ exportPath, timestamp, components, stats }`,
 *   also written to `export-info.json` inside the backup directory.
 */
async function exportIndex(deps, outputPath) {
  const outputDir = path8.dirname(outputPath);
  await fs5.mkdir(outputDir, { recursive: true });
  const exportDir = path8.join(outputDir, path8.basename(outputPath, path8.extname(outputPath)));
  await fs5.mkdir(exportDir, { recursive: true });
  const components = {
    meta: false,
    vectors: false,
    fulltext: false
  };
  try {
    const metaDbPath = path8.join(deps.config.dataDir, "meta.db");
    const targetMetaPath = path8.join(exportDir, "meta.db");
    await fs5.copyFile(metaDbPath, targetMetaPath);
    components.meta = true;
  } catch (error) {
    console.warn("\u5BFC\u51FA\u5143\u6570\u636E\u5931\u8D25:", error);
  }
  try {
    const vectorsDir = path8.join(deps.config.dataDir, "vectors");
    const targetVectorsDir = path8.join(exportDir, "vectors");
    await copyDirectory(vectorsDir, targetVectorsDir);
    components.vectors = true;
  } catch (error) {
    console.warn("\u5BFC\u51FA\u5411\u91CF\u6570\u636E\u5931\u8D25:", error);
  }
  try {
    const fulltextPath = path8.join(deps.config.dataDir, "fulltext.json");
    const targetFulltextPath = path8.join(exportDir, "fulltext.json");
    // Only the existence probe is allowed to fail quietly; a failed copy must be reported.
    const fulltextExists = await fs5.access(fulltextPath).then(() => true, () => false);
    if (fulltextExists) {
      await fs5.copyFile(fulltextPath, targetFulltextPath);
      components.fulltext = true;
    }
  } catch (error) {
    console.warn("\u5BFC\u51FA\u5168\u6587\u7D22\u5F15\u5931\u8D25:", error);
  }
  const stats = deps.getStats();
  const exportInfo = {
    exportPath: exportDir,
    timestamp: /* @__PURE__ */ new Date(),
    components,
    stats
  };
  const infoPath = path8.join(exportDir, "export-info.json");
  await fs5.writeFile(infoPath, JSON.stringify(exportInfo, null, 2), "utf-8");
  return exportInfo;
}
|
|
1887
|
+
/**
 * Import a previously exported index from a backup directory.
 *
 * Each component found in `inputPath` (`meta.db`, `vectors/`, `fulltext.json`)
 * is copied into `deps.config.dataDir` and a fresh store is built for it from
 * the constructors in `createStores`. A component that is missing or fails to
 * import is logged with a warning and the store already held by `deps` is kept.
 * Finally `deps.reinitializeStores` receives the three resulting stores.
 *
 * @param {object} deps - Provides `config.dataDir`, the current `metaStore`,
 *   `vectorStore`, `fullTextIndex`, and `reinitializeStores(meta, vectors, fulltext)`.
 * @param {string} inputPath - Backup directory to import from.
 * @param {object} createStores - Constructors `MetaStore`, `VectorStore`, `FullTextIndex`.
 * @returns {Promise<void>}
 * @throws {Error} If `inputPath` is not a directory.
 */
async function importIndex(deps, inputPath, createStores) {
  const inputStat = await fs5.stat(inputPath);
  if (!inputStat.isDirectory()) {
    throw new Error("\u76EE\u524D\u53EA\u652F\u6301\u4ECE\u76EE\u5F55\u5BFC\u5165\uFF0C\u8BF7\u5148\u89E3\u538B\u5907\u4EFD\u6587\u4EF6");
  }
  const importDir = inputPath;

  // The export info is read but its content is not used; a missing file is ignored.
  await fs5.readFile(path8.join(importDir, "export-info.json"), "utf-8").catch(() => {
  });

  // Start from the stores already in use; each successful import replaces one.
  const stores = {
    meta: deps.metaStore,
    vectors: deps.vectorStore,
    fulltext: deps.fullTextIndex
  };

  const metaSource = path8.join(importDir, "meta.db");
  const metaTarget = path8.join(deps.config.dataDir, "meta.db");
  try {
    await fs5.access(metaSource);
    await fs5.copyFile(metaSource, metaTarget);
    stores.meta = new createStores.MetaStore(deps.config.dataDir);
  } catch (error) {
    console.warn("\u5BFC\u5165\u5143\u6570\u636E\u5931\u8D25:", error);
  }

  const vectorsSource = path8.join(importDir, "vectors");
  const vectorsTarget = path8.join(deps.config.dataDir, "vectors");
  try {
    await fs5.access(vectorsSource);
    await copyDirectory(vectorsSource, vectorsTarget);
    stores.vectors = new createStores.VectorStore(vectorsTarget, "documents", getEmbeddingDimension());
    await stores.vectors.init();
  } catch (error) {
    console.warn("\u5BFC\u5165\u5411\u91CF\u6570\u636E\u5931\u8D25:", error);
  }

  const fulltextSource = path8.join(importDir, "fulltext.json");
  const fulltextTarget = path8.join(deps.config.dataDir, "fulltext.json");
  try {
    await fs5.access(fulltextSource);
    await fs5.copyFile(fulltextSource, fulltextTarget);
    stores.fulltext = new createStores.FullTextIndex(deps.config.dataDir);
    await stores.fulltext.init();
  } catch (error) {
    console.warn("\u5BFC\u5165\u5168\u6587\u7D22\u5F15\u5931\u8D25:", error);
  }

  deps.reinitializeStores(stores.meta, stores.vectors, stores.fulltext);
  console.log("\u7D22\u5F15\u5BFC\u5165\u5B8C\u6210");
}
|
|
1939
|
+
/**
 * List the backups found directly inside a directory, newest first.
 *
 * A sub-directory counts as a backup only if it contains a readable, parseable
 * `export-info.json`; anything else is skipped. If the directory itself cannot
 * be read, an empty list is returned.
 *
 * @param {string} backupDir - Directory that holds backup sub-directories.
 * @returns {Promise<Array<{path: string, timestamp: Date, size: number}>>}
 */
async function listBackups(backupDir) {
  try {
    const found = [];
    const children = await fs5.readdir(backupDir, { withFileTypes: true });
    for (const child of children) {
      if (!child.isDirectory()) {
        continue;
      }
      const backupPath = path8.join(backupDir, child.name);
      try {
        const rawInfo = await fs5.readFile(path8.join(backupPath, "export-info.json"), "utf-8");
        const info = JSON.parse(rawInfo);
        const size = await calculateDirSize(backupPath);
        found.push({ path: backupPath, timestamp: new Date(info.timestamp), size });
      } catch {
        // Not a usable backup directory: skip it.
      }
    }
    // Most recent export first.
    found.sort((left, right) => right.timestamp.getTime() - left.timestamp.getTime());
    return found;
  } catch {
    return [];
  }
}
|
|
1965
|
+
|
|
1966
|
+
// src/core/pipeline.ts
|
|
1967
|
+
import * as path9 from "path";
|
|
1968
|
+
import * as fs6 from "fs/promises";
|
|
1969
|
+
import { createHash as createHash2 } from "crypto";
|
|
1970
|
+
|
|
1971
|
+
// src/core/chunker.ts
|
|
1972
|
+
// Default chunking options; `splitText` merges caller options over these.
var DEFAULT_OPTIONS = {
  // Target chunk size
  chunkSize: 800,
  // Overlap size
  overlap: 100,
  // Minimum chunk size
  minChunkSize: 100
};
|
|
1980
|
+
// Separators tried in order by `recursiveSplit`, starting from the first entry.
var SEPARATORS = [
  // Multiple blank lines (section break)
  "\n\n\n",
  // Paragraph break
  "\n\n",
  // Line break
  "\n",
  // Chinese full stop
  "\u3002",
  // Chinese exclamation mark
  "\uFF01",
  // Chinese question mark
  "\uFF1F",
  // Chinese semicolon
  "\uFF1B",
  // English full stop
  ". ",
  // English exclamation mark
  "! ",
  // English question mark
  "? ",
  // English semicolon
  "; ",
  // Chinese comma
  "\uFF0C",
  // English comma
  ", ",
  // Space
  " "
];
|
|
2010
|
+
/**
 * Split text into chunks.
 *
 * Whitespace-only text yields no chunks. Text no longer than
 * `chunkSize` is returned as a single chunk covering the whole input.
 * Anything longer goes through `recursiveSplit` and then `mergeAndOverlap`.
 *
 * @param {string} text - Text to split.
 * @param {object} [options] - Overrides merged over `DEFAULT_OPTIONS`.
 * @returns {Array<{content: string, index: number, startOffset: number, endOffset: number}>}
 */
function splitText(text, options) {
  const settings = { ...DEFAULT_OPTIONS, ...options };
  if (!text.trim()) {
    return [];
  }
  if (text.length <= settings.chunkSize) {
    const wholeText = { content: text, index: 0, startOffset: 0, endOffset: text.length };
    return [wholeText];
  }
  const pieces = recursiveSplit(text, settings.chunkSize, 0);
  return mergeAndOverlap(pieces, text, settings);
}
|
|
2024
|
+
/**
 * Split text into pieces of at most `targetSize` characters, preferring the
 * earliest usable separator in `SEPARATORS`.
 *
 * Starting at `separatorIndex`, the first separator that actually occurs in
 * the text is used: its parts are greedily re-joined (with the separator) while
 * they fit, and a part that is too long on its own is split again with the next
 * separator. If no separator occurs, the text is cut by `forceChunk`.
 *
 * @param {string} text - Text to split.
 * @param {number} targetSize - Maximum piece length.
 * @param {number} separatorIndex - Index in `SEPARATORS` to start from.
 * @returns {string[]} Pieces in original order.
 */
function recursiveSplit(text, targetSize, separatorIndex) {
  if (text.length <= targetSize) {
    return [text];
  }
  for (let level = separatorIndex; level < SEPARATORS.length; level++) {
    const separator = SEPARATORS[level];
    const parts = text.split(separator);
    if (parts.length <= 1) {
      // Separator not present: try the next, finer one.
      continue;
    }
    const pieces = [];
    let pending = "";
    for (const part of parts) {
      const candidate = pending ? pending + separator + part : part;
      if (candidate.length <= targetSize) {
        pending = candidate;
        continue;
      }
      if (!pending) {
        // Nothing buffered, so `part` alone is too long.
        pieces.push(...recursiveSplit(part, targetSize, level + 1));
        continue;
      }
      pieces.push(pending);
      if (part.length > targetSize) {
        pieces.push(...recursiveSplit(part, targetSize, level + 1));
        pending = "";
      } else {
        pending = part;
      }
    }
    if (pending) {
      pieces.push(pending);
    }
    return pieces;
  }
  return forceChunk(text, targetSize);
}
|
|
2059
|
+
/**
 * Cut text into consecutive pieces of (at most) `targetSize` UTF-16 code units.
 *
 * A cut never lands between the two halves of a surrogate pair: the cut moves
 * one unit back, or one unit forward when moving back would produce an empty
 * piece. For text without surrogate pairs the pieces are exactly `targetSize`
 * long, except possibly the last.
 *
 * @param {string} text - Text to cut.
 * @param {number} targetSize - Piece length; fractional values are rounded down.
 * @returns {string[]} Pieces that concatenate back to `text`.
 * @throws {RangeError} If `targetSize` is not at least 1 (a non-positive or
 *   NaN size would otherwise never advance through the text).
 */
function forceChunk(text, targetSize) {
  const size = Math.floor(targetSize);
  if (!(size >= 1)) {
    throw new RangeError(`forceChunk: targetSize must be >= 1, got ${targetSize}`);
  }
  const isHighSurrogate = (code) => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code) => code >= 0xdc00 && code <= 0xdfff;
  const chunks = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(text.length, start + size);
    const splitsPair = end < text.length && isHighSurrogate(text.charCodeAt(end - 1)) && isLowSurrogate(text.charCodeAt(end));
    if (splitsPair) {
      // Prefer a shorter piece; only overshoot when the piece would be empty.
      end = end - 1 > start ? end - 1 : end + 1;
    }
    chunks.push(text.slice(start, end));
    start = end;
  }
  return chunks;
}
|
|
2068
|
+
/**
 * Turn raw text pieces into chunk records with offsets into the original text.
 *
 * For each raw piece (trimmed):
 *  - whitespace-only pieces are skipped, only advancing the search position;
 *  - a piece shorter than `opts.minChunkSize` is appended to the previous chunk
 *    (joined with "\n") as long as the result stays under 1.5 × `opts.chunkSize`;
 *  - otherwise a new chunk is created. When `opts.overlap > 0`, the tail of the
 *    previous chunk from its last sentence boundary (per `findSentenceStart`)
 *    is prepended to the content, joined with "\n".
 *
 * `startOffset` / `endOffset` always describe the span of the chunk's own text
 * in `originalText`: they exclude any prepended overlap, and a merge extends
 * `endOffset` to the end of the merged piece. The search position advances
 * after every piece, so a later piece is never located inside an earlier one.
 *
 * @param {string[]} rawChunks - Pieces of `originalText`, in order.
 * @param {string} originalText - Text the pieces were cut from.
 * @param {{chunkSize: number, overlap: number, minChunkSize: number}} opts
 * @returns {Array<{content: string, index: number, startOffset: number, endOffset: number}>}
 */
function mergeAndOverlap(rawChunks, originalText, opts) {
  const result = [];
  let currentOffset = 0;
  for (const rawChunk of rawChunks) {
    const trimmed = rawChunk.trim();
    if (!trimmed) {
      const idx = originalText.indexOf(rawChunk, currentOffset);
      if (idx >= 0) {
        currentOffset = idx + rawChunk.length;
      }
      continue;
    }
    // Only the first 50 characters are used as the search probe.
    const located = originalText.indexOf(trimmed.slice(0, 50), currentOffset);
    const actualStart = located >= 0 ? located : currentOffset;
    const actualEnd = actualStart + trimmed.length;
    const lastChunk = result[result.length - 1];
    const shouldMerge = lastChunk !== undefined && trimmed.length < opts.minChunkSize && lastChunk.content.length + trimmed.length < opts.chunkSize * 1.5;
    if (shouldMerge) {
      lastChunk.content += "\n" + trimmed;
      lastChunk.endOffset = actualEnd;
      currentOffset = actualEnd;
      continue;
    }
    let content = trimmed;
    if (opts.overlap > 0 && lastChunk !== undefined) {
      const overlapText = lastChunk.content.slice(-opts.overlap);
      const sentenceStart = findSentenceStart(overlapText);
      if (sentenceStart > 0) {
        content = overlapText.slice(sentenceStart) + "\n" + content;
      }
    }
    result.push({
      content,
      index: result.length,
      startOffset: actualStart,
      // Based on the piece itself, not on `content`, which may carry overlap text.
      endOffset: actualEnd
    });
    currentOffset = actualEnd;
  }
  return result;
}
|
|
2108
|
+
/**
 * Find where the last sentence starts inside a piece of text.
 *
 * Sentence enders are checked in a fixed priority order; the first ender type
 * that occurs at all decides the result, using its last occurrence.
 *
 * @param {string} text - Text to inspect.
 * @returns {number} Index just after that ender, or 0 if no ender occurs.
 */
function findSentenceStart(text) {
  const endersByPriority = ["\u3002", "\uFF01", "\uFF1F", ". ", "! ", "? ", "\n"];
  const firstPresent = endersByPriority.find((ender) => text.includes(ender));
  if (firstPresent === undefined) {
    return 0;
  }
  return text.lastIndexOf(firstPresent) + firstPresent.length;
}
|
|
2118
|
+
/**
 * Summarise chunk sizes.
 *
 * Sizes are `content.length` of each chunk. The statistics are gathered in a
 * single loop rather than by spreading all sizes into `Math.min` / `Math.max`,
 * which can exceed the engine's argument limit for very large chunk lists.
 *
 * @param {Array<{content: string}>} chunks - Chunks to measure.
 * @returns {{count: number, avgSize: number, minSize: number, maxSize: number, totalSize: number}}
 *   All zeros for an empty list; `avgSize` is rounded to the nearest integer.
 */
function getChunkStats(chunks) {
  if (chunks.length === 0) {
    return { count: 0, avgSize: 0, minSize: 0, maxSize: 0, totalSize: 0 };
  }
  let totalSize = 0;
  let minSize = Infinity;
  let maxSize = -Infinity;
  for (const chunk of chunks) {
    const size = chunk.content.length;
    totalSize += size;
    if (size < minSize) minSize = size;
    if (size > maxSize) maxSize = size;
  }
  return {
    count: chunks.length,
    avgSize: Math.round(totalSize / chunks.length),
    minSize,
    maxSize,
    totalSize
  };
}
|
|
2132
|
+
|
|
2133
|
+
// src/core/pipeline.ts
|
|
2134
|
+
/**
 * Compute the MD5 digest of a file's full contents.
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<string>} Hex-encoded MD5 digest.
 */
async function hashFile2(filePath) {
  const bytes = await fs6.readFile(filePath);
  const hasher = createHash2("md5");
  hasher.update(bytes);
  return hasher.digest("hex");
}
|
|
2138
|
+
// Default pipeline configuration; the IndexingPipeline constructor merges caller config over it.
var DEFAULT_CONFIG2 = {
  // High concurrency
  concurrency: 50,
  chunk: {
    // 8k characters per chunk (roughly several paragraphs), so a small document naturally becomes one chunk
    chunkSize: 8e3,
    // 500 characters of overlap
    overlap: 500,
    // Minimum chunk of 500 characters
    minChunkSize: 500
  }
};
|
|
2150
|
+
/**
 * Runs files through the indexing steps (parse, chunk, embed, store) with a
 * bounded number of files in flight at once.
 *
 * Per-file failures are caught inside `processFile`, counted in
 * `stats.failed` and logged; they never reject `run()`.
 */
var IndexingPipeline = class {
  config;
  onProgress;
  storeCallback;
  skipCheck;
  // Cancellation flag, checked between the steps of a file and by the scheduler.
  cancelled = false;
  stats = {
    totalFiles: 0,
    completed: 0,
    stored: 0,
    skipped: 0,
    failed: 0,
    totalTime: 0
  };
  /**
   * @param {object} [config] - Overrides merged (shallowly) over `DEFAULT_CONFIG2`.
   */
  constructor(config) {
    this.config = { ...DEFAULT_CONFIG2, ...config };
  }
  /** Cancel the running task. */
  cancel() {
    this.cancelled = true;
    console.log(" \u26A0\uFE0F \u7D22\u5F15\u4EFB\u52A1\u5DF2\u53D6\u6D88");
  }
  /** @returns {boolean} Whether the task has been cancelled. */
  isCancelled() {
    return this.cancelled;
  }
  /**
   * Process one file end to end: parse, chunk, embed, store.
   *
   * The file counts as skipped when `skipCheck` says so, when it has no
   * non-whitespace content, or when no chunk was stored; as stored when at
   * least one chunk was stored; as failed when any step throws.
   *
   * @param {string} filePath - File to process.
   * @returns {Promise<void>}
   */
  async processFile(filePath) {
    try {
      if (this.skipCheck) {
        const stat6 = await fs6.stat(filePath);
        if (this.skipCheck(filePath, stat6.mtime)) {
          this.stats.skipped++;
          return;
        }
      }
      const parsed = await parseDocument(filePath);
      if (!parsed.content.trim()) {
        this.stats.skipped++;
        return;
      }
      if (this.cancelled) return;
      const contentHash = await hashFile2(filePath);
      if (this.cancelled) return;
      const chunks = splitText(parsed.content, this.config.chunk);
      const totalChunks = chunks.length;
      // All chunks of one file are embedded concurrently.
      const embedPromises = chunks.map((chunk) => embedDocument(chunk.content));
      const vectors = await Promise.all(embedPromises);
      if (this.cancelled) return;
      const storePromises = chunks.map(async (chunk, i) => {
        if (this.cancelled) return false;
        const doc = {
          filePath,
          content: chunk.content,
          title: parsed.title || path9.basename(filePath),
          contentHash,
          vector: vectors[i],
          chunkIndex: i,
          totalChunks
        };
        const result = await this.storeCallback(doc);
        return result.stored;
      });
      const results = await Promise.all(storePromises);
      const storedCount = results.filter(Boolean).length;
      if (storedCount > 0) {
        this.stats.stored++;
      } else {
        this.stats.skipped++;
      }
    } catch (error) {
      this.stats.failed++;
      console.error(` \u274C \u5904\u7406\u5931\u8D25 [${filePath}]:`, error instanceof Error ? error.message : error);
    }
  }
  /**
   * Run the pipeline over a list of files.
   *
   * Resets the cancellation flag and the statistics, then keeps up to
   * `config.concurrency` files in flight until all are done or `cancel()` is
   * called. A `concurrency` that is not at least 1 (0, negative, NaN) is
   * treated as 1; otherwise no file could ever be started and the scheduling
   * loop would spin forever without yielding.
   *
   * @param {string[]} files - Files to process.
   * @param {Function} storeCallback - Called with each chunk document; must
   *   resolve to an object with a `stored` flag.
   * @param {Function} [onProgress] - Receives `{ indexed, total, currentFile, stage }`.
   * @param {Function} [skipCheck] - `(filePath, mtime)`; a truthy result skips the file.
   * @returns {Promise<object>} The final statistics object.
   */
  async run(files, storeCallback, onProgress, skipCheck) {
    const startTime = Date.now();
    this.cancelled = false;
    this.stats = {
      totalFiles: files.length,
      completed: 0,
      stored: 0,
      skipped: 0,
      failed: 0,
      totalTime: 0
    };
    this.onProgress = onProgress;
    this.storeCallback = storeCallback;
    this.skipCheck = skipCheck;
    const requestedConcurrency = Math.floor(Number(this.config.concurrency));
    const concurrency = requestedConcurrency >= 1 ? requestedConcurrency : 1;
    console.log(` \u{1F680} \u5F00\u59CB\u5904\u7406 ${files.length} \u4E2A\u6587\u4EF6 (\u5E76\u53D1: ${concurrency})`);
    const executing = [];
    let index = 0;
    while ((index < files.length || executing.length > 0) && !this.cancelled) {
      // Top up the in-flight set, then wait for any one task to settle.
      while (index < files.length && executing.length < concurrency && !this.cancelled) {
        const filePath = files[index++];
        const promise = this.processFile(filePath).then(() => {
          if (this.cancelled) return;
          this.stats.completed++;
          this.reportProgress(filePath);
          console.log(` \u{1F4C4} [${this.stats.completed}/${this.stats.totalFiles}] (\u5E76\u53D1:${executing.length}) ${filePath}`);
        }).finally(() => {
          const idx = executing.indexOf(promise);
          if (idx > -1) executing.splice(idx, 1);
        });
        executing.push(promise);
      }
      if (executing.length > 0) {
        await Promise.race(executing);
      }
    }
    // After a cancellation, let the tasks that were already started settle.
    if (executing.length > 0) {
      await Promise.all(executing);
    }
    this.stats.totalTime = Date.now() - startTime;
    if (this.cancelled) {
      console.log(`
\u26A0\uFE0F \u7D22\u5F15\u5DF2\u53D6\u6D88:`);
      console.log(` - \u5DF2\u5904\u7406: ${this.stats.completed}/${this.stats.totalFiles}`);
    } else {
      console.log(`
\u2705 \u5904\u7406\u5B8C\u6210:`);
    }
    console.log(` - \u603B\u8017\u65F6: ${(this.stats.totalTime / 1e3).toFixed(1)}s`);
    console.log(` - \u65B0\u589E/\u66F4\u65B0: ${this.stats.stored} \u4E2A`);
    console.log(` - \u8DF3\u8FC7: ${this.stats.skipped} \u4E2A`);
    console.log(` - \u5931\u8D25: ${this.stats.failed} \u4E2A`);
    return this.stats;
  }
  /**
   * Report progress to the `onProgress` callback, if one was given.
   *
   * @param {string} currentFile - File that just finished.
   */
  reportProgress(currentFile) {
    const { completed, totalFiles } = this.stats;
    this.onProgress?.({
      indexed: completed,
      total: totalFiles,
      currentFile,
      stage: completed === totalFiles ? "done" : "storing"
    });
  }
};
|
|
2296
|
+
/**
 * Factory for `IndexingPipeline`.
 *
 * @param {object} [config] - Passed unchanged to the `IndexingPipeline` constructor.
 * @returns {IndexingPipeline} A new pipeline instance.
 */
function createIndexingPipeline(config) {
  const pipeline = new IndexingPipeline(config);
  return pipeline;
}
|
|
2299
|
+
|
|
2300
|
+
// src/core/search.ts
|
|
2301
|
+
var DocumentSearch = class {
|
|
2302
|
+
config;
|
|
2303
|
+
vectorStore;
|
|
2304
|
+
fullTextIndex;
|
|
2305
|
+
metaStore;
|
|
2306
|
+
initialized = false;
|
|
2307
|
+
directoryWatcher;
|
|
2308
|
+
indexErrors = /* @__PURE__ */ new Map();
|
|
2309
|
+
maxRetries = 3;
|
|
2310
|
+
/** 索引锁:防止同一文件被并发索引 */
|
|
2311
|
+
indexingLocks = /* @__PURE__ */ new Set();
|
|
2312
|
+
/** 当前运行的流水线(用于取消) */
|
|
2313
|
+
currentPipeline = null;
|
|
2314
|
+
constructor(config) {
|
|
2315
|
+
this.config = {
|
|
2316
|
+
...DEFAULT_CONFIG,
|
|
2317
|
+
...config
|
|
2318
|
+
};
|
|
2319
|
+
const dataDir = this.config.dataDir;
|
|
2320
|
+
this.vectorStore = new VectorStore(
|
|
2321
|
+
path10.join(dataDir, "vectors"),
|
|
2322
|
+
"documents",
|
|
2323
|
+
getEmbeddingDimension()
|
|
2324
|
+
);
|
|
2325
|
+
this.fullTextIndex = new FullTextIndex(dataDir);
|
|
2326
|
+
this.metaStore = new MetaStore(dataDir);
|
|
2327
|
+
this.directoryWatcher = new DirectoryWatcher({
|
|
2328
|
+
config: this.config,
|
|
2329
|
+
fullTextIndex: this.fullTextIndex,
|
|
2330
|
+
indexFile: (filePath) => this.indexFile(filePath),
|
|
2331
|
+
removeFile: (filePath) => this.removeFile(filePath)
|
|
2332
|
+
});
|
|
2333
|
+
}
|
|
2334
|
+
/**
|
|
2335
|
+
* 初始化搜索引擎
|
|
2336
|
+
*/
|
|
2337
|
+
async init() {
|
|
2338
|
+
if (this.initialized) return;
|
|
2339
|
+
await initEmbedder(
|
|
2340
|
+
this.config.arkApiKey,
|
|
2341
|
+
this.config.embeddingModel,
|
|
2342
|
+
this.config.embeddingDimension
|
|
2343
|
+
);
|
|
2344
|
+
await this.vectorStore.init();
|
|
2345
|
+
await this.fullTextIndex.init();
|
|
2346
|
+
this.initialized = true;
|
|
2347
|
+
console.log("DocumentSearch \u521D\u59CB\u5316\u5B8C\u6210");
|
|
2348
|
+
}
|
|
2349
|
+
/**
|
|
2350
|
+
* 索引单个文件(带重试机制和并发安全)
|
|
2351
|
+
*/
|
|
2352
|
+
async indexFile(filePath, retryCount = 0) {
|
|
2353
|
+
await this.ensureInitialized();
|
|
2354
|
+
const normalizedPath = path10.normalize(filePath);
|
|
2355
|
+
if (this.indexingLocks.has(normalizedPath)) {
|
|
2356
|
+
return;
|
|
2357
|
+
}
|
|
2358
|
+
this.indexingLocks.add(normalizedPath);
|
|
2359
|
+
try {
|
|
2360
|
+
const stat6 = await fs7.stat(filePath);
|
|
2361
|
+
if (stat6.size > (this.config.maxFileSize || DEFAULT_CONFIG.maxFileSize)) {
|
|
2362
|
+
return;
|
|
2363
|
+
}
|
|
2364
|
+
const existing = this.metaStore.getByPath(filePath);
|
|
2365
|
+
if (existing) {
|
|
2366
|
+
const timeMatch = existing.modifiedAt.getTime() === stat6.mtime.getTime();
|
|
2367
|
+
const sizeMatch = existing.fileSize === stat6.size;
|
|
2368
|
+
if (timeMatch && sizeMatch) {
|
|
2369
|
+
return;
|
|
2370
|
+
}
|
|
2371
|
+
if (sizeMatch) {
|
|
2372
|
+
const contentHash2 = await hashFile(filePath);
|
|
2373
|
+
if (existing.contentHash === contentHash2) {
|
|
2374
|
+
existing.modifiedAt = stat6.mtime;
|
|
2375
|
+
existing.fileSize = stat6.size;
|
|
2376
|
+
this.metaStore.upsert(existing);
|
|
2377
|
+
return;
|
|
2378
|
+
}
|
|
2379
|
+
}
|
|
2380
|
+
}
|
|
2381
|
+
const contentHash = await hashFile(filePath);
|
|
2382
|
+
const duplicateByHash = this.metaStore.getByHash(contentHash);
|
|
2383
|
+
if (duplicateByHash && duplicateByHash.path !== filePath) {
|
|
2384
|
+
const docId2 = generateId();
|
|
2385
|
+
const doc2 = {
|
|
2386
|
+
id: docId2,
|
|
2387
|
+
path: filePath,
|
|
2388
|
+
name: path10.basename(filePath),
|
|
2389
|
+
fileType: getFileType(filePath),
|
|
2390
|
+
extension: path10.extname(filePath).toLowerCase(),
|
|
2391
|
+
title: duplicateByHash.title,
|
|
2392
|
+
// 复用标题
|
|
2393
|
+
content: duplicateByHash.content,
|
|
2394
|
+
// 复用内容
|
|
2395
|
+
fileSize: stat6.size,
|
|
2396
|
+
createdAt: stat6.birthtime,
|
|
2397
|
+
modifiedAt: stat6.mtime,
|
|
2398
|
+
indexedAt: /* @__PURE__ */ new Date(),
|
|
2399
|
+
contentHash
|
|
2400
|
+
};
|
|
2401
|
+
this.metaStore.upsert(doc2);
|
|
2402
|
+
const existingVector = await this.vectorStore.getById(duplicateByHash.id);
|
|
2403
|
+
if (existingVector) {
|
|
2404
|
+
await this.vectorStore.update({ id: docId2, vector: existingVector });
|
|
2405
|
+
}
|
|
2406
|
+
this.fullTextIndex.update({
|
|
2407
|
+
id: docId2,
|
|
2408
|
+
title: doc2.title || doc2.name,
|
|
2409
|
+
content: doc2.content
|
|
2410
|
+
});
|
|
2411
|
+
return;
|
|
2412
|
+
}
|
|
2413
|
+
const parsed = await parseDocument(filePath);
|
|
2414
|
+
if (!parsed.content.trim()) {
|
|
2415
|
+
if (parsed.metadata?.error) {
|
|
2416
|
+
const reason = parsed.metadata.reason;
|
|
2417
|
+
if (reason?.includes("bad XRef") || reason?.includes("FormatError") || reason?.includes("XRef")) {
|
|
2418
|
+
console.warn(`\u8DF3\u8FC7\u635F\u574F\u7684 PDF \u6587\u4EF6: ${filePath} - ${reason}`);
|
|
2419
|
+
} else {
|
|
2420
|
+
console.warn(`\u8DF3\u8FC7\u65E0\u6CD5\u89E3\u6790\u7684\u6587\u4EF6: ${filePath} - ${reason}`);
|
|
2421
|
+
}
|
|
2422
|
+
}
|
|
2423
|
+
return;
|
|
2424
|
+
}
|
|
2425
|
+
const vector = await embedDocument(parsed.content);
|
|
2426
|
+
const docId = existing?.id || generateId();
|
|
2427
|
+
const doc = {
|
|
2428
|
+
id: docId,
|
|
2429
|
+
path: filePath,
|
|
2430
|
+
name: path10.basename(filePath),
|
|
2431
|
+
fileType: getFileType(filePath),
|
|
2432
|
+
extension: path10.extname(filePath).toLowerCase(),
|
|
2433
|
+
title: parsed.title,
|
|
2434
|
+
content: parsed.content,
|
|
2435
|
+
fileSize: stat6.size,
|
|
2436
|
+
createdAt: stat6.birthtime,
|
|
2437
|
+
modifiedAt: stat6.mtime,
|
|
2438
|
+
indexedAt: /* @__PURE__ */ new Date(),
|
|
2439
|
+
contentHash
|
|
2440
|
+
};
|
|
2441
|
+
this.metaStore.upsert(doc);
|
|
2442
|
+
await this.vectorStore.update({ id: docId, vector });
|
|
2443
|
+
this.fullTextIndex.update({
|
|
2444
|
+
id: docId,
|
|
2445
|
+
title: doc.title || doc.name,
|
|
2446
|
+
content: doc.content
|
|
2447
|
+
});
|
|
2448
|
+
} catch (err) {
|
|
2449
|
+
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
2450
|
+
const errorStack = err instanceof Error ? err.stack : String(err);
|
|
2451
|
+
const isCommonError = errorMsg.includes("bad XRef") || errorMsg.includes("FormatError") || errorMsg.includes("Invalid PDF") || errorMsg.includes("XRef") || errorStack?.includes("pdf-parse") || errorStack?.includes("pdf.js");
|
|
2452
|
+
const indexError = {
|
|
2453
|
+
filePath,
|
|
2454
|
+
error: errorMsg,
|
|
2455
|
+
retryCount,
|
|
2456
|
+
timestamp: /* @__PURE__ */ new Date()
|
|
2457
|
+
};
|
|
2458
|
+
this.indexErrors.set(filePath, indexError);
|
|
2459
|
+
if (!isCommonError && retryCount < this.maxRetries) {
|
|
2460
|
+
console.warn(`\u7D22\u5F15\u6587\u4EF6\u5931\u8D25\uFF0C\u5C06\u91CD\u8BD5 (${retryCount + 1}/${this.maxRetries}): ${filePath}`);
|
|
2461
|
+
await sleep(1e3 * (retryCount + 1));
|
|
2462
|
+
return this.indexFile(filePath, retryCount + 1);
|
|
2463
|
+
}
|
|
2464
|
+
if (isCommonError) {
|
|
2465
|
+
console.warn(`\u8DF3\u8FC7\u635F\u574F\u7684 PDF \u6587\u4EF6: ${filePath} - ${errorMsg}`);
|
|
2466
|
+
} else {
|
|
2467
|
+
console.warn(`\u7D22\u5F15\u6587\u4EF6\u5931\u8D25: ${filePath} - ${errorMsg}`);
|
|
2468
|
+
}
|
|
2469
|
+
return;
|
|
2470
|
+
} finally {
|
|
2471
|
+
this.indexingLocks.delete(normalizedPath);
|
|
2472
|
+
}
|
|
2473
|
+
}
|
|
2474
|
+
/**
|
|
2475
|
+
* 索引目录(流水线架构)
|
|
2476
|
+
* 三阶段并行处理:解析 → 嵌入 → 存储
|
|
2477
|
+
* 智能增量索引:只处理新增、修改的文件,跳过未变化的文件
|
|
2478
|
+
*/
|
|
2479
|
+
async indexDirectory(directory, onProgress, pipelineConfig) {
|
|
2480
|
+
await this.ensureInitialized();
|
|
2481
|
+
const startTime = Date.now();
|
|
2482
|
+
console.log(`
|
|
2483
|
+
\u{1F4C1} \u5F00\u59CB\u7D22\u5F15\u76EE\u5F55: ${directory}`);
|
|
2484
|
+
console.log(` \u23F1\uFE0F \u5F00\u59CB\u65F6\u95F4: ${(/* @__PURE__ */ new Date()).toLocaleTimeString()}`);
|
|
2485
|
+
const notifyProgress = (progress) => {
|
|
2486
|
+
onProgress?.(progress);
|
|
2487
|
+
notifyGlobalProgress(progress);
|
|
2488
|
+
};
|
|
2489
|
+
notifyProgress({ indexed: 0, total: 0, stage: "scanning" });
|
|
2490
|
+
const phase1Start = Date.now();
|
|
2491
|
+
const indexedDocs = this.metaStore.getAll();
|
|
2492
|
+
const indexedPathsInDir = new Set(
|
|
2493
|
+
indexedDocs.filter((doc) => doc.path.startsWith(directory)).map((doc) => doc.path)
|
|
2494
|
+
);
|
|
2495
|
+
console.log(` \u23F1\uFE0F \u52A0\u8F7D\u5DF2\u7D22\u5F15\u5217\u8868: ${Date.now() - phase1Start}ms`);
|
|
2496
|
+
console.log(` \u{1F4CA} \u5DF2\u7D22\u5F15: ${indexedPathsInDir.size} \u4E2A\u6587\u4EF6`);
|
|
2497
|
+
const scanStart = Date.now();
|
|
2498
|
+
console.log(` \u{1F50D} \u6B63\u5728\u626B\u63CF\u6587\u4EF6\u7CFB\u7EDF...`);
|
|
2499
|
+
let scannedCount = 0;
|
|
2500
|
+
const files = await scanDirectories([directory], {
|
|
2501
|
+
excludeDirs: this.config.excludeDirs,
|
|
2502
|
+
extensions: this.config.extensions,
|
|
2503
|
+
maxFileSize: this.config.maxFileSize,
|
|
2504
|
+
directories: [directory],
|
|
2505
|
+
onProgress: (progress) => {
|
|
2506
|
+
scannedCount = progress.scanned;
|
|
2507
|
+
notifyProgress({
|
|
2508
|
+
indexed: progress.scanned,
|
|
2509
|
+
total: 0,
|
|
2510
|
+
currentFile: progress.currentDir,
|
|
2511
|
+
stage: "scanning"
|
|
2512
|
+
});
|
|
2513
|
+
}
|
|
2514
|
+
});
|
|
2515
|
+
console.log(` \u23F1\uFE0F \u626B\u63CF\u8017\u65F6: ${Date.now() - scanStart}ms`);
|
|
2516
|
+
console.log(` \u{1F4C4} \u627E\u5230 ${files.length} \u4E2A\u6587\u4EF6 (\u626B\u63CF\u4E86 ${scannedCount.toLocaleString()} \u4E2A)`);
|
|
2517
|
+
const analyzeStart = Date.now();
|
|
2518
|
+
notifyProgress({ indexed: 0, total: 0, stage: "parsing", currentFile: "\u6B63\u5728\u5206\u6790\u6587\u4EF6\u53D8\u66F4..." });
|
|
2519
|
+
const filesSet = new Set(files);
|
|
2520
|
+
const normalizedFilesSet = new Set(files.map((f) => path10.normalize(f)));
|
|
2521
|
+
const normalizedIndexedPaths = new Set(
|
|
2522
|
+
Array.from(indexedPathsInDir).map((p) => path10.normalize(p))
|
|
2523
|
+
);
|
|
2524
|
+
const filesToProcess = [];
|
|
2525
|
+
for (const file of files) {
|
|
2526
|
+
const existing = this.metaStore.getByPath(file);
|
|
2527
|
+
if (!existing) {
|
|
2528
|
+
filesToProcess.push(file);
|
|
2529
|
+
} else {
|
|
2530
|
+
filesToProcess.push(file);
|
|
2531
|
+
}
|
|
2532
|
+
}
|
|
2533
|
+
const deletedFiles = [];
|
|
2534
|
+
for (const indexedPath of indexedPathsInDir) {
|
|
2535
|
+
const normalizedIndexedPath = path10.normalize(indexedPath);
|
|
2536
|
+
if (!normalizedFilesSet.has(normalizedIndexedPath) && !filesSet.has(indexedPath)) {
|
|
2537
|
+
deletedFiles.push(indexedPath);
|
|
2538
|
+
}
|
|
2539
|
+
}
|
|
2540
|
+
console.log(` \u23F1\uFE0F \u5206\u6790\u53D8\u66F4: ${Date.now() - analyzeStart}ms`);
|
|
2541
|
+
let deletedCount = 0;
|
|
2542
|
+
if (deletedFiles.length > 0) {
|
|
2543
|
+
console.log(` \u{1F5D1}\uFE0F \u6E05\u7406 ${deletedFiles.length} \u4E2A\u5DF2\u5220\u9664\u6587\u4EF6\u7684\u7D22\u5F15...`);
|
|
2544
|
+
const deleteResult = await this.removeFiles(deletedFiles);
|
|
2545
|
+
deletedCount = deleteResult.success;
|
|
2546
|
+
console.log(` \u2705 \u5DF2\u6E05\u7406 ${deletedCount} \u4E2A\u7D22\u5F15`);
|
|
2547
|
+
}
|
|
2548
|
+
console.log(`
|
|
2549
|
+
\u{1F4C8} \u7D22\u5F15\u7EDF\u8BA1:`);
|
|
2550
|
+
console.log(` - \u626B\u63CF\u6587\u4EF6: ${files.length}`);
|
|
2551
|
+
console.log(` - \u9700\u8981\u5904\u7406: ${filesToProcess.length}`);
|
|
2552
|
+
console.log(` - \u5DF2\u5220\u9664: ${deletedFiles.length}`);
|
|
2553
|
+
const indexedMtimeMap = /* @__PURE__ */ new Map();
|
|
2554
|
+
for (const doc of indexedDocs) {
|
|
2555
|
+
if (doc.path.startsWith(directory) && doc.modifiedAt) {
|
|
2556
|
+
indexedMtimeMap.set(doc.path, doc.modifiedAt.getTime());
|
|
2557
|
+
}
|
|
2558
|
+
}
|
|
2559
|
+
this.currentPipeline = createIndexingPipeline(pipelineConfig);
|
|
2560
|
+
const docIdCache = /* @__PURE__ */ new Map();
|
|
2561
|
+
const stats = await this.currentPipeline.run(
|
|
2562
|
+
filesToProcess,
|
|
2563
|
+
// 存储回调(每个 chunk 调用一次)
|
|
2564
|
+
async (doc) => {
|
|
2565
|
+
try {
|
|
2566
|
+
const chunkIndex = doc.chunkIndex ?? 0;
|
|
2567
|
+
const totalChunks = doc.totalChunks ?? 1;
|
|
2568
|
+
if (chunkIndex === 0) {
|
|
2569
|
+
const existing = this.metaStore.getByPath(doc.filePath);
|
|
2570
|
+
if (existing && existing.contentHash === doc.contentHash) {
|
|
2571
|
+
return { stored: false, skipped: true };
|
|
2572
|
+
}
|
|
2573
|
+
if (existing && existing.chunkCount) {
|
|
2574
|
+
for (let i = 0; i < existing.chunkCount; i++) {
|
|
2575
|
+
const oldChunkId = `${existing.id}_${i}`;
|
|
2576
|
+
await this.vectorStore.delete(oldChunkId);
|
|
2577
|
+
this.fullTextIndex.remove(oldChunkId);
|
|
2578
|
+
}
|
|
2579
|
+
}
|
|
2580
|
+
const stat6 = await fs7.stat(doc.filePath);
|
|
2581
|
+
const docId2 = existing?.id || generateId();
|
|
2582
|
+
docIdCache.set(doc.filePath, docId2);
|
|
2583
|
+
const indexedDoc = {
|
|
2584
|
+
id: docId2,
|
|
2585
|
+
path: doc.filePath,
|
|
2586
|
+
name: path10.basename(doc.filePath),
|
|
2587
|
+
fileType: getFileType(doc.filePath),
|
|
2588
|
+
extension: path10.extname(doc.filePath).toLowerCase(),
|
|
2589
|
+
title: doc.title,
|
|
2590
|
+
content: doc.content,
|
|
2591
|
+
// 第一个 chunk 的内容
|
|
2592
|
+
fileSize: stat6.size,
|
|
2593
|
+
createdAt: stat6.birthtime,
|
|
2594
|
+
modifiedAt: stat6.mtime,
|
|
2595
|
+
indexedAt: /* @__PURE__ */ new Date(),
|
|
2596
|
+
contentHash: doc.contentHash,
|
|
2597
|
+
chunkCount: totalChunks
|
|
2598
|
+
// 记录 chunk 数量
|
|
2599
|
+
};
|
|
2600
|
+
this.metaStore.upsert(indexedDoc);
|
|
2601
|
+
}
|
|
2602
|
+
const docId = docIdCache.get(doc.filePath) || this.metaStore.getByPath(doc.filePath)?.id;
|
|
2603
|
+
if (!docId) {
|
|
2604
|
+
return { stored: false, skipped: false };
|
|
2605
|
+
}
|
|
2606
|
+
const chunkId = `${docId}_${chunkIndex}`;
|
|
2607
|
+
await this.vectorStore.update({ id: chunkId, vector: doc.vector });
|
|
2608
|
+
this.fullTextIndex.update({
|
|
2609
|
+
id: chunkId,
|
|
2610
|
+
title: doc.title,
|
|
2611
|
+
content: doc.content
|
|
2612
|
+
});
|
|
2613
|
+
return { stored: true, skipped: false };
|
|
2614
|
+
} catch {
|
|
2615
|
+
return { stored: false, skipped: false };
|
|
2616
|
+
}
|
|
2617
|
+
},
|
|
2618
|
+
notifyProgress,
|
|
2619
|
+
// 快速跳过检查(基于 mtime)
|
|
2620
|
+
(filePath, mtime) => {
|
|
2621
|
+
const indexedMtime = indexedMtimeMap.get(filePath);
|
|
2622
|
+
if (!indexedMtime) return false;
|
|
2623
|
+
return mtime.getTime() <= indexedMtime;
|
|
2624
|
+
}
|
|
2625
|
+
);
|
|
2626
|
+
const wasCancelled = this.currentPipeline?.isCancelled() ?? false;
|
|
2627
|
+
this.currentPipeline = null;
|
|
2628
|
+
await this.fullTextIndex.save();
|
|
2629
|
+
const totalTime = Date.now() - startTime;
|
|
2630
|
+
if (wasCancelled) {
|
|
2631
|
+
console.log(`
|
|
2632
|
+
\u26A0\uFE0F \u7D22\u5F15\u5DF2\u53D6\u6D88:`);
|
|
2633
|
+
} else {
|
|
2634
|
+
console.log(`
|
|
2635
|
+
\u2705 \u7D22\u5F15\u5B8C\u6210:`);
|
|
2636
|
+
}
|
|
2637
|
+
console.log(` \u23F1\uFE0F \u603B\u8017\u65F6: ${(totalTime / 1e3).toFixed(1)}s`);
|
|
2638
|
+
console.log(` \u{1F4E5} \u65B0\u589E/\u66F4\u65B0: ${stats.stored} \u4E2A\u6587\u4EF6`);
|
|
2639
|
+
console.log(` \u23ED\uFE0F \u8DF3\u8FC7: ${stats.skipped} \u4E2A\u6587\u4EF6`);
|
|
2640
|
+
console.log(` \u274C \u5931\u8D25: ${stats.failed} \u4E2A\u6587\u4EF6`);
|
|
2641
|
+
console.log(` \u{1F5D1}\uFE0F \u5220\u9664: ${deletedCount} \u4E2A\u6587\u4EF6`);
|
|
2642
|
+
console.log(` \u{1F4CA} \u603B\u8BA1: ${this.metaStore.getAll().filter((doc) => doc.path.startsWith(directory)).length} \u4E2A\u5DF2\u7D22\u5F15\u6587\u4EF6
|
|
2643
|
+
`);
|
|
2644
|
+
if (wasCancelled) {
|
|
2645
|
+
notifyProgress({ indexed: stats.completed, total: filesToProcess.length, stage: "cancelled" });
|
|
2646
|
+
} else {
|
|
2647
|
+
notifyProgress({ indexed: filesToProcess.length, total: filesToProcess.length, stage: "done" });
|
|
2648
|
+
}
|
|
2649
|
+
}
|
|
2650
|
+
/**
|
|
2651
|
+
* 取消正在进行的索引任务
|
|
2652
|
+
* @returns 是否成功取消(如果没有运行中的任务则返回 false)
|
|
2653
|
+
*/
|
|
2654
|
+
cancelIndexing() {
|
|
2655
|
+
if (this.currentPipeline) {
|
|
2656
|
+
this.currentPipeline.cancel();
|
|
2657
|
+
console.log("[AI-Search] \u7D22\u5F15\u4EFB\u52A1\u5DF2\u53D1\u51FA\u53D6\u6D88\u4FE1\u53F7");
|
|
2658
|
+
return true;
|
|
2659
|
+
}
|
|
2660
|
+
return false;
|
|
2661
|
+
}
|
|
2662
|
+
/**
|
|
2663
|
+
* 检查是否正在索引
|
|
2664
|
+
*/
|
|
2665
|
+
isIndexing() {
|
|
2666
|
+
return this.currentPipeline !== null && !this.currentPipeline.isCancelled();
|
|
2667
|
+
}
|
|
2668
|
+
/**
|
|
2669
|
+
* 索引默认目录
|
|
2670
|
+
*/
|
|
2671
|
+
async indexDefaultDirectories(onProgress) {
|
|
2672
|
+
const directories = this.config.indexDirectories || getDefaultDirectories();
|
|
2673
|
+
for (const dir of directories) {
|
|
2674
|
+
await this.indexDirectory(dir, onProgress);
|
|
2675
|
+
}
|
|
2676
|
+
}
|
|
2677
|
+
/**
|
|
2678
|
+
* 搜索文档
|
|
2679
|
+
*/
|
|
2680
|
+
async search(query, options) {
|
|
2681
|
+
await this.ensureInitialized();
|
|
2682
|
+
const limit = options?.limit || 10;
|
|
2683
|
+
const mode = options?.mode || "hybrid";
|
|
2684
|
+
let vectorResults = [];
|
|
2685
|
+
let textResults = [];
|
|
2686
|
+
if (mode === "semantic" || mode === "hybrid") {
|
|
2687
|
+
const queryVector = await embedQuery(query);
|
|
2688
|
+
vectorResults = await this.vectorStore.search(queryVector, limit * 2);
|
|
2689
|
+
}
|
|
2690
|
+
if (mode === "keyword" || mode === "hybrid") {
|
|
2691
|
+
textResults = this.fullTextIndex.search(query, limit * 2);
|
|
2692
|
+
}
|
|
2693
|
+
let fusedResults;
|
|
2694
|
+
if (mode === "hybrid") {
|
|
2695
|
+
const hasVectorResults = vectorResults.length > 0;
|
|
2696
|
+
const hasTextResults = textResults.length > 0;
|
|
2697
|
+
if (hasVectorResults && hasTextResults) {
|
|
2698
|
+
fusedResults = reciprocalRankFusion([
|
|
2699
|
+
{
|
|
2700
|
+
results: vectorResults.map((r) => ({
|
|
2701
|
+
id: r.id,
|
|
2702
|
+
score: Math.max(0, 1 - r.distance)
|
|
2703
|
+
// 距离转相似度
|
|
2704
|
+
})),
|
|
2705
|
+
weight: 0.6
|
|
2706
|
+
// 语义搜索权重更高
|
|
2707
|
+
},
|
|
2708
|
+
{
|
|
2709
|
+
results: textResults,
|
|
2710
|
+
weight: 0.4
|
|
2711
|
+
}
|
|
2712
|
+
]);
|
|
2713
|
+
} else if (hasVectorResults) {
|
|
2714
|
+
fusedResults = this.normalizeVectorScores(vectorResults);
|
|
2715
|
+
} else if (hasTextResults) {
|
|
2716
|
+
fusedResults = textResults;
|
|
2717
|
+
} else {
|
|
2718
|
+
fusedResults = [];
|
|
2719
|
+
}
|
|
2720
|
+
} else if (mode === "semantic") {
|
|
2721
|
+
fusedResults = this.normalizeVectorScores(vectorResults);
|
|
2722
|
+
} else {
|
|
2723
|
+
fusedResults = textResults;
|
|
2724
|
+
}
|
|
2725
|
+
const parseChunkId = (chunkId) => {
|
|
2726
|
+
const lastUnderscore = chunkId.lastIndexOf("_");
|
|
2727
|
+
const chunkIndex = parseInt(chunkId.slice(lastUnderscore + 1), 10);
|
|
2728
|
+
return {
|
|
2729
|
+
docId: chunkId.slice(0, lastUnderscore),
|
|
2730
|
+
chunkIndex
|
|
2731
|
+
};
|
|
2732
|
+
};
|
|
2733
|
+
const topChunkIds = fusedResults.slice(0, limit * 2).map((r) => r.id);
|
|
2734
|
+
const docIds = [...new Set(topChunkIds.map((id) => parseChunkId(id).docId))];
|
|
2735
|
+
const documents = this.metaStore.getByIds(docIds);
|
|
2736
|
+
const docMap = new Map(documents.map((d) => [d.id, d]));
|
|
2737
|
+
const scoreMap = new Map(fusedResults.map((r) => [r.id, r.score]));
|
|
2738
|
+
const results = [];
|
|
2739
|
+
const seenDocs = /* @__PURE__ */ new Set();
|
|
2740
|
+
for (const chunkId of topChunkIds) {
|
|
2741
|
+
if (results.length >= limit) break;
|
|
2742
|
+
const { docId, chunkIndex } = parseChunkId(chunkId);
|
|
2743
|
+
if (seenDocs.has(docId)) continue;
|
|
2744
|
+
const doc = docMap.get(docId);
|
|
2745
|
+
if (!doc) continue;
|
|
2746
|
+
const score = scoreMap.get(chunkId) || 0;
|
|
2747
|
+
if (!this.matchesFilters(doc, options)) {
|
|
2748
|
+
continue;
|
|
2749
|
+
}
|
|
2750
|
+
seenDocs.add(docId);
|
|
2751
|
+
const chunkContent = this.fullTextIndex.getContent(chunkId);
|
|
2752
|
+
results.push({
|
|
2753
|
+
// FileItem 兼容字段
|
|
2754
|
+
id: doc.path,
|
|
2755
|
+
// 使用路径作为 ID,与 file-explorer 兼容
|
|
2756
|
+
name: doc.name,
|
|
2757
|
+
type: doc.fileType,
|
|
2758
|
+
size: formatSize(doc.fileSize),
|
|
2759
|
+
dateModified: formatDate(doc.modifiedAt),
|
|
2760
|
+
url: `file://${doc.path}`,
|
|
2761
|
+
thumbnailUrl: void 0,
|
|
2762
|
+
// 文档类型通常没有缩略图
|
|
2763
|
+
// 搜索特有字段
|
|
2764
|
+
score,
|
|
2765
|
+
snippet: extractSnippet(chunkContent || doc.content, query),
|
|
2766
|
+
matchType: mode === "hybrid" ? "hybrid" : mode === "semantic" ? "semantic" : "keyword",
|
|
2767
|
+
// chunk 信息(可选)
|
|
2768
|
+
chunkIndex
|
|
2769
|
+
});
|
|
2770
|
+
}
|
|
2771
|
+
return results;
|
|
2772
|
+
}
|
|
2773
|
+
/**
|
|
2774
|
+
* 检查文档是否匹配所有过滤条件
|
|
2775
|
+
*/
|
|
2776
|
+
matchesFilters(doc, options) {
|
|
2777
|
+
if (!options) return true;
|
|
2778
|
+
const combineMode = options.combineMode || "AND";
|
|
2779
|
+
const filters = [];
|
|
2780
|
+
if (options.fileTypes) {
|
|
2781
|
+
filters.push(options.fileTypes.includes(doc.fileType));
|
|
2782
|
+
}
|
|
2783
|
+
if (options.dateRange) {
|
|
2784
|
+
let dateMatch = true;
|
|
2785
|
+
if (options.dateRange.start && doc.modifiedAt < options.dateRange.start) {
|
|
2786
|
+
dateMatch = false;
|
|
2787
|
+
}
|
|
2788
|
+
if (options.dateRange.end && doc.modifiedAt > options.dateRange.end) {
|
|
2789
|
+
dateMatch = false;
|
|
2790
|
+
}
|
|
2791
|
+
filters.push(dateMatch);
|
|
2792
|
+
}
|
|
2793
|
+
if (options.directories && options.directories.length > 0) {
|
|
2794
|
+
const dirMatch = options.directories.some((dir) => doc.path.startsWith(dir));
|
|
2795
|
+
filters.push(dirMatch);
|
|
2796
|
+
}
|
|
2797
|
+
if (options.sizeRange) {
|
|
2798
|
+
let sizeMatch = true;
|
|
2799
|
+
if (options.sizeRange.min !== void 0 && doc.fileSize < options.sizeRange.min) {
|
|
2800
|
+
sizeMatch = false;
|
|
2801
|
+
}
|
|
2802
|
+
if (options.sizeRange.max !== void 0 && doc.fileSize > options.sizeRange.max) {
|
|
2803
|
+
sizeMatch = false;
|
|
2804
|
+
}
|
|
2805
|
+
filters.push(sizeMatch);
|
|
2806
|
+
}
|
|
2807
|
+
if (options.fileNamePattern) {
|
|
2808
|
+
const pattern = this.wildcardToRegex(options.fileNamePattern);
|
|
2809
|
+
filters.push(pattern.test(doc.name));
|
|
2810
|
+
}
|
|
2811
|
+
if (options.titleContains) {
|
|
2812
|
+
const title = doc.title || doc.name;
|
|
2813
|
+
filters.push(title.toLowerCase().includes(options.titleContains.toLowerCase()));
|
|
2814
|
+
}
|
|
2815
|
+
if (filters.length === 0) return true;
|
|
2816
|
+
if (combineMode === "AND") {
|
|
2817
|
+
return filters.every((f) => f);
|
|
2818
|
+
} else {
|
|
2819
|
+
return filters.some((f) => f);
|
|
2820
|
+
}
|
|
2821
|
+
}
|
|
2822
|
+
/**
|
|
2823
|
+
* 将通配符模式转换为正则表达式
|
|
2824
|
+
* * 匹配任意字符
|
|
2825
|
+
* ? 匹配单个字符
|
|
2826
|
+
*/
|
|
2827
|
+
wildcardToRegex(pattern) {
|
|
2828
|
+
const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
|
|
2829
|
+
return new RegExp(`^${escaped}$`, "i");
|
|
2830
|
+
}
|
|
2831
|
+
/**
|
|
2832
|
+
* 标准化向量搜索分数
|
|
2833
|
+
* 将距离转换为 0-1 的相似度分数
|
|
2834
|
+
*/
|
|
2835
|
+
normalizeVectorScores(vectorResults) {
|
|
2836
|
+
if (vectorResults.length === 0) {
|
|
2837
|
+
return [];
|
|
2838
|
+
}
|
|
2839
|
+
const scores = vectorResults.map((r) => ({
|
|
2840
|
+
id: r.id,
|
|
2841
|
+
rawScore: 1 / (1 + r.distance)
|
|
2842
|
+
}));
|
|
2843
|
+
const maxScore = Math.max(...scores.map((s) => s.rawScore));
|
|
2844
|
+
return scores.map((s) => ({
|
|
2845
|
+
id: s.id,
|
|
2846
|
+
score: maxScore > 0 ? s.rawScore / maxScore : 0
|
|
2847
|
+
}));
|
|
2848
|
+
}
|
|
2849
|
+
/**
|
|
2850
|
+
* 删除文件索引
|
|
2851
|
+
*/
|
|
2852
|
+
async removeFile(filePath) {
|
|
2853
|
+
const doc = this.metaStore.getByPath(filePath);
|
|
2854
|
+
if (!doc) return;
|
|
2855
|
+
await this.vectorStore.delete(doc.id);
|
|
2856
|
+
this.fullTextIndex.remove(doc.id);
|
|
2857
|
+
this.metaStore.deleteByPath(filePath);
|
|
2858
|
+
}
|
|
2859
|
+
/**
|
|
2860
|
+
* 获取统计信息
|
|
2861
|
+
*/
|
|
2862
|
+
getStats() {
|
|
2863
|
+
const stats = this.metaStore.getStats();
|
|
2864
|
+
stats.directories = this.config.indexDirectories || getDefaultDirectories();
|
|
2865
|
+
return stats;
|
|
2866
|
+
}
|
|
2867
|
+
/**
|
|
2868
|
+
* 清空所有索引
|
|
2869
|
+
*/
|
|
2870
|
+
async clear() {
|
|
2871
|
+
this.metaStore.clear();
|
|
2872
|
+
this.fullTextIndex.clear();
|
|
2873
|
+
}
|
|
2874
|
+
/**
|
|
2875
|
+
* 保存索引
|
|
2876
|
+
*/
|
|
2877
|
+
async save() {
|
|
2878
|
+
await this.fullTextIndex.save();
|
|
2879
|
+
}
|
|
2880
|
+
/**
|
|
2881
|
+
* 批量索引文件(根据配置自动选择串行或并行)
|
|
2882
|
+
*/
|
|
2883
|
+
async indexFiles(filePaths, onProgress) {
|
|
2884
|
+
const enableParallel = this.config.enableParallelIndexing !== false;
|
|
2885
|
+
if (enableParallel && filePaths.length > 1) {
|
|
2886
|
+
return this.indexFilesParallel(filePaths, onProgress);
|
|
2887
|
+
}
|
|
2888
|
+
return this.indexFilesSerial(filePaths, onProgress);
|
|
2889
|
+
}
|
|
2890
|
+
/**
|
|
2891
|
+
* 串行索引文件(原实现)
|
|
2892
|
+
*/
|
|
2893
|
+
async indexFilesSerial(filePaths, onProgress) {
|
|
2894
|
+
await this.ensureInitialized();
|
|
2895
|
+
const result = {
|
|
2896
|
+
success: 0,
|
|
2897
|
+
failed: 0,
|
|
2898
|
+
errors: []
|
|
2899
|
+
};
|
|
2900
|
+
const total = filePaths.length;
|
|
2901
|
+
onProgress?.({ indexed: 0, total, stage: "parsing" });
|
|
2902
|
+
for (let i = 0; i < filePaths.length; i++) {
|
|
2903
|
+
const filePath = filePaths[i];
|
|
2904
|
+
try {
|
|
2905
|
+
await this.indexFile(filePath);
|
|
2906
|
+
result.success++;
|
|
2907
|
+
} catch (error) {
|
|
2908
|
+
result.failed++;
|
|
2909
|
+
result.errors.push({
|
|
2910
|
+
path: filePath,
|
|
2911
|
+
error: error instanceof Error ? error.message : String(error)
|
|
2912
|
+
});
|
|
2913
|
+
}
|
|
2914
|
+
onProgress?.({
|
|
2915
|
+
indexed: i + 1,
|
|
2916
|
+
total,
|
|
2917
|
+
currentFile: filePath,
|
|
2918
|
+
stage: i === filePaths.length - 1 ? "done" : "parsing"
|
|
2919
|
+
});
|
|
2920
|
+
if (i % 10 === 0) {
|
|
2921
|
+
await sleep(10);
|
|
2922
|
+
}
|
|
2923
|
+
}
|
|
2924
|
+
await this.fullTextIndex.save();
|
|
2925
|
+
return result;
|
|
2926
|
+
}
|
|
2927
|
+
/**
|
|
2928
|
+
* 并行索引文件(控制并发数)
|
|
2929
|
+
*/
|
|
2930
|
+
async indexFilesParallel(filePaths, onProgress, concurrency) {
|
|
2931
|
+
await this.ensureInitialized();
|
|
2932
|
+
const result = {
|
|
2933
|
+
success: 0,
|
|
2934
|
+
failed: 0,
|
|
2935
|
+
errors: []
|
|
2936
|
+
};
|
|
2937
|
+
const total = filePaths.length;
|
|
2938
|
+
const concurrencyLimit = concurrency || this.config.indexConcurrency || 5;
|
|
2939
|
+
onProgress?.({ indexed: 0, total, stage: "parsing" });
|
|
2940
|
+
let processed = 0;
|
|
2941
|
+
let currentIndex = 0;
|
|
2942
|
+
const running = /* @__PURE__ */ new Set();
|
|
2943
|
+
const processNext = async () => {
|
|
2944
|
+
while (currentIndex < total) {
|
|
2945
|
+
const fileIndex = currentIndex++;
|
|
2946
|
+
const filePath = filePaths[fileIndex];
|
|
2947
|
+
const task = (async () => {
|
|
2948
|
+
try {
|
|
2949
|
+
await this.indexFile(filePath);
|
|
2950
|
+
result.success++;
|
|
2951
|
+
} catch (error) {
|
|
2952
|
+
result.failed++;
|
|
2953
|
+
result.errors.push({
|
|
2954
|
+
path: filePath,
|
|
2955
|
+
error: error instanceof Error ? error.message : String(error)
|
|
2956
|
+
});
|
|
2957
|
+
} finally {
|
|
2958
|
+
processed++;
|
|
2959
|
+
onProgress?.({
|
|
2960
|
+
indexed: processed,
|
|
2961
|
+
total,
|
|
2962
|
+
currentFile: filePath,
|
|
2963
|
+
stage: processed === total ? "done" : "parsing"
|
|
2964
|
+
});
|
|
2965
|
+
}
|
|
2966
|
+
})();
|
|
2967
|
+
running.add(task);
|
|
2968
|
+
task.finally(() => {
|
|
2969
|
+
running.delete(task);
|
|
2970
|
+
});
|
|
2971
|
+
if (running.size >= concurrencyLimit) {
|
|
2972
|
+
await Promise.race(running);
|
|
2973
|
+
}
|
|
2974
|
+
}
|
|
2975
|
+
};
|
|
2976
|
+
const workers = Array.from({ length: Math.min(concurrencyLimit, total) }, () => processNext());
|
|
2977
|
+
await Promise.all(workers);
|
|
2978
|
+
await Promise.all(running);
|
|
2979
|
+
await this.fullTextIndex.save();
|
|
2980
|
+
return result;
|
|
2981
|
+
}
|
|
2982
|
+
/**
|
|
2983
|
+
* 批量删除文件索引
|
|
2984
|
+
*/
|
|
2985
|
+
async removeFiles(filePaths) {
|
|
2986
|
+
const result = {
|
|
2987
|
+
success: 0,
|
|
2988
|
+
failed: 0,
|
|
2989
|
+
errors: []
|
|
2990
|
+
};
|
|
2991
|
+
let notFound = 0;
|
|
2992
|
+
for (const filePath of filePaths) {
|
|
2993
|
+
try {
|
|
2994
|
+
const doc = this.metaStore.getByPath(filePath);
|
|
2995
|
+
if (!doc) {
|
|
2996
|
+
notFound++;
|
|
2997
|
+
continue;
|
|
2998
|
+
}
|
|
2999
|
+
await this.removeFile(filePath);
|
|
3000
|
+
result.success++;
|
|
3001
|
+
} catch (error) {
|
|
3002
|
+
result.failed++;
|
|
3003
|
+
result.errors.push({
|
|
3004
|
+
path: filePath,
|
|
3005
|
+
error: error instanceof Error ? error.message : String(error)
|
|
3006
|
+
});
|
|
3007
|
+
}
|
|
3008
|
+
}
|
|
3009
|
+
if (notFound > 0 && result.errors.length < 10) {
|
|
3010
|
+
result.errors.push({
|
|
3011
|
+
path: `[${notFound} \u4E2A\u6587\u4EF6\u4E0D\u5B58\u5728\u4E8E\u7D22\u5F15\u4E2D]`,
|
|
3012
|
+
error: "\u6587\u4EF6\u53EF\u80FD\u5DF2\u88AB\u5220\u9664\u6216\u8DEF\u5F84\u4E0D\u5339\u914D"
|
|
3013
|
+
});
|
|
3014
|
+
}
|
|
3015
|
+
return result;
|
|
3016
|
+
}
|
|
3017
|
+
/**
|
|
3018
|
+
* 批量更新文件索引(重新索引)
|
|
3019
|
+
*/
|
|
3020
|
+
async updateFiles(filePaths, onProgress) {
|
|
3021
|
+
await this.removeFiles(filePaths);
|
|
3022
|
+
return this.indexFiles(filePaths, onProgress);
|
|
3023
|
+
}
|
|
3024
|
+
// ==================== 目录监听(委托给 DirectoryWatcher)====================
|
|
3025
|
+
/** 监听目录变化并自动更新索引 */
|
|
3026
|
+
watchDirectory(directory, options) {
|
|
3027
|
+
this.directoryWatcher.watch(directory, options);
|
|
3028
|
+
}
|
|
3029
|
+
/** 停止监听目录 */
|
|
3030
|
+
unwatchDirectory(directory) {
|
|
3031
|
+
this.directoryWatcher.unwatch(directory);
|
|
3032
|
+
}
|
|
3033
|
+
/** 停止所有监听 */
|
|
3034
|
+
unwatchAll() {
|
|
3035
|
+
this.directoryWatcher.unwatchAll();
|
|
3036
|
+
}
|
|
3037
|
+
/** 获取正在监听的目录列表 */
|
|
3038
|
+
getWatchedDirectories() {
|
|
3039
|
+
return this.directoryWatcher.getWatchedDirectories();
|
|
3040
|
+
}
|
|
3041
|
+
// ==================== 维护功能(委托给 maintenance 模块)====================
|
|
3042
|
+
/** 获取维护依赖 */
|
|
3043
|
+
getMaintenanceDeps() {
|
|
3044
|
+
return {
|
|
3045
|
+
metaStore: this.metaStore,
|
|
3046
|
+
vectorStore: this.vectorStore,
|
|
3047
|
+
fullTextIndex: this.fullTextIndex,
|
|
3048
|
+
indexErrors: this.indexErrors,
|
|
3049
|
+
indexFile: (filePath) => this.indexFile(filePath),
|
|
3050
|
+
removeFile: (filePath) => this.removeFile(filePath),
|
|
3051
|
+
indexFiles: (filePaths) => this.indexFiles(filePaths)
|
|
3052
|
+
};
|
|
3053
|
+
}
|
|
3054
|
+
/** 清理无效索引(文件已删除但索引还在) */
|
|
3055
|
+
async cleanup() {
|
|
3056
|
+
await this.ensureInitialized();
|
|
3057
|
+
return cleanup(this.getMaintenanceDeps());
|
|
3058
|
+
}
|
|
3059
|
+
/** 优化索引(压缩、碎片整理) */
|
|
3060
|
+
async optimize() {
|
|
3061
|
+
await this.ensureInitialized();
|
|
3062
|
+
return optimize(this.getMaintenanceDeps());
|
|
3063
|
+
}
|
|
3064
|
+
/** 健康检查 */
|
|
3065
|
+
async healthCheck() {
|
|
3066
|
+
await this.ensureInitialized();
|
|
3067
|
+
return healthCheck(this.getMaintenanceDeps());
|
|
3068
|
+
}
|
|
3069
|
+
/** 获取索引错误列表 */
|
|
3070
|
+
getIndexErrors() {
|
|
3071
|
+
return getIndexErrors(this.getMaintenanceDeps());
|
|
3072
|
+
}
|
|
3073
|
+
/** 重试失败的索引 */
|
|
3074
|
+
async retryFailedIndexes() {
|
|
3075
|
+
return retryFailedIndexes(this.getMaintenanceDeps());
|
|
3076
|
+
}
|
|
3077
|
+
/** 确保已初始化 */
|
|
3078
|
+
async ensureInitialized() {
|
|
3079
|
+
if (!this.initialized) {
|
|
3080
|
+
await this.init();
|
|
3081
|
+
}
|
|
3082
|
+
}
|
|
3083
|
+
// ==================== 备份功能(委托给 backup 模块)====================
|
|
3084
|
+
/** 获取备份依赖 */
|
|
3085
|
+
getBackupDeps() {
|
|
3086
|
+
return {
|
|
3087
|
+
config: this.config,
|
|
3088
|
+
metaStore: this.metaStore,
|
|
3089
|
+
vectorStore: this.vectorStore,
|
|
3090
|
+
fullTextIndex: this.fullTextIndex,
|
|
3091
|
+
getStats: () => this.getStats(),
|
|
3092
|
+
reinitializeStores: (newMetaStore, newVectorStore, newFullTextIndex) => {
|
|
3093
|
+
this.metaStore = newMetaStore;
|
|
3094
|
+
this.vectorStore = newVectorStore;
|
|
3095
|
+
this.fullTextIndex = newFullTextIndex;
|
|
3096
|
+
}
|
|
3097
|
+
};
|
|
3098
|
+
}
|
|
3099
|
+
/** 导出索引数据 */
|
|
3100
|
+
async exportIndex(outputPath) {
|
|
3101
|
+
await this.ensureInitialized();
|
|
3102
|
+
return exportIndex(this.getBackupDeps(), outputPath);
|
|
3103
|
+
}
|
|
3104
|
+
/** 导入索引数据 */
|
|
3105
|
+
async importIndex(inputPath) {
|
|
3106
|
+
await this.ensureInitialized();
|
|
3107
|
+
return importIndex(this.getBackupDeps(), inputPath, {
|
|
3108
|
+
MetaStore,
|
|
3109
|
+
VectorStore,
|
|
3110
|
+
FullTextIndex
|
|
3111
|
+
});
|
|
3112
|
+
}
|
|
3113
|
+
/** 列出备份 */
|
|
3114
|
+
async listBackups(backupDir) {
|
|
3115
|
+
return listBackups(backupDir);
|
|
3116
|
+
}
|
|
3117
|
+
// ==================== 资源清理 ====================
|
|
3118
|
+
/** 销毁资源,释放所有占用的资源 */
|
|
3119
|
+
async destroy() {
|
|
3120
|
+
this.unwatchAll();
|
|
3121
|
+
this.directoryWatcher.clearTimers();
|
|
3122
|
+
this.indexingLocks.clear();
|
|
3123
|
+
try {
|
|
3124
|
+
this.metaStore.close();
|
|
3125
|
+
} catch (error) {
|
|
3126
|
+
console.warn("\u5173\u95ED MetaStore \u5931\u8D25:", error);
|
|
3127
|
+
}
|
|
3128
|
+
try {
|
|
3129
|
+
this.vectorStore.close();
|
|
3130
|
+
} catch (error) {
|
|
3131
|
+
console.warn("\u5173\u95ED VectorStore \u5931\u8D25:", error);
|
|
3132
|
+
}
|
|
3133
|
+
try {
|
|
3134
|
+
disposeEmbedder();
|
|
3135
|
+
} catch (error) {
|
|
3136
|
+
console.warn("\u6E05\u7406 Embedding \u6A21\u578B\u5931\u8D25:", error);
|
|
3137
|
+
}
|
|
3138
|
+
this.indexErrors.clear();
|
|
3139
|
+
this.initialized = false;
|
|
3140
|
+
console.log("DocumentSearch \u8D44\u6E90\u5DF2\u6E05\u7406");
|
|
3141
|
+
}
|
|
3142
|
+
};
|
|
3143
|
+
|
|
3144
|
+
// src/tools/index.ts
|
|
3145
|
+
// Property key on `globalThis` under which the search plugin instance is stored.
// `Symbol.for` reads the global symbol registry, so every evaluation of this
// module resolves to the same symbol.
var SEARCH_PLUGIN_KEY = /* @__PURE__ */ Symbol.for("ai-search-plugin-instance");
|
|
3146
|
+
/**
 * Read the plugin instance registered on `globalThis`.
 *
 * @returns {*} The stored instance, or `null` when the slot is `undefined`
 *   or `null`.
 */
function getGlobalSearchPlugin() {
  const registered = globalThis[SEARCH_PLUGIN_KEY];
  return registered ?? null;
}
|
|
3150
|
+
/**
 * Register a plugin instance on `globalThis`, replacing any previous one.
 *
 * @param {*} instance - Value to store under the plugin key.
 */
function setGlobalSearchPlugin(instance) {
  globalThis[SEARCH_PLUGIN_KEY] = instance;
}
|
|
3154
|
+
/**
 * Create a search plugin instance.
 *
 * @param {object} options - Forwarded unchanged to
 *   `createSearchPluginInstance`.
 * @returns {Promise<object>} Whatever `createSearchPluginInstance` returns.
 */
function searchPlugin(options) {
  const pendingInstance = createSearchPluginInstance(options);
  return pendingInstance;
}
|
|
3157
|
+
/**
 * Return the globally registered search plugin instance.
 *
 * @returns {*} Whatever `getGlobalSearchPlugin` returns.
 */
function getSearchPlugin() {
  const current = getGlobalSearchPlugin();
  return current;
}
|
|
3160
|
+
/**
 * Create and initialize a search plugin instance.
 *
 * Builds a `DocumentSearch` engine and waits for its `init()`. If
 * `options.workspace` is set, indexing of that directory is started without
 * being awaited; failures of that background run are only logged. The
 * finished instance is registered globally before it is returned.
 *
 * @param {object} options - `dataDir`, optional `workspace`, `arkApiKey` and
 *   `embeddingDimension`.
 * @returns {Promise<object>} `{ tools, setWorkspace, getWorkspaceState, search }`.
 * @throws Rethrows any error raised during setup, after logging it.
 */
async function createSearchPluginInstance(options) {
  const { dataDir, workspace, arkApiKey: apiKey, embeddingDimension: dimension } = options;
  try {
    console.log("\u{1F4C2} \u6B63\u5728\u521D\u59CB\u5316\u6587\u6863\u641C\u7D22\u5F15\u64CE...", dataDir);
    const search = new DocumentSearch({
      dataDir,
      arkApiKey: apiKey,
      embeddingDimension: dimension
    });
    await search.init();
    console.log("\u2705 \u6587\u6863\u641C\u7D22\u5F15\u64CE\u521D\u59CB\u5316\u5B8C\u6210:", dataDir);

    // Mutable state shared with the tools that receive it.
    const workspaceState = { directory: null, indexed: false, filesIndexed: 0 };

    // Switch the workspace to `directory` and index it; rejects when indexing fails.
    const setWorkspace = async (directory) => {
      workspaceState.directory = directory;
      workspaceState.indexed = false;
      workspaceState.filesIndexed = 0;
      console.log(`\u{1F4C1} \u5F00\u59CB\u7D22\u5F15\u5DE5\u4F5C\u7A7A\u95F4: ${directory}`);

      // Scanning events and events without a known total are ignored.
      const trackProgress = (progress) => {
        if (progress.stage === "scanning" || !(progress.total > 0)) {
          return;
        }
        workspaceState.filesIndexed = progress.indexed;
        if (progress.indexed % 100 === 0) {
          console.log(` \u{1F4C4} \u5DF2\u7D22\u5F15 ${progress.indexed}/${progress.total} \u4E2A\u6587\u4EF6...`);
        }
      };

      try {
        await search.indexDirectory(directory, trackProgress);
        workspaceState.indexed = true;
        console.log(`\u2705 \u5DE5\u4F5C\u7A7A\u95F4\u7D22\u5F15\u5B8C\u6210\uFF0C\u5171 ${workspaceState.filesIndexed} \u4E2A\u6587\u4EF6`);
      } catch (error) {
        console.error("\u274C \u5DE5\u4F5C\u7A7A\u95F4\u7D22\u5F15\u5931\u8D25:", error);
        throw error;
      }
    };

    if (workspace) {
      console.log(`\u{1F4C1} \u68C0\u6D4B\u5230\u5DE5\u4F5C\u7A7A\u95F4\uFF0C\u5C06\u5728\u540E\u53F0\u5F00\u59CB\u7D22\u5F15: ${workspace}`);
      // Not awaited: indexing continues after the instance has been returned.
      setWorkspace(workspace).catch((error) => {
        console.error("\u274C \u540E\u53F0\u7D22\u5F15\u5931\u8D25:", error);
      });
    }

    const tools = [
      // Search and single-target indexing
      createSearchDocumentsTool(search, workspaceState),
      createIndexFileTool(search),
      createIndexDirectoryTool(search),
      // Batch operations
      createIndexFilesTool(search),
      createRemoveFilesTool(search),
      createUpdateFilesTool(search),
      // Statistics and reset
      createGetStatsTool(search, workspaceState),
      createClearIndexTool(search),
      // Workspace
      createSetWorkspaceTool(search, workspaceState),
      createGetWorkspaceTool(workspaceState),
      // Directory watching
      createWatchDirectoryTool(search),
      createUnwatchDirectoryTool(search),
      createGetWatchedDirectoriesTool(search),
      // Maintenance and backup
      createCleanupIndexTool(search),
      createExportIndexTool(search),
      createImportIndexTool(search),
      createListBackupsTool(search),
      createOptimizeIndexTool(search),
      createHealthCheckTool(search),
      createGetIndexErrorsTool(search),
      createRetryFailedIndexesTool(search)
    ];

    const instance = {
      tools,
      setWorkspace,
      // Returns a shallow copy, so callers cannot mutate the live state.
      getWorkspaceState: () => ({ ...workspaceState }),
      search
    };
    setGlobalSearchPlugin(instance);
    return instance;
  } catch (error) {
    console.error("\u274C \u641C\u7D22\u63D2\u4EF6\u521D\u59CB\u5316\u5931\u8D25:", error);
    throw error;
  }
}
|
|
3239
|
+
function createSearchDocumentsTool(search, workspaceState) {
|
|
3240
|
+
return {
|
|
3241
|
+
name: "search_local_documents",
|
|
3242
|
+
description: `\u641C\u7D22\u7528\u6237\u7535\u8111\u4E0A\u7684\u672C\u5730\u6587\u6863\uFF0C\u652F\u6301\u8BED\u4E49\u641C\u7D22\uFF08\u7406\u89E3\u610F\u56FE\uFF09\u548C\u5173\u952E\u8BCD\u641C\u7D22\u3002
|
|
3243
|
+
|
|
3244
|
+
\u26A0\uFE0F \u91CD\u8981\uFF1A\u67E5\u627E\u6587\u6863\u65F6\uFF0C\u4F18\u5148\u4F7F\u7528\u6B64\u5DE5\u5177\u800C\u4E0D\u662F execute_command \u6267\u884C find \u547D\u4EE4\uFF01
|
|
3245
|
+
- \u6B64\u5DE5\u5177\u652F\u6301\u8BED\u4E49\u7406\u89E3\uFF0C\u53EF\u4EE5\u6839\u636E\u5185\u5BB9\u67E5\u627E\u6587\u6863\uFF0C\u4E0D\u4EC5\u4EC5\u662F\u6587\u4EF6\u540D\u5339\u914D
|
|
3246
|
+
- \u5DF2\u7D22\u5F15\u7684\u6587\u6863\u53EF\u4EE5\u76F4\u63A5\u641C\u7D22\uFF0C\u65E0\u9700\u904D\u5386\u6587\u4EF6\u7CFB\u7EDF
|
|
3247
|
+
- \u652F\u6301\u6309\u6587\u4EF6\u7C7B\u578B\u8FC7\u6EE4\uFF08\u5982\u53EA\u641C\u7D22 PDF\uFF1Afile_types="pdf"\uFF09
|
|
3248
|
+
|
|
3249
|
+
\u652F\u6301\u7684\u6587\u4EF6\u7C7B\u578B\uFF1A
|
|
3250
|
+
- Word \u6587\u6863 (.docx, .doc)
|
|
3251
|
+
- PDF \u6587\u4EF6 (.pdf)
|
|
3252
|
+
- Excel \u8868\u683C (.xlsx, .xls)
|
|
3253
|
+
- PPT \u6F14\u793A\u6587\u7A3F (.pptx, .ppt)
|
|
3254
|
+
- \u6587\u672C\u6587\u4EF6 (.txt, .md)
|
|
3255
|
+
|
|
3256
|
+
\u4F7F\u7528\u573A\u666F\uFF1A
|
|
3257
|
+
- \u7528\u6237\u60F3\u67E5\u627E\u67D0\u4E2A\u4E3B\u9898\u7684\u6587\u6863\uFF0C\u5982"\u627E\u4E00\u4E0B\u53BB\u5E74\u7684\u91C7\u8D2D\u5408\u540C"\u3001"\u641C\u7D22\u5173\u4E8E\u7CD6\u5C3F\u75C5\u7684PDF\u6587\u6863"
|
|
3258
|
+
- \u7528\u6237\u9700\u8981\u7279\u5B9A\u5185\u5BB9\u7684\u6587\u4EF6\uFF0C\u5982"\u5173\u4E8E\u7528\u6237\u9690\u79C1\u653F\u7B56\u7684\u6587\u6863"
|
|
3259
|
+
- \u7528\u6237\u8BB0\u4E0D\u6E05\u6587\u4EF6\u540D\u4F46\u8BB0\u5F97\u5185\u5BB9\uFF0C\u5982"\u6709\u4E2A\u6587\u6863\u63D0\u5230\u4E86\u5B63\u5EA6\u9500\u552E\u76EE\u6807"
|
|
3260
|
+
- \u7528\u6237\u60F3\u627E\u7279\u5B9A\u7C7B\u578B\u7684\u6587\u4EF6\uFF0C\u5982"\u627E\u6240\u6709PDF\u6587\u4EF6"\uFF08\u4F7F\u7528 file_types="pdf"\uFF09
|
|
3261
|
+
|
|
3262
|
+
\u641C\u7D22\u8303\u56F4\uFF1A
|
|
3263
|
+
- \u5982\u679C\u8BBE\u7F6E\u4E86\u5DE5\u4F5C\u7A7A\u95F4\uFF0C\u9ED8\u8BA4\u53EA\u641C\u7D22\u5DE5\u4F5C\u7A7A\u95F4\u5185\u7684\u6587\u6863
|
|
3264
|
+
- \u53EF\u4EE5\u901A\u8FC7 scope \u53C2\u6570\u9009\u62E9\u641C\u7D22\u8303\u56F4\uFF1Aworkspace\uFF08\u5DE5\u4F5C\u7A7A\u95F4\uFF09\u3001all\uFF08\u5168\u90E8\u5DF2\u7D22\u5F15\uFF09
|
|
3265
|
+
- \u4F7F\u7528 set_search_workspace \u5DE5\u5177\u53EF\u4EE5\u8BBE\u7F6E\u5DE5\u4F5C\u7A7A\u95F4`,
|
|
3266
|
+
parameters: {
|
|
3267
|
+
type: "object",
|
|
3268
|
+
properties: {
|
|
3269
|
+
query: {
|
|
3270
|
+
type: "string",
|
|
3271
|
+
description: '\u641C\u7D22\u5185\u5BB9\uFF0C\u53EF\u4EE5\u662F\u5173\u952E\u8BCD\u6216\u81EA\u7136\u8BED\u8A00\u63CF\u8FF0\uFF0C\u5982"\u91C7\u8D2D\u5408\u540C"\u6216"\u5173\u4E8E\u9879\u76EE\u9884\u7B97\u7684\u6587\u6863"'
|
|
3272
|
+
},
|
|
3273
|
+
limit: {
|
|
3274
|
+
type: "number",
|
|
3275
|
+
description: "\u8FD4\u56DE\u7ED3\u679C\u6570\u91CF\uFF0C\u9ED8\u8BA4 10\uFF0C\u6700\u5927 50"
|
|
3276
|
+
},
|
|
3277
|
+
mode: {
|
|
3278
|
+
type: "string",
|
|
3279
|
+
enum: ["semantic", "keyword", "hybrid"],
|
|
3280
|
+
description: "\u641C\u7D22\u6A21\u5F0F\uFF1Asemantic\uFF08\u8BED\u4E49\uFF0C\u7406\u89E3\u610F\u56FE\uFF09\u3001keyword\uFF08\u7CBE\u786E\u5173\u952E\u8BCD\uFF09\u3001hybrid\uFF08\u6DF7\u5408\uFF0C\u63A8\u8350\uFF09"
|
|
3281
|
+
},
|
|
3282
|
+
file_types: {
|
|
3283
|
+
type: "string",
|
|
3284
|
+
description: '\u9650\u5B9A\u6587\u4EF6\u7C7B\u578B\uFF0C\u9017\u53F7\u5206\u9694\uFF0C\u53EF\u9009\uFF1Adocument,pdf,text\u3002\u5982 "pdf,document"'
|
|
3285
|
+
},
|
|
3286
|
+
scope: {
|
|
3287
|
+
type: "string",
|
|
3288
|
+
enum: ["workspace", "all"],
|
|
3289
|
+
description: "\u641C\u7D22\u8303\u56F4\uFF1Aworkspace\uFF08\u4EC5\u5DE5\u4F5C\u7A7A\u95F4\uFF0C\u9ED8\u8BA4\uFF09\u3001all\uFF08\u5168\u90E8\u5DF2\u7D22\u5F15\u6587\u6863\uFF09"
|
|
3290
|
+
}
|
|
3291
|
+
},
|
|
3292
|
+
required: ["query"]
|
|
3293
|
+
},
|
|
3294
|
+
execute: async (args, _context) => {
|
|
3295
|
+
const query = args.query;
|
|
3296
|
+
const limit = Math.min(args.limit || 10, 50);
|
|
3297
|
+
const mode = args.mode || "hybrid";
|
|
3298
|
+
const scope = args.scope || "workspace";
|
|
3299
|
+
const options = { limit, mode };
|
|
3300
|
+
if (args.file_types) {
|
|
3301
|
+
const types = args.file_types.split(",").map((t) => t.trim());
|
|
3302
|
+
options.fileTypes = types;
|
|
3303
|
+
}
|
|
3304
|
+
let results = await search.search(query, options);
|
|
3305
|
+
if (scope === "workspace" && workspaceState.directory) {
|
|
3306
|
+
results = results.filter((r) => r.id.startsWith(workspaceState.directory));
|
|
3307
|
+
}
|
|
3308
|
+
if (results.length === 0) {
|
|
3309
|
+
const suggestion = workspaceState.directory && scope === "workspace" ? '\u53EF\u4EE5\u5C1D\u8BD5\u6362\u4E2A\u5173\u952E\u8BCD\uFF0C\u6216\u4F7F\u7528 scope="all" \u641C\u7D22\u5168\u90E8\u6587\u6863\uFF0C\u6216\u7528 set_search_workspace \u8BBE\u7F6E\u65B0\u7684\u5DE5\u4F5C\u7A7A\u95F4' : "\u53EF\u4EE5\u5C1D\u8BD5\u6362\u4E2A\u5173\u952E\u8BCD\uFF0C\u6216\u8005\u5148\u7528 set_search_workspace \u8BBE\u7F6E\u5DE5\u4F5C\u7A7A\u95F4";
|
|
3310
|
+
return JSON.stringify({
|
|
3311
|
+
message: "\u6CA1\u6709\u627E\u5230\u5339\u914D\u7684\u6587\u6863",
|
|
3312
|
+
suggestion,
|
|
3313
|
+
workspace: workspaceState.directory,
|
|
3314
|
+
scope
|
|
3315
|
+
});
|
|
3316
|
+
}
|
|
3317
|
+
const formattedResults = results.map((r, idx) => ({
|
|
3318
|
+
rank: idx + 1,
|
|
3319
|
+
path: r.id,
|
|
3320
|
+
name: r.name,
|
|
3321
|
+
type: r.type,
|
|
3322
|
+
size: r.size,
|
|
3323
|
+
modified: r.dateModified,
|
|
3324
|
+
relevance: `${Math.round(r.score * 100)}%`,
|
|
3325
|
+
matchType: r.matchType,
|
|
3326
|
+
snippet: r.snippet
|
|
3327
|
+
}));
|
|
3328
|
+
return JSON.stringify({
|
|
3329
|
+
query,
|
|
3330
|
+
mode,
|
|
3331
|
+
scope,
|
|
3332
|
+
workspace: workspaceState.directory,
|
|
3333
|
+
total: results.length,
|
|
3334
|
+
results: formattedResults
|
|
3335
|
+
});
|
|
3336
|
+
}
|
|
3337
|
+
};
|
|
3338
|
+
}
|
|
3339
|
+
/**
 * Builds the "index_document_file" tool, which adds a single file to the search index.
 *
 * @param {object} search - Search engine instance; only `indexFile(path)` is called on it.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createIndexFileTool(search) {
  const description = `\u5C06\u6307\u5B9A\u7684\u6587\u6863\u6587\u4EF6\u6DFB\u52A0\u5230\u641C\u7D22\u7D22\u5F15\u4E2D\u3002
\u7D22\u5F15\u540E\uFF0C\u8BE5\u6587\u4EF6\u53EF\u4EE5\u901A\u8FC7 search_local_documents \u641C\u7D22\u5230\u3002
\u652F\u6301\u7684\u6587\u4EF6\u7C7B\u578B\uFF1A.docx, .doc, .pdf, .xlsx, .xls, .pptx, .ppt, .txt, .md`;
  const parameters = {
    type: "object",
    properties: {
      file_path: {
        type: "string",
        description: "\u8981\u7D22\u5F15\u7684\u6587\u4EF6\u7EDD\u5BF9\u8DEF\u5F84\uFF0C\u5982 /Users/xxx/Documents/report.pdf"
      }
    },
    required: ["file_path"]
  };
  // Indexes the requested file; indexing failures are reported in the JSON payload
  // instead of being thrown.
  const execute = async (args, _context) => {
    const target = args.file_path;
    try {
      await search.indexFile(target);
      const report = {
        success: true,
        message: `\u6587\u4EF6\u5DF2\u6210\u529F\u7D22\u5F15: ${target}`,
        path: target
      };
      return JSON.stringify(report);
    } catch (failure) {
      let reason;
      if (failure instanceof Error) {
        reason = failure.message;
      } else {
        reason = String(failure);
      }
      return JSON.stringify({ success: false, error: reason, path: target });
    }
  };
  return {
    name: "index_document_file",
    description,
    parameters,
    // Declares that running this tool modifies index data.
    sideEffects: [{ type: "filesystem", success: true }],
    execute
  };
}
|
|
3376
|
+
/**
 * Builds the "index_document_directory" tool, which indexes a directory via
 * `search.indexDirectory` and reports the last progress counters it saw.
 *
 * Path handling: a bare "~" or a "~/" ("~\") prefix is expanded to the current
 * user's home directory; any other relative path (including "~name/...") is
 * resolved against `context.cwd`.
 *
 * @param {object} search - Search engine instance; only `indexDirectory(dir, onProgress)` is called.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createIndexDirectoryTool(search) {
  return {
    name: "index_document_directory",
    description: `\u626B\u63CF\u5E76\u7D22\u5F15\u6307\u5B9A\u76EE\u5F55\u4E0B\u7684\u6240\u6709\u6587\u6863\u6587\u4EF6\uFF08\u5305\u62EC\u5B50\u76EE\u5F55\uFF09\u3002
\u7D22\u5F15\u5B8C\u6210\u540E\uFF0C\u76EE\u5F55\u5185\u7684\u6240\u6709\u652F\u6301\u7684\u6587\u6863\u90FD\u53EF\u4EE5\u88AB\u641C\u7D22\u3002
\u9996\u6B21\u7D22\u5F15\u53EF\u80FD\u9700\u8981\u4E00\u4E9B\u65F6\u95F4\uFF0C\u53D6\u51B3\u4E8E\u6587\u4EF6\u6570\u91CF\u3002

\u4F7F\u7528\u573A\u666F\uFF1A
- \u7528\u6237\u60F3\u641C\u7D22\u67D0\u4E2A\u76EE\u5F55\u4F46\u8FD8\u6CA1\u7D22\u5F15\u8FC7
- \u7528\u6237\u7684\u6587\u6863\u76EE\u5F55\u6709\u66F4\u65B0\uFF0C\u9700\u8981\u91CD\u65B0\u7D22\u5F15`,
    parameters: {
      type: "object",
      properties: {
        directory: {
          type: "string",
          description: "\u8981\u7D22\u5F15\u7684\u76EE\u5F55\u7EDD\u5BF9\u8DEF\u5F84\uFF0C\u5982 /Users/xxx/Documents \u6216 ~/Documents"
        }
      },
      required: ["directory"]
    },
    sideEffects: [{ type: "filesystem", success: true }],
    execute: async (args, context) => {
      let directory = args?.directory;
      // Tool arguments come from a model; answer with a structured error rather
      // than letting `.startsWith` throw on a non-string value.
      if (typeof directory !== "string") {
        return JSON.stringify({
          success: false,
          error: "directory \u5FC5\u987B\u662F\u5B57\u7B26\u4E32",
          directory
        });
      }
      // Expand only "~" itself or a "~/" prefix. "~name/..." is NOT the current
      // user's home, so it is left alone and resolved like any relative path.
      if (directory === "~" || directory.startsWith("~/") || directory.startsWith("~\\")) {
        const os3 = await import("os");
        // Plain concatenation: String.prototype.replace would interpret "$"
        // sequences that may appear in the home directory path.
        directory = os3.homedir() + directory.slice(1);
      }
      const path11 = await import("path");
      if (!path11.isAbsolute(directory)) {
        directory = path11.resolve(context.cwd, directory);
      }
      try {
        let indexed = 0;
        let total = 0;
        // Keep the most recent progress report so it can be echoed in the result.
        await search.indexDirectory(directory, (progress) => {
          indexed = progress.indexed;
          total = progress.total;
        });
        return JSON.stringify({
          success: true,
          message: `\u76EE\u5F55\u7D22\u5F15\u5B8C\u6210`,
          directory,
          filesIndexed: indexed,
          filesFound: total
        });
      } catch (error) {
        return JSON.stringify({
          success: false,
          error: error instanceof Error ? error.message : String(error),
          directory
        });
      }
    }
  };
}
|
|
3432
|
+
/**
 * Builds the "get_document_index_stats" tool, which reports index statistics
 * together with the current workspace state.
 *
 * @param {object} search - Search engine instance; only `getStats()` is called (synchronously).
 * @param {object} workspaceState - Shared state; `directory`, `indexed` and `filesIndexed` are read.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `execute`).
 */
function createGetStatsTool(search, workspaceState) {
  const description = `\u83B7\u53D6\u6587\u6863\u641C\u7D22\u7D22\u5F15\u7684\u7EDF\u8BA1\u4FE1\u606F\u3002
\u5305\u62EC\u5DF2\u7D22\u5F15\u6587\u6863\u6570\u91CF\u3001\u6587\u4EF6\u7C7B\u578B\u5206\u5E03\u3001\u7D22\u5F15\u76EE\u5F55\u3001\u5F53\u524D\u5DE5\u4F5C\u7A7A\u95F4\u7B49\u3002
\u7528\u4E8E\u4E86\u89E3\u5F53\u524D\u7D22\u5F15\u72B6\u6001\uFF0C\u5E2E\u52A9\u51B3\u5B9A\u662F\u5426\u9700\u8981\u7D22\u5F15\u65B0\u76EE\u5F55\u3002`;
  // The tool takes no arguments.
  const parameters = { type: "object", properties: {}, required: [] };
  const execute = async (_args, _context) => {
    const snapshot = search.getStats();
    const report = {
      totalDocuments: snapshot.totalDocuments,
      byType: snapshot.byType,
      indexedDirectories: snapshot.directories,
      // Falls back to "N/A" when no update timestamp is available.
      lastUpdated: snapshot.lastUpdated?.toISOString() || "N/A",
      indexSize: snapshot.indexSize,
      // Workspace information
      workspace: {
        directory: workspaceState.directory,
        indexed: workspaceState.indexed,
        filesIndexed: workspaceState.filesIndexed
      }
    };
    return JSON.stringify(report);
  };
  return {
    name: "get_document_index_stats",
    description,
    parameters,
    execute
  };
}
|
|
3461
|
+
/**
 * Builds the "clear_document_index" tool, which wipes the index after an
 * explicit confirmation.
 *
 * @param {object} search - Search engine instance; only `clear()` is called.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createClearIndexTool(search) {
  const description = `\u6E05\u9664\u6240\u6709\u6587\u6863\u7D22\u5F15\u6570\u636E\u3002
\u8FD9\u662F\u4E00\u4E2A\u5371\u9669\u64CD\u4F5C\uFF0C\u4F1A\u5220\u9664\u6240\u6709\u5DF2\u7D22\u5F15\u7684\u6587\u6863\u4FE1\u606F\u3002
\u6E05\u9664\u540E\u9700\u8981\u91CD\u65B0\u7D22\u5F15\u624D\u80FD\u641C\u7D22\u3002
\u901A\u5E38\u53EA\u5728\u7D22\u5F15\u51FA\u95EE\u9898\u6216\u9700\u8981\u91CD\u5EFA\u65F6\u4F7F\u7528\u3002`;
  const parameters = {
    type: "object",
    properties: {
      confirm: {
        type: "string",
        description: '\u786E\u8BA4\u6E05\u9664\uFF0C\u5FC5\u987B\u8F93\u5165 "YES" \u624D\u4F1A\u6267\u884C'
      }
    },
    required: ["confirm"]
  };
  const execute = async (args, _context) => {
    // Only the exact string "YES" unlocks the destructive operation.
    const confirmed = args.confirm === "YES";
    if (!confirmed) {
      return JSON.stringify({
        success: false,
        message: '\u64CD\u4F5C\u53D6\u6D88\uFF1A\u9700\u8981\u8F93\u5165 "YES" \u786E\u8BA4\u6E05\u9664'
      });
    }
    try {
      await search.clear();
      return JSON.stringify({
        success: true,
        message: "\u7D22\u5F15\u5DF2\u6E05\u9664\uFF0C\u9700\u8981\u91CD\u65B0\u7D22\u5F15\u6587\u6863"
      });
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      return JSON.stringify({ success: false, error: reason });
    }
  };
  return {
    name: "clear_document_index",
    description,
    parameters,
    sideEffects: [{ type: "filesystem", success: true }],
    execute
  };
}
|
|
3501
|
+
/**
 * Builds the "set_search_workspace" tool. It validates the requested directory,
 * records it in `workspaceState`, then indexes it via `search.indexDirectory`.
 *
 * Path handling: a bare "~" or a "~/" ("~\") prefix is expanded to the current
 * user's home directory; any other relative path (including "~name/...") is
 * resolved against `context.cwd`.
 *
 * @param {object} search - Search engine instance; only `indexDirectory(dir, onProgress)` is called.
 * @param {object} workspaceState - Shared mutable state; `directory`, `indexed` and
 *   `filesIndexed` are written once the path has been verified to be a directory.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createSetWorkspaceTool(search, workspaceState) {
  return {
    name: "set_search_workspace",
    description: `\u8BBE\u7F6E\u6587\u6863\u641C\u7D22\u7684\u5DE5\u4F5C\u7A7A\u95F4\u76EE\u5F55\u3002

\u8BBE\u7F6E\u540E\uFF1A
- \u5DE5\u4F5C\u7A7A\u95F4\u5185\u7684\u6587\u6863\u4F1A\u88AB\u81EA\u52A8\u7D22\u5F15
- search_local_documents \u9ED8\u8BA4\u53EA\u641C\u7D22\u5DE5\u4F5C\u7A7A\u95F4\u5185\u7684\u6587\u6863
- \u53EF\u4EE5\u968F\u65F6\u66F4\u6362\u5DE5\u4F5C\u7A7A\u95F4

\u4F7F\u7528\u573A\u666F\uFF1A
- \u7528\u6237\u8BF4"\u5728\u8FD9\u4E2A\u9879\u76EE\u91CC\u627E\u6587\u6863"\u65F6\uFF0C\u8BBE\u7F6E\u5F53\u524D\u76EE\u5F55\u4E3A\u5DE5\u4F5C\u7A7A\u95F4
- \u7528\u6237\u5207\u6362\u9879\u76EE\u65F6\uFF0C\u66F4\u65B0\u5DE5\u4F5C\u7A7A\u95F4
- \u7528\u6237\u60F3\u641C\u7D22\u7279\u5B9A\u6587\u4EF6\u5939\u7684\u6587\u6863`,
    parameters: {
      type: "object",
      properties: {
        directory: {
          type: "string",
          description: "\u5DE5\u4F5C\u7A7A\u95F4\u76EE\u5F55\u8DEF\u5F84\uFF0C\u53EF\u4EE5\u662F\u7EDD\u5BF9\u8DEF\u5F84\u6216\u76F8\u5BF9\u8DEF\u5F84\uFF08\u76F8\u5BF9\u4E8E\u5F53\u524D cwd\uFF09"
        }
      },
      required: ["directory"]
    },
    sideEffects: [{ type: "filesystem", success: true }],
    execute: async (args, context) => {
      let directory = args?.directory;
      // Reject non-string input before touching workspaceState so a bad call
      // cannot corrupt the shared state or throw from `.startsWith`.
      if (typeof directory !== "string") {
        return JSON.stringify({
          success: false,
          error: "directory \u5FC5\u987B\u662F\u5B57\u7B26\u4E32",
          path: directory
        });
      }
      // Expand only "~" itself or a "~/" prefix. "~name/..." is NOT the current
      // user's home, so it is left alone and resolved like any relative path.
      if (directory === "~" || directory.startsWith("~/") || directory.startsWith("~\\")) {
        const os3 = await import("os");
        // Plain concatenation: String.prototype.replace would interpret "$"
        // sequences that may appear in the home directory path.
        directory = os3.homedir() + directory.slice(1);
      }
      const pathModule = await import("path");
      if (!pathModule.isAbsolute(directory)) {
        directory = pathModule.resolve(context.cwd, directory);
      }
      const fs8 = await import("fs/promises");
      try {
        const stat6 = await fs8.stat(directory);
        if (!stat6.isDirectory()) {
          return JSON.stringify({
            success: false,
            error: "\u6307\u5B9A\u8DEF\u5F84\u4E0D\u662F\u4E00\u4E2A\u76EE\u5F55",
            path: directory
          });
        }
      } catch {
        // Any stat failure is reported to the caller as "directory does not exist".
        return JSON.stringify({
          success: false,
          error: "\u76EE\u5F55\u4E0D\u5B58\u5728",
          path: directory
        });
      }
      // Switch the workspace first; `indexed` only flips to true after a
      // successful indexing run below.
      workspaceState.directory = directory;
      workspaceState.indexed = false;
      workspaceState.filesIndexed = 0;
      try {
        await search.indexDirectory(directory, (progress) => {
          workspaceState.filesIndexed = progress.indexed;
        });
        workspaceState.indexed = true;
        return JSON.stringify({
          success: true,
          message: `\u5DE5\u4F5C\u7A7A\u95F4\u5DF2\u8BBE\u7F6E\u5E76\u7D22\u5F15\u5B8C\u6210`,
          workspace: directory,
          filesIndexed: workspaceState.filesIndexed
        });
      } catch (error) {
        return JSON.stringify({
          success: false,
          error: error instanceof Error ? error.message : String(error),
          workspace: directory
        });
      }
    }
  };
}
|
|
3577
|
+
/**
 * Builds the "index_document_files" tool, which indexes several files in one call.
 *
 * @param {object} search - Search engine instance; only `indexFiles(paths)` is called.
 *   Its result is read for `success`, `failed` and `errors`.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createIndexFilesTool(search) {
  const description = `\u6279\u91CF\u7D22\u5F15\u591A\u4E2A\u6587\u6863\u6587\u4EF6\u3002
\u76F8\u6BD4\u5355\u4E2A\u6587\u4EF6\u7D22\u5F15\uFF0C\u6279\u91CF\u7D22\u5F15\u6548\u7387\u66F4\u9AD8\uFF0C\u9002\u5408\u4E00\u6B21\u6027\u7D22\u5F15\u591A\u4E2A\u6587\u4EF6\u3002
\u4F1A\u8FD4\u56DE\u6210\u529F\u548C\u5931\u8D25\u7684\u6570\u91CF\uFF0C\u4EE5\u53CA\u5931\u8D25\u7684\u6587\u4EF6\u5217\u8868\u3002`;
  const parameters = {
    type: "object",
    properties: {
      file_paths: {
        type: "array",
        items: {
          type: "string"
        },
        description: "\u8981\u7D22\u5F15\u7684\u6587\u4EF6\u8DEF\u5F84\u6570\u7EC4\uFF08\u7EDD\u5BF9\u8DEF\u5F84\uFF09"
      }
    },
    required: ["file_paths"]
  };
  const execute = async (args, _context) => {
    const targets = args.file_paths;
    // Guard clause: the batch must be a non-empty array.
    const usable = Array.isArray(targets) && targets.length !== 0;
    if (!usable) {
      return JSON.stringify({
        success: false,
        error: "file_paths \u5FC5\u987B\u662F\u975E\u7A7A\u6570\u7EC4"
      });
    }
    try {
      const summary = await search.indexFiles(targets);
      const report = {
        success: true,
        message: `\u6279\u91CF\u7D22\u5F15\u5B8C\u6210`,
        successCount: summary.success,
        failedCount: summary.failed,
        errors: summary.errors
      };
      return JSON.stringify(report);
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      return JSON.stringify({ success: false, error: reason });
    }
  };
  return {
    name: "index_document_files",
    description,
    parameters,
    sideEffects: [{ type: "filesystem", success: true }],
    execute
  };
}
|
|
3623
|
+
/**
 * Builds the "remove_document_files" tool, which drops several files from the index.
 *
 * @param {object} search - Search engine instance; only `removeFiles(paths)` is called.
 *   Its result is read for `success`, `failed` and `errors`.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createRemoveFilesTool(search) {
  const description = `\u6279\u91CF\u5220\u9664\u591A\u4E2A\u6587\u4EF6\u7684\u7D22\u5F15\u3002
\u7528\u4E8E\u6E05\u7406\u4E0D\u518D\u9700\u8981\u7684\u6587\u6863\u7D22\u5F15\uFF0C\u6BD4\u5982\u6587\u4EF6\u5DF2\u5220\u9664\u6216\u79FB\u52A8\u5230\u5176\u4ED6\u4F4D\u7F6E\u3002`;
  const parameters = {
    type: "object",
    properties: {
      file_paths: {
        type: "array",
        items: {
          type: "string"
        },
        description: "\u8981\u5220\u9664\u7D22\u5F15\u7684\u6587\u4EF6\u8DEF\u5F84\u6570\u7EC4\uFF08\u7EDD\u5BF9\u8DEF\u5F84\uFF09"
      }
    },
    required: ["file_paths"]
  };
  const execute = async (args, _context) => {
    const targets = args.file_paths;
    // Guard clause: the batch must be a non-empty array.
    const usable = Array.isArray(targets) && targets.length !== 0;
    if (!usable) {
      return JSON.stringify({
        success: false,
        error: "file_paths \u5FC5\u987B\u662F\u975E\u7A7A\u6570\u7EC4"
      });
    }
    try {
      const summary = await search.removeFiles(targets);
      const report = {
        success: true,
        message: `\u6279\u91CF\u5220\u9664\u5B8C\u6210`,
        successCount: summary.success,
        failedCount: summary.failed,
        errors: summary.errors
      };
      return JSON.stringify(report);
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      return JSON.stringify({ success: false, error: reason });
    }
  };
  return {
    name: "remove_document_files",
    description,
    parameters,
    sideEffects: [{ type: "filesystem", success: true }],
    execute
  };
}
|
|
3668
|
+
/**
 * Builds the "update_document_files" tool, which re-indexes several files in one call.
 *
 * @param {object} search - Search engine instance; only `updateFiles(paths)` is called.
 *   Its result is read for `success`, `failed` and `errors`.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createUpdateFilesTool(search) {
  const description = `\u6279\u91CF\u66F4\u65B0\u591A\u4E2A\u6587\u4EF6\u7684\u7D22\u5F15\uFF08\u91CD\u65B0\u7D22\u5F15\uFF09\u3002
\u7528\u4E8E\u6587\u4EF6\u5185\u5BB9\u5DF2\u4FEE\u6539\uFF0C\u9700\u8981\u66F4\u65B0\u7D22\u5F15\u7684\u573A\u666F\u3002
\u4F1A\u5148\u5220\u9664\u65E7\u7D22\u5F15\uFF0C\u518D\u91CD\u65B0\u7D22\u5F15\u6587\u4EF6\u3002`;
  const parameters = {
    type: "object",
    properties: {
      file_paths: {
        type: "array",
        items: {
          type: "string"
        },
        description: "\u8981\u66F4\u65B0\u7D22\u5F15\u7684\u6587\u4EF6\u8DEF\u5F84\u6570\u7EC4\uFF08\u7EDD\u5BF9\u8DEF\u5F84\uFF09"
      }
    },
    required: ["file_paths"]
  };
  const execute = async (args, _context) => {
    const targets = args.file_paths;
    // Guard clause: the batch must be a non-empty array.
    const usable = Array.isArray(targets) && targets.length !== 0;
    if (!usable) {
      return JSON.stringify({
        success: false,
        error: "file_paths \u5FC5\u987B\u662F\u975E\u7A7A\u6570\u7EC4"
      });
    }
    try {
      const summary = await search.updateFiles(targets);
      const report = {
        success: true,
        message: `\u6279\u91CF\u66F4\u65B0\u5B8C\u6210`,
        successCount: summary.success,
        failedCount: summary.failed,
        errors: summary.errors
      };
      return JSON.stringify(report);
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      return JSON.stringify({ success: false, error: reason });
    }
  };
  return {
    name: "update_document_files",
    description,
    parameters,
    sideEffects: [{ type: "filesystem", success: true }],
    execute
  };
}
|
|
3714
|
+
/**
 * Builds the "watch_document_directory" tool, which verifies that the target is
 * a directory and then hands it to `search.watchDirectory`.
 *
 * Path handling: a bare "~" or a "~/" ("~\") prefix is expanded to the current
 * user's home directory; any other relative path (including "~name/...") is
 * resolved against `context.cwd`.
 *
 * Options passed to the watcher: `ignoreInitial` is true unless `ignore_initial`
 * is exactly `false`; `debounce` is the supplied value when it is a finite,
 * non-negative number, otherwise 1000.
 *
 * @param {object} search - Search engine instance; only `watchDirectory(dir, options)` is called.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createWatchDirectoryTool(search) {
  return {
    name: "watch_document_directory",
    description: `\u5F00\u59CB\u76D1\u542C\u76EE\u5F55\u53D8\u5316\uFF0C\u81EA\u52A8\u66F4\u65B0\u7D22\u5F15\u3002
\u76D1\u542C\u540E\uFF0C\u76EE\u5F55\u5185\u7684\u6587\u4EF6\u65B0\u589E\u3001\u4FEE\u6539\u3001\u5220\u9664\u90FD\u4F1A\u81EA\u52A8\u66F4\u65B0\u7D22\u5F15\uFF0C\u65E0\u9700\u624B\u52A8\u64CD\u4F5C\u3002
\u9002\u5408\u9700\u8981\u5B9E\u65F6\u4FDD\u6301\u7D22\u5F15\u540C\u6B65\u7684\u573A\u666F\u3002

\u6CE8\u610F\uFF1A\u76D1\u542C\u4F1A\u6301\u7EED\u8FD0\u884C\uFF0C\u76F4\u5230\u8C03\u7528 unwatch_document_directory \u505C\u6B62\u3002`,
    parameters: {
      type: "object",
      properties: {
        directory: {
          type: "string",
          description: "\u8981\u76D1\u542C\u7684\u76EE\u5F55\u7EDD\u5BF9\u8DEF\u5F84"
        },
        ignore_initial: {
          type: "boolean",
          description: "\u662F\u5426\u5FFD\u7565\u521D\u59CB\u626B\u63CF\uFF08\u53EA\u76D1\u542C\u540E\u7EED\u53D8\u5316\uFF09\uFF0C\u9ED8\u8BA4 true"
        },
        debounce: {
          type: "number",
          description: "\u9632\u6296\u5EF6\u8FDF\uFF08\u6BEB\u79D2\uFF09\uFF0C\u907F\u514D\u9891\u7E41\u89E6\u53D1\uFF0C\u9ED8\u8BA4 1000"
        }
      },
      required: ["directory"]
    },
    sideEffects: [{ type: "filesystem", success: true }],
    execute: async (args, context) => {
      let directory = args?.directory;
      // Tool arguments come from a model; answer with a structured error rather
      // than letting `.startsWith` throw on a non-string value.
      if (typeof directory !== "string") {
        return JSON.stringify({
          success: false,
          error: "directory \u5FC5\u987B\u662F\u5B57\u7B26\u4E32",
          path: directory
        });
      }
      // Expand only "~" itself or a "~/" prefix. "~name/..." is NOT the current
      // user's home, so it is left alone and resolved like any relative path.
      if (directory === "~" || directory.startsWith("~/") || directory.startsWith("~\\")) {
        const os3 = await import("os");
        // Plain concatenation: String.prototype.replace would interpret "$"
        // sequences that may appear in the home directory path.
        directory = os3.homedir() + directory.slice(1);
      }
      const pathModule = await import("path");
      if (!pathModule.isAbsolute(directory)) {
        directory = pathModule.resolve(context.cwd, directory);
      }
      const fs8 = await import("fs/promises");
      try {
        const stat6 = await fs8.stat(directory);
        if (!stat6.isDirectory()) {
          return JSON.stringify({
            success: false,
            error: "\u6307\u5B9A\u8DEF\u5F84\u4E0D\u662F\u4E00\u4E2A\u76EE\u5F55",
            path: directory
          });
        }
      } catch {
        // Any stat failure is reported to the caller as "directory does not exist".
        return JSON.stringify({
          success: false,
          error: "\u76EE\u5F55\u4E0D\u5B58\u5728",
          path: directory
        });
      }
      // `typeof NaN === "number"`, so require a finite, non-negative delay and
      // fall back to the documented default otherwise.
      const debounce = Number.isFinite(args.debounce) && args.debounce >= 0 ? args.debounce : 1e3;
      try {
        search.watchDirectory(directory, {
          ignoreInitial: args.ignore_initial !== false,
          debounce
        });
        return JSON.stringify({
          success: true,
          message: `\u5F00\u59CB\u76D1\u542C\u76EE\u5F55`,
          directory
        });
      } catch (error) {
        return JSON.stringify({
          success: false,
          error: error instanceof Error ? error.message : String(error),
          directory
        });
      }
    }
  };
}
|
|
3788
|
+
/**
 * Builds the "unwatch_document_directory" tool, which asks the search engine to
 * stop watching a directory.
 *
 * Path handling: a bare "~" or a "~/" ("~\") prefix is expanded to the current
 * user's home directory; any other relative path (including "~name/...") is
 * resolved against `context.cwd`.
 *
 * @param {object} search - Search engine instance; only `unwatchDirectory(dir)` is called.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `execute`).
 */
function createUnwatchDirectoryTool(search) {
  return {
    name: "unwatch_document_directory",
    description: `\u505C\u6B62\u76D1\u542C\u6307\u5B9A\u76EE\u5F55\u7684\u53D8\u5316\u3002
\u505C\u6B62\u540E\uFF0C\u8BE5\u76EE\u5F55\u7684\u6587\u4EF6\u53D8\u5316\u5C06\u4E0D\u518D\u81EA\u52A8\u66F4\u65B0\u7D22\u5F15\u3002`,
    parameters: {
      type: "object",
      properties: {
        directory: {
          type: "string",
          description: "\u8981\u505C\u6B62\u76D1\u542C\u7684\u76EE\u5F55\u7EDD\u5BF9\u8DEF\u5F84"
        }
      },
      required: ["directory"]
    },
    execute: async (args, context) => {
      let directory = args?.directory;
      // Tool arguments come from a model; answer with a structured error rather
      // than letting `.startsWith` throw on a non-string value.
      if (typeof directory !== "string") {
        return JSON.stringify({
          success: false,
          error: "directory \u5FC5\u987B\u662F\u5B57\u7B26\u4E32",
          directory
        });
      }
      // Expand only "~" itself or a "~/" prefix. "~name/..." is NOT the current
      // user's home, so it is left alone and resolved like any relative path.
      if (directory === "~" || directory.startsWith("~/") || directory.startsWith("~\\")) {
        const os3 = await import("os");
        // Plain concatenation: String.prototype.replace would interpret "$"
        // sequences that may appear in the home directory path.
        directory = os3.homedir() + directory.slice(1);
      }
      const pathModule = await import("path");
      if (!pathModule.isAbsolute(directory)) {
        directory = pathModule.resolve(context.cwd, directory);
      }
      try {
        search.unwatchDirectory(directory);
        return JSON.stringify({
          success: true,
          message: `\u5DF2\u505C\u6B62\u76D1\u542C\u76EE\u5F55`,
          directory
        });
      } catch (error) {
        return JSON.stringify({
          success: false,
          error: error instanceof Error ? error.message : String(error),
          directory
        });
      }
    }
  };
}
|
|
3830
|
+
/**
 * Builds the "get_watched_directories" tool, which lists the directories
 * returned by `search.getWatchedDirectories()`.
 *
 * @param {object} search - Search engine instance; only `getWatchedDirectories()` is called (synchronously).
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `execute`).
 */
function createGetWatchedDirectoriesTool(search) {
  const description = `\u83B7\u53D6\u5F53\u524D\u6B63\u5728\u76D1\u542C\u7684\u76EE\u5F55\u5217\u8868\u3002
\u7528\u4E8E\u67E5\u770B\u54EA\u4E9B\u76EE\u5F55\u6B63\u5728\u88AB\u81EA\u52A8\u76D1\u542C\u548C\u66F4\u65B0\u3002`;
  // The tool takes no arguments.
  const parameters = { type: "object", properties: {}, required: [] };
  const execute = async (_args, _context) => {
    const watched = search.getWatchedDirectories();
    const report = {
      success: true,
      directories: watched,
      count: watched.length
    };
    return JSON.stringify(report);
  };
  return {
    name: "get_watched_directories",
    description,
    parameters,
    execute
  };
}
|
|
3850
|
+
/**
 * Builds the "cleanup_document_index" tool, which runs `search.cleanup()` and
 * reports how many entries were removed and updated.
 *
 * @param {object} search - Search engine instance; only `cleanup()` is called.
 *   Its result is read for `removed` and `updated`.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createCleanupIndexTool(search) {
  const description = `\u6E05\u7406\u65E0\u6548\u7684\u7D22\u5F15\uFF08\u6587\u4EF6\u5DF2\u5220\u9664\u4F46\u7D22\u5F15\u8FD8\u5728\uFF09\u3002
\u4F1A\u68C0\u67E5\u6240\u6709\u5DF2\u7D22\u5F15\u7684\u6587\u4EF6\u662F\u5426\u8FD8\u5B58\u5728\uFF1A
- \u5982\u679C\u6587\u4EF6\u4E0D\u5B58\u5728\uFF0C\u5220\u9664\u7D22\u5F15
- \u5982\u679C\u6587\u4EF6\u5DF2\u4FEE\u6539\uFF0C\u91CD\u65B0\u7D22\u5F15

\u7528\u4E8E\u7EF4\u62A4\u7D22\u5F15\u7684\u4E00\u81F4\u6027\uFF0C\u5EFA\u8BAE\u5B9A\u671F\u6267\u884C\u3002`;
  // The tool takes no arguments.
  const parameters = { type: "object", properties: {}, required: [] };
  const execute = async (_args, _context) => {
    try {
      const summary = await search.cleanup();
      const report = {
        success: true,
        message: `\u6E05\u7406\u5B8C\u6210`,
        removedCount: summary.removed,
        updatedCount: summary.updated
      };
      return JSON.stringify(report);
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      return JSON.stringify({ success: false, error: reason });
    }
  };
  return {
    name: "cleanup_document_index",
    description,
    parameters,
    sideEffects: [{ type: "filesystem", success: true }],
    execute
  };
}
|
|
3883
|
+
/**
 * Builds the "export_document_index" tool, which exports the index through
 * `search.exportIndex` and echoes the returned export metadata.
 *
 * Path handling: a bare "~" or a "~/" ("~\") prefix is expanded to the current
 * user's home directory; any other relative path (including "~name/...") is
 * resolved against `context.cwd`.
 *
 * @param {object} search - Search engine instance; only `exportIndex(path)` is called.
 *   Its result is read for `exportPath`, `timestamp` (a Date), `components` and `stats`.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createExportIndexTool(search) {
  return {
    name: "export_document_index",
    description: `\u5BFC\u51FA\u6587\u6863\u7D22\u5F15\u6570\u636E\u5230\u6307\u5B9A\u8DEF\u5F84\u3002
\u5BFC\u51FA\u5185\u5BB9\u5305\u62EC\uFF1A
- \u5143\u6570\u636E\uFF08SQLite \u6570\u636E\u5E93\uFF09
- \u5411\u91CF\u6570\u636E\uFF08LanceDB\uFF09
- \u5168\u6587\u7D22\u5F15\uFF08FlexSearch\uFF09

\u5BFC\u51FA\u7684\u6570\u636E\u53EF\u4EE5\u7528\u4E8E\u5907\u4EFD\u3001\u8FC1\u79FB\u6216\u6062\u590D\u7D22\u5F15\u3002`,
    parameters: {
      type: "object",
      properties: {
        output_path: {
          type: "string",
          description: "\u5BFC\u51FA\u8DEF\u5F84\uFF08\u76EE\u5F55\u8DEF\u5F84\uFF0C\u4F1A\u5728\u8BE5\u76EE\u5F55\u4E0B\u521B\u5EFA\u5BFC\u51FA\u6587\u4EF6\u5939\uFF09"
        }
      },
      required: ["output_path"]
    },
    sideEffects: [{ type: "filesystem", success: true }],
    execute: async (args, context) => {
      let outputPath = args?.output_path;
      // Tool arguments come from a model; answer with a structured error rather
      // than letting `.startsWith` throw on a non-string value.
      if (typeof outputPath !== "string") {
        return JSON.stringify({
          success: false,
          error: "output_path \u5FC5\u987B\u662F\u5B57\u7B26\u4E32"
        });
      }
      // Expand only "~" itself or a "~/" prefix. "~name/..." is NOT the current
      // user's home, so it is left alone and resolved like any relative path.
      if (outputPath === "~" || outputPath.startsWith("~/") || outputPath.startsWith("~\\")) {
        const os3 = await import("os");
        // Plain concatenation: String.prototype.replace would interpret "$"
        // sequences that may appear in the home directory path.
        outputPath = os3.homedir() + outputPath.slice(1);
      }
      const pathModule = await import("path");
      if (!pathModule.isAbsolute(outputPath)) {
        outputPath = pathModule.resolve(context.cwd, outputPath);
      }
      try {
        const exportInfo = await search.exportIndex(outputPath);
        return JSON.stringify({
          success: true,
          message: "\u7D22\u5F15\u5BFC\u51FA\u5B8C\u6210",
          exportPath: exportInfo.exportPath,
          timestamp: exportInfo.timestamp.toISOString(),
          components: exportInfo.components,
          // Only the document totals are forwarded from the export statistics.
          stats: {
            totalDocuments: exportInfo.stats.totalDocuments,
            byType: exportInfo.stats.byType
          }
        });
      } catch (error) {
        return JSON.stringify({
          success: false,
          error: error instanceof Error ? error.message : String(error)
        });
      }
    }
  };
}
|
|
3936
|
+
/**
 * Builds the "import_document_index" tool, which imports index data through
 * `search.importIndex`.
 *
 * Path handling: a bare "~" or a "~/" ("~\") prefix is expanded to the current
 * user's home directory; any other relative path (including "~name/...") is
 * resolved against `context.cwd`.
 *
 * @param {object} search - Search engine instance; only `importIndex(path)` is called.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `sideEffects`, `execute`).
 */
function createImportIndexTool(search) {
  return {
    name: "import_document_index",
    description: `\u4ECE\u6307\u5B9A\u8DEF\u5F84\u5BFC\u5165\u6587\u6863\u7D22\u5F15\u6570\u636E\u3002
\u5BFC\u5165\u524D\u4F1A\u68C0\u67E5\u5BFC\u51FA\u4FE1\u606F\uFF0C\u786E\u4FDD\u6570\u636E\u5B8C\u6574\u6027\u3002
\u5BFC\u5165\u540E\u4F1A\u66FF\u6362\u5F53\u524D\u7D22\u5F15\u6570\u636E\uFF0C\u8BF7\u8C28\u614E\u4F7F\u7528\u3002`,
    parameters: {
      type: "object",
      properties: {
        input_path: {
          type: "string",
          description: "\u5BFC\u5165\u8DEF\u5F84\uFF08\u5BFC\u51FA\u65F6\u521B\u5EFA\u7684\u76EE\u5F55\u8DEF\u5F84\uFF09"
        }
      },
      required: ["input_path"]
    },
    sideEffects: [{ type: "filesystem", success: true }],
    execute: async (args, context) => {
      let inputPath = args?.input_path;
      // Tool arguments come from a model; answer with a structured error rather
      // than letting `.startsWith` throw on a non-string value.
      if (typeof inputPath !== "string") {
        return JSON.stringify({
          success: false,
          error: "input_path \u5FC5\u987B\u662F\u5B57\u7B26\u4E32"
        });
      }
      // Expand only "~" itself or a "~/" prefix. "~name/..." is NOT the current
      // user's home, so it is left alone and resolved like any relative path.
      if (inputPath === "~" || inputPath.startsWith("~/") || inputPath.startsWith("~\\")) {
        const os3 = await import("os");
        // Plain concatenation: String.prototype.replace would interpret "$"
        // sequences that may appear in the home directory path.
        inputPath = os3.homedir() + inputPath.slice(1);
      }
      const pathModule = await import("path");
      if (!pathModule.isAbsolute(inputPath)) {
        inputPath = pathModule.resolve(context.cwd, inputPath);
      }
      try {
        await search.importIndex(inputPath);
        return JSON.stringify({
          success: true,
          message: "\u7D22\u5F15\u5BFC\u5165\u5B8C\u6210",
          inputPath
        });
      } catch (error) {
        return JSON.stringify({
          success: false,
          error: error instanceof Error ? error.message : String(error)
        });
      }
    }
  };
}
|
|
3979
|
+
/**
 * Builds the "list_document_index_backups" tool, which lists the backups that
 * `search.listBackups` reports for a directory.
 *
 * Path handling: a bare "~" or a "~/" ("~\") prefix is expanded to the current
 * user's home directory; any other relative path (including "~name/...") is
 * resolved against `context.cwd`.
 *
 * @param {object} search - Search engine instance; only `listBackups(dir)` is called.
 *   Each returned entry is read for `path`, `timestamp` (a Date) and `size`.
 * @returns {object} Tool definition (`name`, `description`, `parameters`, `execute`).
 */
function createListBackupsTool(search) {
  return {
    name: "list_document_index_backups",
    description: `\u5217\u51FA\u6307\u5B9A\u76EE\u5F55\u4E0B\u7684\u6240\u6709\u7D22\u5F15\u5907\u4EFD\u3002
\u7528\u4E8E\u67E5\u770B\u53EF\u7528\u7684\u5907\u4EFD\uFF0C\u4EE5\u4FBF\u9009\u62E9\u6062\u590D\u3002`,
    parameters: {
      type: "object",
      properties: {
        backup_dir: {
          type: "string",
          description: "\u5907\u4EFD\u76EE\u5F55\u8DEF\u5F84"
        }
      },
      required: ["backup_dir"]
    },
    execute: async (args, context) => {
      let backupDir = args?.backup_dir;
      // Tool arguments come from a model; answer with a structured error rather
      // than letting `.startsWith` throw on a non-string value.
      if (typeof backupDir !== "string") {
        return JSON.stringify({
          success: false,
          error: "backup_dir \u5FC5\u987B\u662F\u5B57\u7B26\u4E32"
        });
      }
      // Expand only "~" itself or a "~/" prefix. "~name/..." is NOT the current
      // user's home, so it is left alone and resolved like any relative path.
      if (backupDir === "~" || backupDir.startsWith("~/") || backupDir.startsWith("~\\")) {
        const os3 = await import("os");
        // Plain concatenation: String.prototype.replace would interpret "$"
        // sequences that may appear in the home directory path.
        backupDir = os3.homedir() + backupDir.slice(1);
      }
      const pathModule = await import("path");
      if (!pathModule.isAbsolute(backupDir)) {
        backupDir = pathModule.resolve(context.cwd, backupDir);
      }
      try {
        const backups = await search.listBackups(backupDir);
        return JSON.stringify({
          success: true,
          backups: backups.map((b) => ({
            path: b.path,
            timestamp: b.timestamp.toISOString(),
            size: b.size,
            // Human-readable size, produced by the file-level formatBytes helper.
            sizeFormatted: formatBytes(b.size)
          })),
          count: backups.length
        });
      } catch (error) {
        return JSON.stringify({
          success: false,
          error: error instanceof Error ? error.message : String(error)
        });
      }
    }
  };
}
|
|
4025
|
+
/**
 * Formats a byte count as a human-readable string using 1024-based units,
 * e.g. 1536 -> "1.50 KB".
 *
 * Zero, negative and non-finite inputs yield "0 B". Values beyond the largest
 * known unit stay expressed in that unit (e.g. "1024.00 PB").
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} The size with two decimals followed by its unit.
 */
function formatBytes(bytes) {
  const amount = Number(bytes);
  if (!Number.isFinite(amount) || amount <= 0) return "0 B";
  const k = 1024;
  const sizes = ["B", "KB", "MB", "GB", "TB", "PB"];
  // Step through the units by repeated division rather than Math.log, so the
  // unit index cannot go out of range or be skewed by floating-point log
  // rounding. Dividing by 1024 is exact for binary floats.
  let value = amount;
  let i = 0;
  while (value >= k && i < sizes.length - 1) {
    value /= k;
    i += 1;
  }
  return `${value.toFixed(2)} ${sizes[i]}`;
}
|
|
4032
|
+
/**
 * Builds the `optimize_document_index` tool definition.
 *
 * @param {object} search - Object exposing an async `optimize()` method.
 * @returns {object} Tool descriptor; its `execute` awaits `search.optimize()`
 *   and resolves to a JSON string with a `success` flag and either a
 *   completion `message` or the failure `error` text.
 */
function createOptimizeIndexTool(search) {
  const name = "optimize_document_index";
  const description = `\u4F18\u5316\u6587\u6863\u7D22\u5F15\uFF0C\u5305\u62EC\uFF1A
- \u6E05\u7406\u65E0\u6548\u7D22\u5F15\uFF08\u6587\u4EF6\u5DF2\u5220\u9664\uFF09
- \u66F4\u65B0\u8FC7\u671F\u7D22\u5F15\uFF08\u6587\u4EF6\u5DF2\u4FEE\u6539\uFF09
- \u538B\u7F29\u7D22\u5F15\u6570\u636E
- \u788E\u7247\u6574\u7406

\u5EFA\u8BAE\u5B9A\u671F\u6267\u884C\u4EE5\u4FDD\u6301\u7D22\u5F15\u6027\u80FD\u548C\u4E00\u81F4\u6027\u3002`;
  // The tool takes no arguments.
  const parameters = { type: "object", properties: {}, required: [] };
  const sideEffects = [{ type: "filesystem", success: true }];
  const execute = async (_args, _context) => {
    let outcome;
    try {
      await search.optimize();
      outcome = { success: true, message: "\u7D22\u5F15\u4F18\u5316\u5B8C\u6210" };
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      outcome = { success: false, error: reason };
    }
    return JSON.stringify(outcome);
  };
  return { name, description, parameters, sideEffects, execute };
}
|
|
4064
|
+
/**
 * Builds the `check_document_index_health` tool definition.
 *
 * @param {object} search - Object exposing an async `healthCheck()` whose
 *   result provides `healthy`, `totalDocuments`, `invalidIndexes`,
 *   `staleIndexes`, `errorCount` and `integrity`.
 * @returns {object} Tool descriptor; its `execute` resolves to a JSON string
 *   that echoes those fields plus a status `message`, or a
 *   `{ success: false, error }` payload when the check fails.
 */
function createHealthCheckTool(search) {
  const name = "check_document_index_health";
  const description = `\u68C0\u67E5\u6587\u6863\u7D22\u5F15\u7684\u5065\u5EB7\u72B6\u6001\u3002
\u5305\u62EC\uFF1A
- \u7D22\u5F15\u5B8C\u6574\u6027\u68C0\u67E5\uFF08\u5143\u6570\u636E\u3001\u5411\u91CF\u3001\u5168\u6587\u7D22\u5F15\uFF09
- \u65E0\u6548\u7D22\u5F15\u7EDF\u8BA1\uFF08\u6587\u4EF6\u5DF2\u5220\u9664\uFF09
- \u8FC7\u671F\u7D22\u5F15\u7EDF\u8BA1\uFF08\u6587\u4EF6\u5DF2\u4FEE\u6539\uFF09
- \u9519\u8BEF\u7EDF\u8BA1

\u7528\u4E8E\u8BCA\u65AD\u7D22\u5F15\u95EE\u9898\uFF0C\u51B3\u5B9A\u662F\u5426\u9700\u8981\u4F18\u5316\u6216\u4FEE\u590D\u3002`;
  // The tool takes no arguments.
  const parameters = { type: "object", properties: {}, required: [] };
  const execute = async (_args, _context) => {
    try {
      const health = await search.healthCheck();
      const report = {
        success: true,
        healthy: health.healthy,
        totalDocuments: health.totalDocuments,
        invalidIndexes: health.invalidIndexes,
        staleIndexes: health.staleIndexes,
        errorCount: health.errorCount,
        integrity: health.integrity,
        message: health.healthy ? "\u7D22\u5F15\u5065\u5EB7" : "\u7D22\u5F15\u5B58\u5728\u95EE\u9898\uFF0C\u5EFA\u8BAE\u6267\u884C optimize_document_index \u6216 cleanup_document_index"
      };
      // Serialization stays inside the try so a non-serializable report is
      // reported as a failure too.
      return JSON.stringify(report);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      return JSON.stringify({ success: false, error: reason });
    }
  };
  return { name, description, parameters, execute };
}
|
|
4102
|
+
/**
 * Builds the `get_document_index_errors` tool definition.
 *
 * @param {object} search - Object exposing a synchronous `getIndexErrors()`
 *   returning entries with `filePath`, `error`, `retryCount` and a
 *   `timestamp` that supports `toISOString()`.
 * @returns {object} Tool descriptor; its `execute` resolves to a JSON string
 *   listing the recorded errors and their count, or a
 *   `{ success: false, error }` payload on failure.
 */
function createGetIndexErrorsTool(search) {
  // Keep only the reported fields and turn the timestamp into an ISO string.
  const toEntry = (record) => ({
    filePath: record.filePath,
    error: record.error,
    retryCount: record.retryCount,
    timestamp: record.timestamp.toISOString()
  });
  const name = "get_document_index_errors";
  const description = `\u83B7\u53D6\u7D22\u5F15\u8FC7\u7A0B\u4E2D\u53D1\u751F\u7684\u9519\u8BEF\u5217\u8868\u3002
\u5305\u62EC\u6587\u4EF6\u8DEF\u5F84\u3001\u9519\u8BEF\u4FE1\u606F\u3001\u91CD\u8BD5\u6B21\u6570\u548C\u65F6\u95F4\u6233\u3002
\u7528\u4E8E\u6392\u67E5\u7D22\u5F15\u95EE\u9898\uFF0C\u4E86\u89E3\u54EA\u4E9B\u6587\u4EF6\u7D22\u5F15\u5931\u8D25\u3002`;
  // The tool takes no arguments.
  const parameters = { type: "object", properties: {}, required: [] };
  const execute = async (_args, _context) => {
    try {
      const recorded = search.getIndexErrors();
      const listing = recorded.map((record) => toEntry(record));
      return JSON.stringify({ success: true, errors: listing, count: recorded.length });
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      return JSON.stringify({ success: false, error: reason });
    }
  };
  return { name, description, parameters, execute };
}
|
|
4135
|
+
/**
 * Builds the `retry_failed_document_indexes` tool definition.
 *
 * @param {object} search - Object exposing an async `retryFailedIndexes()`
 *   whose result provides `success`, `failed` and `errors`.
 * @returns {object} Tool descriptor; its `execute` resolves to a JSON string
 *   reporting `successCount`, `failedCount` and `errors`, or a
 *   `{ success: false, error }` payload when the retry throws.
 */
function createRetryFailedIndexesTool(search) {
  const name = "retry_failed_document_indexes";
  const description = `\u91CD\u8BD5\u4E4B\u524D\u7D22\u5F15\u5931\u8D25\u7684\u6587\u4EF6\u3002
\u4F1A\u6E05\u9664\u9519\u8BEF\u8BB0\u5F55\uFF0C\u7136\u540E\u91CD\u65B0\u5C1D\u8BD5\u7D22\u5F15\u6240\u6709\u5931\u8D25\u7684\u6587\u4EF6\u3002
\u7528\u4E8E\u4FEE\u590D\u7D22\u5F15\u9519\u8BEF\uFF0C\u6062\u590D\u5B8C\u6574\u7684\u7D22\u5F15\u8986\u76D6\u3002`;
  // The tool takes no arguments.
  const parameters = { type: "object", properties: {}, required: [] };
  const sideEffects = [{ type: "filesystem", success: true }];
  const execute = async (_args, _context) => {
    try {
      const result = await search.retryFailedIndexes();
      const summary = {
        success: true,
        message: "\u91CD\u8BD5\u5B8C\u6210",
        successCount: result.success,
        failedCount: result.failed,
        errors: result.errors
      };
      // Serialization stays inside the try so a non-serializable `errors`
      // value is reported as a failure too.
      return JSON.stringify(summary);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      return JSON.stringify({ success: false, error: reason });
    }
  };
  return { name, description, parameters, sideEffects, execute };
}
|
|
4166
|
+
/**
 * Builds the `get_search_workspace` tool definition.
 *
 * @param {object} workspaceState - Object whose `directory`, `indexed` and
 *   `filesIndexed` properties are read each time the tool executes.
 * @returns {object} Tool descriptor; its `execute` resolves to a JSON string
 *   with `hasWorkspace: false` and a hint message while `directory` is falsy,
 *   otherwise `hasWorkspace: true` plus the three state fields.
 */
function createGetWorkspaceTool(workspaceState) {
  const name = "get_search_workspace";
  const description = `\u83B7\u53D6\u5F53\u524D\u6587\u6863\u641C\u7D22\u7684\u5DE5\u4F5C\u7A7A\u95F4\u72B6\u6001\u3002
\u8FD4\u56DE\u5F53\u524D\u5DE5\u4F5C\u7A7A\u95F4\u76EE\u5F55\u3001\u662F\u5426\u5DF2\u7D22\u5F15\u3001\u7D22\u5F15\u6587\u4EF6\u6570\u91CF\u7B49\u4FE1\u606F\u3002`;
  // The tool takes no arguments.
  const parameters = { type: "object", properties: {}, required: [] };
  const execute = async (_args, _context) => {
    // State is read at call time, so later changes to workspaceState show up.
    const snapshot = workspaceState.directory
      ? {
          hasWorkspace: true,
          directory: workspaceState.directory,
          indexed: workspaceState.indexed,
          filesIndexed: workspaceState.filesIndexed
        }
      : {
          hasWorkspace: false,
          message: "\u5C1A\u672A\u8BBE\u7F6E\u5DE5\u4F5C\u7A7A\u95F4\uFF0C\u4F7F\u7528 set_search_workspace \u8BBE\u7F6E"
        };
    return JSON.stringify(snapshot);
  };
  return { name, description, parameters, execute };
}
|
|
4192
|
+
|
|
4193
|
+
export {
|
|
4194
|
+
FileType,
|
|
4195
|
+
DEFAULT_CONFIG,
|
|
4196
|
+
VectorStore,
|
|
4197
|
+
FullTextIndex,
|
|
4198
|
+
MetaStore,
|
|
4199
|
+
parseDocument,
|
|
4200
|
+
isSupportedDocument,
|
|
4201
|
+
getDocumentType,
|
|
4202
|
+
DEFAULT_INSTRUCTIONS,
|
|
4203
|
+
initEmbedder,
|
|
4204
|
+
embed,
|
|
4205
|
+
embedDocument,
|
|
4206
|
+
embedQuery,
|
|
4207
|
+
embedBatch,
|
|
4208
|
+
embedBatchConcurrent,
|
|
4209
|
+
embedImage,
|
|
4210
|
+
embedVideo,
|
|
4211
|
+
embedMultimodal,
|
|
4212
|
+
getEmbeddingDimension,
|
|
4213
|
+
setEmbeddingDimension,
|
|
4214
|
+
disposeEmbedder,
|
|
4215
|
+
isEmbedderInitialized,
|
|
4216
|
+
DEFAULT_EXTENSION_RULES,
|
|
4217
|
+
DEFAULT_DIRECTORY_RULES,
|
|
4218
|
+
DEFAULT_FILE_RULES,
|
|
4219
|
+
DEFAULT_PATH_RULES,
|
|
4220
|
+
DEFAULT_SCAN_RULES,
|
|
4221
|
+
ScanRulesManager,
|
|
4222
|
+
createRulesManager,
|
|
4223
|
+
scanDirectories,
|
|
4224
|
+
getDefaultDirectories,
|
|
4225
|
+
formatSize,
|
|
4226
|
+
formatDate,
|
|
4227
|
+
getFileType,
|
|
4228
|
+
extractSnippet,
|
|
4229
|
+
addGlobalProgressListener,
|
|
4230
|
+
removeGlobalProgressListener,
|
|
4231
|
+
splitText,
|
|
4232
|
+
getChunkStats,
|
|
4233
|
+
IndexingPipeline,
|
|
4234
|
+
createIndexingPipeline,
|
|
4235
|
+
DocumentSearch,
|
|
4236
|
+
searchPlugin,
|
|
4237
|
+
getSearchPlugin
|
|
4238
|
+
};
|
|
4239
|
+
//# sourceMappingURL=chunk-YJIIX54F.js.map
|