deepfish-ai 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/package.json +1 -1
  2. package/src/AgentRobot/AgentRobotFactory/MainAgentRobot.js +4 -7
  3. package/src/AgentRobot/AgentRobotFactory/SubAgentRobot.js +3 -8
  4. package/src/AgentRobot/AgentRobotFactory/SubSkillAgentRobot.js +3 -8
  5. package/src/AgentRobot/BaseAgentRobot/Brain.js +8 -8
  6. package/src/AgentRobot/BaseAgentRobot/Hand.js +3 -3
  7. package/src/AgentRobot/BaseAgentRobot/index.js +25 -95
  8. package/src/AgentRobot/BaseAgentRobot/lazy-tools/doc-transform.js +204 -0
  9. package/src/AgentRobot/BaseAgentRobot/lazy-tools/docx.js +552 -1
  10. package/src/AgentRobot/BaseAgentRobot/lazy-tools/embedding.js +763 -0
  11. package/src/AgentRobot/BaseAgentRobot/lazy-tools/img.js +1 -0
  12. package/src/AgentRobot/BaseAgentRobot/lazy-tools/pdf.js +1 -0
  13. package/src/AgentRobot/BaseAgentRobot/lazy-tools/pptx.js +1 -0
  14. package/src/AgentRobot/BaseAgentRobot/lazy-tools/xlsx.js +1 -0
  15. package/src/AgentRobot/BaseAgentRobot/tools/BaseTools.js +1 -0
  16. package/src/AgentRobot/BaseAgentRobot/tools/CreateAgentTools.js +3 -2
  17. package/src/AgentRobot/BaseAgentRobot/tools/FileTools.js +1 -0
  18. package/src/AgentRobot/BaseAgentRobot/tools/GenerateTools.js +4 -2
  19. package/src/AgentRobot/BaseAgentRobot/tools/InquirerTools.js +1 -0
  20. package/src/AgentRobot/BaseAgentRobot/tools/SystemTools.js +3 -4
  21. package/src/AgentRobot/BaseAgentRobot/tools/TaskTools.js +1 -0
  22. package/src/AgentRobot/BaseAgentRobot/tools/TestTools.js +1 -0
  23. package/src/AgentRobot/BaseAgentRobot/tools/UserTool.js +87 -0
  24. package/src/AgentRobot/BaseAgentRobot/tools/WebTools.js +257 -0
  25. package/src/AgentRobot/BaseAgentRobot/utils/AIRequest.js +9 -2
  26. package/src/AgentRobot/BaseAgentRobot/utils/AIToolManager.js +128 -0
  27. package/src/AgentRobot/BaseAgentRobot/utils/AttachmentToolScanner.js +4 -5
  28. package/src/cli/DefaultConfig.js +3 -0
@@ -0,0 +1,763 @@
1
+ const path = require('path')
2
+ const os = require('os')
3
+ const fs = require('fs-extra')
4
+ const crypto = require('crypto')
5
+ const mammoth = require('mammoth')
6
+ const pdfParse = require('pdf-parse')
7
+ const XLSX = require('xlsx')
8
+ const aiInquirer = require('../utils/aiInquirer')
9
+
10
/**
 * Builds the success envelope returned by the tool functions in this module.
 *
 * @param {*} [data=null] - Payload to hand back to the caller.
 * @returns {{success: boolean, data: *}} Envelope with `success` set to true.
 */
function ok(data = null) {
  const envelope = { success: true, data }
  return envelope
}
13
+
14
/**
 * Builds the failure envelope returned by the tool functions in this module.
 *
 * @param {*} error - An Error (its `message` is used) or any value, which is stringified.
 * @param {*} [data=null] - Optional context describing the failed call.
 * @returns {{success: boolean, error: string, data: *}} Envelope with `success` set to false.
 */
function fail(error, data = null) {
  const hasValue = error !== null && error !== undefined
  // Fall back to the stringified value when there is no usable `message`.
  const message = (hasValue && error.message) || String(error)
  return { success: false, error: message, data }
}
17
+
18
/**
 * Returns the path of the per-user config file: `<home>/.deepfish-ai/config.js`.
 *
 * @returns {string} Path to the config module.
 */
function _getConfigFilePath() {
  return path.join(os.homedir(), '.deepfish-ai', 'config.js')
}
24
+
25
/**
 * Loads the config module at `configPath`, creating an empty one (and its directory)
 * when it does not exist yet.
 *
 * @param {string} configPath - Path of the CommonJS config file.
 * @returns {*} Whatever the config module exports.
 */
function _loadConfig(configPath) {
  const configDir = path.dirname(configPath)
  fs.ensureDirSync(configDir)

  const isMissing = !fs.existsSync(configPath)
  if (isMissing) {
    fs.writeFileSync(configPath, 'module.exports = {}')
  }

  // Drop the cached module first so the file is re-evaluated on every load.
  const cacheKey = require.resolve(configPath)
  delete require.cache[cacheKey]
  return require(configPath)
}
34
+
35
/**
 * Resolves the knowledge base directory against the current working directory.
 *
 * @param {string} [knowledgeBasePath=''] - Custom directory; falsy values select `.deepfish-rag`.
 * @returns {string} Absolute knowledge base directory.
 */
function _getKbRootPath(knowledgeBasePath = '') {
  const target = knowledgeBasePath ? knowledgeBasePath : '.deepfish-rag'
  return path.resolve(process.cwd(), target)
}
38
+
39
/**
 * Returns the path of the `index.json` file inside a knowledge base directory.
 *
 * @param {string} kbRootPath - Knowledge base directory.
 * @returns {string} Path to the index file.
 */
function _getKbIndexPath(kbRootPath) {
  const indexFileName = 'index.json'
  return path.join(kbRootPath, indexFileName)
}
42
+
43
/**
 * Computes the SHA-256 digest of `content` as a lowercase hex string.
 *
 * @param {string|Buffer} content - Data to hash.
 * @returns {string} 64-character hex digest.
 */
function _sha256(content) {
  const hasher = crypto.createHash('sha256')
  hasher.update(content)
  return hasher.digest('hex')
}
46
+
47
/**
 * Tells whether a file can be ingested, judged only by its (case-insensitive) extension.
 *
 * @param {string} filePath - File path or name.
 * @returns {boolean} True when the extension is on the supported list.
 */
function _isSupportedFile(filePath) {
  const supportedExts = [
    // Plain text, markup and data
    '.md', '.txt', '.json', '.html', '.htm', '.xml', '.yaml', '.yml', '.csv', '.log',
    // Source code and stylesheets
    '.js', '.mjs', '.cjs', '.ts', '.tsx', '.jsx', '.css', '.scss', '.less', '.sql',
    '.py', '.java', '.go', '.rs', '.cpp', '.c', '.h',
    // Office / PDF documents
    '.docx', '.pdf', '.xlsx', '.xls',
  ]
  const ext = path.extname(filePath).toLowerCase()
  return supportedExts.includes(ext)
}
54
+
55
/**
 * Lists every regular file at or below `sourcePath`.
 *
 * Symbolic links are followed (as before), but a link that points back to one of
 * its own ancestor directories is skipped so a cyclic tree cannot recurse until the
 * OS raises ELOOP. Nested entries that cannot be inspected (dangling links,
 * unreadable subdirectories) are skipped instead of aborting the whole scan.
 *
 * @param {string} sourcePath - File or directory to scan.
 * @returns {string[]} `[sourcePath]` for a file, otherwise the files found below the directory.
 * @throws {Error} When `sourcePath` itself cannot be stat'ed or listed.
 */
function _collectSourceFiles(sourcePath) {
  if (fs.statSync(sourcePath).isFile()) {
    return [sourcePath]
  }

  const files = []
  // Real paths of the directories currently being descended (ancestor chain only),
  // so the same directory reached through two independent links is still listed.
  const activeDirs = new Set()

  const walk = (current) => {
    const realDir = fs.realpathSync(current)
    if (activeDirs.has(realDir)) {
      return
    }
    activeDirs.add(realDir)
    try {
      for (const child of fs.readdirSync(current)) {
        const fullPath = path.join(current, child)
        try {
          const childStat = fs.statSync(fullPath)
          if (childStat.isDirectory()) {
            walk(fullPath)
          } else if (childStat.isFile()) {
            files.push(fullPath)
          }
        } catch {
          // Best effort: one broken entry must not hide the rest of the tree.
        }
      }
    } finally {
      activeDirs.delete(realDir)
    }
  }

  walk(sourcePath)
  return files
}
76
+
77
/**
 * Extracts plain text from a file, choosing the reader by file extension.
 *
 * - `.docx`: raw text via mammoth
 * - `.pdf`: text via pdf-parse
 * - `.xlsx` / `.xls`: one section per sheet, headed by `# <sheet name>`, cells joined with ` | `
 * - anything else: the file read as UTF-8
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<string>} Extracted text.
 */
async function _readDocumentContent(filePath) {
  const ext = path.extname(filePath).toLowerCase()

  switch (ext) {
    case '.docx': {
      const extracted = await mammoth.extractRawText({ path: filePath })
      return extracted.value || ''
    }
    case '.pdf': {
      const parsed = await pdfParse(fs.readFileSync(filePath))
      return parsed.text || ''
    }
    case '.xlsx':
    case '.xls': {
      const workbook = XLSX.readFile(filePath)
      const sheetTexts = workbook.SheetNames.map((sheetName) => {
        // `header: 1` yields each row as an array of cell values.
        const rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], { header: 1 })
        const bodyLines = rows.map((cells) => cells.join(' | '))
        return [`# ${sheetName}`].concat(bodyLines).join('\n')
      })
      return sheetTexts.join('\n\n')
    }
    default:
      return fs.readFileSync(filePath, 'utf8')
  }
}
97
+
98
/**
 * Creates the skeleton of a new, empty knowledge base index.
 *
 * @param {string} kbRootPath - Knowledge base directory recorded in the index.
 * @returns {object} Index with version 1, identical created/updated timestamps and empty lists.
 */
function _getEmptyKnowledgeBase(kbRootPath) {
  const timestamp = new Date().toISOString()
  // Properties are added in the order they appear in the serialized index file.
  const skeleton = { version: 1, name: 'deepfish-rag', kbRootPath }
  skeleton.createdAt = timestamp
  skeleton.updatedAt = timestamp
  skeleton.sourceHistory = []
  skeleton.documents = []
  return skeleton
}
110
+
111
/**
 * Reads the knowledge base index from `kbRootPath`, creating the directory and an
 * empty `index.json` when none exists yet.
 *
 * @param {string} kbRootPath - Knowledge base directory.
 * @returns {object} Stored index layered over the empty defaults; `kbRootPath` always
 *   reflects the argument, not the stored value.
 * @throws {SyntaxError} When the stored index is not valid JSON.
 */
function _loadKnowledgeBase(kbRootPath) {
  fs.ensureDirSync(kbRootPath)
  const indexPath = _getKbIndexPath(kbRootPath)

  if (fs.existsSync(indexPath)) {
    const stored = JSON.parse(fs.readFileSync(indexPath, 'utf8'))
    // Defaults first so fields absent from the stored index still exist.
    return { ..._getEmptyKnowledgeBase(kbRootPath), ...stored, kbRootPath }
  }

  const freshKb = _getEmptyKnowledgeBase(kbRootPath)
  fs.writeFileSync(indexPath, JSON.stringify(freshKb, null, 2), 'utf8')
  return freshKb
}
127
+
128
/**
 * Stamps `updatedAt` on the knowledge base object (mutating it) and writes it to
 * `index.json` as pretty-printed JSON.
 *
 * @param {string} kbRootPath - Knowledge base directory.
 * @param {object} knowledgeBase - Index object to persist.
 * @returns {void}
 */
function _saveKnowledgeBase(kbRootPath, knowledgeBase) {
  knowledgeBase.updatedAt = new Date().toISOString()
  const serialized = JSON.stringify(knowledgeBase, null, 2)
  fs.writeFileSync(_getKbIndexPath(kbRootPath), serialized, 'utf8')
}
133
+
134
/**
 * Splits text into fixed-size, optionally overlapping chunks.
 *
 * @param {string} [content=''] - Text to split; falsy values are treated as empty.
 * @param {number} [chunkSize=800] - Chunk length in UTF-16 code units; values below 200
 *   are raised to 200, unparsable or zero values fall back to 800.
 * @param {number} [overlap=120] - Code units shared by consecutive chunks, clamped to
 *   `[0, chunkSize - 1]`. An explicit `0` disables overlap; only a missing or
 *   unparsable value falls back to 120.
 * @returns {Array<{offsetStart: number, offsetEnd: number, content: string}>}
 *   Chunks in document order; whitespace-only chunks are omitted.
 */
function _chunkText(content = '', chunkSize = 800, overlap = 120) {
  const text = String(content || '')
  // Floor so offsets stay integral even when a fractional size is passed.
  const size = Math.max(200, Math.floor(Number(chunkSize) || 800))

  // `Number(overlap) || 120` would turn a deliberate 0 into 120, so distinguish
  // "not provided / not a number" from a real zero.
  const isProvided = overlap !== null && overlap !== undefined && overlap !== ''
  const parsedOverlap = isProvided ? Number(overlap) : Number.NaN
  const requestedOverlap = Number.isFinite(parsedOverlap) ? Math.floor(parsedOverlap) : 120
  const overlapSize = Math.max(0, Math.min(size - 1, requestedOverlap))

  // The overlap clamp keeps this at 1 or more, so the loop always advances.
  const step = Math.max(1, size - overlapSize)
  const chunks = []
  for (let i = 0; i < text.length; i += step) {
    const chunk = text.slice(i, i + size)
    if (!chunk.trim()) continue
    chunks.push({
      offsetStart: i,
      offsetEnd: i + chunk.length,
      content: chunk,
    })
    // This chunk already reaches the end; a further step would only repeat the tail.
    if (i + size >= text.length) {
      break
    }
  }
  return chunks
}
154
+
155
/**
 * Counts non-overlapping, case-insensitive occurrences of `keyword` in `content`.
 *
 * @param {string} [content=''] - Text to search; falsy values are treated as empty.
 * @param {string} [keyword=''] - Term to look for.
 * @returns {number} Occurrence count; 0 when the keyword is empty after coercion.
 */
function _calcKeywordScore(content = '', keyword = '') {
  const needle = String(keyword || '').toLowerCase()
  // Guard on the coerced value: a truthy input such as [] stringifies to '' and
  // indexOf('') matches at every position, which would never terminate.
  if (needle.length === 0) return 0

  const haystack = String(content || '').toLowerCase()
  let count = 0
  let position = haystack.indexOf(needle)
  while (position !== -1) {
    count += 1
    // Resume after the match so occurrences do not overlap.
    position = haystack.indexOf(needle, position + needle.length)
  }
  return count
}
169
+
170
/**
 * Returns the embedding endpoint settings from the user config, interactively
 * asking for any value that is missing and persisting each answer as it is given.
 *
 * @returns {Promise<{EMBEDDING_API: string, EMBEDDING_API_KEY: string}>}
 *   Settings, with `''` for values that are still unset.
 */
async function getEmbeddingConfig() {
  const config = _loadConfig(_getConfigFilePath())

  // Prompt for a single missing field; an empty answer leaves it unset.
  const fillIfMissing = async (field, promptText) => {
    if (config[field]) return
    const answer = await aiInquirer.askInput(promptText, '', {})
    if (!answer) return
    config[field] = answer
    setEmbeddingConfig(config.EMBEDDING_API, config.EMBEDDING_API_KEY)
  }

  await fillIfMissing('EMBEDDING_API', '请输入向量化接口地址')
  await fillIfMissing('EMBEDDING_API_KEY', '请输入向量化接口密钥')

  const embeddingApi = config.EMBEDDING_API || ''
  const embeddingApiKey = config.EMBEDDING_API_KEY || ''
  return { EMBEDDING_API: embeddingApi, EMBEDDING_API_KEY: embeddingApiKey }
}
195
+
196
/**
 * Writes the embedding endpoint settings into the user config file, keeping every
 * other stored setting.
 *
 * @param {string} embeddingApi - Embedding endpoint URL.
 * @param {string} embeddingApiKey - Embedding endpoint key.
 * @returns {{success: boolean, data: object}} Success envelope with the config path
 *   and the stored values (`''` for falsy ones).
 */
function setEmbeddingConfig(embeddingApi, embeddingApiKey) {
  const configPath = _getConfigFilePath()

  const updated = { ..._loadConfig(configPath) }
  updated.EMBEDDING_API = embeddingApi
  updated.EMBEDDING_API_KEY = embeddingApiKey

  // The config is a CommonJS module, so the JSON is wrapped in an export statement.
  const moduleSource = `module.exports = ${JSON.stringify(updated, null, 2)}`
  fs.writeFileSync(configPath, moduleSource)

  const summary = {
    configPath,
    EMBEDDING_API: embeddingApi || '',
    EMBEDDING_API_KEY: embeddingApiKey || '',
  }
  return ok(summary)
}
212
+
213
/**
 * Creates a knowledge base or appends to an existing one. The default location is
 * `.deepfish-rag` under the directory the command runs in.
 *
 * Supported files below `sourcePath` are read and compared by content hash:
 * unchanged documents are skipped, changed ones are updated in place, and new ones
 * are added. Files inside the knowledge base directory itself are never ingested;
 * otherwise indexing the working directory would add the index file to itself on
 * every run.
 *
 * @param {string} [sourcePath=''] - Source file or directory; prompts interactively when empty.
 * @param {string} [knowledgeBasePath=''] - Knowledge base directory; defaults to `.deepfish-rag`.
 * @returns {Promise<object>} Success envelope with the build statistics (including
 *   `failedFiles`, the files that raised an error and were counted as skipped), or a
 *   failure envelope.
 */
async function buildKnowledgeBase(sourcePath = '', knowledgeBasePath = '') {
  try {
    const inputSourcePath = sourcePath || (await aiInquirer.askInput('请输入源文件目录或文件路径', '', {}))
    if (!inputSourcePath) {
      return fail('未提供源文件目录或文件路径')
    }

    const resolvedSourcePath = path.resolve(process.cwd(), inputSourcePath)
    if (!fs.existsSync(resolvedSourcePath)) {
      return fail(`Source path does not exist: ${resolvedSourcePath}`, {
        sourcePath: resolvedSourcePath,
      })
    }

    const kbRootPath = _getKbRootPath(knowledgeBasePath)
    const knowledgeBase = _loadKnowledgeBase(kbRootPath)

    // True for the knowledge base directory and anything below it.
    const isInsideKnowledgeBase = (filePath) => {
      const relative = path.relative(kbRootPath, filePath)
      const escapes = relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative)
      return !escapes
    }

    const sourceFiles = _collectSourceFiles(resolvedSourcePath).filter(
      (filePath) => !isInsideKnowledgeBase(filePath),
    )
    const supportedFiles = sourceFiles.filter((filePath) => _isSupportedFile(filePath))

    // One lookup table instead of a linear search per file; the first entry wins,
    // matching what findIndex would have returned.
    const indexBySourcePath = new Map()
    knowledgeBase.documents.forEach((doc, index) => {
      if (!indexBySourcePath.has(doc.sourcePath)) {
        indexBySourcePath.set(doc.sourcePath, index)
      }
    })

    let addedCount = 0
    let updatedCount = 0
    let skippedCount = 0
    const failedFiles = []

    for (const filePath of supportedFiles) {
      try {
        const content = await _readDocumentContent(filePath)
        if (!content || !content.trim()) {
          skippedCount += 1
          continue
        }

        const sourceHash = _sha256(content)
        const size = Buffer.byteLength(content, 'utf8')
        const now = new Date().toISOString()
        const existingIndex = indexBySourcePath.get(filePath)

        if (existingIndex !== undefined) {
          const existing = knowledgeBase.documents[existingIndex]
          if (existing.sourceHash === sourceHash) {
            skippedCount += 1
            continue
          }
          knowledgeBase.documents[existingIndex] = {
            ...existing,
            sourceHash,
            size,
            content,
            updatedAt: now,
          }
          updatedCount += 1
          continue
        }

        knowledgeBase.documents.push({
          id: _sha256(filePath).slice(0, 16),
          sourcePath: filePath,
          sourceHash,
          size,
          content,
          createdAt: now,
          updatedAt: now,
        })
        indexBySourcePath.set(filePath, knowledgeBase.documents.length - 1)
        addedCount += 1
      } catch (error) {
        // One unreadable file must not abort the build, but it is reported.
        skippedCount += 1
        failedFiles.push({ sourcePath: filePath, error: error?.message || String(error) })
      }
    }

    knowledgeBase.sourceHistory.push({
      sourcePath: resolvedSourcePath,
      loadedAt: new Date().toISOString(),
      scannedFiles: sourceFiles.length,
      supportedFiles: supportedFiles.length,
      addedCount,
      updatedCount,
      skippedCount,
    })

    _saveKnowledgeBase(kbRootPath, knowledgeBase)
    return ok({
      knowledgeBasePath: kbRootPath,
      sourcePath: resolvedSourcePath,
      scannedFiles: sourceFiles.length,
      supportedFiles: supportedFiles.length,
      addedCount,
      updatedCount,
      skippedCount,
      failedFiles,
      totalDocuments: knowledgeBase.documents.length,
    })
  } catch (error) {
    return fail(error, { sourcePath, knowledgeBasePath })
  }
}
303
+
304
/**
 * Lists document summaries from the knowledge base, optionally filtered by keyword.
 *
 * The keyword is matched case-insensitively against each document's source path and
 * content. Documents with a missing path or content are treated as empty text, so one
 * malformed entry cannot fail the whole query.
 *
 * @param {string} [keyword=''] - Search term; empty returns every document.
 * @param {string} [knowledgeBasePath=''] - Knowledge base directory; defaults to `.deepfish-rag`.
 * @param {number} [limit=10] - Maximum number of summaries; non-positive values fall back to 10.
 * @returns {object} Success envelope with totals and summaries (id, sourcePath, size,
 *   updatedAt, 240-character preview), or a failure envelope.
 */
function readKnowledgeBase(keyword = '', knowledgeBasePath = '', limit = 10) {
  try {
    const kbRootPath = _getKbRootPath(knowledgeBasePath)
    const knowledgeBase = _loadKnowledgeBase(kbRootPath)
    // String() so a non-string keyword (e.g. a number from a tool call) does not throw on trim().
    const normalizedKeyword = String(keyword || '').trim().toLowerCase()
    const maxResult = Number(limit) > 0 ? Number(limit) : 10

    const containsKeyword = (value) => String(value || '').toLowerCase().includes(normalizedKeyword)

    const filtered = knowledgeBase.documents.filter((item) => {
      if (!normalizedKeyword) return true
      return containsKeyword(item.sourcePath) || containsKeyword(item.content)
    })

    const result = filtered.slice(0, maxResult).map((item) => ({
      id: item.id,
      sourcePath: item.sourcePath,
      size: item.size,
      updatedAt: item.updatedAt,
      preview: (item.content || '').slice(0, 240),
    }))

    return ok({
      knowledgeBasePath: kbRootPath,
      totalDocuments: knowledgeBase.documents.length,
      matchedDocuments: filtered.length,
      items: result,
    })
  } catch (error) {
    return fail(error, { keyword, knowledgeBasePath, limit })
  }
}
338
+
339
/**
 * Reads the full content of one document, looked up by its ID.
 *
 * @param {string} documentId - Document ID stored in the index.
 * @param {string} [knowledgeBasePath=''] - Knowledge base directory; defaults to `.deepfish-rag`.
 * @returns {object} Success envelope with the document fields, or a failure envelope
 *   when the ID is missing or unknown.
 */
function readKnowledgeBaseDocument(documentId, knowledgeBasePath = '') {
  try {
    if (!documentId) return fail('documentId is required')

    const kbRootPath = _getKbRootPath(knowledgeBasePath)
    const { documents } = _loadKnowledgeBase(kbRootPath)
    const doc = documents.find((item) => item.id === documentId)

    if (doc) {
      const { id, sourcePath, size, createdAt, updatedAt, content } = doc
      return ok({ id, sourcePath, size, createdAt, updatedAt, content })
    }

    return fail(`Document not found: ${documentId}`, {
      documentId,
      knowledgeBasePath: kbRootPath,
    })
  } catch (error) {
    return fail(error, { documentId, knowledgeBasePath })
  }
}
366
+
367
/**
 * Searches the knowledge base chunk by chunk, for later RAG recall.
 *
 * Each document is split into chunks, each chunk is scored by keyword occurrence
 * count, and the matching chunks are returned ordered by score (descending), then
 * source path, then chunk index.
 *
 * @param {string} [keyword=''] - Required search term.
 * @param {string} [knowledgeBasePath=''] - Knowledge base directory; defaults to `.deepfish-rag`.
 * @param {number} [chunkSize=800] - Chunk length passed to the chunker.
 * @param {number} [overlap=120] - Chunk overlap passed to the chunker.
 * @param {number} [limit=10] - Maximum number of chunks; non-positive values fall back to 10.
 * @returns {object} Success envelope with the matching chunks, or a failure envelope.
 */
function searchKnowledgeBaseChunks(keyword = '', knowledgeBasePath = '', chunkSize = 800, overlap = 120, limit = 10) {
  try {
    const normalizedKeyword = (keyword || '').trim()
    if (!normalizedKeyword) return fail('keyword is required')

    const kbRootPath = _getKbRootPath(knowledgeBasePath)
    const knowledgeBase = _loadKnowledgeBase(kbRootPath)
    const maxResult = Number(limit) > 0 ? Number(limit) : 10

    const allChunks = []
    for (const doc of knowledgeBase.documents) {
      const pieces = _chunkText(doc.content || '', chunkSize, overlap)
      // chunkIndex is the position among all chunks of the document, not only the hits.
      for (let chunkIndex = 0; chunkIndex < pieces.length; chunkIndex += 1) {
        const piece = pieces[chunkIndex]
        const score = _calcKeywordScore(piece.content, normalizedKeyword)
        if (score <= 0) continue
        allChunks.push({
          documentId: doc.id,
          sourcePath: doc.sourcePath,
          chunkIndex,
          offsetStart: piece.offsetStart,
          offsetEnd: piece.offsetEnd,
          score,
          content: piece.content,
        })
      }
    }

    const byRelevance = (a, b) => {
      const scoreOrder = b.score - a.score
      if (scoreOrder) return scoreOrder
      const pathOrder = a.sourcePath.localeCompare(b.sourcePath)
      if (pathOrder) return pathOrder
      return a.chunkIndex - b.chunkIndex
    }
    // sort() reorders allChunks in place; only its length is read afterwards.
    const items = allChunks.sort(byRelevance).slice(0, maxResult)

    return ok({
      knowledgeBasePath: kbRootPath,
      keyword: normalizedKeyword,
      totalMatchedChunks: allChunks.length,
      items,
    })
  } catch (error) {
    return fail(error, { keyword, knowledgeBasePath, chunkSize, overlap, limit })
  }
}
412
+
413
/**
 * Reads knowledge base statistics: version, name, timestamps, document count and
 * the history of past builds.
 *
 * @param {string} [knowledgeBasePath=''] - Knowledge base directory; defaults to `.deepfish-rag`.
 * @returns {object} Success envelope with the metadata, or a failure envelope.
 */
function getKnowledgeBaseInfo(knowledgeBasePath = '') {
  try {
    const kbRootPath = _getKbRootPath(knowledgeBasePath)
    const kb = _loadKnowledgeBase(kbRootPath)
    const info = {
      knowledgeBasePath: kbRootPath,
      version: kb.version,
      name: kb.name,
      createdAt: kb.createdAt,
      updatedAt: kb.updatedAt,
      totalDocuments: kb.documents.length,
      sourceHistory: kb.sourceHistory,
    }
    return ok(info)
  } catch (error) {
    return fail(error, { knowledgeBasePath })
  }
}
431
+
432
/**
 * Deletes the knowledge base directory (by default `.deepfish-rag` under the
 * directory the command runs in).
 *
 * The path comes from the caller and is removed recursively, so two safety checks
 * run first:
 *  - a path that is, or contains, the current working directory or the user's home
 *    directory is refused;
 *  - the target must be a directory that holds an `index.json` or is empty. A
 *    corrupt index still counts, so a broken knowledge base can be removed.
 *
 * @param {string} [knowledgeBasePath=''] - Knowledge base directory; defaults to `.deepfish-rag`.
 * @returns {object} Success envelope with `deleted` true/false, or a failure envelope
 *   when deletion is refused or fails.
 */
function deleteKnowledgeBase(knowledgeBasePath = '') {
  try {
    const kbRootPath = _getKbRootPath(knowledgeBasePath)
    if (!fs.existsSync(kbRootPath)) {
      return ok({
        knowledgeBasePath: kbRootPath,
        deleted: false,
        message: 'knowledge base path not found',
      })
    }

    // True when `child` is `parent` itself or lies below it.
    const contains = (parent, child) => {
      const relative = path.relative(parent, child)
      const escapes = relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative)
      return !escapes
    }
    const protectedPaths = [process.cwd(), os.homedir()].filter(Boolean)
    if (protectedPaths.some((protectedPath) => contains(kbRootPath, protectedPath))) {
      return fail(`Refusing to delete a path that contains the working or home directory: ${kbRootPath}`, {
        knowledgeBasePath: kbRootPath,
      })
    }

    const isDirectory = fs.statSync(kbRootPath).isDirectory()
    const looksLikeKnowledgeBase =
      isDirectory && (fs.existsSync(_getKbIndexPath(kbRootPath)) || fs.readdirSync(kbRootPath).length === 0)
    if (!looksLikeKnowledgeBase) {
      return fail(`Refusing to delete a path that is not a knowledge base directory: ${kbRootPath}`, {
        knowledgeBasePath: kbRootPath,
      })
    }

    fs.removeSync(kbRootPath)
    return ok({
      knowledgeBasePath: kbRootPath,
      deleted: true,
    })
  } catch (error) {
    return fail(error, { knowledgeBasePath })
  }
}
452
+
453
/**
 * Rebuilds the knowledge base: deletes the existing one, then builds it again from
 * the source.
 *
 * The source path is resolved and checked before anything is deleted, so a missing
 * or mistyped source leaves the existing knowledge base untouched.
 *
 * @param {string} [sourcePath=''] - Source file or directory; prompts interactively when empty.
 * @param {string} [knowledgeBasePath=''] - Knowledge base directory; defaults to `.deepfish-rag`.
 * @returns {Promise<object>} The build result, or the failure envelope of whichever step failed.
 */
async function rebuildKnowledgeBase(sourcePath = '', knowledgeBasePath = '') {
  try {
    // Same prompt and messages as the build step, which would otherwise ask only after the delete.
    const inputSourcePath = sourcePath || (await aiInquirer.askInput('请输入源文件目录或文件路径', '', {}))
    if (!inputSourcePath) {
      return fail('未提供源文件目录或文件路径')
    }
    const resolvedSourcePath = path.resolve(process.cwd(), inputSourcePath)
    if (!fs.existsSync(resolvedSourcePath)) {
      return fail(`Source path does not exist: ${resolvedSourcePath}`, {
        sourcePath: resolvedSourcePath,
      })
    }

    const deleteResult = deleteKnowledgeBase(knowledgeBasePath)
    if (!deleteResult.success) {
      return deleteResult
    }
    return await buildKnowledgeBase(inputSourcePath, knowledgeBasePath)
  } catch (error) {
    return fail(error, { sourcePath, knowledgeBasePath })
  }
}
465
+
466
+ // Returns the knowledge-base creation guide: a static Markdown string (mostly Chinese) covering goals, the build/rebuild functions and the recommended workflow. Takes no arguments and has no side effects.
467
+ function getKnowledgeBaseCreationDescription() {
468
+ return `# Knowledge Base Creation Guide
469
+
470
+ ## 目标
471
+ 使用本工具在命令执行目录下创建或维护本地知识库(默认目录为 .deepfish-rag),并支持后续持续增量更新。
472
+
473
+ ## 能完成的任务
474
+ 1. 从用户给定的目录或文件加载文档内容,构建本地知识库。
475
+ 2. 自动识别常见文本与文档格式(如 md、txt、json、js、pdf、docx、xlsx 等)。
476
+ 3. 在重复导入时执行增量更新:
477
+ - 内容未变化:跳过
478
+ - 内容已变化:更新
479
+ - 新文件:新增
480
+ 4. 记录每次构建来源和统计信息(sourceHistory),方便审计和复盘。
481
+ 5. 支持删除后重建,快速恢复知识库状态。
482
+
483
+ ## 关键函数与协同关系
484
+
485
+ ### 1) 构建层
486
+ - buildKnowledgeBase(sourcePath, knowledgeBasePath)
487
+ - 创建或续加知识库的主入口。
488
+ - sourcePath 为空时会交互式要求用户输入文件或目录。
489
+ - 默认知识库路径为 process.cwd()/.deepfish-rag。
490
+
491
+ 它在内部协同调用:
492
+ - _getKbRootPath:统一知识库目录解析。
493
+ - _loadKnowledgeBase:读取或初始化 index.json。
494
+ - _collectSourceFiles:展开目录得到文件集合。
495
+ - _isSupportedFile:过滤可处理文件类型。
496
+ - _readDocumentContent:按不同文件类型提取文本。
497
+ - _sha256:计算内容哈希,用于增量判断。
498
+ - _saveKnowledgeBase:保存最终索引。
499
+
500
+ ### 2) 重建层
501
+ - deleteKnowledgeBase(knowledgeBasePath)
502
+ - 删除现有知识库目录。
503
+ - rebuildKnowledgeBase(sourcePath, knowledgeBasePath)
504
+ - 先删后建。
505
+ - 典型用于“索引异常修复”或“结构升级后重建”。
506
+
507
+ ## 推荐执行流程
508
+ 1. 调用 buildKnowledgeBase 进行首次构建。
509
+ 2. 后续补充文档时再次调用 buildKnowledgeBase(续加)。
510
+ 3. 如需彻底刷新:调用 rebuildKnowledgeBase。
511
+ 4. 构建完成后再进入检索阶段(read/search 系列函数)。
512
+
513
+ ## 面向用户任务的协同策略
514
+ - 用户说“请把这个目录做成知识库”:
515
+ - buildKnowledgeBase(目录路径)
516
+ - 用户说“继续把新资料加进去”:
517
+ - buildKnowledgeBase(新目录路径)
518
+ - 用户说“从头重建”:
519
+ - rebuildKnowledgeBase(目录路径)
520
+
521
+ ## 结果校验建议
522
+ 构建后建议检查:
523
+ 1. getKnowledgeBaseInfo 的 totalDocuments 是否大于 0。
524
+ 2. sourceHistory 是否新增一条构建记录。
525
+ 3. addedCount / updatedCount / skippedCount 是否符合预期。
526
+ `
527
+ }
528
+
529
+ // Returns the knowledge-base retrieval guide: a static Markdown string (mostly Chinese) covering the info/read/search functions, a recommended retrieval flow and parameter suggestions. Takes no arguments and has no side effects.
530
+ function getKnowledgeBaseRetrievalDescription() {
531
+ return `# Knowledge Base Retrieval Guide
532
+
533
+ ## 目标
534
+ 从本地知识库中高效找到“相关文档”与“关键片段”,用于问答、总结、比对和后续 RAG 召回。
535
+
536
+ ## 能完成的任务
537
+ 1. 查看知识库总体状态与构建历史。
538
+ 2. 按关键词筛选文档摘要,快速定位候选文档。
539
+ 3. 按文档 ID 读取全文,进行精读分析。
540
+ 4. 按分块检索返回命中片段,适合长文档场景。
541
+
542
+ ## 关键函数与协同关系
543
+
544
+ ### 1) 元信息确认
545
+ - getKnowledgeBaseInfo(knowledgeBasePath)
546
+ - 获取总文档数、创建时间、更新时间、构建历史。
547
+ - 作用:先判断库是否可用,再决定检索策略。
548
+
549
+ ### 2) 粗粒度召回(文档级)
550
+ - readKnowledgeBase(keyword, knowledgeBasePath, limit)
551
+ - 返回文档摘要(id、sourcePath、preview)。
552
+ - 作用:先召回候选文档,缩小范围。
553
+
554
+ ### 3) 细粒度阅读(全文级)
555
+ - readKnowledgeBaseDocument(documentId, knowledgeBasePath)
556
+ - 读取指定文档全文。
557
+ - 作用:对高价值候选文档做精读与引用。
558
+
559
+ ### 4) 片段级召回(chunk级)
560
+ - searchKnowledgeBaseChunks(keyword, knowledgeBasePath, chunkSize, overlap, limit)
561
+ - 将文档切块并按关键词命中分数排序。
562
+ - 作用:在超长文档里快速找到最相关上下文。
563
+
564
+ 它在内部协同调用:
565
+ - _chunkText:按 chunkSize + overlap 生成可检索片段。
566
+ - _calcKeywordScore:计算关键词命中次数并排序。
567
+
568
+ ## 推荐检索流程
569
+ 1. 调用 getKnowledgeBaseInfo,确认知识库可用。
570
+ 2. 调用 readKnowledgeBase(keyword),拿到候选文档列表。
571
+ 3. 对重点文档调用 readKnowledgeBaseDocument 进行精读。
572
+ 4. 若候选文档过大或命中不精确,调用 searchKnowledgeBaseChunks 做片段召回。
573
+ 5. 将片段结果组织为回答证据,必要时回看全文补全上下文。
574
+
575
+ ## 典型用户任务协同方案
576
+
577
+ ### 场景 A:用户问“知识库里有没有某主题”
578
+ 1. readKnowledgeBase(主题词)
579
+ 2. 返回候选文档与预览
580
+
581
+ ### 场景 B:用户问“请给出该主题的依据段落”
582
+ 1. readKnowledgeBase(主题词)
583
+ 2. searchKnowledgeBaseChunks(主题词)
584
+ 3. 输出高分片段 + 源文件路径
585
+
586
+ ### 场景 C:用户问“请基于某篇文档做总结”
587
+ 1. readKnowledgeBase(文档名关键词)
588
+ 2. readKnowledgeBaseDocument(documentId)
589
+ 3. 对全文执行总结
590
+
591
+ ## 检索参数建议
592
+ 1. limit
593
+ - 初筛推荐 5-20
594
+ 2. chunkSize
595
+ - 一般 600-1200
596
+ 3. overlap
597
+ - 一般 80-200
598
+ - 过小可能断句,过大可能冗余
599
+
600
+ ## 结果质量建议
601
+ 1. 优先返回包含 sourcePath 与文档 ID 的证据。
602
+ 2. 先文档级筛选,再 chunk 级定位,避免全库全文扫描输出过大。
603
+ 3. 对高分片段做二次核对,防止关键词误命中。
604
+ `
605
+ }
606
+
607
+ const descriptions = [ // Tool schemas, one per entry of `functions` below: each has type 'function' plus a function object with name, description and a JSON-Schema-style `parameters` object.
608
+ {
609
+ type: 'function',
610
+ function: {
611
+ name: 'getKnowledgeBaseCreationDescription',
612
+ description: '知识库创建与续加的完整说明文档,包含可完成任务、函数协同关系与推荐执行流程。在执行知识库创建、续加或重建前,建议先阅读此文档以明确使用方法与注意事项。',
613
+ parameters: {
614
+ type: 'object',
615
+ properties: {},
616
+ },
617
+ },
618
+ },
619
+ {
620
+ type: 'function',
621
+ function: {
622
+ name: 'getKnowledgeBaseRetrievalDescription',
623
+ description: '知识库检索的完整说明文档,包含检索策略、函数协同关系与典型任务执行方案。在执行知识库检索前,建议先阅读此文档以明确使用方法与注意事项。',
624
+ parameters: {
625
+ type: 'object',
626
+ properties: {},
627
+ },
628
+ },
629
+ },
630
+ {
631
+ type: 'function',
632
+ function: {
633
+ name: 'buildKnowledgeBase',
634
+ description: '创建或续加知识库。默认知识库路径为命令执行目录下的 .deepfish-rag。sourcePath 为空时会提示用户输入源文件目录或文件路径。',
635
+ parameters: {
636
+ type: 'object',
637
+ properties: {
638
+ sourcePath: { type: 'string', description: '源文件目录或文件路径。为空时会交互输入。' },
639
+ knowledgeBasePath: { type: 'string', description: '知识库目录路径,默认 .deepfish-rag(相对命令执行目录)。' },
640
+ },
641
+ required: [],
642
+ },
643
+ },
644
+ },
645
+ {
646
+ type: 'function',
647
+ function: {
648
+ name: 'readKnowledgeBase',
649
+ description: '读取知识库中的文档摘要,支持关键词过滤。可用于快速检索知识库内容。',
650
+ parameters: {
651
+ type: 'object',
652
+ properties: {
653
+ keyword: { type: 'string', description: '关键词,不传表示返回全部文档摘要。' },
654
+ knowledgeBasePath: { type: 'string', description: '知识库目录路径,默认 .deepfish-rag。' },
655
+ limit: { type: 'number', description: '返回条数上限,默认 10。' },
656
+ },
657
+ required: [],
658
+ },
659
+ },
660
+ },
661
+ {
662
+ type: 'function',
663
+ function: {
664
+ name: 'readKnowledgeBaseDocument',
665
+ description: '按文档ID读取知识库中的完整文档内容。',
666
+ parameters: {
667
+ type: 'object',
668
+ properties: {
669
+ documentId: { type: 'string', description: '文档ID(由 buildKnowledgeBase 生成)。' },
670
+ knowledgeBasePath: { type: 'string', description: '知识库目录路径,默认 .deepfish-rag。' },
671
+ },
672
+ required: ['documentId'],
673
+ },
674
+ },
675
+ },
676
+ {
677
+ type: 'function',
678
+ function: {
679
+ name: 'getKnowledgeBaseInfo',
680
+ description: '读取知识库元信息与历史加载记录(sourceHistory)。',
681
+ parameters: {
682
+ type: 'object',
683
+ properties: {
684
+ knowledgeBasePath: { type: 'string', description: '知识库目录路径,默认 .deepfish-rag。' },
685
+ },
686
+ required: [],
687
+ },
688
+ },
689
+ },
690
+ {
691
+ type: 'function',
692
+ function: {
693
+ name: 'searchKnowledgeBaseChunks',
694
+ description: '对知识库进行分块检索,返回命中关键词的文本块(chunk),用于RAG召回。',
695
+ parameters: {
696
+ type: 'object',
697
+ properties: {
698
+ keyword: { type: 'string', description: '检索关键词。' },
699
+ knowledgeBasePath: { type: 'string', description: '知识库目录路径,默认 .deepfish-rag。' },
700
+ chunkSize: { type: 'number', description: '分块长度,默认 800。' },
701
+ overlap: { type: 'number', description: '分块重叠长度,默认 120。' },
702
+ limit: { type: 'number', description: '返回数量上限,默认 10。' },
703
+ },
704
+ required: ['keyword'],
705
+ },
706
+ },
707
+ },
708
+ {
709
+ type: 'function',
710
+ function: {
711
+ name: 'deleteKnowledgeBase',
712
+ description: '删除知识库目录(默认删除命令执行目录下的 .deepfish-rag)。',
713
+ parameters: {
714
+ type: 'object',
715
+ properties: {
716
+ knowledgeBasePath: { type: 'string', description: '知识库目录路径,默认 .deepfish-rag。' },
717
+ },
718
+ required: [],
719
+ },
720
+ },
721
+ },
722
+ {
723
+ type: 'function',
724
+ function: {
725
+ name: 'rebuildKnowledgeBase',
726
+ description: '重建知识库:先删除原知识库,再从源目录/文件重新构建。',
727
+ parameters: {
728
+ type: 'object',
729
+ properties: {
730
+ sourcePath: { type: 'string', description: '源文件目录或文件路径。为空时会交互输入。' },
731
+ knowledgeBasePath: { type: 'string', description: '知识库目录路径,默认 .deepfish-rag。' },
732
+ },
733
+ required: [],
734
+ },
735
+ },
736
+ },
737
+ ]
738
+
739
+ const functions = { // Implementations keyed by the names declared in `descriptions`; getEmbeddingConfig and setEmbeddingConfig are defined above but not exposed here.
740
+ getKnowledgeBaseCreationDescription,
741
+ getKnowledgeBaseRetrievalDescription,
742
+ buildKnowledgeBase,
743
+ readKnowledgeBase,
744
+ readKnowledgeBaseDocument,
745
+ getKnowledgeBaseInfo,
746
+ searchKnowledgeBaseChunks,
747
+ deleteKnowledgeBase,
748
+ rebuildKnowledgeBase,
749
+ }
750
+
751
+ const EmbeddingTool = { // Tool bundle exported by this module: metadata plus the schemas and implementations defined above.
752
+ name: 'EmbeddingTool',
753
+ description: '提供本地知识库构建/读取能力,默认知识库路径为命令执行目录下的 .deepfish-rag',
754
+ platform: 'all',
755
+ descriptions,
756
+ functions,
757
+ isSystem: true
758
+ }
759
+
760
+ module.exports = EmbeddingTool
761
+
762
+
763
+