deepfish-ai 1.0.21 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/AgentRobot/BaseAgentRobot/index.js +11 -17
- package/src/AgentRobot/BaseAgentRobot/lazy-tools/doc-transform.js +203 -0
- package/src/AgentRobot/BaseAgentRobot/lazy-tools/docx.js +551 -1
- package/src/AgentRobot/BaseAgentRobot/lazy-tools/embedding.js +762 -0
- package/src/AgentRobot/BaseAgentRobot/tools/GenerateTools.js +2 -1
- package/src/AgentRobot/BaseAgentRobot/tools/WebTools.js +256 -0
- package/src/AgentRobot/BaseAgentRobot/utils/AttachmentToolScanner.js +4 -5
- package/src/cli/DefaultConfig.js +2 -0
|
@@ -102,8 +102,9 @@ async function getGenerateSkillRules(goal) {
|
|
|
102
102
|
2. 命名规范:
|
|
103
103
|
- 函数名称前缀:「领域用途+分隔符」(如systemFileManagement_)
|
|
104
104
|
- 函数描述开头:统一格式「领域用途+分隔符+功能描述」(如系统文件管理:重命名文件)
|
|
105
|
-
3. 内置工具函数调用:函数内可以使用内置工具函数requestAI来获取AI请求结果,在环境中通过this.Tools
|
|
105
|
+
3. 内置工具函数调用:函数内可以使用内置工具函数requestAI来获取AI请求结果,在环境中通过this.Tools注入,必要时也可以使用其他函数,示例:
|
|
106
106
|
- this.Tools.requestAI(systemDescription, prompt, temperature)
|
|
107
|
+
- this.Tools.executeCommand(command)
|
|
107
108
|
4. 函数数量:至少包含1个可被AI工作流调用的函数
|
|
108
109
|
5. 拆分成多个文件,保持文件结构清晰
|
|
109
110
|
6. 对于大于5个的扩展功能,需要在functions中输出一个说明函数,只需返回一个markdown类型的英文字符串,专门用于解释当前扩展工具的使用方法、参数说明、示例等内容,函数名称为「readme」,如「systemFileManagement_readme」;函数描述需要强调调用该扩展模块前必须先阅读该规则文档。
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
const axios = require('axios')
|
|
2
|
+
const cheerio = require('cheerio')
|
|
3
|
+
const puppeteer = require('puppeteer')
|
|
4
|
+
|
|
5
|
+
/**
 * Builds the success envelope returned by the web tools.
 *
 * @param {*} [data=null] - Payload handed back to the caller.
 * @returns {{success: boolean, data: *}} Object with `success` set to true.
 */
function ok(data = null) {
  const envelope = { success: true, data }
  return envelope
}
|
|
8
|
+
|
|
9
|
+
/**
 * Builds the failure envelope returned by the web tools.
 *
 * @param {*} error - An Error (its `message` is used) or any other value,
 *   which is stringified with `String()`.
 * @param {*} [data=null] - Optional context describing the failed call.
 * @returns {{success: boolean, error: string, data: *}} Object with
 *   `success` set to false and a textual error.
 */
function fail(error, data = null) {
  let message = error?.message
  if (!message) {
    // No usable `message` property: fall back to the value's string form.
    message = String(error)
  }
  return { success: false, error: message, data }
}
|
|
12
|
+
|
|
13
|
+
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param {*} [text=''] - Value to clean; falsy values are treated as ''.
 * @returns {string} The single-spaced, trimmed text.
 */
function normalizeWhitespace(text = '') {
  const source = String(text || '')
  const singleSpaced = source.replace(/\s+/g, ' ')
  return singleSpaced.trim()
}
|
|
16
|
+
|
|
17
|
+
/**
 * Resolves a possibly relative link against a base URL.
 *
 * @param {string} baseUrl - URL the link is resolved against.
 * @param {string} [href=''] - Link target as found in the page.
 * @returns {string} The resolved absolute URL, or `href` unchanged when the
 *   combination cannot be parsed as a URL.
 */
function absoluteUrl(baseUrl, href = '') {
  let resolved
  try {
    resolved = new URL(href, baseUrl).href
  } catch {
    // Unparseable input: hand the raw link back instead of failing.
    resolved = href
  }
  return resolved
}
|
|
24
|
+
|
|
25
|
+
/**
 * Parses an HTML document and extracts its title, description, visible text
 * and links.
 *
 * @param {string} [html=''] - HTML markup to parse.
 * @param {string} [url=''] - Page URL used as the base for resolving links.
 * @returns {{title: string, description: string, content: string,
 *   links: Array<{href: string, text: string}>}} Extracted page data; the
 *   description comes from the `description` meta tag, falling back to
 *   `og:description`, then to ''.
 */
function extractByCheerio(html = '', url = '') {
  const $ = cheerio.load(html)
  // Remove script/style/noscript nodes before any text is read from the page.
  $('script, style, noscript').remove()

  const rawDescription =
    $('meta[name="description"]').attr('content') ||
    $('meta[property="og:description"]').attr('content') ||
    ''

  const links = []
  $('a[href]').each((_, anchor) => {
    const node = $(anchor)
    const href = node.attr('href') || ''
    // Anchors with an empty href are skipped.
    if (href) {
      links.push({
        href: absoluteUrl(url, href),
        text: normalizeWhitespace(node.text()),
      })
    }
  })

  return {
    title: normalizeWhitespace($('title').first().text()),
    description: normalizeWhitespace(rawDescription),
    content: normalizeWhitespace($('body').text()),
    links,
  }
}
|
|
55
|
+
|
|
56
|
+
/**
 * Downloads a page with a plain HTTP GET (no JavaScript execution).
 *
 * The body is always returned as text. axios parses JSON responses into
 * objects by default, which would hand a non-string to the HTML parser; the
 * identity `transformResponse` plus `responseType: 'text'` disable that.
 *
 * @param {string} url - Address to fetch.
 * @param {number} [timeout=15000] - Request timeout in milliseconds.
 * @returns {Promise<string>} The raw response body.
 * @throws Propagates any error raised by `axios.get`.
 */
async function fetchStatic(url, timeout = 15000) {
  const response = await axios.get(url, {
    timeout,
    responseType: 'text',
    // Keep the body exactly as received instead of letting axios JSON-parse it.
    transformResponse: [(body) => body],
    headers: {
      'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130 Safari/537.36 DeepFish-MCP-Web/1.0',
      Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    },
  })
  const { data } = response
  // Defensive: an empty or non-string body still yields a string for the parser.
  return typeof data === 'string' ? data : String(data ?? '')
}
|
|
67
|
+
|
|
68
|
+
/**
 * Loads a page in a puppeteer-launched browser and returns the page's HTML.
 *
 * @param {string} url - Address to open.
 * @param {string} [waitUntil='networkidle2'] - Navigation condition passed
 *   to `page.goto`.
 * @param {number} [timeout=30000] - Navigation timeout passed to `page.goto`.
 * @returns {Promise<string>} The markup returned by `page.content()`.
 * @throws Propagates launch, navigation and close errors.
 */
async function fetchDynamic(url, waitUntil = 'networkidle2', timeout = 30000) {
  const launchOptions = { args: ['--no-sandbox', '--disable-setuid-sandbox'] }
  const browser = await puppeteer.launch(launchOptions)
  try {
    const page = await browser.newPage()
    const navigation = { waitUntil, timeout }
    await page.goto(url, navigation)
    const html = await page.content()
    return html
  } finally {
    // Always shut the browser down, whether navigation succeeded or threw.
    await browser.close()
  }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Fetches a web page and returns its title, description, truncated body text
 * and the first 50 links.
 *
 * Only http and https URLs are accepted. The check runs before any fetch so
 * that other schemes (for example `file:`) can never reach the headless
 * browser, which would otherwise be able to open local resources.
 *
 * @param {string} url - Page address (http or https).
 * @param {string} [mode='auto'] - 'static' (HTTP GET only), 'dynamic'
 *   (headless browser only); any other value behaves as 'auto': static
 *   first, dynamic when the static fetch throws.
 * @param {number} [maxChars=4000] - Maximum number of body characters to
 *   return; non-positive or non-numeric values fall back to 4000.
 * @returns {Promise<{success: boolean, data: *, error?: string}>} A success
 *   envelope with url, mode (the strategy actually used), title, description,
 *   content, contentLength (before truncation), links and linkCount (before
 *   the 50-link cap); or a failure envelope. Never rejects.
 */
async function mcpBrowseWebpage(url, mode = 'auto', maxChars = 4000) {
  try {
    if (!url) {
      return fail('url is required')
    }

    let protocol
    try {
      protocol = new URL(String(url)).protocol
    } catch {
      return fail(`invalid url: ${url}`, { url, mode, maxChars })
    }
    if (protocol !== 'http:' && protocol !== 'https:') {
      return fail(
        `unsupported url protocol "${protocol}": only http and https are allowed`,
        { url, mode, maxChars },
      )
    }

    let html = ''
    let resolvedMode = mode

    if (mode === 'dynamic') {
      html = await fetchDynamic(url)
    } else if (mode === 'static') {
      html = await fetchStatic(url)
    } else {
      try {
        html = await fetchStatic(url)
        resolvedMode = 'static'
      } catch (staticError) {
        try {
          html = await fetchDynamic(url)
        } catch (dynamicError) {
          // Report both failures; the static error is often the more telling one.
          const staticMessage = staticError?.message || String(staticError)
          const dynamicMessage = dynamicError?.message || String(dynamicError)
          throw new Error(
            `static fetch failed (${staticMessage}); dynamic fetch failed (${dynamicMessage})`,
            { cause: dynamicError },
          )
        }
        resolvedMode = 'dynamic'
      }
    }

    const parsed = extractByCheerio(html, url)
    const charLimit = Number(maxChars) > 0 ? Number(maxChars) : 4000
    const content = parsed.content.slice(0, charLimit)
    const links = parsed.links.slice(0, 50)

    return ok({
      url,
      mode: resolvedMode,
      title: parsed.title,
      description: parsed.description,
      content,
      contentLength: parsed.content.length,
      links,
      linkCount: parsed.links.length,
    })
  } catch (error) {
    return fail(error, { url, mode, maxChars })
  }
}
|
|
122
|
+
|
|
123
|
+
/**
 * Fetches a page and returns the sentences and links that contain a keyword.
 *
 * Matching is case-insensitive. Sentences are ranked by the number of
 * non-overlapping keyword occurrences, highest first. Links are searched in
 * both their href and their text; only the links returned by
 * `mcpBrowseWebpage` are considered.
 *
 * @param {string} url - Page address.
 * @param {string} query - Keyword to look for; must contain at least one
 *   non-whitespace character.
 * @param {string} [mode='auto'] - Fetch mode forwarded to `mcpBrowseWebpage`.
 * @param {number} [limit=20] - Maximum number of sentences and of links to
 *   return; non-positive or non-numeric values fall back to 20.
 * @returns {Promise<{success: boolean, data: *, error?: string}>} A success
 *   envelope with url, query, mode, title, matchedSentenceCount,
 *   matchedLinkCount, matchedSentences (`{text, score}`) and matchedLinks;
 *   or a failure envelope. Never rejects.
 */
async function mcpFetchWebpageByQuery(url, query, mode = 'auto', limit = 20) {
  try {
    if (!url) {
      return fail('url is required')
    }
    if (!query) {
      return fail('query is required')
    }

    // A whitespace-only query trims to '', which matches at every position
    // and would keep the occurrence counter from ever terminating.
    const keyword = String(query).toLowerCase().trim()
    if (!keyword) {
      return fail('query is required')
    }

    const browseResult = await mcpBrowseWebpage(url, mode, 200000)
    if (!browseResult.success) {
      return browseResult
    }

    const maxResults = Number(limit) > 0 ? Number(limit) : 20
    const rawContent = browseResult.data.content || ''

    const matchedSentences = rawContent
      // Sentence terminators, both CJK/full-width and ASCII.
      .split(/[。！？.!?\n]/)
      .map((piece) => normalizeWhitespace(piece))
      .filter(Boolean)
      .map((text) => ({
        text,
        // Splitting on the keyword yields one more piece than there are
        // non-overlapping occurrences.
        score: text.toLowerCase().split(keyword).length - 1,
      }))
      .filter((item) => item.score > 0)
      .sort((a, b) => b.score - a.score)
      .slice(0, maxResults)

    const matchedLinks = (browseResult.data.links || [])
      .filter((item) => {
        const href = String(item.href || '').toLowerCase()
        const text = String(item.text || '').toLowerCase()
        return href.includes(keyword) || text.includes(keyword)
      })
      .slice(0, maxResults)

    return ok({
      url,
      query,
      mode: browseResult.data.mode,
      title: browseResult.data.title,
      matchedSentenceCount: matchedSentences.length,
      matchedLinkCount: matchedLinks.length,
      matchedSentences,
      matchedLinks,
    })
  } catch (error) {
    return fail(error, { url, query, mode, limit })
  }
}
|
|
183
|
+
|
|
184
|
+
/**
 * Wraps a tool's name, description and parameter schema in the
 * `{ type: 'function', function: {...} }` shape used by `descriptions`.
 *
 * @param {string} name - Tool function name.
 * @param {string} description - Text describing the tool.
 * @param {Object} properties - Schema of each parameter, keyed by name.
 * @param {string[]} required - Names of the mandatory parameters.
 * @returns {Object} The tool description entry.
 */
function buildToolDescription(name, description, properties, required) {
  return {
    type: 'function',
    function: {
      name,
      description,
      parameters: {
        type: 'object',
        properties,
        required,
      },
    },
  }
}

// Descriptions of the two web tools: page browsing and keyword extraction.
const descriptions = [
  buildToolDescription(
    'mcpBrowseWebpage',
    'MCP网页浏览: 打开并抓取任意网页的标题、摘要正文与链接列表。适合快速浏览网页内容。',
    {
      url: {
        type: 'string',
        description: '要浏览的网页地址(http或https)。',
      },
      mode: {
        type: 'string',
        description: '抓取模式:auto|static|dynamic。默认auto。',
      },
      maxChars: {
        type: 'number',
        description: '返回正文最大字符数,默认4000。',
      },
    },
    ['url'],
  ),
  buildToolDescription(
    'mcpFetchWebpageByQuery',
    'MCP网页抓取: 按关键词从网页正文和链接中提取匹配内容,返回高相关片段。',
    {
      url: {
        type: 'string',
        description: '目标网页地址(http或https)。',
      },
      query: {
        type: 'string',
        description: '要匹配的关键词。',
      },
      mode: {
        type: 'string',
        description: '抓取模式:auto|static|dynamic。默认auto。',
      },
      limit: {
        type: 'number',
        description: '返回匹配结果上限,默认20。',
      },
    },
    ['url', 'query'],
  ),
]
|
|
242
|
+
|
|
243
|
+
// Maps each tool name to the function that implements it.
const functions = {
  mcpBrowseWebpage: mcpBrowseWebpage,
  mcpFetchWebpageByQuery: mcpFetchWebpageByQuery,
}
|
|
247
|
+
|
|
248
|
+
// Tool bundle exported by this module: metadata plus the tool descriptions
// and their implementations.
const MCPWebTool = {
  name: 'MCPWebTool',
  description: '提供网页浏览与内容抓取能力,支持任意网页的读取与关键词提取。',
  platform: 'all',
  descriptions: descriptions,
  functions: functions,
}
|
|
255
|
+
|
|
256
|
+
module.exports = MCPWebTool
|
|
@@ -166,7 +166,7 @@ ${table}
|
|
|
166
166
|
`| ${s.name} | ${s.type} | ${s.description || s.extensionDescription} | ${s.location} | ${s.filePath || s.skillFilePath} |`,
|
|
167
167
|
)
|
|
168
168
|
.join('\n')
|
|
169
|
-
let skills1 = `
|
|
169
|
+
let skills1 = clawSkills.length > 0 ? `
|
|
170
170
|
### 优先使用的Skills
|
|
171
171
|
可以优先调用以下Skill来完成用户的请求,Skill的调用方式:
|
|
172
172
|
- 使用用户请求匹配 skill description,
|
|
@@ -179,8 +179,8 @@ ${table}
|
|
|
179
179
|
|-------|------|-------------|----------|---------------|
|
|
180
180
|
${table1}
|
|
181
181
|
|-------|------|-------------|----------|---------------|
|
|
182
|
-
`
|
|
183
|
-
let skills2 = `
|
|
182
|
+
` : '### 无优先使用的Skills'
|
|
183
|
+
let skills2 = toolCollection.length + clawSkillCollection.length > 0 ? `
|
|
184
184
|
### 其他可以使用的Skills
|
|
185
185
|
可以调用以下Skill来完成用户的请求,Skill的调用方式:
|
|
186
186
|
- 使用用户请求匹配 skill description,
|
|
@@ -194,9 +194,8 @@ ${table1}
|
|
|
194
194
|
|-------|------|-------------|----------|----------|
|
|
195
195
|
${table2}
|
|
196
196
|
|-------|------|-------------|----------|----------|
|
|
197
|
-
|
|
197
|
+
`: '### 无其他可用的Skills'
|
|
198
198
|
return skills1 + '\n' + skills2
|
|
199
|
-
|
|
200
199
|
}
|
|
201
200
|
|
|
202
201
|
// 扫描包
|
package/src/cli/DefaultConfig.js
CHANGED
|
@@ -8,6 +8,8 @@ const defaultConfig = {
|
|
|
8
8
|
maxLogExpireTime: 3, // 日志过期时间,单位天,-1表示无限制,0表示不记录
|
|
9
9
|
maxBlockFileSize: 20, // 最大分块文件大小,单位KB;超过该大小的文件需要分块处理
|
|
10
10
|
encoding: 'auto', // 命令行编码格式, 可设置为utf-8、gbk等, 也可以设置成auto或空值自动判断
|
|
11
|
+
EMBEDDING_API: '', // 向量化接口地址
|
|
12
|
+
EMBEDDING_API_KEY: '' // 向量化接口密钥
|
|
11
13
|
}
|
|
12
14
|
|
|
13
15
|
const aiCliConfig = {
|