minimal-agent 0.1.9 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +383 -122
- package/package.json +19 -12
- package/plugins/HOW-TO-WRITE-A-PLUGIN.md +186 -0
- package/plugins/ralph-wiggum/commands/ralph-loop.md +6 -16
- package/plugins/ralph-wiggum/plugin.js +205 -0
- package/plugins/ralph-wiggum/src/goalState.js +260 -0
- package/plugins/ralph-wiggum/src/sentinels.js +21 -0
- package/plugins/ralph-wiggum/src/stopHookRunner.js +104 -0
- package/plugins/ralph-wiggum/src/verificationGate.js +202 -0
- package/plugins/workflow-runner/.claude-plugin/plugin.json +5 -0
- package/plugins/workflow-runner/commands/workflow.md +15 -0
- package/plugins/workflow-runner/commands/workflows.md +8 -0
- package/plugins/workflow-runner/plugin.js +36 -0
- package/plugins/workflow-runner/src/expressions.js +369 -0
- package/plugins/workflow-runner/src/index.js +174 -0
- package/plugins/workflow-runner/src/loader.js +183 -0
- package/plugins/workflow-runner/src/runner.js +290 -0
- package/plugins/workflow-runner/src/stepExecutors/assert.js +28 -0
- package/plugins/workflow-runner/src/stepExecutors/llm.js +44 -0
- package/plugins/workflow-runner/src/stepExecutors/skill.js +103 -0
- package/plugins/workflow-runner/src/stepExecutors/tool.js +35 -0
- package/plugins/workflow-runner/src/types.js +59 -0
- package/plugins/workflow-runner/src/workflowState.js +46 -0
- package/skills/image-gen-openrouter/SKILL.md +121 -0
- package/skills/subtitle-srt/SKILL.md +134 -0
- package/skills/tts-zh/SKILL.md +137 -0
- package/skills/video-compose/SKILL.md +139 -0
- package/src/bootstrap/cwdArg.js +22 -0
- package/src/bootstrap/workingDir.js +31 -0
- package/src/cli/configWizard.js +272 -0
- package/src/cli/print.js +192 -0
- package/src/config/configFile.js +78 -0
- package/src/config.js +118 -0
- package/src/context/compact.js +357 -0
- package/src/context/microCompactLite.js +151 -0
- package/src/context/persistContext.js +109 -0
- package/src/context/reactiveCompact.js +121 -0
- package/src/context/sessionPath.js +58 -0
- package/src/context/snipCompact.js +112 -0
- package/src/context/tokenCounter.js +66 -0
- package/src/llm/client.js +182 -0
- package/src/loop.js +230 -0
- package/src/main.js +116 -0
- package/src/plugin-sdk.js +24 -0
- package/src/plugins/commandRouter.js +169 -0
- package/src/plugins/hookEngine.js +258 -0
- package/src/plugins/pluginApi.js +23 -0
- package/src/plugins/pluginLoader.js +71 -0
- package/src/plugins/pluginRunner.js +65 -0
- package/src/plugins/transcript.js +171 -0
- package/src/prompts/projectInstructions.js +48 -0
- package/src/prompts/skillList.js +126 -0
- package/src/prompts/system.js +155 -0
- package/src/session/runTurn.js +41 -0
- package/src/session/sessionState.js +19 -0
- package/src/tools/bash/bash.js +352 -0
- package/src/tools/bash/semantics.js +85 -0
- package/src/tools/bash/warnings.js +98 -0
- package/src/tools/edit/edit.js +253 -0
- package/src/tools/edit/multi-edit.js +155 -0
- package/src/tools/glob/glob.js +97 -0
- package/src/tools/grep/grep.js +185 -0
- package/src/tools/grep/rgPath.js +173 -0
- package/src/tools/index.js +94 -0
- package/src/tools/read/read.js +209 -0
- package/src/tools/shared/fileState.js +61 -0
- package/src/tools/shared/fileUtils.js +281 -0
- package/src/tools/shared/schemas.js +16 -0
- package/src/tools/types.js +21 -0
- package/src/tools/webbrowser/browser.js +55 -0
- package/src/tools/webbrowser/webbrowser.js +194 -0
- package/src/tools/webfetch/preapproved.js +267 -0
- package/src/tools/webfetch/webfetch.js +317 -0
- package/src/tools/websearch/websearch.js +161 -0
- package/src/tools/write/write.js +125 -0
- package/src/types/turndown.d.ts +23 -0
- package/src/types.js +16 -0
- package/src/ui/App.js +37 -0
- package/src/ui/InputBox.js +240 -0
- package/src/ui/MessageList.js +28 -0
- package/src/ui/Root.js +70 -0
- package/src/ui/StatusLine.js +41 -0
- package/src/ui/ToolStatus.js +11 -0
- package/src/ui/hooks/useChat.js +234 -0
- package/src/ui/hooks/usePasteHandler.js +137 -0
- package/src/ui/hooks/useTextBuffer.js +55 -0
- package/src/ui/hooks/useTokenUsage.js +30 -0
- package/src/ui/textBuffer.js +217 -0
- package/src/utils/packageRoot.js +37 -0
- package/src/utils/resourcePaths.js +49 -0
- package/src/utils/zodToJson.js +29 -0
- package/workflows/book-review-short.yaml +99 -0
- package/workflows/e2e-write-greet.yaml +27 -0
- package/workflows/schema.json +74 -0
- package/workflows/youtube-shorts.yaml +171 -0
- package/dist/main.js +0 -5936
- package/plugins/ralph-wiggum/scripts/setup-ralph-loop.sh +0 -203
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ============================================================
|
|
3
|
+
* src/tools/webfetch-preapproved.ts —— WebFetch 预批准域名白名单
|
|
4
|
+
* ------------------------------------------------------------
|
|
5
|
+
* 维护一份可信任域名列表,用于过滤潜在风险 URL
|
|
6
|
+
* ============================================================
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Exact hostnames that are considered pre-approved for WebFetch.
 *
 * Entries are compared against `URL#hostname`, so every entry must be a bare,
 * lower-case hostname: no scheme, no port, no path. Entries that carried a
 * path (they could never equal a hostname) and duplicate entries have been
 * removed; the hosts they pointed at (`docs.github.com`, `github.com`) are
 * already listed.
 */
const PREAPPROVED_HOSTS = new Set([
    // GitHub
    'github.com',
    'gist.github.com',
    // GitHub developer documentation
    'docs.github.com',
    'developer.github.com',
    'help.github.com',
    // npm
    'npmjs.com',
    'www.npmjs.com',
    // Package registries
    'pypi.org',
    'www.pypi.org',
    'crates.io',
    'pub.dev',
    'packagist.org',
    'rubygems.org',
    // Developer Q&A platforms
    'stackoverflow.com',
    'www.stackoverflow.com',
    'serverfault.com',
    'superuser.com',
    'askubuntu.com',
    // Documentation hosts and wikis
    'readthedocs.io',
    'www.readthedocs.io',
    'readthedocs.org',
    'wiki.python.org',
    'en.wikipedia.org',
    'zh.wikipedia.org',
    // Runtime documentation
    'nodejs.org',
    'www.nodejs.org',
    'deno.land',
    'www.deno.land',
    'bun.sh',
    'www.bun.sh',
    // Rust
    'doc.rust-lang.org',
    'www.rust-lang.org',
    'rust-lang.org',
    // Cloud platforms
    'aws.amazon.com',
    'docs.aws.amazon.com',
    'cloud.google.com',
    'docs.microsoft.com',
    'azure.microsoft.com',
    'developer.microsoft.com',
    // AI / LLM vendors
    'openai.com',
    'platform.openai.com',
    'docs.anthropic.com',
    'anthropic.com',
    'claude.ai',
    'docs.cohere.com',
    'cohere.com',
    // AI model documentation
    'ai.google.dev',
    'ai.google.com',
    'developers.google.com',
    'learn.deepmind.com',
    // AI development frameworks
    'python.langchain.com',
    'js.langchain.com',
    'docs.litellm.ai',
    'litellm.ai',
    // Front-end frameworks
    'react.dev',
    'reactjs.org',
    'www.reactjs.org',
    'nextjs.org',
    'www.nextjs.org',
    'vuejs.org',
    'www.vuejs.org',
    'svelte.dev',
    'svelte.org',
    'angular.io',
    'www.angular.io',
    // Build tools
    'vitejs.dev',
    'vite.dev',
    'webpack.js.org',
    'esbuild.github.io',
    'rollupjs.org',
    // CSS
    'tailwindcss.com',
    'www.tailwindcss.com',
    'postcss.org',
    // Databases
    'redis.io',
    'www.redis.io',
    'postgresql.org',
    'www.postgresql.org',
    'www.mysql.com',
    'dev.mysql.com',
    'docs.mongodb.com',
    'www.mongodb.com',
    'sqlite.org',
    'www.sqlite.org',
    // Utilities
    'regex101.com',
    'ihateregex.io',
    'explainshell.com',
    'tldr.sh',
    // Code sharing / playgrounds
    'replit.com',
    'www.replit.com',
    'codesandbox.io',
    'www.codesandbox.io',
    'codepen.io',
    'www.codepen.io',
    'jsfiddle.net',
    'www.jsfiddle.net',
    // CI/CD (GitHub Actions docs live on docs.github.com, listed above)
    'circleci.com',
    'docs.circleci.com',
    'travis-ci.org',
    'www.travis-ci.com',
    'jenkins.io',
    'www.jenkins.io',
    // Containers / cloud native
    'kubernetes.io',
    'www.kubernetes.io',
    'docker.com',
    'www.docker.com',
    'docs.docker.com',
    // Testing
    'jestjs.io',
    'www.jestjs.io',
    'vitest.dev',
    'testing-library.com',
    'www.testing-library.com',
    'playwright.dev',
    'www.playwright.dev',
    'webdriver.io',
    // API documentation
    'httpbin.org',
    'restfulapi.net',
    'swagger.io',
    'www.swagger.io',
    'openapi.net',
    // Security
    'owasp.org',
    'www.owasp.org',
    'cve.mitre.org',
    // Certificates
    'letsencrypt.org',
    'www.letsencrypt.org',
    // NOTE(review): duckdns.org hands out subdomains to anyone; confirm this
    // specific host is intentional before treating its content as trusted.
    'acmev2.pki.duckdns.org',
    // Blogs and technical articles
    'medium.com',
    'www.medium.com',
    'dev.to',
    'www.dev.to',
    'hashnode.com',
    'www.hashnode.com',
    'devblogs.microsoft.com',
    // Browser / web platform
    'caniuse.com',
    'developer.mozilla.org',
    'web.dev',
    'www.w3.org',
    // Open-source foundations
    'apache.org',
    'www.apache.org',
    'gnu.org',
    'www.gnu.org',
    'fsf.org',
    'www.fsf.org',
    'opensource.org',
    'www.opensource.org',
    // Tech communities
    'reddit.com',
    'www.reddit.com',
    'news.ycombinator.com',
    'lobste.rs',
    // File formats
    'json.org',
    'yaml.org',
    'www.yaml.org',
    'toml.io',
    'www.toml.io',
    // Version control
    'git-scm.com',
    'www.git-scm.com',
    'github.blog',
    'githubstatus.com',
    // AI search
    'tavily.com',
    'www.tavily.com',
    'perplexity.ai',
    'www.perplexity.ai',
    // AI image generation
    'midjourney.com',
    'www.midjourney.com',
    'stability.ai',
    'www.stability.ai',
    // Embeddings / vector databases
    'qdrant.tech',
    'www.qdrant.tech',
    'weaviate.io',
    'www.weaviate.io',
    'pinecone.io',
    'www.pinecone.io',
    // API tooling
    'ngrok.com',
    'www.ngrok.com',
    'requestly.io',
    'www.requestly.io',
    // MCP (the GitHub organisation is covered by github.com, listed above)
    'modelcontextprotocol.io',
    'www.modelcontextprotocol.io',
]);
|
|
226
|
+
/**
 * Hostname suffixes that are considered pre-approved for WebFetch.
 *
 * Written as bare parent domains and prefixed with a dot below, so every
 * resulting suffix starts with "." and only matches on a label boundary.
 *
 * NOTE(review): several of these are public hosting platforms where anyone
 * can publish a site; confirm that a blanket "pre-approved" status for all of
 * their subdomains is intended.
 */
const PREAPPROVED_SUFFIXES = [
    'github.io',
    'readthedocs.io',
    'vercel.app',
    'vercel.dev',
    'netlify.app',
    'netlify.com',
    'cloudflare-pages.com',
    'pages.dev',
    'surge.sh',
    'herokuapp.com',
    'railway.app',
    'fly.dev',
    'repl.co',
    'workers.dev',
    'pages.plus',
    'coded.app',
    'preview.app',
    'staging.app',
].map((parentDomain) => `.${parentDomain}`);
|
|
246
|
+
/**
 * Report whether a URL string points at a pre-approved host.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} `true` when the URL parses and its hostname is
 *   pre-approved; `false` otherwise, including when the URL cannot be parsed.
 */
export function isPreapprovedUrl(url) {
    try {
        const { hostname } = new URL(url);
        return isPreapprovedHost(hostname);
    }
    catch {
        // Any failure (typically an unparsable URL) means "not approved".
        return false;
    }
}
|
|
255
|
+
/**
 * Report whether a hostname is on the pre-approved list.
 *
 * The hostname is lower-cased and a single trailing dot (fully-qualified DNS
 * form, e.g. "github.com.") is removed before matching. A host is approved
 * when it equals an entry of PREAPPROVED_HOSTS, or when it is a proper
 * subdomain under one of PREAPPROVED_SUFFIXES.
 *
 * @param {string} hostname - Hostname as produced by `URL#hostname`.
 * @returns {boolean} `true` when approved. Non-string or empty input yields
 *   `false` instead of throwing.
 */
export function isPreapprovedHost(hostname) {
    if (typeof hostname !== 'string' || hostname.length === 0) {
        return false;
    }
    const normalized = hostname.toLowerCase().replace(/\.$/, '');
    if (PREAPPROVED_HOSTS.has(normalized)) {
        return true;
    }
    // Require at least one character before the suffix so that a bare
    // ".github.io" (no subdomain label) is not accepted.
    return PREAPPROVED_SUFFIXES.some((suffix) => normalized.length > suffix.length && normalized.endsWith(suffix));
}
/** Alias of {@link isPreapprovedHost}, kept for callers that import this name. */
export const isPreapprovedDomain = isPreapprovedHost;
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ============================================================
|
|
3
|
+
* src/tools/webfetch.ts —— WebFetch 工具(获取 URL 内容)
|
|
4
|
+
* ------------------------------------------------------------
|
|
5
|
+
* 获取 URL → HTML 转 Markdown → 用 prompt 提取信息
|
|
6
|
+
*
|
|
7
|
+
* 输入:
|
|
8
|
+
* url 必填,要获取内容的 URL
|
|
9
|
+
* prompt 必填,对内容进行处理的指令
|
|
10
|
+
*
|
|
11
|
+
* 输出:
|
|
12
|
+
* { bytes, code, codeText, result, durationMs, url }
|
|
13
|
+
*
|
|
14
|
+
* 流程:
|
|
15
|
+
* 1. URL 验证(合法格式、2000字符以内、无凭据)
|
|
16
|
+
* 2. fetch(自动 http→https,超时 60s)
|
|
17
|
+
* 3. 检测跨 host 重定向,返回提示而非自动跟随
|
|
18
|
+
* 4. HTML → Markdown(turndown);非 HTML 原样返回
|
|
19
|
+
* 5. 输出截断到 30K chars(超长内容直接截断)
|
|
20
|
+
* ============================================================
|
|
21
|
+
*/
|
|
22
|
+
import { z } from 'zod';
|
|
23
|
+
import { DEFAULT_MAX_RESULT_SIZE_CHARS } from '../types.js';
|
|
24
|
+
import { toToolParameters } from '../../utils/zodToJson.js';
|
|
25
|
+
import { isPreapprovedDomain } from './preapproved.js';
|
|
26
|
+
// ---------------- 1. 常量 ----------------
|
|
27
|
+
// Longest URL (in characters) accepted by validateURL.
const MAX_URL_LENGTH = 2_000;
// Timeout for a single fetch request, in milliseconds (60 seconds).
const FETCH_TIMEOUT_MS = 60 * 1_000;
// Largest response body accepted, in bytes (10 MiB).
const MAX_HTTP_CONTENT_LENGTH = 10 * 2 ** 20;
// Maximum number of same-host redirects followed before giving up.
const MAX_REDIRECTS = 10;
// In-memory response cache keyed by the requested URL string.
const URL_CACHE = new Map();
// Lifetime of a cache entry, in milliseconds (15 minutes).
const CACHE_TTL_MS = 15 * 60 * 1_000;
// Total size budget for cached responses, in bytes (50 MiB).
const MAX_CACHE_SIZE_BYTES = 50 * 2 ** 20;
|
|
34
|
+
/**
 * Prune the in-memory URL cache.
 *
 * First drops every entry older than CACHE_TTL_MS. If the remaining entries
 * together exceed MAX_CACHE_SIZE_BYTES, evicts the oldest ones (by
 * `fetchedAt`) until the total is at or below 80% of that budget.
 */
function cleanCache() {
    const now = Date.now();
    const survivors = [];
    let liveBytes = 0;
    for (const [key, record] of URL_CACHE) {
        const expired = now - record.fetchedAt > CACHE_TTL_MS;
        if (expired) {
            URL_CACHE.delete(key);
        }
        else {
            liveBytes += record.bytes;
            survivors.push({ key, fetchedAt: record.fetchedAt, bytes: record.bytes });
        }
    }
    if (liveBytes <= MAX_CACHE_SIZE_BYTES) {
        return;
    }
    // Over budget: evict oldest-first down to the 80% low-water mark.
    const lowWaterMark = MAX_CACHE_SIZE_BYTES * 0.8;
    survivors.sort((older, newer) => older.fetchedAt - newer.fetchedAt);
    for (const victim of survivors) {
        URL_CACHE.delete(victim.key);
        liveBytes -= victim.bytes;
        if (liveBytes <= lowWaterMark) {
            return;
        }
    }
}
|
|
57
|
+
// ---------------- 3. Zod 输入 schema ----------------
|
|
58
|
+
/** Build a required string field carrying the given model-facing description. */
const describedString = (hint) => z.string().describe(hint);
/** Zod schema for the WebFetch input: two required strings, `url` and `prompt`. */
const inputSchema = z.object({
    url: describedString('要获取内容的 URL'),
    prompt: describedString('对内容进行处理的指令,描述你想从页面提取什么信息'),
});
// ---------------- 4. JSON Schema (derived from the Zod schema) ----------------
/** Tool parameters produced from the Zod schema by the project's toToolParameters helper. */
const parameters = toToolParameters(inputSchema);
|
|
64
|
+
// ---------------- 4. Description ----------------
|
|
65
|
+
/**
 * Model-facing description of the WebFetch tool, one array element per line.
 *
 * NOTE(review): the text says the content is processed with the prompt by an
 * AI model and mentions PDF text extraction, but call() in this file never
 * reads `prompt` and has no PDF handling — confirm whether that happens
 * elsewhere or the wording should be updated.
 */
const description = [
    '- Fetches content from a specified URL and processes it using an AI model.',
    '- Takes a URL and a prompt as input.',
    '- Fetches the URL content, converts HTML to markdown.',
    '- Processes the content with the prompt (e.g., extract summary, find specific info).',
    '- Returns the processed result.',
    '- HTTP URLs are automatically upgraded to HTTPS.',
    '- When a URL redirects to a different host, returns a warning with the redirect URL.',
    '- This tool is read-only and does not modify any files.',
    '- Results may be summarized if the content is very large.',
    '- ⚠️ IMPORTANT: This tool WILL FAIL for authenticated or private URLs.',
    'Before using this tool, check if the URL points to an authenticated service',
    '(e.g. Google Docs, Confluence, Jira, GitHub private repos). If so, look for',
    'a specialized MCP tool that provides authenticated access.',
    '- 💡 For GitHub URLs (repos, issues, PRs), prefer using the `gh` CLI via Bash',
    'instead (e.g. `gh pr view <pr-number>`, `gh issue view <number>`, `gh api <endpoint>`).',
    '- ⚡ If an MCP-provided web fetch tool is available, prefer using that tool instead,',
    'as it may have fewer restrictions and better performance.',
    '- 🔒 Domain preapproved list includes common documentation sites (MDN, TypeScript,',
    'React, Vue, Angular, Node.js, Bun, Rust, Go, Python, etc.). Other domains',
    'will work but results may be less reliable.',
    '- 📝 This tool includes a self-cleaning 15-minute cache for faster repeated access',
    'to the same URL.',
    '- ⚠️ For PDF files, the tool will attempt to extract readable text but results may',
    'be limited. Binary images cannot be processed.',
].join('\n');
|
|
89
|
+
// ---------------- 5. 辅助函数 ----------------
|
|
90
|
+
// Helpers used by call(): URL validation, fetching, and HTML-to-Markdown conversion.
|
|
91
|
+
/**
 * Validate a URL before it is fetched.
 *
 * Rejects, in order: non-string input, URLs longer than MAX_URL_LENGTH,
 * strings that do not parse as an absolute URL, schemes other than
 * http/https (so `data:`, `file:`, `blob:` and similar never reach fetch),
 * and URLs that embed a username or password.
 *
 * @param {string} url - Candidate URL.
 * @returns {{ ok: true, parsed: URL } | { ok: false, error: string }}
 *   The parsed URL on success, otherwise a user-facing error message.
 */
function validateURL(url) {
    if (typeof url !== 'string') {
        return { ok: false, error: `无效的 URL 格式` };
    }
    if (url.length > MAX_URL_LENGTH) {
        return { ok: false, error: `URL 太长(最大 ${MAX_URL_LENGTH} 字符)` };
    }
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return { ok: false, error: `无效的 URL 格式` };
    }
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
        return { ok: false, error: `仅支持 http/https 协议的 URL` };
    }
    if (parsed.username || parsed.password) {
        return { ok: false, error: `URL 不能包含用户名或密码` };
    }
    return { ok: true, parsed };
}
|
|
108
|
+
/** Best-effort release of a response body that will not be read. */
async function discardBody(response) {
    try {
        await response.body?.cancel();
    }
    catch {
        // Nothing useful can be done if cancellation itself fails.
    }
}
/**
 * Read a response body into memory, stopping as soon as it exceeds maxBytes.
 *
 * @returns {Promise<Uint8Array | null>} The body bytes, or `null` when the
 *   body is larger than maxBytes (the rest of the stream is cancelled).
 */
async function readBodyCapped(response, maxBytes) {
    if (!response.body) {
        return new Uint8Array(0);
    }
    const reader = response.body.getReader();
    const chunks = [];
    let total = 0;
    for (;;) {
        const { done, value } = await reader.read();
        if (done) {
            break;
        }
        total += value.byteLength;
        if (total > maxBytes) {
            await reader.cancel().catch(() => { });
            return null;
        }
        chunks.push(value);
    }
    const merged = new Uint8Array(total);
    let offset = 0;
    for (const chunk of chunks) {
        merged.set(chunk, offset);
        offset += chunk.byteLength;
    }
    return merged;
}
/** Decode body bytes using the charset declared in Content-Type, defaulting to UTF-8. */
function decodeBody(bytes, contentType) {
    const label = /charset\s*=\s*["']?([^\s;"']+)/i.exec(contentType)?.[1];
    if (label) {
        try {
            return new TextDecoder(label, { fatal: false }).decode(bytes);
        }
        catch {
            // Unknown or unsupported charset label: fall back to UTF-8 below.
        }
    }
    return new TextDecoder('utf-8', { fatal: false }).decode(bytes);
}
/**
 * Fetch a URL and return its decoded body, a cross-host redirect notice, or an error.
 *
 * - `http:` URLs are upgraded to `https:` before the request.
 * - Redirects are handled manually (`redirect: 'manual'`), so 3xx responses
 *   are actually observed: a redirect to a different host (ignoring a leading
 *   "www.") is returned to the caller instead of being followed; same-host
 *   redirects are followed up to MAX_REDIRECTS times.
 * - The body is streamed and abandoned once it exceeds MAX_HTTP_CONTENT_LENGTH.
 *
 * @param {string} url - URL to fetch.
 * @param {AbortSignal} [signal] - Optional caller abort signal; combined with
 *   a FETCH_TIMEOUT_MS timeout.
 * @param {number} [depth=0] - Number of redirects already followed.
 * @returns {Promise<object>} One of
 *   `{ type: 'success', content, bytes, code, codeText, contentType }`,
 *   `{ type: 'redirect', originalUrl, redirectUrl, statusCode }`,
 *   `{ type: 'error', error }`.
 */
async function fetchURL(url, signal, depth = 0) {
    const validated = validateURL(url);
    if (!validated.ok) {
        return { type: 'error', error: validated.error };
    }
    const parsed = validated.parsed;
    let targetUrl = url;
    if (parsed.protocol === 'http:') {
        parsed.protocol = 'https:';
        targetUrl = parsed.toString();
    }
    let response;
    try {
        const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT_MS);
        const combinedSignal = signal
            ? AbortSignal.any([signal, timeoutSignal])
            : timeoutSignal;
        response = await fetch(targetUrl, {
            signal: combinedSignal,
            // Without this, fetch follows redirects itself and the 3xx
            // handling below would never run.
            redirect: 'manual',
            headers: {
                Accept: 'text/markdown, text/html, */*',
                'User-Agent': 'minimal-agent/1.0',
            },
        });
    }
    catch (e) {
        if (signal?.aborted) {
            return { type: 'error', error: '请求被中断' };
        }
        return { type: 'error', error: `网络请求失败:${e.message}` };
    }
    const code = response.status;
    const codeText = response.statusText;
    const contentType = response.headers.get('content-type') ?? '';
    const location = response.headers.get('location');
    if (location && [301, 302, 303, 307, 308].includes(code)) {
        await discardBody(response);
        let redirectTarget;
        try {
            redirectTarget = new URL(location, targetUrl);
        }
        catch {
            return { type: 'error', error: `无效的重定向地址:${location}` };
        }
        const redirectUrl = redirectTarget.toString();
        const stripWww = (h) => h.replace(/^www\./, '');
        if (stripWww(parsed.hostname) !== stripWww(redirectTarget.hostname)) {
            return {
                type: 'redirect',
                originalUrl: url,
                redirectUrl,
                statusCode: code,
            };
        }
        if (depth >= MAX_REDIRECTS) {
            return { type: 'error', error: `重定向循环超过限制(最多 ${MAX_REDIRECTS} 次)` };
        }
        return fetchURL(redirectUrl, signal, depth + 1);
    }
    // Reject early when the server declares an oversized body.
    const declaredLength = Number(response.headers.get('content-length'));
    if (Number.isFinite(declaredLength) && declaredLength > MAX_HTTP_CONTENT_LENGTH) {
        await discardBody(response);
        return { type: 'error', error: `内容太大(${declaredLength} bytes,超过 ${MAX_HTTP_CONTENT_LENGTH})` };
    }
    // Stream the body so an undeclared oversized response is abandoned
    // instead of being buffered in full.
    let rawBytes;
    try {
        rawBytes = await readBodyCapped(response, MAX_HTTP_CONTENT_LENGTH);
    }
    catch (e) {
        return { type: 'error', error: `读取响应体失败:${e.message}` };
    }
    if (rawBytes === null) {
        return { type: 'error', error: `内容太大(超过 ${MAX_HTTP_CONTENT_LENGTH} bytes)` };
    }
    const content = decodeBody(rawBytes, contentType);
    return { type: 'success', content, bytes: rawBytes.byteLength, code, codeText, contentType };
}
|
|
183
|
+
/**
 * Convert an HTML string to Markdown using turndown.
 *
 * turndown is imported dynamically so it is only loaded when a conversion is
 * actually requested.
 *
 * @param {string} html - HTML source.
 * @returns {Promise<string>} The Markdown rendering.
 */
async function htmlToMarkdown(html) {
    const { default: TurndownService } = await import('turndown');
    const converter = new TurndownService();
    return converter.turndown(html);
}
|
|
190
|
+
// ---------------- 6. call() 实现 ----------------
|
|
191
|
+
/** Format a byte count as "N B", "N.N KB" or "N.N MB". */
function formatByteSize(bytes) {
    if (bytes < 1024) {
        return `${bytes} B`;
    }
    if (bytes < 1024 * 1024) {
        return `${(bytes / 1024).toFixed(1)} KB`;
    }
    return `${(bytes / 1024 / 1024).toFixed(1)} MB`;
}
/**
 * Render a fetched (or cached) response as the tool's text result.
 *
 * HTML bodies are converted to Markdown; if the conversion fails the raw body
 * is used and a warning is logged. The output is truncated to
 * DEFAULT_MAX_RESULT_SIZE_CHARS plus a truncation notice.
 */
async function renderFetchResult(url, record, { preapproved, elapsedLabel, fromCache }) {
    let body = record.content;
    // Media types are case-insensitive, so compare in lower case.
    if (record.contentType.toLowerCase().includes('text/html')) {
        try {
            body = await htmlToMarkdown(body);
        }
        catch (e) {
            console.warn(`turndown 失败: ${e.message}`);
        }
    }
    const lines = [
        '【WebFetch 结果】',
        `URL: ${url}`,
        `状态: ${record.code} ${record.codeText}`,
        `大小: ${formatByteSize(record.bytes)}`,
        `耗时: ${elapsedLabel}`,
        preapproved
            ? '[🔒 预批准域名,内容可信]'
            : '[⚠️ 非预批准域名,内容可能不准确]',
    ];
    if (fromCache) {
        lines.push('[📦 来自缓存(15分钟 TTL)]');
    }
    lines.push('', '--- 内容 ---', body);
    let final = lines.join('\n');
    if (final.length > DEFAULT_MAX_RESULT_SIZE_CHARS) {
        final = final.slice(0, DEFAULT_MAX_RESULT_SIZE_CHARS) + `\n\n... (输出超过 ${DEFAULT_MAX_RESULT_SIZE_CHARS} 字符,已截断)`;
    }
    return { ok: true, content: final };
}
/**
 * WebFetch entry point: fetch a URL and return its content as text.
 *
 * Serves from URL_CACHE when a fresh entry exists; otherwise fetches via
 * fetchURL. Only 2xx responses are cached, so a transient 4xx/5xx does not
 * block a retry for the whole cache lifetime. Cross-host redirects are
 * returned as a notice asking the caller to retry with the new URL.
 *
 * Only `input.url` is read; `input.prompt` is currently not used here.
 *
 * @param {{ url: string, prompt: string }} input - Tool input.
 * @param {AbortSignal} [signal] - Optional abort signal passed to fetchURL.
 * @returns {Promise<{ ok: true, content: string } | { ok: false, error: string }>}
 */
async function call(input, signal) {
    const { url } = input;
    const start = Date.now();
    let targetHostname = '';
    try {
        targetHostname = new URL(url).hostname;
    }
    catch {
        // Unparsable URL: leave the hostname empty; fetchURL reports the error.
    }
    const preapproved = isPreapprovedDomain(targetHostname);
    // 1. Cache lookup
    const cached = URL_CACHE.get(url);
    if (cached) {
        if (Date.now() - cached.fetchedAt <= CACHE_TTL_MS) {
            return renderFetchResult(url, cached, {
                preapproved,
                elapsedLabel: '0.00s (缓存命中)',
                fromCache: true,
            });
        }
        // Expired: drop it now rather than waiting for the next cleanCache().
        URL_CACHE.delete(url);
    }
    // 2. Fetch
    const fetched = await fetchURL(url, signal);
    const durationMs = Date.now() - start;
    if (fetched.type === 'redirect') {
        const redirectNames = {
            301: 'Moved Permanently',
            303: 'See Other',
            307: 'Temporary Redirect',
            308: 'Permanent Redirect',
        };
        const statusText = redirectNames[fetched.statusCode] ?? 'Found';
        return {
            ok: true,
            content: [
                '【重定向检测】',
                '',
                `原始 URL: ${fetched.originalUrl}`,
                `重定向到: ${fetched.redirectUrl}`,
                `状态: ${fetched.statusCode} ${statusText}`,
                '',
                '请使用重定向后的 URL 再次调用 WebFetch 工具。',
            ].join('\n'),
        };
    }
    if (fetched.type === 'error') {
        return { ok: false, error: fetched.error };
    }
    const { content, bytes, code, codeText, contentType } = fetched;
    // 3. Cache successful responses only
    if (code >= 200 && code < 300) {
        URL_CACHE.set(url, {
            bytes,
            code,
            codeText,
            content,
            contentType,
            fetchedAt: Date.now(),
        });
        cleanCache();
    }
    // 4. Convert, annotate and truncate
    return renderFetchResult(url, { bytes, code, codeText, content, contentType }, {
        preapproved,
        elapsedLabel: `${(durationMs / 1000).toFixed(2)}s`,
        fromCache: false,
    });
}
|
|
307
|
+
// ---------------- 7. 导出 ----------------
|
|
308
|
+
/**
 * WebFetch tool definition exported for registration.
 *
 * Bundles the tool name, its model-facing description, the Zod input schema
 * and derived parameters, two capability flags, the result-size cap, and the
 * `call` implementation.
 */
export const webfetchTool = {
    name: 'WebFetch',
    description: description,
    inputSchema: inputSchema,
    parameters: parameters,
    isReadOnly: true,
    isConcurrencySafe: true,
    maxResultSizeChars: DEFAULT_MAX_RESULT_SIZE_CHARS,
    call: call,
};
|