minimal-agent 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/README.md +50 -72
  2. package/package.json +18 -13
  3. package/plugins/ralph-wiggum/plugin.js +205 -0
  4. package/plugins/ralph-wiggum/src/goalState.js +260 -0
  5. package/plugins/ralph-wiggum/src/{sentinels.ts → sentinels.js} +4 -7
  6. package/plugins/ralph-wiggum/src/stopHookRunner.js +104 -0
  7. package/plugins/ralph-wiggum/src/verificationGate.js +202 -0
  8. package/plugins/workflow-runner/{plugin.ts → plugin.js} +20 -26
  9. package/plugins/workflow-runner/src/expressions.js +369 -0
  10. package/plugins/workflow-runner/src/index.js +174 -0
  11. package/plugins/workflow-runner/src/loader.js +183 -0
  12. package/plugins/workflow-runner/src/runner.js +290 -0
  13. package/plugins/workflow-runner/src/stepExecutors/assert.js +28 -0
  14. package/plugins/workflow-runner/src/stepExecutors/llm.js +44 -0
  15. package/plugins/workflow-runner/src/stepExecutors/skill.js +103 -0
  16. package/plugins/workflow-runner/src/stepExecutors/{tool.ts → tool.js} +19 -25
  17. package/plugins/workflow-runner/src/types.js +59 -0
  18. package/plugins/workflow-runner/src/{workflowState.ts → workflowState.js} +21 -40
  19. package/src/bootstrap/cwdArg.js +22 -0
  20. package/src/bootstrap/workingDir.js +31 -0
  21. package/src/cli/configWizard.js +272 -0
  22. package/src/cli/print.js +192 -0
  23. package/src/config/configFile.js +78 -0
  24. package/src/config.js +118 -0
  25. package/src/context/compact.js +357 -0
  26. package/src/context/microCompactLite.js +151 -0
  27. package/src/context/persistContext.js +109 -0
  28. package/src/context/reactiveCompact.js +121 -0
  29. package/src/context/sessionPath.js +58 -0
  30. package/src/context/snipCompact.js +112 -0
  31. package/src/context/tokenCounter.js +66 -0
  32. package/src/llm/client.js +182 -0
  33. package/src/loop.js +230 -0
  34. package/src/main.js +116 -0
  35. package/src/plugin-sdk.js +24 -0
  36. package/src/plugins/commandRouter.js +169 -0
  37. package/src/plugins/hookEngine.js +258 -0
  38. package/src/plugins/pluginApi.js +23 -0
  39. package/src/plugins/pluginLoader.js +71 -0
  40. package/src/plugins/pluginRunner.js +65 -0
  41. package/src/plugins/transcript.js +171 -0
  42. package/src/prompts/projectInstructions.js +48 -0
  43. package/src/prompts/skillList.js +126 -0
  44. package/src/prompts/system.js +155 -0
  45. package/src/session/runTurn.js +41 -0
  46. package/src/session/sessionState.js +19 -0
  47. package/src/tools/bash/bash.js +352 -0
  48. package/src/tools/bash/semantics.js +85 -0
  49. package/src/tools/bash/warnings.js +98 -0
  50. package/src/tools/edit/edit.js +253 -0
  51. package/src/tools/edit/multi-edit.js +155 -0
  52. package/src/tools/glob/glob.js +97 -0
  53. package/src/tools/grep/grep.js +185 -0
  54. package/src/tools/grep/rgPath.js +173 -0
  55. package/src/tools/index.js +94 -0
  56. package/src/tools/read/read.js +209 -0
  57. package/src/tools/shared/fileState.js +61 -0
  58. package/src/tools/shared/fileUtils.js +281 -0
  59. package/src/tools/shared/schemas.js +16 -0
  60. package/src/tools/types.js +21 -0
  61. package/src/tools/webbrowser/browser.js +55 -0
  62. package/src/tools/webbrowser/webbrowser.js +194 -0
  63. package/src/tools/webfetch/preapproved.js +267 -0
  64. package/src/tools/webfetch/webfetch.js +317 -0
  65. package/src/tools/websearch/websearch.js +161 -0
  66. package/src/tools/write/write.js +125 -0
  67. package/src/types/turndown.d.ts +23 -0
  68. package/src/types.js +16 -0
  69. package/src/ui/App.js +37 -0
  70. package/src/ui/InputBox.js +240 -0
  71. package/src/ui/MessageList.js +28 -0
  72. package/src/ui/Root.js +70 -0
  73. package/src/ui/StatusLine.js +41 -0
  74. package/src/ui/ToolStatus.js +11 -0
  75. package/src/ui/hooks/useChat.js +234 -0
  76. package/src/ui/hooks/usePasteHandler.js +137 -0
  77. package/src/ui/hooks/useTextBuffer.js +55 -0
  78. package/src/ui/hooks/useTokenUsage.js +30 -0
  79. package/src/ui/textBuffer.js +217 -0
  80. package/src/utils/packageRoot.js +37 -0
  81. package/src/utils/resourcePaths.js +49 -0
  82. package/src/utils/zodToJson.js +29 -0
  83. package/dist/main.js +0 -5315
  84. package/plugins/ralph-wiggum/plugin.ts +0 -275
  85. package/plugins/ralph-wiggum/scripts/setup-ralph-loop.sh +0 -203
  86. package/plugins/ralph-wiggum/src/goalState.ts +0 -310
  87. package/plugins/ralph-wiggum/src/stopHookRunner.ts +0 -136
  88. package/plugins/ralph-wiggum/src/verificationGate.ts +0 -252
  89. package/plugins/ralph-wiggum/test/goalState.test.ts +0 -410
  90. package/plugins/ralph-wiggum/test/verificationGate.test.ts +0 -122
  91. package/plugins/workflow-runner/src/expressions.ts +0 -371
  92. package/plugins/workflow-runner/src/index.ts +0 -194
  93. package/plugins/workflow-runner/src/loader.ts +0 -193
  94. package/plugins/workflow-runner/src/runner.ts +0 -313
  95. package/plugins/workflow-runner/src/stepExecutors/assert.ts +0 -30
  96. package/plugins/workflow-runner/src/stepExecutors/llm.ts +0 -54
  97. package/plugins/workflow-runner/src/stepExecutors/skill.ts +0 -115
  98. package/plugins/workflow-runner/src/types.ts +0 -183
  99. package/plugins/workflow-runner/test/cli.e2e.test.ts +0 -114
  100. package/plugins/workflow-runner/test/e2e.test.ts +0 -268
  101. package/plugins/workflow-runner/test/expressions.test.ts +0 -140
  102. package/plugins/workflow-runner/test/fixtures/cli-e2e.yaml +0 -27
  103. package/plugins/workflow-runner/test/fixtures/hello-workflow.yaml +0 -49
  104. package/plugins/workflow-runner/test/graceful.test.ts +0 -139
  105. package/plugins/workflow-runner/test/loader.test.ts +0 -216
  106. package/plugins/workflow-runner/test/pluginRunner.isolation.test.ts +0 -230
  107. package/plugins/workflow-runner/test/runner.test.ts +0 -511
@@ -0,0 +1,267 @@
1
+ /**
2
+ * ============================================================
3
+ * src/tools/webfetch/preapproved.js —— WebFetch 预批准域名白名单
4
+ * ------------------------------------------------------------
5
+ * 维护一份可信任域名列表,用于过滤潜在风险 URL
6
+ * ============================================================
7
+ */
8
/**
 * Exact hostnames treated as preapproved by isPreapprovedHost().
 *
 * Every entry must be a bare, lowercase hostname: lookups compare against
 * `URL.hostname`, which never contains a scheme, port, or path. Entries that
 * carried a path (and therefore could never match) and duplicated entries
 * have been removed; the hosts they referred to are still listed.
 */
const PREAPPROVED_HOSTS = new Set([
  // GitHub
  'github.com',
  'gist.github.com',
  // Developer documentation
  'docs.github.com',
  'developer.github.com',
  'help.github.com',
  // NPM
  'npmjs.com',
  'www.npmjs.com',
  // Package registries
  'pypi.org',
  'www.pypi.org',
  'crates.io',
  'pub.dev',
  'packagist.org',
  'rubygems.org',
  // Developer Q&A platforms
  'stackoverflow.com',
  'www.stackoverflow.com',
  'serverfault.com',
  'superuser.com',
  'askubuntu.com',
  // Documentation and wikis
  'readthedocs.io',
  'www.readthedocs.io',
  'readthedocs.org',
  'wiki.python.org',
  'en.wikipedia.org',
  'zh.wikipedia.org',
  // Official runtime documentation
  'nodejs.org',
  'www.nodejs.org',
  'deno.land',
  'www.deno.land',
  'bun.sh',
  'www.bun.sh',
  // Rust
  'doc.rust-lang.org',
  'www.rust-lang.org',
  'rust-lang.org',
  // Cloud platforms
  'aws.amazon.com',
  'docs.aws.amazon.com',
  'cloud.google.com',
  'docs.microsoft.com',
  'azure.microsoft.com',
  'developer.microsoft.com',
  // AI/LLM
  'openai.com',
  'platform.openai.com',
  'docs.anthropic.com',
  'anthropic.com',
  'claude.ai',
  'docs.cohere.com',
  'cohere.com',
  // AI model documentation
  'ai.google.dev',
  'ai.google.com',
  'developers.google.com',
  'learn.deepmind.com',
  // AI development frameworks
  'python.langchain.com',
  'js.langchain.com',
  'docs.litellm.ai',
  'litellm.ai',
  // Frontend frameworks
  'react.dev',
  'reactjs.org',
  'www.reactjs.org',
  'nextjs.org',
  'www.nextjs.org',
  'vuejs.org',
  'www.vuejs.org',
  'svelte.dev',
  'svelte.org',
  'angular.io',
  'www.angular.io',
  // Build tools
  'vitejs.dev',
  'vite.dev',
  'webpack.js.org',
  'esbuild.github.io',
  'rollupjs.org',
  // CSS
  'tailwindcss.com',
  'www.tailwindcss.com',
  'postcss.org',
  // Databases
  'redis.io',
  'www.redis.io',
  'postgresql.org',
  'www.postgresql.org',
  'www.mysql.com',
  'dev.mysql.com',
  'docs.mongodb.com',
  'www.mongodb.com',
  'sqlite.org',
  'www.sqlite.org',
  // Utilities
  'regex101.com',
  'ihateregex.io',
  'explainshell.com',
  'tldr.sh',
  // Code sharing
  'replit.com',
  'www.replit.com',
  'codesandbox.io',
  'www.codesandbox.io',
  'codepen.io',
  'www.codepen.io',
  'jsfiddle.net',
  'www.jsfiddle.net',
  // CI/CD (GitHub Actions docs live under docs.github.com, listed above)
  'circleci.com',
  'docs.circleci.com',
  'travis-ci.org',
  'www.travis-ci.com',
  'jenkins.io',
  'www.jenkins.io',
  // Containers / cloud native
  'kubernetes.io',
  'www.kubernetes.io',
  'docker.com',
  'www.docker.com',
  'docs.docker.com',
  // Testing
  'jestjs.io',
  'www.jestjs.io',
  'vitest.dev',
  'testing-library.com',
  'www.testing-library.com',
  'playwright.dev',
  'www.playwright.dev',
  'webdriver.io',
  // API documentation
  'httpbin.org',
  'restfulapi.net',
  'swagger.io',
  'www.swagger.io',
  'openapi.net',
  // Security
  'owasp.org',
  'www.owasp.org',
  'cve.mitre.org',
  // Certificates
  'letsencrypt.org',
  'www.letsencrypt.org',
  'acmev2.pki.duckdns.org',
  // Blogs and technical articles
  'medium.com',
  'www.medium.com',
  'dev.to',
  'www.dev.to',
  'hashnode.com',
  'www.hashnode.com',
  'devblogs.microsoft.com',
  // Browsers / web platform
  'caniuse.com',
  'developer.mozilla.org',
  'web.dev',
  'www.w3.org',
  // Open-source organizations
  'apache.org',
  'www.apache.org',
  'gnu.org',
  'www.gnu.org',
  'fsf.org',
  'www.fsf.org',
  'opensource.org',
  'www.opensource.org',
  // Technical communities
  'reddit.com',
  'www.reddit.com',
  'news.ycombinator.com',
  'lobste.rs',
  // File formats
  'json.org',
  'yaml.org',
  'www.yaml.org',
  'toml.io',
  'www.toml.io',
  // Version control
  'git-scm.com',
  'www.git-scm.com',
  'github.blog',
  'githubstatus.com',
  // AI search
  'tavily.com',
  'www.tavily.com',
  'perplexity.ai',
  'www.perplexity.ai',
  // AI images
  'midjourney.com',
  'www.midjourney.com',
  'stability.ai',
  'www.stability.ai',
  // Embeddings / vector stores
  'qdrant.tech',
  'www.qdrant.tech',
  'weaviate.io',
  'www.weaviate.io',
  'pinecone.io',
  'www.pinecone.io',
  // API platforms
  'ngrok.com',
  'www.ngrok.com',
  'requestly.io',
  'www.requestly.io',
  // MCP (the GitHub organization lives under github.com, listed above)
  'modelcontextprotocol.io',
  'www.modelcontextprotocol.io',
]);
226
/**
 * Hostname suffixes treated as preapproved by isPreapprovedHost().
 *
 * Each entry starts with a dot so that `endsWith` only matches on a label
 * boundary (e.g. `foo.github.io` matches, `evilgithub.io` does not).
 * Frozen so this shared policy list cannot be mutated at runtime.
 */
const PREAPPROVED_SUFFIXES = Object.freeze([
  '.github.io',
  '.readthedocs.io',
  '.vercel.app',
  '.vercel.dev',
  '.netlify.app',
  '.netlify.com',
  '.cloudflare-pages.com',
  '.pages.dev',
  '.surge.sh',
  '.herokuapp.com',
  '.railway.app',
  '.fly.dev',
  '.repl.co',
  '.workers.dev',
  '.pages.plus',
  '.coded.app',
  '.preview.app',
  '.staging.app',
]);
246
/**
 * Reports whether the host of a URL string is preapproved.
 *
 * @param {string} url - URL text to inspect.
 * @returns {boolean} Result of isPreapprovedHost() for the parsed hostname;
 *   `false` when the text cannot be parsed as a URL.
 */
export function isPreapprovedUrl(url) {
  try {
    const { hostname } = new URL(url);
    return isPreapprovedHost(hostname);
  } catch {
    // Unparseable input is simply "not preapproved".
    return false;
  }
}
255
/**
 * Reports whether a hostname is preapproved, either by exact match against
 * PREAPPROVED_HOSTS or by ending with one of PREAPPROVED_SUFFIXES.
 *
 * Matching is case-insensitive. A single trailing dot (the fully-qualified
 * form, e.g. `github.com.`) is ignored so it resolves to the same policy as
 * the plain hostname. Non-string or empty input is never preapproved.
 *
 * @param {string} hostname - Hostname without scheme, port, or path.
 * @returns {boolean} `true` when the hostname is on the preapproved list.
 */
export function isPreapprovedHost(hostname) {
  if (typeof hostname !== 'string') {
    return false;
  }
  const normalized = hostname.toLowerCase().replace(/\.$/, '');
  if (normalized === '') {
    return false;
  }
  if (PREAPPROVED_HOSTS.has(normalized)) {
    return true;
  }
  return PREAPPROVED_SUFFIXES.some((suffix) => normalized.endsWith(suffix));
}

/** Alias of isPreapprovedHost, kept under the name the WebFetch tool imports. */
export const isPreapprovedDomain = isPreapprovedHost;
@@ -0,0 +1,317 @@
1
+ /**
2
+ * ============================================================
3
+ * src/tools/webfetch/webfetch.js —— WebFetch 工具(获取 URL 内容)
4
+ * ------------------------------------------------------------
5
+ * 获取 URL → HTML 转 Markdown → 用 prompt 提取信息
6
+ *
7
+ * 输入:
8
+ * url 必填,要获取内容的 URL
9
+ * prompt 必填,对内容进行处理的指令
10
+ *
11
+ * 输出:
12
+ * { ok: true, content } | { ok: false, error }
13
+ *
14
+ * 流程:
15
+ * 1. URL 验证(合法格式、2000字符以内、无凭据)
16
+ * 2. fetch(自动 http→https,超时 60s)
17
+ * 3. 检测跨 host 重定向,返回提示而非自动跟随
18
+ * 4. HTML → Markdown(turndown);非 HTML 原样返回
19
+ * 5. 输出截断到 30K chars(超长内容直接截断)
20
+ * ============================================================
21
+ */
22
+ import { z } from 'zod';
23
+ import { DEFAULT_MAX_RESULT_SIZE_CHARS } from '../types.js';
24
+ import { toToolParameters } from '../../utils/zodToJson.js';
25
+ import { isPreapprovedDomain } from './preapproved.js';
26
+ // ---------------- 1. 常量 ----------------
27
/** Longest URL string accepted by validateURL(), in characters. */
const MAX_URL_LENGTH = 2_000;
/** Per-request timeout handed to AbortSignal.timeout(), in milliseconds. */
const FETCH_TIMEOUT_MS = 60 * 1000;
/** Largest response body fetchURL() will return, in bytes (10 MiB). */
const MAX_HTTP_CONTENT_LENGTH = 10 * 1024 ** 2;
/** Deepest redirect recursion fetchURL() allows before giving up. */
const MAX_REDIRECTS = 10;
/** In-memory cache of fetched responses, keyed by the requested URL string. */
const URL_CACHE = new Map();
/** How long a cache entry stays valid, in milliseconds (15 minutes). */
const CACHE_TTL_MS = 15 * 60_000;
/** Total `bytes` across cache entries above which cleanCache() evicts (50 MiB). */
const MAX_CACHE_SIZE_BYTES = 50 * 1024 ** 2;
34
/**
 * Prunes URL_CACHE in two passes.
 *
 * Pass 1 drops every entry older than CACHE_TTL_MS and totals the `bytes`
 * of the survivors. Pass 2 runs only when that total exceeds
 * MAX_CACHE_SIZE_BYTES: survivors are evicted oldest-first until the total
 * is at or below 80% of the limit.
 */
function cleanCache() {
  const now = Date.now();
  const survivors = [];
  let liveBytes = 0;

  for (const [url, entry] of URL_CACHE) {
    const expired = now - entry.fetchedAt > CACHE_TTL_MS;
    if (expired) {
      URL_CACHE.delete(url);
    } else {
      liveBytes += entry.bytes;
      survivors.push({ url, size: entry.bytes, fetchedAt: entry.fetchedAt });
    }
  }

  if (liveBytes <= MAX_CACHE_SIZE_BYTES) {
    return;
  }

  // Evict down to 80% of the limit rather than just under it.
  const lowWaterMark = MAX_CACHE_SIZE_BYTES * 0.8;
  survivors.sort((older, newer) => older.fetchedAt - newer.fetchedAt);
  for (const { url, size } of survivors) {
    URL_CACHE.delete(url);
    liveBytes -= size;
    if (liveBytes <= lowWaterMark) {
      break;
    }
  }
}
57
+ // ---------------- 3. Zod 输入 schema ----------------
58
// Field schemas for the tool input; the describe() text is what the model sees.
const urlField = z.string().describe('要获取内容的 URL');
const promptField = z.string().describe('对内容进行处理的指令,描述你想从页面提取什么信息');

/** Zod schema for the WebFetch tool input: `{ url, prompt }`, both strings. */
const inputSchema = z.object({
  url: urlField,
  prompt: promptField,
});

// ---------------- 4. JSON Schema (derived from the Zod schema) ----------------
/** Tool parameter definition produced by toToolParameters() from inputSchema. */
const parameters = toToolParameters(inputSchema);
64
+ // ---------------- 4. Description ----------------
65
/**
 * Model-facing description of the WebFetch tool, exposed as
 * `webfetchTool.description`. This text is runtime data, not a comment.
 *
 * NOTE(review): the text says the content is processed with the prompt by an
 * AI model, but call() in this file only reads `input.url` and returns the
 * fetched content — confirm whether prompt processing happens elsewhere.
 * NOTE(review): the text mentions PDF text extraction; fetchURL() in this
 * file only decodes the body as UTF-8 — confirm.
 */
const description = `- Fetches content from a specified URL and processes it using an AI model.
- Takes a URL and a prompt as input.
- Fetches the URL content, converts HTML to markdown.
- Processes the content with the prompt (e.g., extract summary, find specific info).
- Returns the processed result.
- HTTP URLs are automatically upgraded to HTTPS.
- When a URL redirects to a different host, returns a warning with the redirect URL.
- This tool is read-only and does not modify any files.
- Results may be summarized if the content is very large.
- ⚠️ IMPORTANT: This tool WILL FAIL for authenticated or private URLs.
Before using this tool, check if the URL points to an authenticated service
(e.g. Google Docs, Confluence, Jira, GitHub private repos). If so, look for
a specialized MCP tool that provides authenticated access.
- 💡 For GitHub URLs (repos, issues, PRs), prefer using the \`gh\` CLI via Bash
instead (e.g. \`gh pr view <pr-number>\`, \`gh issue view <number>\`, \`gh api <endpoint>\`).
- ⚡ If an MCP-provided web fetch tool is available, prefer using that tool instead,
as it may have fewer restrictions and better performance.
- 🔒 Domain preapproved list includes common documentation sites (MDN, TypeScript,
React, Vue, Angular, Node.js, Bun, Rust, Go, Python, etc.). Other domains
will work but results may be less reliable.
- 📝 This tool includes a self-cleaning 15-minute cache for faster repeated access
to the same URL.
- ⚠️ For PDF files, the tool will attempt to extract readable text but results may
be limited. Binary images cannot be processed.`;
89
+ // ---------------- 5. 辅助函数 ----------------
90
+ // ---------------- 6. call() 实现 ----------------
91
/**
 * Validates a URL string before it is fetched.
 *
 * Rejects, in order: non-string input, text longer than MAX_URL_LENGTH,
 * text that does not parse as a URL, schemes other than http/https (this is
 * a web-fetch tool; `data:`, `file:`, `ftp:` and similar are refused), and
 * URLs carrying embedded credentials.
 *
 * @param {string} url - Candidate URL text.
 * @returns {{ ok: true, parsed: URL } | { ok: false, error: string }}
 *   The parsed URL on success, otherwise a user-facing error message.
 */
function validateURL(url) {
  // Guard first so a missing/invalid argument yields an error result
  // instead of a TypeError from `url.length`.
  if (typeof url !== 'string') {
    return { ok: false, error: `无效的 URL 格式` };
  }
  if (url.length > MAX_URL_LENGTH) {
    return { ok: false, error: `URL 太长(最大 ${MAX_URL_LENGTH} 字符)` };
  }
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return { ok: false, error: `无效的 URL 格式` };
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return { ok: false, error: `仅支持 http/https 协议的 URL` };
  }
  if (parsed.username || parsed.password) {
    return { ok: false, error: `URL 不能包含用户名或密码` };
  }
  return { ok: true, parsed };
}
108
/**
 * Best-effort release of a response body that will not be read, so the
 * underlying connection is not held open until garbage collection.
 */
async function discardResponseBody(response) {
  try {
    await response.body?.cancel();
  } catch {
    // Nothing useful to do: the response is being thrown away regardless.
  }
}

/**
 * Fetches a URL and returns its decoded body, a redirect notice, or an error.
 *
 * Behavior:
 *  - The URL is checked with validateURL(); `http:` is upgraded to `https:`.
 *  - Redirects are handled manually (`redirect: 'manual'`). Without that
 *    option fetch follows redirects itself and a 3xx status is never seen
 *    here, which would make the cross-host check below unreachable.
 *  - A redirect to a different host (ignoring a leading `www.`) is NOT
 *    followed; it is reported as `{ type: 'redirect', ... }`.
 *  - Same-host redirects are followed recursively, at most MAX_REDIRECTS deep.
 *  - Bodies larger than MAX_HTTP_CONTENT_LENGTH are rejected, using the
 *    Content-Length header up front when present and the actual byte count
 *    after reading otherwise.
 *  - The body is decoded as UTF-8 without failing on invalid sequences.
 *
 * @param {string} url - URL to fetch.
 * @param {AbortSignal} [signal] - Optional caller cancellation signal; it is
 *   combined with an internal FETCH_TIMEOUT_MS timeout.
 * @param {number} [depth=0] - Current redirect depth (internal recursion).
 * @returns {Promise<
 *   { type: 'success', content: string, bytes: number, code: number,
 *     codeText: string, contentType: string } |
 *   { type: 'redirect', originalUrl: string, redirectUrl: string,
 *     statusCode: number } |
 *   { type: 'error', error: string }>}
 */
async function fetchURL(url, signal, depth = 0) {
  const validated = validateURL(url);
  if (!validated.ok) {
    return { type: 'error', error: validated.error };
  }

  const { parsed } = validated;
  let targetUrl = url;
  if (parsed.protocol === 'http:') {
    parsed.protocol = 'https:';
    targetUrl = parsed.toString();
  }

  let response;
  try {
    const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT_MS);
    const combinedSignal = signal
      ? AbortSignal.any([signal, timeoutSignal])
      : timeoutSignal;
    response = await fetch(targetUrl, {
      signal: combinedSignal,
      // Surface 3xx responses to the code below instead of auto-following.
      redirect: 'manual',
      headers: {
        Accept: 'text/markdown, text/html, */*',
        'User-Agent': 'minimal-agent/1.0',
      },
    });
  } catch (e) {
    if (signal?.aborted) {
      return { type: 'error', error: '请求被中断' };
    }
    return { type: 'error', error: `网络请求失败:${e.message}` };
  }

  const code = response.status;
  const codeText = response.statusText;
  const contentType = response.headers.get('content-type') ?? '';
  const location = response.headers.get('location');

  const redirectStatuses = [301, 302, 303, 307, 308];
  if (location && redirectStatuses.includes(code)) {
    await discardResponseBody(response);

    let redirectUrl;
    try {
      redirectUrl = new URL(location, targetUrl).toString();
    } catch {
      // A malformed Location header must not escape as an exception.
      return { type: 'error', error: `无效的重定向地址:${location}` };
    }

    const stripWww = (host) => host.replace(/^www\./, '');
    const originalHost = parsed.hostname;
    const redirectHost = new URL(redirectUrl).hostname;
    if (stripWww(originalHost) !== stripWww(redirectHost)) {
      return {
        type: 'redirect',
        originalUrl: url,
        redirectUrl,
        statusCode: code,
      };
    }
    if (depth >= MAX_REDIRECTS) {
      return { type: 'error', error: `重定向循环超过限制(最多 ${MAX_REDIRECTS} 次)` };
    }
    return fetchURL(redirectUrl, signal, depth + 1);
  }

  // Reject oversized bodies before buffering them when the server declares
  // a length. A missing header parses to 0 and falls through to the
  // post-read check.
  const declaredLength = Number(response.headers.get('content-length'));
  if (Number.isFinite(declaredLength) && declaredLength > MAX_HTTP_CONTENT_LENGTH) {
    await discardResponseBody(response);
    return { type: 'error', error: `内容太大(${declaredLength} bytes,超过 ${MAX_HTTP_CONTENT_LENGTH})` };
  }

  let rawBuffer;
  try {
    rawBuffer = await response.arrayBuffer();
  } catch (e) {
    return { type: 'error', error: `读取响应体失败:${e.message}` };
  }
  if (rawBuffer.byteLength > MAX_HTTP_CONTENT_LENGTH) {
    return { type: 'error', error: `内容太大(${rawBuffer.byteLength} bytes,超过 ${MAX_HTTP_CONTENT_LENGTH})` };
  }

  let content;
  try {
    // fatal: false replaces invalid byte sequences instead of throwing.
    const decoder = new TextDecoder('utf-8', { fatal: false });
    content = decoder.decode(rawBuffer);
  } catch (e) {
    return { type: 'error', error: `解码失败:${e.message}` };
  }

  return { type: 'success', content, bytes: rawBuffer.byteLength, code, codeText, contentType };
}
183
/**
 * Converts an HTML string to Markdown using the `turndown` package.
 *
 * The package is imported lazily on each call so it is only loaded when an
 * HTML response actually needs converting (the original note cites its size
 * as roughly 1.4MB).
 *
 * @param {string} html - HTML source text.
 * @returns {Promise<string>} Markdown produced by a default TurndownService.
 */
async function htmlToMarkdown(html) {
  const { default: TurndownService } = await import('turndown');
  return new TurndownService().turndown(html);
}
190
+ // ---------------- 6. call() 实现 ----------------
191
/** Formats a byte count as "N B", "N.N KB", or "N.N MB". */
function formatByteSize(bytes) {
  if (bytes < 1024) {
    return `${bytes} B`;
  }
  if (bytes < 1024 * 1024) {
    return `${(bytes / 1024).toFixed(1)} KB`;
  }
  return `${(bytes / 1024 / 1024).toFixed(1)} MB`;
}

/** Maps a redirect status code to its reason phrase; unknown codes read as "Found". */
function redirectStatusText(statusCode) {
  switch (statusCode) {
    case 301:
      return 'Moved Permanently';
    case 303:
      return 'See Other';
    case 307:
      return 'Temporary Redirect';
    case 308:
      return 'Permanent Redirect';
    default:
      return 'Found';
  }
}

/**
 * Converts HTML bodies to Markdown. Non-HTML content is returned untouched;
 * if conversion fails a warning is logged and the raw content is returned.
 */
async function toMarkdownIfHtml(content, contentType) {
  // Media types are case-insensitive, so compare in lowercase.
  if (!contentType.toLowerCase().includes('text/html')) {
    return content;
  }
  try {
    return await htmlToMarkdown(content);
  } catch (e) {
    console.warn(`turndown 失败: ${e.message}`);
    return content;
  }
}

/** Renders the metadata header plus content shared by cached and fresh results. */
function renderFetchReport({ url, code, codeText, bytes, elapsed, preapproved, fromCache, content }) {
  const domainNote = preapproved
    ? '[🔒 预批准域名,内容可信]'
    : '[⚠️ 非预批准域名,内容可能不准确]';
  const lines = [
    '【WebFetch 结果】',
    `URL: ${url}`,
    `状态: ${code} ${codeText}`,
    `大小: ${formatByteSize(bytes)}`,
    `耗时: ${elapsed}`,
    domainNote,
  ];
  if (fromCache) {
    lines.push('[📦 来自缓存(15分钟 TTL)]');
  }
  lines.push('', '--- 内容 ---', content);
  return lines.join('\n');
}

/** Caps text at DEFAULT_MAX_RESULT_SIZE_CHARS and appends a truncation notice. */
function truncateResult(text) {
  if (text.length <= DEFAULT_MAX_RESULT_SIZE_CHARS) {
    return text;
  }
  return text.slice(0, DEFAULT_MAX_RESULT_SIZE_CHARS) + `\n\n... (输出超过 ${DEFAULT_MAX_RESULT_SIZE_CHARS} 字符,已截断)`;
}

/**
 * WebFetch tool entry point: fetches `input.url` and returns a text report
 * containing response metadata followed by the page content (HTML is
 * converted to Markdown).
 *
 * Flow:
 *  1. Serve from URL_CACHE when an entry for this URL is within CACHE_TTL_MS.
 *  2. Otherwise fetch via fetchURL(); a cross-host redirect is reported as a
 *     notice asking the caller to retry with the new URL, and fetch errors
 *     are returned as `{ ok: false, error }`.
 *  3. Store the raw response in URL_CACHE and prune it with cleanCache().
 *  4. Render the report and truncate it to DEFAULT_MAX_RESULT_SIZE_CHARS.
 *
 * Only `input.url` is read here; `input.prompt` is accepted by the schema
 * but not used by this function.
 *
 * @param {{ url: string, prompt: string }} input - Validated tool input.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<{ ok: true, content: string } | { ok: false, error: string }>}
 */
async function call(input, signal) {
  const { url } = input;
  const start = Date.now();
  const cacheKey = url;

  let targetHostname;
  try {
    targetHostname = new URL(url).hostname;
  } catch {
    // An invalid URL is reported by fetchURL(); here it is just "not preapproved".
    targetHostname = '';
  }
  const preapproved = isPreapprovedDomain(targetHostname);

  // 1. Cache hit: the cache holds the raw body, so HTML is converted again.
  const cached = URL_CACHE.get(cacheKey);
  if (cached && Date.now() - cached.fetchedAt <= CACHE_TTL_MS) {
    const cachedContent = await toMarkdownIfHtml(cached.content, cached.contentType);
    const report = renderFetchReport({
      url,
      code: cached.code,
      codeText: cached.codeText,
      bytes: cached.bytes,
      elapsed: '0.00s (缓存命中)',
      preapproved,
      fromCache: true,
      content: cachedContent,
    });
    return { ok: true, content: truncateResult(report) };
  }

  // 2. Fetch. fetchURL() accepts an absent signal, so none is fabricated here.
  const fetched = await fetchURL(url, signal);
  const durationMs = Date.now() - start;

  if (fetched.type === 'redirect') {
    const statusText = redirectStatusText(fetched.statusCode);
    return {
      ok: true,
      content: `【重定向检测】

原始 URL: ${fetched.originalUrl}
重定向到: ${fetched.redirectUrl}
状态: ${fetched.statusCode} ${statusText}

请使用重定向后的 URL 再次调用 WebFetch 工具。`,
    };
  }
  if (fetched.type === 'error') {
    return { ok: false, error: fetched.error };
  }

  const { content: rawContent, bytes, code, codeText, contentType } = fetched;

  // 3. Cache the raw response, then prune expired/oversized entries.
  URL_CACHE.set(cacheKey, {
    bytes,
    code,
    codeText,
    content: rawContent,
    contentType,
    fetchedAt: Date.now(),
  });
  cleanCache();

  // 4. Render and truncate.
  const content = await toMarkdownIfHtml(rawContent, contentType);
  const report = renderFetchReport({
    url,
    code,
    codeText,
    bytes,
    elapsed: `${(durationMs / 1000).toFixed(2)}s`,
    preapproved,
    fromCache: false,
    content,
  });
  return { ok: true, content: truncateResult(report) };
}
307
+ // ---------------- 7. 导出 ----------------
308
/**
 * Tool definition for WebFetch: bundles the name, model-facing description,
 * input schema and derived parameters with the `call` implementation, and
 * declares the tool read-only and concurrency-safe.
 */
export const webfetchTool = {
  name: 'WebFetch',
  description: description,
  inputSchema: inputSchema,
  parameters: parameters,
  isReadOnly: true,
  isConcurrencySafe: true,
  maxResultSizeChars: DEFAULT_MAX_RESULT_SIZE_CHARS,
  call: call,
};