@becrafter/prompt-manager 0.0.18 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/IFLOW.md +175 -0
  2. package/README.md +145 -234
  3. package/app/desktop/assets/app.1.png +0 -0
  4. package/app/desktop/assets/app.png +0 -0
  5. package/app/desktop/assets/icons/icon.icns +0 -0
  6. package/app/desktop/assets/icons/icon.ico +0 -0
  7. package/app/desktop/assets/icons/icon.png +0 -0
  8. package/app/desktop/assets/icons/tray.png +0 -0
  9. package/app/desktop/assets/templates/about.html +147 -0
  10. package/app/desktop/assets/tray.png +0 -0
  11. package/app/desktop/main.js +187 -732
  12. package/app/desktop/package-lock.json +723 -522
  13. package/app/desktop/package.json +54 -25
  14. package/app/desktop/preload.js +7 -0
  15. package/app/desktop/src/core/error-handler.js +108 -0
  16. package/app/desktop/src/core/event-emitter.js +84 -0
  17. package/app/desktop/src/core/logger.js +108 -0
  18. package/app/desktop/src/core/state-manager.js +125 -0
  19. package/app/desktop/src/services/module-loader.js +214 -0
  20. package/app/desktop/src/services/runtime-manager.js +301 -0
  21. package/app/desktop/src/services/service-manager.js +169 -0
  22. package/app/desktop/src/services/update-manager.js +268 -0
  23. package/app/desktop/src/ui/about-dialog-manager.js +208 -0
  24. package/app/desktop/src/ui/admin-window-manager.js +757 -0
  25. package/app/desktop/src/ui/splash-manager.js +253 -0
  26. package/app/desktop/src/ui/tray-manager.js +186 -0
  27. package/app/desktop/src/utils/icon-manager.js +133 -0
  28. package/app/desktop/src/utils/path-utils.js +58 -0
  29. package/app/desktop/src/utils/resource-paths.js +49 -0
  30. package/app/desktop/src/utils/resource-sync.js +260 -0
  31. package/app/desktop/src/utils/runtime-sync.js +241 -0
  32. package/app/desktop/src/utils/template-renderer.js +284 -0
  33. package/app/desktop/src/utils/version-utils.js +59 -0
  34. package/examples/prompts/engineer/engineer-professional.yaml +92 -0
  35. package/examples/prompts/engineer/laowang-engineer.yaml +132 -0
  36. package/examples/prompts/engineer/nekomata-engineer.yaml +123 -0
  37. package/examples/prompts/engineer/ojousama-engineer.yaml +124 -0
  38. package/examples/prompts/recommend/human_3-0_growth_diagnostic_coach_prompt.yaml +105 -0
  39. package/examples/prompts/workflow/sixstep-workflow.yaml +192 -0
  40. package/package.json +18 -9
  41. package/packages/admin-ui/.babelrc +3 -0
  42. package/packages/admin-ui/admin.html +237 -4784
  43. package/packages/admin-ui/css/main.css +2592 -0
  44. package/packages/admin-ui/css/recommended-prompts.css +610 -0
  45. package/packages/admin-ui/package-lock.json +6981 -0
  46. package/packages/admin-ui/package.json +36 -0
  47. package/packages/admin-ui/src/codemirror.js +53 -0
  48. package/packages/admin-ui/src/index.js +3188 -0
  49. package/packages/admin-ui/webpack.config.js +76 -0
  50. package/packages/resources/tools/chrome-devtools/README.md +310 -0
  51. package/packages/resources/tools/chrome-devtools/chrome-devtools.tool.js +1703 -0
  52. package/packages/resources/tools/file-reader/README.md +289 -0
  53. package/packages/resources/tools/file-reader/file-reader.tool.js +1545 -0
  54. package/packages/resources/tools/filesystem/README.md +359 -0
  55. package/packages/resources/tools/filesystem/filesystem.tool.js +538 -0
  56. package/packages/resources/tools/ollama-remote/README.md +192 -0
  57. package/packages/resources/tools/ollama-remote/ollama-remote.tool.js +421 -0
  58. package/packages/resources/tools/pdf-reader/README.md +236 -0
  59. package/packages/resources/tools/pdf-reader/pdf-reader.tool.js +565 -0
  60. package/packages/resources/tools/playwright/README.md +306 -0
  61. package/packages/resources/tools/playwright/playwright.tool.js +1186 -0
  62. package/packages/resources/tools/todolist/README.md +394 -0
  63. package/packages/resources/tools/todolist/todolist.tool.js +1312 -0
  64. package/packages/server/README.md +142 -0
  65. package/packages/server/api/admin.routes.js +42 -11
  66. package/packages/server/api/surge.routes.js +43 -0
  67. package/packages/server/app.js +119 -14
  68. package/packages/server/index.js +39 -0
  69. package/packages/server/mcp/mcp.server.js +346 -28
  70. package/packages/server/mcp/{mcp.handler.js → prompt.handler.js} +108 -9
  71. package/packages/server/mcp/sequential-thinking.handler.js +318 -0
  72. package/packages/server/mcp/think-plan.handler.js +274 -0
  73. package/packages/server/middlewares/auth.middleware.js +6 -0
  74. package/packages/server/package.json +51 -0
  75. package/packages/server/server.js +37 -1
  76. package/packages/server/toolm/index.js +9 -0
  77. package/packages/server/toolm/package-installer.service.js +267 -0
  78. package/packages/server/toolm/test-tools.js +264 -0
  79. package/packages/server/toolm/tool-context.service.js +334 -0
  80. package/packages/server/toolm/tool-dependency.service.js +168 -0
  81. package/packages/server/toolm/tool-description-generator-optimized.service.js +375 -0
  82. package/packages/server/toolm/tool-description-generator.service.js +312 -0
  83. package/packages/server/toolm/tool-environment.service.js +200 -0
  84. package/packages/server/toolm/tool-execution.service.js +277 -0
  85. package/packages/server/toolm/tool-loader.service.js +219 -0
  86. package/packages/server/toolm/tool-logger.service.js +223 -0
  87. package/packages/server/toolm/tool-manager.handler.js +65 -0
  88. package/packages/server/toolm/tool-manual-generator.service.js +389 -0
  89. package/packages/server/toolm/tool-mode-handlers.service.js +224 -0
  90. package/packages/server/toolm/tool-storage.service.js +111 -0
  91. package/packages/server/toolm/tool-sync.service.js +138 -0
  92. package/packages/server/toolm/tool-utils.js +20 -0
  93. package/packages/server/toolm/tool-yaml-parser.service.js +81 -0
  94. package/packages/server/toolm/validate-system.js +421 -0
  95. package/packages/server/utils/config.js +49 -5
  96. package/packages/server/utils/util.js +65 -10
  97. package/scripts/build-icons.js +135 -0
  98. package/scripts/build.sh +57 -0
  99. package/scripts/surge/CNAME +1 -0
  100. package/scripts/surge/README.md +47 -0
  101. package/scripts/surge/package-lock.json +34 -0
  102. package/scripts/surge/package.json +20 -0
  103. package/scripts/surge/sync-to-surge.js +151 -0
  104. package/packages/admin-ui/js/closebrackets.min.js +0 -8
  105. package/packages/admin-ui/js/codemirror.min.js +0 -8
  106. package/packages/admin-ui/js/js-yaml.min.js +0 -2
  107. package/packages/admin-ui/js/markdown.min.js +0 -8
  108. /package/app/desktop/assets/{icon.png → tray.1.png} +0 -0
@@ -0,0 +1,1545 @@
1
+ /**
2
+ * File Reader - 统一文件读取工具(本地+远程,支持多种格式转换)
3
+ *
4
+ * 战略意义:
5
+ * 1. 架构隔离性 - 统一的文件读取接口,确保AI操作安全
6
+ * 2. 平台独立性 - 支持本地和远程文件,不依赖特定平台
7
+ * 3. 生态自主性 - 作为Prompt Manager生态的关键组件
8
+ *
9
+ * 核心能力:
10
+ * - 自动识别本地/远程文件(file://, http://, https://)
11
+ * - 智能识别文件类型(文本、JSON、XML、图片、视频等)
12
+ * - 转换为模型友好格式(结构化数据、文本描述、base64等)
13
+ * - 支持多种编码格式(UTF-8, GBK等)
14
+ *
15
+ * 设计理念:
16
+ * - 统一接口:一个方法处理所有文件类型
17
+ * - 智能转换:根据文件类型自动选择最佳转换策略
18
+ * - 模型友好:输出格式便于AI模型理解和处理
19
+ * - 可扩展性:易于添加新的文件类型支持
20
+ */
21
+
22
+ import path from 'path';
23
+ import os from 'os';
24
+ import { URL } from 'url';
25
+
26
+ export default {
27
+ /**
28
+ * 获取工具依赖
29
+ */
30
+ getDependencies() {
31
+ return {
32
+ 'axios': '^1.6.0', // HTTP请求
33
+ 'mime-types': '^2.1.35', // MIME类型检测
34
+ 'iconv-lite': '^0.6.3', // 编码转换(支持GBK等)
35
+ 'yaml': '^2.3.4' // YAML解析(可选,用于YAML文件)
36
+ };
37
+ },
38
+
39
+ /**
40
+ * 获取工具元信息
41
+ */
42
+ getMetadata() {
43
+ return {
44
+ id: 'file-reader',
45
+ name: 'File Reader',
46
+ description: '统一文件读取工具,支持本地和远程文件,自动识别文件类型并转换为模型友好格式',
47
+ version: '1.0.0',
48
+ category: 'utility',
49
+ author: 'Prompt Manager',
50
+ tags: ['file', 'reader', 'http', 'local', 'remote', 'converter', 'mcp', 'utility'],
51
+ scenarios: [
52
+ '读取本地文本文件',
53
+ '下载并读取远程文件',
54
+ '解析JSON/XML/YAML文件',
55
+ '处理图片文件(转换为base64+描述)',
56
+ '处理视频/音频文件(提取元信息)',
57
+ '读取代码文件(保留格式)',
58
+ '批量读取多个文件'
59
+ ],
60
+ limitations: [
61
+ '远程文件大小限制为10MB',
62
+ '视频文件仅提取元信息,不提取完整内容',
63
+ '需要配置URL白名单才能访问远程文件',
64
+ '大文件处理可能较慢'
65
+ ]
66
+ };
67
+ },
68
+
69
+ /**
70
+ * 获取参数Schema
71
+ */
72
+ getSchema() {
73
+ return {
74
+ parameters: {
75
+ type: 'object',
76
+ properties: {
77
+ method: {
78
+ type: 'string',
79
+ description: '操作方法',
80
+ enum: ['read_file'],
81
+ default: 'read_file'
82
+ },
83
+ url: {
84
+ type: 'string',
85
+ description: '文件URL或路径(支持 file://, http://, https:// 或本地路径)',
86
+ minLength: 1
87
+ },
88
+ encoding: {
89
+ type: 'string',
90
+ description: '文本文件编码(默认自动检测,支持 utf-8, gbk, gb2312等)',
91
+ default: 'auto'
92
+ },
93
+ convertTo: {
94
+ type: 'string',
95
+ description: '转换目标格式(auto=自动, text=纯文本, json=JSON对象, xml=XML对象, image=图片base64, video=视频元信息)',
96
+ enum: ['auto', 'text', 'json', 'xml', 'image', 'video', 'audio'],
97
+ default: 'auto'
98
+ },
99
+ includeMetadata: {
100
+ type: 'boolean',
101
+ description: '是否包含文件元信息(大小、类型、修改时间等)',
102
+ default: true
103
+ },
104
+ maxSize: {
105
+ type: 'number',
106
+ description: '最大文件大小(字节),默认10MB',
107
+ default: 10485760
108
+ },
109
+ timeout: {
110
+ type: 'number',
111
+ description: '远程文件下载超时时间(毫秒),默认30秒',
112
+ default: 30000
113
+ },
114
+ basePath: {
115
+ type: 'string',
116
+ description: '基础路径(用于解析相对路径,通常是当前文件所在目录)'
117
+ },
118
+ extractLinks: {
119
+ type: 'boolean',
120
+ description: '是否提取并读取文本中的嵌套链接(默认false)',
121
+ default: false
122
+ },
123
+ maxDepth: {
124
+ type: 'number',
125
+ description: '递归读取链接的最大深度(防止无限递归,默认3)',
126
+ default: 3,
127
+ minimum: 1,
128
+ maximum: 10
129
+ },
130
+ cacheTimeout: {
131
+ type: 'number',
132
+ description: '缓存超时时间(毫秒),默认10分钟(600000)',
133
+ default: 600000
134
+ },
135
+ useCache: {
136
+ type: 'boolean',
137
+ description: '是否使用缓存(默认true)',
138
+ default: true
139
+ }
140
+ },
141
+ required: ['method', 'url']
142
+ },
143
+ environment: {
144
+ type: 'object',
145
+ properties: {
146
+ ALLOWED_DIRECTORIES: {
147
+ type: 'string',
148
+ description: '允许访问的本地目录列表(JSON数组格式)',
149
+ default: '["~/.prompt-manager"]'
150
+ },
151
+ ALLOWED_URLS: {
152
+ type: 'string',
153
+ description: '允许访问的远程URL白名单(JSON数组格式,支持通配符)',
154
+ default: '["https://*", "http://*"]'
155
+ },
156
+ MAX_FILE_SIZE: {
157
+ type: 'string',
158
+ description: '最大文件大小(字节)',
159
+ default: '10485760'
160
+ },
161
+ DEFAULT_TIMEOUT: {
162
+ type: 'string',
163
+ description: '默认超时时间(毫秒)',
164
+ default: '30000'
165
+ },
166
+ DEFAULT_CACHE_TIMEOUT: {
167
+ type: 'string',
168
+ description: '默认缓存超时时间(毫秒)',
169
+ default: '600000'
170
+ },
171
+ MAX_RECURSION_DEPTH: {
172
+ type: 'string',
173
+ description: '最大递归深度',
174
+ default: '3'
175
+ }
176
+ },
177
+ required: []
178
+ }
179
+ };
180
+ },
181
+
182
+ /**
183
+ * 获取业务错误定义
184
+ */
185
+ getBusinessErrors() {
186
+ return [
187
+ {
188
+ code: 'INVALID_URL',
189
+ description: '无效的URL格式',
190
+ match: /invalid url|Invalid URL|URL格式错误/i,
191
+ solution: '请检查URL格式是否正确(支持 file://, http://, https:// 或本地路径)',
192
+ retryable: false
193
+ },
194
+ {
195
+ code: 'URL_NOT_ALLOWED',
196
+ description: 'URL不在白名单中',
197
+ match: /URL.*not allowed|不在白名单|URL.*blocked/i,
198
+ solution: '请将URL添加到环境变量 ALLOWED_URLS 白名单中',
199
+ retryable: false
200
+ },
201
+ {
202
+ code: 'FILE_NOT_FOUND',
203
+ description: '文件不存在',
204
+ match: /ENOENT|no such file|404|Not Found|cannot find/i,
205
+ solution: '请检查文件路径或URL是否正确',
206
+ retryable: false
207
+ },
208
+ {
209
+ code: 'FILE_TOO_LARGE',
210
+ description: '文件过大',
211
+ match: /File too large|文件过大|exceeds.*size|413/i,
212
+ solution: '文件大小超过限制,请使用较小的文件或增加 MAX_FILE_SIZE 配置',
213
+ retryable: false
214
+ },
215
+ {
216
+ code: 'NETWORK_ERROR',
217
+ description: '网络错误',
218
+ match: /network error|ECONNREFUSED|ETIMEDOUT|timeout|网络错误/i,
219
+ solution: '请检查网络连接,稍后重试',
220
+ retryable: true
221
+ },
222
+ {
223
+ code: 'PERMISSION_DENIED',
224
+ description: '权限不足',
225
+ match: /EACCES|permission denied|403|Forbidden|权限不足/i,
226
+ solution: '请检查文件权限或URL访问权限',
227
+ retryable: false
228
+ },
229
+ {
230
+ code: 'UNSUPPORTED_ENCODING',
231
+ description: '不支持的编码格式',
232
+ match: /unsupported encoding|不支持的编码|encoding.*error/i,
233
+ solution: '请指定正确的编码格式(如 utf-8, gbk)',
234
+ retryable: false
235
+ },
236
+ {
237
+ code: 'PARSE_ERROR',
238
+ description: '文件解析失败',
239
+ match: /parse error|解析失败|Invalid JSON|Invalid XML|格式错误/i,
240
+ solution: '文件格式可能不正确,请检查文件内容',
241
+ retryable: false
242
+ },
243
+ {
244
+ code: 'PATH_OUTSIDE_SCOPE',
245
+ description: '路径越权访问',
246
+ match: /路径越权|不在允许的目录范围内/i,
247
+ solution: '请使用允许的目录范围内的路径',
248
+ retryable: false
249
+ }
250
+ ];
251
+ },
252
+
253
+ /**
254
+ * 执行工具
255
+ */
256
+ async execute(params) {
257
+ const { api } = this;
258
+
259
+ api?.logger?.info('执行文件读取', {
260
+ method: params.method,
261
+ url: params.url
262
+ });
263
+
264
+ try {
265
+ // 参数验证
266
+ if (params.method !== 'read_file') {
267
+ throw new Error(`不支持的方法: ${params.method}`);
268
+ }
269
+
270
+ if (!params.url) {
271
+ throw new Error('缺少必需参数: url');
272
+ }
273
+
274
+ // 解析相对路径(如果有basePath)
275
+ const resolvedUrl = this.resolveRelativeUrl(params.url, params.basePath);
276
+ api?.logger?.info('URL解析', {
277
+ original: params.url,
278
+ resolved: resolvedUrl,
279
+ basePath: params.basePath
280
+ });
281
+
282
+ // 识别URL类型
283
+ const urlInfo = this.parseUrl(resolvedUrl);
284
+ api?.logger?.info('URL解析结果', urlInfo);
285
+
286
+ // 检查缓存
287
+ const cacheKey = this.getCacheKey(resolvedUrl, params);
288
+ const useCache = params.useCache !== false;
289
+ let cachedData = null;
290
+ let fileData = null;
291
+ let fileType = null;
292
+ let converted = null;
293
+
294
+ if (useCache) {
295
+ cachedData = this.getCachedData(cacheKey, params);
296
+ if (cachedData) {
297
+ api?.logger?.info('使用缓存数据', { cacheKey });
298
+ // 如果不需要提取链接,直接返回缓存
299
+ if (!params.extractLinks) {
300
+ return cachedData;
301
+ }
302
+ // 如果需要提取链接,使用缓存的内容作为基础
303
+ if (params.extractLinks && cachedData.content) {
304
+ converted = { content: cachedData.content, format: cachedData.format };
305
+ fileType = { type: cachedData.file?.type || 'text', mimeType: cachedData.file?.mimeType || 'text/plain' };
306
+ }
307
+ }
308
+ }
309
+
310
+ // 如果没有缓存或需要重新读取,读取文件内容
311
+ if (!cachedData || !converted) {
312
+ // 读取文件内容
313
+ if (urlInfo.isLocal) {
314
+ fileData = await this.readLocalFile(urlInfo.path, params);
315
+ } else {
316
+ fileData = await this.readRemoteFile(urlInfo.url, params);
317
+ }
318
+
319
+ // 检测文件类型
320
+ fileType = await this.detectFileType(urlInfo.path || urlInfo.url, fileData);
321
+ api?.logger?.info('文件类型检测', { fileType });
322
+
323
+ // 转换文件内容为模型友好格式
324
+ converted = await this.convertToModelFormat(fileData, fileType, params);
325
+ }
326
+
327
+ // 提取并读取嵌套链接(如果需要)
328
+ let finalContent = converted.content;
329
+ let nestedLinks = [];
330
+
331
+ if (params.extractLinks && typeof converted.content === 'string') {
332
+ const maxDepth = params.maxDepth || parseInt(api.environment.get('MAX_RECURSION_DEPTH') || '3', 10);
333
+ const currentDepth = params._currentDepth || 0;
334
+
335
+ if (currentDepth < maxDepth) {
336
+ nestedLinks = await this.extractAndReadLinks(
337
+ converted.content,
338
+ resolvedUrl,
339
+ params,
340
+ currentDepth + 1
341
+ );
342
+
343
+ // 将嵌套链接内容嵌入到原文本中
344
+ if (nestedLinks.length > 0) {
345
+ finalContent = this.embedLinksInContent(converted.content, nestedLinks);
346
+ }
347
+ } else {
348
+ api?.logger?.warn('达到最大递归深度,停止提取链接', { maxDepth });
349
+ }
350
+ }
351
+
352
+ // 构建返回结果
353
+ const result = {
354
+ success: true,
355
+ source: {
356
+ url: params.url,
357
+ resolvedUrl: resolvedUrl,
358
+ type: urlInfo.isLocal ? 'local' : 'remote',
359
+ path: urlInfo.path || urlInfo.url
360
+ },
361
+ file: {
362
+ type: fileType.type,
363
+ mimeType: fileType.mimeType,
364
+ encoding: fileType.encoding || 'utf-8',
365
+ size: fileData ? fileData.buffer.length : (cachedData?.file?.size || 0)
366
+ },
367
+ content: finalContent,
368
+ format: converted.format
369
+ };
370
+
371
+ // 添加嵌套链接信息
372
+ if (nestedLinks.length > 0) {
373
+ result.nestedLinks = nestedLinks.map(link => ({
374
+ url: link.url,
375
+ resolvedUrl: link.resolvedUrl,
376
+ type: link.type,
377
+ size: link.size,
378
+ depth: link.depth
379
+ }));
380
+ }
381
+
382
+ // 添加元信息(如果需要)
383
+ if (params.includeMetadata !== false) {
384
+ result.metadata = {
385
+ detectedType: fileType.type,
386
+ conversion: converted.conversion || 'none',
387
+ originalSize: fileData ? fileData.buffer.length : (cachedData?.file?.size || 0),
388
+ convertedSize: typeof finalContent === 'string'
389
+ ? finalContent.length
390
+ : JSON.stringify(finalContent).length,
391
+ fromCache: !!cachedData,
392
+ nestedLinksCount: nestedLinks.length
393
+ };
394
+ }
395
+
396
+ // 保存到缓存
397
+ if (useCache && !cachedData) {
398
+ this.setCachedData(cacheKey, result, params);
399
+ }
400
+
401
+ // 清理过期缓存
402
+ this.cleanExpiredCache();
403
+
404
+ api?.logger?.info('文件读取完成', {
405
+ type: fileType.type,
406
+ size: fileData ? fileData.buffer.length : (cachedData?.file?.size || 0),
407
+ fromCache: !!cachedData,
408
+ nestedLinks: nestedLinks.length
409
+ });
410
+
411
+ return result;
412
+
413
+ } catch (error) {
414
+ api?.logger?.error('文件读取失败', {
415
+ error: error.message,
416
+ stack: error.stack
417
+ });
418
+ throw error;
419
+ }
420
+ },
421
+
422
+ /**
423
+ * 内部文件读取方法(避免递归调用execute)
424
+ */
425
+ async readFileInternal(params) {
426
+ const { api } = this;
427
+
428
+ try {
429
+ // 解析相对路径(如果有basePath)
430
+ const resolvedUrl = this.resolveRelativeUrl(params.url, params.basePath);
431
+
432
+ // 识别URL类型
433
+ const urlInfo = this.parseUrl(resolvedUrl);
434
+
435
+ // 检查缓存
436
+ const cacheKey = this.getCacheKey(resolvedUrl, params);
437
+ const useCache = params.useCache !== false;
438
+ let cachedData = null;
439
+ let fileData = null;
440
+ let fileType = null;
441
+ let converted = null;
442
+
443
+ if (useCache) {
444
+ cachedData = this.getCachedData(cacheKey, params);
445
+ // 如果有缓存且不需要提取链接,直接返回
446
+ if (cachedData && !params.extractLinks) {
447
+ return cachedData;
448
+ }
449
+ // 如果有缓存且需要提取链接,使用缓存的内容作为基础
450
+ if (cachedData && params.extractLinks && cachedData.content) {
451
+ converted = { content: cachedData.content, format: cachedData.format };
452
+ fileType = { type: cachedData.file?.type || 'text', mimeType: cachedData.file?.mimeType || 'text/plain' };
453
+ }
454
+ }
455
+
456
+ // 如果没有缓存或需要重新读取,读取文件内容
457
+ if (!cachedData || !converted) {
458
+ if (urlInfo.isLocal) {
459
+ fileData = await this.readLocalFile(urlInfo.path, params);
460
+ } else {
461
+ fileData = await this.readRemoteFile(urlInfo.url, params);
462
+ }
463
+
464
+ // 检测文件类型
465
+ fileType = await this.detectFileType(urlInfo.path || urlInfo.url, fileData);
466
+
467
+ // 转换文件内容为模型友好格式
468
+ converted = await this.convertToModelFormat(fileData, fileType, params);
469
+ }
470
+
471
+ // 提取并读取嵌套链接(如果需要)
472
+ let finalContent = converted.content;
473
+ let nestedLinks = [];
474
+
475
+ if (params.extractLinks && typeof converted.content === 'string') {
476
+ const maxDepth = params.maxDepth || parseInt(api.environment.get('MAX_RECURSION_DEPTH') || '3', 10);
477
+ const currentDepth = params._currentDepth || 0;
478
+
479
+ if (currentDepth < maxDepth) {
480
+ nestedLinks = await this.extractAndReadLinks(
481
+ converted.content,
482
+ resolvedUrl,
483
+ params,
484
+ currentDepth + 1
485
+ );
486
+
487
+ // 将嵌套链接内容嵌入到原文本中
488
+ if (nestedLinks.length > 0) {
489
+ finalContent = this.embedLinksInContent(converted.content, nestedLinks);
490
+ }
491
+ }
492
+ }
493
+
494
+ // 构建返回结果
495
+ const result = {
496
+ success: true,
497
+ source: {
498
+ url: params.url,
499
+ resolvedUrl: resolvedUrl,
500
+ type: urlInfo.isLocal ? 'local' : 'remote',
501
+ path: urlInfo.path || urlInfo.url
502
+ },
503
+ file: {
504
+ type: fileType.type,
505
+ mimeType: fileType.mimeType,
506
+ encoding: fileType.encoding || 'utf-8',
507
+ size: fileData.buffer.length
508
+ },
509
+ content: finalContent,
510
+ format: converted.format
511
+ };
512
+
513
+ // 添加嵌套链接信息
514
+ if (nestedLinks.length > 0) {
515
+ result.nestedLinks = nestedLinks.map(link => ({
516
+ url: link.url,
517
+ resolvedUrl: link.resolvedUrl,
518
+ type: link.type,
519
+ size: link.size,
520
+ depth: link.depth
521
+ }));
522
+ }
523
+
524
+ // 添加元信息
525
+ if (params.includeMetadata !== false) {
526
+ result.metadata = {
527
+ detectedType: fileType.type,
528
+ conversion: converted.conversion || 'none',
529
+ originalSize: fileData ? fileData.buffer.length : (cachedData?.file?.size || 0),
530
+ convertedSize: typeof finalContent === 'string'
531
+ ? finalContent.length
532
+ : JSON.stringify(finalContent).length,
533
+ fromCache: !!cachedData,
534
+ nestedLinksCount: nestedLinks.length
535
+ };
536
+ }
537
+
538
+ // 保存到缓存
539
+ if (useCache && !cachedData) {
540
+ this.setCachedData(cacheKey, result, params);
541
+ }
542
+
543
+ return result;
544
+ } catch (error) {
545
+ api?.logger?.error('内部文件读取失败', {
546
+ error: error.message
547
+ });
548
+ throw error;
549
+ }
550
+ },
551
+
552
+ /**
553
+ * 解析URL,识别本地/远程
554
+ */
555
+ parseUrl(urlString) {
556
+ try {
557
+ // 处理 file:// 协议
558
+ if (urlString.startsWith('file://')) {
559
+ const url = new URL(urlString);
560
+ // Windows路径处理:file:///C:/path -> C:/path
561
+ let filePath = url.pathname;
562
+ if (process.platform === 'win32' && filePath.startsWith('/')) {
563
+ filePath = filePath.slice(1);
564
+ }
565
+ return {
566
+ isLocal: true,
567
+ path: filePath,
568
+ url: urlString
569
+ };
570
+ }
571
+
572
+ // 处理 http:// 或 https://
573
+ if (urlString.startsWith('http://') || urlString.startsWith('https://')) {
574
+ return {
575
+ isLocal: false,
576
+ url: urlString,
577
+ path: null
578
+ };
579
+ }
580
+
581
+ // 处理本地路径(相对路径或绝对路径)
582
+ // 如果以 ~ 开头,需要展开
583
+ let localPath = urlString;
584
+ if (localPath.startsWith('~')) {
585
+ localPath = path.join(os.homedir(), localPath.slice(1));
586
+ }
587
+
588
+ return {
589
+ isLocal: true,
590
+ path: localPath,
591
+ url: `file://${localPath}`
592
+ };
593
+ } catch (error) {
594
+ throw new Error(`无效的URL格式: ${urlString}`);
595
+ }
596
+ },
597
+
598
+ /**
599
+ * 读取本地文件
600
+ */
601
+ async readLocalFile(filePath, params) {
602
+ const { api } = this;
603
+
604
+ // 初始化文件系统
605
+ await this.initializeFilesystem();
606
+
607
+ // 解析并验证路径
608
+ const safePath = this.resolvePromptManagerPath(filePath);
609
+
610
+ // 读取文件
611
+ const fs = await import('fs');
612
+ const fsPromises = fs.promises;
613
+
614
+ const buffer = await fsPromises.readFile(safePath);
615
+
616
+ // 检查文件大小
617
+ const maxSize = params.maxSize || parseInt(this.api.environment.get('MAX_FILE_SIZE') || '10485760', 10);
618
+ if (buffer.length > maxSize) {
619
+ throw new Error(`文件过大: ${buffer.length} 字节,超过限制 ${maxSize} 字节`);
620
+ }
621
+
622
+ return {
623
+ buffer,
624
+ path: safePath,
625
+ size: buffer.length
626
+ };
627
+ },
628
+
629
+ /**
630
+ * 读取远程文件
631
+ */
632
+ async readRemoteFile(url, params) {
633
+ const { api } = this;
634
+
635
+ // 检查URL白名单
636
+ await this.validateRemoteUrl(url);
637
+
638
+ // 导入axios
639
+ const axiosModule = await this.importToolModule('axios');
640
+ const axios = axiosModule.default || axiosModule;
641
+
642
+ // 配置请求参数
643
+ const timeout = params.timeout || parseInt(this.api.environment.get('DEFAULT_TIMEOUT') || '30000', 10);
644
+ const maxSize = params.maxSize || parseInt(this.api.environment.get('MAX_FILE_SIZE') || '10485760', 10);
645
+
646
+ api?.logger?.info('开始下载远程文件', { url, timeout, maxSize });
647
+
648
+ try {
649
+ const response = await axios({
650
+ method: 'GET',
651
+ url: url,
652
+ responseType: 'arraybuffer',
653
+ timeout: timeout,
654
+ maxContentLength: maxSize,
655
+ maxBodyLength: maxSize,
656
+ headers: {
657
+ 'User-Agent': 'Prompt-Manager-File-Reader/1.0.0'
658
+ }
659
+ });
660
+
661
+ const buffer = Buffer.from(response.data);
662
+
663
+ api?.logger?.info('远程文件下载成功', {
664
+ size: buffer.length,
665
+ contentType: response.headers['content-type']
666
+ });
667
+
668
+ return {
669
+ buffer,
670
+ url: url,
671
+ size: buffer.length,
672
+ contentType: response.headers['content-type']
673
+ };
674
+ } catch (error) {
675
+ if (error.code === 'ECONNABORTED' || error.message.includes('timeout')) {
676
+ throw new Error(`下载超时: ${url}`);
677
+ }
678
+ if (error.response?.status === 404) {
679
+ throw new Error(`文件不存在: ${url}`);
680
+ }
681
+ if (error.response?.status === 403) {
682
+ throw new Error(`访问被拒绝: ${url}`);
683
+ }
684
+ if (error.response?.status === 413 || error.message.includes('maxContentLength')) {
685
+ throw new Error(`文件过大: 超过 ${maxSize} 字节限制`);
686
+ }
687
+ throw new Error(`下载失败: ${error.message}`);
688
+ }
689
+ },
690
+
691
+ /**
692
+ * 验证远程URL是否在白名单中
693
+ */
694
+ async validateRemoteUrl(url) {
695
+ const { api } = this;
696
+
697
+ // 获取配置的白名单,如果没有配置则默认为允许所有 http/https
698
+ const allowedUrlsValue = api.environment.get('ALLOWED_URLS');
699
+ let allowedUrls;
700
+
701
+ // 处理环境变量的不同格式
702
+ if (!allowedUrlsValue) {
703
+ // 没有配置,默认允许所有 http/https 链接
704
+ allowedUrls = ['https://*', 'http://*'];
705
+ } else if (Array.isArray(allowedUrlsValue)) {
706
+ // 如果已经是数组(从 .env 文件解析后的格式),直接使用
707
+ allowedUrls = allowedUrlsValue.length > 0 ? allowedUrlsValue : ['https://*', 'http://*'];
708
+ } else if (typeof allowedUrlsValue === 'string') {
709
+ // 如果是字符串,尝试解析 JSON
710
+ const trimmed = allowedUrlsValue.trim();
711
+ if (!trimmed || trimmed === '') {
712
+ allowedUrls = ['https://*', 'http://*'];
713
+ } else {
714
+ try {
715
+ allowedUrls = JSON.parse(trimmed);
716
+ // 如果解析后为空数组,也使用默认值
717
+ if (!Array.isArray(allowedUrls) || allowedUrls.length === 0) {
718
+ allowedUrls = ['https://*', 'http://*'];
719
+ }
720
+ } catch {
721
+ // 解析失败,使用默认值
722
+ allowedUrls = ['https://*', 'http://*'];
723
+ }
724
+ }
725
+ } else {
726
+ // 其他类型,使用默认值
727
+ allowedUrls = ['https://*', 'http://*'];
728
+ }
729
+
730
+ // 检查URL是否匹配白名单
731
+ try {
732
+ const urlObj = new URL(url);
733
+
734
+ // 检查协议是否为 http 或 https
735
+ if (urlObj.protocol !== 'http:' && urlObj.protocol !== 'https:') {
736
+ throw new Error(`不支持的协议: ${urlObj.protocol},仅支持 http:// 和 https://`);
737
+ }
738
+
739
+ const isAllowed = allowedUrls.some(pattern => {
740
+ // 处理通配符模式
741
+ // https://* 或 http://* 应该匹配所有对应协议的 URL
742
+ if (pattern === 'https://*' || pattern === 'http://*') {
743
+ return url.startsWith(pattern.replace('*', ''));
744
+ }
745
+
746
+ // 处理带主机名的通配符:https://example.com/* 或 https://*.example.com/*
747
+ if (pattern.includes('://')) {
748
+ // 如果模式以 /* 结尾,匹配该域名下的所有路径
749
+ if (pattern.endsWith('/*')) {
750
+ const baseUrl = pattern.slice(0, -2); // 移除 '/*'
751
+ return url.startsWith(baseUrl + '/') || url === baseUrl;
752
+ }
753
+
754
+ // 如果模式包含 * 在主机名中,转换为正则表达式
755
+ if (pattern.includes('*')) {
756
+ // 转义特殊字符,但保留 * 和 ?
757
+ const regexPattern = pattern
758
+ .replace(/[.+^${}()|[\]\\]/g, '\\$&') // 转义特殊字符
759
+ .replace(/\*/g, '.*') // * 匹配任意字符
760
+ .replace(/\?/g, '.'); // ? 匹配单个字符
761
+
762
+ const regex = new RegExp(`^${regexPattern}`);
763
+ return regex.test(url);
764
+ }
765
+
766
+ // 精确匹配或前缀匹配
767
+ return url === pattern || url.startsWith(pattern + '/');
768
+ }
769
+
770
+ // 处理其他模式:转换为正则表达式
771
+ // 转义特殊字符,但保留 * 和 ?
772
+ const regexPattern = pattern
773
+ .replace(/[.+^${}()|[\]\\]/g, '\\$&') // 转义特殊字符
774
+ .replace(/\*/g, '.*') // * 匹配任意字符
775
+ .replace(/\?/g, '.'); // ? 匹配单个字符
776
+
777
+ const regex = new RegExp(`^${regexPattern}`);
778
+ return regex.test(url);
779
+ });
780
+
781
+ if (!isAllowed) {
782
+ throw new Error(`URL不在白名单中: ${url}。请添加到环境变量 ALLOWED_URLS`);
783
+ }
784
+ } catch (error) {
785
+ if (error.message.includes('不在白名单') || error.message.includes('不支持的协议')) {
786
+ throw error;
787
+ }
788
+ throw new Error(`无效的URL格式: ${url}`);
789
+ }
790
+ },
791
+
792
+ /**
793
+ * 检测文件类型
794
+ */
795
+ async detectFileType(filePathOrUrl, fileData) {
796
+ // 从URL或路径获取扩展名
797
+ let ext = '';
798
+ try {
799
+ const url = new URL(filePathOrUrl);
800
+ ext = path.extname(url.pathname).toLowerCase();
801
+ } catch {
802
+ ext = path.extname(filePathOrUrl).toLowerCase();
803
+ }
804
+
805
+ // 检测MIME类型
806
+ let mimeType = fileData.contentType || 'application/octet-stream';
807
+ try {
808
+ const mimeTypes = await this.importToolModule('mime-types');
809
+ const detectedMime = mimeTypes.default?.lookup?.(filePathOrUrl) || mimeTypes.lookup?.(filePathOrUrl);
810
+ if (detectedMime) {
811
+ mimeType = detectedMime;
812
+ }
813
+ } catch (error) {
814
+ // mime-types 不可用,使用默认值
815
+ }
816
+
817
+ // 根据扩展名和MIME类型判断文件类型
818
+ const typeMap = {
819
+ // 文本类型
820
+ text: ['.txt', '.md', '.markdown', '.log', '.csv', '.text'],
821
+ code: ['.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.cpp', '.c', '.h', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala', '.sh', '.bash', '.zsh', '.ps1', '.bat', '.cmd'],
822
+ config: ['.json', '.yaml', '.yml', '.toml', '.ini', '.conf', '.config'],
823
+ markup: ['.html', '.htm', '.xml', '.svg', '.xhtml'],
824
+ css: ['.css', '.scss', '.sass', '.less'],
825
+
826
+ // 图片类型
827
+ image: ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico', '.svg'],
828
+
829
+ // 视频类型
830
+ video: ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm', '.mkv', '.m4v'],
831
+
832
+ // 音频类型
833
+ audio: ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a', '.wma'],
834
+
835
+ // 文档类型
836
+ document: ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'],
837
+
838
+ // 压缩文件
839
+ archive: ['.zip', '.tar', '.gz', '.rar', '.7z', '.bz2']
840
+ };
841
+
842
+ // 查找文件类型
843
+ let detectedType = 'binary';
844
+ for (const [type, extensions] of Object.entries(typeMap)) {
845
+ if (extensions.includes(ext)) {
846
+ detectedType = type;
847
+ break;
848
+ }
849
+ }
850
+
851
+ // 根据MIME类型进一步确认
852
+ if (mimeType.startsWith('text/')) {
853
+ detectedType = 'text';
854
+ } else if (mimeType.startsWith('image/')) {
855
+ detectedType = 'image';
856
+ } else if (mimeType.startsWith('video/')) {
857
+ detectedType = 'video';
858
+ } else if (mimeType.startsWith('audio/')) {
859
+ detectedType = 'audio';
860
+ } else if (mimeType.includes('json')) {
861
+ detectedType = 'json';
862
+ } else if (mimeType.includes('xml')) {
863
+ detectedType = 'xml';
864
+ }
865
+
866
+ return {
867
+ type: detectedType,
868
+ mimeType: mimeType,
869
+ extension: ext
870
+ };
871
+ },
872
+
873
+ /**
874
+ * 转换为模型友好格式
875
+ */
876
+ async convertToModelFormat(fileData, fileType, params) {
877
+ const { api } = this;
878
+ const convertTo = params.convertTo || 'auto';
879
+
880
+ api?.logger?.info('开始转换文件格式', {
881
+ fileType: fileType.type,
882
+ convertTo
883
+ });
884
+
885
+ // 根据文件类型和转换目标选择转换策略
886
+ const targetFormat = convertTo === 'auto'
887
+ ? this.getDefaultConversionFormat(fileType.type)
888
+ : convertTo;
889
+
890
+ switch (targetFormat) {
891
+ case 'text':
892
+ return await this.convertToText(fileData, fileType, params);
893
+
894
+ case 'json':
895
+ return await this.convertToJson(fileData, fileType, params);
896
+
897
+ case 'xml':
898
+ return await this.convertToXml(fileData, fileType, params);
899
+
900
+ case 'image':
901
+ return await this.convertToImage(fileData, fileType, params);
902
+
903
+ case 'video':
904
+ case 'audio':
905
+ return await this.convertToMedia(fileData, fileType, params);
906
+
907
+ default:
908
+ // 二进制文件或未知类型
909
+ return await this.convertToBinary(fileData, fileType, params);
910
+ }
911
+ },
912
+
913
+ /**
914
+ * 获取默认转换格式
915
+ */
916
+ getDefaultConversionFormat(fileType) {
917
+ const formatMap = {
918
+ 'text': 'text',
919
+ 'code': 'text',
920
+ 'config': 'json', // JSON/YAML等配置文件
921
+ 'markup': 'text', // HTML/XML等标记语言
922
+ 'css': 'text',
923
+ 'image': 'image',
924
+ 'video': 'video',
925
+ 'audio': 'audio',
926
+ 'document': 'text', // PDF等文档(需要特殊处理)
927
+ 'archive': 'binary',
928
+ 'binary': 'binary'
929
+ };
930
+
931
+ return formatMap[fileType] || 'text';
932
+ },
933
+
934
+ /**
935
+ * 转换为文本格式
936
+ */
937
+ async convertToText(fileData, fileType, params) {
938
+ const { api } = this;
939
+
940
+ // 检测编码
941
+ let encoding = params.encoding || 'auto';
942
+ if (encoding === 'auto') {
943
+ encoding = this.detectEncoding(fileData.buffer);
944
+ }
945
+
946
+ // 解码文本
947
+ let text;
948
+ if (encoding === 'utf-8') {
949
+ text = fileData.buffer.toString('utf-8');
950
+ } else {
951
+ // 使用iconv-lite进行编码转换
952
+ const iconvModule = await this.importToolModule('iconv-lite');
953
+ const iconv = iconvModule.default || iconvModule;
954
+ text = iconv.decode(fileData.buffer, encoding);
955
+ }
956
+
957
+ return {
958
+ content: text,
959
+ format: 'text',
960
+ conversion: 'text_decode',
961
+ encoding: encoding
962
+ };
963
+ },
964
+
965
+ /**
966
+ * 转换为JSON格式
967
+ */
968
+ async convertToJson(fileData, fileType, params) {
969
+ const { api } = this;
970
+
971
+ // 先转换为文本
972
+ const textResult = await this.convertToText(fileData, fileType, params);
973
+ const text = textResult.content;
974
+
975
+ try {
976
+ // 尝试解析JSON
977
+ const json = JSON.parse(text);
978
+
979
+ return {
980
+ content: json,
981
+ format: 'json',
982
+ conversion: 'json_parse',
983
+ originalText: text
984
+ };
985
+ } catch (error) {
986
+ // 如果不是有效的JSON,尝试解析YAML
987
+ if (fileType.extension === '.yaml' || fileType.extension === '.yml') {
988
+ try {
989
+ const yamlModule = await this.importToolModule('yaml');
990
+ const yaml = yamlModule.default || yamlModule;
991
+ const json = yaml.parse?.(text) || yaml.parse(text);
992
+
993
+ return {
994
+ content: json,
995
+ format: 'json',
996
+ conversion: 'yaml_to_json',
997
+ originalText: text
998
+ };
999
+ } catch (yamlError) {
1000
+ throw new Error(`无法解析为JSON或YAML: ${error.message}`);
1001
+ }
1002
+ }
1003
+
1004
+ throw new Error(`无效的JSON格式: ${error.message}`);
1005
+ }
1006
+ },
1007
+
1008
+ /**
1009
+ * 转换为XML格式
1010
+ */
1011
+ async convertToXml(fileData, fileType, params) {
1012
+ const { api } = this;
1013
+
1014
+ // 先转换为文本
1015
+ const textResult = await this.convertToText(fileData, fileType, params);
1016
+ const text = textResult.content;
1017
+
1018
+ try {
1019
+ // 简单的XML解析(转换为对象)
1020
+ // 注意:这里使用简单的正则解析,复杂XML可能需要专门的库
1021
+ const xmlObject = this.parseXmlToObject(text);
1022
+
1023
+ return {
1024
+ content: xmlObject,
1025
+ format: 'xml',
1026
+ conversion: 'xml_parse',
1027
+ originalText: text
1028
+ };
1029
+ } catch (error) {
1030
+ throw new Error(`无效的XML格式: ${error.message}`);
1031
+ }
1032
+ },
1033
+
1034
+ /**
1035
+ * 转换为图片格式(base64 + 描述)
1036
+ */
1037
+ async convertToImage(fileData, fileType, params) {
1038
+ const { api } = this;
1039
+
1040
+ // 转换为base64
1041
+ const base64 = fileData.buffer.toString('base64');
1042
+ const dataUrl = `data:${fileType.mimeType};base64,${base64}`;
1043
+
1044
+ // 获取图片基本信息
1045
+ const size = fileData.buffer.length;
1046
+ const dimensions = await this.getImageDimensions(fileData.buffer, fileType);
1047
+
1048
+ return {
1049
+ content: {
1050
+ dataUrl: dataUrl,
1051
+ base64: base64,
1052
+ mimeType: fileType.mimeType,
1053
+ dimensions: dimensions,
1054
+ size: size,
1055
+ description: `图片文件,格式: ${fileType.mimeType},大小: ${(size / 1024).toFixed(2)}KB${dimensions ? `,尺寸: ${dimensions.width}x${dimensions.height}` : ''}`
1056
+ },
1057
+ format: 'image',
1058
+ conversion: 'image_base64'
1059
+ };
1060
+ },
1061
+
1062
+ /**
1063
+ * 转换为媒体格式(视频/音频元信息)
1064
+ */
1065
+ async convertToMedia(fileData, fileType, params) {
1066
+ const { api } = this;
1067
+
1068
+ // 对于视频和音频,我们只提取元信息,不提取完整内容
1069
+ const size = fileData.buffer.length;
1070
+
1071
+ return {
1072
+ content: {
1073
+ type: fileType.type,
1074
+ mimeType: fileType.mimeType,
1075
+ size: size,
1076
+ sizeFormatted: this.formatFileSize(size),
1077
+ description: `${fileType.type === 'video' ? '视频' : '音频'}文件,格式: ${fileType.mimeType},大小: ${this.formatFileSize(size)}。注意:此工具仅提取元信息,不提取完整内容。`
1078
+ },
1079
+ format: fileType.type,
1080
+ conversion: 'media_metadata'
1081
+ };
1082
+ },
1083
+
1084
+ /**
1085
+ * 转换为二进制格式
1086
+ */
1087
+ async convertToBinary(fileData, fileType, params) {
1088
+ const { api } = this;
1089
+
1090
+ // 对于二进制文件,转换为base64
1091
+ const base64 = fileData.buffer.toString('base64');
1092
+
1093
+ return {
1094
+ content: {
1095
+ base64: base64,
1096
+ mimeType: fileType.mimeType,
1097
+ size: fileData.buffer.length,
1098
+ description: `二进制文件,格式: ${fileType.mimeType},大小: ${this.formatFileSize(fileData.buffer.length)}`
1099
+ },
1100
+ format: 'binary',
1101
+ conversion: 'binary_base64'
1102
+ };
1103
+ },
1104
+
1105
+ /**
1106
+ * 检测文本编码
1107
+ */
1108
+ detectEncoding(buffer) {
1109
+ // 简单的UTF-8检测
1110
+ // 检查BOM
1111
+ if (buffer.length >= 3 && buffer[0] === 0xEF && buffer[1] === 0xBB && buffer[2] === 0xBF) {
1112
+ return 'utf-8';
1113
+ }
1114
+
1115
+ // 尝试UTF-8解码
1116
+ try {
1117
+ const text = buffer.toString('utf-8');
1118
+ // 检查是否包含无效字符
1119
+ if (!/[^\x00-\x7F]/.test(text) || buffer.toString('utf-8').length === buffer.length) {
1120
+ return 'utf-8';
1121
+ }
1122
+ } catch {
1123
+ // UTF-8解码失败
1124
+ }
1125
+
1126
+ // 默认返回utf-8,如果失败可以在convertToText中处理
1127
+ return 'utf-8';
1128
+ },
1129
+
1130
+ /**
1131
+ * 简单的XML解析(转换为对象)
1132
+ */
1133
+ parseXmlToObject(xmlString) {
1134
+ // 这是一个简化的XML解析器
1135
+ // 对于复杂XML,建议使用专门的库如xml2js
1136
+ const result = {};
1137
+
1138
+ // 移除XML声明和注释
1139
+ xmlString = xmlString.replace(/<\?xml[^>]*\?>/g, '');
1140
+ xmlString = xmlString.replace(/<!--[\s\S]*?-->/g, '');
1141
+
1142
+ // 简单的标签提取
1143
+ const tagRegex = /<(\w+)([^>]*)>([\s\S]*?)<\/\1>/g;
1144
+ let match;
1145
+
1146
+ while ((match = tagRegex.exec(xmlString)) !== null) {
1147
+ const tagName = match[1];
1148
+ const content = match[3].trim();
1149
+
1150
+ if (!result[tagName]) {
1151
+ result[tagName] = [];
1152
+ }
1153
+
1154
+ result[tagName].push(content);
1155
+ }
1156
+
1157
+ return result;
1158
+ },
1159
+
1160
+ /**
1161
+ * 获取图片尺寸
1162
+ */
1163
+ async getImageDimensions(buffer, fileType) {
1164
+ // 这是一个简化的实现
1165
+ // 对于生产环境,建议使用专门的图片处理库如sharp或jimp
1166
+ try {
1167
+ // PNG: 前8字节是签名,然后4字节宽度,4字节高度
1168
+ if (fileType.mimeType === 'image/png') {
1169
+ if (buffer.length >= 24) {
1170
+ const width = buffer.readUInt32BE(16);
1171
+ const height = buffer.readUInt32BE(20);
1172
+ return { width, height };
1173
+ }
1174
+ }
1175
+
1176
+ // JPEG: 更复杂,需要查找SOF标记
1177
+ if (fileType.mimeType === 'image/jpeg') {
1178
+ // 简化实现:返回null,表示无法获取
1179
+ return null;
1180
+ }
1181
+ } catch (error) {
1182
+ // 解析失败,返回null
1183
+ }
1184
+
1185
+ return null;
1186
+ },
1187
+
1188
+ /**
1189
+ * 格式化文件大小
1190
+ */
1191
+ formatFileSize(bytes) {
1192
+ if (bytes < 1024) return bytes + ' B';
1193
+ if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(2) + ' KB';
1194
+ if (bytes < 1024 * 1024 * 1024) return (bytes / (1024 * 1024)).toFixed(2) + ' MB';
1195
+ return (bytes / (1024 * 1024 * 1024)).toFixed(2) + ' GB';
1196
+ },
1197
+
1198
+ /**
1199
+ * 解析相对路径URL
1200
+ */
1201
+ resolveRelativeUrl(url, basePath) {
1202
+ // 如果URL已经是绝对路径(http://, https://, file://, /开头),直接返回
1203
+ if (url.startsWith('http://') || url.startsWith('https://') ||
1204
+ url.startsWith('file://') ||
1205
+ (process.platform === 'win32' && /^[A-Za-z]:/.test(url))) {
1206
+ return url;
1207
+ }
1208
+
1209
+ // 如果没有basePath,无法解析相对路径
1210
+ if (!basePath) {
1211
+ return url;
1212
+ }
1213
+
1214
+ try {
1215
+ // 解析basePath,获取目录路径
1216
+ const baseUrlInfo = this.parseUrl(basePath);
1217
+
1218
+ if (baseUrlInfo.isLocal && baseUrlInfo.path) {
1219
+ // 本地文件:获取文件所在目录
1220
+ const baseDir = path.dirname(baseUrlInfo.path);
1221
+ // 解析相对路径
1222
+ const resolvedPath = path.resolve(baseDir, url);
1223
+ return resolvedPath;
1224
+ } else if (baseUrlInfo.url && (baseUrlInfo.url.startsWith('http://') || baseUrlInfo.url.startsWith('https://'))) {
1225
+ // 远程URL:使用URL对象解析相对路径
1226
+ const baseUrlObj = new URL(baseUrlInfo.url);
1227
+ // 获取基础目录(去掉文件名)
1228
+ const basePathname = baseUrlObj.pathname;
1229
+ const baseDir = basePathname.substring(0, basePathname.lastIndexOf('/') + 1);
1230
+ // 解析相对路径(URL风格的路径拼接)
1231
+ const resolvedPathname = this.resolveUrlPath(baseDir, url);
1232
+ // 构建完整的URL
1233
+ const resolvedUrl = new URL(resolvedPathname, baseUrlObj);
1234
+ return resolvedUrl.toString();
1235
+ } else {
1236
+ // 无法解析,返回原始URL
1237
+ return url;
1238
+ }
1239
+ } catch (error) {
1240
+ // 解析失败,返回原始URL
1241
+ return url;
1242
+ }
1243
+ },
1244
+
1245
+ /**
1246
+ * 解析URL路径(类似path.resolve,但用于URL路径)
1247
+ */
1248
+ resolveUrlPath(basePath, relativePath) {
1249
+ // 规范化basePath(确保以/结尾)
1250
+ if (!basePath.endsWith('/')) {
1251
+ const lastSlash = basePath.lastIndexOf('/');
1252
+ if (lastSlash >= 0) {
1253
+ basePath = basePath.substring(0, lastSlash + 1);
1254
+ } else {
1255
+ basePath = '/';
1256
+ }
1257
+ }
1258
+
1259
+ // 处理相对路径
1260
+ const parts = [];
1261
+ const baseParts = basePath.split('/').filter(p => p && p !== '.');
1262
+ const relParts = relativePath.split('/').filter(p => p);
1263
+
1264
+ // 从basePath开始
1265
+ parts.push(...baseParts);
1266
+
1267
+ // 处理相对路径的每个部分
1268
+ for (const part of relParts) {
1269
+ if (part === '.') {
1270
+ // 当前目录,忽略
1271
+ continue;
1272
+ } else if (part === '..') {
1273
+ // 上级目录,移除最后一个部分
1274
+ if (parts.length > 0) {
1275
+ parts.pop();
1276
+ }
1277
+ } else {
1278
+ // 普通路径部分
1279
+ parts.push(part);
1280
+ }
1281
+ }
1282
+
1283
+ // 构建最终路径(确保以/开头)
1284
+ return '/' + parts.join('/');
1285
+ },
1286
+
1287
+ /**
1288
+ * 生成缓存键
1289
+ */
1290
+ getCacheKey(url, params) {
1291
+ // 使用URL和关键参数生成缓存键
1292
+ const keyParts = [
1293
+ url,
1294
+ params.convertTo || 'auto',
1295
+ params.encoding || 'auto',
1296
+ params.extractLinks ? '1' : '0'
1297
+ ];
1298
+ return `file-reader:${keyParts.join(':')}`;
1299
+ },
1300
+
1301
+ /**
1302
+ * 获取缓存数据
1303
+ */
1304
+ getCachedData(cacheKey, params) {
1305
+ const { api } = this;
1306
+
1307
+ try {
1308
+ const cacheData = api.storage.getItem(cacheKey);
1309
+ if (!cacheData) {
1310
+ return null;
1311
+ }
1312
+
1313
+ // 检查是否过期
1314
+ const cacheTimeout = params.cacheTimeout ||
1315
+ parseInt(api.environment.get('DEFAULT_CACHE_TIMEOUT') || '600000', 10);
1316
+ const now = Date.now();
1317
+ const cacheAge = now - (cacheData.timestamp || 0);
1318
+
1319
+ if (cacheAge > cacheTimeout) {
1320
+ // 缓存已过期,删除
1321
+ api.storage.setItem(cacheKey, null);
1322
+ api?.logger?.debug('缓存已过期', { cacheKey, cacheAge, cacheTimeout });
1323
+ return null;
1324
+ }
1325
+
1326
+ // 返回缓存数据(不包含timestamp)
1327
+ const { timestamp, ...cachedResult } = cacheData;
1328
+ api?.logger?.debug('缓存命中', { cacheKey, cacheAge });
1329
+ return cachedResult;
1330
+ } catch (error) {
1331
+ api?.logger?.warn('读取缓存失败', { cacheKey, error: error.message });
1332
+ return null;
1333
+ }
1334
+ },
1335
+
1336
+ /**
1337
+ * 设置缓存数据
1338
+ */
1339
+ setCachedData(cacheKey, data, params) {
1340
+ const { api } = this;
1341
+
1342
+ try {
1343
+ const cacheData = {
1344
+ ...data,
1345
+ timestamp: Date.now()
1346
+ };
1347
+ api.storage.setItem(cacheKey, cacheData);
1348
+ api?.logger?.debug('缓存已保存', { cacheKey });
1349
+ } catch (error) {
1350
+ api?.logger?.warn('保存缓存失败', { cacheKey, error: error.message });
1351
+ }
1352
+ },
1353
+
1354
+ /**
1355
+ * 清理过期缓存
1356
+ */
1357
+ cleanExpiredCache() {
1358
+ const { api } = this;
1359
+
1360
+ try {
1361
+ // 获取所有缓存键(这里需要遍历storage,但Storage API可能不支持)
1362
+ // 简化实现:在每次读取时检查过期,这里只记录最后清理时间
1363
+ const lastCleanTime = api.storage.getItem('file-reader:last-clean-time') || 0;
1364
+ const now = Date.now();
1365
+
1366
+ // 每5分钟清理一次
1367
+ if (now - lastCleanTime > 5 * 60 * 1000) {
1368
+ api.storage.setItem('file-reader:last-clean-time', now);
1369
+ api?.logger?.debug('缓存清理完成');
1370
+ }
1371
+ } catch (error) {
1372
+ api?.logger?.warn('清理缓存失败', { error: error.message });
1373
+ }
1374
+ },
1375
+
1376
+ /**
1377
+ * 从文本中提取链接
1378
+ */
1379
+ extractLinksFromText(text) {
1380
+ const links = [];
1381
+
1382
+ // 匹配Markdown链接: [text](url)
1383
+ const markdownLinkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
1384
+ let match;
1385
+ while ((match = markdownLinkRegex.exec(text)) !== null) {
1386
+ links.push({
1387
+ text: match[1],
1388
+ url: match[2],
1389
+ type: 'markdown'
1390
+ });
1391
+ }
1392
+
1393
+ // 匹配HTML链接: <a href="url">text</a>
1394
+ const htmlLinkRegex = /<a\s+[^>]*href=["']([^"']+)["'][^>]*>([^<]+)<\/a>/gi;
1395
+ while ((match = htmlLinkRegex.exec(text)) !== null) {
1396
+ links.push({
1397
+ text: match[2],
1398
+ url: match[1],
1399
+ type: 'html'
1400
+ });
1401
+ }
1402
+
1403
+ // 匹配直接URL: http://, https://, file://
1404
+ const urlRegex = /(https?:\/\/[^\s<>"']+|file:\/\/[^\s<>"']+)/g;
1405
+ while ((match = urlRegex.exec(text)) !== null) {
1406
+ links.push({
1407
+ text: match[1],
1408
+ url: match[1],
1409
+ type: 'direct'
1410
+ });
1411
+ }
1412
+
1413
+ // 匹配相对路径: ./path, ../path (在特定上下文中)
1414
+ // 注意:不匹配以/开头的绝对路径,因为可能是URL路径的一部分
1415
+ const relativePathRegex = /(?:^|\s|\(|\[)(\.\.?\/[^\s<>"')]+)/g;
1416
+ while ((match = relativePathRegex.exec(text)) !== null) {
1417
+ const url = match[1].trim();
1418
+ // 过滤掉明显不是文件路径的内容
1419
+ if (!url.includes('://') && (url.includes('.') || url.includes('/'))) {
1420
+ // 移除可能的括号
1421
+ const cleanUrl = url.replace(/^[(\[]/, '').replace(/[)\]]$/, '');
1422
+ links.push({
1423
+ text: cleanUrl,
1424
+ url: cleanUrl,
1425
+ type: 'relative'
1426
+ });
1427
+ }
1428
+ }
1429
+
1430
+ return links;
1431
+ },
1432
+
1433
+ /**
1434
+ * 提取并读取嵌套链接
1435
+ */
1436
+ async extractAndReadLinks(content, baseUrl, params, currentDepth) {
1437
+ const { api } = this;
1438
+
1439
+ if (typeof content !== 'string') {
1440
+ return [];
1441
+ }
1442
+
1443
+ // 提取链接
1444
+ const links = this.extractLinksFromText(content);
1445
+ api?.logger?.info('提取到链接', { count: links.length, depth: currentDepth });
1446
+
1447
+ if (links.length === 0) {
1448
+ return [];
1449
+ }
1450
+
1451
+ // 读取每个链接的内容
1452
+ const nestedLinks = [];
1453
+ const maxDepth = params.maxDepth || parseInt(api.environment.get('MAX_RECURSION_DEPTH') || '3', 10);
1454
+
1455
+ for (const link of links) {
1456
+ try {
1457
+ // 解析相对路径
1458
+ const resolvedUrl = this.resolveRelativeUrl(link.url, baseUrl);
1459
+
1460
+ // 递归读取(使用内部方法,避免递归调用execute)
1461
+ const nestedResult = await this.readFileInternal({
1462
+ url: resolvedUrl,
1463
+ basePath: resolvedUrl, // 更新basePath为当前文件路径
1464
+ convertTo: params.convertTo,
1465
+ encoding: params.encoding,
1466
+ includeMetadata: params.includeMetadata,
1467
+ extractLinks: true, // 继续提取嵌套链接
1468
+ maxDepth: params.maxDepth,
1469
+ _currentDepth: currentDepth, // 传递当前深度
1470
+ useCache: true, // 使用缓存避免重复读取
1471
+ cacheTimeout: params.cacheTimeout
1472
+ });
1473
+
1474
+ nestedLinks.push({
1475
+ url: link.url,
1476
+ resolvedUrl: resolvedUrl,
1477
+ text: link.text,
1478
+ type: link.type,
1479
+ content: nestedResult.content,
1480
+ format: nestedResult.format,
1481
+ size: nestedResult.file?.size || 0,
1482
+ depth: currentDepth,
1483
+ nestedLinks: nestedResult.nestedLinks || []
1484
+ });
1485
+
1486
+ api?.logger?.info('嵌套链接读取成功', {
1487
+ url: link.url,
1488
+ resolvedUrl: resolvedUrl,
1489
+ depth: currentDepth
1490
+ });
1491
+ } catch (error) {
1492
+ api?.logger?.warn('读取嵌套链接失败', {
1493
+ url: link.url,
1494
+ error: error.message
1495
+ });
1496
+ // 继续处理其他链接,不中断
1497
+ }
1498
+ }
1499
+
1500
+ return nestedLinks;
1501
+ },
1502
+
1503
+ /**
1504
+ * 将链接内容嵌入到原文本中
1505
+ */
1506
+ embedLinksInContent(originalContent, nestedLinks) {
1507
+ let embeddedContent = originalContent;
1508
+
1509
+ // 为每个链接创建嵌入内容
1510
+ for (const link of nestedLinks) {
1511
+ const linkPlaceholder = `[${link.text}](${link.url})`;
1512
+ const linkContent = typeof link.content === 'string'
1513
+ ? link.content
1514
+ : JSON.stringify(link.content, null, 2);
1515
+
1516
+ // 创建嵌入块
1517
+ const embedBlock = `\n\n---\n**链接内容: ${link.text}** (${link.url})\n---\n${linkContent}\n---\n`;
1518
+
1519
+ // 替换链接为嵌入内容
1520
+ // 处理Markdown链接
1521
+ const markdownPattern = new RegExp(`\\[${this.escapeRegex(link.text)}\\]\\(${this.escapeRegex(link.url)}\\)`, 'g');
1522
+ embeddedContent = embeddedContent.replace(markdownPattern, embedBlock);
1523
+
1524
+ // 处理HTML链接
1525
+ const htmlPattern = new RegExp(`<a[^>]*href=["']${this.escapeRegex(link.url)}["'][^>]*>${this.escapeRegex(link.text)}<\\/a>`, 'gi');
1526
+ embeddedContent = embeddedContent.replace(htmlPattern, embedBlock);
1527
+
1528
+ // 处理直接URL
1529
+ if (link.type === 'direct' || link.type === 'relative') {
1530
+ const urlPattern = new RegExp(this.escapeRegex(link.url), 'g');
1531
+ embeddedContent = embeddedContent.replace(urlPattern, embedBlock);
1532
+ }
1533
+ }
1534
+
1535
+ return embeddedContent;
1536
+ },
1537
+
1538
+ /**
1539
+ * 转义正则表达式特殊字符
1540
+ */
1541
+ escapeRegex(str) {
1542
+ return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
1543
+ }
1544
+ };
1545
+