@becrafter/prompt-manager 0.0.18 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/IFLOW.md +175 -0
- package/README.md +145 -234
- package/app/desktop/assets/app.1.png +0 -0
- package/app/desktop/assets/app.png +0 -0
- package/app/desktop/assets/icons/icon.icns +0 -0
- package/app/desktop/assets/icons/icon.ico +0 -0
- package/app/desktop/assets/icons/icon.png +0 -0
- package/app/desktop/assets/icons/tray.png +0 -0
- package/app/desktop/assets/templates/about.html +147 -0
- package/app/desktop/assets/tray.png +0 -0
- package/app/desktop/main.js +187 -732
- package/app/desktop/package-lock.json +723 -522
- package/app/desktop/package.json +54 -25
- package/app/desktop/preload.js +7 -0
- package/app/desktop/src/core/error-handler.js +108 -0
- package/app/desktop/src/core/event-emitter.js +84 -0
- package/app/desktop/src/core/logger.js +108 -0
- package/app/desktop/src/core/state-manager.js +125 -0
- package/app/desktop/src/services/module-loader.js +214 -0
- package/app/desktop/src/services/runtime-manager.js +301 -0
- package/app/desktop/src/services/service-manager.js +169 -0
- package/app/desktop/src/services/update-manager.js +268 -0
- package/app/desktop/src/ui/about-dialog-manager.js +208 -0
- package/app/desktop/src/ui/admin-window-manager.js +757 -0
- package/app/desktop/src/ui/splash-manager.js +253 -0
- package/app/desktop/src/ui/tray-manager.js +186 -0
- package/app/desktop/src/utils/icon-manager.js +133 -0
- package/app/desktop/src/utils/path-utils.js +58 -0
- package/app/desktop/src/utils/resource-paths.js +49 -0
- package/app/desktop/src/utils/resource-sync.js +260 -0
- package/app/desktop/src/utils/runtime-sync.js +241 -0
- package/app/desktop/src/utils/template-renderer.js +284 -0
- package/app/desktop/src/utils/version-utils.js +59 -0
- package/examples/prompts/engineer/engineer-professional.yaml +92 -0
- package/examples/prompts/engineer/laowang-engineer.yaml +132 -0
- package/examples/prompts/engineer/nekomata-engineer.yaml +123 -0
- package/examples/prompts/engineer/ojousama-engineer.yaml +124 -0
- package/examples/prompts/recommend/human_3-0_growth_diagnostic_coach_prompt.yaml +105 -0
- package/examples/prompts/workflow/sixstep-workflow.yaml +192 -0
- package/package.json +18 -9
- package/packages/admin-ui/.babelrc +3 -0
- package/packages/admin-ui/admin.html +237 -4784
- package/packages/admin-ui/css/main.css +2592 -0
- package/packages/admin-ui/css/recommended-prompts.css +610 -0
- package/packages/admin-ui/package-lock.json +6981 -0
- package/packages/admin-ui/package.json +36 -0
- package/packages/admin-ui/src/codemirror.js +53 -0
- package/packages/admin-ui/src/index.js +3188 -0
- package/packages/admin-ui/webpack.config.js +76 -0
- package/packages/resources/tools/chrome-devtools/README.md +310 -0
- package/packages/resources/tools/chrome-devtools/chrome-devtools.tool.js +1703 -0
- package/packages/resources/tools/file-reader/README.md +289 -0
- package/packages/resources/tools/file-reader/file-reader.tool.js +1545 -0
- package/packages/resources/tools/filesystem/README.md +359 -0
- package/packages/resources/tools/filesystem/filesystem.tool.js +538 -0
- package/packages/resources/tools/ollama-remote/README.md +192 -0
- package/packages/resources/tools/ollama-remote/ollama-remote.tool.js +421 -0
- package/packages/resources/tools/pdf-reader/README.md +236 -0
- package/packages/resources/tools/pdf-reader/pdf-reader.tool.js +565 -0
- package/packages/resources/tools/playwright/README.md +306 -0
- package/packages/resources/tools/playwright/playwright.tool.js +1186 -0
- package/packages/resources/tools/todolist/README.md +394 -0
- package/packages/resources/tools/todolist/todolist.tool.js +1312 -0
- package/packages/server/README.md +142 -0
- package/packages/server/api/admin.routes.js +42 -11
- package/packages/server/api/surge.routes.js +43 -0
- package/packages/server/app.js +119 -14
- package/packages/server/index.js +39 -0
- package/packages/server/mcp/mcp.server.js +346 -28
- package/packages/server/mcp/{mcp.handler.js → prompt.handler.js} +108 -9
- package/packages/server/mcp/sequential-thinking.handler.js +318 -0
- package/packages/server/mcp/think-plan.handler.js +274 -0
- package/packages/server/middlewares/auth.middleware.js +6 -0
- package/packages/server/package.json +51 -0
- package/packages/server/server.js +37 -1
- package/packages/server/toolm/index.js +9 -0
- package/packages/server/toolm/package-installer.service.js +267 -0
- package/packages/server/toolm/test-tools.js +264 -0
- package/packages/server/toolm/tool-context.service.js +334 -0
- package/packages/server/toolm/tool-dependency.service.js +168 -0
- package/packages/server/toolm/tool-description-generator-optimized.service.js +375 -0
- package/packages/server/toolm/tool-description-generator.service.js +312 -0
- package/packages/server/toolm/tool-environment.service.js +200 -0
- package/packages/server/toolm/tool-execution.service.js +277 -0
- package/packages/server/toolm/tool-loader.service.js +219 -0
- package/packages/server/toolm/tool-logger.service.js +223 -0
- package/packages/server/toolm/tool-manager.handler.js +65 -0
- package/packages/server/toolm/tool-manual-generator.service.js +389 -0
- package/packages/server/toolm/tool-mode-handlers.service.js +224 -0
- package/packages/server/toolm/tool-storage.service.js +111 -0
- package/packages/server/toolm/tool-sync.service.js +138 -0
- package/packages/server/toolm/tool-utils.js +20 -0
- package/packages/server/toolm/tool-yaml-parser.service.js +81 -0
- package/packages/server/toolm/validate-system.js +421 -0
- package/packages/server/utils/config.js +49 -5
- package/packages/server/utils/util.js +65 -10
- package/scripts/build-icons.js +135 -0
- package/scripts/build.sh +57 -0
- package/scripts/surge/CNAME +1 -0
- package/scripts/surge/README.md +47 -0
- package/scripts/surge/package-lock.json +34 -0
- package/scripts/surge/package.json +20 -0
- package/scripts/surge/sync-to-surge.js +151 -0
- package/packages/admin-ui/js/closebrackets.min.js +0 -8
- package/packages/admin-ui/js/codemirror.min.js +0 -8
- package/packages/admin-ui/js/js-yaml.min.js +0 -2
- package/packages/admin-ui/js/markdown.min.js +0 -8
- /package/app/desktop/assets/{icon.png → tray.1.png} +0 -0
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF Reader - 纯 Node.js 实现的 PDF 分页阅读工具(带智能缓存)
|
|
3
|
+
*
|
|
4
|
+
* 战略意义:
|
|
5
|
+
* 1. 架构隔离性 - 专门的PDF读取工具,确保AI操作安全
|
|
6
|
+
* 2. 平台独立性 - 不依赖特定AI平台的PDF读取能力
|
|
7
|
+
* 3. 生态自主性 - 作为Prompt Manager生态的关键组件
|
|
8
|
+
*
|
|
9
|
+
* 设计理念:
|
|
10
|
+
* - 不传 pages:只返回 PDF 元信息(总页数、标题等)
|
|
11
|
+
* - 传 pages:提取指定页的文本和图片
|
|
12
|
+
* - 图片存储在 toolbox 目录,统一管理
|
|
13
|
+
* - 使用 storage 缓存解析状态,避免重复解析
|
|
14
|
+
*
|
|
15
|
+
* 生态定位:
|
|
16
|
+
* 为 AI 提供高效的 PDF 分页阅读能力
|
|
17
|
+
* 与 Claude Code 的 Read 工具无缝配合
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import path from 'path';
|
|
21
|
+
import os from 'os';
|
|
22
|
+
|
|
23
|
+
/**
 * Tells whether `candidate` should be driven through the class-based API
 * (`new PDFParse({ data })` + `getText()` / `getInfo()`) instead of being
 * called as a plain function.
 *
 * Checking the prototype for `getText` keeps detection working when the class
 * has been transpiled or minified and its source no longer starts with `class`.
 *
 * @param {*} candidate - Value picked from the pdf-parse module.
 * @returns {boolean} True when the value must be instantiated with `new`.
 */
function isParserClass(candidate) {
  if (typeof candidate !== 'function') {
    return false;
  }
  if (typeof candidate.prototype?.getText === 'function') {
    return true;
  }
  return Function.prototype.toString.call(candidate).startsWith('class');
}

/**
 * Picks the callable pdf-parse entry point out of an imported/required module.
 *
 * @param {*} moduleNamespace - Result of importing or requiring `pdf-parse`.
 * @returns {Function|undefined} The parser class/function, or undefined when
 *   the module exposes nothing callable.
 */
function pickPdfParseExport(moduleNamespace) {
  const candidates = [
    moduleNamespace?.PDFParse,
    // A CommonJS build loaded through import() may hide named exports under `default`.
    moduleNamespace?.default?.PDFParse,
    moduleNamespace?.default,
    moduleNamespace
  ];
  return candidates.find((candidate) => typeof candidate === 'function');
}

/**
 * Loads pdf-parse through the tool context, preferring the context's own
 * import/require hooks and falling back to a direct dynamic import.
 *
 * @param {object} tool - Tool context (`this` inside `execute`).
 * @param {object} [logger] - Optional logger with debug/info/error methods.
 * @returns {Promise<Function>} The parser class or legacy parser function.
 * @throws {Error} When the module cannot be loaded or exposes nothing callable.
 */
async function loadPdfParse(tool, logger) {
  try {
    let moduleNamespace;
    if (tool.importToolModule) {
      moduleNamespace = await tool.importToolModule('pdf-parse');
      logger?.debug('pdf-parse 模块导入结果', {
        hasDefault: !!moduleNamespace.default,
        type: typeof moduleNamespace.default,
        keys: Object.keys(moduleNamespace)
      });
    } else if (tool.requireToolModule) {
      moduleNamespace = tool.requireToolModule('pdf-parse');
    } else {
      moduleNamespace = await import('pdf-parse');
    }

    const PDFParse = pickPdfParseExport(moduleNamespace);

    // Only a callable is usable below; a plain object would fail later with
    // an unhelpful "PDFParse is not a function".
    if (typeof PDFParse !== 'function') {
      logger?.error('pdf-parse 模块格式错误', {
        type: typeof moduleNamespace,
        isObject: typeof moduleNamespace === 'object',
        keys: moduleNamespace && typeof moduleNamespace === 'object'
          ? Object.keys(moduleNamespace).slice(0, 10)
          : []
      });
      throw new Error(`pdf-parse 模块格式错误:无法找到 PDFParse 类或函数。请检查模块导出格式。`);
    }

    logger?.info('pdf-parse 模块加载成功', {
      type: typeof PDFParse,
      isClass: isParserClass(PDFParse)
    });
    return PDFParse;
  } catch (error) {
    logger?.error('加载 pdf-parse 模块失败', {
      error: error.message,
      stack: error.stack
    });
    throw new Error(
      `无法加载 pdf-parse 模块:${error.message}。请确保依赖已正确安装到工具的 node_modules 目录。`,
      { cause: error }
    );
  }
}

/**
 * Normalises a reported page count.
 *
 * @param {*} value - Page count reported by the parser.
 * @returns {number|null} The count when it is a positive integer, else null.
 */
function toPageCount(value) {
  return Number.isInteger(value) && value > 0 ? value : null;
}

/**
 * Builds a page-number -> text lookup from a parser result's `pages` array.
 *
 * @param {*} pageResults - Expected to be an array of `{ num, text }` entries.
 * @returns {Map<number, string>|null} The lookup, or null when the value does
 *   not have that shape (callers then fall back to line-based splitting).
 */
function indexPageTexts(pageResults) {
  if (!Array.isArray(pageResults) || pageResults.length === 0) {
    return null;
  }
  const textByNumber = new Map();
  for (const entry of pageResults) {
    if (!Number.isInteger(entry?.num) || typeof entry?.text !== 'string') {
      return null;
    }
    textByNumber.set(entry.num, entry.text);
  }
  return textByNumber;
}

/**
 * Parses a PDF buffer once and returns everything `execute` needs.
 *
 * Class-based parsers are instantiated, queried and always destroyed;
 * legacy parsers are called directly with the buffer.
 *
 * @param {Function} PDFParse - Parser class or legacy parser function.
 * @param {Buffer} pdfBuffer - Raw PDF bytes.
 * @param {{ withInfo?: boolean }} [options] - `withInfo` also queries
 *   `getInfo()` on class-based parsers (needed for title/author).
 * @returns {Promise<{text: string, info: object, metadata: object,
 *   pageCount: (number|null), pageTexts: (Map<number, string>|null)}>}
 */
async function parsePdfBuffer(PDFParse, pdfBuffer, { withInfo = false } = {}) {
  if (!isParserClass(PDFParse)) {
    // Legacy function API: the page count is reported as `numpages`.
    const legacyResult = await PDFParse(pdfBuffer);
    return {
      text: legacyResult?.text || '',
      info: legacyResult?.info || {},
      metadata: legacyResult?.metadata || {},
      pageCount: toPageCount(legacyResult?.numpages),
      pageTexts: null
    };
  }

  const parser = new PDFParse({ data: pdfBuffer });
  try {
    const textResult = await parser.getText();
    const infoResult = withInfo ? await parser.getInfo() : null;
    return {
      text: textResult?.text || '',
      info: infoResult?.info || {},
      metadata: infoResult?.metadata || {},
      // Class API: the page count is reported as `total` on the results.
      pageCount: toPageCount(infoResult?.total) ?? toPageCount(textResult?.total),
      pageTexts: indexPageTexts(textResult?.pages)
    };
  } finally {
    // Release parser resources even when extraction throws.
    if (typeof parser.destroy === 'function') {
      await parser.destroy();
    }
  }
}

/**
 * PDF Reader tool definition.
 *
 * Without `pages` the tool returns only PDF metadata; with `pages` it returns
 * the text of the requested pages. Parsing state is tracked in `api.storage`
 * under the `pdfs` key, indexed by the MD5 hash of the file content.
 */
const pdfReaderTool = {
  /**
   * Declares the npm dependencies of this tool.
   *
   * @returns {Object<string, string>} Package name -> semver range.
   */
  getDependencies() {
    return {
      'pdf-parse': '^2.1.10'
    };
  },

  /**
   * Describes the tool (identity, tags, usage scenarios and limitations).
   *
   * @returns {object} Tool metadata.
   */
  getMetadata() {
    return {
      id: 'pdf-reader',
      name: 'PDF Reader',
      description: 'PDF 分页阅读工具,支持按页码提取文本和图片,智能缓存避免重复解析',
      version: '2.1.0',
      category: 'utility',
      author: '鲁班',
      tags: ['pdf', 'reader', 'text', 'image', 'mcp', 'utility'],
      scenarios: [
        'PDF文本提取',
        'PDF图片提取',
        '分页阅读PDF',
        '文档内容分析',
        'PDF元信息获取'
      ],
      limitations: [
        '需要有效的PDF文件路径',
        '仅支持PDF格式文件',
        '大文件可能需要较长时间处理',
        '图片提取依赖于PDF的图片存储格式'
      ]
    };
  },

  /**
   * Returns the JSON-schema style description of the call parameters and of
   * the environment variables the tool understands.
   *
   * @returns {{parameters: object, environment: object}} Parameter schema.
   */
  getSchema() {
    return {
      parameters: {
        type: 'object',
        properties: {
          method: {
            type: 'string',
            description: 'MCP方法名',
            enum: [
              'read_pdf'
            ]
          },
          pdfPath: {
            type: 'string',
            description: 'PDF 文件的绝对路径',
            minLength: 1
          },
          pages: {
            oneOf: [
              { type: 'number', minimum: 1 },
              { type: 'array', items: { type: 'number', minimum: 1 } }
            ],
            description: '要读取的页码(可选)。不传则只返回 PDF 元信息;传数字读取单页;传数组读取多页'
          },
          extractImages: {
            type: 'boolean',
            description: '是否提取图片(默认 true)',
            default: true
          },
          forceRefresh: {
            type: 'boolean',
            description: '强制重新解析,忽略缓存(默认 false)',
            default: false
          }
        },
        required: ['method', 'pdfPath']
      },
      environment: {
        type: 'object',
        properties: {
          ALLOWED_DIRECTORIES: {
            type: 'string',
            description: '允许访问的目录列表(JSON数组格式),默认为 ["~/.prompt-manager"]',
            default: '["~/.prompt-manager"]'
          }
        },
        required: []
      }
    };
  },

  /**
   * Lists the business errors of this tool. Each entry pairs an error code
   * with the message pattern (`match`) that identifies it.
   *
   * @returns {Array<object>} Business error definitions.
   */
  getBusinessErrors() {
    return [
      {
        code: 'PATH_OUTSIDE_SCOPE',
        description: '路径越权访问',
        match: /路径越权/,
        solution: '确保路径在允许的目录范围内',
        retryable: false
      },
      {
        code: 'FILE_NOT_FOUND',
        description: 'PDF文件不存在',
        match: /ENOENT|no such file|cannot find/i,
        solution: '检查PDF文件路径是否正确',
        retryable: false
      },
      {
        code: 'INVALID_PDF',
        description: '无效的PDF文件',
        match: /Invalid PDF/i,
        solution: '检查PDF文件是否损坏',
        retryable: false
      },
      {
        code: 'PERMISSION_DENIED',
        description: '权限不足',
        match: /EACCES|permission denied/i,
        solution: '检查文件或目录的访问权限',
        retryable: false
      },
      {
        code: 'PAGE_OUT_OF_RANGE',
        description: '页码超出范围',
        match: /页码.*超出范围/i,
        solution: '检查请求的页码是否在PDF总页数范围内',
        retryable: false
      }
    ];
  },

  /**
   * Runs the tool.
   *
   * Relies on members supplied on `this` by the hosting framework:
   * `api` (logger + storage), `initializeFilesystem`, `resolvePromptManagerPath`,
   * `__toolDir`, and optionally `importToolModule` / `requireToolModule`.
   *
   * NOTE(review): image extraction is not performed here. The per-page image
   * list stays empty for freshly parsed pages, so `imageCount` is recorded as
   * 0; only the bookkeeping around images exists.
   *
   * @param {object} params - Call parameters (see `getSchema()`).
   * @param {string} params.method - Must be `read_pdf`.
   * @param {string} params.pdfPath - Path of the PDF to read.
   * @param {number|number[]} [params.pages] - Page number(s) to read; omit to
   *   get metadata only.
   * @param {boolean} [params.extractImages=true] - When false, cached page
   *   entries are not reused.
   * @param {boolean} [params.forceRefresh=false] - Ignore cached metadata and
   *   cached page entries.
   * @returns {Promise<object>} Metadata-only result, or metadata plus the
   *   content of the requested pages and cache statistics.
   * @throws {Error} On an unsupported method, missing parameters, an invalid
   *   or out-of-range page number, or any load/read/parse failure.
   */
  async execute(params) {
    const { api } = this;
    const logger = api?.logger;

    logger?.info('Executing PDF Reader operation', {
      method: params.method,
      pdfPath: params.pdfPath
    });

    // Schema validation is done by the framework; this only checks what each
    // method needs.
    const methodRequirements = {
      'read_pdf': ['pdfPath']
    };

    // Own-property lookup so names such as "toString" are reported as
    // unsupported instead of resolving to an inherited function.
    const required = Object.prototype.hasOwnProperty.call(methodRequirements, params.method)
      ? methodRequirements[params.method]
      : undefined;
    if (!required) {
      throw new Error(`不支持的方法: ${params.method}`);
    }

    const missing = required.filter((field) => !params[field]);
    if (missing.length > 0) {
      throw new Error(`方法 ${params.method} 缺少必需参数: ${missing.join(', ')}`);
    }

    try {
      await this.initializeFilesystem();

      // Resolve the path through the framework's path resolver.
      const resolvedPdfPath = this.resolvePromptManagerPath(params.pdfPath);
      const { pages, extractImages = true, forceRefresh = false } = params;

      const { promises: fsPromises } = await import('fs');
      const { createHash } = await import('crypto');

      const PDFParse = await loadPdfParse(this, logger);

      logger?.info('开始处理 PDF', { pdfPath: resolvedPdfPath, pages, extractImages, forceRefresh });

      // 1. Read the file.
      const pdfBuffer = await fsPromises.readFile(resolvedPdfPath);
      logger?.info('PDF 文件读取成功', { size: pdfBuffer.length });

      // 2. Content hash: used as the cache key and directory name.
      const pdfHash = createHash('md5').update(pdfBuffer).digest('hex');

      // 3. Per-PDF data directory inside the tool directory.
      const pdfDir = path.join(this.__toolDir, 'pdf-cache', pdfHash);
      await fsPromises.mkdir(pdfDir, { recursive: true });
      logger?.info('PDF 数据目录', { pdfDir });

      // 4. Look up cached metadata for this PDF.
      const allPdfs = api.storage.getItem('pdfs') || {};
      let pdfMetadata = allPdfs[pdfHash];

      logger?.info('Storage 读取结果', {
        hasPdfs: !!allPdfs,
        pdfKeys: Object.keys(allPdfs),
        hasPdfMetadata: !!pdfMetadata,
        cachedPages: pdfMetadata ? Object.keys(pdfMetadata.parsedPages || {}) : []
      });

      // Parse result, kept so the document is parsed at most once per call.
      let parsed = null;

      if (!pdfMetadata || forceRefresh) {
        logger?.info('首次处理此 PDF,解析元信息');

        parsed = await parsePdfBuffer(PDFParse, pdfBuffer, { withInfo: true });

        // Prefer the page count reported by the parser; estimating from the
        // number of text lines is only a last resort.
        const detectedPages =
          parsed.pageCount ||
          parsed.info.Pages ||
          this.estimatePages(parsed.text.split('\n'));

        pdfMetadata = {
          totalPages: detectedPages,
          title: parsed.info?.Title || null,
          author: parsed.info?.Author || null,
          pdfHash: pdfHash,
          pdfPath: resolvedPdfPath,
          createdAt: Date.now(),
          parsedPages: {}
        };

        allPdfs[pdfHash] = pdfMetadata;
        api.storage.setItem('pdfs', allPdfs);
        logger?.info('PDF 元信息已缓存', { totalPages: detectedPages });
      } else {
        logger?.info('使用缓存的 PDF 元信息', {
          totalPages: pdfMetadata.totalPages,
          cachedPages: Object.keys(pdfMetadata.parsedPages || {}).length
        });
      }

      const totalPages = pdfMetadata.totalPages;

      // 5. No pages requested: metadata only.
      if (pages === undefined || pages === null) {
        return {
          success: true,
          metadata: {
            totalPages: pdfMetadata.totalPages,
            title: pdfMetadata.title,
            author: pdfMetadata.author,
            cachedPages: Object.keys(pdfMetadata.parsedPages || {}).length
          },
          message: '使用 pages 参数指定要读取的页码,例如:pages: 1 或 pages: [1,2,3]'
        };
      }

      // 6. Normalise `pages` to an array of numbers (numeric strings are
      //    converted so they keep working as they did through coercion).
      const pageNumbers = (Array.isArray(pages) ? pages : [pages]).map((page) =>
        (typeof page === 'string' && page.trim() !== '' ? Number(page) : page)
      );

      for (const pageNum of pageNumbers) {
        // NaN and fractional values pass a plain range comparison and would
        // silently produce empty text.
        if (!Number.isInteger(pageNum)) {
          throw new Error(`页码 ${String(pageNum)} 无效:必须是整数`);
        }
        if (pageNum < 1 || pageNum > totalPages) {
          throw new Error(`页码 ${pageNum} 超出范围(总页数:${totalPages})`);
        }
      }

      logger?.info('准备读取指定页面', { pageNumbers });

      // 7. Collect the content of the requested pages.
      if (!parsed) {
        parsed = await parsePdfBuffer(PDFParse, pdfBuffer);
      }

      // Use the parser's real per-page text when it provides it; otherwise
      // approximate pages by slicing the full text evenly by line count.
      const approximatedPages = parsed.pageTexts
        ? null
        : this.splitTextByPages(parsed.text, totalPages);
      const textOfPage = (pageNum) =>
        (parsed.pageTexts ? parsed.pageTexts.get(pageNum) : approximatedPages[pageNum - 1]) || '';

      const pagesContent = [];
      const allImages = [];
      let fullText = '';
      let cacheHits = 0;
      let cacheMisses = 0;

      for (const pageNum of pageNumbers) {
        logger?.info(`处理第 ${pageNum} 页`);

        const pageCache = pdfMetadata.parsedPages?.[pageNum];
        const pageImages = [];
        const fromCache = Boolean(pageCache && !forceRefresh && extractImages);

        if (fromCache) {
          logger?.info(`第 ${pageNum} 页使用缓存`, {
            imageCount: pageCache.imageCount
          });

          // Rebuild the image list from the recorded count and the
          // deterministic file naming inside the PDF data directory.
          for (let i = 0; i < (pageCache.imageCount || 0); i++) {
            const image = {
              page: pageNum,
              index: i,
              path: path.join(pdfDir, `page-${pageNum}-img-${i}.png`),
              width: null,
              height: null
            };
            pageImages.push(image);
            allImages.push(image);
          }
          cacheHits++;
        } else {
          cacheMisses++;
          logger?.info(`第 ${pageNum} 页缓存未命中,开始解析`);

          // Re-read storage before writing so the update is applied to the
          // stored state.
          const currentPdfs = api.storage.getItem('pdfs') || {};
          const currentMetadata = currentPdfs[pdfHash] || pdfMetadata;

          if (!currentMetadata.parsedPages) {
            currentMetadata.parsedPages = {};
          }
          currentMetadata.parsedPages[pageNum] = {
            timestamp: Date.now(),
            imageCount: pageImages.length,
            hasText: true
          };

          currentPdfs[pdfHash] = currentMetadata;
          api.storage.setItem('pdfs', currentPdfs);

          // Keep the local reference in sync with what was stored.
          pdfMetadata = currentMetadata;
        }

        fullText += `\n=== 第 ${pageNum} 页 ===\n${textOfPage(pageNum)}\n`;

        pagesContent.push({
          page: pageNum,
          hasImages: pageImages.length > 0,
          imageCount: pageImages.length,
          images: pageImages,
          fromCache
        });
      }

      // 8. Assemble the result.
      const result = {
        success: true,
        metadata: {
          totalPages: totalPages,
          title: pdfMetadata.title,
          author: pdfMetadata.author,
          requestedPages: pageNumbers
        },
        content: {
          text: fullText.trim(),
          pages: pagesContent,
          hasImages: allImages.length > 0,
          imageCount: allImages.length,
          imagesDirectory: pdfDir
        },
        cache: {
          hits: cacheHits,
          misses: cacheMisses,
          totalCachedPages: Object.keys(pdfMetadata.parsedPages || {}).length
        }
      };

      logger?.info('PDF 分页读取完成', {
        readPages: pageNumbers.length,
        totalImages: allImages.length,
        cacheHits,
        cacheMisses
      });

      return result;
    } catch (error) {
      logger?.error('PDF Reader operation failed', {
        method: params.method,
        error: error.message
      });
      throw error;
    }
  },

  /**
   * Approximates per-page text by cutting the full text into `totalPages`
   * consecutive slices of equal line count (the last slices may be shorter
   * or empty). This is a fallback for parsers that do not report page text.
   *
   * @param {string} text - Full text of the PDF.
   * @param {number} totalPages - Number of pages to split into.
   * @returns {string[]} One text entry per page; empty when `totalPages` is
   *   not a positive number.
   */
  splitTextByPages(text, totalPages) {
    if (!(totalPages > 0)) {
      return [];
    }

    const lines = String(text ?? '').split('\n');
    const linesPerPage = Math.ceil(lines.length / totalPages);
    const pages = [];

    for (let pageIndex = 0; pageIndex < totalPages; pageIndex++) {
      const start = pageIndex * linesPerPage;
      const end = Math.min(start + linesPerPage, lines.length);
      pages.push(lines.slice(start, end).join('\n'));
    }

    return pages;
  },

  /**
   * Estimates a page count from the number of text lines. Used only when the
   * parser reports no page count.
   *
   * @param {string[]} lines - Text lines of the PDF.
   * @returns {number} Estimated page count, between 1 and 100.
   */
  estimatePages(lines) {
    if (lines.length < 100) {
      return 1;
    }
    if (lines.length < 500) {
      return Math.ceil(lines.length / 100);
    }
    // Cap the estimate so very long texts do not yield absurd page counts.
    return Math.min(100, Math.ceil(lines.length / 50));
  }
};

export default pdfReaderTool;
|