node-pdf2img 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -0
- package/package.json +2 -2
- package/src/core/converter.js +32 -359
- package/src/core/downloader.js +98 -0
- package/src/core/output-handler.js +176 -0
- package/src/core/renderer.js +224 -0
- package/src/core/thread-pool.js +78 -0
- package/src/index.d.ts +2 -2
package/README.md
CHANGED
|
@@ -157,6 +157,32 @@ const result = await convert('./document.pdf', {
|
|
|
157
157
|
});
|
|
158
158
|
```
|
|
159
159
|
|
|
160
|
+
### pages 参数说明
|
|
161
|
+
|
|
162
|
+
`pages` 参数控制要转换的页面:
|
|
163
|
+
|
|
164
|
+
- **空数组 `[]` 或不传**:转换所有页面
|
|
165
|
+
- **指定页码数组**:只转换指定页面(1-based)
|
|
166
|
+
|
|
167
|
+
```javascript
|
|
168
|
+
// 转换所有页面(推荐)
|
|
169
|
+
await convert('./document.pdf', { pages: [] });
|
|
170
|
+
|
|
171
|
+
// 只转换第 1、3、5 页
|
|
172
|
+
await convert('./document.pdf', { pages: [1, 3, 5] });
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
> **最佳实践**:如果需要转换所有页面,直接使用 `convert()` 并传空 `pages` 数组,**不要**先调用 `getPageCount()` 获取页数再传入。这样可以避免 URL 输入时重复下载 PDF 文件。
|
|
176
|
+
>
|
|
177
|
+
> ```javascript
|
|
178
|
+
> // ❌ 不推荐:URL 会被下载两次
|
|
179
|
+
> const pageCount = await getPageCount(url);
|
|
180
|
+
> const result = await convert(url, { pages: Array.from({length: pageCount}, (_, i) => i + 1) });
|
|
181
|
+
>
|
|
182
|
+
> // ✅ 推荐:直接转换所有页面
|
|
183
|
+
> const result = await convert(url, { pages: [] });
|
|
184
|
+
> ```
|
|
185
|
+
|
|
160
186
|
### 自定义渲染宽度
|
|
161
187
|
|
|
162
188
|
```javascript
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-pdf2img",
|
|
3
|
-
"version": "0.1.6",
|
|
3
|
+
"version": "0.1.8",
|
|
4
4
|
"description": "High-performance PDF to image converter using PDFium native renderer",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.js",
|
|
@@ -55,7 +55,7 @@
|
|
|
55
55
|
"p-limit": "^7.2.0",
|
|
56
56
|
"piscina": "^5.1.4",
|
|
57
57
|
"sharp": "^0.33.0",
|
|
58
|
-
"node-pdf2img-native": "^1.1.
|
|
58
|
+
"node-pdf2img-native": "^1.1.9"
|
|
59
59
|
},
|
|
60
60
|
"devDependencies": {
|
|
61
61
|
"@types/node": "^20.0.0"
|
package/src/core/converter.js
CHANGED
|
@@ -15,63 +15,16 @@
|
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
17
|
import fs from 'fs';
|
|
18
|
-
import path from 'path';
|
|
19
|
-
import os from 'os';
|
|
20
|
-
import { pipeline } from 'stream/promises';
|
|
21
|
-
import { fileURLToPath } from 'url';
|
|
22
|
-
import pLimit from 'p-limit';
|
|
23
|
-
import Piscina from 'piscina';
|
|
24
18
|
import { createLogger } from '../utils/logger.js';
|
|
25
|
-
import { RENDER_CONFIG, TIMEOUT_CONFIG, SUPPORTED_FORMATS, getExtension, getMimeType } from './config.js';
|
|
19
|
+
import { RENDER_CONFIG, SUPPORTED_FORMATS } from './config.js';
|
|
26
20
|
import * as nativeRenderer from '../renderers/native.js';
|
|
21
|
+
import { getThreadCount, getThreadPoolStats, destroyThreadPool } from './thread-pool.js';
|
|
22
|
+
import { downloadToTempFile } from './downloader.js';
|
|
23
|
+
import { saveToFiles, uploadToCos, DEFAULT_CONCURRENCY } from './output-handler.js';
|
|
24
|
+
import { InputType, detectInputType, renderPages } from './renderer.js';
|
|
27
25
|
|
|
28
26
|
const logger = createLogger('Converter');
|
|
29
27
|
|
|
30
|
-
// ==================== 线程池初始化 ====================
|
|
31
|
-
|
|
32
|
-
// 获取 worker.js 的路径
|
|
33
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
34
|
-
const __dirname = path.dirname(__filename);
|
|
35
|
-
const workerPath = path.resolve(__dirname, '../worker.js');
|
|
36
|
-
|
|
37
|
-
// 创建全局线程池实例
|
|
38
|
-
// 线程数默认为 CPU 核心数,可通过环境变量调整
|
|
39
|
-
const threadCount = parseInt(process.env.PDF2IMG_THREAD_COUNT, 10) || os.cpus().length;
|
|
40
|
-
|
|
41
|
-
let piscina = null;
|
|
42
|
-
|
|
43
|
-
/**
|
|
44
|
-
* 获取或创建线程池实例(懒加载)
|
|
45
|
-
*/
|
|
46
|
-
function getThreadPool() {
|
|
47
|
-
if (!piscina) {
|
|
48
|
-
piscina = new Piscina({
|
|
49
|
-
filename: workerPath,
|
|
50
|
-
maxThreads: threadCount,
|
|
51
|
-
idleTimeout: 30000, // 空闲 30 秒后销毁线程
|
|
52
|
-
});
|
|
53
|
-
logger.info(`Thread pool initialized with ${threadCount} workers`);
|
|
54
|
-
}
|
|
55
|
-
return piscina;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
* 默认并发限制
|
|
60
|
-
*/
|
|
61
|
-
const DEFAULT_CONCURRENCY = {
|
|
62
|
-
FILE_IO: 10, // 文件写入并发数
|
|
63
|
-
COS_UPLOAD: 8, // COS 上传并发数
|
|
64
|
-
};
|
|
65
|
-
|
|
66
|
-
/**
|
|
67
|
-
* 输入类型枚举
|
|
68
|
-
*/
|
|
69
|
-
export const InputType = {
|
|
70
|
-
FILE: 'file',
|
|
71
|
-
URL: 'url',
|
|
72
|
-
BUFFER: 'buffer',
|
|
73
|
-
};
|
|
74
|
-
|
|
75
28
|
/**
|
|
76
29
|
* 输出类型枚举
|
|
77
30
|
*/
|
|
@@ -81,279 +34,8 @@ export const OutputType = {
|
|
|
81
34
|
COS: 'cos', // 上传到腾讯云 COS
|
|
82
35
|
};
|
|
83
36
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
*/
|
|
87
|
-
function detectInputType(input) {
|
|
88
|
-
if (Buffer.isBuffer(input)) {
|
|
89
|
-
return InputType.BUFFER;
|
|
90
|
-
}
|
|
91
|
-
if (typeof input === 'string') {
|
|
92
|
-
if (input.startsWith('http://') || input.startsWith('https://')) {
|
|
93
|
-
return InputType.URL;
|
|
94
|
-
}
|
|
95
|
-
return InputType.FILE;
|
|
96
|
-
}
|
|
97
|
-
throw new Error('Invalid input: must be a file path, URL, or Buffer');
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
/**
|
|
101
|
-
* 从 URL 获取文件大小
|
|
102
|
-
*/
|
|
103
|
-
async function getRemoteFileSize(url) {
|
|
104
|
-
const response = await fetch(url, {
|
|
105
|
-
method: 'HEAD',
|
|
106
|
-
signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
|
|
107
|
-
});
|
|
108
|
-
|
|
109
|
-
if (!response.ok) {
|
|
110
|
-
throw new Error(`Failed to get file size: ${response.status} ${response.statusText}`);
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
const contentLength = response.headers.get('content-length');
|
|
114
|
-
if (!contentLength) {
|
|
115
|
-
throw new Error('Server did not return Content-Length header');
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
return parseInt(contentLength, 10);
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
/**
|
|
122
|
-
* 流式下载远程文件到临时文件
|
|
123
|
-
*/
|
|
124
|
-
async function downloadToTempFile(url) {
|
|
125
|
-
const response = await fetch(url, {
|
|
126
|
-
signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
|
|
127
|
-
});
|
|
128
|
-
|
|
129
|
-
if (!response.ok) {
|
|
130
|
-
throw new Error(`Failed to download file: ${response.status} ${response.statusText}`);
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
const tempDir = os.tmpdir();
|
|
134
|
-
const tempFile = path.join(tempDir, `pdf2img_${Date.now()}_${Math.random().toString(36).slice(2)}.pdf`);
|
|
135
|
-
|
|
136
|
-
const fileStream = fs.createWriteStream(tempFile);
|
|
137
|
-
|
|
138
|
-
try {
|
|
139
|
-
await pipeline(response.body, fileStream);
|
|
140
|
-
return tempFile;
|
|
141
|
-
} catch (err) {
|
|
142
|
-
try {
|
|
143
|
-
await fs.promises.unlink(tempFile);
|
|
144
|
-
} catch {}
|
|
145
|
-
throw err;
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
/**
|
|
150
|
-
* 保存单个页面到文件
|
|
151
|
-
*/
|
|
152
|
-
async function savePageToFile(page, outputDir, prefix, ext) {
|
|
153
|
-
if (!page.success || !page.buffer) {
|
|
154
|
-
return { ...page, outputPath: null };
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
try {
|
|
158
|
-
const filename = `${prefix}_${page.pageNum}.${ext}`;
|
|
159
|
-
const outputPath = path.join(outputDir, filename);
|
|
160
|
-
await fs.promises.writeFile(outputPath, page.buffer);
|
|
161
|
-
|
|
162
|
-
return {
|
|
163
|
-
pageNum: page.pageNum,
|
|
164
|
-
width: page.width,
|
|
165
|
-
height: page.height,
|
|
166
|
-
success: true,
|
|
167
|
-
outputPath,
|
|
168
|
-
size: page.buffer.length,
|
|
169
|
-
};
|
|
170
|
-
} catch (err) {
|
|
171
|
-
return {
|
|
172
|
-
pageNum: page.pageNum,
|
|
173
|
-
width: page.width,
|
|
174
|
-
height: page.height,
|
|
175
|
-
success: false,
|
|
176
|
-
error: `File save failed: ${err.message}`,
|
|
177
|
-
outputPath: null,
|
|
178
|
-
};
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
/**
|
|
183
|
-
* 保存渲染结果到文件
|
|
184
|
-
*/
|
|
185
|
-
async function saveToFiles(pages, outputDir, prefix = 'page', format = 'webp', concurrency = DEFAULT_CONCURRENCY.FILE_IO) {
|
|
186
|
-
await fs.promises.mkdir(outputDir, { recursive: true });
|
|
187
|
-
|
|
188
|
-
const ext = getExtension(format);
|
|
189
|
-
const limit = pLimit(concurrency);
|
|
190
|
-
|
|
191
|
-
const results = await Promise.all(
|
|
192
|
-
pages.map(page => limit(() => savePageToFile(page, outputDir, prefix, ext)))
|
|
193
|
-
);
|
|
194
|
-
|
|
195
|
-
return results.sort((a, b) => a.pageNum - b.pageNum);
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
/**
|
|
199
|
-
* 上传单个页面到 COS
|
|
200
|
-
*/
|
|
201
|
-
async function uploadPageToCos(page, cos, cosConfig, keyPrefix, ext, mimeType) {
|
|
202
|
-
if (!page.success || !page.buffer) {
|
|
203
|
-
return { ...page, cosKey: null };
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
try {
|
|
207
|
-
const key = `${keyPrefix}/page_${page.pageNum}.${ext}`;
|
|
208
|
-
|
|
209
|
-
await new Promise((resolve, reject) => {
|
|
210
|
-
cos.putObject({
|
|
211
|
-
Bucket: cosConfig.bucket,
|
|
212
|
-
Region: cosConfig.region,
|
|
213
|
-
Key: key,
|
|
214
|
-
Body: page.buffer,
|
|
215
|
-
ContentType: mimeType,
|
|
216
|
-
}, (err) => {
|
|
217
|
-
if (err) reject(err);
|
|
218
|
-
else resolve();
|
|
219
|
-
});
|
|
220
|
-
});
|
|
221
|
-
|
|
222
|
-
return {
|
|
223
|
-
pageNum: page.pageNum,
|
|
224
|
-
width: page.width,
|
|
225
|
-
height: page.height,
|
|
226
|
-
success: true,
|
|
227
|
-
cosKey: key,
|
|
228
|
-
size: page.buffer.length,
|
|
229
|
-
};
|
|
230
|
-
} catch (err) {
|
|
231
|
-
return {
|
|
232
|
-
pageNum: page.pageNum,
|
|
233
|
-
width: page.width,
|
|
234
|
-
height: page.height,
|
|
235
|
-
success: false,
|
|
236
|
-
error: `Upload failed: ${err.message}`,
|
|
237
|
-
cosKey: null,
|
|
238
|
-
};
|
|
239
|
-
}
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
/**
|
|
243
|
-
* 上传渲染结果到 COS
|
|
244
|
-
*/
|
|
245
|
-
async function uploadToCos(pages, cosConfig, keyPrefix, format = 'webp', concurrency = DEFAULT_CONCURRENCY.COS_UPLOAD) {
|
|
246
|
-
const COS = (await import('cos-nodejs-sdk-v5')).default;
|
|
247
|
-
|
|
248
|
-
const cos = new COS({
|
|
249
|
-
SecretId: cosConfig.secretId,
|
|
250
|
-
SecretKey: cosConfig.secretKey,
|
|
251
|
-
});
|
|
252
|
-
|
|
253
|
-
const ext = getExtension(format);
|
|
254
|
-
const mimeType = getMimeType(format);
|
|
255
|
-
const limit = pLimit(concurrency);
|
|
256
|
-
|
|
257
|
-
const results = await Promise.all(
|
|
258
|
-
pages.map(page => limit(() => uploadPageToCos(page, cos, cosConfig, keyPrefix, ext, mimeType)))
|
|
259
|
-
);
|
|
260
|
-
|
|
261
|
-
return results.sort((a, b) => a.pageNum - b.pageNum);
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
/**
|
|
265
|
-
* 使用线程池渲染 PDF 页面
|
|
266
|
-
*
|
|
267
|
-
* 主线程负责协调,工作线程负责 CPU 密集型任务
|
|
268
|
-
*
|
|
269
|
-
* @param {string|Buffer} input - 输入
|
|
270
|
-
* @param {string} inputType - 输入类型
|
|
271
|
-
* @param {number[]} pages - 页码数组
|
|
272
|
-
* @param {Object} options - 选项
|
|
273
|
-
* @returns {Promise<Object>} 渲染结果
|
|
274
|
-
*/
|
|
275
|
-
async function renderPages(input, inputType, pages, options) {
|
|
276
|
-
const startTime = Date.now();
|
|
277
|
-
let filePath = null;
|
|
278
|
-
let pdfBuffer = null;
|
|
279
|
-
let tempFile = null;
|
|
280
|
-
let numPages;
|
|
281
|
-
|
|
282
|
-
// 准备输入
|
|
283
|
-
if (inputType === InputType.FILE) {
|
|
284
|
-
try {
|
|
285
|
-
await fs.promises.access(input, fs.constants.R_OK);
|
|
286
|
-
} catch {
|
|
287
|
-
throw new Error(`File not found or not readable: ${input}`);
|
|
288
|
-
}
|
|
289
|
-
filePath = input;
|
|
290
|
-
numPages = nativeRenderer.getPageCountFromFile(filePath);
|
|
291
|
-
} else if (inputType === InputType.BUFFER) {
|
|
292
|
-
pdfBuffer = Buffer.isBuffer(input) ? input : Buffer.from(input);
|
|
293
|
-
numPages = nativeRenderer.getPageCount(pdfBuffer);
|
|
294
|
-
} else if (inputType === InputType.URL) {
|
|
295
|
-
const fileSize = await getRemoteFileSize(input);
|
|
296
|
-
logger.debug(`Remote file size: ${(fileSize / 1024 / 1024).toFixed(2)}MB, downloading...`);
|
|
297
|
-
tempFile = await downloadToTempFile(input);
|
|
298
|
-
filePath = tempFile;
|
|
299
|
-
numPages = nativeRenderer.getPageCountFromFile(filePath);
|
|
300
|
-
}
|
|
301
|
-
|
|
302
|
-
// 确定目标页码
|
|
303
|
-
let targetPages;
|
|
304
|
-
if (pages.length === 0) {
|
|
305
|
-
targetPages = Array.from({ length: numPages }, (_, i) => i + 1);
|
|
306
|
-
} else {
|
|
307
|
-
targetPages = pages.filter(p => p >= 1 && p <= numPages);
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
logger.debug(`Rendering ${targetPages.length} pages using thread pool (${threadCount} workers)`);
|
|
311
|
-
|
|
312
|
-
// 获取线程池
|
|
313
|
-
const pool = getThreadPool();
|
|
314
|
-
|
|
315
|
-
try {
|
|
316
|
-
// 为每一页创建任务并提交到线程池
|
|
317
|
-
const tasks = targetPages.map(pageNum => {
|
|
318
|
-
const task = {
|
|
319
|
-
pageNum,
|
|
320
|
-
options,
|
|
321
|
-
};
|
|
322
|
-
|
|
323
|
-
if (filePath) {
|
|
324
|
-
task.filePath = filePath;
|
|
325
|
-
} else if (pdfBuffer) {
|
|
326
|
-
// 注意:Buffer 会被序列化传递给工作线程
|
|
327
|
-
// 对于大文件,建议先保存到临时文件再传递路径
|
|
328
|
-
task.pdfBuffer = pdfBuffer;
|
|
329
|
-
}
|
|
330
|
-
|
|
331
|
-
// 提交任务到线程池
|
|
332
|
-
return pool.run(task);
|
|
333
|
-
});
|
|
334
|
-
|
|
335
|
-
// 等待所有页面的并行处理完成
|
|
336
|
-
const results = await Promise.all(tasks);
|
|
337
|
-
|
|
338
|
-
results.sort((a, b) => a.pageNum - b.pageNum);
|
|
339
|
-
|
|
340
|
-
return {
|
|
341
|
-
success: true,
|
|
342
|
-
numPages,
|
|
343
|
-
pages: results,
|
|
344
|
-
totalTime: Date.now() - startTime,
|
|
345
|
-
renderTime: results.reduce((sum, p) => sum + (p.renderTime || 0), 0),
|
|
346
|
-
encodeTime: results.reduce((sum, p) => sum + (p.encodeTime || 0), 0),
|
|
347
|
-
};
|
|
348
|
-
} finally {
|
|
349
|
-
// 清理临时文件
|
|
350
|
-
if (tempFile) {
|
|
351
|
-
try {
|
|
352
|
-
await fs.promises.unlink(tempFile);
|
|
353
|
-
} catch {}
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
}
|
|
37
|
+
// 重新导出 InputType
|
|
38
|
+
export { InputType };
|
|
357
39
|
|
|
358
40
|
/**
|
|
359
41
|
* PDF 转图片
|
|
@@ -497,6 +179,8 @@ export async function convert(input, options = {}) {
|
|
|
497
179
|
outputResult = normalizedPages.sort((a, b) => a.pageNum - b.pageNum);
|
|
498
180
|
}
|
|
499
181
|
|
|
182
|
+
const threadCount = getThreadCount();
|
|
183
|
+
|
|
500
184
|
return {
|
|
501
185
|
success: true,
|
|
502
186
|
numPages: result.numPages,
|
|
@@ -511,13 +195,15 @@ export async function convert(input, options = {}) {
|
|
|
511
195
|
threadPool: {
|
|
512
196
|
workers: threadCount,
|
|
513
197
|
},
|
|
198
|
+
// 流式渲染统计(仅 URL 输入时存在)
|
|
199
|
+
...(result.streamStats && { streamStats: result.streamStats }),
|
|
514
200
|
};
|
|
515
201
|
}
|
|
516
202
|
|
|
517
203
|
/**
|
|
518
204
|
* 获取 PDF 页数(异步版本)
|
|
519
205
|
*
|
|
520
|
-
* @param {string|Buffer} input - PDF
|
|
206
|
+
* @param {string|Buffer} input - PDF 输入(文件路径、URL 或 Buffer)
|
|
521
207
|
* @returns {Promise<number>} 页数
|
|
522
208
|
*/
|
|
523
209
|
export async function getPageCount(input) {
|
|
@@ -530,6 +216,23 @@ export async function getPageCount(input) {
|
|
|
530
216
|
}
|
|
531
217
|
|
|
532
218
|
if (typeof input === 'string') {
|
|
219
|
+
// 检查是否是 URL
|
|
220
|
+
if (input.startsWith('http://') || input.startsWith('https://')) {
|
|
221
|
+
// URL 输入:下载到临时文件后获取页数
|
|
222
|
+
const tempFile = await downloadToTempFile(input);
|
|
223
|
+
try {
|
|
224
|
+
return nativeRenderer.getPageCountFromFile(tempFile);
|
|
225
|
+
} finally {
|
|
226
|
+
// 清理临时文件
|
|
227
|
+
try {
|
|
228
|
+
await fs.promises.unlink(tempFile);
|
|
229
|
+
} catch {
|
|
230
|
+
// 忽略清理错误
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
// 本地文件路径
|
|
533
236
|
try {
|
|
534
237
|
await fs.promises.access(input, fs.constants.R_OK);
|
|
535
238
|
} catch {
|
|
@@ -538,7 +241,7 @@ export async function getPageCount(input) {
|
|
|
538
241
|
return nativeRenderer.getPageCountFromFile(input);
|
|
539
242
|
}
|
|
540
243
|
|
|
541
|
-
throw new Error('Invalid input: must be a file path or Buffer');
|
|
244
|
+
throw new Error('Invalid input: must be a file path, URL, or Buffer');
|
|
542
245
|
}
|
|
543
246
|
|
|
544
247
|
/**
|
|
@@ -577,35 +280,5 @@ export function getVersion() {
|
|
|
577
280
|
return nativeRenderer.getVersion();
|
|
578
281
|
}
|
|
579
282
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
*/
|
|
583
|
-
export function getThreadPoolStats() {
|
|
584
|
-
if (!piscina) {
|
|
585
|
-
return {
|
|
586
|
-
initialized: false,
|
|
587
|
-
workers: threadCount,
|
|
588
|
-
};
|
|
589
|
-
}
|
|
590
|
-
return {
|
|
591
|
-
initialized: true,
|
|
592
|
-
workers: threadCount,
|
|
593
|
-
completed: piscina.completed,
|
|
594
|
-
waitTime: piscina.waitTime,
|
|
595
|
-
runTime: piscina.runTime,
|
|
596
|
-
utilization: piscina.utilization,
|
|
597
|
-
};
|
|
598
|
-
}
|
|
599
|
-
|
|
600
|
-
/**
|
|
601
|
-
* 销毁线程池
|
|
602
|
-
*
|
|
603
|
-
* 在应用关闭时调用,释放工作线程资源
|
|
604
|
-
*/
|
|
605
|
-
export async function destroyThreadPool() {
|
|
606
|
-
if (piscina) {
|
|
607
|
-
await piscina.destroy();
|
|
608
|
-
piscina = null;
|
|
609
|
-
logger.info('Thread pool destroyed');
|
|
610
|
-
}
|
|
611
|
-
}
|
|
283
|
+
// 重新导出线程池相关函数
|
|
284
|
+
export { getThreadPoolStats, destroyThreadPool };
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 远程文件下载模块
|
|
3
|
+
*
|
|
4
|
+
* 提供流式下载和文件大小获取功能
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import fs from 'fs';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import os from 'os';
|
|
10
|
+
import { pipeline } from 'stream/promises';
|
|
11
|
+
import { createLogger } from '../utils/logger.js';
|
|
12
|
+
import { TIMEOUT_CONFIG } from './config.js';
|
|
13
|
+
|
|
14
|
+
const logger = createLogger('Downloader');
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* 延迟函数
|
|
18
|
+
*/
|
|
19
|
+
function sleep(ms) {
|
|
20
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* 从 URL 获取文件大小
|
|
25
|
+
*
|
|
26
|
+
* @param {string} url - 远程文件 URL
|
|
27
|
+
* @returns {Promise<number>} 文件大小(字节)
|
|
28
|
+
*/
|
|
29
|
+
export async function getRemoteFileSize(url) {
|
|
30
|
+
const response = await fetch(url, {
|
|
31
|
+
method: 'HEAD',
|
|
32
|
+
signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
if (!response.ok) {
|
|
36
|
+
throw new Error(`Failed to get file size: ${response.status} ${response.statusText}`);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
const contentLength = response.headers.get('content-length');
|
|
40
|
+
if (!contentLength) {
|
|
41
|
+
throw new Error('Server did not return Content-Length header');
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
return parseInt(contentLength, 10);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* 流式下载远程文件到临时文件(带重试)
|
|
49
|
+
*
|
|
50
|
+
* @param {string} url - 远程文件 URL
|
|
51
|
+
* @param {number} maxRetries - 最大重试次数
|
|
52
|
+
* @returns {Promise<string>} 临时文件路径
|
|
53
|
+
*/
|
|
54
|
+
export async function downloadToTempFile(url, maxRetries = 3) {
|
|
55
|
+
let lastError;
|
|
56
|
+
|
|
57
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
58
|
+
const tempFile = path.join(os.tmpdir(), `pdf2img_${Date.now()}_${Math.random().toString(36).slice(2)}.pdf`);
|
|
59
|
+
|
|
60
|
+
try {
|
|
61
|
+
const response = await fetch(url, {
|
|
62
|
+
signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
if (!response.ok) {
|
|
66
|
+
throw new Error(`Failed to download file: ${response.status} ${response.statusText}`);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const fileStream = fs.createWriteStream(tempFile);
|
|
70
|
+
await pipeline(response.body, fileStream);
|
|
71
|
+
return tempFile;
|
|
72
|
+
} catch (err) {
|
|
73
|
+
lastError = err;
|
|
74
|
+
|
|
75
|
+
// 清理临时文件
|
|
76
|
+
try {
|
|
77
|
+
await fs.promises.unlink(tempFile);
|
|
78
|
+
} catch {}
|
|
79
|
+
|
|
80
|
+
const isRetryable = err.code === 'EPIPE' ||
|
|
81
|
+
err.code === 'ECONNRESET' ||
|
|
82
|
+
err.code === 'ETIMEDOUT' ||
|
|
83
|
+
err.code === 'ECONNREFUSED' ||
|
|
84
|
+
err.code === 'UND_ERR_SOCKET' ||
|
|
85
|
+
err.name === 'AbortError';
|
|
86
|
+
|
|
87
|
+
if (isRetryable && attempt < maxRetries) {
|
|
88
|
+
const delay = Math.pow(2, attempt - 1) * 1000;
|
|
89
|
+
logger.debug(`Download failed (${err.code || err.message}), retrying in ${delay}ms (attempt ${attempt}/${maxRetries})`);
|
|
90
|
+
await sleep(delay);
|
|
91
|
+
} else if (!isRetryable) {
|
|
92
|
+
break;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
throw new Error(`Download failed after ${maxRetries} attempts: ${lastError.message}`);
|
|
98
|
+
}
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 输出处理模块
|
|
3
|
+
*
|
|
4
|
+
* 负责将渲染结果保存到文件或上传到 COS
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import fs from 'fs';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import pLimit from 'p-limit';
|
|
10
|
+
import { createLogger } from '../utils/logger.js';
|
|
11
|
+
import { getExtension, getMimeType } from './config.js';
|
|
12
|
+
|
|
13
|
+
const logger = createLogger('OutputHandler');
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* 默认并发限制
|
|
17
|
+
*/
|
|
18
|
+
export const DEFAULT_CONCURRENCY = {
|
|
19
|
+
FILE_IO: 10, // 文件写入并发数
|
|
20
|
+
COS_UPLOAD: 4, // COS 上传并发数(降低以减少 EPIPE 错误)
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* 延迟函数
|
|
25
|
+
*/
|
|
26
|
+
function sleep(ms) {
|
|
27
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* 保存单个页面到文件
|
|
32
|
+
*/
|
|
33
|
+
async function savePageToFile(page, outputDir, prefix, ext) {
|
|
34
|
+
if (!page.success || !page.buffer) {
|
|
35
|
+
return { ...page, outputPath: null };
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
try {
|
|
39
|
+
const filename = `${prefix}_${page.pageNum}.${ext}`;
|
|
40
|
+
const outputPath = path.join(outputDir, filename);
|
|
41
|
+
await fs.promises.writeFile(outputPath, page.buffer);
|
|
42
|
+
|
|
43
|
+
return {
|
|
44
|
+
pageNum: page.pageNum,
|
|
45
|
+
width: page.width,
|
|
46
|
+
height: page.height,
|
|
47
|
+
success: true,
|
|
48
|
+
outputPath,
|
|
49
|
+
size: page.buffer.length,
|
|
50
|
+
};
|
|
51
|
+
} catch (err) {
|
|
52
|
+
return {
|
|
53
|
+
pageNum: page.pageNum,
|
|
54
|
+
width: page.width,
|
|
55
|
+
height: page.height,
|
|
56
|
+
success: false,
|
|
57
|
+
error: `File save failed: ${err.message}`,
|
|
58
|
+
outputPath: null,
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* 保存渲染结果到文件
|
|
65
|
+
*
|
|
66
|
+
* @param {Array} pages - 渲染结果数组
|
|
67
|
+
* @param {string} outputDir - 输出目录
|
|
68
|
+
* @param {string} prefix - 文件名前缀
|
|
69
|
+
* @param {string} format - 输出格式
|
|
70
|
+
* @param {number} concurrency - 并发数
|
|
71
|
+
* @returns {Promise<Array>} 保存结果
|
|
72
|
+
*/
|
|
73
|
+
export async function saveToFiles(pages, outputDir, prefix = 'page', format = 'webp', concurrency = DEFAULT_CONCURRENCY.FILE_IO) {
|
|
74
|
+
await fs.promises.mkdir(outputDir, { recursive: true });
|
|
75
|
+
|
|
76
|
+
const ext = getExtension(format);
|
|
77
|
+
const limit = pLimit(concurrency);
|
|
78
|
+
|
|
79
|
+
const results = await Promise.all(
|
|
80
|
+
pages.map(page => limit(() => savePageToFile(page, outputDir, prefix, ext)))
|
|
81
|
+
);
|
|
82
|
+
|
|
83
|
+
return results.sort((a, b) => a.pageNum - b.pageNum);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* 上传单个页面到 COS(带重试)
|
|
88
|
+
*/
|
|
89
|
+
async function uploadPageToCos(page, cos, cosConfig, keyPrefix, ext, mimeType, maxRetries = 3) {
|
|
90
|
+
if (!page.success || !page.buffer) {
|
|
91
|
+
return { ...page, cosKey: null };
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
const key = `${keyPrefix}/page_${page.pageNum}.${ext}`;
|
|
95
|
+
let lastError;
|
|
96
|
+
|
|
97
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
98
|
+
try {
|
|
99
|
+
await new Promise((resolve, reject) => {
|
|
100
|
+
cos.putObject({
|
|
101
|
+
Bucket: cosConfig.bucket,
|
|
102
|
+
Region: cosConfig.region,
|
|
103
|
+
Key: key,
|
|
104
|
+
Body: page.buffer,
|
|
105
|
+
ContentType: mimeType,
|
|
106
|
+
}, (err) => {
|
|
107
|
+
if (err) reject(err);
|
|
108
|
+
else resolve();
|
|
109
|
+
});
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
return {
|
|
113
|
+
pageNum: page.pageNum,
|
|
114
|
+
width: page.width,
|
|
115
|
+
height: page.height,
|
|
116
|
+
success: true,
|
|
117
|
+
cosKey: key,
|
|
118
|
+
size: page.buffer.length,
|
|
119
|
+
};
|
|
120
|
+
} catch (err) {
|
|
121
|
+
lastError = err;
|
|
122
|
+
const isRetryable = err.code === 'EPIPE' ||
|
|
123
|
+
err.code === 'ECONNRESET' ||
|
|
124
|
+
err.code === 'ETIMEDOUT' ||
|
|
125
|
+
err.code === 'ECONNREFUSED' ||
|
|
126
|
+
(err.statusCode && err.statusCode >= 500);
|
|
127
|
+
|
|
128
|
+
if (isRetryable && attempt < maxRetries) {
|
|
129
|
+
// 指数退避:1s, 2s, 4s...
|
|
130
|
+
const delay = Math.pow(2, attempt - 1) * 1000;
|
|
131
|
+
logger.debug(`Page ${page.pageNum} upload failed (${err.code || err.message}), retrying in ${delay}ms (attempt ${attempt}/${maxRetries})`);
|
|
132
|
+
await sleep(delay);
|
|
133
|
+
} else if (!isRetryable) {
|
|
134
|
+
break;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
return {
|
|
140
|
+
pageNum: page.pageNum,
|
|
141
|
+
width: page.width,
|
|
142
|
+
height: page.height,
|
|
143
|
+
success: false,
|
|
144
|
+
error: `Upload failed after ${maxRetries} attempts: ${lastError.message}`,
|
|
145
|
+
cosKey: null,
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* 上传渲染结果到 COS
|
|
151
|
+
*
|
|
152
|
+
* @param {Array} pages - 渲染结果数组
|
|
153
|
+
* @param {Object} cosConfig - COS 配置
|
|
154
|
+
* @param {string} keyPrefix - COS key 前缀
|
|
155
|
+
* @param {string} format - 输出格式
|
|
156
|
+
* @param {number} concurrency - 并发数
|
|
157
|
+
* @returns {Promise<Array>} 上传结果
|
|
158
|
+
*/
|
|
159
|
+
export async function uploadToCos(pages, cosConfig, keyPrefix, format = 'webp', concurrency = DEFAULT_CONCURRENCY.COS_UPLOAD) {
|
|
160
|
+
const COS = (await import('cos-nodejs-sdk-v5')).default;
|
|
161
|
+
|
|
162
|
+
const cos = new COS({
|
|
163
|
+
SecretId: cosConfig.secretId,
|
|
164
|
+
SecretKey: cosConfig.secretKey,
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
const ext = getExtension(format);
|
|
168
|
+
const mimeType = getMimeType(format);
|
|
169
|
+
const limit = pLimit(concurrency);
|
|
170
|
+
|
|
171
|
+
const results = await Promise.all(
|
|
172
|
+
pages.map(page => limit(() => uploadPageToCos(page, cos, cosConfig, keyPrefix, ext, mimeType)))
|
|
173
|
+
);
|
|
174
|
+
|
|
175
|
+
return results.sort((a, b) => a.pageNum - b.pageNum);
|
|
176
|
+
}
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF 渲染模块
|
|
3
|
+
*
|
|
4
|
+
* 负责 PDF 页面的渲染逻辑,支持本地文件、Buffer 和 URL 输入
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import fs from 'fs';
|
|
8
|
+
import { createLogger } from '../utils/logger.js';
|
|
9
|
+
import * as nativeRenderer from '../renderers/native.js';
|
|
10
|
+
import { getThreadPool, getThreadCount } from './thread-pool.js';
|
|
11
|
+
import { getRemoteFileSize, downloadToTempFile } from './downloader.js';
|
|
12
|
+
|
|
13
|
+
const logger = createLogger('Renderer');
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* 输入类型枚举
|
|
17
|
+
*/
|
|
18
|
+
export const InputType = {
|
|
19
|
+
FILE: 'file',
|
|
20
|
+
URL: 'url',
|
|
21
|
+
BUFFER: 'buffer',
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* 流式渲染阈值(小于此值使用下载模式)
|
|
26
|
+
*/
|
|
27
|
+
const STREAM_THRESHOLD = 2 * 1024 * 1024; // 2MB
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* 检测输入类型
|
|
31
|
+
*
|
|
32
|
+
* @param {string|Buffer} input - 输入
|
|
33
|
+
* @returns {string} 输入类型
|
|
34
|
+
*/
|
|
35
|
+
export function detectInputType(input) {
|
|
36
|
+
if (Buffer.isBuffer(input)) {
|
|
37
|
+
return InputType.BUFFER;
|
|
38
|
+
}
|
|
39
|
+
if (typeof input === 'string') {
|
|
40
|
+
if (input.startsWith('http://') || input.startsWith('https://')) {
|
|
41
|
+
return InputType.URL;
|
|
42
|
+
}
|
|
43
|
+
return InputType.FILE;
|
|
44
|
+
}
|
|
45
|
+
throw new Error('Invalid input: must be a file path, URL, or Buffer');
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* 使用线程池渲染 PDF 页面
|
|
50
|
+
*
|
|
51
|
+
* 主线程负责协调,工作线程负责 CPU 密集型任务
|
|
52
|
+
*
|
|
53
|
+
* @param {string|Buffer} input - 输入
|
|
54
|
+
* @param {string} inputType - 输入类型
|
|
55
|
+
* @param {number[]} pages - 页码数组
|
|
56
|
+
* @param {Object} options - 选项
|
|
57
|
+
* @returns {Promise<Object>} 渲染结果
|
|
58
|
+
*/
|
|
59
|
+
export async function renderPages(input, inputType, pages, options) {
|
|
60
|
+
const startTime = Date.now();
|
|
61
|
+
|
|
62
|
+
// URL 输入:优先使用流式渲染
|
|
63
|
+
if (inputType === InputType.URL) {
|
|
64
|
+
return renderPagesFromUrl(input, pages, options, startTime);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// 本地文件或 Buffer 输入:使用线程池渲染
|
|
68
|
+
return renderPagesFromLocal(input, inputType, pages, options, startTime);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* 从 URL 渲染 PDF 页面(流式)
|
|
73
|
+
*
|
|
74
|
+
* 使用 HTTP Range 请求按需获取数据,避免完整下载
|
|
75
|
+
*/
|
|
76
|
+
async function renderPagesFromUrl(url, pages, options, startTime) {
|
|
77
|
+
// 获取文件大小
|
|
78
|
+
const fileSize = await getRemoteFileSize(url);
|
|
79
|
+
|
|
80
|
+
// 小文件直接下载后渲染,避免多次 Range 请求开销
|
|
81
|
+
if (fileSize < STREAM_THRESHOLD) {
|
|
82
|
+
logger.debug(`Remote file size: ${(fileSize / 1024 / 1024).toFixed(2)}MB (< 2MB), using download mode`);
|
|
83
|
+
return renderPagesWithDownload(url, pages, options, startTime);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
logger.debug(`Remote file size: ${(fileSize / 1024 / 1024).toFixed(2)}MB, using stream rendering`);
|
|
87
|
+
|
|
88
|
+
try {
|
|
89
|
+
// 使用流式渲染
|
|
90
|
+
const result = await nativeRenderer.renderFromStream(url, fileSize, pages, options);
|
|
91
|
+
|
|
92
|
+
return {
|
|
93
|
+
success: true,
|
|
94
|
+
numPages: result.numPages,
|
|
95
|
+
pages: result.pages,
|
|
96
|
+
totalTime: Date.now() - startTime,
|
|
97
|
+
renderTime: result.pages.reduce((sum, p) => sum + (p.renderTime || 0), 0),
|
|
98
|
+
encodeTime: result.pages.reduce((sum, p) => sum + (p.encodeTime || 0), 0),
|
|
99
|
+
streamStats: result.streamStats,
|
|
100
|
+
};
|
|
101
|
+
} catch (err) {
|
|
102
|
+
// 流式渲染失败,回退到下载后渲染
|
|
103
|
+
logger.warn(`Stream rendering failed: ${err.message}, falling back to download`);
|
|
104
|
+
return renderPagesWithDownload(url, pages, options, startTime);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
 * Download-then-render path (used as the fallback when stream rendering is
 * not used or has failed).
 *
 * Downloads the PDF at `url` to a temporary file, renders the requested pages
 * through the worker thread pool, and removes the temporary file afterwards.
 *
 * @param {string} url - Remote PDF location, passed to downloadToTempFile.
 * @param {number[]} pages - 1-based page numbers. An empty array selects every
 *   page; numbers outside 1..numPages are dropped.
 * @param {object} options - Render options forwarded unchanged to each worker task.
 * @param {number} startTime - Millisecond timestamp used to compute totalTime.
 * @returns {Promise<object>} Object with success, numPages, pages (sorted by
 *   pageNum), totalTime, renderTime and encodeTime.
 * @throws Rethrows a download or page-count failure, or the first failed page task.
 */
async function renderPagesWithDownload(url, pages, options, startTime) {
    const tempFile = await downloadToTempFile(url);
    const threadCount = getThreadCount();

    try {
        const numPages = nativeRenderer.getPageCountFromFile(tempFile);

        // Decide which pages to render.
        const targetPages = pages.length === 0
            ? Array.from({ length: numPages }, (_, i) => i + 1)
            : pages.filter(p => p >= 1 && p <= numPages);

        logger.debug(`Rendering ${targetPages.length} pages using thread pool (${threadCount} workers)`);

        const pool = getThreadPool();

        // Wait for EVERY task to settle before leaving this block. With
        // Promise.all the first rejection would trigger the `finally` below and
        // delete the temp file while other workers may still be reading it.
        const settled = await Promise.allSettled(
            targetPages.map(pageNum => pool.run({
                pageNum,
                options,
                filePath: tempFile,
            }))
        );

        const failure = settled.find(outcome => outcome.status === 'rejected');
        if (failure) {
            throw failure.reason;
        }

        const results = settled.map(outcome => outcome.value);
        results.sort((a, b) => a.pageNum - b.pageNum);

        return {
            success: true,
            numPages,
            pages: results,
            totalTime: Date.now() - startTime,
            renderTime: results.reduce((sum, p) => sum + (p.renderTime || 0), 0),
            encodeTime: results.reduce((sum, p) => sum + (p.encodeTime || 0), 0),
        };
    } finally {
        // Best-effort cleanup: a failed delete must never mask the render
        // result or the original error, but it is worth a trace in the log.
        try {
            await fs.promises.unlink(tempFile);
        } catch (cleanupErr) {
            logger.debug(`Failed to remove temp file ${tempFile}: ${cleanupErr.message}`);
        }
    }
}
|
|
155
|
+
|
|
156
|
+
/**
 * Render PDF pages from a local file path or from in-memory PDF bytes,
 * fanning the pages out to the worker thread pool.
 *
 * @param {string|Buffer} input - File path when inputType is InputType.FILE;
 *   a Buffer (or data accepted by Buffer.from) when it is InputType.BUFFER.
 * @param {*} inputType - One of the InputType values.
 * @param {number[]} pages - 1-based page numbers. An empty array selects every
 *   page; numbers outside 1..numPages are dropped.
 * @param {object} options - Render options forwarded unchanged to each worker task.
 * @param {number} startTime - Millisecond timestamp used to compute totalTime.
 * @returns {Promise<object>} Object with success, numPages, pages (sorted by
 *   pageNum), totalTime, renderTime and encodeTime.
 * @throws {Error} When inputType is FILE and the path is missing or unreadable.
 */
async function renderPagesFromLocal(input, inputType, pages, options, startTime) {
    const workerCount = getThreadCount();
    let numPages;
    // Source descriptor merged into every worker task (file path or raw bytes).
    let sourceFields = {};

    // Prepare the input and read the page count.
    if (inputType === InputType.FILE) {
        try {
            await fs.promises.access(input, fs.constants.R_OK);
        } catch {
            throw new Error(`File not found or not readable: ${input}`);
        }
        numPages = nativeRenderer.getPageCountFromFile(input);
        sourceFields = { filePath: input };
    } else if (inputType === InputType.BUFFER) {
        const pdfBytes = Buffer.isBuffer(input) ? input : Buffer.from(input);
        numPages = nativeRenderer.getPageCount(pdfBytes);
        // Note: the buffer is serialized when handed to a worker thread. For
        // large files, saving to a temp file and passing its path is advised.
        sourceFields = { pdfBuffer: pdfBytes };
    }

    // Decide which pages to render.
    const selectAll = pages.length === 0;
    const targetPages = selectAll
        ? Array.from({ length: numPages }, (_, idx) => idx + 1)
        : pages.filter(pageNo => pageNo >= 1 && pageNo <= numPages);

    logger.debug(`Rendering ${targetPages.length} pages using thread pool (${workerCount} workers)`);

    // Submit one task per page and wait for all of them to finish.
    const workerPool = getThreadPool();
    const pending = targetPages.map(pageNum => workerPool.run({ pageNum, options, ...sourceFields }));
    const rendered = await Promise.all(pending);

    rendered.sort((left, right) => left.pageNum - right.pageNum);

    // Sums a per-page timing field, treating missing values as 0.
    const sumOf = field => rendered.reduce((acc, page) => acc + (page[field] || 0), 0);

    return {
        success: true,
        numPages,
        pages: rendered,
        totalTime: Date.now() - startTime,
        renderTime: sumOf('renderTime'),
        encodeTime: sumOf('encodeTime'),
    };
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 线程池管理模块
|
|
3
|
+
*
|
|
4
|
+
* 使用 Piscina 管理工作线程池,用于 CPU 密集型任务
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import os from 'os';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import { fileURLToPath } from 'url';
|
|
10
|
+
import Piscina from 'piscina';
|
|
11
|
+
import { createLogger } from '../utils/logger.js';
|
|
12
|
+
|
|
13
|
+
const logger = createLogger('ThreadPool'); // module-level logger created with the 'ThreadPool' label
|
|
14
|
+
|
|
15
|
+
// 获取 worker.js 的路径
|
|
16
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
17
|
+
const __dirname = path.dirname(__filename);
|
|
18
|
+
const workerPath = path.resolve(__dirname, '../worker.js'); // worker.js one directory above this module; passed to Piscina as `filename`
|
|
19
|
+
|
|
20
|
+
/**
 * Choose the worker thread count from a raw override and a fallback.
 *
 * @param {string|undefined} rawValue - Override text, parsed with parseInt base 10.
 * @param {number} fallbackCount - Count used when the override is missing or invalid.
 * @returns {number} The override when it parses to a positive integer,
 *   otherwise the fallback, never less than 1.
 */
function resolveThreadCount(rawValue, fallbackCount) {
    const parsed = parseInt(rawValue, 10);
    // Reject NaN, zero and negative values: none is a usable thread count.
    if (Number.isInteger(parsed) && parsed > 0) {
        return parsed;
    }
    // os.cpus() may return an empty array, so never go below one worker.
    return Math.max(1, fallbackCount);
}

// Thread count defaults to the number of CPU cores and can be overridden
// with the PDF2IMG_THREAD_COUNT environment variable.
const threadCount = resolveThreadCount(process.env.PDF2IMG_THREAD_COUNT, os.cpus().length);
|
|
22
|
+
|
|
23
|
+
let piscina = null; // shared pool instance; null until getThreadPool() creates it and again after destroyThreadPool()
|
|
24
|
+
|
|
25
|
+
/**
 * Return the shared worker pool, creating it on first use (lazy initialization).
 *
 * @returns {Piscina} The module-wide Piscina instance.
 */
export function getThreadPool() {
    if (piscina) {
        return piscina;
    }

    piscina = new Piscina({
        filename: workerPath,
        maxThreads: threadCount,
        idleTimeout: 30000, // idle threads are destroyed after 30 seconds
    });
    logger.info(`Thread pool initialized with ${threadCount} workers`);

    return piscina;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Report the worker thread count this module resolved at load time.
 *
 * @returns {number} The value used as the pool's `maxThreads`.
 */
export function getThreadCount() {
    return threadCount;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Collect statistics about the worker pool.
 *
 * @returns {object} Always contains `initialized` and `workers`. Once the pool
 *   exists it also carries the pool's `completed`, `waitTime`, `runTime` and
 *   `utilization` values.
 */
export function getThreadPoolStats() {
    const pool = piscina;
    const stats = {
        initialized: Boolean(pool),
        workers: threadCount,
    };

    // Runtime metrics are only available after the pool has been created.
    if (pool) {
        stats.completed = pool.completed;
        stats.waitTime = pool.waitTime;
        stats.runTime = pool.runTime;
        stats.utilization = pool.utilization;
    }

    return stats;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Destroy the worker pool.
 *
 * Intended to be called when the application shuts down so the worker threads
 * are released. Does nothing when no pool has been created.
 *
 * @returns {Promise<void>}
 */
export async function destroyThreadPool() {
    if (!piscina) {
        return;
    }

    await piscina.destroy();
    piscina = null;
    logger.info('Thread pool destroyed');
}
|
package/src/index.d.ts
CHANGED
|
@@ -92,10 +92,10 @@ export function convert(input: string | Buffer, options?: ConvertOptions): Promi
|
|
|
92
92
|
/**
|
|
93
93
|
* 获取 PDF 页数
|
|
94
94
|
*
|
|
95
|
-
* @param input - PDF
|
|
95
|
+
* @param input - PDF 文件路径、URL 或 Buffer
|
|
96
96
|
* @returns 页数
|
|
97
97
|
*/
|
|
98
|
-
export function getPageCount(input: string | Buffer): number
|
|
98
|
+
export function getPageCount(input: string | Buffer): Promise<number>;
|
|
99
99
|
|
|
100
100
|
/**
|
|
101
101
|
* 检查原生渲染器是否可用
|