node-pdf2img 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +95 -9
- package/bin/cli.js +36 -20
- package/package.json +8 -3
- package/src/core/config.js +9 -0
- package/src/core/converter.js +78 -372
- package/src/core/downloader.js +98 -0
- package/src/core/output-handler.js +179 -0
- package/src/core/renderer.js +289 -0
- package/src/core/thread-pool.js +78 -0
- package/src/index.d.ts +182 -49
- package/src/index.js +18 -4
- package/src/renderers/pdfjs.js +867 -0
package/src/core/converter.js
CHANGED
|
@@ -7,6 +7,10 @@
|
|
|
7
7
|
* - 主线程:负责 I/O、任务分发、结果收集
|
|
8
8
|
* - 工作线程池:负责 CPU 密集型任务(PDFium 渲染 + Sharp 编码)
|
|
9
9
|
*
|
|
10
|
+
* 渲染器支持:
|
|
11
|
+
* - pdfium: PDFium 原生渲染器(默认,高性能)
|
|
12
|
+
* - pdfjs: PDF.js 渲染器(纯 JavaScript,无需原生依赖)
|
|
13
|
+
*
|
|
10
14
|
* 性能优化:
|
|
11
15
|
* - 使用 piscina 线程池,充分利用多核 CPU
|
|
12
16
|
* - 异步文件 I/O,不阻塞事件循环
|
|
@@ -15,63 +19,17 @@
|
|
|
15
19
|
*/
|
|
16
20
|
|
|
17
21
|
import fs from 'fs';
|
|
18
|
-
import path from 'path';
|
|
19
|
-
import os from 'os';
|
|
20
|
-
import { pipeline } from 'stream/promises';
|
|
21
|
-
import { fileURLToPath } from 'url';
|
|
22
|
-
import pLimit from 'p-limit';
|
|
23
|
-
import Piscina from 'piscina';
|
|
24
22
|
import { createLogger } from '../utils/logger.js';
|
|
25
|
-
import { RENDER_CONFIG,
|
|
23
|
+
import { RENDER_CONFIG, SUPPORTED_FORMATS, RendererType, DEFAULT_RENDERER } from './config.js';
|
|
26
24
|
import * as nativeRenderer from '../renderers/native.js';
|
|
25
|
+
import * as pdfjsRenderer from '../renderers/pdfjs.js';
|
|
26
|
+
import { getThreadCount, getThreadPoolStats, destroyThreadPool } from './thread-pool.js';
|
|
27
|
+
import { downloadToTempFile } from './downloader.js';
|
|
28
|
+
import { saveToFiles, uploadToCos, DEFAULT_CONCURRENCY } from './output-handler.js';
|
|
29
|
+
import { InputType, detectInputType, renderPages, getRendererType } from './renderer.js';
|
|
27
30
|
|
|
28
31
|
const logger = createLogger('Converter');
|
|
29
32
|
|
|
30
|
-
// ==================== 线程池初始化 ====================
|
|
31
|
-
|
|
32
|
-
// 获取 worker.js 的路径
|
|
33
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
34
|
-
const __dirname = path.dirname(__filename);
|
|
35
|
-
const workerPath = path.resolve(__dirname, '../worker.js');
|
|
36
|
-
|
|
37
|
-
// 创建全局线程池实例
|
|
38
|
-
// 线程数默认为 CPU 核心数,可通过环境变量调整
|
|
39
|
-
const threadCount = parseInt(process.env.PDF2IMG_THREAD_COUNT, 10) || os.cpus().length;
|
|
40
|
-
|
|
41
|
-
let piscina = null;
|
|
42
|
-
|
|
43
|
-
/**
|
|
44
|
-
* 获取或创建线程池实例(懒加载)
|
|
45
|
-
*/
|
|
46
|
-
function getThreadPool() {
|
|
47
|
-
if (!piscina) {
|
|
48
|
-
piscina = new Piscina({
|
|
49
|
-
filename: workerPath,
|
|
50
|
-
maxThreads: threadCount,
|
|
51
|
-
idleTimeout: 30000, // 空闲 30 秒后销毁线程
|
|
52
|
-
});
|
|
53
|
-
logger.info(`Thread pool initialized with ${threadCount} workers`);
|
|
54
|
-
}
|
|
55
|
-
return piscina;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
* 默认并发限制
|
|
60
|
-
*/
|
|
61
|
-
const DEFAULT_CONCURRENCY = {
|
|
62
|
-
FILE_IO: 10, // 文件写入并发数
|
|
63
|
-
COS_UPLOAD: 8, // COS 上传并发数
|
|
64
|
-
};
|
|
65
|
-
|
|
66
|
-
/**
|
|
67
|
-
* 输入类型枚举
|
|
68
|
-
*/
|
|
69
|
-
export const InputType = {
|
|
70
|
-
FILE: 'file',
|
|
71
|
-
URL: 'url',
|
|
72
|
-
BUFFER: 'buffer',
|
|
73
|
-
};
|
|
74
|
-
|
|
75
33
|
/**
|
|
76
34
|
* 输出类型枚举
|
|
77
35
|
*/
|
|
@@ -81,279 +39,8 @@ export const OutputType = {
|
|
|
81
39
|
COS: 'cos', // 上传到腾讯云 COS
|
|
82
40
|
};
|
|
83
41
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
*/
|
|
87
|
-
function detectInputType(input) {
|
|
88
|
-
if (Buffer.isBuffer(input)) {
|
|
89
|
-
return InputType.BUFFER;
|
|
90
|
-
}
|
|
91
|
-
if (typeof input === 'string') {
|
|
92
|
-
if (input.startsWith('http://') || input.startsWith('https://')) {
|
|
93
|
-
return InputType.URL;
|
|
94
|
-
}
|
|
95
|
-
return InputType.FILE;
|
|
96
|
-
}
|
|
97
|
-
throw new Error('Invalid input: must be a file path, URL, or Buffer');
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
/**
|
|
101
|
-
* 从 URL 获取文件大小
|
|
102
|
-
*/
|
|
103
|
-
async function getRemoteFileSize(url) {
|
|
104
|
-
const response = await fetch(url, {
|
|
105
|
-
method: 'HEAD',
|
|
106
|
-
signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
|
|
107
|
-
});
|
|
108
|
-
|
|
109
|
-
if (!response.ok) {
|
|
110
|
-
throw new Error(`Failed to get file size: ${response.status} ${response.statusText}`);
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
const contentLength = response.headers.get('content-length');
|
|
114
|
-
if (!contentLength) {
|
|
115
|
-
throw new Error('Server did not return Content-Length header');
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
return parseInt(contentLength, 10);
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
/**
|
|
122
|
-
* 流式下载远程文件到临时文件
|
|
123
|
-
*/
|
|
124
|
-
async function downloadToTempFile(url) {
|
|
125
|
-
const response = await fetch(url, {
|
|
126
|
-
signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
|
|
127
|
-
});
|
|
128
|
-
|
|
129
|
-
if (!response.ok) {
|
|
130
|
-
throw new Error(`Failed to download file: ${response.status} ${response.statusText}`);
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
const tempDir = os.tmpdir();
|
|
134
|
-
const tempFile = path.join(tempDir, `pdf2img_${Date.now()}_${Math.random().toString(36).slice(2)}.pdf`);
|
|
135
|
-
|
|
136
|
-
const fileStream = fs.createWriteStream(tempFile);
|
|
137
|
-
|
|
138
|
-
try {
|
|
139
|
-
await pipeline(response.body, fileStream);
|
|
140
|
-
return tempFile;
|
|
141
|
-
} catch (err) {
|
|
142
|
-
try {
|
|
143
|
-
await fs.promises.unlink(tempFile);
|
|
144
|
-
} catch {}
|
|
145
|
-
throw err;
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
/**
|
|
150
|
-
* 保存单个页面到文件
|
|
151
|
-
*/
|
|
152
|
-
async function savePageToFile(page, outputDir, prefix, ext) {
|
|
153
|
-
if (!page.success || !page.buffer) {
|
|
154
|
-
return { ...page, outputPath: null };
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
try {
|
|
158
|
-
const filename = `${prefix}_${page.pageNum}.${ext}`;
|
|
159
|
-
const outputPath = path.join(outputDir, filename);
|
|
160
|
-
await fs.promises.writeFile(outputPath, page.buffer);
|
|
161
|
-
|
|
162
|
-
return {
|
|
163
|
-
pageNum: page.pageNum,
|
|
164
|
-
width: page.width,
|
|
165
|
-
height: page.height,
|
|
166
|
-
success: true,
|
|
167
|
-
outputPath,
|
|
168
|
-
size: page.buffer.length,
|
|
169
|
-
};
|
|
170
|
-
} catch (err) {
|
|
171
|
-
return {
|
|
172
|
-
pageNum: page.pageNum,
|
|
173
|
-
width: page.width,
|
|
174
|
-
height: page.height,
|
|
175
|
-
success: false,
|
|
176
|
-
error: `File save failed: ${err.message}`,
|
|
177
|
-
outputPath: null,
|
|
178
|
-
};
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
/**
|
|
183
|
-
* 保存渲染结果到文件
|
|
184
|
-
*/
|
|
185
|
-
async function saveToFiles(pages, outputDir, prefix = 'page', format = 'webp', concurrency = DEFAULT_CONCURRENCY.FILE_IO) {
|
|
186
|
-
await fs.promises.mkdir(outputDir, { recursive: true });
|
|
187
|
-
|
|
188
|
-
const ext = getExtension(format);
|
|
189
|
-
const limit = pLimit(concurrency);
|
|
190
|
-
|
|
191
|
-
const results = await Promise.all(
|
|
192
|
-
pages.map(page => limit(() => savePageToFile(page, outputDir, prefix, ext)))
|
|
193
|
-
);
|
|
194
|
-
|
|
195
|
-
return results.sort((a, b) => a.pageNum - b.pageNum);
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
/**
|
|
199
|
-
* 上传单个页面到 COS
|
|
200
|
-
*/
|
|
201
|
-
async function uploadPageToCos(page, cos, cosConfig, keyPrefix, ext, mimeType) {
|
|
202
|
-
if (!page.success || !page.buffer) {
|
|
203
|
-
return { ...page, cosKey: null };
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
try {
|
|
207
|
-
const key = `${keyPrefix}/page_${page.pageNum}.${ext}`;
|
|
208
|
-
|
|
209
|
-
await new Promise((resolve, reject) => {
|
|
210
|
-
cos.putObject({
|
|
211
|
-
Bucket: cosConfig.bucket,
|
|
212
|
-
Region: cosConfig.region,
|
|
213
|
-
Key: key,
|
|
214
|
-
Body: page.buffer,
|
|
215
|
-
ContentType: mimeType,
|
|
216
|
-
}, (err) => {
|
|
217
|
-
if (err) reject(err);
|
|
218
|
-
else resolve();
|
|
219
|
-
});
|
|
220
|
-
});
|
|
221
|
-
|
|
222
|
-
return {
|
|
223
|
-
pageNum: page.pageNum,
|
|
224
|
-
width: page.width,
|
|
225
|
-
height: page.height,
|
|
226
|
-
success: true,
|
|
227
|
-
cosKey: key,
|
|
228
|
-
size: page.buffer.length,
|
|
229
|
-
};
|
|
230
|
-
} catch (err) {
|
|
231
|
-
return {
|
|
232
|
-
pageNum: page.pageNum,
|
|
233
|
-
width: page.width,
|
|
234
|
-
height: page.height,
|
|
235
|
-
success: false,
|
|
236
|
-
error: `Upload failed: ${err.message}`,
|
|
237
|
-
cosKey: null,
|
|
238
|
-
};
|
|
239
|
-
}
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
/**
|
|
243
|
-
* 上传渲染结果到 COS
|
|
244
|
-
*/
|
|
245
|
-
async function uploadToCos(pages, cosConfig, keyPrefix, format = 'webp', concurrency = DEFAULT_CONCURRENCY.COS_UPLOAD) {
|
|
246
|
-
const COS = (await import('cos-nodejs-sdk-v5')).default;
|
|
247
|
-
|
|
248
|
-
const cos = new COS({
|
|
249
|
-
SecretId: cosConfig.secretId,
|
|
250
|
-
SecretKey: cosConfig.secretKey,
|
|
251
|
-
});
|
|
252
|
-
|
|
253
|
-
const ext = getExtension(format);
|
|
254
|
-
const mimeType = getMimeType(format);
|
|
255
|
-
const limit = pLimit(concurrency);
|
|
256
|
-
|
|
257
|
-
const results = await Promise.all(
|
|
258
|
-
pages.map(page => limit(() => uploadPageToCos(page, cos, cosConfig, keyPrefix, ext, mimeType)))
|
|
259
|
-
);
|
|
260
|
-
|
|
261
|
-
return results.sort((a, b) => a.pageNum - b.pageNum);
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
/**
|
|
265
|
-
* 使用线程池渲染 PDF 页面
|
|
266
|
-
*
|
|
267
|
-
* 主线程负责协调,工作线程负责 CPU 密集型任务
|
|
268
|
-
*
|
|
269
|
-
* @param {string|Buffer} input - 输入
|
|
270
|
-
* @param {string} inputType - 输入类型
|
|
271
|
-
* @param {number[]} pages - 页码数组
|
|
272
|
-
* @param {Object} options - 选项
|
|
273
|
-
* @returns {Promise<Object>} 渲染结果
|
|
274
|
-
*/
|
|
275
|
-
async function renderPages(input, inputType, pages, options) {
|
|
276
|
-
const startTime = Date.now();
|
|
277
|
-
let filePath = null;
|
|
278
|
-
let pdfBuffer = null;
|
|
279
|
-
let tempFile = null;
|
|
280
|
-
let numPages;
|
|
281
|
-
|
|
282
|
-
// 准备输入
|
|
283
|
-
if (inputType === InputType.FILE) {
|
|
284
|
-
try {
|
|
285
|
-
await fs.promises.access(input, fs.constants.R_OK);
|
|
286
|
-
} catch {
|
|
287
|
-
throw new Error(`File not found or not readable: ${input}`);
|
|
288
|
-
}
|
|
289
|
-
filePath = input;
|
|
290
|
-
numPages = nativeRenderer.getPageCountFromFile(filePath);
|
|
291
|
-
} else if (inputType === InputType.BUFFER) {
|
|
292
|
-
pdfBuffer = Buffer.isBuffer(input) ? input : Buffer.from(input);
|
|
293
|
-
numPages = nativeRenderer.getPageCount(pdfBuffer);
|
|
294
|
-
} else if (inputType === InputType.URL) {
|
|
295
|
-
const fileSize = await getRemoteFileSize(input);
|
|
296
|
-
logger.debug(`Remote file size: ${(fileSize / 1024 / 1024).toFixed(2)}MB, downloading...`);
|
|
297
|
-
tempFile = await downloadToTempFile(input);
|
|
298
|
-
filePath = tempFile;
|
|
299
|
-
numPages = nativeRenderer.getPageCountFromFile(filePath);
|
|
300
|
-
}
|
|
301
|
-
|
|
302
|
-
// 确定目标页码
|
|
303
|
-
let targetPages;
|
|
304
|
-
if (pages.length === 0) {
|
|
305
|
-
targetPages = Array.from({ length: numPages }, (_, i) => i + 1);
|
|
306
|
-
} else {
|
|
307
|
-
targetPages = pages.filter(p => p >= 1 && p <= numPages);
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
logger.debug(`Rendering ${targetPages.length} pages using thread pool (${threadCount} workers)`);
|
|
311
|
-
|
|
312
|
-
// 获取线程池
|
|
313
|
-
const pool = getThreadPool();
|
|
314
|
-
|
|
315
|
-
try {
|
|
316
|
-
// 为每一页创建任务并提交到线程池
|
|
317
|
-
const tasks = targetPages.map(pageNum => {
|
|
318
|
-
const task = {
|
|
319
|
-
pageNum,
|
|
320
|
-
options,
|
|
321
|
-
};
|
|
322
|
-
|
|
323
|
-
if (filePath) {
|
|
324
|
-
task.filePath = filePath;
|
|
325
|
-
} else if (pdfBuffer) {
|
|
326
|
-
// 注意:Buffer 会被序列化传递给工作线程
|
|
327
|
-
// 对于大文件,建议先保存到临时文件再传递路径
|
|
328
|
-
task.pdfBuffer = pdfBuffer;
|
|
329
|
-
}
|
|
330
|
-
|
|
331
|
-
// 提交任务到线程池
|
|
332
|
-
return pool.run(task);
|
|
333
|
-
});
|
|
334
|
-
|
|
335
|
-
// 等待所有页面的并行处理完成
|
|
336
|
-
const results = await Promise.all(tasks);
|
|
337
|
-
|
|
338
|
-
results.sort((a, b) => a.pageNum - b.pageNum);
|
|
339
|
-
|
|
340
|
-
return {
|
|
341
|
-
success: true,
|
|
342
|
-
numPages,
|
|
343
|
-
pages: results,
|
|
344
|
-
totalTime: Date.now() - startTime,
|
|
345
|
-
renderTime: results.reduce((sum, p) => sum + (p.renderTime || 0), 0),
|
|
346
|
-
encodeTime: results.reduce((sum, p) => sum + (p.encodeTime || 0), 0),
|
|
347
|
-
};
|
|
348
|
-
} finally {
|
|
349
|
-
// 清理临时文件
|
|
350
|
-
if (tempFile) {
|
|
351
|
-
try {
|
|
352
|
-
await fs.promises.unlink(tempFile);
|
|
353
|
-
} catch {}
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
}
|
|
42
|
+
// 重新导出 InputType
|
|
43
|
+
export { InputType };
|
|
357
44
|
|
|
358
45
|
/**
|
|
359
46
|
* PDF 转图片
|
|
@@ -366,6 +53,7 @@ async function renderPages(input, inputType, pages, options) {
|
|
|
366
53
|
* @param {string} [options.prefix='page'] - 输出文件名前缀
|
|
367
54
|
* @param {string} [options.format='webp'] - 输出格式:'webp'、'png'、'jpg'
|
|
368
55
|
* @param {number} [options.quality] - 图片质量(0-100,用于 webp 和 jpg)
|
|
56
|
+
* @param {string} [options.renderer='pdfium'] - 渲染器:'pdfium'(默认)或 'pdfjs'
|
|
369
57
|
* @param {Object} [options.webp] - WebP 编码配置
|
|
370
58
|
* @param {number} [options.webp.quality] - WebP 质量(0-100,默认 80)
|
|
371
59
|
* @param {number} [options.webp.method] - WebP 编码方法(0-6,默认 4,0最快6最慢)
|
|
@@ -388,6 +76,7 @@ export async function convert(input, options = {}) {
|
|
|
388
76
|
outputDir,
|
|
389
77
|
prefix = 'page',
|
|
390
78
|
format = RENDER_CONFIG.OUTPUT_FORMAT,
|
|
79
|
+
renderer = DEFAULT_RENDERER,
|
|
391
80
|
cos: cosConfig,
|
|
392
81
|
cosKeyPrefix = `pdf2img/${Date.now()}`,
|
|
393
82
|
concurrency,
|
|
@@ -400,9 +89,13 @@ export async function convert(input, options = {}) {
|
|
|
400
89
|
throw new Error(`Unsupported format: ${format}. Supported formats: ${SUPPORTED_FORMATS.join(', ')}`);
|
|
401
90
|
}
|
|
402
91
|
|
|
92
|
+
// 获取实际使用的渲染器
|
|
93
|
+
const actualRenderer = getRendererType({ renderer });
|
|
94
|
+
logger.debug(`Renderer: ${actualRenderer}`);
|
|
95
|
+
|
|
403
96
|
// 检查渲染器可用性
|
|
404
|
-
if (!nativeRenderer.isNativeAvailable()) {
|
|
405
|
-
throw new Error('Native renderer is not available. Please ensure PDFium library is installed.');
|
|
97
|
+
if (actualRenderer === RendererType.PDFIUM && !nativeRenderer.isNativeAvailable()) {
|
|
98
|
+
throw new Error('Native renderer is not available. Please ensure PDFium library is installed or use renderer: "pdfjs".');
|
|
406
99
|
}
|
|
407
100
|
|
|
408
101
|
// 检测输入类型
|
|
@@ -419,6 +112,7 @@ export async function convert(input, options = {}) {
|
|
|
419
112
|
pngCompression: renderOptions.png?.compressionLevel,
|
|
420
113
|
targetWidth: renderOptions.targetWidth,
|
|
421
114
|
detectScan: renderOptions.detectScan,
|
|
115
|
+
renderer: actualRenderer,
|
|
422
116
|
};
|
|
423
117
|
|
|
424
118
|
// 使用线程池渲染页面
|
|
@@ -497,11 +191,14 @@ export async function convert(input, options = {}) {
|
|
|
497
191
|
outputResult = normalizedPages.sort((a, b) => a.pageNum - b.pageNum);
|
|
498
192
|
}
|
|
499
193
|
|
|
194
|
+
const threadCount = getThreadCount();
|
|
195
|
+
|
|
500
196
|
return {
|
|
501
197
|
success: true,
|
|
502
198
|
numPages: result.numPages,
|
|
503
199
|
renderedPages: outputResult.filter(p => p.success).length,
|
|
504
200
|
format: normalizedFormat,
|
|
201
|
+
renderer: result.renderer || actualRenderer,
|
|
505
202
|
pages: outputResult,
|
|
506
203
|
timing: {
|
|
507
204
|
total: Date.now() - startTime,
|
|
@@ -511,6 +208,8 @@ export async function convert(input, options = {}) {
|
|
|
511
208
|
threadPool: {
|
|
512
209
|
workers: threadCount,
|
|
513
210
|
},
|
|
211
|
+
// 流式渲染统计(仅 URL 输入时存在)
|
|
212
|
+
...(result.streamStats && { streamStats: result.streamStats }),
|
|
514
213
|
};
|
|
515
214
|
}
|
|
516
215
|
|
|
@@ -518,9 +217,34 @@ export async function convert(input, options = {}) {
|
|
|
518
217
|
* 获取 PDF 页数(异步版本)
|
|
519
218
|
*
|
|
520
219
|
* @param {string|Buffer} input - PDF 输入(文件路径、URL 或 Buffer)
|
|
220
|
+
* @param {Object} [options] - 选项
|
|
221
|
+
* @param {string} [options.renderer] - 渲染器:'pdfium' 或 'pdfjs'
|
|
521
222
|
* @returns {Promise<number>} 页数
|
|
522
223
|
*/
|
|
523
|
-
export async function getPageCount(input) {
|
|
224
|
+
export async function getPageCount(input, options = {}) {
|
|
225
|
+
const rendererType = getRendererType(options);
|
|
226
|
+
|
|
227
|
+
// 使用 PDF.js 渲染器
|
|
228
|
+
if (rendererType === RendererType.PDFJS) {
|
|
229
|
+
if (Buffer.isBuffer(input)) {
|
|
230
|
+
return pdfjsRenderer.getPageCount(input);
|
|
231
|
+
}
|
|
232
|
+
if (typeof input === 'string') {
|
|
233
|
+
if (input.startsWith('http://') || input.startsWith('https://')) {
|
|
234
|
+
// URL 输入需要下载
|
|
235
|
+
const tempFile = await downloadToTempFile(input);
|
|
236
|
+
try {
|
|
237
|
+
return pdfjsRenderer.getPageCountFromFile(tempFile);
|
|
238
|
+
} finally {
|
|
239
|
+
try { await fs.promises.unlink(tempFile); } catch {}
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
return pdfjsRenderer.getPageCountFromFile(input);
|
|
243
|
+
}
|
|
244
|
+
throw new Error('Invalid input: must be a file path, URL, or Buffer');
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// 使用 PDFium 渲染器
|
|
524
248
|
if (!nativeRenderer.isNativeAvailable()) {
|
|
525
249
|
throw new Error('Native renderer is not available');
|
|
526
250
|
}
|
|
@@ -530,23 +254,15 @@ export async function getPageCount(input) {
|
|
|
530
254
|
}
|
|
531
255
|
|
|
532
256
|
if (typeof input === 'string') {
|
|
533
|
-
// 检查是否是 URL
|
|
534
257
|
if (input.startsWith('http://') || input.startsWith('https://')) {
|
|
535
|
-
// URL 输入:下载到临时文件后获取页数
|
|
536
258
|
const tempFile = await downloadToTempFile(input);
|
|
537
259
|
try {
|
|
538
260
|
return nativeRenderer.getPageCountFromFile(tempFile);
|
|
539
261
|
} finally {
|
|
540
|
-
|
|
541
|
-
try {
|
|
542
|
-
await fs.promises.unlink(tempFile);
|
|
543
|
-
} catch {
|
|
544
|
-
// 忽略清理错误
|
|
545
|
-
}
|
|
262
|
+
try { await fs.promises.unlink(tempFile); } catch {}
|
|
546
263
|
}
|
|
547
264
|
}
|
|
548
265
|
|
|
549
|
-
// 本地文件路径
|
|
550
266
|
try {
|
|
551
267
|
await fs.promises.access(input, fs.constants.R_OK);
|
|
552
268
|
} catch {
|
|
@@ -582,47 +298,37 @@ export function getPageCountSync(input) {
|
|
|
582
298
|
|
|
583
299
|
/**
 * Report whether a renderer can be used.
 *
 * - 'pdfjs'  : result of pdfjsRenderer.isPdfjsAvailable()
 * - 'pdfium' : result of nativeRenderer.isNativeAvailable()
 * - anything else (including no argument): the native renderer is checked
 *   first and the PDF.js renderer is only consulted when that check is falsy.
 *
 * @param {string} [renderer] - Renderer type: 'pdfium' or 'pdfjs'
 * @returns {boolean} Whether the requested renderer (or any renderer) is available
 */
export function isAvailable(renderer) {
  switch (renderer) {
    case RendererType.PDFJS:
      return pdfjsRenderer.isPdfjsAvailable();
    case RendererType.PDFIUM:
      return nativeRenderer.isNativeAvailable();
    default:
      // No specific renderer requested: prefer pdfium, fall back to pdfjs.
      return nativeRenderer.isNativeAvailable() || pdfjsRenderer.isPdfjsAvailable();
  }
}
|
|
616
315
|
|
|
617
316
|
/**
 * Get version information for a renderer.
 *
 * An explicit 'pdfjs' request always reports the PDF.js version. For any other
 * value the native renderer's version is reported when the native renderer is
 * available; otherwise the PDF.js version is used as the fallback.
 *
 * @param {string} [renderer] - Renderer type
 * @returns {string} Version information
 */
export function getVersion(renderer) {
  // Short-circuit keeps the native availability probe from running when
  // PDF.js was explicitly requested.
  const useNative = renderer !== RendererType.PDFJS && nativeRenderer.isNativeAvailable();
  return useNative ? nativeRenderer.getVersion() : pdfjsRenderer.getPdfjsVersion();
}
|
|
331
|
+
|
|
332
|
+
// 重新导出渲染器类型和线程池相关函数
|
|
333
|
+
export { RendererType };
|
|
334
|
+
export { getThreadPoolStats, destroyThreadPool };
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 远程文件下载模块
|
|
3
|
+
*
|
|
4
|
+
* 提供流式下载和文件大小获取功能
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import fs from 'fs';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import os from 'os';
|
|
10
|
+
import { pipeline } from 'stream/promises';
|
|
11
|
+
import { createLogger } from '../utils/logger.js';
|
|
12
|
+
import { TIMEOUT_CONFIG } from './config.js';
|
|
13
|
+
|
|
14
|
+
const logger = createLogger('Downloader');
|
|
15
|
+
|
|
16
|
+
/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds
 * @returns {Promise<void>} Promise that resolves once the timer fires
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
22
|
+
|
|
23
|
+
/**
 * Get the size of a remote file by issuing a HEAD request and reading the
 * Content-Length response header.
 *
 * @param {string} url - Remote file URL
 * @returns {Promise<number>} File size in bytes
 * @throws {Error} If the response status is not OK, or if the Content-Length
 *   header is missing or is not a non-negative integer
 */
export async function getRemoteFileSize(url) {
  const response = await fetch(url, {
    method: 'HEAD',
    // Abort the request once the configured download timeout elapses.
    signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
  });

  if (!response.ok) {
    throw new Error(`Failed to get file size: ${response.status} ${response.statusText}`);
  }

  const contentLength = response.headers.get('content-length');
  if (!contentLength) {
    throw new Error('Server did not return Content-Length header');
  }

  // parseInt alone would turn "abc" into NaN and "12abc" into 12; require the
  // whole header value to be decimal digits so callers never receive garbage.
  const normalized = String(contentLength).trim();
  const size = Number.parseInt(normalized, 10);
  if (!/^\d+$/.test(normalized) || !Number.isSafeInteger(size)) {
    throw new Error(`Server returned an invalid Content-Length header: ${contentLength}`);
  }

  return size;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Stream a remote file into a temporary file, retrying transient failures
 * with exponential backoff (1s, 2s, 4s, ...).
 *
 * A failure is treated as transient when the error, or the underlying error
 * attached as `cause` (which is where fetch puts the network errno), carries
 * one of the known connection error codes, or when the request was aborted
 * or timed out. Any other failure (for example a non-OK HTTP status) stops
 * the loop immediately.
 *
 * @param {string} url - Remote file URL
 * @param {number} [maxRetries=3] - Maximum number of attempts; values below 1
 *   (or non-numeric values) still result in a single attempt
 * @returns {Promise<string>} Path of the temporary file; the caller is
 *   responsible for deleting it
 * @throws {Error} When every attempt failed or a non-retryable error
 *   occurred; the last underlying error is available as `error.cause`
 */
export async function downloadToTempFile(url, maxRetries = 3) {
  const retryableCodes = new Set(['EPIPE', 'ECONNRESET', 'ETIMEDOUT', 'ECONNREFUSED', 'UND_ERR_SOCKET']);
  // AbortSignal.timeout() rejects with a "TimeoutError" DOMException, while a
  // manually aborted signal rejects with "AbortError"; both are worth a retry.
  const retryableNames = new Set(['AbortError', 'TimeoutError']);

  // Always make at least one attempt so there is a real error to report.
  const requested = Math.trunc(Number(maxRetries));
  const totalAttempts = Number.isNaN(requested) || requested < 1 ? 1 : requested;

  let lastError;
  let attemptsMade = 0;

  for (let attempt = 1; attempt <= totalAttempts; attempt++) {
    attemptsMade = attempt;
    const tempFile = path.join(os.tmpdir(), `pdf2img_${Date.now()}_${Math.random().toString(36).slice(2)}.pdf`);

    try {
      const response = await fetch(url, {
        signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
      });

      if (!response.ok) {
        throw new Error(`Failed to download file: ${response.status} ${response.statusText}`);
      }

      await pipeline(response.body, fs.createWriteStream(tempFile));
      return tempFile;
    } catch (err) {
      lastError = err;

      // Best-effort cleanup of a partially written file. ENOENT is expected
      // when the failure happened before the file was created.
      try {
        await fs.promises.unlink(tempFile);
      } catch (cleanupErr) {
        if (cleanupErr?.code !== 'ENOENT') {
          logger.debug(`Failed to remove temp file ${tempFile}: ${cleanupErr?.message}`);
        }
      }

      // fetch wraps network failures as `TypeError: fetch failed` and puts
      // the errno on `cause`, so both levels have to be inspected.
      const code = [err?.code, err?.cause?.code].find((candidate) => retryableCodes.has(candidate));
      const isRetryable = code !== undefined || retryableNames.has(err?.name);

      if (!isRetryable) {
        break;
      }

      if (attempt < totalAttempts) {
        const delay = 2 ** (attempt - 1) * 1000;
        logger.debug(`Download failed (${code || err.message}), retrying in ${delay}ms (attempt ${attempt}/${totalAttempts})`);
        await sleep(delay);
      }
    }
  }

  // Report the number of attempts actually made (a non-retryable error stops
  // after the first one) and keep the original error reachable via `cause`.
  const plural = attemptsMade === 1 ? '' : 's';
  throw new Error(`Download failed after ${attemptsMade} attempt${plural}: ${lastError?.message}`, { cause: lastError });
}
|