node-pdf2img 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-pdf2img",
3
- "version": "0.1.7",
3
+ "version": "0.1.8",
4
4
  "description": "High-performance PDF to image converter using PDFium native renderer",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
@@ -55,7 +55,7 @@
55
55
  "p-limit": "^7.2.0",
56
56
  "piscina": "^5.1.4",
57
57
  "sharp": "^0.33.0",
58
- "node-pdf2img-native": "^1.1.8"
58
+ "node-pdf2img-native": "^1.1.9"
59
59
  },
60
60
  "devDependencies": {
61
61
  "@types/node": "^20.0.0"
@@ -15,63 +15,16 @@
15
15
  */
16
16
 
17
17
  import fs from 'fs';
18
- import path from 'path';
19
- import os from 'os';
20
- import { pipeline } from 'stream/promises';
21
- import { fileURLToPath } from 'url';
22
- import pLimit from 'p-limit';
23
- import Piscina from 'piscina';
24
18
  import { createLogger } from '../utils/logger.js';
25
- import { RENDER_CONFIG, TIMEOUT_CONFIG, SUPPORTED_FORMATS, getExtension, getMimeType } from './config.js';
19
+ import { RENDER_CONFIG, SUPPORTED_FORMATS } from './config.js';
26
20
  import * as nativeRenderer from '../renderers/native.js';
21
+ import { getThreadCount, getThreadPoolStats, destroyThreadPool } from './thread-pool.js';
22
+ import { downloadToTempFile } from './downloader.js';
23
+ import { saveToFiles, uploadToCos, DEFAULT_CONCURRENCY } from './output-handler.js';
24
+ import { InputType, detectInputType, renderPages } from './renderer.js';
27
25
 
28
26
  const logger = createLogger('Converter');
29
27
 
30
- // ==================== 线程池初始化 ====================
31
-
32
- // 获取 worker.js 的路径
33
- const __filename = fileURLToPath(import.meta.url);
34
- const __dirname = path.dirname(__filename);
35
- const workerPath = path.resolve(__dirname, '../worker.js');
36
-
37
- // 创建全局线程池实例
38
- // 线程数默认为 CPU 核心数,可通过环境变量调整
39
- const threadCount = parseInt(process.env.PDF2IMG_THREAD_COUNT, 10) || os.cpus().length;
40
-
41
- let piscina = null;
42
-
43
- /**
44
- * 获取或创建线程池实例(懒加载)
45
- */
46
- function getThreadPool() {
47
- if (!piscina) {
48
- piscina = new Piscina({
49
- filename: workerPath,
50
- maxThreads: threadCount,
51
- idleTimeout: 30000, // 空闲 30 秒后销毁线程
52
- });
53
- logger.info(`Thread pool initialized with ${threadCount} workers`);
54
- }
55
- return piscina;
56
- }
57
-
58
- /**
59
- * 默认并发限制
60
- */
61
- const DEFAULT_CONCURRENCY = {
62
- FILE_IO: 10, // 文件写入并发数
63
- COS_UPLOAD: 8, // COS 上传并发数
64
- };
65
-
66
- /**
67
- * 输入类型枚举
68
- */
69
- export const InputType = {
70
- FILE: 'file',
71
- URL: 'url',
72
- BUFFER: 'buffer',
73
- };
74
-
75
28
  /**
76
29
  * 输出类型枚举
77
30
  */
@@ -81,279 +34,8 @@ export const OutputType = {
81
34
  COS: 'cos', // 上传到腾讯云 COS
82
35
  };
83
36
 
84
- /**
85
- * 检测输入类型
86
- */
87
- function detectInputType(input) {
88
- if (Buffer.isBuffer(input)) {
89
- return InputType.BUFFER;
90
- }
91
- if (typeof input === 'string') {
92
- if (input.startsWith('http://') || input.startsWith('https://')) {
93
- return InputType.URL;
94
- }
95
- return InputType.FILE;
96
- }
97
- throw new Error('Invalid input: must be a file path, URL, or Buffer');
98
- }
99
-
100
- /**
101
- * 从 URL 获取文件大小
102
- */
103
- async function getRemoteFileSize(url) {
104
- const response = await fetch(url, {
105
- method: 'HEAD',
106
- signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
107
- });
108
-
109
- if (!response.ok) {
110
- throw new Error(`Failed to get file size: ${response.status} ${response.statusText}`);
111
- }
112
-
113
- const contentLength = response.headers.get('content-length');
114
- if (!contentLength) {
115
- throw new Error('Server did not return Content-Length header');
116
- }
117
-
118
- return parseInt(contentLength, 10);
119
- }
120
-
121
- /**
122
- * 流式下载远程文件到临时文件
123
- */
124
- async function downloadToTempFile(url) {
125
- const response = await fetch(url, {
126
- signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
127
- });
128
-
129
- if (!response.ok) {
130
- throw new Error(`Failed to download file: ${response.status} ${response.statusText}`);
131
- }
132
-
133
- const tempDir = os.tmpdir();
134
- const tempFile = path.join(tempDir, `pdf2img_${Date.now()}_${Math.random().toString(36).slice(2)}.pdf`);
135
-
136
- const fileStream = fs.createWriteStream(tempFile);
137
-
138
- try {
139
- await pipeline(response.body, fileStream);
140
- return tempFile;
141
- } catch (err) {
142
- try {
143
- await fs.promises.unlink(tempFile);
144
- } catch {}
145
- throw err;
146
- }
147
- }
148
-
149
- /**
150
- * 保存单个页面到文件
151
- */
152
- async function savePageToFile(page, outputDir, prefix, ext) {
153
- if (!page.success || !page.buffer) {
154
- return { ...page, outputPath: null };
155
- }
156
-
157
- try {
158
- const filename = `${prefix}_${page.pageNum}.${ext}`;
159
- const outputPath = path.join(outputDir, filename);
160
- await fs.promises.writeFile(outputPath, page.buffer);
161
-
162
- return {
163
- pageNum: page.pageNum,
164
- width: page.width,
165
- height: page.height,
166
- success: true,
167
- outputPath,
168
- size: page.buffer.length,
169
- };
170
- } catch (err) {
171
- return {
172
- pageNum: page.pageNum,
173
- width: page.width,
174
- height: page.height,
175
- success: false,
176
- error: `File save failed: ${err.message}`,
177
- outputPath: null,
178
- };
179
- }
180
- }
181
-
182
- /**
183
- * 保存渲染结果到文件
184
- */
185
- async function saveToFiles(pages, outputDir, prefix = 'page', format = 'webp', concurrency = DEFAULT_CONCURRENCY.FILE_IO) {
186
- await fs.promises.mkdir(outputDir, { recursive: true });
187
-
188
- const ext = getExtension(format);
189
- const limit = pLimit(concurrency);
190
-
191
- const results = await Promise.all(
192
- pages.map(page => limit(() => savePageToFile(page, outputDir, prefix, ext)))
193
- );
194
-
195
- return results.sort((a, b) => a.pageNum - b.pageNum);
196
- }
197
-
198
- /**
199
- * 上传单个页面到 COS
200
- */
201
- async function uploadPageToCos(page, cos, cosConfig, keyPrefix, ext, mimeType) {
202
- if (!page.success || !page.buffer) {
203
- return { ...page, cosKey: null };
204
- }
205
-
206
- try {
207
- const key = `${keyPrefix}/page_${page.pageNum}.${ext}`;
208
-
209
- await new Promise((resolve, reject) => {
210
- cos.putObject({
211
- Bucket: cosConfig.bucket,
212
- Region: cosConfig.region,
213
- Key: key,
214
- Body: page.buffer,
215
- ContentType: mimeType,
216
- }, (err) => {
217
- if (err) reject(err);
218
- else resolve();
219
- });
220
- });
221
-
222
- return {
223
- pageNum: page.pageNum,
224
- width: page.width,
225
- height: page.height,
226
- success: true,
227
- cosKey: key,
228
- size: page.buffer.length,
229
- };
230
- } catch (err) {
231
- return {
232
- pageNum: page.pageNum,
233
- width: page.width,
234
- height: page.height,
235
- success: false,
236
- error: `Upload failed: ${err.message}`,
237
- cosKey: null,
238
- };
239
- }
240
- }
241
-
242
- /**
243
- * 上传渲染结果到 COS
244
- */
245
- async function uploadToCos(pages, cosConfig, keyPrefix, format = 'webp', concurrency = DEFAULT_CONCURRENCY.COS_UPLOAD) {
246
- const COS = (await import('cos-nodejs-sdk-v5')).default;
247
-
248
- const cos = new COS({
249
- SecretId: cosConfig.secretId,
250
- SecretKey: cosConfig.secretKey,
251
- });
252
-
253
- const ext = getExtension(format);
254
- const mimeType = getMimeType(format);
255
- const limit = pLimit(concurrency);
256
-
257
- const results = await Promise.all(
258
- pages.map(page => limit(() => uploadPageToCos(page, cos, cosConfig, keyPrefix, ext, mimeType)))
259
- );
260
-
261
- return results.sort((a, b) => a.pageNum - b.pageNum);
262
- }
263
-
264
- /**
265
- * 使用线程池渲染 PDF 页面
266
- *
267
- * 主线程负责协调,工作线程负责 CPU 密集型任务
268
- *
269
- * @param {string|Buffer} input - 输入
270
- * @param {string} inputType - 输入类型
271
- * @param {number[]} pages - 页码数组
272
- * @param {Object} options - 选项
273
- * @returns {Promise<Object>} 渲染结果
274
- */
275
- async function renderPages(input, inputType, pages, options) {
276
- const startTime = Date.now();
277
- let filePath = null;
278
- let pdfBuffer = null;
279
- let tempFile = null;
280
- let numPages;
281
-
282
- // 准备输入
283
- if (inputType === InputType.FILE) {
284
- try {
285
- await fs.promises.access(input, fs.constants.R_OK);
286
- } catch {
287
- throw new Error(`File not found or not readable: ${input}`);
288
- }
289
- filePath = input;
290
- numPages = nativeRenderer.getPageCountFromFile(filePath);
291
- } else if (inputType === InputType.BUFFER) {
292
- pdfBuffer = Buffer.isBuffer(input) ? input : Buffer.from(input);
293
- numPages = nativeRenderer.getPageCount(pdfBuffer);
294
- } else if (inputType === InputType.URL) {
295
- const fileSize = await getRemoteFileSize(input);
296
- logger.debug(`Remote file size: ${(fileSize / 1024 / 1024).toFixed(2)}MB, downloading...`);
297
- tempFile = await downloadToTempFile(input);
298
- filePath = tempFile;
299
- numPages = nativeRenderer.getPageCountFromFile(filePath);
300
- }
301
-
302
- // 确定目标页码
303
- let targetPages;
304
- if (pages.length === 0) {
305
- targetPages = Array.from({ length: numPages }, (_, i) => i + 1);
306
- } else {
307
- targetPages = pages.filter(p => p >= 1 && p <= numPages);
308
- }
309
-
310
- logger.debug(`Rendering ${targetPages.length} pages using thread pool (${threadCount} workers)`);
311
-
312
- // 获取线程池
313
- const pool = getThreadPool();
314
-
315
- try {
316
- // 为每一页创建任务并提交到线程池
317
- const tasks = targetPages.map(pageNum => {
318
- const task = {
319
- pageNum,
320
- options,
321
- };
322
-
323
- if (filePath) {
324
- task.filePath = filePath;
325
- } else if (pdfBuffer) {
326
- // 注意:Buffer 会被序列化传递给工作线程
327
- // 对于大文件,建议先保存到临时文件再传递路径
328
- task.pdfBuffer = pdfBuffer;
329
- }
330
-
331
- // 提交任务到线程池
332
- return pool.run(task);
333
- });
334
-
335
- // 等待所有页面的并行处理完成
336
- const results = await Promise.all(tasks);
337
-
338
- results.sort((a, b) => a.pageNum - b.pageNum);
339
-
340
- return {
341
- success: true,
342
- numPages,
343
- pages: results,
344
- totalTime: Date.now() - startTime,
345
- renderTime: results.reduce((sum, p) => sum + (p.renderTime || 0), 0),
346
- encodeTime: results.reduce((sum, p) => sum + (p.encodeTime || 0), 0),
347
- };
348
- } finally {
349
- // 清理临时文件
350
- if (tempFile) {
351
- try {
352
- await fs.promises.unlink(tempFile);
353
- } catch {}
354
- }
355
- }
356
- }
37
+ // 重新导出 InputType
38
+ export { InputType };
357
39
 
358
40
  /**
359
41
  * PDF 转图片
@@ -497,6 +179,8 @@ export async function convert(input, options = {}) {
497
179
  outputResult = normalizedPages.sort((a, b) => a.pageNum - b.pageNum);
498
180
  }
499
181
 
182
+ const threadCount = getThreadCount();
183
+
500
184
  return {
501
185
  success: true,
502
186
  numPages: result.numPages,
@@ -511,6 +195,8 @@ export async function convert(input, options = {}) {
511
195
  threadPool: {
512
196
  workers: threadCount,
513
197
  },
198
+ // 流式渲染统计(仅 URL 输入时存在)
199
+ ...(result.streamStats && { streamStats: result.streamStats }),
514
200
  };
515
201
  }
516
202
 
@@ -594,35 +280,5 @@ export function getVersion() {
594
280
  return nativeRenderer.getVersion();
595
281
  }
596
282
 
597
- /**
598
- * 获取线程池统计信息
599
- */
600
- export function getThreadPoolStats() {
601
- if (!piscina) {
602
- return {
603
- initialized: false,
604
- workers: threadCount,
605
- };
606
- }
607
- return {
608
- initialized: true,
609
- workers: threadCount,
610
- completed: piscina.completed,
611
- waitTime: piscina.waitTime,
612
- runTime: piscina.runTime,
613
- utilization: piscina.utilization,
614
- };
615
- }
616
-
617
- /**
618
- * 销毁线程池
619
- *
620
- * 在应用关闭时调用,释放工作线程资源
621
- */
622
- export async function destroyThreadPool() {
623
- if (piscina) {
624
- await piscina.destroy();
625
- piscina = null;
626
- logger.info('Thread pool destroyed');
627
- }
628
- }
283
+ // 重新导出线程池相关函数
284
+ export { getThreadPoolStats, destroyThreadPool };
@@ -0,0 +1,98 @@
1
+ /**
2
+ * 远程文件下载模块
3
+ *
4
+ * 提供流式下载和文件大小获取功能
5
+ */
6
+
7
+ import fs from 'fs';
8
+ import path from 'path';
9
+ import os from 'os';
10
+ import { pipeline } from 'stream/promises';
11
+ import { createLogger } from '../utils/logger.js';
12
+ import { TIMEOUT_CONFIG } from './config.js';
13
+
14
+ const logger = createLogger('Downloader');
15
+
16
+ /**
17
+ * 延迟函数
18
+ */
19
+ function sleep(ms) {
20
+ return new Promise(resolve => setTimeout(resolve, ms));
21
+ }
22
+
23
+ /**
24
+ * 从 URL 获取文件大小
25
+ *
26
+ * @param {string} url - 远程文件 URL
27
+ * @returns {Promise<number>} 文件大小(字节)
28
+ */
29
+ export async function getRemoteFileSize(url) {
30
+ const response = await fetch(url, {
31
+ method: 'HEAD',
32
+ signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
33
+ });
34
+
35
+ if (!response.ok) {
36
+ throw new Error(`Failed to get file size: ${response.status} ${response.statusText}`);
37
+ }
38
+
39
+ const contentLength = response.headers.get('content-length');
40
+ if (!contentLength) {
41
+ throw new Error('Server did not return Content-Length header');
42
+ }
43
+
44
+ return parseInt(contentLength, 10);
45
+ }
46
+
47
+ /**
48
+ * 流式下载远程文件到临时文件(带重试)
49
+ *
50
+ * @param {string} url - 远程文件 URL
51
+ * @param {number} maxRetries - 最大重试次数
52
+ * @returns {Promise<string>} 临时文件路径
53
+ */
54
+ export async function downloadToTempFile(url, maxRetries = 3) {
55
+ let lastError;
56
+
57
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
58
+ const tempFile = path.join(os.tmpdir(), `pdf2img_${Date.now()}_${Math.random().toString(36).slice(2)}.pdf`);
59
+
60
+ try {
61
+ const response = await fetch(url, {
62
+ signal: AbortSignal.timeout(TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT),
63
+ });
64
+
65
+ if (!response.ok) {
66
+ throw new Error(`Failed to download file: ${response.status} ${response.statusText}`);
67
+ }
68
+
69
+ const fileStream = fs.createWriteStream(tempFile);
70
+ await pipeline(response.body, fileStream);
71
+ return tempFile;
72
+ } catch (err) {
73
+ lastError = err;
74
+
75
+ // 清理临时文件
76
+ try {
77
+ await fs.promises.unlink(tempFile);
78
+ } catch {}
79
+
80
+ const isRetryable = err.code === 'EPIPE' ||
81
+ err.code === 'ECONNRESET' ||
82
+ err.code === 'ETIMEDOUT' ||
83
+ err.code === 'ECONNREFUSED' ||
84
+ err.code === 'UND_ERR_SOCKET' ||
85
+ err.name === 'AbortError';
86
+
87
+ if (isRetryable && attempt < maxRetries) {
88
+ const delay = Math.pow(2, attempt - 1) * 1000;
89
+ logger.debug(`Download failed (${err.code || err.message}), retrying in ${delay}ms (attempt ${attempt}/${maxRetries})`);
90
+ await sleep(delay);
91
+ } else if (!isRetryable) {
92
+ break;
93
+ }
94
+ }
95
+ }
96
+
97
+ throw new Error(`Download failed after ${maxRetries} attempts: ${lastError.message}`);
98
+ }
@@ -0,0 +1,176 @@
1
+ /**
2
+ * 输出处理模块
3
+ *
4
+ * 负责将渲染结果保存到文件或上传到 COS
5
+ */
6
+
7
+ import fs from 'fs';
8
+ import path from 'path';
9
+ import pLimit from 'p-limit';
10
+ import { createLogger } from '../utils/logger.js';
11
+ import { getExtension, getMimeType } from './config.js';
12
+
13
+ const logger = createLogger('OutputHandler');
14
+
15
+ /**
16
+ * 默认并发限制
17
+ */
18
+ export const DEFAULT_CONCURRENCY = {
19
+ FILE_IO: 10, // 文件写入并发数
20
+ COS_UPLOAD: 4, // COS 上传并发数(降低以减少 EPIPE 错误)
21
+ };
22
+
23
+ /**
24
+ * 延迟函数
25
+ */
26
+ function sleep(ms) {
27
+ return new Promise(resolve => setTimeout(resolve, ms));
28
+ }
29
+
30
+ /**
31
+ * 保存单个页面到文件
32
+ */
33
+ async function savePageToFile(page, outputDir, prefix, ext) {
34
+ if (!page.success || !page.buffer) {
35
+ return { ...page, outputPath: null };
36
+ }
37
+
38
+ try {
39
+ const filename = `${prefix}_${page.pageNum}.${ext}`;
40
+ const outputPath = path.join(outputDir, filename);
41
+ await fs.promises.writeFile(outputPath, page.buffer);
42
+
43
+ return {
44
+ pageNum: page.pageNum,
45
+ width: page.width,
46
+ height: page.height,
47
+ success: true,
48
+ outputPath,
49
+ size: page.buffer.length,
50
+ };
51
+ } catch (err) {
52
+ return {
53
+ pageNum: page.pageNum,
54
+ width: page.width,
55
+ height: page.height,
56
+ success: false,
57
+ error: `File save failed: ${err.message}`,
58
+ outputPath: null,
59
+ };
60
+ }
61
+ }
62
+
63
+ /**
64
+ * 保存渲染结果到文件
65
+ *
66
+ * @param {Array} pages - 渲染结果数组
67
+ * @param {string} outputDir - 输出目录
68
+ * @param {string} prefix - 文件名前缀
69
+ * @param {string} format - 输出格式
70
+ * @param {number} concurrency - 并发数
71
+ * @returns {Promise<Array>} 保存结果
72
+ */
73
+ export async function saveToFiles(pages, outputDir, prefix = 'page', format = 'webp', concurrency = DEFAULT_CONCURRENCY.FILE_IO) {
74
+ await fs.promises.mkdir(outputDir, { recursive: true });
75
+
76
+ const ext = getExtension(format);
77
+ const limit = pLimit(concurrency);
78
+
79
+ const results = await Promise.all(
80
+ pages.map(page => limit(() => savePageToFile(page, outputDir, prefix, ext)))
81
+ );
82
+
83
+ return results.sort((a, b) => a.pageNum - b.pageNum);
84
+ }
85
+
86
+ /**
87
+ * 上传单个页面到 COS(带重试)
88
+ */
89
+ async function uploadPageToCos(page, cos, cosConfig, keyPrefix, ext, mimeType, maxRetries = 3) {
90
+ if (!page.success || !page.buffer) {
91
+ return { ...page, cosKey: null };
92
+ }
93
+
94
+ const key = `${keyPrefix}/page_${page.pageNum}.${ext}`;
95
+ let lastError;
96
+
97
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
98
+ try {
99
+ await new Promise((resolve, reject) => {
100
+ cos.putObject({
101
+ Bucket: cosConfig.bucket,
102
+ Region: cosConfig.region,
103
+ Key: key,
104
+ Body: page.buffer,
105
+ ContentType: mimeType,
106
+ }, (err) => {
107
+ if (err) reject(err);
108
+ else resolve();
109
+ });
110
+ });
111
+
112
+ return {
113
+ pageNum: page.pageNum,
114
+ width: page.width,
115
+ height: page.height,
116
+ success: true,
117
+ cosKey: key,
118
+ size: page.buffer.length,
119
+ };
120
+ } catch (err) {
121
+ lastError = err;
122
+ const isRetryable = err.code === 'EPIPE' ||
123
+ err.code === 'ECONNRESET' ||
124
+ err.code === 'ETIMEDOUT' ||
125
+ err.code === 'ECONNREFUSED' ||
126
+ (err.statusCode && err.statusCode >= 500);
127
+
128
+ if (isRetryable && attempt < maxRetries) {
129
+ // 指数退避:1s, 2s, 4s...
130
+ const delay = Math.pow(2, attempt - 1) * 1000;
131
+ logger.debug(`Page ${page.pageNum} upload failed (${err.code || err.message}), retrying in ${delay}ms (attempt ${attempt}/${maxRetries})`);
132
+ await sleep(delay);
133
+ } else if (!isRetryable) {
134
+ break;
135
+ }
136
+ }
137
+ }
138
+
139
+ return {
140
+ pageNum: page.pageNum,
141
+ width: page.width,
142
+ height: page.height,
143
+ success: false,
144
+ error: `Upload failed after ${maxRetries} attempts: ${lastError.message}`,
145
+ cosKey: null,
146
+ };
147
+ }
148
+
149
+ /**
150
+ * 上传渲染结果到 COS
151
+ *
152
+ * @param {Array} pages - 渲染结果数组
153
+ * @param {Object} cosConfig - COS 配置
154
+ * @param {string} keyPrefix - COS key 前缀
155
+ * @param {string} format - 输出格式
156
+ * @param {number} concurrency - 并发数
157
+ * @returns {Promise<Array>} 上传结果
158
+ */
159
+ export async function uploadToCos(pages, cosConfig, keyPrefix, format = 'webp', concurrency = DEFAULT_CONCURRENCY.COS_UPLOAD) {
160
+ const COS = (await import('cos-nodejs-sdk-v5')).default;
161
+
162
+ const cos = new COS({
163
+ SecretId: cosConfig.secretId,
164
+ SecretKey: cosConfig.secretKey,
165
+ });
166
+
167
+ const ext = getExtension(format);
168
+ const mimeType = getMimeType(format);
169
+ const limit = pLimit(concurrency);
170
+
171
+ const results = await Promise.all(
172
+ pages.map(page => limit(() => uploadPageToCos(page, cos, cosConfig, keyPrefix, ext, mimeType)))
173
+ );
174
+
175
+ return results.sort((a, b) => a.pageNum - b.pageNum);
176
+ }
@@ -0,0 +1,224 @@
1
+ /**
2
+ * PDF 渲染模块
3
+ *
4
+ * 负责 PDF 页面的渲染逻辑,支持本地文件、Buffer 和 URL 输入
5
+ */
6
+
7
+ import fs from 'fs';
8
+ import { createLogger } from '../utils/logger.js';
9
+ import * as nativeRenderer from '../renderers/native.js';
10
+ import { getThreadPool, getThreadCount } from './thread-pool.js';
11
+ import { getRemoteFileSize, downloadToTempFile } from './downloader.js';
12
+
13
+ const logger = createLogger('Renderer');
14
+
15
+ /**
16
+ * 输入类型枚举
17
+ */
18
+ export const InputType = {
19
+ FILE: 'file',
20
+ URL: 'url',
21
+ BUFFER: 'buffer',
22
+ };
23
+
24
+ /**
25
+ * 流式渲染阈值(小于此值使用下载模式)
26
+ */
27
+ const STREAM_THRESHOLD = 2 * 1024 * 1024; // 2MB
28
+
29
+ /**
30
+ * 检测输入类型
31
+ *
32
+ * @param {string|Buffer} input - 输入
33
+ * @returns {string} 输入类型
34
+ */
35
+ export function detectInputType(input) {
36
+ if (Buffer.isBuffer(input)) {
37
+ return InputType.BUFFER;
38
+ }
39
+ if (typeof input === 'string') {
40
+ if (input.startsWith('http://') || input.startsWith('https://')) {
41
+ return InputType.URL;
42
+ }
43
+ return InputType.FILE;
44
+ }
45
+ throw new Error('Invalid input: must be a file path, URL, or Buffer');
46
+ }
47
+
48
+ /**
49
+ * 使用线程池渲染 PDF 页面
50
+ *
51
+ * 主线程负责协调,工作线程负责 CPU 密集型任务
52
+ *
53
+ * @param {string|Buffer} input - 输入
54
+ * @param {string} inputType - 输入类型
55
+ * @param {number[]} pages - 页码数组
56
+ * @param {Object} options - 选项
57
+ * @returns {Promise<Object>} 渲染结果
58
+ */
59
+ export async function renderPages(input, inputType, pages, options) {
60
+ const startTime = Date.now();
61
+
62
+ // URL 输入:优先使用流式渲染
63
+ if (inputType === InputType.URL) {
64
+ return renderPagesFromUrl(input, pages, options, startTime);
65
+ }
66
+
67
+ // 本地文件或 Buffer 输入:使用线程池渲染
68
+ return renderPagesFromLocal(input, inputType, pages, options, startTime);
69
+ }
70
+
71
+ /**
72
+ * 从 URL 渲染 PDF 页面(流式)
73
+ *
74
+ * 使用 HTTP Range 请求按需获取数据,避免完整下载
75
+ */
76
+ async function renderPagesFromUrl(url, pages, options, startTime) {
77
+ // 获取文件大小
78
+ const fileSize = await getRemoteFileSize(url);
79
+
80
+ // 小文件直接下载后渲染,避免多次 Range 请求开销
81
+ if (fileSize < STREAM_THRESHOLD) {
82
+ logger.debug(`Remote file size: ${(fileSize / 1024 / 1024).toFixed(2)}MB (< 2MB), using download mode`);
83
+ return renderPagesWithDownload(url, pages, options, startTime);
84
+ }
85
+
86
+ logger.debug(`Remote file size: ${(fileSize / 1024 / 1024).toFixed(2)}MB, using stream rendering`);
87
+
88
+ try {
89
+ // 使用流式渲染
90
+ const result = await nativeRenderer.renderFromStream(url, fileSize, pages, options);
91
+
92
+ return {
93
+ success: true,
94
+ numPages: result.numPages,
95
+ pages: result.pages,
96
+ totalTime: Date.now() - startTime,
97
+ renderTime: result.pages.reduce((sum, p) => sum + (p.renderTime || 0), 0),
98
+ encodeTime: result.pages.reduce((sum, p) => sum + (p.encodeTime || 0), 0),
99
+ streamStats: result.streamStats,
100
+ };
101
+ } catch (err) {
102
+ // 流式渲染失败,回退到下载后渲染
103
+ logger.warn(`Stream rendering failed: ${err.message}, falling back to download`);
104
+ return renderPagesWithDownload(url, pages, options, startTime);
105
+ }
106
+ }
107
+
108
+ /**
109
+ * 下载后渲染(回退方案)
110
+ */
111
+ async function renderPagesWithDownload(url, pages, options, startTime) {
112
+ const tempFile = await downloadToTempFile(url);
113
+ const threadCount = getThreadCount();
114
+
115
+ try {
116
+ const numPages = nativeRenderer.getPageCountFromFile(tempFile);
117
+
118
+ // 确定目标页码
119
+ let targetPages;
120
+ if (pages.length === 0) {
121
+ targetPages = Array.from({ length: numPages }, (_, i) => i + 1);
122
+ } else {
123
+ targetPages = pages.filter(p => p >= 1 && p <= numPages);
124
+ }
125
+
126
+ logger.debug(`Rendering ${targetPages.length} pages using thread pool (${threadCount} workers)`);
127
+
128
+ const pool = getThreadPool();
129
+
130
+ const tasks = targetPages.map(pageNum => {
131
+ return pool.run({
132
+ pageNum,
133
+ options,
134
+ filePath: tempFile,
135
+ });
136
+ });
137
+
138
+ const results = await Promise.all(tasks);
139
+ results.sort((a, b) => a.pageNum - b.pageNum);
140
+
141
+ return {
142
+ success: true,
143
+ numPages,
144
+ pages: results,
145
+ totalTime: Date.now() - startTime,
146
+ renderTime: results.reduce((sum, p) => sum + (p.renderTime || 0), 0),
147
+ encodeTime: results.reduce((sum, p) => sum + (p.encodeTime || 0), 0),
148
+ };
149
+ } finally {
150
+ try {
151
+ await fs.promises.unlink(tempFile);
152
+ } catch {}
153
+ }
154
+ }
155
+
156
+ /**
157
+ * 从本地文件或 Buffer 渲染 PDF 页面
158
+ */
159
+ async function renderPagesFromLocal(input, inputType, pages, options, startTime) {
160
+ let filePath = null;
161
+ let pdfBuffer = null;
162
+ let numPages;
163
+ const threadCount = getThreadCount();
164
+
165
+ // 准备输入
166
+ if (inputType === InputType.FILE) {
167
+ try {
168
+ await fs.promises.access(input, fs.constants.R_OK);
169
+ } catch {
170
+ throw new Error(`File not found or not readable: ${input}`);
171
+ }
172
+ filePath = input;
173
+ numPages = nativeRenderer.getPageCountFromFile(filePath);
174
+ } else if (inputType === InputType.BUFFER) {
175
+ pdfBuffer = Buffer.isBuffer(input) ? input : Buffer.from(input);
176
+ numPages = nativeRenderer.getPageCount(pdfBuffer);
177
+ }
178
+
179
+ // 确定目标页码
180
+ let targetPages;
181
+ if (pages.length === 0) {
182
+ targetPages = Array.from({ length: numPages }, (_, i) => i + 1);
183
+ } else {
184
+ targetPages = pages.filter(p => p >= 1 && p <= numPages);
185
+ }
186
+
187
+ logger.debug(`Rendering ${targetPages.length} pages using thread pool (${threadCount} workers)`);
188
+
189
+ // 获取线程池
190
+ const pool = getThreadPool();
191
+
192
+ // 为每一页创建任务并提交到线程池
193
+ const tasks = targetPages.map(pageNum => {
194
+ const task = {
195
+ pageNum,
196
+ options,
197
+ };
198
+
199
+ if (filePath) {
200
+ task.filePath = filePath;
201
+ } else if (pdfBuffer) {
202
+ // 注意:Buffer 会被序列化传递给工作线程
203
+ // 对于大文件,建议先保存到临时文件再传递路径
204
+ task.pdfBuffer = pdfBuffer;
205
+ }
206
+
207
+ // 提交任务到线程池
208
+ return pool.run(task);
209
+ });
210
+
211
+ // 等待所有页面的并行处理完成
212
+ const results = await Promise.all(tasks);
213
+
214
+ results.sort((a, b) => a.pageNum - b.pageNum);
215
+
216
+ return {
217
+ success: true,
218
+ numPages,
219
+ pages: results,
220
+ totalTime: Date.now() - startTime,
221
+ renderTime: results.reduce((sum, p) => sum + (p.renderTime || 0), 0),
222
+ encodeTime: results.reduce((sum, p) => sum + (p.encodeTime || 0), 0),
223
+ };
224
+ }
@@ -0,0 +1,78 @@
1
+ /**
2
+ * 线程池管理模块
3
+ *
4
+ * 使用 Piscina 管理工作线程池,用于 CPU 密集型任务
5
+ */
6
+
7
+ import os from 'os';
8
+ import path from 'path';
9
+ import { fileURLToPath } from 'url';
10
+ import Piscina from 'piscina';
11
+ import { createLogger } from '../utils/logger.js';
12
+
13
+ const logger = createLogger('ThreadPool');
14
+
15
+ // 获取 worker.js 的路径
16
+ const __filename = fileURLToPath(import.meta.url);
17
+ const __dirname = path.dirname(__filename);
18
+ const workerPath = path.resolve(__dirname, '../worker.js');
19
+
20
+ // 线程数默认为 CPU 核心数,可通过环境变量调整
21
+ const threadCount = parseInt(process.env.PDF2IMG_THREAD_COUNT, 10) || os.cpus().length;
22
+
23
+ let piscina = null;
24
+
25
+ /**
26
+ * 获取或创建线程池实例(懒加载)
27
+ */
28
+ export function getThreadPool() {
29
+ if (!piscina) {
30
+ piscina = new Piscina({
31
+ filename: workerPath,
32
+ maxThreads: threadCount,
33
+ idleTimeout: 30000, // 空闲 30 秒后销毁线程
34
+ });
35
+ logger.info(`Thread pool initialized with ${threadCount} workers`);
36
+ }
37
+ return piscina;
38
+ }
39
+
40
+ /**
41
+ * 获取线程数
42
+ */
43
+ export function getThreadCount() {
44
+ return threadCount;
45
+ }
46
+
47
+ /**
48
+ * 获取线程池统计信息
49
+ */
50
+ export function getThreadPoolStats() {
51
+ if (!piscina) {
52
+ return {
53
+ initialized: false,
54
+ workers: threadCount,
55
+ };
56
+ }
57
+ return {
58
+ initialized: true,
59
+ workers: threadCount,
60
+ completed: piscina.completed,
61
+ waitTime: piscina.waitTime,
62
+ runTime: piscina.runTime,
63
+ utilization: piscina.utilization,
64
+ };
65
+ }
66
+
67
+ /**
68
+ * 销毁线程池
69
+ *
70
+ * 在应用关闭时调用,释放工作线程资源
71
+ */
72
+ export async function destroyThreadPool() {
73
+ if (piscina) {
74
+ await piscina.destroy();
75
+ piscina = null;
76
+ logger.info('Thread pool destroyed');
77
+ }
78
+ }