node-pdf2img 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,867 @@
1
+ /**
2
+ * PDF.js Renderer - PDF.js 渲染器模块
3
+ *
4
+ * 使用 PDF.js + RangeLoader 进行 PDF 渲染,特点:
5
+ * - 分片加载,只下载需要的数据
6
+ * - 适合大文件
7
+ * - 稳定可靠
8
+ * - 纯 JavaScript 实现,无需原生依赖
9
+ *
10
+ * 渲染策略:串行渲染避免资源争抢
11
+ *
12
+ * @module renderers/pdfjs
13
+ */
14
+
import path from 'path';
import fs from 'fs';
import { createRequire } from 'module';
import { fileURLToPath } from 'url';
import { createLogger, IS_DEV } from '../utils/logger.js';
import { RENDER_CONFIG, ENCODER_CONFIG, TIMEOUT_CONFIG } from '../core/config.js';
import { getDocument, OPS } from "pdfjs-dist/legacy/build/pdf.mjs";
21
+
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

const logger = createLogger('PdfjsRenderer');

// PDF.js operator table, kept for content analysis.
const pdfjsLib = { OPS };

// ==================== PDF.js configuration ====================

/**
 * Locate the directory of the pdfjs-dist package that this module imports.
 *
 * Node's own module resolution is used so the lookup keeps working when the
 * package manager hoists or nests pdfjs-dist differently. If resolution fails,
 * the previous hard-coded location relative to this file is used instead.
 *
 * @returns {string} Absolute path of the pdfjs-dist package directory.
 */
function resolvePdfjsDistDir() {
  try {
    const requireFromHere = createRequire(import.meta.url);
    return path.dirname(requireFromHere.resolve('pdfjs-dist/package.json'));
  } catch (e) {
    logger.debug(`pdfjs-dist package lookup failed, using relative path: ${e.message}`);
    return path.join(__dirname, '../../../node_modules/pdfjs-dist');
  }
}

const PDFJS_DIST_DIR = resolvePdfjsDistDir();

// Both values keep their trailing separator; they are handed to getDocument()
// as cMapUrl / standardFontDataUrl.
const CMAP_URL = path.join(PDFJS_DIST_DIR, 'cmaps/');
const STANDARD_FONT_DATA_URL = path.join(PDFJS_DIST_DIR, 'standard_fonts/');
34
+
// ==================== Rendering configuration ====================

// Render widths and the scale cap come from the shared render configuration.
const {
  TARGET_RENDER_WIDTH,
  IMAGE_HEAVY_TARGET_WIDTH,
  MAX_RENDER_SCALE,
} = RENDER_CONFIG;

// Encoder defaults used when the caller does not override them.
const {
  WEBP_QUALITY,
  WEBP_METHOD,
  JPEG_QUALITY,
  PNG_COMPRESSION,
} = ENCODER_CONFIG;

// PDF.js verbosity (pdf.js VerbosityLevel: ERRORS = 0, WARNINGS = 1, INFOS = 5):
// warnings in development, errors only otherwise.
const PDFJS_VERBOSITY = IS_DEV ? 1 : 0;

// WebP format limits: maximum side length and the resulting pixel budget.
const WEBP_MAX_DIMENSION = 16383;
const WEBP_MAX_PIXELS = 16383 * 16383;
50
+
// ==================== Range loader configuration ====================

/**
 * Read an integer setting from an environment variable.
 *
 * The value is parsed as base 10. A missing, non-numeric or too-small value
 * yields the fallback, so a bad setting can never produce a negative chunk
 * size or concurrency.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unusable.
 * @param {number} [min=1] - Smallest accepted value; pass 0 where zero is meaningful.
 * @returns {number} The parsed value or the fallback.
 */
function readIntEnv(name, fallback, min = 1) {
  const parsed = Number.parseInt(process.env[name] ?? '', 10);
  return Number.isNaN(parsed) || parsed < min ? fallback : parsed;
}

const RANGE_CHUNK_SIZE = readIntEnv('RANGE_CHUNK_SIZE', 2 * 1024 * 1024); // 2MB
const RANGE_CONCURRENCY = readIntEnv('RANGE_CONCURRENCY', 4);
const RANGE_TIMEOUT = TIMEOUT_CONFIG.RANGE_REQUEST_TIMEOUT;
// Zero is a valid choice for both: no retries / no delay between retries.
const RANGE_MAX_RETRIES = readIntEnv('RANGE_MAX_RETRIES', 3, 0);
const RANGE_RETRY_DELAY = readIntEnv('RANGE_RETRY_DELAY', 500, 0);

// Small-file threshold: files up to this size are downloaded in full.
const SMALL_FILE_THRESHOLD = readIntEnv('SMALL_FILE_THRESHOLD', 2 * 1024 * 1024); // 2MB

// Last byte offset of the probe request ("bytes=0-PROBE_SIZE").
const PROBE_SIZE = readIntEnv('PROBE_SIZE', 20 * 1024 - 1);
64
+
// ==================== Optional sharp import ====================

// sharp is loaded dynamically so the module still works without it; when the
// import fails, pages are encoded through canvas.toBuffer instead.
let sharp = null;
let sharpAvailable = false;

try {
  const { default: sharpModule } = await import('sharp');
  sharp = sharpModule;
  sharpAvailable = true;
  logger.info('sharp 库已加载');
} catch (importError) {
  logger.warn('sharp 库未安装,回退到 canvas.toBuffer 编码');
  sharp = null;
  sharpAvailable = false;
}
79
+
// ==================== PDFDataRangeTransport dynamic import ====================

// Base class for the range loader. It stays null when it cannot be obtained.
let PDFDataRangeTransport = null;

try {
  const pdfModule = await import("pdfjs-dist/legacy/build/pdf.mjs");
  // A build without this export would yield undefined; normalise it to null so
  // "unavailable" has a single representation and `class ... extends` does not
  // throw while this module is being loaded.
  PDFDataRangeTransport = pdfModule.PDFDataRangeTransport ?? null;
  if (PDFDataRangeTransport === null) {
    logger.warn('PDFDataRangeTransport 不可用: pdfjs-dist 未导出该类');
  }
} catch (e) {
  logger.warn(`PDFDataRangeTransport 不可用: ${e.message}`);
}
90
+
91
+ // ==================== RangeLoader 类 ====================
92
+
/**
 * Range-based PDF loader.
 *
 * Extends PDFDataRangeTransport and serves each requested byte range by
 * issuing HTTP Range requests against the PDF URL.
 */
class RangeLoader extends PDFDataRangeTransport {
  /**
   * @param {number} length - Total size of the PDF file in bytes.
   * @param {ArrayBuffer} initialData - Data that has already been downloaded.
   * @param {string} pdfUrl - URL of the PDF file.
   * @param {Object} [options] - Optional overrides.
   * @param {number} [options.chunkSize] - Maximum bytes per HTTP request.
   * @param {number} [options.concurrency] - Maximum simultaneous requests.
   * @param {number} [options.timeout] - Per-request timeout in milliseconds.
   */
  constructor(length, initialData, pdfUrl, options = {}) {
    super(length, initialData);
    this.pdfUrl = pdfUrl;
    this.pdfSize = length;
    this.chunkSize = options.chunkSize || RANGE_CHUNK_SIZE;
    this.maxConcurrency = options.concurrency || RANGE_CONCURRENCY;
    this.timeout = options.timeout || RANGE_TIMEOUT;

    // Number of occupied request slots and the callers waiting for one.
    this.inflight = 0;
    this.queue = [];

    this.stats = {
      totalRequests: 0,
      totalBytes: 0,
      requestTimes: [],
    };
  }

  /**
   * Run `fn` once a request slot is free.
   *
   * A finishing task hands its slot directly to the oldest waiter instead of
   * freeing it first, so a caller arriving in between cannot push the number
   * of running tasks above `maxConcurrency`.
   *
   * @param {() => Promise<*>} fn - Task to run.
   * @returns {Promise<*>} Whatever `fn` resolves to.
   */
  async runWithLimit(fn) {
    if (this.inflight >= this.maxConcurrency) {
      // The slot is inherited from the task that wakes us up; no increment here.
      await new Promise(resolve => this.queue.push(resolve));
    } else {
      this.inflight++;
    }
    try {
      return await fn();
    } finally {
      const next = this.queue.shift();
      if (next) {
        next();
      } else {
        this.inflight--;
      }
    }
  }

  /**
   * Fetch the byte range [start, end) and deliver it through onDataRange.
   *
   * NOTE(review): a failed fetch rejects the returned promise; whether the
   * caller observes that rejection is not visible in this file - confirm.
   *
   * @param {number} start - First byte offset (inclusive).
   * @param {number} end - End offset (exclusive).
   * @returns {Promise<void>}
   */
  async requestDataRange(start, end) {
    // HTTP Range headers use an inclusive end offset.
    const realEnd = end - 1;
    const groups = this.splitIntoChunks(start, realEnd, this.chunkSize);

    const startTime = Date.now();
    const datas = await Promise.all(
      groups.map(([chunkStart, chunkEnd]) => {
        return this.runWithLimit(() => this.fetchRange(chunkStart, chunkEnd));
      })
    );

    // Concatenate the chunks in request order.
    const byteLength = datas.reduce((total, data) => total + data.byteLength, 0);
    const byteData = new Uint8Array(byteLength);
    let offset = 0;
    for (const data of datas) {
      byteData.set(new Uint8Array(data), offset);
      offset += data.byteLength;
    }

    this.stats.requestTimes.push(Date.now() - startTime);

    this.onDataProgress(byteData.byteLength, this.pdfSize);
    this.onDataRange(start, byteData);
  }

  /**
   * Split the inclusive byte range [start, end] into chunks of at most
   * `chunkSize` bytes.
   *
   * @param {number} start - First byte offset (inclusive).
   * @param {number} end - Last byte offset (inclusive).
   * @param {number} chunkSize - Maximum bytes per chunk.
   * @returns {Array<[number, number]>} Inclusive [chunkStart, chunkEnd] pairs;
   *   empty when the range contains no bytes.
   */
  splitIntoChunks(start, end, chunkSize) {
    // Both ends are inclusive, hence the +1. Without it the final byte is lost
    // whenever (end - start) is a multiple of chunkSize.
    const totalBytes = end - start + 1;
    if (totalBytes <= 0) {
      return [];
    }
    const count = Math.ceil(totalBytes / chunkSize);
    return Array.from({ length: count }, (_, index) => {
      const chunkStart = index * chunkSize + start;
      const chunkEnd = Math.min(chunkStart + chunkSize - 1, end);
      return [chunkStart, chunkEnd];
    });
  }

  /**
   * Download the inclusive byte range [start, end], retrying on timeouts and
   * connection-level failures with a linearly growing delay.
   *
   * @param {number} start - First byte offset (inclusive).
   * @param {number} end - Last byte offset (inclusive).
   * @param {number} [retries] - Remaining retry attempts.
   * @returns {Promise<ArrayBuffer>} The response body.
   * @throws {Error} When the server answers with a non-success status, the
   *   request times out, or retries are exhausted.
   */
  async fetchRange(start, end, retries = RANGE_MAX_RETRIES) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);

    try {
      const response = await fetch(this.pdfUrl, {
        headers: { Range: `bytes=${start}-${end}` },
        signal: controller.signal,
      });

      clearTimeout(timeoutId);

      if (!response.ok && response.status !== 206) {
        throw new Error(`Range 请求失败: ${response.status}`);
      }

      const data = await response.arrayBuffer();

      this.stats.totalRequests++;
      this.stats.totalBytes += data.byteLength;

      return data;
    } catch (error) {
      clearTimeout(timeoutId);

      const isRetryable = error.name === 'AbortError' ||
        error.cause?.code === 'ECONNRESET' ||
        error.cause?.code === 'ECONNREFUSED' ||
        error.cause?.code === 'UND_ERR_SOCKET' ||
        error.message?.includes('fetch failed');

      if (isRetryable && retries > 0) {
        // The delay grows with every attempt already made.
        const delay = RANGE_RETRY_DELAY * (RANGE_MAX_RETRIES - retries + 1);
        await new Promise(resolve => setTimeout(resolve, delay));
        return this.fetchRange(start, end, retries - 1);
      }

      if (error.name === 'AbortError') {
        throw new Error(`请求超时 (${this.timeout}ms)`);
      }
      throw error;
    }
  }

  /**
   * Summarise the traffic generated by this loader.
   *
   * @returns {Object} The raw counters plus `avgRequestTime` (rounded ms per
   *   requestDataRange call) and `totalBytesMB` (string with two decimals).
   */
  getStats() {
    const avgTime = this.stats.requestTimes.length > 0
      ? this.stats.requestTimes.reduce((a, b) => a + b, 0) / this.stats.requestTimes.length
      : 0;
    return {
      ...this.stats,
      avgRequestTime: Math.round(avgTime),
      totalBytesMB: (this.stats.totalBytes / 1024 / 1024).toFixed(2),
    };
  }
}
226
+
227
+ // ==================== PDF 信息获取 ====================
228
+
/**
 * Probe a PDF URL and decide how it should be loaded.
 *
 * A Range request for the first bytes is sent. Depending on the answer the
 * result carries either the complete file (`fullData` set, `isSmallFile` true)
 * or only the probed prefix for range loading (`fullData` null).
 *
 * @param {string} pdfUrl - URL of the PDF file.
 * @param {number} [retries] - Remaining retry attempts for retryable failures.
 * @returns {Promise<{pdfSize: number, initialData: ArrayBuffer,
 *   fullData: (ArrayBuffer|null), isSmallFile: boolean}>}
 * @throws {Error} When the server answers with a non-success status, the file
 *   size cannot be determined, or retries are exhausted.
 */
async function getPdfInfo(pdfUrl, retries = RANGE_MAX_RETRIES) {
  try {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), RANGE_TIMEOUT);

    let probeResponse;
    try {
      probeResponse = await fetch(pdfUrl, {
        headers: { Range: `bytes=0-${PROBE_SIZE}` },
        signal: controller.signal,
      });
    } finally {
      // Also runs when fetch rejects, so no timer is left pending.
      clearTimeout(timeoutId);
    }

    if (!probeResponse.ok && probeResponse.status !== 206) {
      throw new Error(`获取文件信息失败: ${probeResponse.status}`);
    }

    const isPartial = probeResponse.status === 206;
    const probeData = await probeResponse.arrayBuffer();

    // Preferred source: the total after the slash in "Content-Range: bytes a-b/total".
    let pdfSize = 0;
    const contentRange = probeResponse.headers.get('Content-Range');
    if (contentRange) {
      const match = contentRange.match(/\/(\d+)$/);
      if (match) {
        pdfSize = Number.parseInt(match[1], 10);
      }
    }

    if (!pdfSize && !isPartial) {
      // A non-partial answer carries the whole file, so Content-Length (or the
      // body itself when that header is absent) gives the file size. For a 206
      // answer Content-Length only describes the probed slice and is ignored.
      pdfSize = Number.parseInt(probeResponse.headers.get('Content-Length') || '0', 10)
        || probeData.byteLength;
    }

    if (!pdfSize) {
      if (isPartial) {
        // Partial answer without a usable total (e.g. "bytes 0-20479/*"):
        // range loading is impossible, so fetch the complete file instead.
        const fullData = await downloadFullPdf(pdfUrl);
        return {
          pdfSize: fullData.byteLength,
          initialData: fullData,
          fullData,
          isSmallFile: true,
        };
      }
      throw new Error('无法获取文件大小,服务器可能不支持 Range 请求');
    }

    const isSmallFile = pdfSize <= SMALL_FILE_THRESHOLD;
    const isComplete = probeData.byteLength >= pdfSize;

    if (isComplete) {
      // The probe already returned the entire file.
      return {
        pdfSize,
        initialData: probeData,
        fullData: probeData,
        isSmallFile: true,
      };
    } else if (isSmallFile) {
      const fullData = await downloadFullPdf(pdfUrl);
      return {
        pdfSize,
        initialData: probeData,
        fullData,
        isSmallFile: true,
      };
    } else {
      return {
        pdfSize,
        initialData: probeData,
        fullData: null,
        isSmallFile: false,
      };
    }
  } catch (error) {
    const isRetryable = error.name === 'AbortError' ||
      error.cause?.code === 'ECONNRESET' ||
      error.cause?.code === 'ECONNREFUSED' ||
      error.cause?.code === 'UND_ERR_SOCKET' ||
      error.message?.includes('fetch failed');

    if (isRetryable && retries > 0) {
      // The delay grows with every attempt already made.
      const delay = RANGE_RETRY_DELAY * (RANGE_MAX_RETRIES - retries + 1);
      await new Promise(resolve => setTimeout(resolve, delay));
      return getPdfInfo(pdfUrl, retries - 1);
    }

    throw error;
  }
}
309
+
/**
 * Download the complete PDF file.
 *
 * The download timeout covers both the response headers and the body, and its
 * timer is always cleared, whether the download succeeds or fails.
 *
 * @param {string} pdfUrl - URL of the PDF file.
 * @param {number} [retries] - Remaining retry attempts for retryable failures.
 * @returns {Promise<ArrayBuffer>} The file contents.
 * @throws {Error} When the server answers with a non-success status or
 *   retries are exhausted.
 */
async function downloadFullPdf(pdfUrl, retries = RANGE_MAX_RETRIES) {
  try {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), TIMEOUT_CONFIG.DOWNLOAD_TIMEOUT);

    try {
      const response = await fetch(pdfUrl, {
        signal: controller.signal,
      });

      if (!response.ok) {
        throw new Error(`下载 PDF 失败: ${response.status}`);
      }

      // Still guarded by the timer: a stalled body is aborted as well.
      return await response.arrayBuffer();
    } finally {
      clearTimeout(timeoutId);
    }
  } catch (error) {
    const isRetryable = error.name === 'AbortError' ||
      error.cause?.code === 'ECONNRESET' ||
      error.cause?.code === 'ECONNREFUSED' ||
      error.cause?.code === 'UND_ERR_SOCKET' ||
      error.message?.includes('fetch failed');

    if (isRetryable && retries > 0) {
      // The delay grows with every attempt already made.
      const delay = RANGE_RETRY_DELAY * (RANGE_MAX_RETRIES - retries + 1);
      await new Promise(resolve => setTimeout(resolve, delay));
      return downloadFullPdf(pdfUrl, retries - 1);
    }

    throw error;
  }
}
345
+
346
+ // ==================== 编码函数 ====================
347
+
/**
 * Encode raw 4-channel pixel data with sharp.
 *
 * @param {Uint8Array|Uint8ClampedArray} data - Raw pixel bytes, 4 channels per pixel.
 * @param {number} width - Image width in pixels (rounded before use).
 * @param {number} height - Image height in pixels (rounded before use).
 * @param {string} format - 'webp', 'png', 'jpg' or 'jpeg'; anything else is
 *   encoded as WebP.
 * @param {Object} [options] - Per-call encoder overrides.
 * @param {number} [options.webpQuality] - WebP quality.
 * @param {number} [options.webpMethod] - WebP effort; 0 is honoured.
 * @param {number} [options.pngCompression] - PNG compression level; 0 is honoured.
 * @param {number} [options.jpegQuality] - JPEG quality.
 * @returns {Promise<Buffer>} The encoded image.
 */
async function encodeWithSharp(data, width, height, format, options = {}) {
  // Wrap the pixel memory without copying it.
  const buffer = Buffer.from(data.buffer, data.byteOffset, data.byteLength);

  const sharpInstance = sharp(buffer, {
    raw: {
      width: Math.round(width),
      height: Math.round(height),
      channels: 4,
    },
  });

  // Quality settings keep `||` (a zero falls back to the default, as before).
  // Effort and compression level use `??` because 0 is a legitimate setting
  // that must not be replaced by the default.
  const webpQuality = options.webpQuality || WEBP_QUALITY;
  const webpEffort = options.webpMethod ?? WEBP_METHOD;

  switch (format) {
    case 'webp':
      return sharpInstance.webp({
        quality: webpQuality,
        effort: webpEffort,
        smartSubsample: true,
      }).toBuffer();
    case 'png':
      return sharpInstance.png({
        compressionLevel: options.pngCompression ?? PNG_COMPRESSION,
      }).toBuffer();
    case 'jpg':
    case 'jpeg':
      return sharpInstance.jpeg({
        quality: options.jpegQuality || JPEG_QUALITY,
      }).toBuffer();
    default:
      // Unknown formats fall back to WebP (without smartSubsample, as before).
      return sharpInstance.webp({
        quality: webpQuality,
        effort: webpEffort,
      }).toBuffer();
  }
}
385
+
386
+ // ==================== 渲染函数 ====================
387
+
/**
 * Render one PDF page to an encoded image.
 *
 * Never throws: failures are logged and reported through the result object.
 * The page and the canvas are released in every case.
 *
 * @param {Object} pdfDocument - Loaded PDF.js document.
 * @param {number} pageNum - 1-based page number.
 * @param {Object} [options] - Render options.
 * @param {string} [options.format='webp'] - 'webp', 'png', 'jpg' or 'jpeg';
 *   any other value is encoded as WebP.
 * @param {number} [options.targetWidth] - Desired output width in pixels.
 * @param {number} [options.imageHeavyWidth] - Width used for pages that look
 *   like scans.
 * @param {number} [options.maxScale] - Upper bound for the render scale.
 * @param {boolean} [options.detectScan] - Pass false to disable scan detection.
 * @returns {Promise<Object>} On success `{ pageNum, buffer, width, height,
 *   scale, success: true, renderTime, encodeTime, timing }`; on failure
 *   `{ pageNum, success: false, error, timing }`.
 */
async function renderPage(pdfDocument, pageNum, options = {}) {
  let page;
  let canvasAndContext;
  const pageStartTime = Date.now();
  const timing = {
    getPage: 0,
    heuristic: 0,
    getOperatorList: 0,
    render: 0,
    getImageData: 0,
    encode: 0,
    total: 0,
  };

  const format = options.format || 'webp';
  const targetWidth = options.targetWidth || TARGET_RENDER_WIDTH;
  // Both encoders fall back to WebP for every format other than PNG/JPEG, so
  // the WebP size limits have to be enforced for all of those formats.
  const encodesAsWebp = format !== 'png' && format !== 'jpg' && format !== 'jpeg';

  try {
    const getPageStart = Date.now();
    page = await pdfDocument.getPage(pageNum);
    timing.getPage = Date.now() - getPageStart;

    // Heuristic: a page with XObjects but no fonts is treated as a likely scan
    // and rendered at the image-heavy width.
    const heuristicStart = Date.now();
    let effectiveTargetWidth = targetWidth;
    let isLikelyScan = false;

    try {
      // NOTE(review): relies on fields that are not part of the page API used
      // elsewhere in this file; when they are absent the heuristic is skipped.
      const pageDict = page._pageInfo?.pageDict || page.pageDict;

      if (pageDict) {
        const resources = pageDict.get('Resources');
        if (resources) {
          const xobjects = resources.get('XObject');
          const fonts = resources.get('Font');

          const hasImages = xobjects && (
            typeof xobjects.getKeys === 'function'
              ? xobjects.getKeys().length > 0
              : Object.keys(xobjects).length > 0
          );
          const hasFonts = fonts && (
            typeof fonts.getKeys === 'function'
              ? fonts.getKeys().length > 0
              : Object.keys(fonts).length > 0
          );

          if (hasImages && !hasFonts && options.detectScan !== false) {
            isLikelyScan = true;
            effectiveTargetWidth = options.imageHeavyWidth || IMAGE_HEAVY_TARGET_WIDTH;
          }
        }
      }
    } catch (e) {
      // Best effort only: a failed heuristic must not fail the render.
      if (IS_DEV) {
        logger.debug(`Page ${pageNum} 启发式预判失败: ${e.message}`);
      }
    }

    timing.heuristic = Date.now() - heuristicStart;

    // Scale so the page reaches the target width, capped by the maximum scale.
    const originalViewport = page.getViewport({ scale: 1.0 });
    const originalWidth = originalViewport.width;

    let scale = effectiveTargetWidth / originalWidth;
    scale = Math.min(scale, options.maxScale || MAX_RENDER_SCALE);

    let viewport = page.getViewport({ scale });
    let width = Math.round(viewport.width);
    let height = Math.round(viewport.height);

    // WebP size limits: shrink until both sides and the pixel count fit.
    if (encodesAsWebp) {
      if (width > WEBP_MAX_DIMENSION || height > WEBP_MAX_DIMENSION) {
        const widthFactor = width > WEBP_MAX_DIMENSION ? WEBP_MAX_DIMENSION / width : 1;
        const heightFactor = height > WEBP_MAX_DIMENSION ? WEBP_MAX_DIMENSION / height : 1;
        const limitFactor = Math.min(widthFactor, heightFactor);

        logger.warn(`Page ${pageNum} 尺寸超过 WebP 限制 (${width}x${height}),缩放至 ${(limitFactor * 100).toFixed(1)}%`);

        scale = scale * limitFactor;
        viewport = page.getViewport({ scale });
        width = Math.round(viewport.width);
        height = Math.round(viewport.height);
      }

      if (width * height > WEBP_MAX_PIXELS) {
        const pixelFactor = Math.sqrt(WEBP_MAX_PIXELS / (width * height));
        logger.warn(`Page ${pageNum} 像素数超过 WebP 限制,进一步缩放至 ${(pixelFactor * 100).toFixed(1)}%`);

        scale = scale * pixelFactor;
        viewport = page.getViewport({ scale });
        width = Math.round(viewport.width);
        height = Math.round(viewport.height);
      }
    }

    // Fetch the operator list (timed separately from the render itself).
    const getOperatorListStart = Date.now();
    const operatorList = await page.getOperatorList();
    timing.getOperatorList = Date.now() - getOperatorListStart;

    // Render onto a canvas of the final size.
    canvasAndContext = pdfDocument.canvasFactory.create(width, height);

    const renderContext = {
      canvasContext: canvasAndContext.context,
      viewport,
      operatorList,
    };

    const renderStart = Date.now();
    await page.render(renderContext).promise;
    timing.render = Date.now() - renderStart;

    // Encode.
    const encodeStart = Date.now();
    let buffer;

    if (sharpAvailable) {
      const getImageDataStart = Date.now();
      const imageData = canvasAndContext.context.getImageData(0, 0, width, height);
      timing.getImageData = Date.now() - getImageDataStart;
      buffer = await encodeWithSharp(imageData.data, width, height, format, options);
    } else {
      // Fall back to the canvas' own encoder.
      const mimeType = format === 'png' ? 'image/png' :
        (format === 'jpg' || format === 'jpeg') ? 'image/jpeg' : 'image/webp';
      buffer = canvasAndContext.canvas.toBuffer(mimeType);
    }
    timing.encode = Date.now() - encodeStart;
    timing.total = Date.now() - pageStartTime;

    return {
      pageNum,
      buffer,
      width,
      height,
      scale: parseFloat(scale.toFixed(3)),
      success: true,
      renderTime: timing.render,
      encodeTime: timing.encode,
      timing,
    };
  } catch (error) {
    timing.total = Date.now() - pageStartTime;
    logger.error(`渲染页面 ${pageNum} 失败: ${error.message}`);
    return {
      pageNum,
      success: false,
      error: error.message,
      timing,
    };
  } finally {
    // Cleanup is best effort: a failure here must not replace the result.
    try { if (page) page.cleanup(); } catch (e) { /* ignored */ }
    try {
      if (canvasAndContext && pdfDocument) {
        // Shrink the canvas to 1x1.
        pdfDocument.canvasFactory.reset(canvasAndContext, 1, 1);
      }
    } catch (e) { /* ignored */ }
  }
}
554
+
/**
 * Render the given pages one after another.
 *
 * Each page is started only after the previous one has finished; results are
 * returned in the order of `pageNums`.
 *
 * @param {Object} pdfDocument - Loaded PDF.js document.
 * @param {Iterable<number>} pageNums - Page numbers to render.
 * @param {Object} [options] - Options forwarded to renderPage.
 * @returns {Promise<Object[]>} One renderPage result per page number.
 */
async function serialRenderPages(pdfDocument, pageNums, options = {}) {
  const rendered = [];

  for (const currentPage of pageNums) {
    rendered.push(await renderPage(pdfDocument, currentPage, options));
  }

  return rendered;
}
568
+
569
+ // ==================== 主入口函数 ====================
570
+
/**
 * Report whether the PDF.js renderer can be used.
 *
 * The check is "is the transport base class a constructor" rather than
 * "is it not null", so an undefined value is also reported as unavailable.
 *
 * @returns {boolean} True when PDFDataRangeTransport was loaded.
 */
export function isPdfjsAvailable() {
  return typeof PDFDataRangeTransport === 'function';
}
577
+
/**
 * Describe the PDF.js build in use.
 *
 * @returns {string} A fixed label naming the legacy pdfjs-dist build.
 */
export function getPdfjsVersion() {
  const buildLabel = 'pdfjs-dist (legacy)';
  return buildLabel;
}
584
+
/**
 * Render a PDF from a URL.
 *
 * The file is probed first. Files that are small (or already complete after
 * the probe) are loaded from memory; everything else is loaded on demand
 * through a RangeLoader. Never throws: failures are reported in the result.
 *
 * @param {string} pdfUrl - URL of the PDF file.
 * @param {number[]} [pages] - 1-based page numbers to render; an empty array
 *   renders every page. Numbers outside the document are dropped.
 * @param {Object} [options] - Render options forwarded to the page renderer.
 * @returns {Promise<Object>} On success `{ success: true, numPages, pages,
 *   totalTime, renderTime, streamStats }`; on failure `{ success: false,
 *   error, pages: [], totalTime }`.
 */
export async function renderFromUrl(pdfUrl, pages = [], options = {}) {
  const startTime = Date.now();
  let loadingTask;
  let pdfDocument;
  let rangeLoader;

  try {
    // Probe the file to pick the loading strategy.
    const { pdfSize, initialData, fullData, isSmallFile } = await getPdfInfo(pdfUrl);

    let rangeStats = null;

    if (isSmallFile && fullData) {
      logger.debug(`小文件模式: ${(pdfSize / 1024 / 1024).toFixed(2)}MB`);

      loadingTask = getDocument({
        data: new Uint8Array(fullData),
        cMapUrl: CMAP_URL,
        cMapPacked: true,
        standardFontDataUrl: STANDARD_FONT_DATA_URL,
        verbosity: PDFJS_VERBOSITY,
      });

      rangeStats = {
        requestCount: 1,
        totalBytes: fullData.byteLength,
        totalBytesMB: (fullData.byteLength / 1024 / 1024).toFixed(2),
        mode: 'full-download',
      };
    } else {
      logger.debug(`分片加载模式: ${(pdfSize / 1024 / 1024).toFixed(2)}MB`);

      rangeLoader = new RangeLoader(pdfSize, initialData, pdfUrl);

      loadingTask = getDocument({
        range: rangeLoader,
        cMapUrl: CMAP_URL,
        cMapPacked: true,
        standardFontDataUrl: STANDARD_FONT_DATA_URL,
        rangeChunkSize: RANGE_CHUNK_SIZE,
        disableAutoFetch: true,
        verbosity: PDFJS_VERBOSITY,
      });
    }

    pdfDocument = await loadingTask.promise;
    const numPages = pdfDocument.numPages;

    // Decide which pages to render.
    let targetPages;
    if (pages.length === 0) {
      targetPages = Array.from({ length: numPages }, (_, i) => i + 1);
    } else {
      targetPages = pages.filter(p => p >= 1 && p <= numPages);
    }

    // Render the pages one after another.
    const renderStart = Date.now();
    const results = await serialRenderPages(pdfDocument, targetPages, options);
    const renderTime = Date.now() - renderStart;

    if (!rangeStats) {
      rangeStats = rangeLoader?.getStats() || null;
    }

    return {
      success: true,
      numPages,
      pages: results.map(r => ({
        pageNum: r.pageNum,
        width: r.width,
        height: r.height,
        buffer: r.success ? r.buffer : undefined,
        success: r.success,
        error: r.error,
        renderTime: r.renderTime,
        encodeTime: r.encodeTime,
      })),
      totalTime: Date.now() - startTime,
      renderTime,
      streamStats: rangeStats,
    };
  } catch (error) {
    logger.error(`PDF.js 处理失败: ${error.message}`);
    return {
      success: false,
      error: error.message,
      pages: [],
      totalTime: Date.now() - startTime,
    };
  } finally {
    // Release PDF.js resources. When loading failed there is no document yet,
    // so the loading task itself has to be destroyed.
    const disposable = pdfDocument || loadingTask;
    if (disposable) {
      try {
        await disposable.destroy();
      } catch (e) {
        // Best effort: a cleanup failure must not replace the result.
        logger.debug(`PDF.js 资源释放失败: ${e.message}`);
      }
    }
  }
}
689
+
/**
 * Render a PDF held in memory.
 *
 * Never throws: failures are reported in the result.
 *
 * @param {Buffer} pdfBuffer - PDF file contents; non-Buffer input is passed
 *   through Buffer.from().
 * @param {number[]} [pages] - 1-based page numbers to render; an empty array
 *   renders every page. Numbers outside the document are dropped.
 * @param {Object} [options] - Render options forwarded to the page renderer.
 * @returns {Promise<Object>} On success `{ success: true, numPages, pages,
 *   totalTime, renderTime }`; on failure `{ success: false, error, pages: [],
 *   totalTime }`.
 */
export async function renderFromBuffer(pdfBuffer, pages = [], options = {}) {
  const startTime = Date.now();
  let loadingTask;
  let pdfDocument;

  try {
    const buffer = Buffer.isBuffer(pdfBuffer) ? pdfBuffer : Buffer.from(pdfBuffer);

    loadingTask = getDocument({
      // A fresh copy is handed to PDF.js, leaving the caller's buffer alone.
      data: new Uint8Array(buffer),
      cMapUrl: CMAP_URL,
      cMapPacked: true,
      standardFontDataUrl: STANDARD_FONT_DATA_URL,
      verbosity: PDFJS_VERBOSITY,
    });

    pdfDocument = await loadingTask.promise;
    const numPages = pdfDocument.numPages;

    // Decide which pages to render.
    let targetPages;
    if (pages.length === 0) {
      targetPages = Array.from({ length: numPages }, (_, i) => i + 1);
    } else {
      targetPages = pages.filter(p => p >= 1 && p <= numPages);
    }

    logger.debug(`Rendering ${targetPages.length} pages from buffer (${(buffer.length / 1024 / 1024).toFixed(2)}MB)`);

    // Render the pages one after another.
    const renderStart = Date.now();
    const results = await serialRenderPages(pdfDocument, targetPages, options);
    const renderTime = Date.now() - renderStart;

    return {
      success: true,
      numPages,
      pages: results.map(r => ({
        pageNum: r.pageNum,
        width: r.width,
        height: r.height,
        buffer: r.success ? r.buffer : undefined,
        success: r.success,
        error: r.error,
        renderTime: r.renderTime,
        encodeTime: r.encodeTime,
      })),
      totalTime: Date.now() - startTime,
      renderTime,
    };
  } catch (error) {
    logger.error(`PDF.js 处理失败: ${error.message}`);
    return {
      success: false,
      error: error.message,
      pages: [],
      totalTime: Date.now() - startTime,
    };
  } finally {
    // Release PDF.js resources. When loading failed there is no document yet,
    // so the loading task itself has to be destroyed.
    const disposable = pdfDocument || loadingTask;
    if (disposable) {
      try {
        await disposable.destroy();
      } catch (e) {
        // Best effort: a cleanup failure must not replace the result.
        logger.debug(`PDF.js 资源释放失败: ${e.message}`);
      }
    }
  }
}
761
+
/**
 * Render a PDF stored on disk.
 *
 * Never throws: failures (including an unreadable file) are reported in the
 * result.
 *
 * @param {string} filePath - Path of the PDF file.
 * @param {number[]} [pages] - 1-based page numbers to render; an empty array
 *   renders every page. Numbers outside the document are dropped.
 * @param {Object} [options] - Render options forwarded to the page renderer.
 * @returns {Promise<Object>} On success `{ success: true, numPages, pages,
 *   totalTime, renderTime }`; on failure `{ success: false, error, pages: [],
 *   totalTime }`.
 */
export async function renderFromFile(filePath, pages = [], options = {}) {
  const startTime = Date.now();
  let loadingTask;
  let pdfDocument;

  try {
    const buffer = await fs.promises.readFile(filePath);

    loadingTask = getDocument({
      data: new Uint8Array(buffer),
      cMapUrl: CMAP_URL,
      cMapPacked: true,
      standardFontDataUrl: STANDARD_FONT_DATA_URL,
      verbosity: PDFJS_VERBOSITY,
    });

    pdfDocument = await loadingTask.promise;
    const numPages = pdfDocument.numPages;

    // Decide which pages to render.
    let targetPages;
    if (pages.length === 0) {
      targetPages = Array.from({ length: numPages }, (_, i) => i + 1);
    } else {
      targetPages = pages.filter(p => p >= 1 && p <= numPages);
    }

    logger.debug(`Rendering ${targetPages.length} pages from file: ${filePath}`);

    // Render the pages one after another.
    const renderStart = Date.now();
    const results = await serialRenderPages(pdfDocument, targetPages, options);
    const renderTime = Date.now() - renderStart;

    return {
      success: true,
      numPages,
      pages: results.map(r => ({
        pageNum: r.pageNum,
        width: r.width,
        height: r.height,
        buffer: r.success ? r.buffer : undefined,
        success: r.success,
        error: r.error,
        renderTime: r.renderTime,
        encodeTime: r.encodeTime,
      })),
      totalTime: Date.now() - startTime,
      renderTime,
    };
  } catch (error) {
    logger.error(`PDF.js 处理失败: ${error.message}`);
    return {
      success: false,
      error: error.message,
      pages: [],
      totalTime: Date.now() - startTime,
    };
  } finally {
    // Release PDF.js resources. When loading failed there is no document yet,
    // so the loading task itself has to be destroyed.
    const disposable = pdfDocument || loadingTask;
    if (disposable) {
      try {
        await disposable.destroy();
      } catch (e) {
        // Best effort: a cleanup failure must not replace the result.
        logger.debug(`PDF.js 资源释放失败: ${e.message}`);
      }
    }
  }
}
833
+
/**
 * Count the pages of a PDF held in memory.
 *
 * @param {Buffer} pdfBuffer - PDF file contents; non-Buffer input is passed
 *   through Buffer.from().
 * @returns {Promise<number>} Number of pages.
 * @throws {Error} When the document cannot be loaded.
 */
export async function getPageCount(pdfBuffer) {
  const buffer = Buffer.isBuffer(pdfBuffer) ? pdfBuffer : Buffer.from(pdfBuffer);

  const loadingTask = getDocument({
    data: new Uint8Array(buffer),
    cMapUrl: CMAP_URL,
    cMapPacked: true,
    standardFontDataUrl: STANDARD_FONT_DATA_URL,
    verbosity: 0,
  });

  let pdfDocument;
  try {
    pdfDocument = await loadingTask.promise;
    return pdfDocument.numPages;
  } finally {
    // Release PDF.js resources on success and on load failure alike. A cleanup
    // failure is only logged so it cannot hide the page count or the original
    // load error.
    try {
      await (pdfDocument || loadingTask).destroy();
    } catch (e) {
      logger.debug(`PDF.js 资源释放失败: ${e.message}`);
    }
  }
}
857
+
/**
 * Count the pages of a PDF stored on disk.
 *
 * Reads the whole file and delegates to getPageCount.
 *
 * @param {string} filePath - Path of the PDF file.
 * @returns {Promise<number>} Number of pages.
 */
export async function getPageCountFromFile(filePath) {
  const { readFile } = fs.promises;
  const pdfBytes = await readFile(filePath);
  const pageCount = await getPageCount(pdfBytes);
  return pageCount;
}