node-pdf2img 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/worker.js ADDED
@@ -0,0 +1,237 @@
1
+ /**
2
+ * PDF2IMG Worker Thread
3
+ *
4
+ * 在工作线程中执行 CPU 密集型任务:
5
+ * 1. PDFium 渲染 PDF 页面到原始位图
6
+ * 2. Sharp 编码位图到目标格式
7
+ *
8
+ * 主线程负责协调和 I/O,工作线程负责计算密集型转换
9
+ */
10
+
11
+ import sharp from 'sharp';
12
+
13
// ==================== Native renderer (lazy loading) ====================

// Namespace object of the native module once loaded; replaced by an empty
// object when loading fails.
let nativeRenderer = null;
// True only after the native module loaded and reported PDFium as available.
let nativeAvailable = false;
// Promise of the single initialization attempt, shared by every caller.
let initPromise = null;

/**
 * Load the native module and record whether PDFium can be used.
 *
 * Never rejects: any failure while importing or probing the module leaves
 * `nativeRenderer` as an empty object and `nativeAvailable` as `false`.
 *
 * @returns {Promise<void>}
 */
async function loadNativeRenderer() {
    try {
        nativeRenderer = await import('node-pdf2img-native');

        if (!nativeRenderer.isPdfiumAvailable()) {
            return;
        }

        nativeAvailable = true;

        try {
            nativeRenderer.warmup();
        } catch {
            // Warmup is best effort; a failure here is deliberately ignored.
        }
    } catch {
        nativeRenderer = {};
        nativeAvailable = false;
    }
}

/**
 * Initialize the native renderer lazily.
 *
 * The first call starts the load; every later call waits on that same
 * attempt, so the module is imported at most once.
 *
 * @returns {Promise<void>} Settles once the initialization attempt has finished.
 */
async function initNativeRenderer() {
    if (!initPromise) {
        initPromise = loadNativeRenderer();
    }

    return initPromise;
}
48
+
49
/**
 * Build the render configuration from caller options, applying defaults.
 *
 * Only `targetWidth` and `detectScan` are picked; every other key on
 * `options` is left out of the result.
 *
 * @param {Object|null} [options] - Caller options; `null` and `undefined` are treated as empty.
 * @param {number} [options.targetWidth=1280] - Target width (presumably pixels; confirm against the native renderer).
 * @param {boolean} [options.detectScan=false] - Scan-detection flag.
 * @returns {{targetWidth: number, detectScan: boolean}} Render configuration.
 */
function mergeConfig(options = {}) {
    // The default parameter only covers `undefined`; optional chaining keeps an
    // explicit `null` from throwing. `??` preserves falsy-but-set values such
    // as `0` or `false`.
    return {
        targetWidth: options?.targetWidth ?? 1280,
        detectScan: options?.detectScan ?? false,
    };
}
58
+
59
/**
 * Encode a raw bitmap into the requested image format with Sharp.
 *
 * @param {Buffer} rawBitmap - Raw pixel data, handed to Sharp as 4-channel (RGBA) input.
 * @param {number} width - Image width.
 * @param {number} height - Image height.
 * @param {string} format - Output format: 'webp', 'png', 'jpeg' or 'jpg'.
 * @param {Object} [options] - Encoding options.
 * @param {number} [options.quality] - Fallback quality for WebP and JPEG.
 * @param {number} [options.webpQuality] - WebP quality (default 80).
 * @param {number} [options.webpMethod] - Passed to Sharp as the WebP `effort` (default 4).
 * @param {number} [options.pngCompression] - PNG compression level (default 6).
 * @param {number} [options.jpegQuality] - JPEG quality (default 85).
 * @returns {Promise<Buffer>} Encoded image data.
 * @throws {Error} If the format is unsupported, the encoder returns no data,
 *   or the encoder output cannot be converted to a Buffer.
 */
async function encodeWithSharp(rawBitmap, width, height, format, options = {}) {
    const isJpeg = format === 'jpeg' || format === 'jpg';

    // Reject unknown formats before building a pipeline over the bitmap.
    if (format !== 'webp' && format !== 'png' && !isJpeg) {
        throw new Error(`Unsupported format: ${format}`);
    }

    const pipeline = sharp(rawBitmap, {
        raw: {
            width,
            height,
            channels: 4, // RGBA
        }
    });

    let buffer;

    if (format === 'webp') {
        buffer = await pipeline.webp({
            // `||` on purpose: a quality of 0 falls through to the next candidate.
            quality: options.webpQuality || options.quality || 80,
            effort: options.webpMethod ?? 4,
        }).toBuffer();
    } else if (format === 'png') {
        buffer = await pipeline.png({
            compressionLevel: options.pngCompression ?? 6,
            adaptiveFiltering: true,
        }).toBuffer();
    } else {
        // JPEG has no alpha channel: blend onto a white background first.
        buffer = await pipeline
            .flatten({ background: { r: 255, g: 255, b: 255 } })
            .jpeg({
                quality: options.jpegQuality || options.quality || 85,
                mozjpeg: true,
            })
            .toBuffer();
    }

    if (!buffer || buffer.length === 0) {
        throw new Error(`${format} encoding failed: empty buffer`);
    }

    if (Buffer.isBuffer(buffer)) {
        return buffer;
    }

    // The encoder handed back something other than a Buffer (for example a
    // plain Uint8Array); log what it was and normalize it.
    console.error(`[Worker] ${format} buffer type: ${typeof buffer}, constructor: ${buffer?.constructor?.name}`);
    console.error(`[Worker] Converting to Buffer from type: ${typeof buffer}`);

    try {
        // Buffer.from accepts typed arrays, plain arrays and array-likes, so a
        // single guarded call covers every convertible case.
        return Buffer.from(buffer);
    } catch (e) {
        throw new Error(
            `${format} encoding failed: cannot convert buffer to Buffer. Type: ${typeof buffer}`,
            { cause: e }
        );
    }
}
131
+
132
/**
 * Build the uniform result object returned when a page could not be processed.
 *
 * @param {number} pageNum - Page number the task asked for.
 * @param {string} error - Human-readable failure reason.
 * @param {number} [renderTime=0] - Render time measured before the failure, if any.
 * @returns {Object} Failure result with zeroed dimensions and no buffer.
 */
function createFailureResult(pageNum, error, renderTime = 0) {
    return {
        pageNum,
        success: false,
        error,
        width: 0,
        height: 0,
        buffer: null,
        renderTime,
        encodeTime: 0,
    };
}

/**
 * Process a single page task: render it with the native renderer, then encode
 * the raw bitmap with Sharp.
 *
 * This is the worker's entry point. Failures are reported through the returned
 * object (`success: false` plus `error`) rather than by rejecting.
 *
 * @param {Object} task - Task description.
 * @param {string} [task.filePath] - Path of the PDF file (file input).
 * @param {Buffer} [task.pdfBuffer] - PDF data (buffer input); non-Buffer values are passed through `Buffer.from`.
 * @param {number} task.pageNum - Page to process (1-based).
 * @param {Object} [task.options] - Conversion options; `format` defaults to 'webp'.
 * @returns {Promise<Object>} Result with `pageNum`, `success`, `width`, `height`,
 *   `buffer`, `renderTime`, `encodeTime`, plus `size` on success or `error` on failure.
 */
export default async function processPage(task) {
    const { filePath, pdfBuffer, pageNum } = task ?? {};
    // A destructuring default would only cover `undefined`; also accept `null`.
    const options = task?.options ?? {};

    // Make sure the native renderer has been initialized.
    await initNativeRenderer();

    if (!nativeAvailable) {
        return createFailureResult(pageNum, 'Native renderer not available in worker thread');
    }

    // Tracked outside the try block so a later encoding failure can still
    // report how long rendering took.
    let renderTime = 0;

    try {
        const config = mergeConfig(options);

        // Step 1: render the page to a raw bitmap.
        let rawResult;

        if (filePath) {
            rawResult = nativeRenderer.renderPageToRawBitmap(filePath, pageNum, config);
        } else if (pdfBuffer) {
            const buffer = Buffer.isBuffer(pdfBuffer) ? pdfBuffer : Buffer.from(pdfBuffer);
            rawResult = nativeRenderer.renderPageToRawBitmapFromBuffer(buffer, pageNum, config);
        } else {
            return createFailureResult(pageNum, 'No input provided: filePath or pdfBuffer required');
        }

        renderTime = rawResult?.renderTime || 0;

        if (!rawResult?.success) {
            return createFailureResult(pageNum, rawResult?.error || 'Render failed', renderTime);
        }

        // Step 2: encode the bitmap with Sharp.
        const encodeStart = Date.now();
        const format = options.format || 'webp';
        const encodedBuffer = await encodeWithSharp(
            rawResult.buffer,
            rawResult.width,
            rawResult.height,
            format,
            options
        );
        const encodeTime = Date.now() - encodeStart;

        return {
            pageNum,
            success: true,
            width: rawResult.width,
            height: rawResult.height,
            buffer: encodedBuffer,
            size: encodedBuffer.length,
            renderTime,
            encodeTime,
        };
    } catch (err) {
        // `err` may be any thrown value, so read the message defensively.
        return createFailureResult(pageNum, err?.message || 'Unknown error', renderTime);
    }
}