node-pdf2img 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.d.ts ADDED
@@ -0,0 +1,186 @@
1
+ /**
2
+ * @tencent/pdf2img - 高性能 PDF 转图片工具
3
+ */
4
+
5
/** Rendering options shared by every conversion entry point. */
export interface RenderOptions {
  /**
   * Target render width, in pixels.
   * @defaultValue 1280
   */
  targetWidth?: number;

  /**
   * Target width, in pixels, used for image-heavy pages.
   * @defaultValue 1024
   */
  imageHeavyWidth?: number;

  /**
   * Upper bound for the render scale factor.
   * @defaultValue 4.0
   */
  maxScale?: number;

  /**
   * WebP quality, from 0 to 100.
   * @defaultValue 70
   */
  webpQuality?: number;

  /**
   * Enables scanned-document detection.
   * @defaultValue true
   */
  detectScan?: boolean;
}
17
+
18
/** Credentials and location of the Tencent Cloud COS bucket used for uploads. */
export interface CosConfig {
  /** Tencent Cloud SecretId. */
  secretId: string;

  /** Tencent Cloud SecretKey. */
  secretKey: string;

  /** Name of the COS bucket. */
  bucket: string;

  /** COS region. */
  region: string;
}
28
+
29
/** Options accepted by `convert`: the render options plus output routing. */
export interface ConvertOptions extends RenderOptions {
  /** Page numbers to convert (1-based); an empty array means every page. */
  pages?: number[];

  /** Where the rendered images go: `'file'`, `'buffer'` or `'cos'`. */
  outputType?: 'file' | 'buffer' | 'cos';

  /** Output directory (required when `outputType` is `'file'`). */
  outputDir?: string;

  /**
   * Prefix for the output file names.
   * @defaultValue 'page'
   */
  prefix?: string;

  /** COS configuration (required when `outputType` is `'cos'`). */
  cos?: CosConfig;

  /** Prefix for the COS object keys. */
  cosKeyPrefix?: string;
}
43
+
44
/** Result of converting a single page. */
export interface PageResult {
  /** Page number (1-based). */
  pageNum: number;

  /** Image width, in pixels. */
  width: number;

  /** Image height, in pixels. */
  height: number;

  /** Whether the page was rendered successfully. */
  success: boolean;

  /** Image data (when `outputType` is `'buffer'`). */
  buffer?: Buffer;

  /** Path of the written file (when `outputType` is `'file'`). */
  outputPath?: string;

  /** COS object key (when `outputType` is `'cos'`). */
  cosKey?: string;

  /** Image size, in bytes. */
  size?: number;

  /** Error message (when the page failed). */
  error?: string;
}
64
+
65
/** Aggregate result returned by `convert`. */
export interface ConvertResult {
  /** Whether the conversion succeeded. */
  success: boolean;

  /** Total number of pages in the PDF. */
  numPages: number;

  /** Number of pages that were rendered successfully. */
  renderedPages: number;

  /** Per-page results. */
  pages: PageResult[];

  /** Timing information. */
  timing: {
    /** Total elapsed time, in milliseconds. */
    total: number;

    /** Time spent inside the native renderer, in milliseconds. */
    native: number;
  };
}
82
+
83
/**
 * Converts a PDF into images.
 *
 * @param input - Path of a PDF file, a URL, or a Buffer holding the PDF
 * @param options - Conversion options
 * @returns The conversion result
 */
export declare function convert(
  input: string | Buffer,
  options?: ConvertOptions
): Promise<ConvertResult>;
91
+
92
/**
 * Returns the number of pages in a PDF.
 *
 * NOTE(review): the package entry point also re-exports a `getPageCountSync`,
 * which suggests this variant may actually be asynchronous — confirm the
 * return type against the converter implementation.
 *
 * @param input - Path of a PDF file, or a Buffer holding the PDF
 * @returns The page count
 */
export declare function getPageCount(input: string | Buffer): number;
99
+
100
/**
 * Checks whether the native renderer is available.
 *
 * @returns `true` when the native renderer can be used
 */
export declare function isAvailable(): boolean;
104
+
105
/**
 * Returns version information.
 *
 * @returns The version string
 */
export declare function getVersion(): string;
109
+
110
/** Input type constants. */
export declare const InputType: {
  FILE: 'file';
  URL: 'url';
  BUFFER: 'buffer';
};
116
+
117
/** Output type constants. */
export declare const OutputType: {
  FILE: 'file';
  BUFFER: 'buffer';
  COS: 'cos';
};
123
+
124
/** Render configuration values. */
export declare const RENDER_CONFIG: Record<
  | 'TARGET_RENDER_WIDTH'
  | 'IMAGE_HEAVY_TARGET_WIDTH'
  | 'MAX_RENDER_SCALE'
  | 'WEBP_QUALITY'
  | 'NATIVE_STREAM_THRESHOLD',
  number
>;
132
+
133
/** Timeout configuration values. */
export declare const TIMEOUT_CONFIG: Record<
  'RANGE_REQUEST_TIMEOUT' | 'DOWNLOAD_TIMEOUT',
  number
>;
138
+
139
/** Checks whether the native renderer is available. */
export declare function isNativeAvailable(): boolean;

/** Per-page entry produced by the native renderer. */
export interface NativePageResult {
  /** Page number (1-based). */
  pageNum: number;
  /** Rendered width, in pixels. */
  width: number;
  /** Rendered height, in pixels. */
  height: number;
  /** Image data; left undefined for a page that failed to render. */
  buffer?: Buffer;
  /** Whether this page rendered successfully. */
  success: boolean;
  /** Error message for a failed page. */
  error?: string;
  /** Render time reported by the native renderer for this page. */
  renderTime: number;
  /** Encode time reported by the native renderer for this page. */
  encodeTime: number;
}

/** Result shared by the native rendering entry points. */
export interface NativeRenderResult {
  success: boolean;
  /** Total number of pages in the PDF. */
  numPages: number;
  pages: NativePageResult[];
  /** Elapsed time measured around the native call, in milliseconds. */
  totalTime: number;
  /** Total time reported by the native renderer itself. */
  nativeTime: number;
}

/** Result of stream rendering; adds whatever statistics the native side reports. */
export interface NativeStreamRenderResult extends NativeRenderResult {
  streamStats?: object;
}

/**
 * Raw (unencoded) bitmap returned by the single-page helpers.
 *
 * NOTE(review): the field names follow the implementation's JSDoc; the field
 * types mirror `NativePageResult` and should be confirmed against the native
 * binding.
 */
export interface RawBitmapResult {
  success: boolean;
  buffer?: Buffer;
  width: number;
  height: number;
  channels: number;
  renderTime: number;
  error?: string;
}

/**
 * Returns the page count of a PDF held in memory, using the native renderer.
 * Exported by the package entry point under this alias.
 */
export declare function getPageCountNative(pdfBuffer: Buffer): number;

/** Returns the page count of a PDF file, read directly from `filePath`. */
export declare function getPageCountFromFile(filePath: string): number;

/**
 * Renders one page of a PDF file to a raw bitmap, without encoding it.
 *
 * @param filePath - Path of the PDF file
 * @param pageNum - Page number (1-based)
 * @param options - Render options
 */
export declare function renderPageToRawBitmap(
  filePath: string,
  pageNum: number,
  options?: RenderOptions
): RawBitmapResult;

/**
 * Renders one page of an in-memory PDF to a raw bitmap, without encoding it.
 *
 * @param pdfBuffer - PDF file data
 * @param pageNum - Page number (1-based)
 * @param options - Render options
 */
export declare function renderPageToRawBitmapFromBuffer(
  pdfBuffer: Buffer,
  pageNum: number,
  options?: RenderOptions
): RawBitmapResult;

/**
 * Renders a PDF from a Buffer.
 *
 * @param pdfBuffer - PDF file data
 * @param pages - Page numbers to render (1-based); empty or omitted means every page
 * @param options - Render options
 */
export declare function renderFromBuffer(
  pdfBuffer: Buffer,
  pages?: number[],
  options?: RenderOptions
): Promise<NativeRenderResult>;

/**
 * Renders a PDF from a stream (used for remote URLs).
 *
 * @param pdfUrl - URL of the PDF file
 * @param pdfSize - Size of the PDF file
 * @param pages - Page numbers to render (1-based); empty or omitted means every page
 * @param options - Render options
 */
export declare function renderFromStream(
  pdfUrl: string,
  pdfSize: number,
  pages?: number[],
  options?: RenderOptions
): Promise<NativeStreamRenderResult>;
package/src/index.js ADDED
@@ -0,0 +1,58 @@
1
+ /**
2
+ * @tencent/pdf2img - 高性能 PDF 转图片工具
3
+ *
4
+ * 使用 PDFium 原生渲染器实现最佳性能。
5
+ *
6
+ * @example
7
+ * ```javascript
8
+ * import { convert, getPageCount, isAvailable } from '@tencent/pdf2img';
9
+ *
10
+ * // 转换 PDF 为图片(返回 Buffer)
11
+ * const result = await convert('./document.pdf');
12
+ * console.log(`转换了 ${result.renderedPages} 页`);
13
+ *
14
+ * // 保存到文件
15
+ * const result = await convert('./document.pdf', {
16
+ * outputType: 'file',
17
+ * outputDir: './output',
18
+ * });
19
+ *
20
+ * // 转换指定页面
21
+ * const result = await convert('./document.pdf', {
22
+ * pages: [1, 2, 3],
23
+ * targetWidth: 1920,
24
+ * webpQuality: 80,
25
+ * });
26
+ *
27
+ * // 从 URL 转换(大文件自动使用流式加载)
28
+ * const result = await convert('https://example.com/document.pdf', {
29
+ * outputType: 'file',
30
+ * outputDir: './output',
31
+ * });
32
+ * ```
33
+ *
34
+ * @module @tencent/pdf2img
35
+ */
36
+
37
// High-level public API, implemented by the converter.
export {
  convert,
  getPageCount,
  getPageCountSync,
  isAvailable,
  getVersion,
  getThreadPoolStats,
  destroyThreadPool,
  InputType,
  OutputType,
} from './core/converter.js';

// Render and timeout configuration values.
export { RENDER_CONFIG, TIMEOUT_CONFIG } from './core/config.js';

// Native renderer utilities, exported for advanced usage.
export {
  isNativeAvailable,
  getPageCount as getPageCountNative,
  getPageCountFromFile,
  renderPageToRawBitmap,
  renderPageToRawBitmapFromBuffer,
  // index.d.ts declares these two as part of the public API, so they have to
  // be re-exported here; otherwise a typed import fails at runtime.
  renderFromBuffer,
  renderFromStream,
} from './renderers/native.js';
@@ -0,0 +1,331 @@
1
+ /**
2
+ * Native Renderer - PDFium 原生渲染器
3
+ *
4
+ * 支持两种模式:
5
+ * - Native: 直接加载 PDF Buffer 渲染
6
+ * - Native Stream: 流式加载 PDF 渲染(适合大文件)
7
+ */
8
+
9
+ import { createLogger } from '../utils/logger.js';
10
+ import { mergeConfig, TIMEOUT_CONFIG } from '../core/config.js';
11
+
12
const logger = createLogger('NativeRenderer');

// ==================== Native renderer dynamic import ====================

// Binding object of the `node-pdf2img-native` package. It is replaced by an
// empty object when the import fails, so it must only be used after checking
// `nativeAvailable`.
let nativeRenderer = null;
// True only when the binding was imported AND it reports PDFium as available.
let nativeAvailable = false;

try {
  // Imported from the workspace's native-renderer package.
  const loaded = await import('node-pdf2img-native');

  // For a CommonJS package, `module.exports` is always reachable through the
  // `default` export, whereas named exports exist only when Node's static
  // analysis detects them. Accept either shape.
  nativeRenderer =
    typeof loaded.isPdfiumAvailable === 'function' ? loaded : (loaded.default ?? loaded);

  if (nativeRenderer.isPdfiumAvailable()) {
    nativeAvailable = true;

    // Warm-up is best effort: a failure is logged but the renderer stays
    // marked as available.
    try {
      const warmupTime = nativeRenderer.warmup();
      logger.info(`Native renderer loaded: ${nativeRenderer.getVersion()}, warmup: ${warmupTime}ms`);
    } catch (warmupErr) {
      logger.warn(`Native renderer warmup failed: ${warmupErr instanceof Error ? warmupErr.message : String(warmupErr)}`);
    }
  } else {
    logger.warn('Native renderer loaded but PDFium library not available');
  }
} catch (e) {
  // Covers a missing package as well as a binding that lacks the expected
  // functions; the module still loads and every entry point reports the
  // renderer as unavailable.
  logger.warn(`Native renderer not available: ${e instanceof Error ? e.message : String(e)}`);
  nativeRenderer = {};
  nativeAvailable = false;
}
40
+
41
/**
 * Reports whether the native renderer can be used.
 *
 * @returns {boolean} the module-level availability flag set while loading the binding
 */
export function isNativeAvailable() {
  return nativeAvailable;
}
47
+
48
/**
 * Returns the number of pages of a PDF held in memory.
 *
 * @param {Buffer} pdfBuffer - PDF file data
 * @returns {number} page count reported by the native renderer
 * @throws {Error} when the native renderer is not available
 */
export function getPageCount(pdfBuffer) {
  if (nativeAvailable) {
    return nativeRenderer.getPageCount(pdfBuffer);
  }
  throw new Error('Native renderer not available');
}
59
+
60
/**
 * Returns the number of pages of a PDF file.
 *
 * The path is handed straight to the native renderer, so no large Buffer has
 * to be created on the Node.js heap.
 *
 * @param {string} filePath - path of the PDF file
 * @returns {number} page count reported by the native renderer
 * @throws {Error} when the native renderer is not available
 */
export function getPageCountFromFile(filePath) {
  if (nativeAvailable) {
    return nativeRenderer.getPageCountFromFile(filePath);
  }
  throw new Error('Native renderer not available');
}
74
+
75
/**
 * Renders a single page of a PDF file to a raw bitmap (no encoding).
 *
 * Only the PDFium render step runs; image encoding is skipped and the raw
 * RGBA pixel data is returned, so encoding can be delegated to a library such
 * as Sharp.
 *
 * @param {string} filePath - path of the PDF file
 * @param {number} pageNum - page number (starting at 1)
 * @param {Object} options - render options, merged with the defaults via `mergeConfig`
 * @returns {Object} { success, buffer, width, height, channels, renderTime, error }
 * @throws {Error} when the native renderer is not available
 */
export function renderPageToRawBitmap(filePath, pageNum, options = {}) {
  if (nativeAvailable) {
    const mergedOptions = mergeConfig(options);
    return nativeRenderer.renderPageToRawBitmap(filePath, pageNum, mergedOptions);
  }
  throw new Error('Native renderer not available');
}
93
+
94
/**
 * Renders a single page of an in-memory PDF to a raw bitmap (no encoding).
 *
 * @param {Buffer} pdfBuffer - PDF file data; anything that is not a Buffer is converted with `Buffer.from`
 * @param {number} pageNum - page number (starting at 1)
 * @param {Object} options - render options, merged with the defaults via `mergeConfig`
 * @returns {Object} { success, buffer, width, height, channels, renderTime, error }
 * @throws {Error} when the native renderer is not available
 */
export function renderPageToRawBitmapFromBuffer(pdfBuffer, pageNum, options = {}) {
  if (nativeAvailable) {
    const mergedOptions = mergeConfig(options);
    let pdfData = pdfBuffer;
    if (!Buffer.isBuffer(pdfData)) {
      pdfData = Buffer.from(pdfBuffer);
    }
    return nativeRenderer.renderPageToRawBitmapFromBuffer(pdfData, pageNum, mergedOptions);
  }
  throw new Error('Native renderer not available');
}
110
+
111
/**
 * Returns version information.
 *
 * @returns {string} the native renderer's version, or a fixed placeholder
 *   message when the native renderer is not available
 */
export function getVersion() {
  return nativeAvailable
    ? nativeRenderer.getVersion()
    : 'Native renderer not available';
}
120
+
121
/**
 * Renders a PDF held in memory with the native renderer.
 *
 * @param {Buffer} pdfBuffer - PDF file data; anything that is not a Buffer is converted with `Buffer.from`
 * @param {number[]} pages - page numbers to render (1-based); an empty array,
 *   `undefined` or `null` means every page
 * @param {Object} options - render options, merged with the defaults via `mergeConfig`
 * @returns {Promise<Object>} { success, numPages, pages, totalTime, nativeTime }
 * @throws {Error} when the native renderer is not available or reports a failure
 */
export async function renderFromBuffer(pdfBuffer, pages = [], options = {}) {
  if (!nativeAvailable) {
    throw new Error('Native renderer not available');
  }

  const config = mergeConfig(options);
  const buffer = Buffer.isBuffer(pdfBuffer) ? pdfBuffer : Buffer.from(pdfBuffer);
  const numPages = nativeRenderer.getPageCount(buffer);

  // The `= []` default only applies to `undefined`; an explicit `null` used
  // to crash on `pages.length`, so treat it as "all pages" too.
  const requestedPages = pages ?? [];

  // Work out the target page numbers. Only whole numbers inside the document
  // are forwarded; fractional or out-of-range entries are dropped.
  // NOTE(review): if every requested page is dropped, the native call gets an
  // empty list — how it interprets that is not visible here.
  const targetPages = requestedPages.length === 0
    ? Array.from({ length: numPages }, (_, i) => i + 1)
    : requestedPages.filter(p => Number.isInteger(p) && p >= 1 && p <= numPages);

  logger.debug(`Rendering ${targetPages.length} pages from buffer (${(buffer.length / 1024 / 1024).toFixed(2)}MB)`);

  const startTime = Date.now();
  const result = nativeRenderer.renderPages(buffer, targetPages, config);

  if (!result.success) {
    throw new Error(result.error || 'Native renderer failed');
  }

  return {
    success: true,
    numPages,
    pages: result.pages.map(page => ({
      pageNum: page.pageNum,
      width: page.width,
      height: page.height,
      // Never expose image data for a page that failed.
      buffer: page.success ? page.buffer : undefined,
      success: page.success,
      error: page.error,
      renderTime: page.renderTime,
      encodeTime: page.encodeTime,
    })),
    // Elapsed time around the native call vs. the time the native side reports.
    totalTime: Date.now() - startTime,
    nativeTime: result.totalTime,
  };
}
172
+
173
/**
 * Renders a PDF file with the native renderer.
 *
 * The path is handed straight to the native renderer, so no large Buffer has
 * to be created on the Node.js heap.
 *
 * @param {string} filePath - path of the PDF file
 * @param {number[]} pages - page numbers to render (1-based); an empty array,
 *   `undefined` or `null` means every page
 * @param {Object} options - render options, merged with the defaults via `mergeConfig`
 * @returns {Promise<Object>} { success, numPages, pages, totalTime, nativeTime }
 * @throws {Error} when the native renderer is not available or reports a failure
 */
export async function renderFromFile(filePath, pages = [], options = {}) {
  if (!nativeAvailable) {
    throw new Error('Native renderer not available');
  }

  const config = mergeConfig(options);
  const numPages = nativeRenderer.getPageCountFromFile(filePath);

  // The `= []` default only applies to `undefined`; an explicit `null` used
  // to crash on `pages.length`, so treat it as "all pages" too.
  const requestedPages = pages ?? [];

  // Work out the target page numbers. Only whole numbers inside the document
  // are forwarded; fractional or out-of-range entries are dropped.
  // NOTE(review): if every requested page is dropped, the native call gets an
  // empty list — how it interprets that is not visible here.
  const targetPages = requestedPages.length === 0
    ? Array.from({ length: numPages }, (_, i) => i + 1)
    : requestedPages.filter(p => Number.isInteger(p) && p >= 1 && p <= numPages);

  logger.debug(`Rendering ${targetPages.length} pages from file: ${filePath}`);

  const startTime = Date.now();
  const result = nativeRenderer.renderPagesFromFile(filePath, targetPages, config);

  if (!result.success) {
    throw new Error(result.error || 'Native renderer failed');
  }

  return {
    success: true,
    numPages,
    pages: result.pages.map(page => ({
      pageNum: page.pageNum,
      width: page.width,
      height: page.height,
      // Never expose image data for a page that failed.
      buffer: page.success ? page.buffer : undefined,
      success: page.success,
      error: page.error,
      renderTime: page.renderTime,
      encodeTime: page.encodeTime,
    })),
    // Elapsed time around the native call vs. the time the native side reports.
    totalTime: Date.now() - startTime,
    nativeTime: result.totalTime,
  };
}
226
+
227
/**
 * Renders a remote PDF with the native stream renderer.
 *
 * PDF data is fetched on demand through a callback (HTTP range requests), so
 * the whole file does not have to be downloaded up front.
 *
 * @param {string} pdfUrl - URL of the PDF file
 * @param {number} pdfSize - size of the PDF file (logged as megabytes, so presumably bytes)
 * @param {number[]} pages - page numbers to render (1-based); an empty array,
 *   `undefined` or `null` means every page
 * @param {Object} options - render options, merged with the defaults via `mergeConfig`
 * @returns {Promise<Object>} { success, numPages, pages, totalTime, nativeTime, streamStats }
 * @throws {Error} when the native renderer is not available, when `pdfUrl` or
 *   `pdfSize` is missing, or when the native renderer reports a failure
 */
export async function renderFromStream(pdfUrl, pdfSize, pages = [], options = {}) {
  if (!nativeAvailable) {
    throw new Error('Native renderer not available');
  }

  if (!pdfUrl || !pdfSize) {
    throw new Error('pdfUrl and pdfSize are required for stream mode');
  }

  const config = mergeConfig(options);
  // The `= []` default only applies to `undefined`; an explicit `null` used
  // to crash on `pages.length`, so treat it as "all pages" too.
  const requestedPages = pages ?? [];

  logger.debug(`Stream rendering from ${pdfUrl} (${(pdfSize / 1024 / 1024).toFixed(2)}MB)`);

  /**
   * Downloads `size` bytes starting at `start` and resolves with exactly the
   * requested window.
   */
  const fetchRange = async (start, size) => {
    const end = start + size - 1;
    const response = await fetch(pdfUrl, {
      headers: { 'Range': `bytes=${start}-${end}` },
      signal: AbortSignal.timeout(TIMEOUT_CONFIG.RANGE_REQUEST_TIMEOUT),
    });

    // 206 is already covered by `response.ok` (any 2xx status).
    if (!response.ok) {
      throw new Error(`Range request failed with status ${response.status}`);
    }

    const body = Buffer.from(await response.arrayBuffer());

    // A server may ignore the Range header and answer 200 with the complete
    // document. Passing that body on unchanged would feed bytes from offset 0
    // to a reader expecting bytes from `start`, so cut out the window
    // (copied, so the full download can be released).
    if (response.status !== 206 && body.length > size) {
      return Buffer.from(body.subarray(start, start + size));
    }
    return body;
  };

  /**
   * Fetcher callback, invoked by the Rust side through a ThreadsafeFunction.
   * Every request is answered via `completeStreamRequest`, with either the
   * data or an error message.
   */
  const fetcher = (error, req) => {
    if (error) {
      // No request object is available here, so there is nothing to complete.
      logger.error(`Fetcher received error: ${error.message}`);
      return;
    }

    const { offset, size, requestId } = req;
    const start = Number(offset);

    fetchRange(start, size)
      .then(data => {
        nativeRenderer.completeStreamRequest(requestId, data, null);
      })
      .catch(err => {
        logger.error(`Fetcher failed (offset=${start}, size=${size}): ${err.message}`);
        nativeRenderer.completeStreamRequest(requestId, null, err.message);
      });
  };

  const startTime = Date.now();

  // The first call also yields the page count.
  let result = await nativeRenderer.renderPagesFromStream(
    pdfSize,
    requestedPages,
    config,
    fetcher
  );

  if (!result.success) {
    throw new Error(result.error || 'Native stream renderer failed');
  }

  const numPages = result.numPages;

  // All pages were requested but the page count was unknown beforehand: when
  // the first pass rendered nothing, run again with the explicit page list.
  if (requestedPages.length === 0 && numPages > 0 && result.pages.length === 0) {
    const allPages = Array.from({ length: numPages }, (_, i) => i + 1);
    result = await nativeRenderer.renderPagesFromStream(
      pdfSize,
      allPages,
      config,
      fetcher
    );

    if (!result.success) {
      throw new Error(result.error || 'Native stream renderer failed');
    }
  }

  return {
    success: true,
    numPages,
    pages: result.pages.map(page => ({
      pageNum: page.pageNum,
      width: page.width,
      height: page.height,
      // Never expose image data for a page that failed.
      buffer: page.success ? page.buffer : undefined,
      success: page.success,
      error: page.error,
      renderTime: page.renderTime,
      encodeTime: page.encodeTime,
    })),
    // Elapsed time around the native call(s) vs. the time the native side reports.
    totalTime: Date.now() - startTime,
    nativeTime: result.totalTime,
    streamStats: result.streamStats,
  };
}
@@ -0,0 +1,58 @@
1
+ /**
2
+ * 简化日志模块
3
+ */
4
+
5
+ const IS_DEBUG = process.env.DEBUG === 'true' || process.env.PDF2IMG_DEBUG === 'true';
6
+
7
+ const colors = {
8
+ reset: '\x1b[0m',
9
+ green: '\x1b[32m',
10
+ red: '\x1b[31m',
11
+ yellow: '\x1b[33m',
12
+ cyan: '\x1b[36m',
13
+ dim: '\x1b[2m',
14
+ };
15
+
16
+ /**
17
+ * 创建日志实例
18
+ * @param {string} module - 模块名称
19
+ */
20
+ export function createLogger(module) {
21
+ const prefix = `[${module}]`;
22
+
23
+ return {
24
+ info: (msg, data) => {
25
+ if (IS_DEBUG) {
26
+ const dataStr = data ? ` ${JSON.stringify(data)}` : '';
27
+ console.log(`${colors.green}${prefix}${colors.reset} ${msg}${dataStr}`);
28
+ }
29
+ },
30
+
31
+ warn: (msg, data) => {
32
+ const dataStr = data ? ` ${JSON.stringify(data)}` : '';
33
+ console.warn(`${colors.yellow}${prefix}${colors.reset} ${msg}${dataStr}`);
34
+ },
35
+
36
+ error: (msg, data) => {
37
+ const dataStr = data ? ` ${JSON.stringify(data)}` : '';
38
+ console.error(`${colors.red}${prefix}${colors.reset} ${msg}${dataStr}`);
39
+ },
40
+
41
+ debug: (msg, data) => {
42
+ if (IS_DEBUG) {
43
+ const dataStr = data ? ` ${JSON.stringify(data)}` : '';
44
+ console.log(`${colors.dim}${prefix}${colors.reset} ${msg}${dataStr}`);
45
+ }
46
+ },
47
+
48
+ perf: (msg, data) => {
49
+ if (IS_DEBUG) {
50
+ const dataStr = data ? ` ${JSON.stringify(data)}` : '';
51
+ console.log(`${colors.cyan}${prefix}${colors.reset} ${msg}${dataStr}`);
52
+ }
53
+ },
54
+ };
55
+ }
56
+
57
+ export const IS_DEV = process.env.NODE_ENV !== 'production';
58
+ export const IS_TEST = process.env.NODE_ENV === 'test';