pdf-plus 1.2.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -25
- package/dist/index.d.mts +1009 -115
- package/dist/index.d.ts +1009 -115
- package/dist/index.js +39 -38
- package/dist/index.mjs +39 -38
- package/dist/workers/jp2-converter.worker.js +1 -1
- package/dist/workers/jp2-converter.worker.mjs +1 -1
- package/package.json +2 -6
package/README.md
CHANGED
|
@@ -82,33 +82,55 @@ for await (const event of stream) {
|
|
|
82
82
|
|
|
83
83
|
See [PHASE4-STREAMING.md](./PHASE4-STREAMING.md) for the complete streaming API documentation.
|
|
84
84
|
|
|
85
|
-
###
|
|
85
|
+
### Generate Page Images (NEW! - Phase 5)
|
|
86
86
|
|
|
87
|
-
|
|
87
|
+
Render PDF pages to high-quality images with a simple function call:
|
|
88
88
|
|
|
89
89
|
```typescript
|
|
90
|
-
import {
|
|
90
|
+
import { generatePageImages } from "pdf-plus";
|
|
91
91
|
|
|
92
|
-
|
|
92
|
+
// Simple - render all pages to JPG images
|
|
93
|
+
const imagePaths = await generatePageImages(
|
|
94
|
+
"document.pdf", // PDF file path
|
|
95
|
+
"./page-images" // Output directory where images will be saved
|
|
96
|
+
);
|
|
93
97
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
verbose: true,
|
|
100
|
-
});
|
|
98
|
+
console.log(`Generated ${imagePaths.length} page images`);
|
|
99
|
+
// Returns: ['/path/to/page-images/jpg/page-001.jpg', '/path/to/page-images/jpg/page-002.jpg', ...]
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
**With Options:**
|
|
101
103
|
|
|
102
|
-
|
|
104
|
+
```typescript
|
|
105
|
+
const imagePaths = await generatePageImages("document.pdf", "./page-images", {
|
|
106
|
+
pageImageFormat: "jpg", // 'jpg', 'png', or 'webp'
|
|
107
|
+
pageImageDpi: 150, // Render resolution in DPI (72, 150, 300, 600)
|
|
108
|
+
pageRenderEngine: "poppler", // 'poppler' (recommended) or 'pdfjs'
|
|
109
|
+
specificPages: [1, 2, 3], // Optional: only render specific pages
|
|
110
|
+
parallelProcessing: true, // Parallel rendering (default: true)
|
|
111
|
+
maxConcurrentPages: 10, // Max parallel pages (default: 10)
|
|
112
|
+
verbose: true, // Show progress
|
|
113
|
+
});
|
|
103
114
|
```
|
|
104
115
|
|
|
105
116
|
**Features:**
|
|
106
117
|
|
|
107
|
-
- 🎨 **Multiple formats** -
|
|
108
|
-
- 📐 **Quality control** - Adjustable DPI (72, 150, 300, 600)
|
|
109
|
-
- 📄 **Page selection** -
|
|
110
|
-
-
|
|
111
|
-
-
|
|
118
|
+
- 🎨 **Multiple formats** - JPG, PNG, WebP
|
|
119
|
+
- 📐 **Quality control** - Adjustable DPI (72, 150, 300, 600)
|
|
120
|
+
- 📄 **Page selection** - Render specific pages or all pages
|
|
121
|
+
- 🚀 **Parallel rendering** - Fast multi-page processing
|
|
122
|
+
- 📁 **Returns file paths** - Array of absolute paths to generated images
|
|
123
|
+
- 🔧 **Two engines** - Poppler (best quality) or PDF.js
|
|
124
|
+
|
|
125
|
+
**Output Structure:**
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
page-images/
|
|
129
|
+
└── jpg/
|
|
130
|
+
├── page-001.jpg
|
|
131
|
+
├── page-002.jpg
|
|
132
|
+
└── page-003.jpg
|
|
133
|
+
```
|
|
112
134
|
|
|
113
135
|
See [PAGE-TO-IMAGE-FEATURE.md](./PAGE-TO-IMAGE-FEATURE.md) for the complete page-to-image documentation.
|
|
114
136
|
|
|
@@ -123,17 +145,33 @@ const text = await extractText("document.pdf");
|
|
|
123
145
|
console.log(`Extracted ${text.length} characters`);
|
|
124
146
|
```
|
|
125
147
|
|
|
126
|
-
### Images
|
|
148
|
+
### Extract Embedded Images
|
|
127
149
|
|
|
128
150
|
```typescript
|
|
129
|
-
import {
|
|
151
|
+
import { extractImageFiles } from "pdf-plus";
|
|
130
152
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
153
|
+
// Extract the images embedded in the PDF and save them to disk
|
|
154
|
+
const imagePaths = await extractImageFiles(
|
|
155
|
+
"document.pdf",
|
|
156
|
+
"./extracted-images" // Output directory for embedded images
|
|
157
|
+
);
|
|
135
158
|
|
|
136
|
-
console.log(`
|
|
159
|
+
console.log(`Extracted ${imagePaths.length} embedded images`);
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Generate Page Images (Render Pages)
|
|
163
|
+
|
|
164
|
+
```typescript
|
|
165
|
+
import { generatePageImages } from "pdf-plus";
|
|
166
|
+
|
|
167
|
+
// Render PDF pages to image files
|
|
168
|
+
const imagePaths = await generatePageImages(
|
|
169
|
+
"document.pdf",
|
|
170
|
+
"./page-images" // Output directory for page images
|
|
171
|
+
);
|
|
172
|
+
|
|
173
|
+
console.log(`Generated ${imagePaths.length} page images`);
|
|
174
|
+
// Each page becomes an image: page-001.jpg, page-002.jpg, etc.
|
|
137
175
|
```
|
|
138
176
|
|
|
139
177
|
### Image Extraction with Optimization
|
|
@@ -364,10 +402,43 @@ Extract only image references.
|
|
|
364
402
|
|
|
365
403
|
#### `extractImageFiles(pdfPath, outputDir, options)`
|
|
366
404
|
|
|
367
|
-
Extract and save
|
|
405
|
+
Extract embedded images from a PDF and save them as image files.
|
|
406
|
+
|
|
407
|
+
**Parameters:**
|
|
408
|
+
|
|
409
|
+
- `pdfPath` - Path to the PDF file
|
|
410
|
+
- `outputDir` - Output directory path where embedded images will be saved
|
|
411
|
+
- `options` - Optional extraction options
|
|
368
412
|
|
|
369
413
|
**Returns:** `Promise<string[]>` - Array of saved file paths
|
|
370
414
|
|
|
415
|
+
#### `generatePageImages(pdfPath, outputDir, options)`
|
|
416
|
+
|
|
417
|
+
Render PDF pages to image files (page-to-image conversion).
|
|
418
|
+
|
|
419
|
+
**Parameters:**
|
|
420
|
+
|
|
421
|
+
- `pdfPath` - Path to the PDF file
|
|
422
|
+
- `outputDir` - Output directory path where page images will be saved
|
|
423
|
+
- `options` - Optional rendering options (`pageImageFormat`, `pageImageDpi`, `pageRenderEngine`, etc.)
|
|
424
|
+
|
|
425
|
+
**Returns:** `Promise<string[]>` - Array of absolute paths to generated page images
|
|
426
|
+
|
|
427
|
+
**Example:**
|
|
428
|
+
|
|
429
|
+
```typescript
|
|
430
|
+
import { generatePageImages } from "pdf-plus";
|
|
431
|
+
|
|
432
|
+
const imagePaths = await generatePageImages("document.pdf", "./page-images", {
|
|
433
|
+
pageImageFormat: "jpg",
|
|
434
|
+
pageImageDpi: 150,
|
|
435
|
+
pageRenderEngine: "poppler",
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
console.log(`Generated ${imagePaths.length} page images`);
|
|
439
|
+
// Returns: ['/absolute/path/to/page-images/jpg/page-001.jpg', ...]
|
|
440
|
+
```
|
|
441
|
+
|
|
371
442
|
### Options
|
|
372
443
|
|
|
373
444
|
```typescript
|