modern-pdf-lib 0.14.1 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/README.md +46 -5
  2. package/dist/batchOptimize-ClXizv19.mjs +306 -0
  3. package/dist/batchOptimize-DYQOX1-7.cjs +329 -0
  4. package/dist/bridge-DN7BOHRW.cjs +132 -0
  5. package/dist/bridge-DpzMOnHd.mjs +103 -0
  6. package/dist/cli/index.cjs +225 -0
  7. package/dist/cli/index.d.cts +1 -0
  8. package/dist/cli/index.d.mts +1 -0
  9. package/dist/cli/index.mjs +226 -0
  10. package/dist/deduplicateImages-BfpjHY9b.mjs +102 -0
  11. package/dist/deduplicateImages-BtJ5tlrr.cjs +113 -0
  12. package/dist/{fflateAdapter-DX0VqT5k.mjs → fflateAdapter-D2mv_ttM.mjs} +1 -1
  13. package/dist/{fflateAdapter-AHC_S3cb.cjs → fflateAdapter-cT4YeY_h.cjs} +1 -1
  14. package/dist/{fontSubset-pFc8Dueu.cjs → fontSubset-BxsF9Tu5.cjs} +1 -1
  15. package/dist/{fontSubset-ZpLoOZ2e.mjs → fontSubset-ClyTXlhY.mjs} +1 -1
  16. package/dist/imageExtract-BC7TMY98.cjs +4770 -0
  17. package/dist/imageExtract-vjyQyFcT.mjs +4747 -0
  18. package/dist/index.cjs +846 -5164
  19. package/dist/index.d.cts +486 -12
  20. package/dist/index.d.cts.map +1 -1
  21. package/dist/index.d.mts +486 -12
  22. package/dist/index.d.mts.map +1 -1
  23. package/dist/index.mjs +298 -4629
  24. package/dist/{libdeflateWasm-OkNoqBnO.cjs → libdeflateWasm-Cg7cWHOq.cjs} +2 -2
  25. package/dist/{libdeflateWasm-DlHgU5oy.mjs → libdeflateWasm-Cmxa-yiS.mjs} +2 -2
  26. package/dist/{loader-CQfoGFp9.mjs → loader-B6VIrZOJ.mjs} +3 -2
  27. package/dist/{loader-_fqS-TmT.cjs → loader-DdB5Xo5D.cjs} +3 -2
  28. package/dist/pdfCatalog-BcOL6QF-.cjs +173 -0
  29. package/dist/pdfCatalog-CnJRovvm.mjs +138 -0
  30. package/dist/{pdfCatalog-COKoYQ8C.cjs → pdfObjects-BrU4Xd0V.cjs} +1 -171
  31. package/dist/{pdfCatalog-BB2Wnmud.mjs → pdfObjects-DZZ2GPRW.mjs} +2 -137
  32. package/dist/{pdfPage-N1K2U3jI.mjs → pdfPage-Dm5XC_g_.mjs} +3 -2
  33. package/dist/{pdfPage-DBfdinTR.cjs → pdfPage-Dz_SVKUS.cjs} +105 -104
  34. package/dist/{pngEmbed-OYyOe_W0.cjs → pngEmbed-C6M1eX6b.cjs} +2 -2
  35. package/dist/{pngEmbed-DTOqgEUC.mjs → pngEmbed-I1hU3Y6m.mjs} +2 -2
  36. package/package.json +4 -1
package/README.md CHANGED
@@ -15,7 +15,7 @@ Create, parse, fill, merge, sign, and manipulate PDF documents<br />in Node, Den
15
15
 
16
16
  [![npm version](https://img.shields.io/npm/v/modern-pdf-lib?style=flat-square&color=cb3837)](https://www.npmjs.com/package/modern-pdf-lib)
17
17
  [![bundle size](https://img.shields.io/badge/gzip-36kb_core-blue?style=flat-square)](https://bundlephobia.com/package/modern-pdf-lib)
18
- [![tests](https://img.shields.io/badge/tests-2%2C243_passing-brightgreen?style=flat-square)](#)
18
+ [![tests](https://img.shields.io/badge/tests-2%2C323_passing-brightgreen?style=flat-square)](#)
19
19
  [![TypeScript](https://img.shields.io/badge/TypeScript-6.0-3178c6?style=flat-square&logo=typescript&logoColor=white)](#)
20
20
  [![License: MIT](https://img.shields.io/badge/license-MIT-yellow?style=flat-square)](LICENSE)
21
21
 
@@ -68,6 +68,7 @@ const blob = await doc.saveAsBlob(); // Blob (browsers)
68
68
  - TrueType & OpenType font embedding
69
69
  - Automatic font subsetting
70
70
  - JPEG / PNG image embedding
71
+ - Image optimization (JPEG recompression, dedup, grayscale)
71
72
  - RGB, CMYK, grayscale colors
72
73
  - Linear & radial gradients, tiling patterns
73
74
  - Text layout (multiline, combed, auto-size)
@@ -107,6 +108,7 @@ const blob = await doc.saveAsBlob(); // Blob (browsers)
107
108
  - Linearization (fast web view)
108
109
  - 60+ low-level PDF operators
109
110
  - Custom appearance providers
111
+ - CLI: `npx modern-pdf optimize`
110
112
 
111
113
  </td>
112
114
  </tr>
@@ -195,8 +197,12 @@ const blob = await doc.saveAsBlob(); // Blob (browsers)
195
197
  <td align="center">Yes</td>
196
198
  <td align="center">No</td></tr>
197
199
 
200
+ <tr><td><strong>Image optimization</strong></td>
201
+ <td align="center">JPEG recompress, dedup, grayscale</td>
202
+ <td align="center">No</td></tr>
203
+
198
204
  <tr><td><strong>WASM acceleration</strong></td>
199
- <td align="center">Optional (compression, PNG, fonts, JBIG2)</td>
205
+ <td align="center">Optional (compression, PNG, fonts, JBIG2, JPEG)</td>
200
206
  <td align="center">No</td></tr>
201
207
 
202
208
  <tr><td><strong>Dependencies</strong></td>
@@ -352,6 +358,38 @@ for (const item of items) {
352
358
  ```
353
359
  </details>
354
360
 
361
+ <details>
362
+ <summary><strong>Image Optimization</strong> &mdash; batch compress, deduplicate, CLI</summary>
363
+
364
+ ```ts
365
+ import { loadPdf, initWasm, optimizeAllImages, deduplicateImages } from 'modern-pdf-lib';
366
+
367
+ await initWasm({ jpeg: true });
368
+
369
+ const doc = await loadPdf(pdfBytes);
370
+
371
+ // Deduplicate identical images
372
+ const dedupReport = deduplicateImages(doc);
373
+
374
+ // Optimize all images (JPEG recompression)
375
+ const report = await optimizeAllImages(doc, {
376
+ quality: 75,
377
+ progressive: true,
378
+ autoGrayscale: true,
379
+ });
380
+
381
+ console.log(`${report.optimizedImages}/${report.totalImages} images optimized`);
382
+ console.log(`Savings: ${report.savings.toFixed(1)}%`);
383
+
384
+ const optimized = await doc.save();
385
+ ```
386
+
387
+ **CLI:**
388
+ ```sh
389
+ npx modern-pdf optimize report.pdf report-opt.pdf --quality 60 --grayscale --dedup -v
390
+ ```
391
+ </details>
392
+
355
393
  <details>
356
394
  <summary><strong>PDF/A & Accessibility</strong></summary>
357
395
 
@@ -403,6 +441,7 @@ await initWasm({
403
441
  deflate: true, // Faster compression
404
442
  png: true, // Faster PNG decoding
405
443
  fonts: true, // Faster font subsetting
444
+ jpeg: true, // JPEG encode/decode for image optimization
406
445
  });
407
446
  ```
408
447
 
@@ -413,6 +452,7 @@ await initWasm({
413
452
  | ttf | Font parsing & subsetting | ~3x |
414
453
  | shaping | Complex script layout | ~10x |
415
454
  | jbig2 | JBIG2 bilevel image decoding | ~3x |
455
+ | jpeg | JPEG encode/decode for image optimization | Required |
416
456
 
417
457
  <br />
418
458
 
@@ -434,8 +474,9 @@ modern-pdf-lib/
434
474
  layers/ Optional content groups (OCG)
435
475
  outline/ Bookmarks / document outline
436
476
  metadata/ XMP metadata, viewer preferences
437
- wasm/ Rust crate sources (5 modules)
438
- tests/ 2,243 tests across 103 suites
477
+ wasm/ Rust crate sources (6 modules)
478
+ cli/ CLI tool (modern-pdf optimize)
479
+ tests/ 2,323 tests across 110 suites
439
480
  docs/ VitePress documentation
440
481
  ```
441
482
 
@@ -447,7 +488,7 @@ modern-pdf-lib/
447
488
  git clone https://github.com/ABCrimson/modern-pdf-lib.git
448
489
  cd modern-pdf-lib
449
490
  npm install
450
- npm test # 2,243 tests
491
+ npm test # 2,323 tests
451
492
  npm run typecheck # TypeScript 6.0 strict
452
493
  npm run build # ESM + CJS + declarations
453
494
  ```
package/dist/batchOptimize-ClXizv19.mjs ADDED
@@ -0,0 +1,306 @@
1
+ import { t as __exportAll } from "./rolldown-runtime-95iHPtFO.mjs";
2
+ import { i as PdfName } from "./pdfObjects-DZZ2GPRW.mjs";
3
+ import { n as extractImages, t as decodeImageStream } from "./imageExtract-vjyQyFcT.mjs";
4
+
5
+ //#region src/assets/image/grayscaleDetect.ts
6
+ /**
7
+ * @module assets/image/grayscaleDetect
8
+ *
9
+ * Grayscale detection and conversion for image optimization.
10
+ *
11
+ * Detects RGB images where all pixels are effectively grayscale
12
+ * (R ≈ G ≈ B) and converts them to single-channel grayscale,
13
+ * reducing data size by ~66%.
14
+ *
15
+ * No Buffer — uses Uint8Array exclusively.
16
+ */
17
/**
 * Check whether an RGB/RGBA image is effectively grayscale.
 *
 * A pixel counts as gray when the spread between its R, G, and B samples
 * (max − min) does not exceed `tolerance`. Up to 1% of the pixels
 * (rounded down) may fail that check; the scan stops and returns `false`
 * as soon as that budget is exceeded, so not every pixel is necessarily
 * visited.
 *
 * A buffer holding fewer than `width * height * channels` samples is
 * rejected (`false`). Without that guard the missing samples read as
 * `undefined`, every comparison against them is false, and a truncated
 * buffer would be reported as grayscale.
 *
 * @param pixels - Raw pixel data (row-major, channel-interleaved).
 * @param width - Image width in pixels.
 * @param height - Image height in pixels.
 * @param channels - Number of channels: 3 (RGB) or 4 (RGBA). A fourth
 *                   channel is ignored.
 * @param tolerance - Maximum allowed difference between R, G, and B
 *                    values for a pixel to be considered gray.
 *                    Default: `2`.
 * @returns `true` if the image is effectively grayscale.
 *
 * @example
 * ```ts
 * import { isGrayscaleImage, convertToGrayscale } from 'modern-pdf-lib';
 *
 * if (isGrayscaleImage(pixels, width, height, 3)) {
 *   const grayPixels = convertToGrayscale(pixels, width, height, 3);
 *   // grayPixels has 1 byte per pixel instead of 3
 * }
 * ```
 */
function isGrayscaleImage(pixels, width, height, channels, tolerance = 2) {
	const pixelCount = width * height;
	// A short buffer cannot be judged; answer "not grayscale" so callers
	// never convert an image based on missing samples.
	if (pixels.length < pixelCount * channels) return false;
	// Budget of non-gray pixels tolerated before giving up (1%, rounded down).
	const maxNonGray = Math.floor(pixelCount * .01);
	let nonGrayCount = 0;
	for (let i = 0; i < pixelCount; i++) {
		const base = i * channels;
		const r = pixels[base];
		const g = pixels[base + 1];
		const b = pixels[base + 2];
		if (Math.max(r, g, b) - Math.min(r, g, b) > tolerance) {
			nonGrayCount++;
			// Early exit: the verdict cannot flip back once the budget is spent.
			if (nonGrayCount > maxNonGray) return false;
		}
	}
	return true;
}
58
/**
 * Collapse an interleaved RGB/RGBA buffer into one luma byte per pixel.
 *
 * Each output byte is the rounded ITU-R BT.601 weighted sum of the
 * pixel's first three samples:
 * ```
 * gray = 0.299 × R + 0.587 × G + 0.114 × B
 * ```
 *
 * Any fourth sample (alpha) is skipped and does not influence the result.
 *
 * @param pixels - Raw pixel data (row-major, channel-interleaved).
 * @param width - Image width in pixels.
 * @param height - Image height in pixels.
 * @param channels - Number of channels: 3 (RGB) or 4 (RGBA).
 * @returns Grayscale pixel data (1 byte per pixel).
 */
function convertToGrayscale(pixels, width, height, channels) {
	const total = width * height;
	const out = new Uint8Array(total);
	let index = 0;
	while (index < total) {
		// Offset of this pixel's red sample inside the interleaved buffer.
		const base = index * channels;
		const luma = .299 * pixels[base] + .587 * pixels[base + 1] + .114 * pixels[base + 2];
		out[index] = Math.round(luma);
		index += 1;
	}
	return out;
}
85
+
86
+ //#endregion
87
+ //#region src/assets/image/batchOptimize.ts
88
var batchOptimize_exports = /* @__PURE__ */ __exportAll({ optimizeAllImages: () => optimizeAllImages });
/** Minimum image size to bother optimizing (10 KB). */
const SMALL_IMAGE_THRESHOLD = 10240;

/**
 * Build the per-image report entry for an image that was left untouched.
 *
 * @param img - Image record as yielded by `extractImages()`.
 * @param reason - Human-readable explanation of why it was skipped.
 * @returns A report entry whose `newSize` equals the original size.
 */
function makeSkippedImageEntry(img, reason) {
	return {
		name: img.name,
		pageIndex: img.pageIndex,
		originalSize: img.compressedSize,
		newSize: img.compressedSize,
		skipped: true,
		reason
	};
}

/**
 * Convert interleaved 8-bit CMYK samples to interleaved RGB using the
 * simple `255 × (1 − ink) × (1 − K)` formula (no colour management).
 *
 * @param pixels - CMYK samples, 4 bytes per pixel.
 * @param pixelCount - Number of pixels to convert.
 * @returns RGB samples, 3 bytes per pixel.
 */
function cmykPixelsToRgb(pixels, pixelCount) {
	const rgb = new Uint8Array(pixelCount * 3);
	for (let i = 0; i < pixelCount; i++) {
		const c = pixels[i * 4] / 255;
		const m = pixels[i * 4 + 1] / 255;
		const y = pixels[i * 4 + 2] / 255;
		const k = pixels[i * 4 + 3] / 255;
		rgb[i * 3] = Math.round(255 * (1 - c) * (1 - k));
		rgb[i * 3 + 1] = Math.round(255 * (1 - m) * (1 - k));
		rgb[i * 3 + 2] = Math.round(255 * (1 - y) * (1 - k));
	}
	return rgb;
}

/**
 * Optimize all images in a PDF document by recompressing them as JPEG.
 *
 * Walks every image returned by `extractImages(doc)`, decodes its pixel
 * data, re-encodes it with the JPEG WASM encoder, and replaces the stream
 * data in place when the result saves at least `minSavingsPercent`.
 * Images that cannot or should not be recompressed are left untouched
 * and listed in the report with `skipped: true` and a `reason`.
 *
 * **Requires the JPEG WASM module to be initialized** via
 * `initJpegWasm()` or `initWasm({ jpeg: true })`. Without it,
 * no images will be optimized (all will be skipped).
 *
 * Skipped without modification: images below the size threshold (only
 * when `skipSmallImages` is set), images whose bits per component is not
 * 8, Indexed colour spaces, images that fail to decode or whose decoded
 * length does not match `width × height × channels`, and images for
 * which encoding fails or returns no bytes.
 *
 * @param doc - A parsed `PdfDocument` (from `loadPdf()`).
 * @param options - Optimization settings:
 *   - `quality` — JPEG quality passed to the encoder. Default `80`.
 *   - `minSavingsPercent` — minimum size reduction required to replace
 *     an image. Default `10`.
 *   - `skipSmallImages` — skip images smaller than 10 KB. Default `false`.
 *   - `progressive` — passed to the encoder. Default `false`.
 *   - `chromaSubsampling` — passed to the encoder. Default `"4:2:0"`.
 *   - `autoGrayscale` — convert effectively-gray RGB/RGBA images to a
 *     single channel before encoding. Off unless truthy.
 * @returns A report summarizing the optimization results:
 *   `totalImages`, `optimizedImages`, `originalTotalBytes`,
 *   `optimizedTotalBytes`, `savings` (percent) and the `perImage` list.
 *
 * @example
 * ```ts
 * import { loadPdf, initWasm, optimizeAllImages } from 'modern-pdf-lib';
 *
 * await initWasm({ jpeg: true });
 *
 * const doc = await loadPdf(pdfBytes);
 * const report = await optimizeAllImages(doc);
 *
 * console.log(`Optimized ${report.optimizedImages} of ${report.totalImages} images`);
 * console.log(`Savings: ${report.savings.toFixed(1)}%`);
 *
 * const optimizedBytes = await doc.save();
 * ```
 */
async function optimizeAllImages(doc, options = {}) {
	const quality = options.quality ?? 80;
	const minSavingsPercent = options.minSavingsPercent ?? 10;
	const skipSmall = options.skipSmallImages ?? false;
	const progressive = options.progressive ?? false;
	const chromaSubsampling = options.chromaSubsampling ?? "4:2:0";
	// One dynamic import is enough: encoder, decoder and readiness probe
	// all live in the same bridge chunk.
	const { encodeJpegWasm, decodeJpegWasm, isJpegWasmReady } = await import("./bridge-DpzMOnHd.mjs").then((n) => n.t);
	const images = extractImages(doc);
	const perImage = [];
	let totalOriginal = 0;
	let totalNew = 0;
	let optimizedCount = 0;
	// Record an untouched image and keep the byte totals consistent.
	const skip = (img, reason) => {
		perImage.push(makeSkippedImageEntry(img, reason));
		totalNew += img.compressedSize;
	};
	// Checked once up front: the loop below contains no `await`, so nothing
	// can initialize the module between iterations.
	const wasmReady = isJpegWasmReady();
	for (const img of images) {
		totalOriginal += img.compressedSize;
		if (!wasmReady) {
			skip(img, "JPEG WASM encoder not initialized");
			continue;
		}
		if (skipSmall && img.compressedSize < SMALL_IMAGE_THRESHOLD) {
			skip(img, `Below size threshold (${SMALL_IMAGE_THRESHOLD} bytes)`);
			continue;
		}
		if (img.bitsPerComponent !== 8) {
			skip(img, `Unsupported bits per component: ${img.bitsPerComponent}`);
			continue;
		}
		if (img.colorSpace === "Indexed") {
			skip(img, "Indexed color space not suitable for JPEG");
			continue;
		}
		let pixels;
		let channels = img.channels;
		try {
			if (img.filters[0] === "DCTDecode") {
				// Existing JPEG: decode through WASM and trust the decoder's
				// channel count over the one derived from the dictionary.
				const decoded = decodeJpegWasm(img.stream.data);
				if (!decoded) {
					skip(img, "Failed to decode existing JPEG");
					continue;
				}
				pixels = decoded.pixels;
				channels = decoded.channels;
			} else {
				pixels = decodeImageStream(img);
			}
		} catch {
			// Best effort: a decode failure is reported per image instead of
			// aborting the whole batch.
			skip(img, "Failed to decode image stream");
			continue;
		}
		const pixelCount = img.width * img.height;
		const expectedLen = pixelCount * channels;
		if (pixels.length !== expectedLen) {
			skip(img, `Pixel data length mismatch: got ${pixels.length}, expected ${expectedLen}`);
			continue;
		}
		if (channels === 4 && img.colorSpace === "DeviceCMYK") {
			pixels = cmykPixelsToRgb(pixels, pixelCount);
			channels = 3;
		}
		if (options.autoGrayscale && (channels === 3 || channels === 4)) {
			if (isGrayscaleImage(pixels, img.width, img.height, channels)) {
				pixels = convertToGrayscale(pixels, img.width, img.height, channels);
				channels = 1;
			}
		}
		const jpegBytes = encodeJpegWasm(pixels, img.width, img.height, channels, quality, progressive, chromaSubsampling);
		// An empty result is never a valid JPEG; without this check it would
		// look like a 100% saving and overwrite the stream with nothing.
		if (!jpegBytes || jpegBytes.length === 0) {
			skip(img, "JPEG encoding failed");
			continue;
		}
		const savingsPercent = (img.compressedSize - jpegBytes.length) / img.compressedSize * 100;
		if (savingsPercent < minSavingsPercent) {
			skip(img, `Savings ${savingsPercent.toFixed(1)}% below threshold ${minSavingsPercent}%`);
			continue;
		}
		// Swap the stream payload and bring the dictionary in line with it.
		img.stream.data = jpegBytes;
		img.stream.syncLength();
		const dict = img.stream.dict;
		dict.set("/Filter", PdfName.of("/DCTDecode"));
		if (img.colorSpace === "DeviceCMYK" && channels === 3) dict.set("/ColorSpace", PdfName.of("/DeviceRGB"));
		if (channels === 1) dict.set("/ColorSpace", PdfName.of("/DeviceGray"));
		// Parameters of the previous filter no longer apply to the JPEG data.
		dict.delete("/DecodeParms");
		// NOTE(review): /Decode is only cleared for CMYK sources; confirm
		// whether it also needs resetting when autoGrayscale changes the
		// component count.
		if (img.colorSpace === "DeviceCMYK") dict.delete("/Decode");
		optimizedCount++;
		perImage.push({
			name: img.name,
			pageIndex: img.pageIndex,
			originalSize: img.compressedSize,
			newSize: jpegBytes.length,
			skipped: false
		});
		totalNew += jpegBytes.length;
	}
	const overallSavings = totalOriginal > 0 ? (totalOriginal - totalNew) / totalOriginal * 100 : 0;
	return {
		totalImages: images.length,
		optimizedImages: optimizedCount,
		originalTotalBytes: totalOriginal,
		optimizedTotalBytes: totalNew,
		savings: overallSavings,
		perImage
	};
}
303
+
304
+ //#endregion
305
+ export { isGrayscaleImage as i, optimizeAllImages as n, convertToGrayscale as r, batchOptimize_exports as t };
306
+ //# sourceMappingURL=batchOptimize-ClXizv19.mjs.map