modern-pdf-lib 0.14.1 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -5
- package/dist/bridge-C7U4E7St.mjs +103 -0
- package/dist/bridge-DUcJFVsk.cjs +132 -0
- package/dist/index.cjs +845 -30
- package/dist/index.d.cts +486 -12
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +486 -12
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +833 -31
- package/dist/{libdeflateWasm-DlHgU5oy.mjs → libdeflateWasm-82loOtIV.mjs} +2 -2
- package/dist/{libdeflateWasm-OkNoqBnO.cjs → libdeflateWasm-Enus0G1k.cjs} +2 -2
- package/dist/{loader-CQfoGFp9.mjs → loader-1VJXLlMZ.mjs} +3 -2
- package/dist/{loader-_fqS-TmT.cjs → loader-CKlBOHma.cjs} +3 -2
- package/dist/{pngEmbed-OYyOe_W0.cjs → pngEmbed-10m4CfBU.cjs} +2 -2
- package/dist/{pngEmbed-DTOqgEUC.mjs → pngEmbed-gaJ9S2Dk.mjs} +2 -2
- package/package.json +4 -1
package/dist/index.cjs
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
2
|
const require_pdfPage = require('./pdfPage-DBfdinTR.cjs');
|
|
3
3
|
const require_pdfCatalog = require('./pdfCatalog-COKoYQ8C.cjs');
|
|
4
|
-
const require_libdeflateWasm = require('./libdeflateWasm-OkNoqBnO.cjs');
|
|
4
|
+
const require_libdeflateWasm = require('./libdeflateWasm-Enus0G1k.cjs');
|
|
5
5
|
const require_fontSubset = require('./fontSubset-pFc8Dueu.cjs');
|
|
6
|
-
const require_pngEmbed = require('./pngEmbed-OYyOe_W0.cjs');
|
|
6
|
+
const require_pngEmbed = require('./pngEmbed-10m4CfBU.cjs');
|
|
7
7
|
const require_fflateAdapter = require('./fflateAdapter-AHC_S3cb.cjs');
|
|
8
|
+
const require_bridge = require('./bridge-DUcJFVsk.cjs');
|
|
8
9
|
let fflate = require("fflate");
|
|
9
10
|
|
|
10
11
|
//#region src/core/pdfWriter.ts
|
|
@@ -4827,10 +4828,10 @@ async function tryLoadLibdeflate() {
|
|
|
4827
4828
|
if (libdeflateAttempted) return libdeflateEngine;
|
|
4828
4829
|
libdeflateAttempted = true;
|
|
4829
4830
|
try {
|
|
4830
|
-
const { LibdeflateWasm: LibdeflateCtor, initDeflateWasm } = await Promise.resolve().then(() => require("./libdeflateWasm-OkNoqBnO.cjs")).then((n) => n.libdeflateWasm_exports);
|
|
4831
|
+
const { LibdeflateWasm: LibdeflateCtor, initDeflateWasm } = await Promise.resolve().then(() => require("./libdeflateWasm-Enus0G1k.cjs")).then((n) => n.libdeflateWasm_exports);
|
|
4831
4832
|
let customBytes;
|
|
4832
4833
|
try {
|
|
4833
|
-
const { getWasmLoaderConfig } = await Promise.resolve().then(() => require("./loader-_fqS-TmT.cjs"));
|
|
4834
|
+
const { getWasmLoaderConfig } = await Promise.resolve().then(() => require("./loader-CKlBOHma.cjs"));
|
|
4834
4835
|
customBytes = getWasmLoaderConfig().moduleBytes?.["libdeflate"];
|
|
4835
4836
|
} catch {}
|
|
4836
4837
|
await initDeflateWasm(customBytes);
|
|
@@ -9991,7 +9992,7 @@ function isAccessible(issues) {
|
|
|
9991
9992
|
* @param data The bytes to hash.
|
|
9992
9993
|
* @returns A hex string hash.
|
|
9993
9994
|
*/
|
|
9994
|
-
function hashBytes(data) {
|
|
9995
|
+
function hashBytes$1(data) {
|
|
9995
9996
|
let hash = 2166136261;
|
|
9996
9997
|
for (let i = 0; i < data.length; i++) {
|
|
9997
9998
|
hash ^= data[i];
|
|
@@ -10083,7 +10084,7 @@ function remapRef$1(sourceRef, context) {
|
|
|
10083
10084
|
}
|
|
10084
10085
|
let streamHash;
|
|
10085
10086
|
if (sourceObj.kind === "stream") {
|
|
10086
|
-
streamHash = hashBytes(sourceObj.data);
|
|
10087
|
+
streamHash = hashBytes$1(sourceObj.data);
|
|
10087
10088
|
const dedup = context.hashMap.get(streamHash);
|
|
10088
10089
|
if (dedup) {
|
|
10089
10090
|
context.refMap.set(sourceRef.objectNumber, dedup);
|
|
@@ -24367,7 +24368,7 @@ function addTrailerId(data) {
|
|
|
24367
24368
|
* ```
|
|
24368
24369
|
*/
|
|
24369
24370
|
function downscaleImage(image, options = {}) {
|
|
24370
|
-
const target = computeTargetDimensions(image.width, image.height, options);
|
|
24371
|
+
const target = computeTargetDimensions$1(image.width, image.height, options);
|
|
24371
24372
|
if (target.width >= image.width && target.height >= image.height) return image;
|
|
24372
24373
|
switch (options.algorithm ?? "bilinear") {
|
|
24373
24374
|
case "nearest": return resampleNearest(image, target.width, target.height);
|
|
@@ -24394,7 +24395,7 @@ function downscaleImage(image, options = {}) {
|
|
|
24394
24395
|
async function recompressImage(image, options = {}) {
|
|
24395
24396
|
switch (options.format ?? "deflate") {
|
|
24396
24397
|
case "deflate": return recompressDeflate(image, options.compressionLevel ?? 6);
|
|
24397
|
-
case "jpeg": return recompressJpeg(image, options.quality ?? 85);
|
|
24398
|
+
case "jpeg": return recompressJpeg(image, options.quality ?? 85, options.progressive ?? false, options.chromaSubsampling ?? "4:2:0");
|
|
24398
24399
|
default: return {
|
|
24399
24400
|
data: image.pixels,
|
|
24400
24401
|
width: image.width,
|
|
@@ -24424,10 +24425,163 @@ async function optimizeImage(image, options = {}) {
|
|
|
24424
24425
|
return recompressImage(downscaleImage(image, options), options);
|
|
24425
24426
|
}
|
|
24426
24427
|
/**
|
|
24428
|
+
* Standard JPEG luminance quantization table (Table K.1 from JPEG spec)
|
|
24429
|
+
* at quality 50, scaled to quality 100 = all-ones.
|
|
24430
|
+
* @internal
|
|
24431
|
+
*/
|
|
24432
|
+
const STANDARD_LUMINANCE_QT = [
	// Natural (row-major) order, one 8-entry row of the 8x8 block per line.
	16, 11, 10, 16, 24, 40, 51, 61,
	12, 12, 14, 19, 26, 58, 60, 55,
	14, 13, 16, 24, 40, 57, 69, 56,
	14, 17, 22, 29, 51, 87, 80, 62,
	18, 22, 37, 56, 68, 109, 103, 77,
	24, 35, 55, 64, 81, 104, 113, 92,
	49, 64, 78, 87, 103, 121, 120, 101,
	72, 92, 95, 98, 112, 100, 103, 99
];
/**
 * Estimate the JPEG quality level (1–100) from the quantization tables
 * embedded in a JPEG file.
 *
 * Walks the marker segments that precede the scan data, parses every DQT
 * (Define Quantization Table, marker 0xFFDB) segment, and compares the
 * chosen table against the standard luminance table to recover the
 * scaling factor, which is then mapped back to a quality value
 * (`scale = 5000 / q` below 50, `scale = 200 - 2q` from 50 upward).
 *
 * Table 0 is preferred; if no table with id 0 exists, the first table
 * encountered is used.
 *
 * DQT entries are stored in zigzag scan order, whereas
 * `STANDARD_LUMINANCE_QT` is in natural (row-major) order, so every
 * parsed table is converted to natural order before the comparison.
 *
 * @param jpegBytes - Raw JPEG file bytes.
 * @returns Estimated quality 1–100, or `undefined` if the input does not
 *          start with an SOI marker or no usable DQT table is found.
 *
 * @example
 * ```ts
 * import { estimateJpegQuality } from 'modern-pdf-lib';
 *
 * const quality = estimateJpegQuality(jpegBytes);
 * if (quality !== undefined) {
 *   console.log(`Estimated JPEG quality: ${quality}`);
 * }
 * ```
 */
function estimateJpegQuality(jpegBytes) {
	// Must start with SOI (0xFFD8).
	if (jpegBytes.length < 2 || jpegBytes[0] !== 255 || jpegBytes[1] !== 216) return;
	// zigzagToNatural[k] is the row-major index of the k-th coefficient in
	// zigzag scan order, i.e. where the k-th DQT entry belongs.
	const zigzagToNatural = [
		0, 1, 8, 16, 9, 2, 3, 10,
		17, 24, 32, 25, 18, 11, 4, 5,
		12, 19, 26, 33, 40, 48, 41, 34,
		27, 20, 13, 6, 7, 14, 21, 28,
		35, 42, 49, 56, 57, 50, 43, 36,
		29, 22, 15, 23, 30, 37, 44, 51,
		58, 59, 52, 45, 38, 31, 39, 46,
		53, 60, 61, 54, 47, 55, 62, 63
	];
	let offset = 2;
	let bestTable;
	while (offset < jpegBytes.length - 1) {
		// Resynchronise on the next 0xFF marker prefix.
		if (jpegBytes[offset] !== 255) {
			offset++;
			continue;
		}
		const marker = jpegBytes[offset + 1];
		// 0xFF 0xFF is fill padding before a marker.
		if (marker === 255) {
			offset++;
			continue;
		}
		// Markers without a length field: stuffed 0x00, TEM, RST0-7, SOI, EOI.
		if (marker === 0 || marker === 1 || marker >= 208 && marker <= 217) {
			offset += 2;
			continue;
		}
		// SOS: entropy-coded data follows, no more tables to read.
		if (marker === 218) break;
		if (offset + 3 >= jpegBytes.length) break;
		// Big-endian segment length; it counts its own two bytes.
		const segLen = jpegBytes[offset + 2] << 8 | jpegBytes[offset + 3];
		if (marker === 219) {
			let pos = offset + 4;
			const segEnd = offset + 2 + segLen;
			// One DQT segment may carry several tables back to back.
			while (pos < segEnd && pos + 1 < jpegBytes.length) {
				const pqTq = jpegBytes[pos];
				const precision = pqTq >> 4 & 15;
				const tableId = pqTq & 15;
				pos++;
				// Precision 0 means 8-bit entries, anything else 16-bit big-endian.
				const bytesPerEntry = precision === 0 ? 1 : 2;
				const tableSize = 64 * bytesPerEntry;
				if (pos + tableSize > jpegBytes.length) break;
				const table = new Array(64);
				for (let k = 0; k < 64; k++) {
					const at = pos + k * bytesPerEntry;
					const entry = precision === 0 ? jpegBytes[at] : jpegBytes[at] << 8 | jpegBytes[at + 1];
					// Store in natural order so it lines up with STANDARD_LUMINANCE_QT.
					table[zigzagToNatural[k]] = entry;
				}
				pos += tableSize;
				if (tableId === 0 || !bestTable) bestTable = table;
			}
		}
		offset += 2 + segLen;
	}
	if (!bestTable) return void 0;
	// Average the per-coefficient scale (in percent) relative to the standard table.
	let totalRatio = 0;
	let count = 0;
	for (let i = 0; i < 64; i++) {
		const std = STANDARD_LUMINANCE_QT[i];
		const actual = bestTable[i];
		if (std === 0 || actual === 0) continue;
		totalRatio += actual * 100 / std;
		count++;
	}
	if (count === 0) return void 0;
	const avgScale = totalRatio / count;
	// Invert the quality-to-scale mapping.
	const quality = avgScale < 100 ? (200 - avgScale) / 2 : 5e3 / avgScale;
	return Math.max(1, Math.min(100, Math.round(quality)));
}
|
|
24580
|
+
/**
|
|
24427
24581
|
* Compute target dimensions from options, preserving aspect ratio.
|
|
24428
24582
|
* @internal
|
|
24429
24583
|
*/
|
|
24430
|
-
function computeTargetDimensions(srcWidth, srcHeight, options) {
|
|
24584
|
+
function computeTargetDimensions$1(srcWidth, srcHeight, options) {
|
|
24431
24585
|
let targetWidth = srcWidth;
|
|
24432
24586
|
let targetHeight = srcHeight;
|
|
24433
24587
|
if (options.targetDpi && options.printWidth && options.printHeight) {
|
|
@@ -24644,30 +24798,37 @@ async function recompressDeflate(image, level) {
|
|
|
24644
24798
|
}
|
|
24645
24799
|
/**
|
|
24646
24800
|
* Recompress image data as JPEG.
|
|
24647
|
-
* @internal
|
|
24648
|
-
*/
|
|
24649
|
-
/**
|
|
24650
|
-
* Recompress image data as JPEG.
|
|
24651
|
-
*
|
|
24652
|
-
* JPEG encoding in pure JS is complex (DCT, Huffman coding, quantization).
|
|
24653
|
-
* A full implementation requires either:
|
|
24654
|
-
*
|
|
24655
|
-
* 1. **WASM-based encoder** (preferred) -- compile libjpeg-turbo or mozjpeg
|
|
24656
|
-
* to WASM, feed raw pixels, get JPEG bytes back.
|
|
24657
|
-
* 2. **Canvas API** (browser-only fallback) -- use `OffscreenCanvas` with
|
|
24658
|
-
* `convertToBlob({ type: 'image/jpeg', quality })`.
|
|
24659
|
-
* 3. **Pure JS encoder** (last resort) -- very slow but works everywhere.
|
|
24660
24801
|
*
|
|
24661
|
-
*
|
|
24662
|
-
*
|
|
24663
|
-
*
|
|
24802
|
+
* Uses the JPEG WASM encoder when available (initialized via
|
|
24803
|
+
* `initJpegWasm()` or `initWasm({ jpeg: true })`). When WASM is not
|
|
24804
|
+
* loaded, returns the input data unchanged with `wasOptimized: false`.
|
|
24664
24805
|
*
|
|
24665
24806
|
* @param image - The raw image pixel data.
|
|
24666
|
-
* @param quality - JPEG quality 1
|
|
24667
|
-
* @
|
|
24807
|
+
* @param quality - JPEG quality 1–100.
|
|
24808
|
+
* @param progressive - Encode as progressive JPEG (default: false).
|
|
24809
|
+
* @param chromaSubsampling - Chroma subsampling mode (default: '4:2:0').
|
|
24810
|
+
* @returns The JPEG-encoded result, or raw data if WASM is unavailable.
|
|
24668
24811
|
* @internal
|
|
24669
24812
|
*/
|
|
24670
|
-
async function recompressJpeg(image, quality) {
|
|
24813
|
+
async function recompressJpeg(image, quality, progressive = false, chromaSubsampling = "4:2:0") {
|
|
24814
|
+
const { encodeJpegWasm, isJpegWasmReady } = await Promise.resolve().then(() => require("./bridge-DUcJFVsk.cjs")).then((n) => n.bridge_exports);
|
|
24815
|
+
if (isJpegWasmReady()) {
|
|
24816
|
+
let pixels = image.pixels;
|
|
24817
|
+
let channels = image.channels;
|
|
24818
|
+
if (image.channels === 4 && image.colorSpace === "cmyk") {
|
|
24819
|
+
pixels = convertCmykToRgb(image.pixels, image.width, image.height);
|
|
24820
|
+
channels = 3;
|
|
24821
|
+
}
|
|
24822
|
+
const jpegBytes = encodeJpegWasm(pixels, image.width, image.height, channels, quality, progressive, chromaSubsampling);
|
|
24823
|
+
if (jpegBytes) return {
|
|
24824
|
+
data: jpegBytes,
|
|
24825
|
+
width: image.width,
|
|
24826
|
+
height: image.height,
|
|
24827
|
+
channels,
|
|
24828
|
+
format: "jpeg",
|
|
24829
|
+
wasOptimized: true
|
|
24830
|
+
};
|
|
24831
|
+
}
|
|
24671
24832
|
return {
|
|
24672
24833
|
data: image.pixels,
|
|
24673
24834
|
width: image.width,
|
|
@@ -24677,6 +24838,644 @@ async function recompressJpeg(image, quality) {
|
|
|
24677
24838
|
wasOptimized: false
|
|
24678
24839
|
};
|
|
24679
24840
|
}
|
|
24841
|
+
/**
|
|
24842
|
+
* Convert CMYK pixel data to RGB.
|
|
24843
|
+
*
|
|
24844
|
+
* Uses the standard CMYK→RGB formula (inverted CMYK, Adobe convention):
|
|
24845
|
+
* ```
|
|
24846
|
+
* R = 255 × (1 − C/255) × (1 − K/255)
|
|
24847
|
+
* G = 255 × (1 − M/255) × (1 − K/255)
|
|
24848
|
+
* B = 255 × (1 − Y/255) × (1 − K/255)
|
|
24849
|
+
* ```
|
|
24850
|
+
*
|
|
24851
|
+
* @param pixels - CMYK pixel data (4 bytes per pixel, row-major).
|
|
24852
|
+
* @param width - Image width.
|
|
24853
|
+
* @param height - Image height.
|
|
24854
|
+
* @returns RGB pixel data (3 bytes per pixel).
|
|
24855
|
+
* @internal
|
|
24856
|
+
*/
|
|
24857
|
+
function convertCmykToRgb(pixels, width, height) {
	// Each source pixel is 4 bytes (C, M, Y, K); each output pixel is 3 bytes (R, G, B).
	const total = width * height;
	const out = new Uint8Array(total * 3);
	for (let px = 0; px < total; px++) {
		const src = px * 4;
		const dst = px * 3;
		const black = pixels[src + 3] / 255;
		// Same formula for all three channels: 255 * (1 - ink) * (1 - K).
		for (let ch = 0; ch < 3; ch++) {
			const ink = pixels[src + ch] / 255;
			out[dst + ch] = Math.round(255 * (1 - ink) * (1 - black));
		}
	}
	return out;
}
|
|
24871
|
+
|
|
24872
|
+
//#endregion
|
|
24873
|
+
//#region src/assets/image/imageExtract.ts
|
|
24874
|
+
/**
|
|
24875
|
+
* Resolve the color space name from a `/ColorSpace` entry.
|
|
24876
|
+
* Handles both simple names (`/DeviceRGB`) and array forms
|
|
24877
|
+
* (`[/ICCBased ...]`, `[/Indexed /DeviceRGB ...]`).
|
|
24878
|
+
* @internal
|
|
24879
|
+
*/
|
|
24880
|
+
function resolveColorSpace(csEntry, registry) {
	const FALLBACK = "DeviceRGB";
	if (!csEntry) return FALLBACK;
	switch (csEntry.kind) {
		case "ref": {
			// Indirect entry: resolve it and classify the target instead.
			const target = registry.resolve(csEntry);
			return target ? resolveColorSpace(target, registry) : FALLBACK;
		}
		case "name": return csEntry.value.replace(/^\//, "");
		case "array": {
			const head = csEntry.items[0];
			if (!head || head.kind !== "name") return FALLBACK;
			const family = head.value.replace(/^\//, "");
			// Every family other than ICCBased is reported under its own name.
			if (family !== "ICCBased") return family;
			// ICCBased: map the profile stream's /N component count to a device space.
			const profileRef = csEntry.items[1];
			if (!profileRef || profileRef.kind !== "ref") return FALLBACK;
			const profile = registry.resolve(profileRef);
			if (!profile || profile.kind !== "stream") return FALLBACK;
			const componentCount = profile.dict.get("/N");
			if (!componentCount || componentCount.kind !== "number") return FALLBACK;
			switch (componentCount.value) {
				case 1: return "DeviceGray";
				case 3: return "DeviceRGB";
				case 4: return "DeviceCMYK";
				default: return FALLBACK;
			}
		}
		default: return FALLBACK;
	}
}
|
|
24915
|
+
/**
|
|
24916
|
+
* Determine the number of channels from a color space name.
|
|
24917
|
+
* @internal
|
|
24918
|
+
*/
|
|
24919
|
+
function channelsFromColorSpace(colorSpace) {
	if (colorSpace === "DeviceCMYK") return 4;
	// Gray spaces and palette-indexed images carry one sample per pixel.
	const singleChannel = colorSpace === "DeviceGray" || colorSpace === "CalGray" || colorSpace === "Indexed";
	// Anything else is treated as three-channel.
	return singleChannel ? 1 : 3;
}
|
|
24928
|
+
/**
|
|
24929
|
+
* Extract all image XObjects from a PDF document.
|
|
24930
|
+
*
|
|
24931
|
+
* Walks every page's `/Resources /XObject` dictionary and collects
|
|
24932
|
+
* metadata for each image XObject found.
|
|
24933
|
+
*
|
|
24934
|
+
* @param doc - A parsed `PdfDocument`.
|
|
24935
|
+
* @returns An array of `ImageInfo` objects, one per image XObject.
|
|
24936
|
+
*
|
|
24937
|
+
* @example
|
|
24938
|
+
* ```ts
|
|
24939
|
+
* import { loadPdf, extractImages } from 'modern-pdf-lib';
|
|
24940
|
+
*
|
|
24941
|
+
* const doc = await loadPdf(pdfBytes);
|
|
24942
|
+
* const images = extractImages(doc);
|
|
24943
|
+
*
|
|
24944
|
+
* for (const img of images) {
|
|
24945
|
+
* console.log(`${img.name}: ${img.width}x${img.height} ${img.colorSpace} (${img.compressedSize} bytes)`);
|
|
24946
|
+
* }
|
|
24947
|
+
* ```
|
|
24948
|
+
*/
|
|
24949
|
+
function extractImages(doc) {
	const found = [];
	// Object numbers already reported, so an image shared between pages is listed once.
	const visited = /* @__PURE__ */ new Set();
	// Read a numeric dictionary entry, or fall back when it is absent or not a number.
	const numericEntry = (dict, key, fallback) => {
		const entry = dict.get(key);
		return entry && entry.kind === "number" ? entry.value : fallback;
	};
	const pages = doc.getPages();
	for (let pageIndex = 0; pageIndex < pages.length; pageIndex++) {
		const page = pages[pageIndex];
		const resources = page.getOriginalResources();
		if (!resources) continue;
		const xObjEntry = resources.get("/XObject");
		if (!xObjEntry) continue;
		// The /XObject entry is either a direct dictionary or a reference to one.
		let xObjDict = xObjEntry;
		if (xObjEntry.kind === "ref") xObjDict = page.getRegistry().resolve(xObjEntry);
		else if (xObjEntry.kind !== "dict") continue;
		if (!xObjDict || xObjDict.kind !== "dict") continue;
		const registry = page.getRegistry();
		for (const [name, value] of xObjDict) {
			// Only indirectly referenced XObjects are reported; direct values are skipped.
			if (value.kind !== "ref") continue;
			const ref = value;
			if (visited.has(ref.objectNumber)) continue;
			const stream = registry.resolve(ref);
			if (!stream || stream.kind !== "stream") continue;
			const subtype = stream.dict.get("/Subtype");
			const isImage = subtype && subtype.kind === "name" && subtype.value === "/Image";
			if (!isImage) continue;
			const width = numericEntry(stream.dict, "/Width", 0);
			const height = numericEntry(stream.dict, "/Height", 0);
			const bitsPerComponent = numericEntry(stream.dict, "/BitsPerComponent", 8);
			const colorSpace = resolveColorSpace(stream.dict.get("/ColorSpace"), registry);
			const channels = channelsFromColorSpace(colorSpace);
			const { filters } = getStreamFilters(stream.dict);
			visited.add(ref.objectNumber);
			found.push({
				stream,
				ref,
				name,
				pageIndex,
				width,
				height,
				bitsPerComponent,
				colorSpace,
				channels,
				filters,
				compressedSize: stream.data.length
			});
		}
	}
	return found;
}
|
|
25007
|
+
/**
|
|
25008
|
+
* Decode image stream data into raw pixels.
|
|
25009
|
+
*
|
|
25010
|
+
* For DCTDecode (JPEG) streams, returns the raw JPEG bytes (not decoded
|
|
25011
|
+
* to pixels) since JPEG decoding requires the WASM module.
|
|
25012
|
+
*
|
|
25013
|
+
* For FlateDecode and other filters, fully decodes the stream.
|
|
25014
|
+
*
|
|
25015
|
+
* @param imageInfo - An `ImageInfo` from `extractImages()`.
|
|
25016
|
+
* @returns The decoded stream data.
|
|
25017
|
+
*/
|
|
25018
|
+
function decodeImageStream(imageInfo) {
	const { stream, filters } = imageInfo;
	// Unfiltered streams are returned as-is; otherwise run the filter chain.
	return filters.length === 0 ? stream.data : decodeStream(stream.data, filters, null);
}
|
|
25022
|
+
|
|
25023
|
+
//#endregion
|
|
25024
|
+
//#region src/assets/image/grayscaleDetect.ts
|
|
25025
|
+
/**
|
|
25026
|
+
* @module assets/image/grayscaleDetect
|
|
25027
|
+
*
|
|
25028
|
+
* Grayscale detection and conversion for image optimization.
|
|
25029
|
+
*
|
|
25030
|
+
* Detects RGB images where all pixels are effectively grayscale
|
|
25031
|
+
* (R ≈ G ≈ B) and converts them to single-channel grayscale,
|
|
25032
|
+
* reducing data size by ~66%.
|
|
25033
|
+
*
|
|
25034
|
+
* No Buffer — uses Uint8Array exclusively.
|
|
25035
|
+
*/
|
|
25036
|
+
/**
|
|
25037
|
+
* Check whether an RGB/RGBA image is effectively grayscale.
|
|
25038
|
+
*
|
|
25039
|
+
* Scans all pixels and checks if R, G, and B channels are within
|
|
25040
|
+
* `tolerance` of each other. If ≥99% of pixels pass, the image
|
|
25041
|
+
* is considered grayscale.
|
|
25042
|
+
*
|
|
25043
|
+
* @param pixels - Raw pixel data (row-major, channel-interleaved).
|
|
25044
|
+
* @param width - Image width in pixels.
|
|
25045
|
+
* @param height - Image height in pixels.
|
|
25046
|
+
* @param channels - Number of channels: 3 (RGB) or 4 (RGBA).
|
|
25047
|
+
* @param tolerance - Maximum allowed difference between R, G, and B
|
|
25048
|
+
* values for a pixel to be considered gray.
|
|
25049
|
+
* Default: `2`.
|
|
25050
|
+
* @returns `true` if the image is effectively grayscale.
|
|
25051
|
+
*
|
|
25052
|
+
* @example
|
|
25053
|
+
* ```ts
|
|
25054
|
+
* import { isGrayscaleImage, convertToGrayscale } from 'modern-pdf-lib';
|
|
25055
|
+
*
|
|
25056
|
+
* if (isGrayscaleImage(pixels, width, height, 3)) {
|
|
25057
|
+
* const grayPixels = convertToGrayscale(pixels, width, height, 3);
|
|
25058
|
+
* // grayPixels has 1 byte per pixel instead of 3
|
|
25059
|
+
* }
|
|
25060
|
+
* ```
|
|
25061
|
+
*/
|
|
25062
|
+
function isGrayscaleImage(pixels, width, height, channels, tolerance = 2) {
	const total = width * height;
	// Up to 1% of the pixels may be coloured before the image is rejected.
	let budget = Math.floor(total * .01);
	for (let px = 0; px < total; px++) {
		const base = px * channels;
		const red = pixels[base];
		const green = pixels[base + 1];
		const blue = pixels[base + 2];
		const spread = Math.max(red, green, blue) - Math.min(red, green, blue);
		if (!(spread > tolerance)) continue;
		budget--;
		// Stop as soon as the allowance is exhausted.
		if (budget < 0) return false;
	}
	return true;
}
|
|
25077
|
+
/**
|
|
25078
|
+
* Convert an RGB/RGBA image to single-channel grayscale.
|
|
25079
|
+
*
|
|
25080
|
+
* Uses the ITU-R BT.601 luma formula:
|
|
25081
|
+
* ```
|
|
25082
|
+
* gray = 0.299 × R + 0.587 × G + 0.114 × B
|
|
25083
|
+
* ```
|
|
25084
|
+
*
|
|
25085
|
+
* The alpha channel (if present) is discarded.
|
|
25086
|
+
*
|
|
25087
|
+
* @param pixels - Raw pixel data (row-major, channel-interleaved).
|
|
25088
|
+
* @param width - Image width in pixels.
|
|
25089
|
+
* @param height - Image height in pixels.
|
|
25090
|
+
* @param channels - Number of channels: 3 (RGB) or 4 (RGBA).
|
|
25091
|
+
* @returns Grayscale pixel data (1 byte per pixel).
|
|
25092
|
+
*/
|
|
25093
|
+
function convertToGrayscale(pixels, width, height, channels) {
	const total = width * height;
	// One output byte per pixel; any channel past the third (e.g. alpha) is ignored.
	return new Uint8Array(total).map((_, px) => {
		const base = px * channels;
		const red = pixels[base];
		const green = pixels[base + 1];
		const blue = pixels[base + 2];
		// Weighted luma sum, rounded to the nearest integer.
		return Math.round(.299 * red + .587 * green + .114 * blue);
	});
}
|
|
25104
|
+
|
|
25105
|
+
//#endregion
|
|
25106
|
+
//#region src/assets/image/batchOptimize.ts
|
|
25107
|
+
/** Minimum image size to bother optimizing (10 KB). */
|
|
25108
|
+
const SMALL_IMAGE_THRESHOLD = 10 * 1024;
|
|
25109
|
+
/**
|
|
25110
|
+
* Optimize all images in a PDF document by recompressing them as JPEG.
|
|
25111
|
+
*
|
|
25112
|
+
* Walks every image XObject in the document, decodes its pixel data,
|
|
25113
|
+
* recompresses it as JPEG using the WASM encoder (if available), and
|
|
25114
|
+
* replaces the stream data in-place when the result is smaller.
|
|
25115
|
+
*
|
|
25116
|
+
* **Requires the JPEG WASM module to be initialized** via
|
|
25117
|
+
* `initJpegWasm()` or `initWasm({ jpeg: true })`. Without it,
|
|
25118
|
+
* no images will be optimized (all will be skipped).
|
|
25119
|
+
*
|
|
25120
|
+
* @param doc - A parsed `PdfDocument` (from `loadPdf()`).
|
|
25121
|
+
* @param options - Optimization settings.
|
|
25122
|
+
* @returns A report summarizing the optimization results.
|
|
25123
|
+
*
|
|
25124
|
+
* @example
|
|
25125
|
+
* ```ts
|
|
25126
|
+
* import { loadPdf, initWasm, optimizeAllImages } from 'modern-pdf-lib';
|
|
25127
|
+
*
|
|
25128
|
+
* await initWasm({ jpeg: true });
|
|
25129
|
+
*
|
|
25130
|
+
* const doc = await loadPdf(pdfBytes);
|
|
25131
|
+
* const report = await optimizeAllImages(doc);
|
|
25132
|
+
*
|
|
25133
|
+
* console.log(`Optimized ${report.optimizedImages} of ${report.totalImages} images`);
|
|
25134
|
+
* console.log(`Savings: ${report.savings.toFixed(1)}%`);
|
|
25135
|
+
*
|
|
25136
|
+
* const optimizedBytes = await doc.save();
|
|
25137
|
+
* ```
|
|
25138
|
+
*/
|
|
25139
|
+
async function optimizeAllImages(doc, options = {}) {
|
|
25140
|
+
const quality = options.quality ?? 80;
|
|
25141
|
+
const minSavingsPercent = options.minSavingsPercent ?? 10;
|
|
25142
|
+
const skipSmall = options.skipSmallImages ?? false;
|
|
25143
|
+
const progressive = options.progressive ?? false;
|
|
25144
|
+
const chromaSubsampling = options.chromaSubsampling ?? "4:2:0";
|
|
25145
|
+
const { encodeJpegWasm, isJpegWasmReady } = await Promise.resolve().then(() => require("./bridge-DUcJFVsk.cjs")).then((n) => n.bridge_exports);
|
|
25146
|
+
const { decodeJpegWasm } = await Promise.resolve().then(() => require("./bridge-DUcJFVsk.cjs")).then((n) => n.bridge_exports);
|
|
25147
|
+
const images = extractImages(doc);
|
|
25148
|
+
const perImage = [];
|
|
25149
|
+
let totalOriginal = 0;
|
|
25150
|
+
let totalNew = 0;
|
|
25151
|
+
let optimizedCount = 0;
|
|
25152
|
+
for (const img of images) {
|
|
25153
|
+
totalOriginal += img.compressedSize;
|
|
25154
|
+
if (!isJpegWasmReady()) {
|
|
25155
|
+
perImage.push({
|
|
25156
|
+
name: img.name,
|
|
25157
|
+
pageIndex: img.pageIndex,
|
|
25158
|
+
originalSize: img.compressedSize,
|
|
25159
|
+
newSize: img.compressedSize,
|
|
25160
|
+
skipped: true,
|
|
25161
|
+
reason: "JPEG WASM encoder not initialized"
|
|
25162
|
+
});
|
|
25163
|
+
totalNew += img.compressedSize;
|
|
25164
|
+
continue;
|
|
25165
|
+
}
|
|
25166
|
+
if (skipSmall && img.compressedSize < SMALL_IMAGE_THRESHOLD) {
|
|
25167
|
+
perImage.push({
|
|
25168
|
+
name: img.name,
|
|
25169
|
+
pageIndex: img.pageIndex,
|
|
25170
|
+
originalSize: img.compressedSize,
|
|
25171
|
+
newSize: img.compressedSize,
|
|
25172
|
+
skipped: true,
|
|
25173
|
+
reason: `Below size threshold (${SMALL_IMAGE_THRESHOLD} bytes)`
|
|
25174
|
+
});
|
|
25175
|
+
totalNew += img.compressedSize;
|
|
25176
|
+
continue;
|
|
25177
|
+
}
|
|
25178
|
+
if (img.bitsPerComponent !== 8) {
|
|
25179
|
+
perImage.push({
|
|
25180
|
+
name: img.name,
|
|
25181
|
+
pageIndex: img.pageIndex,
|
|
25182
|
+
originalSize: img.compressedSize,
|
|
25183
|
+
newSize: img.compressedSize,
|
|
25184
|
+
skipped: true,
|
|
25185
|
+
reason: `Unsupported bits per component: ${img.bitsPerComponent}`
|
|
25186
|
+
});
|
|
25187
|
+
totalNew += img.compressedSize;
|
|
25188
|
+
continue;
|
|
25189
|
+
}
|
|
25190
|
+
if (img.colorSpace === "Indexed") {
|
|
25191
|
+
perImage.push({
|
|
25192
|
+
name: img.name,
|
|
25193
|
+
pageIndex: img.pageIndex,
|
|
25194
|
+
originalSize: img.compressedSize,
|
|
25195
|
+
newSize: img.compressedSize,
|
|
25196
|
+
skipped: true,
|
|
25197
|
+
reason: "Indexed color space not suitable for JPEG"
|
|
25198
|
+
});
|
|
25199
|
+
totalNew += img.compressedSize;
|
|
25200
|
+
continue;
|
|
25201
|
+
}
|
|
25202
|
+
let pixels;
|
|
25203
|
+
let channels = img.channels;
|
|
25204
|
+
try {
|
|
25205
|
+
if (img.filters[0] === "DCTDecode") {
|
|
25206
|
+
const decoded = decodeJpegWasm(img.stream.data);
|
|
25207
|
+
if (!decoded) {
|
|
25208
|
+
perImage.push({
|
|
25209
|
+
name: img.name,
|
|
25210
|
+
pageIndex: img.pageIndex,
|
|
25211
|
+
originalSize: img.compressedSize,
|
|
25212
|
+
newSize: img.compressedSize,
|
|
25213
|
+
skipped: true,
|
|
25214
|
+
reason: "Failed to decode existing JPEG"
|
|
25215
|
+
});
|
|
25216
|
+
totalNew += img.compressedSize;
|
|
25217
|
+
continue;
|
|
25218
|
+
}
|
|
25219
|
+
pixels = decoded.pixels;
|
|
25220
|
+
channels = decoded.channels;
|
|
25221
|
+
} else pixels = decodeImageStream(img);
|
|
25222
|
+
} catch {
|
|
25223
|
+
perImage.push({
|
|
25224
|
+
name: img.name,
|
|
25225
|
+
pageIndex: img.pageIndex,
|
|
25226
|
+
originalSize: img.compressedSize,
|
|
25227
|
+
newSize: img.compressedSize,
|
|
25228
|
+
skipped: true,
|
|
25229
|
+
reason: "Failed to decode image stream"
|
|
25230
|
+
});
|
|
25231
|
+
totalNew += img.compressedSize;
|
|
25232
|
+
continue;
|
|
25233
|
+
}
|
|
25234
|
+
const expectedLen = img.width * img.height * channels;
|
|
25235
|
+
if (pixels.length !== expectedLen) {
|
|
25236
|
+
perImage.push({
|
|
25237
|
+
name: img.name,
|
|
25238
|
+
pageIndex: img.pageIndex,
|
|
25239
|
+
originalSize: img.compressedSize,
|
|
25240
|
+
newSize: img.compressedSize,
|
|
25241
|
+
skipped: true,
|
|
25242
|
+
reason: `Pixel data length mismatch: got ${pixels.length}, expected ${expectedLen}`
|
|
25243
|
+
});
|
|
25244
|
+
totalNew += img.compressedSize;
|
|
25245
|
+
continue;
|
|
25246
|
+
}
|
|
25247
|
+
if (channels === 4 && img.colorSpace === "DeviceCMYK") {
|
|
25248
|
+
const rgb = new Uint8Array(img.width * img.height * 3);
|
|
25249
|
+
for (let i = 0; i < img.width * img.height; i++) {
|
|
25250
|
+
const c = pixels[i * 4] / 255;
|
|
25251
|
+
const m = pixels[i * 4 + 1] / 255;
|
|
25252
|
+
const y = pixels[i * 4 + 2] / 255;
|
|
25253
|
+
const k = pixels[i * 4 + 3] / 255;
|
|
25254
|
+
rgb[i * 3] = Math.round(255 * (1 - c) * (1 - k));
|
|
25255
|
+
rgb[i * 3 + 1] = Math.round(255 * (1 - m) * (1 - k));
|
|
25256
|
+
rgb[i * 3 + 2] = Math.round(255 * (1 - y) * (1 - k));
|
|
25257
|
+
}
|
|
25258
|
+
pixels = rgb;
|
|
25259
|
+
channels = 3;
|
|
25260
|
+
}
|
|
25261
|
+
if (options.autoGrayscale && (channels === 3 || channels === 4)) {
|
|
25262
|
+
if (isGrayscaleImage(pixels, img.width, img.height, channels)) {
|
|
25263
|
+
pixels = convertToGrayscale(pixels, img.width, img.height, channels);
|
|
25264
|
+
channels = 1;
|
|
25265
|
+
}
|
|
25266
|
+
}
|
|
25267
|
+
const jpegBytes = encodeJpegWasm(pixels, img.width, img.height, channels, quality, progressive, chromaSubsampling);
|
|
25268
|
+
if (!jpegBytes) {
|
|
25269
|
+
perImage.push({
|
|
25270
|
+
name: img.name,
|
|
25271
|
+
pageIndex: img.pageIndex,
|
|
25272
|
+
originalSize: img.compressedSize,
|
|
25273
|
+
newSize: img.compressedSize,
|
|
25274
|
+
skipped: true,
|
|
25275
|
+
reason: "JPEG encoding failed"
|
|
25276
|
+
});
|
|
25277
|
+
totalNew += img.compressedSize;
|
|
25278
|
+
continue;
|
|
25279
|
+
}
|
|
25280
|
+
const savingsPercent = (img.compressedSize - jpegBytes.length) / img.compressedSize * 100;
|
|
25281
|
+
if (savingsPercent < minSavingsPercent) {
|
|
25282
|
+
perImage.push({
|
|
25283
|
+
name: img.name,
|
|
25284
|
+
pageIndex: img.pageIndex,
|
|
25285
|
+
originalSize: img.compressedSize,
|
|
25286
|
+
newSize: img.compressedSize,
|
|
25287
|
+
skipped: true,
|
|
25288
|
+
reason: `Savings ${savingsPercent.toFixed(1)}% below threshold ${minSavingsPercent}%`
|
|
25289
|
+
});
|
|
25290
|
+
totalNew += img.compressedSize;
|
|
25291
|
+
continue;
|
|
25292
|
+
}
|
|
25293
|
+
img.stream.data = jpegBytes;
|
|
25294
|
+
img.stream.syncLength();
|
|
25295
|
+
const dict = img.stream.dict;
|
|
25296
|
+
dict.set("/Filter", require_pdfCatalog.PdfName.of("/DCTDecode"));
|
|
25297
|
+
if (img.colorSpace === "DeviceCMYK" && channels === 3) dict.set("/ColorSpace", require_pdfCatalog.PdfName.of("/DeviceRGB"));
|
|
25298
|
+
if (channels === 1) dict.set("/ColorSpace", require_pdfCatalog.PdfName.of("/DeviceGray"));
|
|
25299
|
+
dict.delete("/DecodeParms");
|
|
25300
|
+
if (img.colorSpace === "DeviceCMYK") dict.delete("/Decode");
|
|
25301
|
+
optimizedCount++;
|
|
25302
|
+
perImage.push({
|
|
25303
|
+
name: img.name,
|
|
25304
|
+
pageIndex: img.pageIndex,
|
|
25305
|
+
originalSize: img.compressedSize,
|
|
25306
|
+
newSize: jpegBytes.length,
|
|
25307
|
+
skipped: false
|
|
25308
|
+
});
|
|
25309
|
+
totalNew += jpegBytes.length;
|
|
25310
|
+
}
|
|
25311
|
+
const overallSavings = totalOriginal > 0 ? (totalOriginal - totalNew) / totalOriginal * 100 : 0;
|
|
25312
|
+
return {
|
|
25313
|
+
totalImages: images.length,
|
|
25314
|
+
optimizedImages: optimizedCount,
|
|
25315
|
+
originalTotalBytes: totalOriginal,
|
|
25316
|
+
optimizedTotalBytes: totalNew,
|
|
25317
|
+
savings: overallSavings,
|
|
25318
|
+
perImage
|
|
25319
|
+
};
|
|
25320
|
+
}
|
|
25321
|
+
|
|
25322
|
+
//#endregion
|
|
25323
|
+
//#region src/assets/image/deduplicateImages.ts
|
|
25324
|
+
/**
 * Compute a fast, non-cryptographic fingerprint of a byte array.
 *
 * This is used instead of SHA-256 because:
 * 1. It's synchronous (no need for crypto.subtle)
 * 2. It's fast for large buffers
 * 3. It only has to tell streams apart within a single document
 *
 * The fingerprint is built from three 32-bit words:
 * - a 32-bit FNV-1a hash of the bytes, first to last;
 * - a second FNV-style hash with the two constants swapped, last to first;
 * - the byte length multiplied by 2654435769 (mod 2^32).
 *
 * @param data - Indexable byte sequence with a `length` (e.g. a `Uint8Array`).
 * @returns A 24-character lowercase hex string (three zero-padded 8-digit words).
 * @internal
 */
function hashBytes(data) {
	const toHex32 = (word) => (word >>> 0).toString(16).padStart(8, "0");
	let h1 = 2166136261;
	for (let i = 0; i < data.length; i++) {
		h1 ^= data[i];
		h1 = Math.imul(h1, 16777619);
	}
	let h2 = 16777619;
	for (let i = data.length - 1; i >= 0; i--) {
		h2 ^= data[i];
		h2 = Math.imul(h2, 2166136261);
	}
	// Math.imul keeps the multiplication exact modulo 2^32. A plain `*`
	// exceeds 2^53 for buffers larger than ~3.4 MB and drops low bits.
	const h3 = Math.imul(data.length, 2654435769);
	return toHex32(h1) + toHex32(h2) + toHex32(h3);
}
|
|
25349
|
+
/**
 * Deduplicate identical images in a PDF document.
 *
 * Scans the images returned by `extractImages(doc)`, groups them by a key
 * made of dimensions, filters, colour space and a hash of the compressed
 * stream data, and confirms every hash match with a byte-for-byte
 * comparison. For each confirmed duplicate, the entry in the page's
 * `/XObject` resource dictionary is pointed at the canonical (first-seen)
 * copy.
 *
 * This operation modifies the document in-place. Duplicate streams
 * are not removed from the object registry (they become unreferenced
 * and will be omitted on save if the writer supports garbage collection).
 *
 * NOTE(review): only the stream bytes and the fields in the key are
 * compared. Other image dictionary entries (e.g. `/SMask`, `/Decode`) are
 * not inspected here — confirm whether they need to be part of the match.
 *
 * @param doc - A parsed `PdfDocument` (from `loadPdf()`).
 * @returns A report with:
 *   - `totalImages`: number of entries returned by `extractImages`;
 *   - `uniqueImages`: number of distinct canonical images found;
 *   - `duplicatesRemoved`: number of resource entries actually re-pointed;
 *   - `bytesSaved`: sum of `compressedSize` over those re-pointed entries.
 *
 * @example
 * ```ts
 * import { loadPdf, deduplicateImages } from 'modern-pdf-lib';
 *
 * const doc = await loadPdf(pdfBytes);
 * const report = deduplicateImages(doc);
 *
 * console.log(`Removed ${report.duplicatesRemoved} duplicate images`);
 * console.log(`Saved ~${(report.bytesSaved / 1024).toFixed(0)} KB`);
 *
 * const optimizedBytes = await doc.save();
 * ```
 */
function deduplicateImages(doc) {
	// Byte-for-byte comparison, so a hash collision can never merge two
	// different images.
	const sameBytes = (a, b) => {
		if (a === b) return true;
		if (a.length !== b.length) return false;
		for (let i = 0; i < a.length; i++) if (a[i] !== b[i]) return false;
		return true;
	};
	const images = extractImages(doc);
	// key -> canonical entries sharing that key (more than one only on a hash collision)
	const buckets = /* @__PURE__ */ new Map();
	const duplicates = [];
	let uniqueImages = 0;
	for (const img of images) {
		const key = `${img.width}x${img.height}:${img.filters.join(",")}:${img.colorSpace}:` + hashBytes(img.stream.data);
		let bucket = buckets.get(key);
		if (!bucket) {
			bucket = [];
			buckets.set(key, bucket);
		}
		const canonical = bucket.find((entry) => sameBytes(entry.stream.data, img.stream.data));
		if (!canonical) {
			bucket.push({
				ref: img.ref,
				stream: img.stream
			});
			uniqueImages++;
			continue;
		}
		// Same stream instance listed again: it already is the canonical
		// object, so there is nothing to re-point and nothing saved.
		if (canonical.stream === img.stream) continue;
		duplicates.push({
			image: img,
			canonicalRef: canonical.ref
		});
	}
	const pages = doc.getPages();
	let bytesSaved = 0;
	let duplicatesRemoved = 0;
	for (const { image, canonicalRef } of duplicates) {
		const page = pages[image.pageIndex];
		if (!page) continue;
		const resources = page.getOriginalResources();
		if (!resources) continue;
		const xObjEntry = resources.get("/XObject");
		if (!xObjEntry) continue;
		let xObjDict;
		if (xObjEntry.kind === "dict") xObjDict = xObjEntry;
		else if (xObjEntry.kind === "ref") {
			const resolved = page.getRegistry().resolve(xObjEntry);
			if (resolved && resolved.kind === "dict") xObjDict = resolved;
		}
		if (!xObjDict) continue;
		xObjDict.set(image.name, canonicalRef);
		// Count only entries that were really re-pointed, so the two
		// report fields stay consistent with each other.
		bytesSaved += image.compressedSize;
		duplicatesRemoved++;
	}
	return {
		totalImages: images.length,
		uniqueImages,
		duplicatesRemoved,
		bytesSaved
	};
}
|
|
25417
|
+
|
|
25418
|
+
//#endregion
|
|
25419
|
+
//#region src/assets/image/dpiAnalyze.ts
|
|
25420
|
+
/**
 * Work out how densely an image's pixels are packed when it is drawn at
 * a given size on the page.
 *
 * A PDF point is 1/72 inch, so along each axis:
 * ```
 * DPI = pixels / points * 72
 * ```
 * An axis whose display size is not greater than zero is reported as
 * `Infinity`.
 *
 * @param imageWidth - Image width in pixels.
 * @param imageHeight - Image height in pixels.
 * @param displayWidth - Display width in PDF points (1/72 inch).
 * @param displayHeight - Display height in PDF points (1/72 inch).
 * @returns `{ xDpi, yDpi, effectiveDpi }`, where `effectiveDpi` is the
 *          smaller of the two axis values.
 *
 * @example
 * ```ts
 * import { computeImageDpi } from 'modern-pdf-lib';
 *
 * // A 3000×2000 image displayed at 4.17×2.78 inches (300×200 points)
 * const dpi = computeImageDpi(3000, 2000, 300, 200);
 * console.log(dpi.effectiveDpi); // 720
 * ```
 */
function computeImageDpi(imageWidth, imageHeight, displayWidth, displayHeight) {
	const POINTS_PER_INCH = 72;
	const axisDpi = (pixels, points) => {
		if (points > 0) return pixels / points * POINTS_PER_INCH;
		return Infinity;
	};
	const horizontal = axisDpi(imageWidth, displayWidth);
	const vertical = axisDpi(imageHeight, displayHeight);
	return {
		xDpi: horizontal,
		yDpi: vertical,
		effectiveDpi: Math.min(horizontal, vertical)
	};
}
|
|
25453
|
+
/**
 * Decide the pixel size an image should be resampled to so that it does
 * not exceed a DPI ceiling at its display size.
 *
 * The effective DPI comes from `computeImageDpi`. When it is already at
 * or below `maxDpi`, or is not finite, the image is left alone. Otherwise
 * both axes are scaled by the same factor (`maxDpi / effectiveDpi`),
 * rounded, and clamped to at least one pixel.
 *
 * @param imageWidth - Current image width in pixels.
 * @param imageHeight - Current image height in pixels.
 * @param displayWidth - Display width in PDF points.
 * @param displayHeight - Display height in PDF points.
 * @param maxDpi - Maximum allowed DPI.
 * @returns `{ width, height, downscaled }`: the target dimensions, or
 *          the original dimensions with `downscaled: false` when no
 *          downscaling is needed.
 */
function computeTargetDimensions(imageWidth, imageHeight, displayWidth, displayHeight, maxDpi) {
	const { effectiveDpi } = computeImageDpi(imageWidth, imageHeight, displayWidth, displayHeight);
	const needsDownscale = Number.isFinite(effectiveDpi) && effectiveDpi > maxDpi;
	if (!needsDownscale) {
		return {
			width: imageWidth,
			height: imageHeight,
			downscaled: false
		};
	}
	const factor = maxDpi / effectiveDpi;
	const shrink = (pixels) => Math.max(1, Math.round(pixels * factor));
	return {
		width: shrink(imageWidth),
		height: shrink(imageHeight),
		downscaled: true
	};
}
|
|
24680
25479
|
|
|
24681
25480
|
//#endregion
|
|
24682
25481
|
//#region src/errors.ts
|
|
@@ -24846,15 +25645,18 @@ async function initWasm(options) {
|
|
|
24846
25645
|
if (options === void 0 || typeof options === "string" || options instanceof URL) return;
|
|
24847
25646
|
if (wasmInitialized) return;
|
|
24848
25647
|
const inits = [];
|
|
24849
|
-
if (options.deflate || options.deflateWasm) inits.push(Promise.resolve().then(() => require("./libdeflateWasm-
|
|
25648
|
+
if (options.deflate || options.deflateWasm) inits.push(Promise.resolve().then(() => require("./libdeflateWasm-Enus0G1k.cjs")).then((n) => n.libdeflateWasm_exports).then(async ({ initDeflateWasm }) => {
|
|
24850
25649
|
await initDeflateWasm(options.deflateWasm);
|
|
24851
25650
|
}));
|
|
24852
|
-
if (options.png || options.pngWasm) inits.push(Promise.resolve().then(() => require("./pngEmbed-
|
|
25651
|
+
if (options.png || options.pngWasm) inits.push(Promise.resolve().then(() => require("./pngEmbed-10m4CfBU.cjs")).then((n) => n.pngEmbed_exports).then(async ({ initPngWasm }) => {
|
|
24853
25652
|
await initPngWasm(options.pngWasm);
|
|
24854
25653
|
}));
|
|
24855
25654
|
if (options.fonts || options.fontWasm) inits.push(Promise.resolve().then(() => require("./fontSubset-pFc8Dueu.cjs")).then((n) => n.fontSubset_exports).then(async ({ initSubsetWasm }) => {
|
|
24856
25655
|
await initSubsetWasm(options.fontWasm);
|
|
24857
25656
|
}));
|
|
25657
|
+
if (options.jpeg || options.jpegWasm) inits.push(Promise.resolve().then(() => require("./bridge-DUcJFVsk.cjs")).then((n) => n.bridge_exports).then(async ({ initJpegWasm }) => {
|
|
25658
|
+
await initJpegWasm(options.jpegWasm);
|
|
25659
|
+
}));
|
|
24858
25660
|
await Promise.all(inits);
|
|
24859
25661
|
wasmInitialized = true;
|
|
24860
25662
|
}
|
|
@@ -24984,9 +25786,12 @@ exports.colorToComponents = require_pdfPage.colorToComponents;
|
|
|
24984
25786
|
exports.componentsToColor = require_pdfPage.componentsToColor;
|
|
24985
25787
|
exports.computeFileEncryptionKey = computeFileEncryptionKey;
|
|
24986
25788
|
exports.computeFontSize = computeFontSize;
|
|
25789
|
+
exports.computeImageDpi = computeImageDpi;
|
|
24987
25790
|
exports.computeSignatureHash = computeSignatureHash;
|
|
25791
|
+
exports.computeTargetDimensions = computeTargetDimensions;
|
|
24988
25792
|
exports.concatMatrix = require_pdfPage.concatMatrix;
|
|
24989
25793
|
exports.concatTransformationMatrix = require_pdfPage.concatMatrix;
|
|
25794
|
+
exports.convertToGrayscale = convertToGrayscale;
|
|
24990
25795
|
exports.copyPages = copyPages;
|
|
24991
25796
|
exports.createAnnotation = require_pdfPage.createAnnotation;
|
|
24992
25797
|
exports.createMarkedContentScope = require_pdfPage.createMarkedContentScope;
|
|
@@ -24996,8 +25801,11 @@ exports.cropPage = cropPage;
|
|
|
24996
25801
|
exports.curveToFinal = require_pdfPage.curveToFinal;
|
|
24997
25802
|
exports.curveToInitial = require_pdfPage.curveToInitial;
|
|
24998
25803
|
exports.curveToOp = require_pdfPage.curveTo;
|
|
25804
|
+
exports.decodeImageStream = decodeImageStream;
|
|
25805
|
+
exports.decodeJpegWasm = require_bridge.decodeJpegWasm;
|
|
24999
25806
|
exports.decodePermissions = decodePermissions;
|
|
25000
25807
|
exports.decodeStream = decodeStream;
|
|
25808
|
+
exports.deduplicateImages = deduplicateImages;
|
|
25001
25809
|
exports.degrees = require_pdfPage.degrees;
|
|
25002
25810
|
exports.degreesToRadians = require_pdfPage.degreesToRadians;
|
|
25003
25811
|
exports.downscaleImage = downscaleImage;
|
|
@@ -25011,6 +25819,7 @@ exports.embedPageAsFormXObject = embedPageAsFormXObject;
|
|
|
25011
25819
|
exports.embedSignature = embedSignature;
|
|
25012
25820
|
exports.encodeContextTag = encodeContextTag;
|
|
25013
25821
|
exports.encodeInteger = encodeInteger;
|
|
25822
|
+
exports.encodeJpegWasm = require_bridge.encodeJpegWasm;
|
|
25014
25823
|
exports.encodeLength = encodeLength;
|
|
25015
25824
|
exports.encodeOID = encodeOID;
|
|
25016
25825
|
exports.encodeOctetString = encodeOctetString;
|
|
@@ -25026,6 +25835,8 @@ exports.endMarkedContent = require_pdfPage.endMarkedContent;
|
|
|
25026
25835
|
exports.endPathOp = require_pdfPage.endPath;
|
|
25027
25836
|
exports.endText = require_pdfPage.endText;
|
|
25028
25837
|
exports.enforcePdfA = enforcePdfA;
|
|
25838
|
+
exports.estimateJpegQuality = estimateJpegQuality;
|
|
25839
|
+
exports.extractImages = extractImages;
|
|
25029
25840
|
exports.extractMetrics = extractMetrics;
|
|
25030
25841
|
exports.extractText = extractText;
|
|
25031
25842
|
exports.extractTextWithPositions = extractTextWithPositions;
|
|
@@ -25057,9 +25868,12 @@ exports.getPageSize = getPageSize;
|
|
|
25057
25868
|
exports.getRedactionMarks = require_pdfPage.getRedactionMarks;
|
|
25058
25869
|
exports.getSignatures = getSignatures;
|
|
25059
25870
|
exports.grayscale = require_pdfPage.grayscale;
|
|
25871
|
+
exports.initJpegWasm = require_bridge.initJpegWasm;
|
|
25060
25872
|
exports.initWasm = initWasm;
|
|
25061
25873
|
exports.insertPage = insertPage;
|
|
25062
25874
|
exports.isAccessible = isAccessible;
|
|
25875
|
+
exports.isGrayscaleImage = isGrayscaleImage;
|
|
25876
|
+
exports.isJpegWasmReady = require_bridge.isJpegWasmReady;
|
|
25063
25877
|
exports.isLinearized = isLinearized;
|
|
25064
25878
|
exports.isOpenTypeCFF = isOpenTypeCFF;
|
|
25065
25879
|
exports.isTrueType = isTrueType;
|
|
@@ -25078,6 +25892,7 @@ exports.moveTextOp = require_pdfPage.moveText;
|
|
|
25078
25892
|
exports.moveTextSetLeading = require_pdfPage.moveTextSetLeading;
|
|
25079
25893
|
exports.moveToOp = require_pdfPage.moveTo;
|
|
25080
25894
|
exports.nextLineOp = require_pdfPage.nextLine;
|
|
25895
|
+
exports.optimizeAllImages = optimizeAllImages;
|
|
25081
25896
|
exports.optimizeImage = optimizeImage;
|
|
25082
25897
|
exports.parseContentStream = parseContentStream;
|
|
25083
25898
|
exports.parseSvg = require_pdfPage.parseSvg;
|