modern-pdf-lib 0.15.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/dist/batchOptimize-ClXizv19.mjs +306 -0
  2. package/dist/batchOptimize-DYQOX1-7.cjs +329 -0
  3. package/dist/{bridge-DUcJFVsk.cjs → bridge-DN7BOHRW.cjs} +2 -2
  4. package/dist/{bridge-C7U4E7St.mjs → bridge-DpzMOnHd.mjs} +2 -2
  5. package/dist/cli/index.cjs +225 -0
  6. package/dist/cli/index.d.cts +1 -0
  7. package/dist/cli/index.d.mts +1 -0
  8. package/dist/cli/index.mjs +226 -0
  9. package/dist/deduplicateImages-BfpjHY9b.mjs +102 -0
  10. package/dist/deduplicateImages-BtJ5tlrr.cjs +113 -0
  11. package/dist/{fflateAdapter-DX0VqT5k.mjs → fflateAdapter-D2mv_ttM.mjs} +1 -1
  12. package/dist/{fflateAdapter-AHC_S3cb.cjs → fflateAdapter-cT4YeY_h.cjs} +1 -1
  13. package/dist/{fontSubset-pFc8Dueu.cjs → fontSubset-BxsF9Tu5.cjs} +1 -1
  14. package/dist/{fontSubset-ZpLoOZ2e.mjs → fontSubset-ClyTXlhY.mjs} +1 -1
  15. package/dist/imageExtract-BC7TMY98.cjs +4770 -0
  16. package/dist/imageExtract-vjyQyFcT.mjs +4747 -0
  17. package/dist/index.cjs +621 -5754
  18. package/dist/index.mjs +89 -5222
  19. package/dist/{libdeflateWasm-Enus0G1k.cjs → libdeflateWasm-Cg7cWHOq.cjs} +2 -2
  20. package/dist/{libdeflateWasm-82loOtIV.mjs → libdeflateWasm-Cmxa-yiS.mjs} +2 -2
  21. package/dist/{loader-1VJXLlMZ.mjs → loader-B6VIrZOJ.mjs} +1 -1
  22. package/dist/{loader-CKlBOHma.cjs → loader-DdB5Xo5D.cjs} +1 -1
  23. package/dist/pdfCatalog-BcOL6QF-.cjs +173 -0
  24. package/dist/pdfCatalog-CnJRovvm.mjs +138 -0
  25. package/dist/{pdfCatalog-COKoYQ8C.cjs → pdfObjects-BrU4Xd0V.cjs} +1 -171
  26. package/dist/{pdfCatalog-BB2Wnmud.mjs → pdfObjects-DZZ2GPRW.mjs} +2 -137
  27. package/dist/{pdfPage-N1K2U3jI.mjs → pdfPage-Dm5XC_g_.mjs} +3 -2
  28. package/dist/{pdfPage-DBfdinTR.cjs → pdfPage-Dz_SVKUS.cjs} +105 -104
  29. package/dist/{pngEmbed-10m4CfBU.cjs → pngEmbed-C6M1eX6b.cjs} +2 -2
  30. package/dist/{pngEmbed-gaJ9S2Dk.mjs → pngEmbed-I1hU3Y6m.mjs} +2 -2
  31. package/package.json +1 -1
@@ -0,0 +1,225 @@
1
+ #!/usr/bin/env node
2
+ let node_fs_promises = require("node:fs/promises");
3
+
4
+ //#region src/cli/optimize.ts
5
+ /**
6
+ * @module cli/optimize
7
+ *
8
+ * CLI command: `modern-pdf optimize input.pdf output.pdf [options]`
9
+ *
10
+ * Optimizes images in a PDF file by recompressing them as JPEG,
11
+ * with optional deduplication and grayscale detection.
12
+ */
/**
 * Render the size change between two byte counts as a signed percentage.
 *
 * A reduction is shown with a leading minus sign (`−12.3%`), growth with a
 * plus sign (`+4.0%`). A non-positive `before` yields `−0.0%` instead of
 * dividing by zero.
 *
 * @param {number} before - Size before processing, in bytes.
 * @param {number} after - Size after processing, in bytes.
 * @returns {string} Signed percentage with one decimal place.
 */
function formatSavings(before, after) {
	if (!(before > 0)) return "−0.0%";
	const pct = (before - after) / before * 100;
	return pct < 0 ? `+${(-pct).toFixed(1)}%` : `−${pct.toFixed(1)}%`;
}
/**
 * Parse and execute the optimize command.
 *
 * Steps, in order: parse `args`; print help and return when requested; exit
 * with status 1 when the input or output path is missing; read the input file;
 * try to initialise the JPEG WASM module (a failure only produces a warning);
 * load the PDF; optionally deduplicate images; run `optimizeAllImages`; save
 * the document to the output path and print a one-line size summary.
 *
 * @param {string[]} args - Arguments following the `optimize` command name.
 * @returns {Promise<void>}
 */
async function optimizeCommand(args) {
	const parsed = parseArgs(args);
	if (parsed.help) {
		printOptimizeHelp();
		return;
	}
	if (!parsed.input || !parsed.output) {
		console.error("Error: input and output file paths are required.");
		console.error("Usage: modern-pdf optimize <input.pdf> <output.pdf> [options]");
		process.exit(1);
	}
	const { loadPdf } = await Promise.resolve().then(() => require("../index.cjs"));
	const { initJpegWasm } = await Promise.resolve().then(() => require("../bridge-DN7BOHRW.cjs")).then((n) => n.bridge_exports);
	const { optimizeAllImages } = await Promise.resolve().then(() => require("../batchOptimize-DYQOX1-7.cjs")).then((n) => n.batchOptimize_exports);
	const { deduplicateImages } = await Promise.resolve().then(() => require("../deduplicateImages-BtJ5tlrr.cjs")).then((n) => n.deduplicateImages_exports);
	const inputBytes = new Uint8Array(await node_fs_promises.readFile(parsed.input));
	const inputSize = inputBytes.length;
	if (parsed.verbose) {
		console.log(`Input: ${parsed.input} (${formatBytes(inputSize)})`);
		console.log(`Output: ${parsed.output}`);
		console.log("");
	}
	try {
		await initJpegWasm();
	} catch (err) {
		// Best effort: the command keeps going without the WASM encoder.
		console.warn("Warning: JPEG WASM module not available. Images will not be recompressed.");
		// Surface the underlying cause only when the user asked for details.
		if (parsed.verbose) console.warn(`Reason: ${err instanceof Error ? err.message : String(err)}`);
	}
	const doc = await loadPdf(inputBytes);
	if (parsed.dedup) {
		const dedupReport = deduplicateImages(doc);
		if (parsed.verbose && dedupReport.duplicatesRemoved > 0) {
			console.log(`Deduplication: removed ${dedupReport.duplicatesRemoved} duplicate(s), ~${formatBytes(dedupReport.bytesSaved)} saved`);
		}
	}
	const report = await optimizeAllImages(doc, {
		quality: parsed.quality,
		progressive: parsed.progressive,
		chromaSubsampling: parsed.chroma,
		autoGrayscale: parsed.grayscale,
		skipSmallImages: true,
		minSavingsPercent: 10
	});
	if (parsed.verbose) {
		console.log("");
		console.log(`Images found: ${report.totalImages}`);
		console.log(`Images optimized: ${report.optimizedImages}`);
		console.log(`Original size: ${formatBytes(report.originalTotalBytes)}`);
		console.log(`Optimized size: ${formatBytes(report.optimizedTotalBytes)}`);
		console.log(`Savings: ${report.savings.toFixed(1)}%`);
		if (report.perImage.length > 0) {
			console.log("");
			console.log("Per-image details:");
			for (const entry of report.perImage) {
				if (entry.skipped) {
					console.log(` ${entry.name} (p${entry.pageIndex}): SKIP — ${entry.reason}`);
				} else {
					// formatSavings guards against a zero original size (previously printed NaN).
					console.log(` ${entry.name} (p${entry.pageIndex}): ${formatBytes(entry.originalSize)} → ${formatBytes(entry.newSize)} (${formatSavings(entry.originalSize, entry.newSize)})`);
				}
			}
		}
	}
	const outputBytes = await doc.save();
	await node_fs_promises.writeFile(parsed.output, outputBytes);
	const outputSize = outputBytes.length;
	// A larger output is reported as "+x%" rather than the former "−-x%".
	console.log(`\n${formatBytes(inputSize)} → ${formatBytes(outputSize)} (${formatSavings(inputSize, outputSize)})`);
}
/**
 * Parse the argument vector of the `optimize` command.
 *
 * Flags set fields on the returned object; the first two non-option
 * arguments become `input` and `output`. `--help`/`-h` returns immediately
 * with `help: true`. `--max-dpi <n>` is accepted and its value consumed, but
 * nothing is stored for it. An invalid `--quality` or `--chroma` value, or any
 * other argument starting with `-`, prints an error and exits with status 1.
 *
 * @param {string[]} args - Arguments following the `optimize` command name.
 * @returns {{quality: number, progressive: boolean, grayscale: boolean,
 *   dedup: boolean, chroma: string, verbose: boolean, help: boolean,
 *   input?: string, output?: string}} Parsed options.
 */
function parseArgs(args) {
	const result = {
		quality: 80,
		progressive: false,
		grayscale: false,
		dedup: false,
		chroma: "4:2:0",
		verbose: false,
		help: false
	};
	const positional = [];
	for (let i = 0; i < args.length; i++) {
		const arg = args[i];
		switch (arg) {
			case "--help":
			case "-h":
				result.help = true;
				return result;
			case "--quality":
			case "-q":
				result.quality = Number.parseInt(args[++i] ?? "80", 10);
				// parseInt yields NaN for non-numeric text (e.g. a following flag),
				// and NaN fails both range comparisons, so reject it explicitly.
				if (Number.isNaN(result.quality) || result.quality < 1 || result.quality > 100) {
					console.error("Error: --quality must be between 1 and 100.");
					process.exit(1);
				}
				break;
			case "--max-dpi":
				// Skip the option's value; it is not used by this command.
				i++;
				break;
			case "--progressive":
				result.progressive = true;
				break;
			case "--grayscale":
				result.grayscale = true;
				break;
			case "--dedup":
				result.dedup = true;
				break;
			case "--chroma": {
				const val = args[++i] ?? "4:2:0";
				if (val !== "4:4:4" && val !== "4:2:2" && val !== "4:2:0") {
					console.error("Error: --chroma must be 4:4:4, 4:2:2, or 4:2:0.");
					process.exit(1);
				}
				result.chroma = val;
				break;
			}
			case "--verbose":
			case "-v":
				result.verbose = true;
				break;
			default:
				if (arg.startsWith("-")) {
					console.error(`Unknown option: ${arg}`);
					process.exit(1);
				}
				positional.push(arg);
				break;
		}
	}
	if (positional[0] !== void 0) result.input = positional[0];
	if (positional[1] !== void 0) result.output = positional[1];
	return result;
}
/**
 * Turn a byte count into a short human-readable label.
 *
 * Values below 1024 are shown as whole bytes (`B`); values below 1024 * 1024
 * as kilobytes (`KB`); everything else as megabytes (`MB`). KB and MB values
 * carry one decimal place.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Formatted size.
 */
function formatBytes(bytes) {
	const KIB = 1024;
	const MIB = KIB * KIB;
	if (bytes < KIB) return `${bytes} B`;
	const [scaled, unit] = bytes < MIB ? [bytes / KIB, "KB"] : [bytes / MIB, "MB"];
	return `${scaled.toFixed(1)} ${unit}`;
}
/**
 * Print the usage text of the `optimize` command to stdout in a single
 * `console.log` call.
 */
function printOptimizeHelp() {
	const helpLines = [
		"modern-pdf optimize — Optimize images in a PDF file",
		"",
		"Usage:",
		"modern-pdf optimize <input.pdf> <output.pdf> [options]",
		"",
		"Options:",
		"--quality <n>, -q <n> JPEG quality 1-100 (default: 80)",
		"--progressive Use progressive JPEG encoding",
		"--grayscale Auto-detect and convert grayscale images",
		"--dedup Deduplicate identical images",
		"--chroma <mode> Chroma subsampling: 4:4:4, 4:2:2, 4:2:0 (default: 4:2:0)",
		"--verbose, -v Print per-image optimization details",
		"--help, -h Show this help",
		"",
		"Examples:",
		"modern-pdf optimize report.pdf report-opt.pdf",
		"modern-pdf optimize scan.pdf scan-opt.pdf --quality 60 --grayscale --dedup -v"
	];
	console.log(helpLines.join("\n"));
}
169
+
170
+ //#endregion
171
+ //#region src/cli/index.ts
172
+ /**
173
+ * @module cli/index
174
+ *
175
+ * CLI entry point for modern-pdf-lib.
176
+ *
177
+ * Usage:
178
+ * npx modern-pdf optimize input.pdf output.pdf [options]
179
+ * npx modern-pdf --help
180
+ */
/**
 * CLI dispatcher: inspects the first command-line argument and either prints
 * help, prints the CLI banner, runs the `optimize` command with the remaining
 * arguments, or reports an unknown command and exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
	const [command, ...rest] = process.argv.slice(2);
	const wantsHelp = command === "--help" || command === "-h";
	if (!command || wantsHelp) {
		printHelp();
		process.exit(0);
	}
	const wantsVersion = command === "--version" || command === "-v";
	if (wantsVersion) {
		console.log("modern-pdf-lib CLI");
		process.exit(0);
	}
	if (command === "optimize") {
		await optimizeCommand(rest);
	} else {
		console.error(`Unknown command: ${command}`);
		console.error('Run "modern-pdf --help" for usage information.');
		process.exit(1);
	}
}
// Any rejection escaping main() is printed and turned into exit status 1.
main().catch((failure) => {
	console.error(failure);
	process.exit(1);
});
/**
 * Print the top-level CLI usage text to stdout in a single `console.log` call.
 */
function printHelp() {
	const helpLines = [
		"modern-pdf-lib CLI",
		"",
		"Usage:",
		"modern-pdf <command> [options]",
		"",
		"Commands:",
		"optimize Optimize images in a PDF file",
		"",
		"Options:",
		"--help, -h Show this help message",
		"--version, -v Show version",
		"",
		'Run "modern-pdf optimize --help" for optimize options.'
	];
	console.log(helpLines.join("\n"));
}
223
+
224
+ //#endregion
225
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ export { };
@@ -0,0 +1 @@
1
+ export { };
@@ -0,0 +1,226 @@
1
+ #!/usr/bin/env node
2
+ import { readFile, writeFile } from "node:fs/promises";
3
+
4
+ //#region src/cli/optimize.ts
5
+ /**
6
+ * @module cli/optimize
7
+ *
8
+ * CLI command: `modern-pdf optimize input.pdf output.pdf [options]`
9
+ *
10
+ * Optimizes images in a PDF file by recompressing them as JPEG,
11
+ * with optional deduplication and grayscale detection.
12
+ */
/**
 * Render the size change between two byte counts as a signed percentage.
 *
 * A reduction is shown with a leading minus sign (`−12.3%`), growth with a
 * plus sign (`+4.0%`). A non-positive `before` yields `−0.0%` instead of
 * dividing by zero.
 *
 * @param {number} before - Size before processing, in bytes.
 * @param {number} after - Size after processing, in bytes.
 * @returns {string} Signed percentage with one decimal place.
 */
function formatSavings(before, after) {
	if (!(before > 0)) return "−0.0%";
	const pct = (before - after) / before * 100;
	return pct < 0 ? `+${(-pct).toFixed(1)}%` : `−${pct.toFixed(1)}%`;
}
/**
 * Parse and execute the optimize command.
 *
 * Steps, in order: parse `args`; print help and return when requested; exit
 * with status 1 when the input or output path is missing; read the input file;
 * try to initialise the JPEG WASM module (a failure only produces a warning);
 * load the PDF; optionally deduplicate images; run `optimizeAllImages`; save
 * the document to the output path and print a one-line size summary.
 *
 * @param {string[]} args - Arguments following the `optimize` command name.
 * @returns {Promise<void>}
 */
async function optimizeCommand(args) {
	const parsed = parseArgs(args);
	if (parsed.help) {
		printOptimizeHelp();
		return;
	}
	if (!parsed.input || !parsed.output) {
		console.error("Error: input and output file paths are required.");
		console.error("Usage: modern-pdf optimize <input.pdf> <output.pdf> [options]");
		process.exit(1);
	}
	const { loadPdf } = await import("../index.mjs");
	const { initJpegWasm } = await import("../bridge-DpzMOnHd.mjs").then((n) => n.t);
	const { optimizeAllImages } = await import("../batchOptimize-ClXizv19.mjs").then((n) => n.t);
	const { deduplicateImages } = await import("../deduplicateImages-BfpjHY9b.mjs").then((n) => n.n);
	const inputBytes = new Uint8Array(await readFile(parsed.input));
	const inputSize = inputBytes.length;
	if (parsed.verbose) {
		console.log(`Input: ${parsed.input} (${formatBytes(inputSize)})`);
		console.log(`Output: ${parsed.output}`);
		console.log("");
	}
	try {
		await initJpegWasm();
	} catch (err) {
		// Best effort: the command keeps going without the WASM encoder.
		console.warn("Warning: JPEG WASM module not available. Images will not be recompressed.");
		// Surface the underlying cause only when the user asked for details.
		if (parsed.verbose) console.warn(`Reason: ${err instanceof Error ? err.message : String(err)}`);
	}
	const doc = await loadPdf(inputBytes);
	if (parsed.dedup) {
		const dedupReport = deduplicateImages(doc);
		if (parsed.verbose && dedupReport.duplicatesRemoved > 0) {
			console.log(`Deduplication: removed ${dedupReport.duplicatesRemoved} duplicate(s), ~${formatBytes(dedupReport.bytesSaved)} saved`);
		}
	}
	const report = await optimizeAllImages(doc, {
		quality: parsed.quality,
		progressive: parsed.progressive,
		chromaSubsampling: parsed.chroma,
		autoGrayscale: parsed.grayscale,
		skipSmallImages: true,
		minSavingsPercent: 10
	});
	if (parsed.verbose) {
		console.log("");
		console.log(`Images found: ${report.totalImages}`);
		console.log(`Images optimized: ${report.optimizedImages}`);
		console.log(`Original size: ${formatBytes(report.originalTotalBytes)}`);
		console.log(`Optimized size: ${formatBytes(report.optimizedTotalBytes)}`);
		console.log(`Savings: ${report.savings.toFixed(1)}%`);
		if (report.perImage.length > 0) {
			console.log("");
			console.log("Per-image details:");
			for (const entry of report.perImage) {
				if (entry.skipped) {
					console.log(` ${entry.name} (p${entry.pageIndex}): SKIP — ${entry.reason}`);
				} else {
					// formatSavings guards against a zero original size (previously printed NaN).
					console.log(` ${entry.name} (p${entry.pageIndex}): ${formatBytes(entry.originalSize)} → ${formatBytes(entry.newSize)} (${formatSavings(entry.originalSize, entry.newSize)})`);
				}
			}
		}
	}
	const outputBytes = await doc.save();
	await writeFile(parsed.output, outputBytes);
	const outputSize = outputBytes.length;
	// A larger output is reported as "+x%" rather than the former "−-x%".
	console.log(`\n${formatBytes(inputSize)} → ${formatBytes(outputSize)} (${formatSavings(inputSize, outputSize)})`);
}
/**
 * Parse the argument vector of the `optimize` command.
 *
 * Flags set fields on the returned object; the first two non-option
 * arguments become `input` and `output`. `--help`/`-h` returns immediately
 * with `help: true`. `--max-dpi <n>` is accepted and its value consumed, but
 * nothing is stored for it. An invalid `--quality` or `--chroma` value, or any
 * other argument starting with `-`, prints an error and exits with status 1.
 *
 * @param {string[]} args - Arguments following the `optimize` command name.
 * @returns {{quality: number, progressive: boolean, grayscale: boolean,
 *   dedup: boolean, chroma: string, verbose: boolean, help: boolean,
 *   input?: string, output?: string}} Parsed options.
 */
function parseArgs(args) {
	const result = {
		quality: 80,
		progressive: false,
		grayscale: false,
		dedup: false,
		chroma: "4:2:0",
		verbose: false,
		help: false
	};
	const positional = [];
	for (let i = 0; i < args.length; i++) {
		const arg = args[i];
		switch (arg) {
			case "--help":
			case "-h":
				result.help = true;
				return result;
			case "--quality":
			case "-q":
				result.quality = Number.parseInt(args[++i] ?? "80", 10);
				// parseInt yields NaN for non-numeric text (e.g. a following flag),
				// and NaN fails both range comparisons, so reject it explicitly.
				if (Number.isNaN(result.quality) || result.quality < 1 || result.quality > 100) {
					console.error("Error: --quality must be between 1 and 100.");
					process.exit(1);
				}
				break;
			case "--max-dpi":
				// Skip the option's value; it is not used by this command.
				i++;
				break;
			case "--progressive":
				result.progressive = true;
				break;
			case "--grayscale":
				result.grayscale = true;
				break;
			case "--dedup":
				result.dedup = true;
				break;
			case "--chroma": {
				const val = args[++i] ?? "4:2:0";
				if (val !== "4:4:4" && val !== "4:2:2" && val !== "4:2:0") {
					console.error("Error: --chroma must be 4:4:4, 4:2:2, or 4:2:0.");
					process.exit(1);
				}
				result.chroma = val;
				break;
			}
			case "--verbose":
			case "-v":
				result.verbose = true;
				break;
			default:
				if (arg.startsWith("-")) {
					console.error(`Unknown option: ${arg}`);
					process.exit(1);
				}
				positional.push(arg);
				break;
		}
	}
	if (positional[0] !== void 0) result.input = positional[0];
	if (positional[1] !== void 0) result.output = positional[1];
	return result;
}
/**
 * Turn a byte count into a short human-readable label.
 *
 * Values below 1024 are shown as whole bytes (`B`); values below 1024 * 1024
 * as kilobytes (`KB`); everything else as megabytes (`MB`). KB and MB values
 * carry one decimal place.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Formatted size.
 */
function formatBytes(bytes) {
	const KIB = 1024;
	const MIB = KIB * KIB;
	if (bytes < KIB) return `${bytes} B`;
	const [scaled, unit] = bytes < MIB ? [bytes / KIB, "KB"] : [bytes / MIB, "MB"];
	return `${scaled.toFixed(1)} ${unit}`;
}
/**
 * Print the usage text of the `optimize` command to stdout in a single
 * `console.log` call.
 */
function printOptimizeHelp() {
	const helpLines = [
		"modern-pdf optimize — Optimize images in a PDF file",
		"",
		"Usage:",
		"modern-pdf optimize <input.pdf> <output.pdf> [options]",
		"",
		"Options:",
		"--quality <n>, -q <n> JPEG quality 1-100 (default: 80)",
		"--progressive Use progressive JPEG encoding",
		"--grayscale Auto-detect and convert grayscale images",
		"--dedup Deduplicate identical images",
		"--chroma <mode> Chroma subsampling: 4:4:4, 4:2:2, 4:2:0 (default: 4:2:0)",
		"--verbose, -v Print per-image optimization details",
		"--help, -h Show this help",
		"",
		"Examples:",
		"modern-pdf optimize report.pdf report-opt.pdf",
		"modern-pdf optimize scan.pdf scan-opt.pdf --quality 60 --grayscale --dedup -v"
	];
	console.log(helpLines.join("\n"));
}
169
+
170
+ //#endregion
171
+ //#region src/cli/index.ts
172
+ /**
173
+ * @module cli/index
174
+ *
175
+ * CLI entry point for modern-pdf-lib.
176
+ *
177
+ * Usage:
178
+ * npx modern-pdf optimize input.pdf output.pdf [options]
179
+ * npx modern-pdf --help
180
+ */
/**
 * CLI dispatcher: inspects the first command-line argument and either prints
 * help, prints the CLI banner, runs the `optimize` command with the remaining
 * arguments, or reports an unknown command and exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
	const [command, ...rest] = process.argv.slice(2);
	const wantsHelp = command === "--help" || command === "-h";
	if (!command || wantsHelp) {
		printHelp();
		process.exit(0);
	}
	const wantsVersion = command === "--version" || command === "-v";
	if (wantsVersion) {
		console.log("modern-pdf-lib CLI");
		process.exit(0);
	}
	if (command === "optimize") {
		await optimizeCommand(rest);
	} else {
		console.error(`Unknown command: ${command}`);
		console.error('Run "modern-pdf --help" for usage information.');
		process.exit(1);
	}
}
// Any rejection escaping main() is printed and turned into exit status 1.
main().catch((failure) => {
	console.error(failure);
	process.exit(1);
});
/**
 * Print the top-level CLI usage text to stdout in a single `console.log` call.
 */
function printHelp() {
	const helpLines = [
		"modern-pdf-lib CLI",
		"",
		"Usage:",
		"modern-pdf <command> [options]",
		"",
		"Commands:",
		"optimize Optimize images in a PDF file",
		"",
		"Options:",
		"--help, -h Show this help message",
		"--version, -v Show version",
		"",
		'Run "modern-pdf optimize --help" for optimize options.'
	];
	console.log(helpLines.join("\n"));
}
223
+
224
+ //#endregion
225
+ export { };
226
+ //# sourceMappingURL=index.mjs.map
@@ -0,0 +1,102 @@
1
+ import { t as __exportAll } from "./rolldown-runtime-95iHPtFO.mjs";
2
+ import { n as extractImages } from "./imageExtract-vjyQyFcT.mjs";
3
+
4
+ //#region src/assets/image/deduplicateImages.ts
5
+ var deduplicateImages_exports = /* @__PURE__ */ __exportAll({ deduplicateImages: () => deduplicateImages });
/**
 * Compute a fast, non-cryptographic fingerprint of a byte array.
 *
 * Three 32-bit words are combined:
 * 1. a forward FNV-1a pass (offset basis 2166136261, prime 16777619),
 * 2. a backward pass over the same bytes with the two constants swapped,
 * 3. the byte length multiplied by 2654435769, modulo 2^32.
 *
 * This is used instead of SHA-256 because:
 * 1. It's synchronous (no need for crypto.subtle)
 * 2. It's fast for large buffers
 * 3. We only need collision resistance within a single document
 *
 * Returns a 24-char lowercase hex string (three zero-padded 8-digit words).
 * @internal
 */
function hashBytes(data) {
	let h1 = 2166136261;
	for (let i = 0; i < data.length; i++) {
		h1 ^= data[i];
		h1 = Math.imul(h1, 16777619);
	}
	let h2 = 16777619;
	for (let i = data.length - 1; i >= 0; i--) {
		h2 ^= data[i];
		h2 = Math.imul(h2, 2166136261);
	}
	// Math.imul keeps the product exact modulo 2^32; a plain `*` rounds once the
	// product exceeds 2^53 (buffers above roughly 3.4 MB) and corrupts low bits.
	const h3 = Math.imul(data.length, 2654435769);
	const toHex = (word) => (word >>> 0).toString(16).padStart(8, "0");
	return toHex(h1) + toHex(h2) + toHex(h3);
}
/**
 * Deduplicate identical images in a PDF document.
 *
 * Every image returned by `extractImages(doc)` is keyed by its dimensions,
 * filter list and a hash of its stream data. The first image seen for a key
 * is the canonical copy; for each later image with the same key, the
 * `/XObject` entry of that image's page resources is re-pointed at the
 * canonical reference.
 *
 * This operation modifies the document in-place. Duplicate streams are not
 * removed from the object registry by this function.
 *
 * `duplicatesRemoved` and `bytesSaved` count only entries that were actually
 * re-pointed; duplicates whose page or `/XObject` dictionary cannot be
 * located are left untouched and not counted.
 *
 * @param doc - A parsed `PdfDocument` (from `loadPdf()`).
 * @returns A report with `totalImages`, `uniqueImages`, `duplicatesRemoved`
 *   and `bytesSaved`.
 *
 * @example
 * ```ts
 * import { loadPdf, deduplicateImages } from 'modern-pdf-lib';
 *
 * const doc = await loadPdf(pdfBytes);
 * const report = deduplicateImages(doc);
 *
 * console.log(`Removed ${report.duplicatesRemoved} duplicate images`);
 * console.log(`Saved ~${(report.bytesSaved / 1024).toFixed(0)} KB`);
 *
 * const optimizedBytes = await doc.save();
 * ```
 */
function deduplicateImages(doc) {
	const images = extractImages(doc);
	const canonicalByKey = new Map();
	const duplicates = [];
	for (const img of images) {
		const key = `${img.width}x${img.height}:${img.filters.join(",")}:` + hashBytes(img.stream.data);
		if (!canonicalByKey.has(key)) {
			canonicalByKey.set(key, img.ref);
			continue;
		}
		const canonicalRef = canonicalByKey.get(key);
		// An entry that already points at the canonical object is shared, not
		// duplicated: re-pointing it would be a no-op and must not count as savings.
		// NOTE(review): this only detects sharing when extractImages returns the
		// same ref object for both entries — confirm whether refs are interned.
		if (canonicalRef === img.ref) continue;
		duplicates.push({
			image: img,
			canonicalRef
		});
	}
	// Fetch the page list once instead of once per duplicate.
	const pages = doc.getPages();
	let duplicatesRemoved = 0;
	let bytesSaved = 0;
	for (const { image, canonicalRef } of duplicates) {
		const page = pages[image.pageIndex];
		if (!page) continue;
		const resources = page.getOriginalResources();
		if (!resources) continue;
		const xObjEntry = resources.get("/XObject");
		if (!xObjEntry) continue;
		let xObjDict;
		if (xObjEntry.kind === "dict") xObjDict = xObjEntry;
		else if (xObjEntry.kind === "ref") {
			// The XObject dictionary is stored indirectly; look it up in the registry.
			const resolved = page.getRegistry().resolve(xObjEntry);
			if (resolved && resolved.kind === "dict") xObjDict = resolved;
		}
		if (!xObjDict) continue;
		xObjDict.set(image.name, canonicalRef);
		duplicatesRemoved++;
		bytesSaved += image.compressedSize;
	}
	return {
		totalImages: images.length,
		uniqueImages: canonicalByKey.size,
		duplicatesRemoved,
		bytesSaved
	};
}
99
+
100
+ //#endregion
101
+ export { deduplicateImages_exports as n, deduplicateImages as t };
102
+ //# sourceMappingURL=deduplicateImages-BfpjHY9b.mjs.map
@@ -0,0 +1,113 @@
1
+ const require_rolldown_runtime = require('./rolldown-runtime-CKhH4XqG.cjs');
2
+ const require_imageExtract = require('./imageExtract-BC7TMY98.cjs');
3
+
4
+ //#region src/assets/image/deduplicateImages.ts
5
+ var deduplicateImages_exports = /* @__PURE__ */ require_rolldown_runtime.__exportAll({ deduplicateImages: () => deduplicateImages });
/**
 * Compute a fast, non-cryptographic fingerprint of a byte array.
 *
 * Three 32-bit words are combined:
 * 1. a forward FNV-1a pass (offset basis 2166136261, prime 16777619),
 * 2. a backward pass over the same bytes with the two constants swapped,
 * 3. the byte length multiplied by 2654435769, modulo 2^32.
 *
 * This is used instead of SHA-256 because:
 * 1. It's synchronous (no need for crypto.subtle)
 * 2. It's fast for large buffers
 * 3. We only need collision resistance within a single document
 *
 * Returns a 24-char lowercase hex string (three zero-padded 8-digit words).
 * @internal
 */
function hashBytes(data) {
	let h1 = 2166136261;
	for (let i = 0; i < data.length; i++) {
		h1 ^= data[i];
		h1 = Math.imul(h1, 16777619);
	}
	let h2 = 16777619;
	for (let i = data.length - 1; i >= 0; i--) {
		h2 ^= data[i];
		h2 = Math.imul(h2, 2166136261);
	}
	// Math.imul keeps the product exact modulo 2^32; a plain `*` rounds once the
	// product exceeds 2^53 (buffers above roughly 3.4 MB) and corrupts low bits.
	const h3 = Math.imul(data.length, 2654435769);
	const toHex = (word) => (word >>> 0).toString(16).padStart(8, "0");
	return toHex(h1) + toHex(h2) + toHex(h3);
}
/**
 * Deduplicate identical images in a PDF document.
 *
 * Every image returned by `extractImages(doc)` is keyed by its dimensions,
 * filter list and a hash of its stream data. The first image seen for a key
 * is the canonical copy; for each later image with the same key, the
 * `/XObject` entry of that image's page resources is re-pointed at the
 * canonical reference.
 *
 * This operation modifies the document in-place. Duplicate streams are not
 * removed from the object registry by this function.
 *
 * `duplicatesRemoved` and `bytesSaved` count only entries that were actually
 * re-pointed; duplicates whose page or `/XObject` dictionary cannot be
 * located are left untouched and not counted.
 *
 * @param doc - A parsed `PdfDocument` (from `loadPdf()`).
 * @returns A report with `totalImages`, `uniqueImages`, `duplicatesRemoved`
 *   and `bytesSaved`.
 *
 * @example
 * ```ts
 * import { loadPdf, deduplicateImages } from 'modern-pdf-lib';
 *
 * const doc = await loadPdf(pdfBytes);
 * const report = deduplicateImages(doc);
 *
 * console.log(`Removed ${report.duplicatesRemoved} duplicate images`);
 * console.log(`Saved ~${(report.bytesSaved / 1024).toFixed(0)} KB`);
 *
 * const optimizedBytes = await doc.save();
 * ```
 */
function deduplicateImages(doc) {
	const images = require_imageExtract.extractImages(doc);
	const canonicalByKey = new Map();
	const duplicates = [];
	for (const img of images) {
		const key = `${img.width}x${img.height}:${img.filters.join(",")}:` + hashBytes(img.stream.data);
		if (!canonicalByKey.has(key)) {
			canonicalByKey.set(key, img.ref);
			continue;
		}
		const canonicalRef = canonicalByKey.get(key);
		// An entry that already points at the canonical object is shared, not
		// duplicated: re-pointing it would be a no-op and must not count as savings.
		// NOTE(review): this only detects sharing when extractImages returns the
		// same ref object for both entries — confirm whether refs are interned.
		if (canonicalRef === img.ref) continue;
		duplicates.push({
			image: img,
			canonicalRef
		});
	}
	// Fetch the page list once instead of once per duplicate.
	const pages = doc.getPages();
	let duplicatesRemoved = 0;
	let bytesSaved = 0;
	for (const { image, canonicalRef } of duplicates) {
		const page = pages[image.pageIndex];
		if (!page) continue;
		const resources = page.getOriginalResources();
		if (!resources) continue;
		const xObjEntry = resources.get("/XObject");
		if (!xObjEntry) continue;
		let xObjDict;
		if (xObjEntry.kind === "dict") xObjDict = xObjEntry;
		else if (xObjEntry.kind === "ref") {
			// The XObject dictionary is stored indirectly; look it up in the registry.
			const resolved = page.getRegistry().resolve(xObjEntry);
			if (resolved && resolved.kind === "dict") xObjDict = resolved;
		}
		if (!xObjDict) continue;
		xObjDict.set(image.name, canonicalRef);
		duplicatesRemoved++;
		bytesSaved += image.compressedSize;
	}
	return {
		totalImages: images.length,
		uniqueImages: canonicalByKey.size,
		duplicatesRemoved,
		bytesSaved
	};
}
99
+
100
+ //#endregion
101
+ Object.defineProperty(exports, 'deduplicateImages', {
102
+ enumerable: true,
103
+ get: function () {
104
+ return deduplicateImages;
105
+ }
106
+ });
107
+ Object.defineProperty(exports, 'deduplicateImages_exports', {
108
+ enumerable: true,
109
+ get: function () {
110
+ return deduplicateImages_exports;
111
+ }
112
+ });
113
+ //# sourceMappingURL=deduplicateImages-BtJ5tlrr.cjs.map
@@ -193,4 +193,4 @@ var FflateEngine = class {
193
193
 
194
194
  //#endregion
195
195
  export { fflateAdapter_exports as n, decompressSync as t };
196
- //# sourceMappingURL=fflateAdapter-DX0VqT5k.mjs.map
196
+ //# sourceMappingURL=fflateAdapter-D2mv_ttM.mjs.map
@@ -204,4 +204,4 @@ Object.defineProperty(exports, 'fflateAdapter_exports', {
204
204
  return fflateAdapter_exports;
205
205
  }
206
206
  });
207
- //# sourceMappingURL=fflateAdapter-AHC_S3cb.cjs.map
207
+ //# sourceMappingURL=fflateAdapter-cT4YeY_h.cjs.map
@@ -515,4 +515,4 @@ Object.defineProperty(exports, 'subsetFont', {
515
515
  return subsetFont;
516
516
  }
517
517
  });
518
- //# sourceMappingURL=fontSubset-pFc8Dueu.cjs.map
518
+ //# sourceMappingURL=fontSubset-BxsF9Tu5.cjs.map
@@ -492,4 +492,4 @@ function computeSubsetTag(usedGlyphIds) {
492
492
 
493
493
  //#endregion
494
494
  export { subsetFont as i, computeSubsetTag as n, fontSubset_exports as r, buildSubsetCmap as t };
495
- //# sourceMappingURL=fontSubset-ZpLoOZ2e.mjs.map
495
+ //# sourceMappingURL=fontSubset-ClyTXlhY.mjs.map