qpdf-compress 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -26
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -2
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +7 -1
- package/dist/types.d.ts.map +1 -1
- package/lib/index.ts +6 -2
- package/lib/types.ts +12 -2
- package/package.json +1 -1
- package/src/images.cc +458 -80
- package/src/images.h +14 -3
- package/src/jpeg.cc +83 -0
- package/src/jpeg.h +4 -0
- package/src/qpdf_addon.cc +35 -11
package/README.md
CHANGED
|
@@ -17,8 +17,11 @@ import { compress } from 'qpdf-compress';
|
|
|
17
17
|
// lossless — optimize without touching image quality
|
|
18
18
|
const optimized = await compress(pdfBuffer, { mode: 'lossless' });
|
|
19
19
|
|
|
20
|
-
// lossy —
|
|
21
|
-
const smaller = await compress(pdfBuffer, { mode: 'lossy'
|
|
20
|
+
// lossy — auto quality, downscale to 75 DPI, strip metadata
|
|
21
|
+
const smaller = await compress(pdfBuffer, { mode: 'lossy' });
|
|
22
|
+
|
|
23
|
+
// lossy with explicit quality
|
|
24
|
+
const tiny = await compress(pdfBuffer, { mode: 'lossy', quality: 50 });
|
|
22
25
|
```
|
|
23
26
|
|
|
24
27
|
## 💡 Why qpdf-compress?
|
|
@@ -28,6 +31,7 @@ const smaller = await compress(pdfBuffer, { mode: 'lossy', quality: 50 });
|
|
|
28
31
|
- Native C++ — no WASM overhead, no shell-out to CLI tools
|
|
29
32
|
- Non-blocking — all operations run off the main thread via N-API AsyncWorker
|
|
30
33
|
- Multi-pass optimization — image dedup, JPEG Huffman optimization, Flate level 9
|
|
34
|
+
- Smart defaults — DPI downscaling, metadata stripping, adaptive JPEG quality
|
|
31
35
|
|
|
32
36
|
**🛠️ Developer experience**
|
|
33
37
|
|
|
@@ -52,16 +56,20 @@ const smaller = await compress(pdfBuffer, { mode: 'lossy', quality: 50 });
|
|
|
52
56
|
|
|
53
57
|
### 📊 How it compares
|
|
54
58
|
|
|
55
|
-
| | **qpdf-compress**
|
|
56
|
-
| ------------------------- |
|
|
57
|
-
| Integration | Native Node.js addon
|
|
58
|
-
| Async I/O | ✅ Non-blocking
|
|
59
|
-
| Image deduplication | ✅
|
|
60
|
-
| JPEG Huffman optimization | ✅ Lossless (libjpeg)
|
|
61
|
-
| Lossy image compression | ✅
|
|
62
|
-
|
|
|
63
|
-
|
|
|
64
|
-
|
|
|
59
|
+
| | **qpdf-compress** | qpdf CLI | Ghostscript |
|
|
60
|
+
| ------------------------- | ------------------------ | ----------------- | ----------------- |
|
|
61
|
+
| Integration | Native Node.js addon | Shell exec | Shell exec |
|
|
62
|
+
| Async I/O | ✅ Non-blocking | ❌ Blocks on exec | ❌ Blocks on exec |
|
|
63
|
+
| Image deduplication | ✅ | ❌ | ❌ |
|
|
64
|
+
| JPEG Huffman optimization | ✅ Lossless (libjpeg) | ❌ | ❌ |
|
|
65
|
+
| Lossy image compression | ✅ Auto or fixed quality | ❌ | ✅ |
|
|
66
|
+
| CMYK → RGB conversion | ✅ Automatic | ❌ | ✅ |
|
|
67
|
+
| DPI downscaling | ✅ Configurable | ❌ | ✅ |
|
|
68
|
+
| Metadata stripping | ✅ Default on | ✅ Manual flag | ✅ |
|
|
69
|
+
| Unused font removal | ✅ Automatic | ❌ | ❌ |
|
|
70
|
+
| PDF repair | ✅ Automatic | ✅ Manual flag | ⚠️ Partial |
|
|
71
|
+
| License | Apache-2.0 | Apache-2.0 | AGPL-3.0 ⚠️ |
|
|
72
|
+
| Dependencies | None¹ | System binary | System binary |
|
|
65
73
|
|
|
66
74
|
¹ QPDF is statically linked — no runtime dependencies. Prebuilt binaries downloaded at install.
|
|
67
75
|
|
|
@@ -111,12 +119,19 @@ import { compress } from 'qpdf-compress';
|
|
|
111
119
|
// lossless — optimize streams without touching image quality
|
|
112
120
|
const optimized = await compress(pdfBuffer, { mode: 'lossless' });
|
|
113
121
|
|
|
114
|
-
// lossy —
|
|
122
|
+
// lossy — auto quality per image (skips JPEGs ≤ q90, encodes rest at q85)
|
|
115
123
|
const smaller = await compress(pdfBuffer, { mode: 'lossy' });
|
|
116
124
|
|
|
117
|
-
// lossy with
|
|
125
|
+
// lossy with explicit quality (1–100)
|
|
118
126
|
const tiny = await compress(pdfBuffer, { mode: 'lossy', quality: 50 });
|
|
119
127
|
|
|
128
|
+
// control DPI downscaling (default: 75, 0 = disabled)
|
|
129
|
+
const highRes = await compress(pdfBuffer, { mode: 'lossless', maxDpi: 150 });
|
|
130
|
+
const noDpi = await compress(pdfBuffer, { mode: 'lossless', maxDpi: 0 });
|
|
131
|
+
|
|
132
|
+
// keep metadata (stripped by default)
|
|
133
|
+
const withMeta = await compress(pdfBuffer, { mode: 'lossless', stripMetadata: false });
|
|
134
|
+
|
|
120
135
|
// file path input (avoids copying into memory twice)
|
|
121
136
|
const result = await compress('/path/to/file.pdf', { mode: 'lossless' });
|
|
122
137
|
|
|
@@ -135,27 +150,35 @@ const fixed = await compress(damagedBuffer, { mode: 'lossless' });
|
|
|
135
150
|
|
|
136
151
|
Compresses a PDF document. Automatically repairs damaged PDFs.
|
|
137
152
|
|
|
138
|
-
| Parameter
|
|
139
|
-
|
|
|
140
|
-
| `input`
|
|
141
|
-
| `options.mode`
|
|
142
|
-
| `options.quality`
|
|
143
|
-
| `options.
|
|
153
|
+
| Parameter | Type | Description |
|
|
154
|
+
| ----------------------- | ----------------------- | -------------------------------------------------------------------- |
|
|
155
|
+
| `input` | `Buffer \| string` | PDF data or file path |
|
|
156
|
+
| `options.mode` | `'lossy' \| 'lossless'` | Compression mode |
|
|
157
|
+
| `options.quality` | `number` | JPEG quality 1–100 (lossy only). Omit for auto quality (recommended) |
|
|
158
|
+
| `options.maxDpi` | `number` | Downscale images exceeding this DPI. Default: `75`. `0` = disabled |
|
|
159
|
+
| `options.stripMetadata` | `boolean` | Remove XMP metadata, document info, and thumbnails. Default: `true` |
|
|
160
|
+
| `options.output` | `string` | Write to file path instead of returning a `Buffer` |
|
|
144
161
|
|
|
145
|
-
**
|
|
162
|
+
**Both modes:**
|
|
146
163
|
|
|
147
164
|
- Deduplicates identical images across pages
|
|
148
165
|
- Optimizes embedded JPEG Huffman tables (2–15% savings, zero quality loss)
|
|
149
166
|
- Recompresses all decodable streams with Flate level 9
|
|
150
167
|
- Generates object streams for smaller metadata overhead
|
|
151
|
-
- Removes unreferenced objects
|
|
168
|
+
- Removes unreferenced objects and unused fonts
|
|
169
|
+
- Downscales images exceeding `maxDpi` (default: 75 DPI)
|
|
170
|
+
- Strips XMP metadata, document info, and thumbnails (default: on)
|
|
171
|
+
- Converts CMYK and ICCBased color spaces to RGB
|
|
172
|
+
- Automatically repairs damaged PDFs
|
|
152
173
|
|
|
153
|
-
**Lossy mode** (in addition to
|
|
174
|
+
**Lossy mode** (in addition to the above):
|
|
154
175
|
|
|
155
|
-
- Extracts 8-bit RGB and
|
|
156
|
-
-
|
|
176
|
+
- Extracts 8-bit RGB, grayscale, and CMYK images
|
|
177
|
+
- **Auto quality** (default): skips existing JPEGs at q ≤ 90, encodes the rest at q85
|
|
178
|
+
- **Explicit quality**: recompresses all images at the specified quality (1–100)
|
|
157
179
|
- Only replaces images where JPEG is actually smaller
|
|
158
|
-
- Skips
|
|
180
|
+
- Skips re-encoding when estimated quality is already at or below target
|
|
181
|
+
- Skips tiny images (fewer than 2,500 pixels in total, i.e. roughly < 50×50 px)
|
|
159
182
|
|
|
160
183
|
## ⚙️ How it works
|
|
161
184
|
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../lib/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,eAAe,EAAe,MAAM,YAAY,CAAC;AAY/D,KAAK,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAEhC;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CACtB,KAAK,EAAE,QAAQ,EACf,OAAO,EAAE,eAAe,GAAG;IAAE,MAAM,EAAE,MAAM,CAAA;CAAE,GAC5C,OAAO,CAAC,IAAI,CAAC,CAAC;AACjB,wBAAgB,QAAQ,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../lib/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,eAAe,EAAe,MAAM,YAAY,CAAC;AAY/D,KAAK,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAEhC;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CACtB,KAAK,EAAE,QAAQ,EACf,OAAO,EAAE,eAAe,GAAG;IAAE,MAAM,EAAE,MAAM,CAAA;CAAE,GAC5C,OAAO,CAAC,IAAI,CAAC,CAAC;AACjB,wBAAgB,QAAQ,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AAgCrF,YAAY,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -26,13 +26,17 @@ export async function compress(input, options) {
|
|
|
26
26
|
if (mode !== 'lossy' && mode !== 'lossless') {
|
|
27
27
|
throw new TypeError("Mode must be 'lossy' or 'lossless'");
|
|
28
28
|
}
|
|
29
|
-
const quality = options.quality ??
|
|
30
|
-
if (quality < 1 || quality > 100) {
|
|
29
|
+
const quality = options.quality ?? 0;
|
|
30
|
+
if (quality !== 0 && (quality < 1 || quality > 100)) {
|
|
31
31
|
throw new RangeError('Quality must be between 1 and 100');
|
|
32
32
|
}
|
|
33
|
+
const maxDpi = options.maxDpi ?? 75;
|
|
34
|
+
const stripMetadata = options.stripMetadata ?? true;
|
|
33
35
|
return addon.compress(input, {
|
|
34
36
|
mode,
|
|
35
37
|
quality,
|
|
38
|
+
...(maxDpi > 0 ? { maxDpi } : {}),
|
|
39
|
+
...(stripMetadata ? { stripMetadata: true } : {}),
|
|
36
40
|
...(options.output ? { output: options.output } : {}),
|
|
37
41
|
});
|
|
38
42
|
}
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../lib/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAGzC,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE1D,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;AAC9D,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,QAAQ,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC;AAC7D,CAAC;AAED,MAAM,KAAK,GAAgB,OAAO,CAAC,qCAAqC,CAAC,CAAC;AAmB1E,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,KAAe,EAAE,OAAwB;IACtE,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,8BAA8B,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;SAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QACrC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,4BAA4B,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,SAAS,CAAC,4CAA4C,CAAC,CAAC;IACpE,CAAC;IACD,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAC1B,IAAI,IAAI,KAAK,OAAO,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;QAC5C,MAAM,IAAI,SAAS,CAAC,oCAAoC,CAAC,CAAC;IAC5D,CAAC;IACD,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../lib/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAGzC,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE1D,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;AAC9D,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,QAAQ,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC;AAC7D,CAAC;AAED,MAAM,KAAK,GAAgB,OAAO,CAAC,qCAAqC,CAAC,CAAC;AAmB1E,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,KAAe,EAAE,OAAwB;IACtE,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,8BAA8B,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;SAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QACrC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,4BAA4B,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,SAAS,CAAC,4CAA4C,CAAC,CAAC;IACpE,CAAC;IACD,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAC1B,IAAI,IAAI,KAAK,OAAO,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;QAC5C,MAAM,IAAI,SAAS,CAAC,oCAAoC,CAAC,CAAC;IAC5D,CAAC;IACD,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC;IACrC,IAAI,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,GAAG,CAAC,IAAI,OAAO,GAAG,GAAG,CAAC,EAAE,CAAC;QACpD,MAAM,IAAI,UAAU,CAAC,mCAAmC,CAAC,CAAC;IAC5D,CAAC;IACD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,EAAE,CAAC;IACpC,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,IAAI,CAAC;IACpD,OAAO,KAAK,CAAC,QAAQ,CAAC,KAAK,EAAE;QAC3B,IAAI;QACJ,OAAO;QACP,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACjC,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACjD,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACtD,CAA2B,CAAC;AAC/B,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
export interface CompressOptions {
|
|
2
2
|
/** Compression mode. */
|
|
3
3
|
readonly mode: 'lossy' | 'lossless';
|
|
4
|
-
/** JPEG quality for lossy mode (1–100).
|
|
4
|
+
/** JPEG quality for lossy mode (1–100). When omitted, automatically determines optimal quality per image (capped at 85). */
|
|
5
5
|
readonly quality?: number;
|
|
6
|
+
/** Maximum image DPI. Images exceeding this are downscaled. 0 = no limit. Default: 75. */
|
|
7
|
+
readonly maxDpi?: number;
|
|
8
|
+
/** Remove XMP metadata, document info, and thumbnails. Default: true. */
|
|
9
|
+
readonly stripMetadata?: boolean;
|
|
6
10
|
/** Write to this file path instead of returning a Buffer. */
|
|
7
11
|
readonly output?: string;
|
|
8
12
|
}
|
|
@@ -10,6 +14,8 @@ export interface NativeAddon {
|
|
|
10
14
|
compress(input: Buffer | string, options: {
|
|
11
15
|
mode: 'lossy' | 'lossless';
|
|
12
16
|
quality: number;
|
|
17
|
+
maxDpi?: number;
|
|
18
|
+
stripMetadata?: boolean;
|
|
13
19
|
output?: string;
|
|
14
20
|
}): Promise<Buffer | undefined>;
|
|
15
21
|
}
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../lib/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,eAAe;IAC9B,wBAAwB;IACxB,QAAQ,CAAC,IAAI,EAAE,OAAO,GAAG,UAAU,CAAC;IACpC,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../lib/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,eAAe;IAC9B,wBAAwB;IACxB,QAAQ,CAAC,IAAI,EAAE,OAAO,GAAG,UAAU,CAAC;IACpC,4HAA4H;IAC5H,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,0FAA0F;IAC1F,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,yEAAyE;IACzE,QAAQ,CAAC,aAAa,CAAC,EAAE,OAAO,CAAC;IACjC,6DAA6D;IAC7D,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,CACN,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,OAAO,EAAE;QACP,IAAI,EAAE,OAAO,GAAG,UAAU,CAAC;QAC3B,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,aAAa,CAAC,EAAE,OAAO,CAAC;QACxB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GACA,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;CAChC"}
|
package/lib/index.ts
CHANGED
|
@@ -46,13 +46,17 @@ export async function compress(input: PdfInput, options: CompressOptions): Promi
|
|
|
46
46
|
if (mode !== 'lossy' && mode !== 'lossless') {
|
|
47
47
|
throw new TypeError("Mode must be 'lossy' or 'lossless'");
|
|
48
48
|
}
|
|
49
|
-
const quality = options.quality ??
|
|
50
|
-
if (quality < 1 || quality > 100) {
|
|
49
|
+
const quality = options.quality ?? 0;
|
|
50
|
+
if (quality !== 0 && (quality < 1 || quality > 100)) {
|
|
51
51
|
throw new RangeError('Quality must be between 1 and 100');
|
|
52
52
|
}
|
|
53
|
+
const maxDpi = options.maxDpi ?? 75;
|
|
54
|
+
const stripMetadata = options.stripMetadata ?? true;
|
|
53
55
|
return addon.compress(input, {
|
|
54
56
|
mode,
|
|
55
57
|
quality,
|
|
58
|
+
...(maxDpi > 0 ? { maxDpi } : {}),
|
|
59
|
+
...(stripMetadata ? { stripMetadata: true } : {}),
|
|
56
60
|
...(options.output ? { output: options.output } : {}),
|
|
57
61
|
}) as Promise<Buffer | void>;
|
|
58
62
|
}
|
package/lib/types.ts
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
export interface CompressOptions {
|
|
2
2
|
/** Compression mode. */
|
|
3
3
|
readonly mode: 'lossy' | 'lossless';
|
|
4
|
-
/** JPEG quality for lossy mode (1–100).
|
|
4
|
+
/** JPEG quality for lossy mode (1–100). When omitted, automatically determines optimal quality per image (capped at 85). */
|
|
5
5
|
readonly quality?: number;
|
|
6
|
+
/** Maximum image DPI. Images exceeding this are downscaled. 0 = no limit. Default: 75. */
|
|
7
|
+
readonly maxDpi?: number;
|
|
8
|
+
/** Remove XMP metadata, document info, and thumbnails. Default: true. */
|
|
9
|
+
readonly stripMetadata?: boolean;
|
|
6
10
|
/** Write to this file path instead of returning a Buffer. */
|
|
7
11
|
readonly output?: string;
|
|
8
12
|
}
|
|
@@ -10,6 +14,12 @@ export interface CompressOptions {
|
|
|
10
14
|
export interface NativeAddon {
|
|
11
15
|
compress(
|
|
12
16
|
input: Buffer | string,
|
|
13
|
-
options: {
|
|
17
|
+
options: {
|
|
18
|
+
mode: 'lossy' | 'lossless';
|
|
19
|
+
quality: number; // 0 = auto, 1–100 = fixed
|
|
20
|
+
maxDpi?: number;
|
|
21
|
+
stripMetadata?: boolean;
|
|
22
|
+
output?: string;
|
|
23
|
+
},
|
|
14
24
|
): Promise<Buffer | undefined>;
|
|
15
25
|
}
|
package/package.json
CHANGED
package/src/images.cc
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#include "images.h"
|
|
2
2
|
#include "jpeg.h"
|
|
3
3
|
|
|
4
|
+
#include <algorithm>
|
|
4
5
|
#include <cstdint>
|
|
5
6
|
#include <cstring>
|
|
6
7
|
#include <limits>
|
|
@@ -10,95 +11,243 @@
|
|
|
10
11
|
|
|
11
12
|
#include <qpdf/Buffer.hh>
|
|
12
13
|
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Color space helpers
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
// resolves /ColorSpace to component count and whether it's CMYK.
|
|
19
|
+
// returns 0 on unsupported color spaces.
|
|
20
|
+
static int resolveColorSpace(QPDFObjectHandle cs, bool &isCMYK) {
|
|
21
|
+
isCMYK = false;
|
|
22
|
+
|
|
23
|
+
if (cs.isName()) {
|
|
24
|
+
auto name = cs.getName();
|
|
25
|
+
if (name == "/DeviceRGB")
|
|
26
|
+
return 3;
|
|
27
|
+
if (name == "/DeviceGray")
|
|
28
|
+
return 1;
|
|
29
|
+
if (name == "/DeviceCMYK") {
|
|
30
|
+
isCMYK = true;
|
|
31
|
+
return 4;
|
|
32
|
+
}
|
|
33
|
+
return 0;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// ICCBased: [/ICCBased <stream>] — get /N from the ICC profile stream dict
|
|
37
|
+
if (cs.isArray() && cs.getArrayNItems() >= 2) {
|
|
38
|
+
auto csName = cs.getArrayItem(0);
|
|
39
|
+
if (csName.isName() && csName.getName() == "/ICCBased") {
|
|
40
|
+
auto profile = cs.getArrayItem(1);
|
|
41
|
+
if (profile.isStream()) {
|
|
42
|
+
auto n = profile.getDict().getKey("/N");
|
|
43
|
+
if (n.isInteger()) {
|
|
44
|
+
int components = static_cast<int>(n.getIntValue());
|
|
45
|
+
if (components == 4)
|
|
46
|
+
isCMYK = true;
|
|
47
|
+
if (components == 1 || components == 3 || components == 4)
|
|
48
|
+
return components;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
return 0;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// naive CMYK → RGB conversion (without ICC profile).
|
|
58
|
+
// uses the standard formula: R = 255 * (1-C) * (1-K), etc.
|
|
59
|
+
static void cmykToRgb(const unsigned char *cmyk, unsigned char *rgb,
|
|
60
|
+
size_t pixelCount) {
|
|
61
|
+
for (size_t i = 0; i < pixelCount; ++i) {
|
|
62
|
+
double c = cmyk[i * 4 + 0] / 255.0;
|
|
63
|
+
double m = cmyk[i * 4 + 1] / 255.0;
|
|
64
|
+
double y = cmyk[i * 4 + 2] / 255.0;
|
|
65
|
+
double k = cmyk[i * 4 + 3] / 255.0;
|
|
66
|
+
rgb[i * 3 + 0] =
|
|
67
|
+
static_cast<unsigned char>(255.0 * (1.0 - c) * (1.0 - k) + 0.5);
|
|
68
|
+
rgb[i * 3 + 1] =
|
|
69
|
+
static_cast<unsigned char>(255.0 * (1.0 - m) * (1.0 - k) + 0.5);
|
|
70
|
+
rgb[i * 3 + 2] =
|
|
71
|
+
static_cast<unsigned char>(255.0 * (1.0 - y) * (1.0 - k) + 0.5);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
// Bilinear downscaling
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
|
|
79
|
+
static std::vector<uint8_t> bilinearDownscale(const unsigned char *src,
|
|
80
|
+
int srcW, int srcH,
|
|
81
|
+
int components, int dstW,
|
|
82
|
+
int dstH) {
|
|
83
|
+
std::vector<uint8_t> dst(static_cast<size_t>(dstW) * dstH * components);
|
|
84
|
+
|
|
85
|
+
double xRatio = static_cast<double>(srcW) / dstW;
|
|
86
|
+
double yRatio = static_cast<double>(srcH) / dstH;
|
|
87
|
+
|
|
88
|
+
for (int y = 0; y < dstH; ++y) {
|
|
89
|
+
double srcY = y * yRatio;
|
|
90
|
+
int y0 = static_cast<int>(srcY);
|
|
91
|
+
int y1 = std::min(y0 + 1, srcH - 1);
|
|
92
|
+
double fy = srcY - y0;
|
|
93
|
+
|
|
94
|
+
for (int x = 0; x < dstW; ++x) {
|
|
95
|
+
double srcX = x * xRatio;
|
|
96
|
+
int x0 = static_cast<int>(srcX);
|
|
97
|
+
int x1 = std::min(x0 + 1, srcW - 1);
|
|
98
|
+
double fx = srcX - x0;
|
|
99
|
+
|
|
100
|
+
for (int c = 0; c < components; ++c) {
|
|
101
|
+
double v00 = src[(y0 * srcW + x0) * components + c];
|
|
102
|
+
double v10 = src[(y0 * srcW + x1) * components + c];
|
|
103
|
+
double v01 = src[(y1 * srcW + x0) * components + c];
|
|
104
|
+
double v11 = src[(y1 * srcW + x1) * components + c];
|
|
105
|
+
|
|
106
|
+
double val = v00 * (1 - fx) * (1 - fy) + v10 * fx * (1 - fy) +
|
|
107
|
+
v01 * (1 - fx) * fy + v11 * fx * fy;
|
|
108
|
+
dst[(y * dstW + x) * components + c] =
|
|
109
|
+
static_cast<uint8_t>(std::min(std::max(val + 0.5, 0.0), 255.0));
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
return dst;
|
|
115
|
+
}
|
|
116
|
+
|
|
13
117
|
// ---------------------------------------------------------------------------
|
|
14
118
|
// Image recompression for lossy mode
|
|
15
119
|
// ---------------------------------------------------------------------------
|
|
16
120
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
121
|
+
// auto-mode thresholds — only re-encode existing JPEGs whose estimated
|
|
122
|
+
// quality exceeds kAutoSkipThreshold (avoids pointless re-encoding where
|
|
123
|
+
// generation loss outweighs size savings). Non-JPEG images and high-quality
|
|
124
|
+
// JPEGs are (re-)encoded at kAutoTargetQuality.
|
|
125
|
+
static constexpr int kAutoSkipThreshold = 90;
|
|
126
|
+
static constexpr int kAutoTargetQuality = 85;
|
|
127
|
+
|
|
128
|
+
void optimizeImages(QPDF &qpdf, const CompressOptions &opts) {
|
|
129
|
+
const bool autoQuality = (opts.quality == 0);
|
|
130
|
+
forEachImage(qpdf, [&](const std::string &, QPDFObjectHandle xobj,
|
|
131
|
+
QPDFObjectHandle, QPDFPageObjectHelper &) {
|
|
132
|
+
auto dict = xobj.getDict();
|
|
133
|
+
|
|
134
|
+
// only handle 8-bit images
|
|
135
|
+
if (!dict.getKey("/BitsPerComponent").isInteger() ||
|
|
136
|
+
dict.getKey("/BitsPerComponent").getIntValue() != 8)
|
|
137
|
+
return;
|
|
21
138
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
139
|
+
int width = 0, height = 0;
|
|
140
|
+
if (dict.getKey("/Width").isInteger())
|
|
141
|
+
width = static_cast<int>(dict.getKey("/Width").getIntValue());
|
|
142
|
+
if (dict.getKey("/Height").isInteger())
|
|
143
|
+
height = static_cast<int>(dict.getKey("/Height").getIntValue());
|
|
26
144
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
width = static_cast<int>(dict.getKey("/Width").getIntValue());
|
|
30
|
-
if (dict.getKey("/Height").isInteger())
|
|
31
|
-
height = static_cast<int>(dict.getKey("/Height").getIntValue());
|
|
145
|
+
if (width <= 0 || height <= 0 || width > 16384 || height > 16384)
|
|
146
|
+
return;
|
|
32
147
|
|
|
33
|
-
|
|
34
|
-
|
|
148
|
+
// determine color components via color space resolution
|
|
149
|
+
bool isCMYK = false;
|
|
150
|
+
int components = resolveColorSpace(dict.getKey("/ColorSpace"), isCMYK);
|
|
151
|
+
if (components == 0)
|
|
152
|
+
return;
|
|
35
153
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
if (cs.getName() == "/DeviceRGB")
|
|
40
|
-
components = 3;
|
|
41
|
-
else if (cs.getName() == "/DeviceGray")
|
|
42
|
-
components = 1;
|
|
43
|
-
else
|
|
44
|
-
return; // skip CMYK, Lab, etc.
|
|
45
|
-
} else {
|
|
46
|
-
return; // skip indexed, ICCBased, etc.
|
|
47
|
-
}
|
|
154
|
+
// skip tiny images (logos, icons)
|
|
155
|
+
if (width * height < 2500)
|
|
156
|
+
return;
|
|
48
157
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
158
|
+
// get fully decoded stream data (raw pixels)
|
|
159
|
+
std::shared_ptr<Buffer> streamData;
|
|
160
|
+
try {
|
|
161
|
+
streamData = xobj.getStreamData(qpdf_dl_all);
|
|
162
|
+
} catch (...) {
|
|
163
|
+
return;
|
|
164
|
+
}
|
|
52
165
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
166
|
+
// overflow-safe size calculation
|
|
167
|
+
auto w = static_cast<size_t>(width);
|
|
168
|
+
auto h = static_cast<size_t>(height);
|
|
169
|
+
auto c = static_cast<size_t>(components);
|
|
170
|
+
if (h > 0 && w > std::numeric_limits<size_t>::max() / h)
|
|
171
|
+
return;
|
|
172
|
+
if (c > 0 && (w * h) > std::numeric_limits<size_t>::max() / c)
|
|
173
|
+
return;
|
|
174
|
+
size_t expectedSize = w * h * c;
|
|
175
|
+
if (streamData->getSize() != expectedSize)
|
|
176
|
+
return;
|
|
60
177
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
178
|
+
// convert CMYK → RGB for JPEG encoding (JPEG doesn't support CMYK
|
|
179
|
+
// natively in most decoders)
|
|
180
|
+
const unsigned char *pixels = streamData->getBuffer();
|
|
181
|
+
std::vector<uint8_t> rgbBuf;
|
|
182
|
+
int encodeComponents = components;
|
|
183
|
+
if (isCMYK) {
|
|
184
|
+
size_t pixelCount = w * h;
|
|
185
|
+
rgbBuf.resize(pixelCount * 3);
|
|
186
|
+
cmykToRgb(pixels, rgbBuf.data(), pixelCount);
|
|
187
|
+
pixels = rgbBuf.data();
|
|
188
|
+
encodeComponents = 3;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
auto currentFilter = dict.getKey("/Filter");
|
|
192
|
+
bool isCurrentlyJpeg =
|
|
193
|
+
currentFilter.isName() && currentFilter.getName() == "/DCTDecode";
|
|
194
|
+
|
|
195
|
+
// determine per-image target quality
|
|
196
|
+
int targetQuality = autoQuality ? kAutoTargetQuality : opts.quality;
|
|
197
|
+
|
|
198
|
+
// in auto mode, skip existing JPEGs unless their quality is very high
|
|
199
|
+
// (> 90) — re-encoding a q86 JPEG at q85 saves almost nothing but adds
|
|
200
|
+
// artifacts. Only high-quality originals (92, 95, 100…) benefit from
|
|
201
|
+
// re-encoding down to 85.
|
|
202
|
+
if (isCurrentlyJpeg && !isCMYK) {
|
|
203
|
+
auto rawData = xobj.getRawStreamData();
|
|
204
|
+
int existingQ =
|
|
205
|
+
estimateJpegQuality(rawData->getBuffer(), rawData->getSize());
|
|
206
|
+
if (autoQuality) {
|
|
207
|
+
if (existingQ > 0 && existingQ <= kAutoSkipThreshold)
|
|
68
208
|
return;
|
|
69
|
-
|
|
70
|
-
|
|
209
|
+
} else {
|
|
210
|
+
// explicit quality: skip JPEGs whose estimated quality is already at or below the requested target
|
|
211
|
+
if (existingQ > 0 && existingQ <= targetQuality)
|
|
71
212
|
return;
|
|
213
|
+
}
|
|
214
|
+
}
|
|
72
215
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
216
|
+
// encode as JPEG via libjpeg-turbo
|
|
217
|
+
std::vector<uint8_t> jpegData;
|
|
218
|
+
if (!encodeJpeg(pixels, width, height, encodeComponents, targetQuality,
|
|
219
|
+
jpegData))
|
|
220
|
+
return;
|
|
76
221
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
222
|
+
// for an existing non-CMYK JPEG, only replace it if re-encoding actually reduced its size (other sources are replaced without a size check)
|
|
223
|
+
if (isCurrentlyJpeg && !isCMYK) {
|
|
224
|
+
auto rawData = xobj.getRawStreamData();
|
|
225
|
+
if (jpegData.size() >= rawData->getSize())
|
|
226
|
+
return;
|
|
227
|
+
}
|
|
82
228
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
229
|
+
// replace stream data with JPEG
|
|
230
|
+
std::string jpegStr(reinterpret_cast<char *>(jpegData.data()),
|
|
231
|
+
jpegData.size());
|
|
232
|
+
xobj.replaceStreamData(jpegStr, QPDFObjectHandle::newName("/DCTDecode"),
|
|
233
|
+
QPDFObjectHandle::newNull());
|
|
234
|
+
|
|
235
|
+
// set /ColorSpace to the device space matching the encoded JPEG (DeviceRGB for 3 components, DeviceGray for 1) after CMYK/ICCBased conversion
|
|
236
|
+
if (isCMYK || !dict.getKey("/ColorSpace").isName() ||
|
|
237
|
+
dict.getKey("/ColorSpace").getName() != "/DeviceRGB") {
|
|
238
|
+
if (encodeComponents == 3)
|
|
239
|
+
dict.replaceKey("/ColorSpace", QPDFObjectHandle::newName("/DeviceRGB"));
|
|
240
|
+
else if (encodeComponents == 1)
|
|
241
|
+
dict.replaceKey("/ColorSpace",
|
|
242
|
+
QPDFObjectHandle::newName("/DeviceGray"));
|
|
243
|
+
}
|
|
89
244
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
// remove FlateDecode-specific params
|
|
97
|
-
if (dict.hasKey("/DecodeParms"))
|
|
98
|
-
dict.removeKey("/DecodeParms");
|
|
99
|
-
if (dict.hasKey("/Predictor"))
|
|
100
|
-
dict.removeKey("/Predictor");
|
|
101
|
-
});
|
|
245
|
+
// remove FlateDecode-specific params
|
|
246
|
+
if (dict.hasKey("/DecodeParms"))
|
|
247
|
+
dict.removeKey("/DecodeParms");
|
|
248
|
+
if (dict.hasKey("/Predictor"))
|
|
249
|
+
dict.removeKey("/Predictor");
|
|
250
|
+
});
|
|
102
251
|
}
|
|
103
252
|
|
|
104
253
|
// ---------------------------------------------------------------------------
|
|
@@ -117,7 +266,8 @@ void deduplicateImages(QPDF &qpdf) {
|
|
|
117
266
|
|
|
118
267
|
// first pass: collect image objects and hash their raw data
|
|
119
268
|
forEachImage(qpdf, [&](const std::string & /*key*/, QPDFObjectHandle xobj,
|
|
120
|
-
QPDFObjectHandle /*xobjects
|
|
269
|
+
QPDFObjectHandle /*xobjects*/,
|
|
270
|
+
QPDFPageObjectHelper & /*page*/) {
|
|
121
271
|
auto og = xobj.getObjGen();
|
|
122
272
|
if (seen.count(og))
|
|
123
273
|
return;
|
|
@@ -170,12 +320,13 @@ void deduplicateImages(QPDF &qpdf) {
|
|
|
170
320
|
return;
|
|
171
321
|
|
|
172
322
|
// third pass: rewrite XObject references to point to canonical objects
|
|
173
|
-
forEachImage(qpdf,
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
323
|
+
forEachImage(qpdf,
|
|
324
|
+
[&](const std::string &key, QPDFObjectHandle xobj,
|
|
325
|
+
QPDFObjectHandle xobjects, QPDFPageObjectHelper & /*page*/) {
|
|
326
|
+
auto it = replacements.find(xobj.getObjGen());
|
|
327
|
+
if (it != replacements.end())
|
|
328
|
+
xobjects.replaceKey(key, it->second);
|
|
329
|
+
});
|
|
179
330
|
}
|
|
180
331
|
|
|
181
332
|
// ---------------------------------------------------------------------------
|
|
@@ -186,7 +337,8 @@ void optimizeExistingJpegs(QPDF &qpdf) {
|
|
|
186
337
|
std::set<QPDFObjGen> processed;
|
|
187
338
|
|
|
188
339
|
forEachImage(qpdf, [&](const std::string & /*key*/, QPDFObjectHandle xobj,
|
|
189
|
-
QPDFObjectHandle /*xobjects
|
|
340
|
+
QPDFObjectHandle /*xobjects*/,
|
|
341
|
+
QPDFPageObjectHelper & /*page*/) {
|
|
190
342
|
auto og = xobj.getObjGen();
|
|
191
343
|
if (processed.count(og))
|
|
192
344
|
return;
|
|
@@ -216,3 +368,229 @@ void optimizeExistingJpegs(QPDF &qpdf) {
|
|
|
216
368
|
}
|
|
217
369
|
});
|
|
218
370
|
}
|
|
371
|
+
|
|
372
|
+
// ---------------------------------------------------------------------------
|
|
373
|
+
// DPI-based image downscaling
|
|
374
|
+
// ---------------------------------------------------------------------------
|
|
375
|
+
|
|
376
|
+
// Downscales 8-bit image XObjects whose estimated resolution exceeds maxDpi.
//
// qpdf   - document whose images are rewritten in place
// maxDpi - resolution ceiling in dots per inch; values <= 0 disable the pass
//
// The resolution is estimated from the MediaBox of the page on which an image
// is first encountered, assuming the image covers the whole page. Each image
// object is handled once, even when several pages reference it. Images that
// cannot be decoded, whose decoded size does not match
// width * height * components, or whose colour space resolveColorSpace()
// rejects are left untouched.
void downscaleImages(QPDF &qpdf, int maxDpi) {
  if (maxDpi <= 0)
    return;

  std::set<QPDFObjGen> processed;

  forEachImage(qpdf, [&](const std::string & /*key*/, QPDFObjectHandle xobj,
                         QPDFObjectHandle /*xobjects*/,
                         QPDFPageObjectHelper &page) {
    // an image shared by several pages is only evaluated once
    if (!processed.insert(xobj.getObjGen()).second)
      return;

    auto dict = xobj.getDict();

    // only plain 8-bit samples are supported by the scaler
    auto bpc = dict.getKey("/BitsPerComponent");
    if (!bpc.isInteger() || bpc.getIntValue() != 8)
      return;

    // validate the dimensions before narrowing them to int, so an absurd
    // /Width or /Height cannot wrap into a small positive value
    auto widthObj = dict.getKey("/Width");
    auto heightObj = dict.getKey("/Height");
    if (!widthObj.isInteger() || !heightObj.isInteger())
      return;
    const long long rawW = widthObj.getIntValue();
    const long long rawH = heightObj.getIntValue();
    const long long maxDim = std::numeric_limits<int>::max();
    if (rawW <= 0 || rawH <= 0 || rawW > maxDim || rawH > maxDim)
      return;
    const int imgW = static_cast<int>(rawW);
    const int imgH = static_cast<int>(rawH);

    bool isCMYK = false;
    int components = resolveColorSpace(dict.getKey("/ColorSpace"), isCMYK);
    if (components == 0)
      return;

    // a /Decode array holds two entries per colour component; after the
    // CMYK -> RGB conversion below it would no longer match the image, so
    // leave such images alone rather than emit an inconsistent dictionary
    if (isCMYK && dict.hasKey("/Decode"))
      return;

    // get page dimensions from MediaBox (in points, 72 per inch)
    auto mediaBox = page.getAttribute("/MediaBox", false);
    if (!mediaBox.isArray() || mediaBox.getArrayNItems() < 4)
      return;

    double pageW = 0, pageH = 0;
    try {
      pageW = mediaBox.getArrayItem(2).getNumericValue() -
              mediaBox.getArrayItem(0).getNumericValue();
      pageH = mediaBox.getArrayItem(3).getNumericValue() -
              mediaBox.getArrayItem(1).getNumericValue();
    } catch (...) {
      return;
    }

    if (pageW <= 0 || pageH <= 0)
      return;

    // estimate effective DPI (assumes image fills page — conservative)
    double dpiX = imgW / (pageW / 72.0);
    double dpiY = imgH / (pageH / 72.0);
    double effectiveDpi = std::max(dpiX, dpiY);

    if (effectiveDpi <= maxDpi)
      return;

    // calculate target dimensions, rounding to the nearest pixel
    double scale = static_cast<double>(maxDpi) / effectiveDpi;
    int newW = std::max(1, static_cast<int>(imgW * scale + 0.5));
    int newH = std::max(1, static_cast<int>(imgH * scale + 0.5));

    // not worth downscaling if the reduction is minimal
    if (newW >= imgW - 1 && newH >= imgH - 1)
      return;

    // decode pixels
    std::shared_ptr<Buffer> streamData;
    try {
      streamData = xobj.getStreamData(qpdf_dl_all);
    } catch (...) {
      return;
    }

    // guard w * h * c against size_t overflow before comparing sizes
    auto w = static_cast<size_t>(imgW);
    auto h = static_cast<size_t>(imgH);
    auto c = static_cast<size_t>(components);
    if (h > 0 && w > std::numeric_limits<size_t>::max() / h)
      return;
    if (c > 0 && (w * h) > std::numeric_limits<size_t>::max() / c)
      return;
    if (streamData->getSize() != w * h * c)
      return;

    const unsigned char *pixels = streamData->getBuffer();
    int downscaleComponents = components;

    // convert CMYK → RGB before downscaling
    std::vector<uint8_t> rgbBuf;
    if (isCMYK) {
      size_t pixelCount = w * h;
      rgbBuf.resize(pixelCount * 3);
      cmykToRgb(pixels, rgbBuf.data(), pixelCount);
      pixels = rgbBuf.data();
      downscaleComponents = 3;
    }

    auto scaled =
        bilinearDownscale(pixels, imgW, imgH, downscaleComponents, newW, newH);

    auto newSize = static_cast<size_t>(newW) * newH * downscaleComponents;
    if (scaled.size() != newSize)
      return;

    // hand the samples over as *unfiltered* data: replaceStreamData() stores
    // the bytes verbatim and only records the given filter in the dictionary,
    // so declaring /FlateDecode here would label raw pixels as compressed.
    // With a null filter the writer applies Flate compression on output.
    std::string rawStr(reinterpret_cast<char *>(scaled.data()), scaled.size());
    xobj.replaceStreamData(rawStr, QPDFObjectHandle::newNull(),
                           QPDFObjectHandle::newNull());

    dict.replaceKey("/Width", QPDFObjectHandle::newInteger(newW));
    dict.replaceKey("/Height", QPDFObjectHandle::newInteger(newH));

    if (isCMYK) {
      dict.replaceKey("/ColorSpace", QPDFObjectHandle::newName("/DeviceRGB"));
    }

    // remove predictor params from previous encoding
    if (dict.hasKey("/DecodeParms"))
      dict.removeKey("/DecodeParms");
    if (dict.hasKey("/Predictor"))
      dict.removeKey("/Predictor");
  });
}
|
|
502
|
+
|
|
503
|
+
// ---------------------------------------------------------------------------
|
|
504
|
+
// Metadata stripping
|
|
505
|
+
// ---------------------------------------------------------------------------
|
|
506
|
+
|
|
507
|
+
// Removes descriptive, non-rendering entries from the document:
//   - catalog:  /Metadata (XMP stream) and /MarkInfo
//   - trailer:  /Info (document information dictionary)
//   - per page: /Metadata, /PieceInfo and /Thumb (embedded thumbnail)
//
// NOTE(review): /MarkInfo is dropped while any structure tree is left in
// place — confirm this is intended for tagged documents.
void stripMetadata(QPDF &qpdf) {
  // deletes `key` from `obj` only when it is actually present
  const auto dropKey = [](QPDFObjectHandle obj, const char *key) {
    if (obj.hasKey(key))
      obj.removeKey(key);
  };

  auto catalog = qpdf.getRoot();
  dropKey(catalog, "/Metadata");

  dropKey(qpdf.getTrailer(), "/Info");

  // page-level metadata, application private data and thumbnails
  static const char *const kPageKeys[] = {"/Metadata", "/PieceInfo", "/Thumb"};
  for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
    auto pageDict = page.getObjectHandle();
    for (const char *key : kPageKeys)
      dropKey(pageDict, key);
  }

  dropKey(catalog, "/MarkInfo");
}
|
|
539
|
+
|
|
540
|
+
// ---------------------------------------------------------------------------
|
|
541
|
+
// Remove unused font resources
|
|
542
|
+
// ---------------------------------------------------------------------------
|
|
543
|
+
|
|
544
|
+
void removeUnusedFonts(QPDF &qpdf) {
|
|
545
|
+
for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
|
|
546
|
+
auto pageObj = page.getObjectHandle();
|
|
547
|
+
auto resources = pageObj.getKey("/Resources");
|
|
548
|
+
if (!resources.isDictionary())
|
|
549
|
+
continue;
|
|
550
|
+
auto fonts = resources.getKey("/Font");
|
|
551
|
+
if (!fonts.isDictionary())
|
|
552
|
+
continue;
|
|
553
|
+
|
|
554
|
+
// collect all font names referenced in this page's content stream(s)
|
|
555
|
+
std::set<std::string> usedFonts;
|
|
556
|
+
|
|
557
|
+
try {
|
|
558
|
+
// get unparsed content stream data
|
|
559
|
+
auto contents = pageObj.getKey("/Contents");
|
|
560
|
+
std::string contentStr;
|
|
561
|
+
|
|
562
|
+
if (contents.isStream()) {
|
|
563
|
+
auto buf = contents.getStreamData(qpdf_dl_generalized);
|
|
564
|
+
contentStr.assign(reinterpret_cast<const char *>(buf->getBuffer()),
|
|
565
|
+
buf->getSize());
|
|
566
|
+
} else if (contents.isArray()) {
|
|
567
|
+
for (int i = 0; i < contents.getArrayNItems(); ++i) {
|
|
568
|
+
auto stream = contents.getArrayItem(i);
|
|
569
|
+
if (stream.isStream()) {
|
|
570
|
+
auto buf = stream.getStreamData(qpdf_dl_generalized);
|
|
571
|
+
contentStr.append(reinterpret_cast<const char *>(buf->getBuffer()),
|
|
572
|
+
buf->getSize());
|
|
573
|
+
contentStr += '\n';
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
// scan for /FontName references — Tf operator uses font name
|
|
579
|
+
// pattern: /FontName <size> Tf
|
|
580
|
+
for (auto &fontKey : fonts.getKeys()) {
|
|
581
|
+
// fontKey includes the leading '/', e.g. "/F1"
|
|
582
|
+
if (contentStr.find(fontKey) != std::string::npos)
|
|
583
|
+
usedFonts.insert(fontKey);
|
|
584
|
+
}
|
|
585
|
+
} catch (...) {
|
|
586
|
+
continue; // skip this page if content can't be read
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
// remove fonts that are not referenced in the content stream
|
|
590
|
+
auto allFontKeys = fonts.getKeys();
|
|
591
|
+
for (auto &fontKey : allFontKeys) {
|
|
592
|
+
if (usedFonts.find(fontKey) == usedFonts.end())
|
|
593
|
+
fonts.removeKey(fontKey);
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
}
|
package/src/images.h
CHANGED
|
@@ -3,8 +3,10 @@
|
|
|
3
3
|
#include <qpdf/QPDF.hh>
|
|
4
4
|
#include <qpdf/QPDFObjectHandle.hh>
|
|
5
5
|
#include <qpdf/QPDFPageDocumentHelper.hh>
|
|
6
|
+
#include <qpdf/QPDFPageObjectHelper.hh>
|
|
6
7
|
|
|
7
|
-
// iterates all image XObjects across all pages
|
|
8
|
+
// iterates all image XObjects across all pages, providing the page helper for
|
|
9
|
+
// context (e.g. MediaBox for DPI calculations)
|
|
8
10
|
template <typename Fn> void forEachImage(QPDF &qpdf, Fn &&fn) {
|
|
9
11
|
for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
|
|
10
12
|
auto resources = page.getObjectHandle().getKey("/Resources");
|
|
@@ -21,11 +23,20 @@ template <typename Fn> void forEachImage(QPDF &qpdf, Fn &&fn) {
|
|
|
21
23
|
if (!dict.getKey("/Subtype").isName() ||
|
|
22
24
|
dict.getKey("/Subtype").getName() != "/Image")
|
|
23
25
|
continue;
|
|
24
|
-
fn(key, xobj, xobjects);
|
|
26
|
+
fn(key, xobj, xobjects, page);
|
|
25
27
|
}
|
|
26
28
|
}
|
|
27
29
|
}
|
|
28
30
|
|
|
29
|
-
|
|
31
|
+
// Tuning knobs for the lossy image pass. Every field defaults to "off/auto".
struct CompressOptions {
  // JPEG quality; 0 = auto (per-image quality, capped at 85)
  int quality{0};
  // resolution ceiling in DPI; 0 = no downscaling
  int maxDpi{0};
  // whether document metadata should be stripped
  bool stripMetadata{false};
};
|
|
36
|
+
|
|
37
|
+
void optimizeImages(QPDF &qpdf, const CompressOptions &opts);
|
|
38
|
+
void downscaleImages(QPDF &qpdf, int maxDpi);
|
|
30
39
|
void deduplicateImages(QPDF &qpdf);
|
|
31
40
|
void optimizeExistingJpegs(QPDF &qpdf);
|
|
41
|
+
void stripMetadata(QPDF &qpdf);
|
|
42
|
+
void removeUnusedFonts(QPDF &qpdf);
|
package/src/jpeg.cc
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#include "jpeg.h"
|
|
2
2
|
|
|
3
3
|
#include <cstdlib>
|
|
4
|
+
#include <limits>
|
|
4
5
|
|
|
5
6
|
void jpegErrorExit(j_common_ptr cinfo) {
|
|
6
7
|
auto *myerr = reinterpret_cast<JpegErrorMgr *>(cinfo->err);
|
|
@@ -142,3 +143,85 @@ bool encodeJpeg(const unsigned char *pixels, int width, int height,
|
|
|
142
143
|
free(outbuf);
|
|
143
144
|
return ok;
|
|
144
145
|
}
|
|
146
|
+
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
// JPEG quality estimation from quantization tables
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
// standard IJG luminance quantization table (quality 50 baseline)
|
|
152
|
+
// laid out as the 8x8 coefficient grid, one row per line
static const unsigned int std_luminance_qt[64] = {
    16, 11, 10, 16,  24,  40,  51,  61,
    12, 12, 14, 19,  26,  58,  60,  55,
    14, 13, 16, 24,  40,  57,  69,  56,
    14, 17, 22, 29,  51,  87,  80,  62,
    18, 22, 37, 56,  68, 109, 103,  77,
    24, 35, 55, 64,  81, 104, 113,  92,
    49, 64, 78, 87, 103, 121, 120, 101,
    72, 92, 95, 98, 112, 100, 103,  99};
|
|
157
|
+
|
|
158
|
+
// isolated setjmp scope for reading JPEG header
|
|
159
|
+
static int estimateJpegQualityImpl(const unsigned char *data, size_t size) {
|
|
160
|
+
struct jpeg_decompress_struct cinfo = {};
|
|
161
|
+
JpegErrorMgr jerr = {};
|
|
162
|
+
|
|
163
|
+
cinfo.err = jpeg_std_error(&jerr.pub);
|
|
164
|
+
jerr.pub.error_exit = jpegErrorExit;
|
|
165
|
+
|
|
166
|
+
if (setjmp(jerr.jmpbuf)) {
|
|
167
|
+
jpeg_destroy_decompress(&cinfo);
|
|
168
|
+
return -1;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
jpeg_create_decompress(&cinfo);
|
|
172
|
+
jpeg_mem_src(&cinfo, data, static_cast<unsigned long>(size));
|
|
173
|
+
|
|
174
|
+
if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
|
|
175
|
+
jpeg_destroy_decompress(&cinfo);
|
|
176
|
+
return -1;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// need at least the luminance table (slot 0)
|
|
180
|
+
if (!cinfo.quant_tbl_ptrs[0]) {
|
|
181
|
+
jpeg_destroy_decompress(&cinfo);
|
|
182
|
+
return -1;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// reverse-engineer the IJG quality from the luminance table.
|
|
186
|
+
// for each quality q, IJG computes: scale = (q < 50) ? 5000/q : 200-2*q
|
|
187
|
+
// then each table value = clamp(floor((base * scale + 50) / 100), 1, 255)
|
|
188
|
+
// we find q that minimizes the sum of absolute differences.
|
|
189
|
+
JQUANT_TBL *tbl = cinfo.quant_tbl_ptrs[0];
|
|
190
|
+
int bestQ = -1;
|
|
191
|
+
long bestError = std::numeric_limits<long>::max();
|
|
192
|
+
|
|
193
|
+
for (int q = 1; q <= 100; q++) {
|
|
194
|
+
long scale = (q < 50) ? 5000L / q : 200L - 2L * q;
|
|
195
|
+
long error = 0;
|
|
196
|
+
for (int i = 0; i < 64; i++) {
|
|
197
|
+
long expected =
|
|
198
|
+
(static_cast<long>(std_luminance_qt[i]) * scale + 50L) / 100L;
|
|
199
|
+
if (expected < 1)
|
|
200
|
+
expected = 1;
|
|
201
|
+
if (expected > 255)
|
|
202
|
+
expected = 255;
|
|
203
|
+
long diff = static_cast<long>(tbl->quantval[i]) - expected;
|
|
204
|
+
error += (diff < 0) ? -diff : diff;
|
|
205
|
+
}
|
|
206
|
+
if (error < bestError) {
|
|
207
|
+
bestError = error;
|
|
208
|
+
bestQ = q;
|
|
209
|
+
}
|
|
210
|
+
// perfect match — stop early
|
|
211
|
+
if (error == 0)
|
|
212
|
+
break;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
jpeg_destroy_decompress(&cinfo);
|
|
216
|
+
|
|
217
|
+
// if the best match is poor (avg > 2 per coefficient), the tables are
|
|
218
|
+
// non-standard — return -1 to signal we can't reliably estimate
|
|
219
|
+
if (bestError > 128)
|
|
220
|
+
return -1;
|
|
221
|
+
|
|
222
|
+
return bestQ;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
int estimateJpegQuality(const unsigned char *data, size_t size) {
|
|
226
|
+
return estimateJpegQualityImpl(data, size);
|
|
227
|
+
}
|
package/src/jpeg.h
CHANGED
|
@@ -24,3 +24,7 @@ bool losslessJpegOptimize(const unsigned char *data, size_t size,
|
|
|
24
24
|
// encodes raw pixels as JPEG at the given quality (1–100) via libjpeg-turbo
|
|
25
25
|
bool encodeJpeg(const unsigned char *pixels, int width, int height,
|
|
26
26
|
int components, int quality, std::vector<uint8_t> &out);
|
|
27
|
+
|
|
28
|
+
// estimates the IJG quality factor (1–100) from a JPEG's quantization tables.
|
|
29
|
+
// returns -1 if the quality cannot be determined (corrupt, non-standard tables)
|
|
30
|
+
int estimateJpegQuality(const unsigned char *data, size_t size);
|
package/src/qpdf_addon.cc
CHANGED
|
@@ -51,17 +51,20 @@ class CompressWorker : public Napi::AsyncWorker {
|
|
|
51
51
|
public:
|
|
52
52
|
// buffer variant
|
|
53
53
|
CompressWorker(Napi::Env env, std::vector<uint8_t> data, bool lossy,
|
|
54
|
-
int quality,
|
|
54
|
+
int quality, int maxDpi, bool stripMeta,
|
|
55
|
+
std::string outputPath)
|
|
55
56
|
: Napi::AsyncWorker(env), deferred_(Napi::Promise::Deferred::New(env)),
|
|
56
57
|
bufferData_(std::move(data)), lossy_(lossy), quality_(quality),
|
|
57
|
-
|
|
58
|
+
maxDpi_(maxDpi), stripMeta_(stripMeta), useFile_(false),
|
|
59
|
+
outputPath_(std::move(outputPath)) {}
|
|
58
60
|
|
|
59
61
|
// file path variant
|
|
60
62
|
CompressWorker(Napi::Env env, std::string path, bool lossy, int quality,
|
|
61
|
-
std::string outputPath)
|
|
63
|
+
int maxDpi, bool stripMeta, std::string outputPath)
|
|
62
64
|
: Napi::AsyncWorker(env), deferred_(Napi::Promise::Deferred::New(env)),
|
|
63
65
|
filePath_(std::move(path)), lossy_(lossy), quality_(quality),
|
|
64
|
-
|
|
66
|
+
maxDpi_(maxDpi), stripMeta_(stripMeta), useFile_(true),
|
|
67
|
+
outputPath_(std::move(outputPath)) {}
|
|
65
68
|
|
|
66
69
|
Napi::Promise Promise() { return deferred_.Promise(); }
|
|
67
70
|
|
|
@@ -90,9 +93,17 @@ protected:
|
|
|
90
93
|
}
|
|
91
94
|
|
|
92
95
|
deduplicateImages(qpdf);
|
|
93
|
-
if (lossy_)
|
|
94
|
-
|
|
96
|
+
if (lossy_) {
|
|
97
|
+
CompressOptions opts;
|
|
98
|
+
opts.quality = quality_;
|
|
99
|
+
optimizeImages(qpdf, opts);
|
|
100
|
+
}
|
|
101
|
+
if (maxDpi_ > 0)
|
|
102
|
+
downscaleImages(qpdf, maxDpi_);
|
|
95
103
|
optimizeExistingJpegs(qpdf);
|
|
104
|
+
removeUnusedFonts(qpdf);
|
|
105
|
+
if (stripMeta_)
|
|
106
|
+
stripMetadata(qpdf);
|
|
96
107
|
|
|
97
108
|
Pl_Flate::setCompressionLevel(9);
|
|
98
109
|
|
|
@@ -143,6 +154,8 @@ private:
|
|
|
143
154
|
std::string filePath_;
|
|
144
155
|
bool lossy_;
|
|
145
156
|
int quality_;
|
|
157
|
+
int maxDpi_;
|
|
158
|
+
bool stripMeta_;
|
|
146
159
|
bool useFile_;
|
|
147
160
|
std::string outputPath_;
|
|
148
161
|
std::vector<uint8_t> result_;
|
|
@@ -162,7 +175,9 @@ static Napi::Value Compress(const Napi::CallbackInfo &info) {
|
|
|
162
175
|
}
|
|
163
176
|
|
|
164
177
|
bool lossy = false;
|
|
165
|
-
int quality =
|
|
178
|
+
int quality = 0;
|
|
179
|
+
int maxDpi = 0;
|
|
180
|
+
bool stripMeta = false;
|
|
166
181
|
std::string outputPath;
|
|
167
182
|
|
|
168
183
|
if (info.Length() >= 2 && info[1].IsObject()) {
|
|
@@ -180,12 +195,21 @@ static Napi::Value Compress(const Napi::CallbackInfo &info) {
|
|
|
180
195
|
|
|
181
196
|
if (options.Has("quality")) {
|
|
182
197
|
quality = options.Get("quality").As<Napi::Number>().Int32Value();
|
|
183
|
-
if (quality <
|
|
184
|
-
quality =
|
|
198
|
+
if (quality < 0)
|
|
199
|
+
quality = 0;
|
|
185
200
|
if (quality > 100)
|
|
186
201
|
quality = 100;
|
|
187
202
|
}
|
|
188
203
|
|
|
204
|
+
if (options.Has("maxDpi")) {
|
|
205
|
+
maxDpi = options.Get("maxDpi").As<Napi::Number>().Int32Value();
|
|
206
|
+
if (maxDpi < 0)
|
|
207
|
+
maxDpi = 0;
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
if (options.Has("stripMetadata"))
|
|
211
|
+
stripMeta = options.Get("stripMetadata").As<Napi::Boolean>().Value();
|
|
212
|
+
|
|
189
213
|
if (options.Has("output"))
|
|
190
214
|
outputPath = options.Get("output").As<Napi::String>().Utf8Value();
|
|
191
215
|
}
|
|
@@ -194,7 +218,7 @@ static Napi::Value Compress(const Napi::CallbackInfo &info) {
|
|
|
194
218
|
auto buf = info[0].As<Napi::Buffer<uint8_t>>();
|
|
195
219
|
std::vector<uint8_t> data(buf.Data(), buf.Data() + buf.Length());
|
|
196
220
|
auto *worker = new CompressWorker(env, std::move(data), lossy, quality,
|
|
197
|
-
std::move(outputPath));
|
|
221
|
+
maxDpi, stripMeta, std::move(outputPath));
|
|
198
222
|
worker->Queue();
|
|
199
223
|
return worker->Promise();
|
|
200
224
|
}
|
|
@@ -202,7 +226,7 @@ static Napi::Value Compress(const Napi::CallbackInfo &info) {
|
|
|
202
226
|
if (info[0].IsString()) {
|
|
203
227
|
auto path = info[0].As<Napi::String>().Utf8Value();
|
|
204
228
|
auto *worker = new CompressWorker(env, std::move(path), lossy, quality,
|
|
205
|
-
std::move(outputPath));
|
|
229
|
+
maxDpi, stripMeta, std::move(outputPath));
|
|
206
230
|
worker->Queue();
|
|
207
231
|
return worker->Promise();
|
|
208
232
|
}
|