qpdf-compress 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,8 +17,11 @@ import { compress } from 'qpdf-compress';
17
17
  // lossless — optimize without touching image quality
18
18
  const optimized = await compress(pdfBuffer, { mode: 'lossless' });
19
19
 
20
- // lossy — recompress images as JPEG for maximum savings
21
- const smaller = await compress(pdfBuffer, { mode: 'lossy', quality: 50 });
20
+ // lossy — auto quality, downscale to 75 DPI, strip metadata
21
+ const smaller = await compress(pdfBuffer, { mode: 'lossy' });
22
+
23
+ // lossy with explicit quality
24
+ const tiny = await compress(pdfBuffer, { mode: 'lossy', quality: 50 });
22
25
  ```
23
26
 
24
27
  ## 💡 Why qpdf-compress?
@@ -28,6 +31,7 @@ const smaller = await compress(pdfBuffer, { mode: 'lossy', quality: 50 });
28
31
  - Native C++ — no WASM overhead, no shell-out to CLI tools
29
32
  - Non-blocking — all operations run off the main thread via N-API AsyncWorker
30
33
  - Multi-pass optimization — image dedup, JPEG Huffman optimization, Flate level 9
34
+ - Smart defaults — DPI downscaling, metadata stripping, adaptive JPEG quality
31
35
 
32
36
  **🛠️ Developer experience**
33
37
 
@@ -52,16 +56,20 @@ const smaller = await compress(pdfBuffer, { mode: 'lossy', quality: 50 });
52
56
 
53
57
  ### 📊 How it compares
54
58
 
55
- | | **qpdf-compress** | qpdf CLI | Ghostscript |
56
- | ------------------------- | ----------------------- | ----------------- | ----------------- |
57
- | Integration | Native Node.js addon | Shell exec | Shell exec |
58
- | Async I/O | ✅ Non-blocking | ❌ Blocks on exec | ❌ Blocks on exec |
59
- | Image deduplication | ✅ | ❌ | ❌ |
60
- | JPEG Huffman optimization | ✅ Lossless (libjpeg) | ❌ | ❌ |
61
- | Lossy image compression | ✅ Configurable quality | ❌ | ✅ |
62
- | PDF repair | ✅ Automatic | ✅ Manual flag | ⚠️ Partial |
63
- | License | Apache-2.0 | Apache-2.0 | AGPL-3.0 ⚠️ |
64
- | Dependencies | None¹ | System binary | System binary |
59
+ | | **qpdf-compress** | qpdf CLI | Ghostscript |
60
+ | ------------------------- | ------------------------ | ----------------- | ----------------- |
61
+ | Integration | Native Node.js addon | Shell exec | Shell exec |
62
+ | Async I/O | ✅ Non-blocking | ❌ Blocks on exec | ❌ Blocks on exec |
63
+ | Image deduplication | ✅ | ❌ | ❌ |
64
+ | JPEG Huffman optimization | ✅ Lossless (libjpeg) | ❌ | ❌ |
65
+ | Lossy image compression | ✅ Auto or fixed quality | ❌ | ✅ |
66
+ | CMYK → RGB conversion | ✅ Automatic | | |
67
+ | DPI downscaling | ✅ Configurable | | |
68
+ | Metadata stripping | ✅ Default on | Manual flag | ✅ |
69
+ | Unused font removal | ✅ Automatic | ❌ | ❌ |
70
+ | PDF repair | ✅ Automatic | ✅ Manual flag | ⚠️ Partial |
71
+ | License | Apache-2.0 | Apache-2.0 | AGPL-3.0 ⚠️ |
72
+ | Dependencies | None¹ | System binary | System binary |
65
73
 
66
74
  ¹ QPDF is statically linked — no runtime dependencies. Prebuilt binaries downloaded at install.
67
75
 
@@ -111,12 +119,19 @@ import { compress } from 'qpdf-compress';
111
119
  // lossless — optimize streams without touching image quality
112
120
  const optimized = await compress(pdfBuffer, { mode: 'lossless' });
113
121
 
114
- // lossy — recompress images as JPEG (default quality: 75)
122
+ // lossy — auto quality per image (skips JPEGs ≤ q90, encodes rest at q85)
115
123
  const smaller = await compress(pdfBuffer, { mode: 'lossy' });
116
124
 
117
- // lossy with custom quality (1–100)
125
+ // lossy with explicit quality (1–100)
118
126
  const tiny = await compress(pdfBuffer, { mode: 'lossy', quality: 50 });
119
127
 
128
+ // control DPI downscaling (default: 75, 0 = disabled)
129
+ const highRes = await compress(pdfBuffer, { mode: 'lossless', maxDpi: 150 });
130
+ const noDpi = await compress(pdfBuffer, { mode: 'lossless', maxDpi: 0 });
131
+
132
+ // keep metadata (stripped by default)
133
+ const withMeta = await compress(pdfBuffer, { mode: 'lossless', stripMetadata: false });
134
+
120
135
  // file path input (avoids copying into memory twice)
121
136
  const result = await compress('/path/to/file.pdf', { mode: 'lossless' });
122
137
 
@@ -135,27 +150,35 @@ const fixed = await compress(damagedBuffer, { mode: 'lossless' });
135
150
 
136
151
  Compresses a PDF document. Automatically repairs damaged PDFs.
137
152
 
138
- | Parameter | Type | Description |
139
- | ----------------- | ----------------------- | -------------------------------------------------- |
140
- | `input` | `Buffer \| string` | PDF data or file path |
141
- | `options.mode` | `'lossy' \| 'lossless'` | Compression mode |
142
- | `options.quality` | `number` | JPEG quality 1–100 (lossy only, default: 75) |
143
- | `options.output` | `string` | Write to file path instead of returning a `Buffer` |
153
+ | Parameter | Type | Description |
154
+ | ----------------------- | ----------------------- | -------------------------------------------------------------------- |
155
+ | `input` | `Buffer \| string` | PDF data or file path |
156
+ | `options.mode` | `'lossy' \| 'lossless'` | Compression mode |
157
+ | `options.quality` | `number` | JPEG quality 1–100 (lossy only). Omit for auto quality (recommended) |
158
+ | `options.maxDpi` | `number` | Downscale images exceeding this DPI. Default: `75`. `0` = disabled |
159
+ | `options.stripMetadata` | `boolean` | Remove XMP metadata, document info, and thumbnails. Default: `true` |
160
+ | `options.output` | `string` | Write to file path instead of returning a `Buffer` |
144
161
 
145
- **Lossless mode:**
162
+ **Both modes:**
146
163
 
147
164
  - Deduplicates identical images across pages
148
165
  - Optimizes embedded JPEG Huffman tables (2–15% savings, zero quality loss)
149
166
  - Recompresses all decodable streams with Flate level 9
150
167
  - Generates object streams for smaller metadata overhead
151
- - Removes unreferenced objects
168
+ - Removes unreferenced objects and unused fonts
169
+ - Downscales images exceeding `maxDpi` (default: 75 DPI)
170
+ - Strips XMP metadata, document info, and thumbnails (default: on)
171
+ - Converts CMYK and ICCBased color spaces to RGB
172
+ - Automatically repairs damaged PDFs
152
173
 
153
- **Lossy mode** (in addition to lossless optimizations):
174
+ **Lossy mode** (in addition to the above):
154
175
 
155
- - Extracts 8-bit RGB and grayscale images
156
- - Recompresses as JPEG at the specified quality
176
+ - Extracts 8-bit RGB, grayscale, and CMYK images
177
+ - **Auto quality** (default): skips existing JPEGs at q ≤ 90, encodes the rest at q85
178
+ - **Explicit quality**: recompresses all images at the specified quality (1–100)
157
179
  - Only replaces images where JPEG is actually smaller
158
- - Skips tiny images (< 50×50 px), CMYK, and indexed color
180
+ - Skips re-encoding when estimated quality is already at or below target
181
+ - Skips tiny images (< 50×50 px)
159
182
 
160
183
  ## ⚙️ How it works
161
184
 
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../lib/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,eAAe,EAAe,MAAM,YAAY,CAAC;AAY/D,KAAK,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAEhC;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CACtB,KAAK,EAAE,QAAQ,EACf,OAAO,EAAE,eAAe,GAAG;IAAE,MAAM,EAAE,MAAM,CAAA;CAAE,GAC5C,OAAO,CAAC,IAAI,CAAC,CAAC;AACjB,wBAAgB,QAAQ,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AA4BrF,YAAY,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../lib/index.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,eAAe,EAAe,MAAM,YAAY,CAAC;AAY/D,KAAK,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAEhC;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CACtB,KAAK,EAAE,QAAQ,EACf,OAAO,EAAE,eAAe,GAAG;IAAE,MAAM,EAAE,MAAM,CAAA;CAAE,GAC5C,OAAO,CAAC,IAAI,CAAC,CAAC;AACjB,wBAAgB,QAAQ,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AAgCrF,YAAY,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC"}
package/dist/index.js CHANGED
@@ -26,13 +26,17 @@ export async function compress(input, options) {
26
26
  if (mode !== 'lossy' && mode !== 'lossless') {
27
27
  throw new TypeError("Mode must be 'lossy' or 'lossless'");
28
28
  }
29
- const quality = options.quality ?? 75;
30
- if (quality < 1 || quality > 100) {
29
+ const quality = options.quality ?? 0;
30
+ if (quality !== 0 && (quality < 1 || quality > 100)) {
31
31
  throw new RangeError('Quality must be between 1 and 100');
32
32
  }
33
+ const maxDpi = options.maxDpi ?? 75;
34
+ const stripMetadata = options.stripMetadata ?? true;
33
35
  return addon.compress(input, {
34
36
  mode,
35
37
  quality,
38
+ ...(maxDpi > 0 ? { maxDpi } : {}),
39
+ ...(stripMetadata ? { stripMetadata: true } : {}),
36
40
  ...(options.output ? { output: options.output } : {}),
37
41
  });
38
42
  }
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../lib/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAGzC,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE1D,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;AAC9D,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,QAAQ,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC;AAC7D,CAAC;AAED,MAAM,KAAK,GAAgB,OAAO,CAAC,qCAAqC,CAAC,CAAC;AAmB1E,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,KAAe,EAAE,OAAwB;IACtE,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,8BAA8B,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;SAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QACrC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,4BAA4B,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,SAAS,CAAC,4CAA4C,CAAC,CAAC;IACpE,CAAC;IACD,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAC1B,IAAI,IAAI,KAAK,OAAO,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;QAC5C,MAAM,IAAI,SAAS,CAAC,oCAAoC,CAAC,CAAC;IAC5D,CAAC;IACD,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;IACtC,IAAI,OAAO,GAAG,CAAC,IAAI,OAAO,GAAG,GAAG,EAAE,CAAC;QACjC,MAAM,IAAI,UAAU,CAAC,mCAAmC,CAAC,CAAC;IAC5D,CAAC;IACD,OAAO,KAAK,CAAC,QAAQ,CAAC,KAAK,EAAE;QAC3B,IAAI;QACJ,OAAO;QACP,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACtD,CAA2B,CAAC;AAC/B,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../lib/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAGzC,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE1D,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;AAC9D,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;IACjC,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,QAAQ,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC;AAC7D,CAAC;AAED,MAAM,KAAK,GAAgB,OAAO,CAAC,qCAAqC,CAAC,CAAC;AAmB1E,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,KAAe,EAAE,OAAwB;IACtE,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,8BAA8B,CAAC,CAAC;QACtD,CAAC;IACH,CAAC;SAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QACrC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,SAAS,CAAC,4BAA4B,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,SAAS,CAAC,4CAA4C,CAAC,CAAC;IACpE,CAAC;IACD,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAC1B,IAAI,IAAI,KAAK,OAAO,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;QAC5C,MAAM,IAAI,SAAS,CAAC,oCAAoC,CAAC,CAAC;IAC5D,CAAC;IACD,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC;IACrC,IAAI,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,GAAG,CAAC,IAAI,OAAO,GAAG,GAAG,CAAC,EAAE,CAAC;QACpD,MAAM,IAAI,UAAU,CAAC,mCAAmC,CAAC,CAAC;IAC5D,CAAC;IACD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,EAAE,CAAC;IACpC,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,IAAI,CAAC;IACpD,OAAO,KAAK,CAAC,QAAQ,CAAC,KAAK,EAAE;QAC3B,IAAI;QACJ,OAAO;QACP,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACjC,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACjD,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACtD,CAA2B,CAAC;AAC/B,CAAC"}
package/dist/types.d.ts CHANGED
@@ -1,8 +1,12 @@
1
1
  export interface CompressOptions {
2
2
  /** Compression mode. */
3
3
  readonly mode: 'lossy' | 'lossless';
4
- /** JPEG quality for lossy mode (1–100). Default: 75. */
4
+ /** JPEG quality for lossy mode (1–100). When omitted, automatically determines optimal quality per image (capped at 85). */
5
5
  readonly quality?: number;
6
+ /** Maximum image DPI. Images exceeding this are downscaled. 0 = no limit. Default: 75. */
7
+ readonly maxDpi?: number;
8
+ /** Remove XMP metadata, document info, and thumbnails. Default: true. */
9
+ readonly stripMetadata?: boolean;
6
10
  /** Write to this file path instead of returning a Buffer. */
7
11
  readonly output?: string;
8
12
  }
@@ -10,6 +14,8 @@ export interface NativeAddon {
10
14
  compress(input: Buffer | string, options: {
11
15
  mode: 'lossy' | 'lossless';
12
16
  quality: number;
17
+ maxDpi?: number;
18
+ stripMetadata?: boolean;
13
19
  output?: string;
14
20
  }): Promise<Buffer | undefined>;
15
21
  }
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../lib/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,eAAe;IAC9B,wBAAwB;IACxB,QAAQ,CAAC,IAAI,EAAE,OAAO,GAAG,UAAU,CAAC;IACpC,wDAAwD;IACxD,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,6DAA6D;IAC7D,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,CACN,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,OAAO,EAAE;QAAE,IAAI,EAAE,OAAO,GAAG,UAAU,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,GACxE,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;CAChC"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../lib/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,eAAe;IAC9B,wBAAwB;IACxB,QAAQ,CAAC,IAAI,EAAE,OAAO,GAAG,UAAU,CAAC;IACpC,4HAA4H;IAC5H,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAC1B,0FAA0F;IAC1F,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,yEAAyE;IACzE,QAAQ,CAAC,aAAa,CAAC,EAAE,OAAO,CAAC;IACjC,6DAA6D;IAC7D,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,CACN,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,OAAO,EAAE;QACP,IAAI,EAAE,OAAO,GAAG,UAAU,CAAC;QAC3B,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,aAAa,CAAC,EAAE,OAAO,CAAC;QACxB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GACA,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;CAChC"}
package/lib/index.ts CHANGED
@@ -46,13 +46,17 @@ export async function compress(input: PdfInput, options: CompressOptions): Promi
46
46
  if (mode !== 'lossy' && mode !== 'lossless') {
47
47
  throw new TypeError("Mode must be 'lossy' or 'lossless'");
48
48
  }
49
- const quality = options.quality ?? 75;
50
- if (quality < 1 || quality > 100) {
49
+ const quality = options.quality ?? 0;
50
+ if (quality !== 0 && (quality < 1 || quality > 100)) {
51
51
  throw new RangeError('Quality must be between 1 and 100');
52
52
  }
53
+ const maxDpi = options.maxDpi ?? 75;
54
+ const stripMetadata = options.stripMetadata ?? true;
53
55
  return addon.compress(input, {
54
56
  mode,
55
57
  quality,
58
+ ...(maxDpi > 0 ? { maxDpi } : {}),
59
+ ...(stripMetadata ? { stripMetadata: true } : {}),
56
60
  ...(options.output ? { output: options.output } : {}),
57
61
  }) as Promise<Buffer | void>;
58
62
  }
package/lib/types.ts CHANGED
@@ -1,8 +1,12 @@
1
1
  export interface CompressOptions {
2
2
  /** Compression mode. */
3
3
  readonly mode: 'lossy' | 'lossless';
4
- /** JPEG quality for lossy mode (1–100). Default: 75. */
4
+ /** JPEG quality for lossy mode (1–100). When omitted, automatically determines optimal quality per image (capped at 85). */
5
5
  readonly quality?: number;
6
+ /** Maximum image DPI. Images exceeding this are downscaled. 0 = no limit. Default: 75. */
7
+ readonly maxDpi?: number;
8
+ /** Remove XMP metadata, document info, and thumbnails. Default: true. */
9
+ readonly stripMetadata?: boolean;
6
10
  /** Write to this file path instead of returning a Buffer. */
7
11
  readonly output?: string;
8
12
  }
@@ -10,6 +14,12 @@ export interface CompressOptions {
10
14
  export interface NativeAddon {
11
15
  compress(
12
16
  input: Buffer | string,
13
- options: { mode: 'lossy' | 'lossless'; quality: number; output?: string },
17
+ options: {
18
+ mode: 'lossy' | 'lossless';
19
+ quality: number; // 0 = auto, 1–100 = fixed
20
+ maxDpi?: number;
21
+ stripMetadata?: boolean;
22
+ output?: string;
23
+ },
14
24
  ): Promise<Buffer | undefined>;
15
25
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qpdf-compress",
3
- "version": "0.1.3",
3
+ "version": "0.2.0",
4
4
  "description": "Native PDF compression for Node.js, powered by QPDF",
5
5
  "license": "Apache-2.0",
6
6
  "type": "module",
package/src/images.cc CHANGED
@@ -1,6 +1,7 @@
1
1
  #include "images.h"
2
2
  #include "jpeg.h"
3
3
 
4
+ #include <algorithm>
4
5
  #include <cstdint>
5
6
  #include <cstring>
6
7
  #include <limits>
@@ -10,95 +11,243 @@
10
11
 
11
12
  #include <qpdf/Buffer.hh>
12
13
 
14
+ // ---------------------------------------------------------------------------
15
+ // Color space helpers
16
+ // ---------------------------------------------------------------------------
17
+
18
+ // resolves /ColorSpace to component count and whether it's CMYK.
19
+ // returns 0 on unsupported color spaces.
20
+ static int resolveColorSpace(QPDFObjectHandle cs, bool &isCMYK) {
21
+ isCMYK = false;
22
+
23
+ if (cs.isName()) {
24
+ auto name = cs.getName();
25
+ if (name == "/DeviceRGB")
26
+ return 3;
27
+ if (name == "/DeviceGray")
28
+ return 1;
29
+ if (name == "/DeviceCMYK") {
30
+ isCMYK = true;
31
+ return 4;
32
+ }
33
+ return 0;
34
+ }
35
+
36
+ // ICCBased: [/ICCBased <stream>] — get /N from the ICC profile stream dict
37
+ if (cs.isArray() && cs.getArrayNItems() >= 2) {
38
+ auto csName = cs.getArrayItem(0);
39
+ if (csName.isName() && csName.getName() == "/ICCBased") {
40
+ auto profile = cs.getArrayItem(1);
41
+ if (profile.isStream()) {
42
+ auto n = profile.getDict().getKey("/N");
43
+ if (n.isInteger()) {
44
+ int components = static_cast<int>(n.getIntValue());
45
+ if (components == 4)
46
+ isCMYK = true;
47
+ if (components == 1 || components == 3 || components == 4)
48
+ return components;
49
+ }
50
+ }
51
+ }
52
+ }
53
+
54
+ return 0;
55
+ }
56
+
57
+ // naive CMYK → RGB conversion (without ICC profile).
58
+ // uses the standard formula: R = 255 * (1-C) * (1-K), etc.
59
+ static void cmykToRgb(const unsigned char *cmyk, unsigned char *rgb,
60
+ size_t pixelCount) {
61
+ for (size_t i = 0; i < pixelCount; ++i) {
62
+ double c = cmyk[i * 4 + 0] / 255.0;
63
+ double m = cmyk[i * 4 + 1] / 255.0;
64
+ double y = cmyk[i * 4 + 2] / 255.0;
65
+ double k = cmyk[i * 4 + 3] / 255.0;
66
+ rgb[i * 3 + 0] =
67
+ static_cast<unsigned char>(255.0 * (1.0 - c) * (1.0 - k) + 0.5);
68
+ rgb[i * 3 + 1] =
69
+ static_cast<unsigned char>(255.0 * (1.0 - m) * (1.0 - k) + 0.5);
70
+ rgb[i * 3 + 2] =
71
+ static_cast<unsigned char>(255.0 * (1.0 - y) * (1.0 - k) + 0.5);
72
+ }
73
+ }
74
+
75
+ // ---------------------------------------------------------------------------
76
+ // Bilinear downscaling
77
+ // ---------------------------------------------------------------------------
78
+
79
+ static std::vector<uint8_t> bilinearDownscale(const unsigned char *src,
80
+ int srcW, int srcH,
81
+ int components, int dstW,
82
+ int dstH) {
83
+ std::vector<uint8_t> dst(static_cast<size_t>(dstW) * dstH * components);
84
+
85
+ double xRatio = static_cast<double>(srcW) / dstW;
86
+ double yRatio = static_cast<double>(srcH) / dstH;
87
+
88
+ for (int y = 0; y < dstH; ++y) {
89
+ double srcY = y * yRatio;
90
+ int y0 = static_cast<int>(srcY);
91
+ int y1 = std::min(y0 + 1, srcH - 1);
92
+ double fy = srcY - y0;
93
+
94
+ for (int x = 0; x < dstW; ++x) {
95
+ double srcX = x * xRatio;
96
+ int x0 = static_cast<int>(srcX);
97
+ int x1 = std::min(x0 + 1, srcW - 1);
98
+ double fx = srcX - x0;
99
+
100
+ for (int c = 0; c < components; ++c) {
101
+ double v00 = src[(y0 * srcW + x0) * components + c];
102
+ double v10 = src[(y0 * srcW + x1) * components + c];
103
+ double v01 = src[(y1 * srcW + x0) * components + c];
104
+ double v11 = src[(y1 * srcW + x1) * components + c];
105
+
106
+ double val = v00 * (1 - fx) * (1 - fy) + v10 * fx * (1 - fy) +
107
+ v01 * (1 - fx) * fy + v11 * fx * fy;
108
+ dst[(y * dstW + x) * components + c] =
109
+ static_cast<uint8_t>(std::min(std::max(val + 0.5, 0.0), 255.0));
110
+ }
111
+ }
112
+ }
113
+
114
+ return dst;
115
+ }
116
+
13
117
  // ---------------------------------------------------------------------------
14
118
  // Image recompression for lossy mode
15
119
  // ---------------------------------------------------------------------------
16
120
 
17
- void optimizeImages(QPDF &qpdf, int quality) {
18
- forEachImage(
19
- qpdf, [&](const std::string &, QPDFObjectHandle xobj, QPDFObjectHandle) {
20
- auto dict = xobj.getDict();
121
+ // auto-mode thresholds: only re-encode existing JPEGs whose estimated
122
+ // quality exceeds kAutoSkipThreshold (avoids pointless re-encoding where
123
+ // generation loss outweighs size savings). Non-JPEG images and high-quality
124
+ // JPEGs are (re-)encoded at kAutoTargetQuality.
125
+ static constexpr int kAutoSkipThreshold = 90;
126
+ static constexpr int kAutoTargetQuality = 85;
127
+
128
+ void optimizeImages(QPDF &qpdf, const CompressOptions &opts) {
129
+ const bool autoQuality = (opts.quality == 0);
130
+ forEachImage(qpdf, [&](const std::string &, QPDFObjectHandle xobj,
131
+ QPDFObjectHandle, QPDFPageObjectHelper &) {
132
+ auto dict = xobj.getDict();
133
+
134
+ // only handle 8-bit images
135
+ if (!dict.getKey("/BitsPerComponent").isInteger() ||
136
+ dict.getKey("/BitsPerComponent").getIntValue() != 8)
137
+ return;
21
138
 
22
- // only handle 8-bit images
23
- if (!dict.getKey("/BitsPerComponent").isInteger() ||
24
- dict.getKey("/BitsPerComponent").getIntValue() != 8)
25
- return;
139
+ int width = 0, height = 0;
140
+ if (dict.getKey("/Width").isInteger())
141
+ width = static_cast<int>(dict.getKey("/Width").getIntValue());
142
+ if (dict.getKey("/Height").isInteger())
143
+ height = static_cast<int>(dict.getKey("/Height").getIntValue());
26
144
 
27
- int width = 0, height = 0, components = 0;
28
- if (dict.getKey("/Width").isInteger())
29
- width = static_cast<int>(dict.getKey("/Width").getIntValue());
30
- if (dict.getKey("/Height").isInteger())
31
- height = static_cast<int>(dict.getKey("/Height").getIntValue());
145
+ if (width <= 0 || height <= 0 || width > 16384 || height > 16384)
146
+ return;
32
147
 
33
- if (width <= 0 || height <= 0 || width > 16384 || height > 16384)
34
- return;
148
+ // determine color components via color space resolution
149
+ bool isCMYK = false;
150
+ int components = resolveColorSpace(dict.getKey("/ColorSpace"), isCMYK);
151
+ if (components == 0)
152
+ return;
35
153
 
36
- // determine color components
37
- auto cs = dict.getKey("/ColorSpace");
38
- if (cs.isName()) {
39
- if (cs.getName() == "/DeviceRGB")
40
- components = 3;
41
- else if (cs.getName() == "/DeviceGray")
42
- components = 1;
43
- else
44
- return; // skip CMYK, Lab, etc.
45
- } else {
46
- return; // skip indexed, ICCBased, etc.
47
- }
154
+ // skip tiny images (logos, icons)
155
+ if (width * height < 2500)
156
+ return;
48
157
 
49
- // skip tiny images (logos, icons)
50
- if (width * height < 2500)
51
- return;
158
+ // get fully decoded stream data (raw pixels)
159
+ std::shared_ptr<Buffer> streamData;
160
+ try {
161
+ streamData = xobj.getStreamData(qpdf_dl_all);
162
+ } catch (...) {
163
+ return;
164
+ }
52
165
 
53
- // get fully decoded stream data (raw pixels)
54
- std::shared_ptr<Buffer> streamData;
55
- try {
56
- streamData = xobj.getStreamData(qpdf_dl_all);
57
- } catch (...) {
58
- return;
59
- }
166
+ // overflow-safe size calculation
167
+ auto w = static_cast<size_t>(width);
168
+ auto h = static_cast<size_t>(height);
169
+ auto c = static_cast<size_t>(components);
170
+ if (h > 0 && w > std::numeric_limits<size_t>::max() / h)
171
+ return;
172
+ if (c > 0 && (w * h) > std::numeric_limits<size_t>::max() / c)
173
+ return;
174
+ size_t expectedSize = w * h * c;
175
+ if (streamData->getSize() != expectedSize)
176
+ return;
60
177
 
61
- // overflow-safe size calculation
62
- auto w = static_cast<size_t>(width);
63
- auto h = static_cast<size_t>(height);
64
- auto c = static_cast<size_t>(components);
65
- if (h > 0 && w > std::numeric_limits<size_t>::max() / h)
66
- return;
67
- if (c > 0 && (w * h) > std::numeric_limits<size_t>::max() / c)
178
+ // convert CMYK → RGB before JPEG encoding (most decoders do not
179
+ // natively support CMYK JPEGs)
180
+ const unsigned char *pixels = streamData->getBuffer();
181
+ std::vector<uint8_t> rgbBuf;
182
+ int encodeComponents = components;
183
+ if (isCMYK) {
184
+ size_t pixelCount = w * h;
185
+ rgbBuf.resize(pixelCount * 3);
186
+ cmykToRgb(pixels, rgbBuf.data(), pixelCount);
187
+ pixels = rgbBuf.data();
188
+ encodeComponents = 3;
189
+ }
190
+
191
+ auto currentFilter = dict.getKey("/Filter");
192
+ bool isCurrentlyJpeg =
193
+ currentFilter.isName() && currentFilter.getName() == "/DCTDecode";
194
+
195
+ // determine per-image target quality
196
+ int targetQuality = autoQuality ? kAutoTargetQuality : opts.quality;
197
+
198
+ // in auto mode, skip existing JPEGs unless their quality is very high
199
+ // (> 90) — re-encoding a q86 JPEG at q85 saves almost nothing but adds
200
+ // artifacts. Only high-quality originals (92, 95, 100…) benefit from
201
+ // re-encoding down to 85.
202
+ if (isCurrentlyJpeg && !isCMYK) {
203
+ auto rawData = xobj.getRawStreamData();
204
+ int existingQ =
205
+ estimateJpegQuality(rawData->getBuffer(), rawData->getSize());
206
+ if (autoQuality) {
207
+ if (existingQ > 0 && existingQ <= kAutoSkipThreshold)
68
208
  return;
69
- size_t expectedSize = w * h * c;
70
- if (streamData->getSize() != expectedSize)
209
+ } else {
210
+ // explicit quality: skip JPEGs already at or below the requested quality
211
+ if (existingQ > 0 && existingQ <= targetQuality)
71
212
  return;
213
+ }
214
+ }
72
215
 
73
- auto currentFilter = dict.getKey("/Filter");
74
- bool isCurrentlyJpeg =
75
- currentFilter.isName() && currentFilter.getName() == "/DCTDecode";
216
+ // encode as JPEG via libjpeg-turbo
217
+ std::vector<uint8_t> jpegData;
218
+ if (!encodeJpeg(pixels, width, height, encodeComponents, targetQuality,
219
+ jpegData))
220
+ return;
76
221
 
77
- // encode as JPEG via libjpeg-turbo
78
- std::vector<uint8_t> jpegData;
79
- if (!encodeJpeg(streamData->getBuffer(), width, height, components,
80
- quality, jpegData))
81
- return;
222
+ // for existing non-CMYK JPEGs, only replace if re-encoding actually reduced size
223
+ if (isCurrentlyJpeg && !isCMYK) {
224
+ auto rawData = xobj.getRawStreamData();
225
+ if (jpegData.size() >= rawData->getSize())
226
+ return;
227
+ }
82
228
 
83
- // only replace if we actually reduced size
84
- if (isCurrentlyJpeg) {
85
- auto rawData = xobj.getRawStreamData();
86
- if (jpegData.size() >= rawData->getSize())
87
- return;
88
- }
229
+ // replace stream data with JPEG
230
+ std::string jpegStr(reinterpret_cast<char *>(jpegData.data()),
231
+ jpegData.size());
232
+ xobj.replaceStreamData(jpegStr, QPDFObjectHandle::newName("/DCTDecode"),
233
+ QPDFObjectHandle::newNull());
234
+
235
+ // set color space to DeviceRGB/DeviceGray if converted from CMYK/ICCBased
236
+ if (isCMYK || !dict.getKey("/ColorSpace").isName() ||
237
+ dict.getKey("/ColorSpace").getName() != "/DeviceRGB") {
238
+ if (encodeComponents == 3)
239
+ dict.replaceKey("/ColorSpace", QPDFObjectHandle::newName("/DeviceRGB"));
240
+ else if (encodeComponents == 1)
241
+ dict.replaceKey("/ColorSpace",
242
+ QPDFObjectHandle::newName("/DeviceGray"));
243
+ }
89
244
 
90
- // replace stream data with JPEG
91
- std::string jpegStr(reinterpret_cast<char *>(jpegData.data()),
92
- jpegData.size());
93
- xobj.replaceStreamData(jpegStr, QPDFObjectHandle::newName("/DCTDecode"),
94
- QPDFObjectHandle::newNull());
95
-
96
- // remove FlateDecode-specific params
97
- if (dict.hasKey("/DecodeParms"))
98
- dict.removeKey("/DecodeParms");
99
- if (dict.hasKey("/Predictor"))
100
- dict.removeKey("/Predictor");
101
- });
245
+ // remove FlateDecode-specific params
246
+ if (dict.hasKey("/DecodeParms"))
247
+ dict.removeKey("/DecodeParms");
248
+ if (dict.hasKey("/Predictor"))
249
+ dict.removeKey("/Predictor");
250
+ });
102
251
  }
103
252
 
104
253
  // ---------------------------------------------------------------------------
@@ -117,7 +266,8 @@ void deduplicateImages(QPDF &qpdf) {
117
266
 
118
267
  // first pass: collect image objects and hash their raw data
119
268
  forEachImage(qpdf, [&](const std::string & /*key*/, QPDFObjectHandle xobj,
120
- QPDFObjectHandle /*xobjects*/) {
269
+ QPDFObjectHandle /*xobjects*/,
270
+ QPDFPageObjectHelper & /*page*/) {
121
271
  auto og = xobj.getObjGen();
122
272
  if (seen.count(og))
123
273
  return;
@@ -170,12 +320,13 @@ void deduplicateImages(QPDF &qpdf) {
170
320
  return;
171
321
 
172
322
  // third pass: rewrite XObject references to point to canonical objects
173
- forEachImage(qpdf, [&](const std::string &key, QPDFObjectHandle xobj,
174
- QPDFObjectHandle xobjects) {
175
- auto it = replacements.find(xobj.getObjGen());
176
- if (it != replacements.end())
177
- xobjects.replaceKey(key, it->second);
178
- });
323
+ forEachImage(qpdf,
324
+ [&](const std::string &key, QPDFObjectHandle xobj,
325
+ QPDFObjectHandle xobjects, QPDFPageObjectHelper & /*page*/) {
326
+ auto it = replacements.find(xobj.getObjGen());
327
+ if (it != replacements.end())
328
+ xobjects.replaceKey(key, it->second);
329
+ });
179
330
  }
180
331
 
181
332
  // ---------------------------------------------------------------------------
@@ -186,7 +337,8 @@ void optimizeExistingJpegs(QPDF &qpdf) {
186
337
  std::set<QPDFObjGen> processed;
187
338
 
188
339
  forEachImage(qpdf, [&](const std::string & /*key*/, QPDFObjectHandle xobj,
189
- QPDFObjectHandle /*xobjects*/) {
340
+ QPDFObjectHandle /*xobjects*/,
341
+ QPDFPageObjectHelper & /*page*/) {
190
342
  auto og = xobj.getObjGen();
191
343
  if (processed.count(og))
192
344
  return;
@@ -216,3 +368,229 @@ void optimizeExistingJpegs(QPDF &qpdf) {
216
368
  }
217
369
  });
218
370
  }
371
+
372
+ // ---------------------------------------------------------------------------
373
+ // DPI-based image downscaling
374
+ // ---------------------------------------------------------------------------
375
+
376
+ void downscaleImages(QPDF &qpdf, int maxDpi) {
377
+ if (maxDpi <= 0)
378
+ return;
379
+
380
+ std::set<QPDFObjGen> processed;
381
+
382
+ forEachImage(qpdf, [&](const std::string & /*key*/, QPDFObjectHandle xobj,
383
+ QPDFObjectHandle /*xobjects*/,
384
+ QPDFPageObjectHelper &page) {
385
+ auto og = xobj.getObjGen();
386
+ if (processed.count(og))
387
+ return;
388
+ processed.insert(og);
389
+
390
+ auto dict = xobj.getDict();
391
+
392
+ if (!dict.getKey("/BitsPerComponent").isInteger() ||
393
+ dict.getKey("/BitsPerComponent").getIntValue() != 8)
394
+ return;
395
+
396
+ int imgW = 0, imgH = 0;
397
+ if (dict.getKey("/Width").isInteger())
398
+ imgW = static_cast<int>(dict.getKey("/Width").getIntValue());
399
+ if (dict.getKey("/Height").isInteger())
400
+ imgH = static_cast<int>(dict.getKey("/Height").getIntValue());
401
+
402
+ if (imgW <= 0 || imgH <= 0)
403
+ return;
404
+
405
+ bool isCMYK = false;
406
+ int components = resolveColorSpace(dict.getKey("/ColorSpace"), isCMYK);
407
+ if (components == 0)
408
+ return;
409
+
410
+ // get page dimensions from MediaBox (in points, 72 per inch)
411
+ auto mediaBox = page.getAttribute("/MediaBox", false);
412
+ if (!mediaBox.isArray() || mediaBox.getArrayNItems() < 4)
413
+ return;
414
+
415
+ double pageW = 0, pageH = 0;
416
+ try {
417
+ pageW = mediaBox.getArrayItem(2).getNumericValue() -
418
+ mediaBox.getArrayItem(0).getNumericValue();
419
+ pageH = mediaBox.getArrayItem(3).getNumericValue() -
420
+ mediaBox.getArrayItem(1).getNumericValue();
421
+ } catch (...) {
422
+ return;
423
+ }
424
+
425
+ if (pageW <= 0 || pageH <= 0)
426
+ return;
427
+
428
+ // estimate effective DPI (assumes image fills page — conservative)
429
+ double dpiX = imgW / (pageW / 72.0);
430
+ double dpiY = imgH / (pageH / 72.0);
431
+ double effectiveDpi = std::max(dpiX, dpiY);
432
+
433
+ if (effectiveDpi <= maxDpi)
434
+ return;
435
+
436
+ // calculate target dimensions
437
+ double scale = static_cast<double>(maxDpi) / effectiveDpi;
438
+ int newW = std::max(1, static_cast<int>(imgW * scale + 0.5));
439
+ int newH = std::max(1, static_cast<int>(imgH * scale + 0.5));
440
+
441
+ // not worth downscaling if the reduction is minimal
442
+ if (newW >= imgW - 1 && newH >= imgH - 1)
443
+ return;
444
+
445
+ // decode pixels
446
+ std::shared_ptr<Buffer> streamData;
447
+ try {
448
+ streamData = xobj.getStreamData(qpdf_dl_all);
449
+ } catch (...) {
450
+ return;
451
+ }
452
+
453
+ auto w = static_cast<size_t>(imgW);
454
+ auto h = static_cast<size_t>(imgH);
455
+ auto c = static_cast<size_t>(components);
456
+ if (h > 0 && w > std::numeric_limits<size_t>::max() / h)
457
+ return;
458
+ if (c > 0 && (w * h) > std::numeric_limits<size_t>::max() / c)
459
+ return;
460
+ if (streamData->getSize() != w * h * c)
461
+ return;
462
+
463
+ const unsigned char *pixels = streamData->getBuffer();
464
+ int downscaleComponents = components;
465
+
466
+ // convert CMYK → RGB before downscaling
467
+ std::vector<uint8_t> rgbBuf;
468
+ if (isCMYK) {
469
+ size_t pixelCount = w * h;
470
+ rgbBuf.resize(pixelCount * 3);
471
+ cmykToRgb(pixels, rgbBuf.data(), pixelCount);
472
+ pixels = rgbBuf.data();
473
+ downscaleComponents = 3;
474
+ }
475
+
476
+ auto scaled =
477
+ bilinearDownscale(pixels, imgW, imgH, downscaleComponents, newW, newH);
478
+
479
+ // re-encode as Flate-compressed raw pixels
480
+ auto newSize = static_cast<size_t>(newW) * newH * downscaleComponents;
481
+ if (scaled.size() != newSize)
482
+ return;
483
+
484
+ std::string rawStr(reinterpret_cast<char *>(scaled.data()), scaled.size());
485
+ xobj.replaceStreamData(rawStr, QPDFObjectHandle::newName("/FlateDecode"),
486
+ QPDFObjectHandle::newNull());
487
+
488
+ dict.replaceKey("/Width", QPDFObjectHandle::newInteger(newW));
489
+ dict.replaceKey("/Height", QPDFObjectHandle::newInteger(newH));
490
+
491
+ if (isCMYK) {
492
+ dict.replaceKey("/ColorSpace", QPDFObjectHandle::newName("/DeviceRGB"));
493
+ }
494
+
495
+ // remove predictor params from previous encoding
496
+ if (dict.hasKey("/DecodeParms"))
497
+ dict.removeKey("/DecodeParms");
498
+ if (dict.hasKey("/Predictor"))
499
+ dict.removeKey("/Predictor");
500
+ });
501
+ }
502
+
503
+ // ---------------------------------------------------------------------------
504
+ // Metadata stripping
505
+ // ---------------------------------------------------------------------------
506
+
507
+ void stripMetadata(QPDF &qpdf) {
508
+ auto root = qpdf.getRoot();
509
+
510
+ // remove XMP metadata stream
511
+ if (root.hasKey("/Metadata"))
512
+ root.removeKey("/Metadata");
513
+
514
+ // remove document info dictionary
515
+ auto trailer = qpdf.getTrailer();
516
+ if (trailer.hasKey("/Info"))
517
+ trailer.removeKey("/Info");
518
+
519
+ // remove page-level metadata and PieceInfo
520
+ for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
521
+ auto pageObj = page.getObjectHandle();
522
+ if (pageObj.hasKey("/Metadata"))
523
+ pageObj.removeKey("/Metadata");
524
+ if (pageObj.hasKey("/PieceInfo"))
525
+ pageObj.removeKey("/PieceInfo");
526
+ }
527
+
528
+ // remove embedded thumbnails
529
+ for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
530
+ auto pageObj = page.getObjectHandle();
531
+ if (pageObj.hasKey("/Thumb"))
532
+ pageObj.removeKey("/Thumb");
533
+ }
534
+
535
+ // remove MarkInfo and page labels (optional metadata)
536
+ if (root.hasKey("/MarkInfo"))
537
+ root.removeKey("/MarkInfo");
538
+ }
539
+
540
+ // ---------------------------------------------------------------------------
541
+ // Remove unused font resources
542
+ // ---------------------------------------------------------------------------
543
+
544
+ void removeUnusedFonts(QPDF &qpdf) {
545
+ for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
546
+ auto pageObj = page.getObjectHandle();
547
+ auto resources = pageObj.getKey("/Resources");
548
+ if (!resources.isDictionary())
549
+ continue;
550
+ auto fonts = resources.getKey("/Font");
551
+ if (!fonts.isDictionary())
552
+ continue;
553
+
554
+ // collect all font names referenced in this page's content stream(s)
555
+ std::set<std::string> usedFonts;
556
+
557
+ try {
558
+ // get unparsed content stream data
559
+ auto contents = pageObj.getKey("/Contents");
560
+ std::string contentStr;
561
+
562
+ if (contents.isStream()) {
563
+ auto buf = contents.getStreamData(qpdf_dl_generalized);
564
+ contentStr.assign(reinterpret_cast<const char *>(buf->getBuffer()),
565
+ buf->getSize());
566
+ } else if (contents.isArray()) {
567
+ for (int i = 0; i < contents.getArrayNItems(); ++i) {
568
+ auto stream = contents.getArrayItem(i);
569
+ if (stream.isStream()) {
570
+ auto buf = stream.getStreamData(qpdf_dl_generalized);
571
+ contentStr.append(reinterpret_cast<const char *>(buf->getBuffer()),
572
+ buf->getSize());
573
+ contentStr += '\n';
574
+ }
575
+ }
576
+ }
577
+
578
+ // scan for /FontName references — Tf operator uses font name
579
+ // pattern: /FontName <size> Tf
580
+ for (auto &fontKey : fonts.getKeys()) {
581
+ // fontKey includes the leading '/', e.g. "/F1"
582
+ if (contentStr.find(fontKey) != std::string::npos)
583
+ usedFonts.insert(fontKey);
584
+ }
585
+ } catch (...) {
586
+ continue; // skip this page if content can't be read
587
+ }
588
+
589
+ // remove fonts that are not referenced in the content stream
590
+ auto allFontKeys = fonts.getKeys();
591
+ for (auto &fontKey : allFontKeys) {
592
+ if (usedFonts.find(fontKey) == usedFonts.end())
593
+ fonts.removeKey(fontKey);
594
+ }
595
+ }
596
+ }
package/src/images.h CHANGED
@@ -3,8 +3,10 @@
3
3
  #include <qpdf/QPDF.hh>
4
4
  #include <qpdf/QPDFObjectHandle.hh>
5
5
  #include <qpdf/QPDFPageDocumentHelper.hh>
6
+ #include <qpdf/QPDFPageObjectHelper.hh>
6
7
 
7
- // iterates all image XObjects across all pages
8
+ // iterates all image XObjects across all pages, providing the page helper for
9
+ // context (e.g. MediaBox for DPI calculations)
8
10
  template <typename Fn> void forEachImage(QPDF &qpdf, Fn &&fn) {
9
11
  for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
10
12
  auto resources = page.getObjectHandle().getKey("/Resources");
@@ -21,11 +23,20 @@ template <typename Fn> void forEachImage(QPDF &qpdf, Fn &&fn) {
21
23
  if (!dict.getKey("/Subtype").isName() ||
22
24
  dict.getKey("/Subtype").getName() != "/Image")
23
25
  continue;
24
- fn(key, xobj, xobjects);
26
+ fn(key, xobj, xobjects, page);
25
27
  }
26
28
  }
27
29
  }
28
30
 
29
- void optimizeImages(QPDF &qpdf, int quality);
31
// Settings for the lossy image pass; a default-constructed value enables
// nothing beyond automatic quality selection.
struct CompressOptions {
  // JPEG quality; 0 = auto (per-image quality, capped at 85)
  int quality{0};
  // resolution ceiling in DPI; 0 = no downscaling
  int maxDpi{0};
  // whether document metadata should be stripped
  bool stripMetadata{false};
};
36
+
37
+ void optimizeImages(QPDF &qpdf, const CompressOptions &opts);
38
+ void downscaleImages(QPDF &qpdf, int maxDpi);
30
39
  void deduplicateImages(QPDF &qpdf);
31
40
  void optimizeExistingJpegs(QPDF &qpdf);
41
+ void stripMetadata(QPDF &qpdf);
42
+ void removeUnusedFonts(QPDF &qpdf);
package/src/jpeg.cc CHANGED
@@ -1,6 +1,7 @@
1
1
  #include "jpeg.h"
2
2
 
3
3
  #include <cstdlib>
4
+ #include <limits>
4
5
 
5
6
  void jpegErrorExit(j_common_ptr cinfo) {
6
7
  auto *myerr = reinterpret_cast<JpegErrorMgr *>(cinfo->err);
@@ -142,3 +143,85 @@ bool encodeJpeg(const unsigned char *pixels, int width, int height,
142
143
  free(outbuf);
143
144
  return ok;
144
145
  }
146
+
147
+ // ---------------------------------------------------------------------------
148
+ // JPEG quality estimation from quantization tables
149
+ // ---------------------------------------------------------------------------
150
+
151
+ // standard IJG luminance quantization table (quality 50 baseline)
152
+ static const unsigned int std_luminance_qt[64] = {
153
+ 16, 11, 10, 16, 24, 40, 51, 61, 12, 12, 14, 19, 26, 58, 60, 55,
154
+ 14, 13, 16, 24, 40, 57, 69, 56, 14, 17, 22, 29, 51, 87, 80, 62,
155
+ 18, 22, 37, 56, 68, 109, 103, 77, 24, 35, 55, 64, 81, 104, 113, 92,
156
+ 49, 64, 78, 87, 103, 121, 120, 101, 72, 92, 95, 98, 112, 100, 103, 99};
157
+
158
+ // isolated setjmp scope for reading JPEG header
159
+ static int estimateJpegQualityImpl(const unsigned char *data, size_t size) {
160
+ struct jpeg_decompress_struct cinfo = {};
161
+ JpegErrorMgr jerr = {};
162
+
163
+ cinfo.err = jpeg_std_error(&jerr.pub);
164
+ jerr.pub.error_exit = jpegErrorExit;
165
+
166
+ if (setjmp(jerr.jmpbuf)) {
167
+ jpeg_destroy_decompress(&cinfo);
168
+ return -1;
169
+ }
170
+
171
+ jpeg_create_decompress(&cinfo);
172
+ jpeg_mem_src(&cinfo, data, static_cast<unsigned long>(size));
173
+
174
+ if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
175
+ jpeg_destroy_decompress(&cinfo);
176
+ return -1;
177
+ }
178
+
179
+ // need at least the luminance table (slot 0)
180
+ if (!cinfo.quant_tbl_ptrs[0]) {
181
+ jpeg_destroy_decompress(&cinfo);
182
+ return -1;
183
+ }
184
+
185
+ // reverse-engineer the IJG quality from the luminance table.
186
+ // for each quality q, IJG computes: scale = (q < 50) ? 5000/q : 200-2*q
187
+ // then each table value = clamp(floor((base * scale + 50) / 100), 1, 255)
188
+ // we find q that minimizes the sum of absolute differences.
189
+ JQUANT_TBL *tbl = cinfo.quant_tbl_ptrs[0];
190
+ int bestQ = -1;
191
+ long bestError = std::numeric_limits<long>::max();
192
+
193
+ for (int q = 1; q <= 100; q++) {
194
+ long scale = (q < 50) ? 5000L / q : 200L - 2L * q;
195
+ long error = 0;
196
+ for (int i = 0; i < 64; i++) {
197
+ long expected =
198
+ (static_cast<long>(std_luminance_qt[i]) * scale + 50L) / 100L;
199
+ if (expected < 1)
200
+ expected = 1;
201
+ if (expected > 255)
202
+ expected = 255;
203
+ long diff = static_cast<long>(tbl->quantval[i]) - expected;
204
+ error += (diff < 0) ? -diff : diff;
205
+ }
206
+ if (error < bestError) {
207
+ bestError = error;
208
+ bestQ = q;
209
+ }
210
+ // perfect match — stop early
211
+ if (error == 0)
212
+ break;
213
+ }
214
+
215
+ jpeg_destroy_decompress(&cinfo);
216
+
217
+ // if the best match is poor (avg > 2 per coefficient), the tables are
218
+ // non-standard — return -1 to signal we can't reliably estimate
219
+ if (bestError > 128)
220
+ return -1;
221
+
222
+ return bestQ;
223
+ }
224
+
225
+ int estimateJpegQuality(const unsigned char *data, size_t size) {
226
+ return estimateJpegQualityImpl(data, size);
227
+ }
package/src/jpeg.h CHANGED
@@ -24,3 +24,7 @@ bool losslessJpegOptimize(const unsigned char *data, size_t size,
24
24
  // encodes raw pixels as JPEG at the given quality (1–100) via libjpeg-turbo
25
25
  bool encodeJpeg(const unsigned char *pixels, int width, int height,
26
26
  int components, int quality, std::vector<uint8_t> &out);
27
+
28
+ // estimates the IJG quality factor (1–100) from a JPEG's quantization tables.
29
+ // returns -1 if the quality cannot be determined (corrupt, non-standard tables)
30
+ int estimateJpegQuality(const unsigned char *data, size_t size);
package/src/qpdf_addon.cc CHANGED
@@ -51,17 +51,20 @@ class CompressWorker : public Napi::AsyncWorker {
51
51
  public:
52
52
  // buffer variant
53
53
  CompressWorker(Napi::Env env, std::vector<uint8_t> data, bool lossy,
54
- int quality, std::string outputPath)
54
+ int quality, int maxDpi, bool stripMeta,
55
+ std::string outputPath)
55
56
  : Napi::AsyncWorker(env), deferred_(Napi::Promise::Deferred::New(env)),
56
57
  bufferData_(std::move(data)), lossy_(lossy), quality_(quality),
57
- useFile_(false), outputPath_(std::move(outputPath)) {}
58
+ maxDpi_(maxDpi), stripMeta_(stripMeta), useFile_(false),
59
+ outputPath_(std::move(outputPath)) {}
58
60
 
59
61
  // file path variant
60
62
  CompressWorker(Napi::Env env, std::string path, bool lossy, int quality,
61
- std::string outputPath)
63
+ int maxDpi, bool stripMeta, std::string outputPath)
62
64
  : Napi::AsyncWorker(env), deferred_(Napi::Promise::Deferred::New(env)),
63
65
  filePath_(std::move(path)), lossy_(lossy), quality_(quality),
64
- useFile_(true), outputPath_(std::move(outputPath)) {}
66
+ maxDpi_(maxDpi), stripMeta_(stripMeta), useFile_(true),
67
+ outputPath_(std::move(outputPath)) {}
65
68
 
66
69
  Napi::Promise Promise() { return deferred_.Promise(); }
67
70
 
@@ -90,9 +93,17 @@ protected:
90
93
  }
91
94
 
92
95
  deduplicateImages(qpdf);
93
- if (lossy_)
94
- optimizeImages(qpdf, quality_);
96
+ if (lossy_) {
97
+ CompressOptions opts;
98
+ opts.quality = quality_;
99
+ optimizeImages(qpdf, opts);
100
+ }
101
+ if (maxDpi_ > 0)
102
+ downscaleImages(qpdf, maxDpi_);
95
103
  optimizeExistingJpegs(qpdf);
104
+ removeUnusedFonts(qpdf);
105
+ if (stripMeta_)
106
+ stripMetadata(qpdf);
96
107
 
97
108
  Pl_Flate::setCompressionLevel(9);
98
109
 
@@ -143,6 +154,8 @@ private:
143
154
  std::string filePath_;
144
155
  bool lossy_;
145
156
  int quality_;
157
+ int maxDpi_;
158
+ bool stripMeta_;
146
159
  bool useFile_;
147
160
  std::string outputPath_;
148
161
  std::vector<uint8_t> result_;
@@ -162,7 +175,9 @@ static Napi::Value Compress(const Napi::CallbackInfo &info) {
162
175
  }
163
176
 
164
177
  bool lossy = false;
165
- int quality = 75;
178
+ int quality = 0;
179
+ int maxDpi = 0;
180
+ bool stripMeta = false;
166
181
  std::string outputPath;
167
182
 
168
183
  if (info.Length() >= 2 && info[1].IsObject()) {
@@ -180,12 +195,21 @@ static Napi::Value Compress(const Napi::CallbackInfo &info) {
180
195
 
181
196
  if (options.Has("quality")) {
182
197
  quality = options.Get("quality").As<Napi::Number>().Int32Value();
183
- if (quality < 1)
184
- quality = 1;
198
+ if (quality < 0)
199
+ quality = 0;
185
200
  if (quality > 100)
186
201
  quality = 100;
187
202
  }
188
203
 
204
+ if (options.Has("maxDpi")) {
205
+ maxDpi = options.Get("maxDpi").As<Napi::Number>().Int32Value();
206
+ if (maxDpi < 0)
207
+ maxDpi = 0;
208
+ }
209
+
210
+ if (options.Has("stripMetadata"))
211
+ stripMeta = options.Get("stripMetadata").As<Napi::Boolean>().Value();
212
+
189
213
  if (options.Has("output"))
190
214
  outputPath = options.Get("output").As<Napi::String>().Utf8Value();
191
215
  }
@@ -194,7 +218,7 @@ static Napi::Value Compress(const Napi::CallbackInfo &info) {
194
218
  auto buf = info[0].As<Napi::Buffer<uint8_t>>();
195
219
  std::vector<uint8_t> data(buf.Data(), buf.Data() + buf.Length());
196
220
  auto *worker = new CompressWorker(env, std::move(data), lossy, quality,
197
- std::move(outputPath));
221
+ maxDpi, stripMeta, std::move(outputPath));
198
222
  worker->Queue();
199
223
  return worker->Promise();
200
224
  }
@@ -202,7 +226,7 @@ static Napi::Value Compress(const Napi::CallbackInfo &info) {
202
226
  if (info[0].IsString()) {
203
227
  auto path = info[0].As<Napi::String>().Utf8Value();
204
228
  auto *worker = new CompressWorker(env, std::move(path), lossy, quality,
205
- std::move(outputPath));
229
+ maxDpi, stripMeta, std::move(outputPath));
206
230
  worker->Queue();
207
231
  return worker->Promise();
208
232
  }