qpdf-compress 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +176 -0
- package/README.md +168 -0
- package/binding.gyp +111 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +39 -0
- package/dist/index.js.map +1 -0
- package/dist/types.d.ts +16 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/lib/index.ts +60 -0
- package/lib/types.ts +15 -0
- package/package.json +91 -0
- package/scripts/bundle-lib.mjs +35 -0
- package/scripts/download-qpdf.mjs +205 -0
- package/scripts/install.mjs +112 -0
- package/src/qpdf_addon.cc +610 -0
- package/src/stb_image_write.h +1724 -0
- package/src/stb_impl.cc +2 -0
|
@@ -0,0 +1,610 @@
|
|
|
1
|
+
#include <napi.h>
|
|
2
|
+
|
|
3
|
+
#include <cerrno>
|
|
4
|
+
#include <csetjmp>
|
|
5
|
+
#include <cstdio>
|
|
6
|
+
#include <cstring>
|
|
7
|
+
#include <filesystem>
|
|
8
|
+
#include <limits>
|
|
9
|
+
#include <map>
|
|
10
|
+
#include <memory>
|
|
11
|
+
#include <set>
|
|
12
|
+
#include <string>
|
|
13
|
+
#include <unordered_map>
|
|
14
|
+
#include <vector>
|
|
15
|
+
|
|
16
|
+
#include <jpeglib.h>
|
|
17
|
+
|
|
18
|
+
#include <qpdf/Buffer.hh>
|
|
19
|
+
#include <qpdf/Pl_Flate.hh>
|
|
20
|
+
#include <qpdf/QPDF.hh>
|
|
21
|
+
#include <qpdf/QPDFObjectHandle.hh>
|
|
22
|
+
#include <qpdf/QPDFPageDocumentHelper.hh>
|
|
23
|
+
#include <qpdf/QPDFWriter.hh>
|
|
24
|
+
|
|
25
|
+
#include "stb_image_write.h"
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// stb_image_write callback — writes JPEG data to a vector
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
// Output callback for stbi_write_jpg_to_func.
//
// `context` is expected to point at a std::vector<uint8_t>; the `size` bytes
// starting at `data` are appended to the end of that vector. The call does
// nothing when either pointer is null or when `size` is zero or negative.
static void stbi_write_to_vector(void *context, void *data, int size) {
  const bool usable = context != nullptr && data != nullptr && size > 0;
  if (usable) {
    auto &sink = *static_cast<std::vector<uint8_t> *>(context);
    const auto *first = static_cast<const uint8_t *>(data);
    sink.insert(sink.end(), first, first + size);
  }
}
|
|
38
|
+
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// JPEG error handler — prevents libjpeg from calling exit() on errors
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
struct JpegErrorMgr {
|
|
44
|
+
struct jpeg_error_mgr pub;
|
|
45
|
+
std::jmp_buf jmpbuf;
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
// Replacement for libjpeg's `error_exit` hook.
//
// Recovers the enclosing JpegErrorMgr from `cinfo->err` and long-jumps to the
// recovery point stored in it (setjmp then yields 1). Never returns normally.
static void jpegErrorExit(j_common_ptr cinfo) {
  JpegErrorMgr &mgr = *reinterpret_cast<JpegErrorMgr *>(cinfo->err);
  std::longjmp(mgr.jmpbuf, 1);
}
|
|
52
|
+
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// Lossless JPEG optimization — rewrites Huffman tables at the DCT coefficient
|
|
55
|
+
// level without touching pixel data. Typically saves 2–15%.
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
// isolated setjmp scope — no C++ objects with non-trivial destructors
|
|
59
|
+
// may be live when longjmp fires, avoiding undefined behavior
|
|
60
|
+
static bool losslessJpegOptimizeImpl(const unsigned char *data, size_t size,
|
|
61
|
+
unsigned char **outbuf,
|
|
62
|
+
unsigned long *outsize) {
|
|
63
|
+
struct jpeg_decompress_struct srcinfo = {};
|
|
64
|
+
struct jpeg_compress_struct dstinfo = {};
|
|
65
|
+
JpegErrorMgr jerr = {};
|
|
66
|
+
|
|
67
|
+
srcinfo.err = jpeg_std_error(&jerr.pub);
|
|
68
|
+
jerr.pub.error_exit = jpegErrorExit;
|
|
69
|
+
dstinfo.err = &jerr.pub;
|
|
70
|
+
|
|
71
|
+
jpeg_create_decompress(&srcinfo);
|
|
72
|
+
jpeg_create_compress(&dstinfo);
|
|
73
|
+
|
|
74
|
+
if (setjmp(jerr.jmpbuf)) {
|
|
75
|
+
jpeg_destroy_decompress(&srcinfo);
|
|
76
|
+
jpeg_destroy_compress(&dstinfo);
|
|
77
|
+
return false;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
jpeg_mem_src(&srcinfo, data, static_cast<unsigned long>(size));
|
|
81
|
+
|
|
82
|
+
if (jpeg_read_header(&srcinfo, TRUE) != JPEG_HEADER_OK) {
|
|
83
|
+
jpeg_destroy_decompress(&srcinfo);
|
|
84
|
+
jpeg_destroy_compress(&dstinfo);
|
|
85
|
+
return false;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// read DCT coefficients — zero quality loss
|
|
89
|
+
jvirt_barray_ptr *coef_arrays = jpeg_read_coefficients(&srcinfo);
|
|
90
|
+
if (!coef_arrays) {
|
|
91
|
+
jpeg_destroy_decompress(&srcinfo);
|
|
92
|
+
jpeg_destroy_compress(&dstinfo);
|
|
93
|
+
return false;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
*outsize = 0;
|
|
97
|
+
jpeg_mem_dest(&dstinfo, outbuf, outsize);
|
|
98
|
+
|
|
99
|
+
jpeg_copy_critical_parameters(&srcinfo, &dstinfo);
|
|
100
|
+
dstinfo.optimize_coding = TRUE;
|
|
101
|
+
|
|
102
|
+
jpeg_write_coefficients(&dstinfo, coef_arrays);
|
|
103
|
+
jpeg_finish_compress(&dstinfo);
|
|
104
|
+
jpeg_finish_decompress(&srcinfo);
|
|
105
|
+
|
|
106
|
+
jpeg_destroy_compress(&dstinfo);
|
|
107
|
+
jpeg_destroy_decompress(&srcinfo);
|
|
108
|
+
|
|
109
|
+
return *outbuf != nullptr && *outsize > 0;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
static bool losslessJpegOptimize(const unsigned char *data, size_t size,
|
|
113
|
+
std::vector<uint8_t> &out) {
|
|
114
|
+
unsigned char *outbuf = nullptr;
|
|
115
|
+
unsigned long outsize = 0;
|
|
116
|
+
|
|
117
|
+
bool ok = losslessJpegOptimizeImpl(data, size, &outbuf, &outsize);
|
|
118
|
+
if (ok && outbuf && outsize > 0) {
|
|
119
|
+
out.assign(outbuf, outbuf + outsize);
|
|
120
|
+
}
|
|
121
|
+
free(outbuf);
|
|
122
|
+
return ok;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// ---------------------------------------------------------------------------
|
|
126
|
+
// Image recompression for lossy mode
|
|
127
|
+
// ---------------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
// Lossy pass: re-encodes eligible page images as JPEG at `quality`.
//
// Only images reachable through each page's own /Resources /XObject
// dictionary are visited. An image is eligible when it is an /Image stream
// with 8 bits per component, a /DeviceRGB or /DeviceGray colour space,
// dimensions within 1..16384 and at least 2500 pixels, and when its fully
// decoded data has exactly width * height * components bytes.
//
// The stream is replaced only if the new JPEG is strictly smaller than the
// stream's current (still encoded) data. Images that cannot be decoded or
// encoded are skipped silently; this pass is best-effort.
static void optimizeImages(QPDF &qpdf, int quality) {
  constexpr long long kMaxDimension = 16384;
  constexpr long long kMinPixels = 2500;

  // An image shared by several pages must be re-encoded only once; visiting
  // it again would decode our own JPEG and compress it a second time.
  std::set<QPDFObjGen> processed;

  for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
    auto pageObj = page.getObjectHandle();
    auto resources = pageObj.getKey("/Resources");
    if (!resources.isDictionary())
      continue;
    auto xobjects = resources.getKey("/XObject");
    if (!xobjects.isDictionary())
      continue;

    for (auto &key : xobjects.getKeys()) {
      auto xobj = xobjects.getKey(key);
      if (!xobj.isStream())
        continue;
      if (!processed.insert(xobj.getObjGen()).second)
        continue; // already handled via another page

      auto dict = xobj.getDict();
      auto subtype = dict.getKey("/Subtype");
      if (!subtype.isName() || subtype.getName() != "/Image")
        continue;

      // only handle 8-bit images
      auto bpc = dict.getKey("/BitsPerComponent");
      if (!bpc.isInteger() || bpc.getIntValue() != 8)
        continue;

      // validate the dimensions as long long BEFORE narrowing to int, so an
      // out-of-range value cannot wrap into the accepted range
      auto widthObj = dict.getKey("/Width");
      auto heightObj = dict.getKey("/Height");
      if (!widthObj.isInteger() || !heightObj.isInteger())
        continue;
      const long long rawWidth = widthObj.getIntValue();
      const long long rawHeight = heightObj.getIntValue();
      if (rawWidth <= 0 || rawHeight <= 0 || rawWidth > kMaxDimension ||
          rawHeight > kMaxDimension)
        continue;
      const int width = static_cast<int>(rawWidth);
      const int height = static_cast<int>(rawHeight);

      // determine color components
      auto cs = dict.getKey("/ColorSpace");
      if (!cs.isName())
        continue; // skip indexed, ICCBased, etc.
      int components = 0;
      if (cs.getName() == "/DeviceRGB")
        components = 3;
      else if (cs.getName() == "/DeviceGray")
        components = 1;
      else
        continue; // skip CMYK, Lab, etc. for now

      // skip tiny images (logos, icons) — not worth recompressing
      if (rawWidth * rawHeight < kMinPixels)
        continue;

      // get fully decoded stream data (raw pixels)
      std::shared_ptr<Buffer> streamData;
      try {
        streamData = xobj.getStreamData(qpdf_dl_all);
      } catch (...) {
        continue; // can't decode — skip
      }

      // the dimension and component bounds checked above keep this product
      // far below the range of size_t, so it cannot overflow
      const size_t expectedSize = static_cast<size_t>(width) *
                                  static_cast<size_t>(height) *
                                  static_cast<size_t>(components);
      if (streamData->getSize() != expectedSize)
        continue;

      // encode as JPEG
      std::vector<uint8_t> jpegData;
      jpegData.reserve(expectedSize / 4); // estimate
      const int writeOk =
          stbi_write_jpg_to_func(stbi_write_to_vector, &jpegData, width, height,
                                 components, streamData->getBuffer(), quality);
      if (!writeOk || jpegData.empty())
        continue;

      // only replace if we actually reduced size, whatever the current
      // encoding is (JPEG, Flate, none, ...)
      size_t currentSize = 0;
      try {
        currentSize = xobj.getRawStreamData()->getSize();
      } catch (...) {
        continue; // can't measure the original — keep it
      }
      if (jpegData.size() >= currentSize)
        continue; // new JPEG is not smaller, keep original

      // replace stream data with JPEG
      std::string jpegStr(reinterpret_cast<const char *>(jpegData.data()),
                          jpegData.size());
      xobj.replaceStreamData(jpegStr, QPDFObjectHandle::newName("/DCTDecode"),
                             QPDFObjectHandle::newNull());

      // update dictionary — remove FlateDecode-specific params
      if (dict.hasKey("/DecodeParms"))
        dict.removeKey("/DecodeParms");
      if (dict.hasKey("/Predictor"))
        dict.removeKey("/Predictor");
    }
  }
}
|
|
237
|
+
|
|
238
|
+
// ---------------------------------------------------------------------------
|
|
239
|
+
// Duplicate image detection — replaces identical image objects with
|
|
240
|
+
// references to a single canonical copy. Dropped duplicates become
|
|
241
|
+
// unreferenced and are omitted from the output.
|
|
242
|
+
// ---------------------------------------------------------------------------
|
|
243
|
+
|
|
244
|
+
static void deduplicateImages(QPDF &qpdf) {
|
|
245
|
+
struct ImageEntry {
|
|
246
|
+
QPDFObjGen og;
|
|
247
|
+
size_t dataSize;
|
|
248
|
+
QPDFObjectHandle handle;
|
|
249
|
+
};
|
|
250
|
+
|
|
251
|
+
std::unordered_map<size_t, std::vector<ImageEntry>> hashGroups;
|
|
252
|
+
std::set<QPDFObjGen> seen;
|
|
253
|
+
|
|
254
|
+
// first pass: collect all image objects and hash their raw data
|
|
255
|
+
for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
|
|
256
|
+
auto resources = page.getObjectHandle().getKey("/Resources");
|
|
257
|
+
if (!resources.isDictionary())
|
|
258
|
+
continue;
|
|
259
|
+
auto xobjects = resources.getKey("/XObject");
|
|
260
|
+
if (!xobjects.isDictionary())
|
|
261
|
+
continue;
|
|
262
|
+
|
|
263
|
+
for (auto &key : xobjects.getKeys()) {
|
|
264
|
+
auto xobj = xobjects.getKey(key);
|
|
265
|
+
if (!xobj.isStream())
|
|
266
|
+
continue;
|
|
267
|
+
auto og = xobj.getObjGen();
|
|
268
|
+
if (seen.count(og))
|
|
269
|
+
continue;
|
|
270
|
+
seen.insert(og);
|
|
271
|
+
|
|
272
|
+
auto dict = xobj.getDict();
|
|
273
|
+
if (!dict.getKey("/Subtype").isName() ||
|
|
274
|
+
dict.getKey("/Subtype").getName() != "/Image")
|
|
275
|
+
continue;
|
|
276
|
+
|
|
277
|
+
try {
|
|
278
|
+
auto rawData = xobj.getRawStreamData();
|
|
279
|
+
size_t size = rawData->getSize();
|
|
280
|
+
|
|
281
|
+
// FNV-1a hash
|
|
282
|
+
size_t hash = 14695981039346656037ULL;
|
|
283
|
+
auto *p = rawData->getBuffer();
|
|
284
|
+
for (size_t i = 0; i < size; ++i) {
|
|
285
|
+
hash ^= static_cast<size_t>(p[i]);
|
|
286
|
+
hash *= 1099511628211ULL;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
hashGroups[hash].push_back({og, size, xobj});
|
|
290
|
+
} catch (...) {
|
|
291
|
+
continue;
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// second pass: verify hash collisions with full byte comparison
|
|
297
|
+
std::map<QPDFObjGen, QPDFObjectHandle> replacements;
|
|
298
|
+
|
|
299
|
+
for (auto &[hash, group] : hashGroups) {
|
|
300
|
+
if (group.size() < 2)
|
|
301
|
+
continue;
|
|
302
|
+
|
|
303
|
+
for (size_t i = 0; i < group.size(); ++i) {
|
|
304
|
+
if (replacements.count(group[i].og))
|
|
305
|
+
continue;
|
|
306
|
+
|
|
307
|
+
auto rawI = group[i].handle.getRawStreamData();
|
|
308
|
+
for (size_t j = i + 1; j < group.size(); ++j) {
|
|
309
|
+
if (replacements.count(group[j].og))
|
|
310
|
+
continue;
|
|
311
|
+
|
|
312
|
+
auto rawJ = group[j].handle.getRawStreamData();
|
|
313
|
+
if (rawI->getSize() != rawJ->getSize())
|
|
314
|
+
continue;
|
|
315
|
+
|
|
316
|
+
if (memcmp(rawI->getBuffer(), rawJ->getBuffer(), rawI->getSize()) ==
|
|
317
|
+
0) {
|
|
318
|
+
replacements[group[j].og] = group[i].handle;
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
if (replacements.empty())
|
|
325
|
+
return;
|
|
326
|
+
|
|
327
|
+
// third pass: rewrite XObject references to point to canonical objects
|
|
328
|
+
for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
|
|
329
|
+
auto resources = page.getObjectHandle().getKey("/Resources");
|
|
330
|
+
if (!resources.isDictionary())
|
|
331
|
+
continue;
|
|
332
|
+
auto xobjects = resources.getKey("/XObject");
|
|
333
|
+
if (!xobjects.isDictionary())
|
|
334
|
+
continue;
|
|
335
|
+
|
|
336
|
+
for (auto &key : xobjects.getKeys()) {
|
|
337
|
+
auto xobj = xobjects.getKey(key);
|
|
338
|
+
auto it = replacements.find(xobj.getObjGen());
|
|
339
|
+
if (it != replacements.end()) {
|
|
340
|
+
xobjects.replaceKey(key, it->second);
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
// ---------------------------------------------------------------------------
|
|
347
|
+
// Lossless optimization of existing embedded JPEG images — optimizes Huffman
|
|
348
|
+
// tables at the DCT coefficient level without any quality loss.
|
|
349
|
+
// ---------------------------------------------------------------------------
|
|
350
|
+
|
|
351
|
+
// Lossless pass over images that are already JPEG.
//
// Every distinct XObject stream whose /Filter is the single name /DCTDecode
// is run through losslessJpegOptimize(); the stream data is swapped only when
// the result is strictly smaller. The stream's existing /DecodeParms entry is
// passed back unchanged: the image is still DCT-encoded, so parameters that
// belong to that filter must survive the replacement.
//
// Failures on an individual image are swallowed on purpose; this pass is
// best-effort and leaves such images as they were.
static void optimizeExistingJpegs(QPDF &qpdf) {
  std::set<QPDFObjGen> processed;

  for (auto &page : QPDFPageDocumentHelper(qpdf).getAllPages()) {
    auto resources = page.getObjectHandle().getKey("/Resources");
    if (!resources.isDictionary())
      continue;
    auto xobjects = resources.getKey("/XObject");
    if (!xobjects.isDictionary())
      continue;

    for (auto &key : xobjects.getKeys()) {
      auto xobj = xobjects.getKey(key);
      if (!xobj.isStream())
        continue;

      if (!processed.insert(xobj.getObjGen()).second)
        continue; // shared image already handled

      auto dict = xobj.getDict();
      auto filter = dict.getKey("/Filter");
      if (!filter.isName() || filter.getName() != "/DCTDecode")
        continue;

      try {
        auto rawData = xobj.getRawStreamData();

        std::vector<uint8_t> optimized;
        if (!losslessJpegOptimize(rawData->getBuffer(), rawData->getSize(),
                                  optimized))
          continue;

        // only replace if strictly smaller
        if (optimized.size() >= rawData->getSize())
          continue;

        std::string jpegStr(reinterpret_cast<const char *>(optimized.data()),
                            optimized.size());
        // getKey yields a null object when /DecodeParms is absent, which
        // leaves the dictionary without that key, exactly as before
        xobj.replaceStreamData(jpegStr, QPDFObjectHandle::newName("/DCTDecode"),
                               dict.getKey("/DecodeParms"));
      } catch (...) {
        continue;
      }
    }
  }
}
|
|
399
|
+
|
|
400
|
+
// ---------------------------------------------------------------------------
|
|
401
|
+
// CompressWorker — async PDF compression
|
|
402
|
+
// ---------------------------------------------------------------------------
|
|
403
|
+
|
|
404
|
+
class CompressWorker : public Napi::AsyncWorker {
|
|
405
|
+
public:
|
|
406
|
+
// buffer variant
|
|
407
|
+
CompressWorker(Napi::Env env, std::vector<uint8_t> data, bool lossy,
|
|
408
|
+
int quality, std::string outputPath)
|
|
409
|
+
: Napi::AsyncWorker(env), deferred_(Napi::Promise::Deferred::New(env)),
|
|
410
|
+
bufferData_(std::move(data)), lossy_(lossy), quality_(quality),
|
|
411
|
+
useFile_(false), outputPath_(std::move(outputPath)) {}
|
|
412
|
+
|
|
413
|
+
// file path variant
|
|
414
|
+
CompressWorker(Napi::Env env, std::string path, bool lossy, int quality,
|
|
415
|
+
std::string outputPath)
|
|
416
|
+
: Napi::AsyncWorker(env), deferred_(Napi::Promise::Deferred::New(env)),
|
|
417
|
+
filePath_(std::move(path)), lossy_(lossy), quality_(quality),
|
|
418
|
+
useFile_(true), outputPath_(std::move(outputPath)) {}
|
|
419
|
+
|
|
420
|
+
Napi::Promise Promise() { return deferred_.Promise(); }
|
|
421
|
+
|
|
422
|
+
protected:
|
|
423
|
+
void Execute() override {
|
|
424
|
+
try {
|
|
425
|
+
QPDF qpdf;
|
|
426
|
+
qpdf.setAttemptRecovery(true);
|
|
427
|
+
|
|
428
|
+
if (useFile_) {
|
|
429
|
+
if (!std::filesystem::exists(filePath_)) {
|
|
430
|
+
SetError("Input file not found: " + filePath_);
|
|
431
|
+
return;
|
|
432
|
+
}
|
|
433
|
+
qpdf.processFile(filePath_.c_str());
|
|
434
|
+
} else {
|
|
435
|
+
// validate PDF header to prevent QPDF from aborting on garbage input
|
|
436
|
+
if (bufferData_.size() < 5 ||
|
|
437
|
+
memcmp(bufferData_.data(), "%PDF-", 5) != 0) {
|
|
438
|
+
SetError("Input is not a valid PDF (missing %PDF- header)");
|
|
439
|
+
return;
|
|
440
|
+
}
|
|
441
|
+
qpdf.processMemoryFile(
|
|
442
|
+
"input.pdf", reinterpret_cast<const char *>(bufferData_.data()),
|
|
443
|
+
bufferData_.size());
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
// deduplicate identical images across pages
|
|
447
|
+
deduplicateImages(qpdf);
|
|
448
|
+
|
|
449
|
+
// lossy: recompress embedded images as JPEG
|
|
450
|
+
if (lossy_) {
|
|
451
|
+
optimizeImages(qpdf, quality_);
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
// lossless JPEG Huffman table optimization
|
|
455
|
+
optimizeExistingJpegs(qpdf);
|
|
456
|
+
|
|
457
|
+
// maximum Flate compression level
|
|
458
|
+
Pl_Flate::setCompressionLevel(9);
|
|
459
|
+
|
|
460
|
+
QPDFWriter writer(qpdf);
|
|
461
|
+
writer.setOutputMemory();
|
|
462
|
+
writer.setStreamDataMode(qpdf_s_compress);
|
|
463
|
+
writer.setRecompressFlate(true);
|
|
464
|
+
writer.setObjectStreamMode(qpdf_o_generate);
|
|
465
|
+
writer.setCompressStreams(true);
|
|
466
|
+
// only decode generalized streams (Flate, LZW, etc.)
|
|
467
|
+
// this preserves DCTDecode (our recompressed JPEG images)
|
|
468
|
+
writer.setDecodeLevel(qpdf_dl_generalized);
|
|
469
|
+
writer.setPreserveUnreferencedObjects(false);
|
|
470
|
+
writer.write();
|
|
471
|
+
|
|
472
|
+
auto buf = writer.getBufferSharedPointer();
|
|
473
|
+
result_.assign(buf->getBuffer(), buf->getBuffer() + buf->getSize());
|
|
474
|
+
|
|
475
|
+
// write to file if output path was specified
|
|
476
|
+
if (!outputPath_.empty()) {
|
|
477
|
+
auto closer = [](FILE *fp) {
|
|
478
|
+
if (fp)
|
|
479
|
+
fclose(fp);
|
|
480
|
+
};
|
|
481
|
+
std::unique_ptr<FILE, decltype(closer)> f(
|
|
482
|
+
fopen(outputPath_.c_str(), "wb"), closer);
|
|
483
|
+
if (!f) {
|
|
484
|
+
auto parentDir = std::filesystem::path(outputPath_).parent_path();
|
|
485
|
+
if (!parentDir.empty() && !std::filesystem::is_directory(parentDir)) {
|
|
486
|
+
SetError("Parent directory does not exist: " + parentDir.string());
|
|
487
|
+
} else {
|
|
488
|
+
SetError("Failed to open output file: " + outputPath_ + " (" +
|
|
489
|
+
std::strerror(errno) + ")");
|
|
490
|
+
}
|
|
491
|
+
return;
|
|
492
|
+
}
|
|
493
|
+
size_t written = fwrite(result_.data(), 1, result_.size(), f.get());
|
|
494
|
+
if (written != result_.size()) {
|
|
495
|
+
SetError("Failed to write output file: " + outputPath_ + " (" +
|
|
496
|
+
std::strerror(errno) + ")");
|
|
497
|
+
return;
|
|
498
|
+
}
|
|
499
|
+
if (fflush(f.get()) != 0) {
|
|
500
|
+
SetError("Failed to flush output file: " + outputPath_ + " (" +
|
|
501
|
+
std::strerror(errno) + ")");
|
|
502
|
+
return;
|
|
503
|
+
}
|
|
504
|
+
result_.clear();
|
|
505
|
+
}
|
|
506
|
+
} catch (std::exception &e) {
|
|
507
|
+
SetError(e.what());
|
|
508
|
+
}
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
void OnOK() override {
|
|
512
|
+
if (outputPath_.empty()) {
|
|
513
|
+
auto buffer =
|
|
514
|
+
Napi::Buffer<uint8_t>::Copy(Env(), result_.data(), result_.size());
|
|
515
|
+
deferred_.Resolve(buffer);
|
|
516
|
+
} else {
|
|
517
|
+
deferred_.Resolve(Env().Undefined());
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
void OnError(Napi::Error const &error) override {
|
|
522
|
+
deferred_.Reject(error.Value());
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
private:
|
|
526
|
+
Napi::Promise::Deferred deferred_;
|
|
527
|
+
std::vector<uint8_t> bufferData_;
|
|
528
|
+
std::string filePath_;
|
|
529
|
+
bool lossy_;
|
|
530
|
+
int quality_;
|
|
531
|
+
bool useFile_;
|
|
532
|
+
std::string outputPath_;
|
|
533
|
+
std::vector<uint8_t> result_;
|
|
534
|
+
};
|
|
535
|
+
|
|
536
|
+
// ---------------------------------------------------------------------------
|
|
537
|
+
// JS API: compress(input, options)
|
|
538
|
+
// ---------------------------------------------------------------------------
|
|
539
|
+
|
|
540
|
+
// JS entry point: compress(input, options?) -> Promise.
//
// `input` is either a Buffer holding a PDF or a string path to one.
// `options` (optional object) may carry:
//   mode    - "lossy" or "lossless" (default "lossless")
//   quality - number, clamped to 1..100 (default 75)
//   output  - string path; when non-empty the result is written there
// An option that is absent or explicitly undefined keeps its default; an
// option of the wrong type throws a TypeError synchronously.
//
// Returns the worker's promise, or undefined after throwing a TypeError.
static Napi::Value Compress(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() < 1) {
    Napi::TypeError::New(env, "Expected input (Buffer or string)")
        .ThrowAsJavaScriptException();
    return env.Undefined();
  }

  // parse options
  bool lossy = false;
  int quality = 75;
  std::string outputPath;

  if (info.Length() >= 2 && info[1].IsObject()) {
    auto options = info[1].As<Napi::Object>();

    // Each value is type-checked before As<>(), which itself performs no
    // validation.
    Napi::Value modeValue = options.Get("mode");
    if (!modeValue.IsUndefined()) {
      const std::string mode =
          modeValue.IsString() ? modeValue.As<Napi::String>().Utf8Value()
                               : std::string();
      if (mode != "lossy" && mode != "lossless") {
        Napi::TypeError::New(env, "Mode must be 'lossy' or 'lossless'")
            .ThrowAsJavaScriptException();
        return env.Undefined();
      }
      lossy = (mode == "lossy");
    }

    Napi::Value qualityValue = options.Get("quality");
    if (!qualityValue.IsUndefined()) {
      if (!qualityValue.IsNumber()) {
        Napi::TypeError::New(env, "Quality must be a number")
            .ThrowAsJavaScriptException();
        return env.Undefined();
      }
      quality = qualityValue.As<Napi::Number>().Int32Value();
      if (quality < 1)
        quality = 1;
      if (quality > 100)
        quality = 100;
    }

    Napi::Value outputValue = options.Get("output");
    if (!outputValue.IsUndefined()) {
      if (!outputValue.IsString()) {
        Napi::TypeError::New(env, "Output must be a file path string")
            .ThrowAsJavaScriptException();
        return env.Undefined();
      }
      outputPath = outputValue.As<Napi::String>().Utf8Value();
    }
  }

  if (info[0].IsBuffer()) {
    auto buf = info[0].As<Napi::Buffer<uint8_t>>();
    // copy: the worker must not touch JS-owned memory from another thread
    std::vector<uint8_t> data(buf.Data(), buf.Data() + buf.Length());
    // the worker is heap-allocated and handed to the AsyncWorker machinery
    // via Queue()
    auto worker = new CompressWorker(env, std::move(data), lossy, quality,
                                     std::move(outputPath));
    worker->Queue();
    return worker->Promise();
  }

  if (info[0].IsString()) {
    auto path = info[0].As<Napi::String>().Utf8Value();
    auto worker = new CompressWorker(env, std::move(path), lossy, quality,
                                     std::move(outputPath));
    worker->Queue();
    return worker->Promise();
  }

  Napi::TypeError::New(env, "Input must be a Buffer or file path string")
      .ThrowAsJavaScriptException();
  return env.Undefined();
}
|
|
600
|
+
|
|
601
|
+
// ---------------------------------------------------------------------------
|
|
602
|
+
// Module init
|
|
603
|
+
// ---------------------------------------------------------------------------
|
|
604
|
+
|
|
605
|
+
static Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
|
606
|
+
exports.Set("compress", Napi::Function::New(env, Compress));
|
|
607
|
+
return exports;
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
NODE_API_MODULE(qpdf_compress, Init)
|