@bentopdf/pymupdf-wasm 0.11.15 → 0.11.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +58 -84
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -2318,35 +2318,22 @@ doc.scrub(
|
|
|
2318
2318
|
reset_responses=${scrubResetResponses ? "True" : "False"},
|
|
2319
2319
|
)
|
|
2320
2320
|
|
|
2321
|
-
# 2. Image compression
|
|
2322
|
-
# with shared image xrefs across many pages \u2014 bypasses doc.rewrite_images())
|
|
2321
|
+
# 2. Image compression
|
|
2323
2322
|
if ${compressImages ? "True" : "False"}:
|
|
2324
2323
|
import math as _math
|
|
2325
|
-
import sys as _sys
|
|
2326
2324
|
|
|
2327
2325
|
_dpi_target = ${dpiTarget}
|
|
2328
2326
|
_dpi_threshold = ${dpiThreshold}
|
|
2329
|
-
_quality = ${imageQuality}
|
|
2330
2327
|
_set_to_gray = ${convertToGray ? "True" : "False"}
|
|
2331
|
-
|
|
2332
|
-
_process_lossless = ${processLossless ? "True" : "False"}
|
|
2333
|
-
_process_bitonal = ${processBitonal ? "True" : "False"}
|
|
2334
|
-
_process_color = ${processColor ? "True" : "False"}
|
|
2335
|
-
_process_gray = ${processGray ? "True" : "False"}
|
|
2336
|
-
|
|
2337
|
-
# Phase 1: Collect unique image xrefs and smask info
|
|
2338
|
-
_xref_info = {}
|
|
2339
|
-
for _page in doc:
|
|
2340
|
-
for _img in _page.get_images(full=True):
|
|
2341
|
-
_xref, _smask = _img[0], _img[1]
|
|
2342
|
-
if _xref > 0:
|
|
2343
|
-
_xref_info.setdefault(_xref, {"smask": _smask, "min_dpi": float("inf")})
|
|
2328
|
+
_effective_threshold = max(_dpi_threshold or 0, (_dpi_target or 0) + 10) if _dpi_target else None
|
|
2344
2329
|
|
|
2345
|
-
#
|
|
2330
|
+
# Pass 1: Handle lossless (PNG/Flate) images via page.replace_image()
|
|
2331
|
+
# Calculate DPI for each xref
|
|
2332
|
+
_xref_dpi = {}
|
|
2346
2333
|
for _page in doc:
|
|
2347
2334
|
for _info in _page.get_image_info(hashes=False, xrefs=True):
|
|
2348
2335
|
_xref = _info.get("xref", 0)
|
|
2349
|
-
if _xref
|
|
2336
|
+
if _xref <= 0:
|
|
2350
2337
|
continue
|
|
2351
2338
|
_bbox = _info.get("bbox")
|
|
2352
2339
|
_w = _info.get("width", 0)
|
|
@@ -2356,83 +2343,70 @@ if ${compressImages ? "True" : "False"}:
|
|
|
2356
2343
|
_disp_h = abs(_bbox[3] - _bbox[1])
|
|
2357
2344
|
if _disp_w > 0 and _disp_h > 0:
|
|
2358
2345
|
_dpi = min(_w / _disp_w * 72, _h / _disp_h * 72)
|
|
2359
|
-
if _dpi <
|
|
2360
|
-
|
|
2346
|
+
if _xref not in _xref_dpi or _dpi < _xref_dpi[_xref]:
|
|
2347
|
+
_xref_dpi[_xref] = _dpi
|
|
2361
2348
|
|
|
2362
|
-
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
_needs_downscale = bool(
|
|
2370
|
-
_dpi_target and _effective_threshold
|
|
2371
|
-
and _min_dpi != float("inf")
|
|
2372
|
-
and _min_dpi > _effective_threshold
|
|
2373
|
-
)
|
|
2374
|
-
if not _needs_downscale and _quality is None and not _set_to_gray:
|
|
2375
|
-
continue
|
|
2349
|
+
_handled = set()
|
|
2350
|
+
for _page in doc:
|
|
2351
|
+
for _img in _page.get_images():
|
|
2352
|
+
_xref = _img[0]
|
|
2353
|
+
if _xref in _handled:
|
|
2354
|
+
continue
|
|
2355
|
+
_handled.add(_xref)
|
|
2376
2356
|
|
|
2377
|
-
|
|
2378
|
-
# Check image type filters (match rewrite_images behavior)
|
|
2357
|
+
_mask_xref = _img[1]
|
|
2379
2358
|
_xref_obj = doc.xref_object(_xref)
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
if _is_lossy and not _process_lossy:
|
|
2359
|
+
|
|
2360
|
+
if "FlateDecode" not in _xref_obj:
|
|
2383
2361
|
continue
|
|
2384
|
-
|
|
2362
|
+
|
|
2363
|
+
_min_dpi = _xref_dpi.get(_xref, float("inf"))
|
|
2364
|
+
_needs_downscale = bool(
|
|
2365
|
+
_dpi_target and _effective_threshold
|
|
2366
|
+
and _min_dpi != float("inf")
|
|
2367
|
+
and _min_dpi > _effective_threshold
|
|
2368
|
+
)
|
|
2369
|
+
if not _needs_downscale and not _set_to_gray:
|
|
2385
2370
|
continue
|
|
2386
2371
|
|
|
2387
|
-
|
|
2372
|
+
try:
|
|
2373
|
+
_base = pymupdf.Pixmap(doc, _xref)
|
|
2388
2374
|
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
_is_bitonal = (_pix.colorspace and _n == 1 and doc.xref_get_key(_xref, "BitsPerComponent")[1] == "1")
|
|
2392
|
-
_is_gray = (_n == 1 and not _is_bitonal)
|
|
2393
|
-
_is_color = (_n >= 3)
|
|
2394
|
-
if _is_bitonal and not _process_bitonal:
|
|
2395
|
-
_pix = None
|
|
2396
|
-
continue
|
|
2397
|
-
if _is_gray and not _process_gray:
|
|
2398
|
-
_pix = None
|
|
2399
|
-
continue
|
|
2400
|
-
if _is_color and not _process_color:
|
|
2401
|
-
_pix = None
|
|
2402
|
-
continue
|
|
2375
|
+
if _base.alpha:
|
|
2376
|
+
_base = pymupdf.Pixmap(_base, 0)
|
|
2403
2377
|
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
_pix = pymupdf.Pixmap(_pix.colorspace or pymupdf.csRGB, _pix)
|
|
2378
|
+
if _mask_xref:
|
|
2379
|
+
_mask = pymupdf.Pixmap(doc, _mask_xref)
|
|
2380
|
+
_base = pymupdf.Pixmap(_base, _mask)
|
|
2408
2381
|
|
|
2409
|
-
|
|
2410
|
-
|
|
2411
|
-
|
|
2412
|
-
|
|
2413
|
-
_pix.shrink(_shrink_n)
|
|
2382
|
+
if _set_to_gray and _base.colorspace and _base.colorspace.n > 1:
|
|
2383
|
+
_base = pymupdf.Pixmap(pymupdf.csGRAY, _base)
|
|
2384
|
+
elif _base.colorspace and _base.colorspace.n > 3:
|
|
2385
|
+
_base = pymupdf.Pixmap(pymupdf.csRGB, _base)
|
|
2414
2386
|
|
|
2415
|
-
|
|
2416
|
-
|
|
2387
|
+
if _needs_downscale:
|
|
2388
|
+
_ratio = _min_dpi / _dpi_target
|
|
2389
|
+
_shrink_n = max(0, min(7, int(_math.log2(_ratio))))
|
|
2390
|
+
if _shrink_n > 0:
|
|
2391
|
+
_base.shrink(_shrink_n)
|
|
2417
2392
|
|
|
2418
|
-
|
|
2419
|
-
|
|
2420
|
-
|
|
2421
|
-
|
|
2422
|
-
)
|
|
2423
|
-
_smask_entry = f"/SMask {_smask_xref} 0 R " if _smask_xref else ""
|
|
2424
|
-
_new_obj = (
|
|
2425
|
-
f"<</Type /XObject /Subtype /Image /BitsPerComponent 8"
|
|
2426
|
-
f" /ColorSpace {_cs_name} /Filter /DCTDecode"
|
|
2427
|
-
f" /Height {_pix.height} /Width {_pix.width}"
|
|
2428
|
-
f" {_smask_entry}>>"
|
|
2429
|
-
)
|
|
2430
|
-
doc.update_object(_xref, _new_obj)
|
|
2431
|
-
doc.update_stream(_xref, _jpeg_bytes, compress=0)
|
|
2432
|
-
_pix = None
|
|
2393
|
+
_page.replace_image(_xref, pixmap=_base)
|
|
2394
|
+
_base = None
|
|
2395
|
+
except Exception as _e:
|
|
2396
|
+
pass
|
|
2433
2397
|
|
|
2434
|
-
|
|
2435
|
-
|
|
2398
|
+
# Pass 2: Handle lossy (JPEG) images via rewrite_images
|
|
2399
|
+
doc.rewrite_images(
|
|
2400
|
+
dpi_threshold=${dpiThreshold},
|
|
2401
|
+
dpi_target=${dpiTarget},
|
|
2402
|
+
quality=${imageQuality},
|
|
2403
|
+
lossless=False,
|
|
2404
|
+
lossy=${processLossy ? "True" : "False"},
|
|
2405
|
+
bitonal=${processBitonal ? "True" : "False"},
|
|
2406
|
+
color=${processColor ? "True" : "False"},
|
|
2407
|
+
gray=${processGray ? "True" : "False"},
|
|
2408
|
+
set_to_gray=${convertToGray ? "True" : "False"},
|
|
2409
|
+
)
|
|
2436
2410
|
|
|
2437
2411
|
# 3. Font subsetting
|
|
2438
2412
|
if ${subsetFonts ? "True" : "False"}:
|