npm - @bentopdf/pymupdf-wasm - Versions diffs - 0.11.15 → 0.11.16 - Mend

@bentopdf/pymupdf-wasm 0.11.15 → 0.11.16

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (2)

package/dist/index.js +58 -84
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -2318,35 +2318,22 @@ doc.scrub(
     reset_responses=${scrubResetResponses ? "True" : "False"},
 )
-# 2. Image compression (safe per-xref approach to avoid MuPDF buffer overflow
-#    with shared image xrefs across many pages \u2014 bypasses doc.rewrite_images())
+# 2. Image compression
 if ${compressImages ? "True" : "False"}:
     import math as _math
-    import sys as _sys
     _dpi_target = ${dpiTarget}
     _dpi_threshold = ${dpiThreshold}
-    _quality = ${imageQuality}
     _set_to_gray = ${convertToGray ? "True" : "False"}
-    _process_lossy = ${processLossy ? "True" : "False"}
-    _process_lossless = ${processLossless ? "True" : "False"}
-    _process_bitonal = ${processBitonal ? "True" : "False"}
-    _process_color = ${processColor ? "True" : "False"}
-    _process_gray = ${processGray ? "True" : "False"}
-    # Phase 1: Collect unique image xrefs and smask info
-    _xref_info = {}
-    for _page in doc:
-        for _img in _page.get_images(full=True):
-            _xref, _smask = _img[0], _img[1]
-            if _xref > 0:
-                _xref_info.setdefault(_xref, {"smask": _smask, "min_dpi": float("inf")})
+    _effective_threshold = max(_dpi_threshold or 0, (_dpi_target or 0) + 10) if _dpi_target else None
-    # Phase 2: Calculate effective DPI for each xref across all page usages
+    # Pass 1: Handle lossless (PNG/Flate) images via page.replace_image()
+    # Calculate DPI for each xref
+    _xref_dpi = {}
     for _page in doc:
         for _info in _page.get_image_info(hashes=False, xrefs=True):
             _xref = _info.get("xref", 0)
-            if _xref not in _xref_info:
+            if _xref <= 0:
                 continue
             _bbox = _info.get("bbox")
             _w = _info.get("width", 0)
@@ -2356,83 +2343,70 @@ if ${compressImages ? "True" : "False"}:
                 _disp_h = abs(_bbox[3] - _bbox[1])
                 if _disp_w > 0 and _disp_h > 0:
                     _dpi = min(_w / _disp_w * 72, _h / _disp_h * 72)
-                    if _dpi < _xref_info[_xref]["min_dpi"]:
-                        _xref_info[_xref]["min_dpi"] = _dpi
+                    if _xref not in _xref_dpi or _dpi < _xref_dpi[_xref]:
+                        _xref_dpi[_xref] = _dpi
-    _effective_threshold = max(_dpi_threshold or 0, (_dpi_target or 0) + 10) if _dpi_target else None
-    # Phase 3: Rewrite each image xref individually
-    for _xref, _meta in _xref_info.items():
-        _min_dpi = _meta["min_dpi"]
-        _smask_xref = _meta["smask"]
-        _needs_downscale = bool(
-            _dpi_target and _effective_threshold
-            and _min_dpi != float("inf")
-            and _min_dpi > _effective_threshold
-        )
-        if not _needs_downscale and _quality is None and not _set_to_gray:
-            continue
+    _handled = set()
+    for _page in doc:
+        for _img in _page.get_images():
+            _xref = _img[0]
+            if _xref in _handled:
+                continue
+            _handled.add(_xref)
-        try:
-            # Check image type filters (match rewrite_images behavior)
+            _mask_xref = _img[1]
             _xref_obj = doc.xref_object(_xref)
-            _is_lossy = "/DCTDecode" in _xref_obj or "/JPXDecode" in _xref_obj
-            _is_lossless = not _is_lossy
-            if _is_lossy and not _process_lossy:
+            if "FlateDecode" not in _xref_obj:
                 continue
-            if _is_lossless and not _process_lossless:
+            _min_dpi = _xref_dpi.get(_xref, float("inf"))
+            _needs_downscale = bool(
+                _dpi_target and _effective_threshold
+                and _min_dpi != float("inf")
+                and _min_dpi > _effective_threshold
+            )
+            if not _needs_downscale and not _set_to_gray:
                 continue
-            _pix = pymupdf.Pixmap(doc, _xref)
+            try:
+                _base = pymupdf.Pixmap(doc, _xref)
-            # Check colorspace filters
-            _n = _pix.colorspace.n if _pix.colorspace else 0
-            _is_bitonal = (_pix.colorspace and _n == 1 and doc.xref_get_key(_xref, "BitsPerComponent")[1] == "1")
-            _is_gray = (_n == 1 and not _is_bitonal)
-            _is_color = (_n >= 3)
-            if _is_bitonal and not _process_bitonal:
-                _pix = None
-                continue
-            if _is_gray and not _process_gray:
-                _pix = None
-                continue
-            if _is_color and not _process_color:
-                _pix = None
-                continue
+                if _base.alpha:
+                    _base = pymupdf.Pixmap(_base, 0)
-            if _set_to_gray and _pix.colorspace and _pix.colorspace.n > 1:
-                _pix = pymupdf.Pixmap(pymupdf.csGRAY, _pix)
-            elif _pix.alpha:
-                _pix = pymupdf.Pixmap(_pix.colorspace or pymupdf.csRGB, _pix)
+                if _mask_xref:
+                    _mask = pymupdf.Pixmap(doc, _mask_xref)
+                    _base = pymupdf.Pixmap(_base, _mask)
-            if _needs_downscale:
-                _ratio = _min_dpi / _dpi_target
-                _shrink_n = max(0, min(7, int(_math.log2(_ratio))))
-                if _shrink_n > 0:
-                    _pix.shrink(_shrink_n)
+                if _set_to_gray and _base.colorspace and _base.colorspace.n > 1:
+                    _base = pymupdf.Pixmap(pymupdf.csGRAY, _base)
+                elif _base.colorspace and _base.colorspace.n > 3:
+                    _base = pymupdf.Pixmap(pymupdf.csRGB, _base)
-            _q = _quality if _quality is not None else 85
-            _jpeg_bytes = _pix.tobytes("jpeg", jpg_quality=_q)
+                if _needs_downscale:
+                    _ratio = _min_dpi / _dpi_target
+                    _shrink_n = max(0, min(7, int(_math.log2(_ratio))))
+                    if _shrink_n > 0:
+                        _base.shrink(_shrink_n)
-            _cs_name = (
-                "/DeviceGray"
-                if _pix.colorspace and _pix.colorspace.n == 1
-                else "/DeviceRGB"
-            )
-            _smask_entry = f"/SMask {_smask_xref} 0 R " if _smask_xref else ""
-            _new_obj = (
-                f"<</Type /XObject /Subtype /Image /BitsPerComponent 8"
-                f" /ColorSpace {_cs_name} /Filter /DCTDecode"
-                f" /Height {_pix.height} /Width {_pix.width}"
-                f" {_smask_entry}>>"
-            )
-            doc.update_object(_xref, _new_obj)
-            doc.update_stream(_xref, _jpeg_bytes, compress=0)
-            _pix = None
+                _page.replace_image(_xref, pixmap=_base)
+                _base = None
+            except Exception as _e:
+                pass
-        except Exception as _e:
-            _sys.stderr.write(f"[pymupdf-wasm] safe_rewrite_images xref {_xref}: {_e}\\n")
+    # Pass 2: Handle lossy (JPEG) images via rewrite_images
+    doc.rewrite_images(
+        dpi_threshold=${dpiThreshold},
+        dpi_target=${dpiTarget},
+        quality=${imageQuality},
+        lossless=False,
+        lossy=${processLossy ? "True" : "False"},
+        bitonal=${processBitonal ? "True" : "False"},
+        color=${processColor ? "True" : "False"},
+        gray=${processGray ? "True" : "False"},
+        set_to_gray=${convertToGray ? "True" : "False"},
+    )
 # 3. Font subsetting
 if ${subsetFonts ? "True" : "False"}:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@bentopdf/pymupdf-wasm",
-    "version": "0.11.15",
+    "version": "0.11.16",
     "description": "PyMuPDF compiled to WebAssembly - Full PDF manipulation in the browser",
     "type": "module",
     "main": "dist/index.js",