@bentopdf/pymupdf-wasm 0.11.15 → 0.11.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +58 -84
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -2318,35 +2318,22 @@ doc.scrub(
2318
2318
  reset_responses=${scrubResetResponses ? "True" : "False"},
2319
2319
  )
2320
2320
 
2321
- # 2. Image compression (safe per-xref approach to avoid MuPDF buffer overflow
2322
- # with shared image xrefs across many pages \u2014 bypasses doc.rewrite_images())
2321
+ # 2. Image compression
2323
2322
  if ${compressImages ? "True" : "False"}:
2324
2323
  import math as _math
2325
- import sys as _sys
2326
2324
 
2327
2325
  _dpi_target = ${dpiTarget}
2328
2326
  _dpi_threshold = ${dpiThreshold}
2329
- _quality = ${imageQuality}
2330
2327
  _set_to_gray = ${convertToGray ? "True" : "False"}
2331
- _process_lossy = ${processLossy ? "True" : "False"}
2332
- _process_lossless = ${processLossless ? "True" : "False"}
2333
- _process_bitonal = ${processBitonal ? "True" : "False"}
2334
- _process_color = ${processColor ? "True" : "False"}
2335
- _process_gray = ${processGray ? "True" : "False"}
2336
-
2337
- # Phase 1: Collect unique image xrefs and smask info
2338
- _xref_info = {}
2339
- for _page in doc:
2340
- for _img in _page.get_images(full=True):
2341
- _xref, _smask = _img[0], _img[1]
2342
- if _xref > 0:
2343
- _xref_info.setdefault(_xref, {"smask": _smask, "min_dpi": float("inf")})
2328
+ _effective_threshold = max(_dpi_threshold or 0, (_dpi_target or 0) + 10) if _dpi_target else None
2344
2329
 
2345
- # Phase 2: Calculate effective DPI for each xref across all page usages
2330
+ # Pass 1: Handle lossless (PNG/Flate) images via page.replace_image()
2331
+ # Calculate DPI for each xref
2332
+ _xref_dpi = {}
2346
2333
  for _page in doc:
2347
2334
  for _info in _page.get_image_info(hashes=False, xrefs=True):
2348
2335
  _xref = _info.get("xref", 0)
2349
- if _xref not in _xref_info:
2336
+ if _xref <= 0:
2350
2337
  continue
2351
2338
  _bbox = _info.get("bbox")
2352
2339
  _w = _info.get("width", 0)
@@ -2356,83 +2343,70 @@ if ${compressImages ? "True" : "False"}:
2356
2343
  _disp_h = abs(_bbox[3] - _bbox[1])
2357
2344
  if _disp_w > 0 and _disp_h > 0:
2358
2345
  _dpi = min(_w / _disp_w * 72, _h / _disp_h * 72)
2359
- if _dpi < _xref_info[_xref]["min_dpi"]:
2360
- _xref_info[_xref]["min_dpi"] = _dpi
2346
+ if _xref not in _xref_dpi or _dpi < _xref_dpi[_xref]:
2347
+ _xref_dpi[_xref] = _dpi
2361
2348
 
2362
- _effective_threshold = max(_dpi_threshold or 0, (_dpi_target or 0) + 10) if _dpi_target else None
2363
-
2364
- # Phase 3: Rewrite each image xref individually
2365
- for _xref, _meta in _xref_info.items():
2366
- _min_dpi = _meta["min_dpi"]
2367
- _smask_xref = _meta["smask"]
2368
-
2369
- _needs_downscale = bool(
2370
- _dpi_target and _effective_threshold
2371
- and _min_dpi != float("inf")
2372
- and _min_dpi > _effective_threshold
2373
- )
2374
- if not _needs_downscale and _quality is None and not _set_to_gray:
2375
- continue
2349
+ _handled = set()
2350
+ for _page in doc:
2351
+ for _img in _page.get_images():
2352
+ _xref = _img[0]
2353
+ if _xref in _handled:
2354
+ continue
2355
+ _handled.add(_xref)
2376
2356
 
2377
- try:
2378
- # Check image type filters (match rewrite_images behavior)
2357
+ _mask_xref = _img[1]
2379
2358
  _xref_obj = doc.xref_object(_xref)
2380
- _is_lossy = "/DCTDecode" in _xref_obj or "/JPXDecode" in _xref_obj
2381
- _is_lossless = not _is_lossy
2382
- if _is_lossy and not _process_lossy:
2359
+
2360
+ if "FlateDecode" not in _xref_obj:
2383
2361
  continue
2384
- if _is_lossless and not _process_lossless:
2362
+
2363
+ _min_dpi = _xref_dpi.get(_xref, float("inf"))
2364
+ _needs_downscale = bool(
2365
+ _dpi_target and _effective_threshold
2366
+ and _min_dpi != float("inf")
2367
+ and _min_dpi > _effective_threshold
2368
+ )
2369
+ if not _needs_downscale and not _set_to_gray:
2385
2370
  continue
2386
2371
 
2387
- _pix = pymupdf.Pixmap(doc, _xref)
2372
+ try:
2373
+ _base = pymupdf.Pixmap(doc, _xref)
2388
2374
 
2389
- # Check colorspace filters
2390
- _n = _pix.colorspace.n if _pix.colorspace else 0
2391
- _is_bitonal = (_pix.colorspace and _n == 1 and doc.xref_get_key(_xref, "BitsPerComponent")[1] == "1")
2392
- _is_gray = (_n == 1 and not _is_bitonal)
2393
- _is_color = (_n >= 3)
2394
- if _is_bitonal and not _process_bitonal:
2395
- _pix = None
2396
- continue
2397
- if _is_gray and not _process_gray:
2398
- _pix = None
2399
- continue
2400
- if _is_color and not _process_color:
2401
- _pix = None
2402
- continue
2375
+ if _base.alpha:
2376
+ _base = pymupdf.Pixmap(_base, 0)
2403
2377
 
2404
- if _set_to_gray and _pix.colorspace and _pix.colorspace.n > 1:
2405
- _pix = pymupdf.Pixmap(pymupdf.csGRAY, _pix)
2406
- elif _pix.alpha:
2407
- _pix = pymupdf.Pixmap(_pix.colorspace or pymupdf.csRGB, _pix)
2378
+ if _mask_xref:
2379
+ _mask = pymupdf.Pixmap(doc, _mask_xref)
2380
+ _base = pymupdf.Pixmap(_base, _mask)
2408
2381
 
2409
- if _needs_downscale:
2410
- _ratio = _min_dpi / _dpi_target
2411
- _shrink_n = max(0, min(7, int(_math.log2(_ratio))))
2412
- if _shrink_n > 0:
2413
- _pix.shrink(_shrink_n)
2382
+ if _set_to_gray and _base.colorspace and _base.colorspace.n > 1:
2383
+ _base = pymupdf.Pixmap(pymupdf.csGRAY, _base)
2384
+ elif _base.colorspace and _base.colorspace.n > 3:
2385
+ _base = pymupdf.Pixmap(pymupdf.csRGB, _base)
2414
2386
 
2415
- _q = _quality if _quality is not None else 85
2416
- _jpeg_bytes = _pix.tobytes("jpeg", jpg_quality=_q)
2387
+ if _needs_downscale:
2388
+ _ratio = _min_dpi / _dpi_target
2389
+ _shrink_n = max(0, min(7, int(_math.log2(_ratio))))
2390
+ if _shrink_n > 0:
2391
+ _base.shrink(_shrink_n)
2417
2392
 
2418
- _cs_name = (
2419
- "/DeviceGray"
2420
- if _pix.colorspace and _pix.colorspace.n == 1
2421
- else "/DeviceRGB"
2422
- )
2423
- _smask_entry = f"/SMask {_smask_xref} 0 R " if _smask_xref else ""
2424
- _new_obj = (
2425
- f"<</Type /XObject /Subtype /Image /BitsPerComponent 8"
2426
- f" /ColorSpace {_cs_name} /Filter /DCTDecode"
2427
- f" /Height {_pix.height} /Width {_pix.width}"
2428
- f" {_smask_entry}>>"
2429
- )
2430
- doc.update_object(_xref, _new_obj)
2431
- doc.update_stream(_xref, _jpeg_bytes, compress=0)
2432
- _pix = None
2393
+ _page.replace_image(_xref, pixmap=_base)
2394
+ _base = None
2395
+ except Exception as _e:
2396
+ pass
2433
2397
 
2434
- except Exception as _e:
2435
- _sys.stderr.write(f"[pymupdf-wasm] safe_rewrite_images xref {_xref}: {_e}\\n")
2398
+ # Pass 2: Handle lossy (JPEG) images via rewrite_images
2399
+ doc.rewrite_images(
2400
+ dpi_threshold=${dpiThreshold},
2401
+ dpi_target=${dpiTarget},
2402
+ quality=${imageQuality},
2403
+ lossless=False,
2404
+ lossy=${processLossy ? "True" : "False"},
2405
+ bitonal=${processBitonal ? "True" : "False"},
2406
+ color=${processColor ? "True" : "False"},
2407
+ gray=${processGray ? "True" : "False"},
2408
+ set_to_gray=${convertToGray ? "True" : "False"},
2409
+ )
2436
2410
 
2437
2411
  # 3. Font subsetting
2438
2412
  if ${subsetFonts ? "True" : "False"}:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bentopdf/pymupdf-wasm",
3
- "version": "0.11.15",
3
+ "version": "0.11.16",
4
4
  "description": "PyMuPDF compiled to WebAssembly - Full PDF manipulation in the browser",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",