@bentopdf/pymupdf-wasm 0.11.14 → 0.11.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/dist/index.js +114 -12
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -2318,19 +2318,121 @@ doc.scrub(
2318
2318
  reset_responses=${scrubResetResponses ? "True" : "False"},
2319
2319
  )
2320
2320
 
2321
- # 2. Image compression
2321
+ # 2. Image compression (safe per-xref approach to avoid MuPDF buffer overflow
2322
+ # with shared image xrefs across many pages \u2014 bypasses doc.rewrite_images())
2322
2323
  if ${compressImages ? "True" : "False"}:
2323
- doc.rewrite_images(
2324
- dpi_threshold=${dpiThreshold},
2325
- dpi_target=${dpiTarget},
2326
- quality=${imageQuality},
2327
- lossy=${processLossy ? "True" : "False"},
2328
- lossless=${processLossless ? "True" : "False"},
2329
- bitonal=${processBitonal ? "True" : "False"},
2330
- color=${processColor ? "True" : "False"},
2331
- gray=${processGray ? "True" : "False"},
2332
- set_to_gray=${convertToGray ? "True" : "False"},
2333
- )
2324
+ import math as _math
2325
+ import sys as _sys
2326
+
2327
+ _dpi_target = ${dpiTarget}
2328
+ _dpi_threshold = ${dpiThreshold}
2329
+ _quality = ${imageQuality}
2330
+ _set_to_gray = ${convertToGray ? "True" : "False"}
2331
+ _process_lossy = ${processLossy ? "True" : "False"}
2332
+ _process_lossless = ${processLossless ? "True" : "False"}
2333
+ _process_bitonal = ${processBitonal ? "True" : "False"}
2334
+ _process_color = ${processColor ? "True" : "False"}
2335
+ _process_gray = ${processGray ? "True" : "False"}
2336
+
2337
+ # Phase 1: Collect unique image xrefs and smask info
2338
+ _xref_info = {}
2339
+ for _page in doc:
2340
+ for _img in _page.get_images(full=True):
2341
+ _xref, _smask = _img[0], _img[1]
2342
+ if _xref > 0:
2343
+ _xref_info.setdefault(_xref, {"smask": _smask, "min_dpi": float("inf")})
2344
+
2345
+ # Phase 2: Calculate effective DPI for each xref across all page usages
2346
+ for _page in doc:
2347
+ for _info in _page.get_image_info(hashes=False, xrefs=True):
2348
+ _xref = _info.get("xref", 0)
2349
+ if _xref not in _xref_info:
2350
+ continue
2351
+ _bbox = _info.get("bbox")
2352
+ _w = _info.get("width", 0)
2353
+ _h = _info.get("height", 0)
2354
+ if _bbox and _w > 0 and _h > 0:
2355
+ _disp_w = abs(_bbox[2] - _bbox[0])
2356
+ _disp_h = abs(_bbox[3] - _bbox[1])
2357
+ if _disp_w > 0 and _disp_h > 0:
2358
+ _dpi = min(_w / _disp_w * 72, _h / _disp_h * 72)
2359
+ if _dpi < _xref_info[_xref]["min_dpi"]:
2360
+ _xref_info[_xref]["min_dpi"] = _dpi
2361
+
2362
+ _effective_threshold = max(_dpi_threshold or 0, (_dpi_target or 0) + 10) if _dpi_target else None
2363
+
2364
+ # Phase 3: Rewrite each image xref individually
2365
+ for _xref, _meta in _xref_info.items():
2366
+ _min_dpi = _meta["min_dpi"]
2367
+ _smask_xref = _meta["smask"]
2368
+
2369
+ _needs_downscale = bool(
2370
+ _dpi_target and _effective_threshold
2371
+ and _min_dpi != float("inf")
2372
+ and _min_dpi > _effective_threshold
2373
+ )
2374
+ if not _needs_downscale and _quality is None and not _set_to_gray:
2375
+ continue
2376
+
2377
+ try:
2378
+ # Check image type filters (match rewrite_images behavior)
2379
+ _xref_obj = doc.xref_object(_xref)
2380
+ _is_lossy = "/DCTDecode" in _xref_obj or "/JPXDecode" in _xref_obj
2381
+ _is_lossless = not _is_lossy
2382
+ if _is_lossy and not _process_lossy:
2383
+ continue
2384
+ if _is_lossless and not _process_lossless:
2385
+ continue
2386
+
2387
+ _pix = pymupdf.Pixmap(doc, _xref)
2388
+
2389
+ # Check colorspace filters
2390
+ _n = _pix.colorspace.n if _pix.colorspace else 0
2391
+ _is_bitonal = (_pix.colorspace and _n == 1 and doc.xref_get_key(_xref, "BitsPerComponent")[1] == "1")
2392
+ _is_gray = (_n == 1 and not _is_bitonal)
2393
+ _is_color = (_n >= 3)
2394
+ if _is_bitonal and not _process_bitonal:
2395
+ _pix = None
2396
+ continue
2397
+ if _is_gray and not _process_gray:
2398
+ _pix = None
2399
+ continue
2400
+ if _is_color and not _process_color:
2401
+ _pix = None
2402
+ continue
2403
+
2404
+ if _set_to_gray and _pix.colorspace and _pix.colorspace.n > 1:
2405
+ _pix = pymupdf.Pixmap(pymupdf.csGRAY, _pix)
2406
+ elif _pix.alpha:
2407
+ _pix = pymupdf.Pixmap(_pix.colorspace or pymupdf.csRGB, _pix)
2408
+
2409
+ if _needs_downscale:
2410
+ _ratio = _min_dpi / _dpi_target
2411
+ _shrink_n = max(0, min(7, int(_math.log2(_ratio))))
2412
+ if _shrink_n > 0:
2413
+ _pix.shrink(_shrink_n)
2414
+
2415
+ _q = _quality if _quality is not None else 85
2416
+ _jpeg_bytes = _pix.tobytes("jpeg", jpg_quality=_q)
2417
+
2418
+ _cs_name = (
2419
+ "/DeviceGray"
2420
+ if _pix.colorspace and _pix.colorspace.n == 1
2421
+ else "/DeviceRGB"
2422
+ )
2423
+ _smask_entry = f"/SMask {_smask_xref} 0 R " if _smask_xref else ""
2424
+ _new_obj = (
2425
+ f"<</Type /XObject /Subtype /Image /BitsPerComponent 8"
2426
+ f" /ColorSpace {_cs_name} /Filter /DCTDecode"
2427
+ f" /Height {_pix.height} /Width {_pix.width}"
2428
+ f" {_smask_entry}>>"
2429
+ )
2430
+ doc.update_object(_xref, _new_obj)
2431
+ doc.update_stream(_xref, _jpeg_bytes, compress=0)
2432
+ _pix = None
2433
+
2434
+ except Exception as _e:
2435
+ _sys.stderr.write(f"[pymupdf-wasm] safe_rewrite_images xref {_xref}: {_e}\\n")
2334
2436
 
2335
2437
  # 3. Font subsetting
2336
2438
  if ${subsetFonts ? "True" : "False"}:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bentopdf/pymupdf-wasm",
3
- "version": "0.11.14",
3
+ "version": "0.11.15",
4
4
  "description": "PyMuPDF compiled to WebAssembly - Full PDF manipulation in the browser",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",