PyPI - sigdetect - Versions diffs - 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

sigdetect 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (16) hide show

sigdetect/__init__.py +1 -1
sigdetect/api.py +43 -11
sigdetect/cli.py +89 -23
sigdetect/config.py +48 -3
sigdetect/cropping.py +72 -12
sigdetect/detector/__init__.py +27 -8
sigdetect/detector/pymupdf_engine.py +3 -2
sigdetect/detector/pypdf2_engine.py +7 -5
sigdetect/detector/signature_model.py +3 -1
sigdetect/wet_detection.py +549 -0
{sigdetect-0.3.1.dist-info → sigdetect-0.5.0.dist-info}/METADATA +28 -25
sigdetect-0.5.0.dist-info/RECORD +24 -0
{sigdetect-0.3.1.dist-info → sigdetect-0.5.0.dist-info}/WHEEL +1 -1
sigdetect-0.3.1.dist-info/RECORD +0 -23
{sigdetect-0.3.1.dist-info → sigdetect-0.5.0.dist-info}/entry_points.txt +0 -0
{sigdetect-0.3.1.dist-info → sigdetect-0.5.0.dist-info}/top_level.txt +0 -0

sigdetect/detector/__init__.py CHANGED Viewed

@@ -2,6 +2,7 @@
 from __future__ import annotations
+import warnings
 from typing import TYPE_CHECKING, Type
 from .base_detector import Detector
@@ -21,10 +22,13 @@ ENGINE_REGISTRY: dict[str, Type[Detector]] = {
 ENGINE_REGISTRY.setdefault("pypdf", PyPDF2Detector)
 try:  # pragma: no cover - optional dependency
-    from .pymupdf_engine import PyMuPDFDetector  # type: ignore
+    from .pymupdf_engine import PyMuPDFDetector
+    from .pymupdf_engine import fitz as pymupdf_fitz  # type: ignore
-    if getattr(PyMuPDFDetector, "Name", None):
+    if pymupdf_fitz is not None and getattr(PyMuPDFDetector, "Name", None):
         ENGINE_REGISTRY[PyMuPDFDetector.Name] = PyMuPDFDetector
+    else:
+        PyMuPDFDetector = None  # type: ignore
 except Exception:
     PyMuPDFDetector = None  # type: ignore
@@ -32,12 +36,27 @@ except Exception:
 def BuildDetector(configuration: DetectConfiguration) -> Detector:
     """Instantiate the configured engine or raise a clear error."""
-    engine_name = (
-        getattr(configuration, "Engine", None)
-        or getattr(configuration, "engine", None)
-        or PyPDF2Detector.Name
-    )
-    normalized = engine_name.lower()
+    # Force geometry-capable engine selection (auto prefers PyMuPDF when available).
+    engine_name = "auto"
+    normalized = str(engine_name).lower()
+    if normalized == "auto":
+        detector_cls: Type[Detector] | None = None
+        if PyMuPDFDetector is not None:
+            detector_cls = (
+                ENGINE_REGISTRY.get(getattr(PyMuPDFDetector, "Name", "")) or PyMuPDFDetector
+            )
+        if detector_cls is None:
+            detector_cls = ENGINE_REGISTRY.get(PyPDF2Detector.Name) or ENGINE_REGISTRY.get("pypdf")
+            warnings.warn(
+                "Engine 'auto' falling back to 'pypdf2' because PyMuPDF is unavailable",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+        if detector_cls is None:
+            available = ", ".join(sorted(ENGINE_REGISTRY)) or "<none>"
+            raise ValueError(f"No available detector engines. Available engines: {available}")
+        return detector_cls(configuration)
     detector_cls = ENGINE_REGISTRY.get(normalized)
     if detector_cls is None:

sigdetect/detector/pymupdf_engine.py CHANGED Viewed

@@ -30,8 +30,8 @@ class PyMuPDFDetector(PyPDF2Detector):
     def __init__(self, configuration):
         if fitz is None:  # pragma: no cover - optional dependency
             raise ValueError(
-                "PyMuPDF engine requires the optional 'pymupdf' dependency. Install via 'pip install "
-                "sigdetect[pymupdf]' or add pymupdf to your environment."
+                "PyMuPDF engine requires the optional 'pymupdf' dependency. Install 'pymupdf' or add "
+                "it to your environment."
             )
         super().__init__(configuration)
@@ -111,6 +111,7 @@ class PyMuPDFDetector(PyPDF2Detector):
                     rect, exclusion, mode = rect_info
                     padded = self._PadRect(rect, page.rect, signature.Role, exclusion, mode)
                     signature.BoundingBox = self._RectToPdfTuple(padded, page.rect.height)
+                    signature.RenderType = "drawn"
                     if signature.Page is None:
                         signature.Page = page_index + 1
                     break

sigdetect/detector/pypdf2_engine.py CHANGED Viewed

@@ -348,7 +348,7 @@ class PyPDF2Detector(Detector):
         return normalized.lower().startswith("im")
     def _ClassifyAppearance(self, widget: generic.DictionaryObject, page) -> str:
-        """Classify the widget's appearance as drawn/typed/hybrid/unknown."""
+        """Classify the widget's appearance as drawn or typed."""
         ap_dict = AsDictionary(widget.get("/AP"))
         if not isinstance(ap_dict, generic.DictionaryObject):
@@ -356,7 +356,7 @@ class PyPDF2Detector(Detector):
         normal = ap_dict.get("/N")
         streams = self._ExtractAppearanceStreams(normal)
         if not streams:
-            return "unknown"
+            return "typed"
         has_text = False
         has_vector = False
@@ -384,13 +384,11 @@ class PyPDF2Detector(Detector):
                         has_image = True
                         break
-        if has_image and (has_text or has_vector):
-            return "hybrid"
         if has_image:
             return "drawn"
         if has_text or has_vector:
             return "typed"
-        return "unknown"
+        return "typed"
     # ---- file-wide stream scan (compressed or not)
     def _ScanFileStreamsForVendors(self, file_bytes: bytes) -> tuple[set[str], str]:
@@ -863,6 +861,7 @@ class PyPDF2Detector(Detector):
                                 Scores={r: sc},
                                 Evidence=ev + ["pseudo:true"],
                                 Hint="VendorOrAcroOnly",
+                                RenderType="typed",
                             )
                         )
@@ -903,6 +902,7 @@ class PyPDF2Detector(Detector):
                                 Scores={role: score} if score > 0 else {},
                                 Evidence=ev + ["pseudo:true"],
                                 Hint="VendorOrAcroOnly",
+                                RenderType="typed",
                             )
                         )
@@ -1055,6 +1055,7 @@ class PyPDF2Detector(Detector):
                         Scores=scores,
                         Evidence=evidence,
                         Hint=f"AcroSig:{fname}" if fname else "AcroSig",
+                        RenderType="typed",
                     )
                 )
@@ -1120,6 +1121,7 @@ class PyPDF2Detector(Detector):
                         Scores=dict(scores),
                         Evidence=evidence + ["pseudo:true"],
                         Hint="VendorOrAcroOnly",
+                        RenderType="typed",
                     )
                 )

sigdetect/detector/signature_model.py CHANGED Viewed

@@ -17,9 +17,10 @@ class Signature:
     Scores: dict[str, int]
     Evidence: list[str]
     Hint: str
-    RenderType: str = "unknown"
+    RenderType: str = "typed"
     BoundingBox: tuple[float, float, float, float] | None = None
     CropPath: str | None = None
+    CropBytes: str | None = None
     def to_dict(self) -> dict[str, Any]:
         """Return the legacy snake_case representation used in JSON payloads."""
@@ -35,4 +36,5 @@ class Signature:
             "render_type": self.RenderType,
             "bounding_box": list(self.BoundingBox) if self.BoundingBox else None,
             "crop_path": self.CropPath,
+            "crop_bytes": self.CropBytes,
         }

sigdetect 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

sigdetect 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl