PyPI - sigdetect - Versions diffs - 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

sigdetect-0.3.1-py3-none-any.whl → sigdetect-0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

sigdetect/__init__.py +1 -1
sigdetect/api.py +43 -11
sigdetect/cli.py +89 -23
sigdetect/config.py +48 -3
sigdetect/cropping.py +72 -12
sigdetect/detector/__init__.py +27 -8
sigdetect/detector/pymupdf_engine.py +3 -2
sigdetect/detector/pypdf2_engine.py +7 -5
sigdetect/detector/signature_model.py +3 -1
sigdetect/wet_detection.py +549 -0
{sigdetect-0.3.1.dist-info → sigdetect-0.5.0.dist-info}/METADATA +28 -25
sigdetect-0.5.0.dist-info/RECORD +24 -0
{sigdetect-0.3.1.dist-info → sigdetect-0.5.0.dist-info}/WHEEL +1 -1
sigdetect-0.3.1.dist-info/RECORD +0 -23
{sigdetect-0.3.1.dist-info → sigdetect-0.5.0.dist-info}/entry_points.txt +0 -0
{sigdetect-0.3.1.dist-info → sigdetect-0.5.0.dist-info}/top_level.txt +0 -0

sigdetect/__init__.py CHANGED Viewed

@@ -21,4 +21,4 @@ try:
 except PackageNotFoundError:  # pragma: no cover
     __version__ = "0.0.0"
-DEFAULT_ENGINE = "pypdf2"
+DEFAULT_ENGINE = "auto"

sigdetect/api.py CHANGED Viewed

@@ -9,8 +9,9 @@ from typing import Any, Generator, Iterable, Iterator, Literal, overload
 from sigdetect.config import DetectConfiguration
 from sigdetect.cropping import SignatureCrop
 from sigdetect.detector import BuildDetector, Detector, FileResult, Signature
+from sigdetect.wet_detection import apply_wet_detection
-EngineName = Literal["pypdf2", "pypdf", "pymupdf"]
+EngineName = Literal["pypdf2", "pypdf", "pymupdf", "auto"]
 ProfileName = Literal["hipaa", "retainer"]
@@ -18,12 +19,16 @@ def DetectPdf(
     pdfPath: str | Path,
     *,
     profileName: ProfileName = "hipaa",
-    engineName: EngineName = "pypdf2",
+    engineName: EngineName = "auto",
     includePseudoSignatures: bool = True,
     recurseXObjects: bool = True,
+    runWetDetection: bool = True,
     detector: Detector | None = None,
 ) -> dict[str, Any]:
-    """Detect signature evidence and assign roles for a single PDF."""
+    """Detect signature evidence and assign roles for a single PDF.
+    Wet detection runs by default for non-e-sign PDFs; pass ``runWetDetection=False`` to skip OCR.
+    """
     resolvedPath = Path(pdfPath)
     activeDetector = detector or get_detector(
@@ -36,6 +41,10 @@ def DetectPdf(
     )
     result = activeDetector.Detect(resolvedPath)
+    if runWetDetection:
+        configuration = _ResolveConfiguration(activeDetector)
+        if configuration is not None:
+            apply_wet_detection(resolvedPath, configuration, result)
     return _ToPlainDictionary(result)
@@ -43,12 +52,15 @@ def get_detector(
     *,
     pdfRoot: str | Path | None = None,
     profileName: ProfileName = "hipaa",
-    engineName: EngineName = "pypdf2",
+    engineName: EngineName = "auto",
     includePseudoSignatures: bool = True,
     recurseXObjects: bool = True,
     outputDirectory: str | Path | None = None,
 ) -> Detector:
-    """Return a reusable detector instance configured with the supplied options."""
+    """Return a reusable detector instance configured with the supplied options.
+    Engine selection is forced to ``auto`` (prefers PyMuPDF when available).
+    """
     configuration = DetectConfiguration(
         PdfRoot=Path(pdfRoot) if pdfRoot is not None else Path.cwd(),
@@ -108,6 +120,7 @@ def _ToPlainValue(value: Any) -> Any:
 def DetectMany(
     pdfPaths: Iterable[str | Path],
     *,
+    runWetDetection: bool = True,
     detector: Detector | None = None,
     **kwargs: Any,
 ) -> Iterator[dict[str, Any]]:
@@ -115,17 +128,18 @@ def DetectMany(
     if detector is not None:
         for pdfPath in pdfPaths:
-            yield _DetectWithDetector(detector, pdfPath)
+            yield _DetectWithDetector(detector, pdfPath, runWetDetection=runWetDetection)
         return
     for pdfPath in pdfPaths:
-        yield DetectPdf(pdfPath, **kwargs)
+        yield DetectPdf(pdfPath, runWetDetection=runWetDetection, **kwargs)
 def ScanDirectory(
     pdfRoot: str | Path,
     *,
     globPattern: str = "**/*.pdf",
+    runWetDetection: bool = True,
     detector: Detector | None = None,
     **kwargs: Any,
 ) -> Iterator[dict[str, Any]]:
@@ -143,7 +157,7 @@ def ScanDirectory(
     for pdfPath in iterator:
         if pdfPath.is_file() and pdfPath.suffix.lower() == ".pdf":
-            yield DetectPdf(pdfPath, detector=detector, **kwargs)
+            yield DetectPdf(pdfPath, detector=detector, runWetDetection=runWetDetection, **kwargs)
 def ToCsvRow(result: dict[str, Any]) -> dict[str, Any]:
@@ -174,11 +188,25 @@ def Version() -> str:
         return "0.0.0-dev"
-def _DetectWithDetector(detector: Detector, pdfPath: str | Path) -> dict[str, Any]:
+def _DetectWithDetector(
+    detector: Detector, pdfPath: str | Path, *, runWetDetection: bool
+) -> dict[str, Any]:
     """Helper that runs ``detector`` and returns the plain dictionary result."""
     resolvedPath = Path(pdfPath)
-    return _ToPlainDictionary(detector.Detect(resolvedPath))
+    result = detector.Detect(resolvedPath)
+    if runWetDetection:
+        configuration = _ResolveConfiguration(detector)
+        if configuration is not None:
+            apply_wet_detection(resolvedPath, configuration, result)
+    return _ToPlainDictionary(result)
+def _ResolveConfiguration(detector: Detector) -> DetectConfiguration | None:
+    configuration = getattr(detector, "Configuration", None)
+    if isinstance(configuration, DetectConfiguration):
+        return configuration
+    return None
 @contextmanager
@@ -225,12 +253,15 @@ def CropSignatureImages(
     returnBytes: bool = False,
     saveToDisk: bool = True,
 ) -> list[Path] | list[SignatureCrop]:
-    """Crop detected signature regions to PNG files.
+    """Create DOCX files containing cropped signature images.
     Accepts either a :class:`FileResult` instance or the ``dict`` returned by
     :func:`DetectPdf`. Requires the optional ``pymupdf`` dependency.
     Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop. Set
     ``saveToDisk=False`` to skip writing PNG files while still returning in-memory data.
+    When ``saveToDisk`` is enabled, a one-image DOCX file is also written per crop. When
+    ``returnBytes`` is True and ``python-docx`` is available, the returned
+    :class:`SignatureCrop` objects include ``docx_bytes``.
     """
     from sigdetect.cropping import crop_signatures
@@ -273,6 +304,7 @@ def _CoerceFileResult(
                 RenderType=str(entry.get("render_type") or "unknown"),
                 BoundingBox=tuple(bbox) if bbox else None,
                 CropPath=entry.get("crop_path"),
+                CropBytes=entry.get("crop_bytes"),
             )
         )

sigdetect/cli.py CHANGED Viewed

@@ -2,6 +2,7 @@
 from __future__ import annotations
+import base64
 import json
 from collections.abc import Iterator
 from dataclasses import asdict, is_dataclass
@@ -15,6 +16,7 @@ from .cropping import SignatureCroppingUnavailable, crop_signatures
 from .detector import BuildDetector, FileResult
 from .eda import RunExploratoryAnalysis
 from .logging_setup import ConfigureLogging
+from .wet_detection import apply_wet_detection
 Logger = ConfigureLogging()
@@ -47,6 +49,12 @@ def Detect(
     configurationPath: Path | None = typer.Option(
         None, "--config", "-c", help="Path to YAML config"
     ),
+    writeResults: bool | None = typer.Option(
+        None,
+        "--write-results/--no-write-results",
+        help="Write results.json (or JSON to stdout when out_dir is none)",
+        show_default=False,
+    ),
     profileOverride: str | None = typer.Option(None, "--profile", "-p", help="hipaa or retainer"),
     recursive: bool = typer.Option(
         True,
@@ -56,13 +64,13 @@ def Detect(
     cropSignatures: bool | None = typer.Option(
         None,
         "--crop-signatures/--no-crop-signatures",
-        help="Crop detected signature regions to PNG files (requires PyMuPDF)",
+        help="Write DOCX files containing cropped signature images (requires PyMuPDF + python-docx)",
         show_default=False,
     ),
     cropDirectory: Path | None = typer.Option(
         None,
         "--crop-dir",
-        help="Directory for signature PNG crops (defaults to out_dir/signature_crops)",
+        help="Directory for signature DOCX crops (defaults to out_dir/signature_crops)",
     ),
     cropDpi: int | None = typer.Option(
         None,
@@ -72,6 +80,39 @@ def Detect(
         help="Rendering DPI for signature crops",
         show_default=False,
     ),
+    cropBytes: bool = typer.Option(
+        False,
+        "--crop-bytes/--no-crop-bytes",
+        help="Embed base64 PNG bytes for signature crops in results JSON",
+        show_default=False,
+    ),
+    detectWetSignatures: bool | None = typer.Option(
+        None,
+        "--detect-wet/--no-detect-wet",
+        help="Compatibility flag; non-e-sign PDFs always run OCR when deps are available",
+        show_default=False,
+    ),
+    wetOcrDpi: int | None = typer.Option(
+        None,
+        "--wet-ocr-dpi",
+        min=72,
+        max=600,
+        help="Rendering DPI for OCR pages (wet detection)",
+        show_default=False,
+    ),
+    wetOcrLanguages: str | None = typer.Option(
+        None,
+        "--wet-ocr-languages",
+        help="Tesseract language packs for OCR (e.g., 'eng' or 'eng+spa')",
+    ),
+    wetPrecisionThreshold: float | None = typer.Option(
+        None,
+        "--wet-precision-threshold",
+        min=0.0,
+        max=1.0,
+        help="Minimum wet-signature confidence (0-1) to accept a candidate",
+        show_default=False,
+    ),
 ) -> None:
     """Run detection for the configured directory and emit ``results.json``."""
@@ -83,12 +124,22 @@ def Detect(
         configuration = configuration.model_copy(update={"Profile": normalized_profile})
     overrides: dict[str, object] = {}
+    if writeResults is not None:
+        overrides["WriteResults"] = writeResults
     if cropSignatures is not None:
         overrides["CropSignatures"] = cropSignatures
     if cropDirectory is not None:
         overrides["CropOutputDirectory"] = cropDirectory
     if cropDpi is not None:
         overrides["CropImageDpi"] = cropDpi
+    if detectWetSignatures is not None:
+        overrides["DetectWetSignatures"] = detectWetSignatures
+    if wetOcrDpi is not None:
+        overrides["WetOcrDpi"] = wetOcrDpi
+    if wetOcrLanguages is not None:
+        overrides["WetOcrLanguages"] = wetOcrLanguages
+    if wetPrecisionThreshold is not None:
+        overrides["WetPrecisionThreshold"] = wetPrecisionThreshold
     if overrides:
         configuration = configuration.model_copy(update=overrides)
         configuration = FinalizeConfiguration(configuration)
@@ -109,44 +160,52 @@ def Detect(
     except StopIteration:
         raise SystemExit(f"No PDFs found in {configuration.PdfRoot}") from None
-    results_buffer: list[FileResult] | None = [] if configuration.OutputDirectory is None else None
+    write_results = configuration.WriteResults
+    results_buffer: list[FileResult] | None = (
+        [] if write_results and configuration.OutputDirectory is None else None
+    )
     json_handle = None
     json_path: Path | None = None
     wrote_first = False
-    if configuration.OutputDirectory is not None:
+    if write_results and configuration.OutputDirectory is not None:
         outputDirectory = configuration.OutputDirectory
         outputDirectory.mkdir(parents=True, exist_ok=True)
         json_path = outputDirectory / "results.json"
         json_handle = open(json_path, "w", encoding="utf-8")
         json_handle.write("[")
+    crop_bytes_enabled = bool(cropBytes)
     crop_dir = configuration.CropOutputDirectory
+    if crop_dir is None:
+        base_dir = configuration.OutputDirectory or configuration.PdfRoot
+        crop_dir = base_dir / "signature_crops"
     cropping_enabled = configuration.CropSignatures
     cropping_available = True
     cropping_attempted = False
-    if configuration.CropSignatures and crop_dir is None:
-        Logger.warning(
-            "CropSignatures enabled without an output directory",
-            extra={"pdf_root": str(configuration.PdfRoot)},
-        )
-        cropping_enabled = False
     total_bboxes = 0
     def _append_result(file_result: FileResult, source_pdf: Path) -> None:
         nonlocal wrote_first, json_handle, total_bboxes, cropping_available, cropping_attempted
-        if cropping_enabled and cropping_available and crop_dir is not None:
+        if cropping_available and (cropping_enabled or crop_bytes_enabled) and crop_dir is not None:
             try:
-                crop_signatures(
+                crops = crop_signatures(
                     pdf_path=source_pdf,
                     file_result=file_result,
                     output_dir=crop_dir,
                     dpi=configuration.CropImageDpi,
                     logger=Logger,
+                    return_bytes=crop_bytes_enabled,
+                    save_files=cropping_enabled,
                 )
                 cropping_attempted = True
+                if crop_bytes_enabled:
+                    for crop in crops:
+                        crop.signature.CropBytes = base64.b64encode(crop.image_bytes).decode(
+                            "ascii"
+                        )
             except SignatureCroppingUnavailable as exc:
                 cropping_available = False
                 Logger.warning("Signature cropping unavailable", extra={"error": str(exc)})
@@ -182,6 +241,7 @@ def Detect(
     def _process(pdf_path: Path) -> None:
         file_result = detector.Detect(pdf_path)
+        apply_wet_detection(pdf_path, configuration, file_result, logger=Logger)
         _append_result(file_result, pdf_path)
     try:
@@ -194,18 +254,24 @@ def Detect(
             json_handle.write(closing)
             json_handle.close()
-    if json_handle is not None:
-        typer.echo(f"Wrote {json_path}")
-    else:
-        payload = json.dumps(
-            results_buffer or [], indent=2, ensure_ascii=False, default=_JsonSerializer
-        )
-        typer.echo(payload)
-        typer.echo("Detection completed with output disabled (out_dir=none)")
-    if cropping_enabled and cropping_available and cropping_attempted and total_bboxes == 0:
+    if write_results:
+        if json_handle is not None:
+            typer.echo(f"Wrote {json_path}")
+        else:
+            payload = json.dumps(
+                results_buffer or [], indent=2, ensure_ascii=False, default=_JsonSerializer
+            )
+            typer.echo(payload)
+            typer.echo("Detection completed with output disabled (out_dir=none)")
+    if (
+        (cropping_enabled or crop_bytes_enabled)
+        and cropping_available
+        and cropping_attempted
+        and total_bboxes == 0
+    ):
         Logger.warning(
-            "No signature bounding boxes detected; try --engine pymupdf for crop-ready output",
+            "No signature bounding boxes detected; install PyMuPDF for crop-ready output",
             extra={"engine": configuration.Engine},
         )

sigdetect/config.py CHANGED Viewed

@@ -10,7 +10,7 @@ from typing import Literal
 import yaml
 from pydantic import BaseModel, ConfigDict, Field, field_validator
-EngineName = Literal["pypdf2", "pypdf", "pymupdf"]
+EngineName = Literal["pypdf2", "pypdf", "pymupdf", "auto"]
 ProfileName = Literal["hipaa", "retainer"]
@@ -25,13 +25,20 @@ class DetectConfiguration(BaseModel):
     PdfRoot: Path = Field(default=Path("hipaa_results"), alias="pdf_root")
     OutputDirectory: Path | None = Field(default=Path("out"), alias="out_dir")
-    Engine: EngineName = Field(default="pypdf2", alias="engine")
+    WriteResults: bool = Field(default=False, alias="write_results")
+    Engine: EngineName = Field(default="auto", alias="engine")
     Profile: ProfileName = Field(default="hipaa", alias="profile")
     PseudoSignatures: bool = Field(default=True, alias="pseudo_signatures")
     RecurseXObjects: bool = Field(default=True, alias="recurse_xobjects")
-    CropSignatures: bool = Field(default=False, alias="crop_signatures")
+    CropSignatures: bool = Field(default=True, alias="crop_signatures")
     CropOutputDirectory: Path | None = Field(default=None, alias="crop_output_dir")
     CropImageDpi: int = Field(default=200, alias="crop_image_dpi", ge=72, le=600)
+    DetectWetSignatures: bool = Field(default=True, alias="detect_wet_signatures")
+    WetOcrDpi: int = Field(default=200, alias="wet_ocr_dpi", ge=72, le=600)
+    WetOcrLanguages: str = Field(default="eng", alias="wet_ocr_languages")
+    WetPrecisionThreshold: float = Field(
+        default=0.82, alias="wet_precision_threshold", ge=0.0, le=1.0
+    )
     @field_validator("PdfRoot", "OutputDirectory", "CropOutputDirectory", mode="before")
     @classmethod
@@ -57,6 +64,10 @@ class DetectConfiguration(BaseModel):
     def out_dir(self) -> Path | None:  # pragma: no cover - simple passthrough
         return self.OutputDirectory
+    @property
+    def write_results(self) -> bool:  # pragma: no cover - simple passthrough
+        return self.WriteResults
     @property
     def engine(self) -> EngineName:  # pragma: no cover - simple passthrough
         return self.Engine
@@ -85,6 +96,22 @@ class DetectConfiguration(BaseModel):
     def crop_image_dpi(self) -> int:  # pragma: no cover - simple passthrough
         return self.CropImageDpi
+    @property
+    def detect_wet_signatures(self) -> bool:  # pragma: no cover - simple passthrough
+        return self.DetectWetSignatures
+    @property
+    def wet_ocr_dpi(self) -> int:  # pragma: no cover - simple passthrough
+        return self.WetOcrDpi
+    @property
+    def wet_ocr_languages(self) -> str:  # pragma: no cover - simple passthrough
+        return self.WetOcrLanguages
+    @property
+    def wet_precision_threshold(self) -> float:  # pragma: no cover - simple passthrough
+        return self.WetPrecisionThreshold
 def LoadConfiguration(path: Path | None) -> DetectConfiguration:
     """Load configuration from ``path`` while applying environment overrides.
@@ -108,6 +135,10 @@ def LoadConfiguration(path: Path | None) -> DetectConfiguration:
     env_crop = os.getenv("SIGDETECT_CROP_SIGNATURES")
     env_crop_dir = os.getenv("SIGDETECT_CROP_DIR")
     env_crop_dpi = os.getenv("SIGDETECT_CROP_DPI")
+    env_detect_wet = os.getenv("SIGDETECT_DETECT_WET")
+    env_wet_dpi = os.getenv("SIGDETECT_WET_OCR_DPI")
+    env_wet_lang = os.getenv("SIGDETECT_WET_LANGUAGES")
+    env_wet_precision = os.getenv("SIGDETECT_WET_PRECISION")
     raw_data: dict[str, object] = {}
     if path and Path(path).exists():
@@ -133,6 +164,20 @@ def LoadConfiguration(path: Path | None) -> DetectConfiguration:
     if env_crop_dpi:
         with suppress(ValueError):
             raw_data["crop_image_dpi"] = int(env_crop_dpi)
+    if env_detect_wet is not None:
+        lowered = env_detect_wet.lower()
+        if lowered in {"1", "true", "yes", "on"}:
+            raw_data["detect_wet_signatures"] = True
+        elif lowered in {"0", "false", "no", "off"}:
+            raw_data["detect_wet_signatures"] = False
+    if env_wet_dpi:
+        with suppress(ValueError):
+            raw_data["wet_ocr_dpi"] = int(env_wet_dpi)
+    if env_wet_lang:
+        raw_data["wet_ocr_languages"] = env_wet_lang
+    if env_wet_precision:
+        with suppress(ValueError):
+            raw_data["wet_precision_threshold"] = float(env_wet_precision)
     configuration = DetectConfiguration(**raw_data)
     return FinalizeConfiguration(configuration)

sigdetect/cropping.py CHANGED Viewed

@@ -1,7 +1,8 @@
-"""Helpers for converting signature bounding boxes into PNG crops."""
+"""Helpers for converting signature bounding boxes into DOCX crops."""
 from __future__ import annotations
+import io
 import logging
 import re
 from dataclasses import dataclass
@@ -16,18 +17,28 @@ try:  # pragma: no cover - optional dependency
 except Exception:  # pragma: no cover - optional dependency
     fitz = None  # type: ignore[misc]
+try:  # pragma: no cover - optional dependency
+    from docx import Document  # type: ignore
+except Exception:  # pragma: no cover - optional dependency
+    Document = None  # type: ignore[assignment]
 class SignatureCroppingUnavailable(RuntimeError):
     """Raised when PNG cropping cannot be performed (e.g., PyMuPDF missing)."""
+class SignatureDocxUnavailable(RuntimeError):
+    """Raised when DOCX creation cannot be performed (e.g., python-docx missing)."""
 @dataclass(slots=True)
 class SignatureCrop:
-    """PNG crop metadata and in-memory content."""
+    """Crop metadata and in-memory content."""
     path: Path
     image_bytes: bytes
     signature: Signature
+    docx_bytes: bytes | None = None
     saved_to_disk: bool = True
@@ -40,6 +51,7 @@ def crop_signatures(
     dpi: int = 200,
     logger: logging.Logger | None = None,
     return_bytes: Literal[False] = False,
+    save_files: bool = True,
 ) -> list[Path]: ...
@@ -51,7 +63,8 @@ def crop_signatures(
     output_dir: Path,
     dpi: int = 200,
     logger: logging.Logger | None = None,
-    return_bytes: Literal[True] = True,
+    return_bytes: Literal[True],
+    save_files: bool = True,
 ) -> list[SignatureCrop]: ...
@@ -65,15 +78,18 @@ def crop_signatures(
     return_bytes: bool = False,
     save_files: bool = True,
 ) -> list[Path] | list[SignatureCrop]:
-    """Render each signature bounding box to a PNG image using PyMuPDF.
+    """Render each signature bounding box to a PNG image and wrap it in a DOCX file.
     Set ``return_bytes=True`` to collect in-memory PNG bytes for each crop while also writing
     the files to ``output_dir``. Set ``save_files=False`` to skip writing PNGs to disk.
+    When ``save_files`` is enabled, a one-image DOCX file is also written per signature crop.
+    When ``return_bytes`` is True and ``python-docx`` is available, ``SignatureCrop.docx_bytes``
+    will contain the DOCX payload.
     """
     if fitz is None:  # pragma: no cover - exercised when dependency absent
         raise SignatureCroppingUnavailable(
-            "PyMuPDF is required for PNG crops. Install 'pymupdf' or 'sigdetect[pymupdf]'."
+            "PyMuPDF is required for PNG crops. Install 'pymupdf' or add it to your environment."
         )
     if not save_files and not return_bytes:
         raise ValueError("At least one of save_files or return_bytes must be True")
@@ -85,6 +101,16 @@ def crop_signatures(
     generated_paths: list[Path] = []
     generated_crops: list[SignatureCrop] = []
+    docx_to_disk = save_files
+    docx_in_memory = return_bytes
+    docx_enabled = docx_to_disk or docx_in_memory
+    docx_available = Document is not None
+    if docx_enabled and not docx_available and logger:
+        logger.warning(
+            "Signature DOCX output unavailable",
+            extra={"error": "python-docx is required to generate DOCX outputs"},
+        )
     with fitz.open(pdf_path) as document:  # type: ignore[attr-defined]
         per_document_dir = output_dir / pdf_path.stem
         if save_files:
@@ -114,14 +140,13 @@ def crop_signatures(
                 continue
             filename = _build_filename(index, signature)
-            destination = per_document_dir / filename
+            png_destination = per_document_dir / filename
+            docx_destination = png_destination.with_suffix(".docx")
             try:
                 image_bytes: bytes | None = None
                 pixmap = page.get_pixmap(matrix=matrix, clip=clip, alpha=False)
-                if save_files:
-                    pixmap.save(destination)
-                if return_bytes:
+                if return_bytes or docx_enabled:
                     image_bytes = pixmap.tobytes("png")
             except Exception as exc:  # pragma: no cover - defensive
                 if logger:
@@ -136,17 +161,40 @@ def crop_signatures(
                     )
                 continue
+            docx_bytes: bytes | None = None
+            if docx_enabled and docx_available:
+                if image_bytes is None:  # pragma: no cover - defensive
+                    continue
+                try:
+                    docx_bytes = _build_docx_bytes(image_bytes)
+                    if docx_to_disk:
+                        docx_destination.write_bytes(docx_bytes)
+                except SignatureDocxUnavailable as exc:
+                    if logger:
+                        logger.warning(
+                            "Signature DOCX output unavailable",
+                            extra={"error": str(exc)},
+                        )
+                    docx_available = False
+                except Exception as exc:  # pragma: no cover - defensive
+                    if logger:
+                        logger.warning(
+                            "Failed to write signature DOCX",
+                            extra={"file": pdf_path.name, "error": str(exc)},
+                        )
             if save_files:
-                signature.CropPath = str(destination)
-                generated_paths.append(destination)
+                signature.CropPath = str(docx_destination)
+                generated_paths.append(docx_destination)
             if return_bytes:
                 if image_bytes is None:  # pragma: no cover - defensive
                     continue
                 generated_crops.append(
                     SignatureCrop(
-                        path=destination,
+                        path=docx_destination,
                         image_bytes=image_bytes,
                         signature=signature,
+                        docx_bytes=docx_bytes,
                         saved_to_disk=save_files,
                     )
                 )
@@ -154,6 +202,18 @@ def crop_signatures(
     return generated_crops if return_bytes else generated_paths
+def _build_docx_bytes(image_bytes: bytes) -> bytes:
+    if Document is None:
+        raise SignatureDocxUnavailable(
+            "python-docx is required to generate DOCX outputs for signature crops."
+        )
+    document = Document()
+    document.add_picture(io.BytesIO(image_bytes))
+    buffer = io.BytesIO()
+    document.save(buffer)
+    return buffer.getvalue()
 def _to_clip_rect(page, bbox: tuple[float, float, float, float]):
     width = float(page.rect.width)
     height = float(page.rect.height)

sigdetect 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

sigdetect-0.3.1-py3-none-any.whl → sigdetect-0.5.0-py3-none-any.whl