sigdetect 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sigdetect/api.py CHANGED
@@ -4,9 +4,10 @@ from __future__ import annotations
4
4
 
5
5
  from contextlib import contextmanager
6
6
  from pathlib import Path
7
- from typing import Any, Generator, Iterable, Iterator, Literal
7
+ from typing import Any, Generator, Iterable, Iterator, Literal, overload
8
8
 
9
9
  from sigdetect.config import DetectConfiguration
10
+ from sigdetect.cropping import SignatureCrop
10
11
  from sigdetect.detector import BuildDetector, Detector, FileResult, Signature
11
12
 
12
13
  EngineName = Literal["pypdf2", "pypdf", "pymupdf"]
@@ -191,17 +192,45 @@ def detector_context(**kwargs: Any) -> Generator[Detector, None, None]:
191
192
  pass
192
193
 
193
194
 
195
+ @overload
194
196
  def CropSignatureImages(
195
197
  pdfPath: str | Path,
196
198
  fileResult: FileResult | dict[str, Any],
197
199
  *,
198
200
  outputDirectory: str | Path,
199
201
  dpi: int = 200,
200
- ) -> list[Path]:
202
+ returnBytes: Literal[False] = False,
203
+ saveToDisk: bool = True,
204
+ ) -> list[Path]: ...
205
+
206
+
207
+ @overload
208
+ def CropSignatureImages(
209
+ pdfPath: str | Path,
210
+ fileResult: FileResult | dict[str, Any],
211
+ *,
212
+ outputDirectory: str | Path,
213
+ dpi: int,
214
+ returnBytes: Literal[True],
215
+ saveToDisk: bool,
216
+ ) -> list[SignatureCrop]: ...
217
+
218
+
219
+ def CropSignatureImages(
220
+ pdfPath: str | Path,
221
+ fileResult: FileResult | dict[str, Any],
222
+ *,
223
+ outputDirectory: str | Path,
224
+ dpi: int = 200,
225
+ returnBytes: bool = False,
226
+ saveToDisk: bool = True,
227
+ ) -> list[Path] | list[SignatureCrop]:
201
228
  """Crop detected signature regions to PNG files.
202
229
 
203
230
  Accepts either a :class:`FileResult` instance or the ``dict`` returned by
204
231
  :func:`DetectPdf`. Requires the optional ``pymupdf`` dependency.
232
+ Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop. Set
233
+ ``saveToDisk=False`` to skip writing PNG files while still returning in-memory data.
205
234
  """
206
235
 
207
236
  from sigdetect.cropping import crop_signatures
@@ -212,6 +241,8 @@ def CropSignatureImages(
212
241
  file_result=file_result_obj,
213
242
  output_dir=Path(outputDirectory),
214
243
  dpi=dpi,
244
+ return_bytes=returnBytes,
245
+ save_files=saveToDisk,
215
246
  )
216
247
  if original_dict is not None:
217
248
  original_dict.clear()
sigdetect/cropping.py CHANGED
@@ -4,7 +4,9 @@ from __future__ import annotations
4
4
 
5
5
  import logging
6
6
  import re
7
+ from dataclasses import dataclass
7
8
  from pathlib import Path
9
+ from typing import Literal, overload
8
10
 
9
11
  from .detector.file_result_model import FileResult
10
12
  from .detector.signature_model import Signature
@@ -19,6 +21,29 @@ class SignatureCroppingUnavailable(RuntimeError):
19
21
  """Raised when PNG cropping cannot be performed (e.g., PyMuPDF missing)."""
20
22
 
21
23
 
24
+ @dataclass(slots=True)
25
+ class SignatureCrop:
26
+ """PNG crop metadata and in-memory content."""
27
+
28
+ path: Path
29
+ image_bytes: bytes
30
+ signature: Signature
31
+ saved_to_disk: bool = True
32
+
33
+
34
+ @overload
35
+ def crop_signatures(
36
+ pdf_path: Path,
37
+ file_result: FileResult,
38
+ *,
39
+ output_dir: Path,
40
+ dpi: int = 200,
41
+ logger: logging.Logger | None = None,
42
+ return_bytes: Literal[False] = False,
43
+ ) -> list[Path]: ...
44
+
45
+
46
+ @overload
22
47
  def crop_signatures(
23
48
  pdf_path: Path,
24
49
  file_result: FileResult,
@@ -26,22 +51,44 @@ def crop_signatures(
26
51
  output_dir: Path,
27
52
  dpi: int = 200,
28
53
  logger: logging.Logger | None = None,
29
- ) -> list[Path]:
30
- """Render each signature bounding box to a PNG image using PyMuPDF."""
54
+ return_bytes: Literal[True] = True,
55
+ ) -> list[SignatureCrop]: ...
56
+
57
+
58
+ def crop_signatures(
59
+ pdf_path: Path,
60
+ file_result: FileResult,
61
+ *,
62
+ output_dir: Path,
63
+ dpi: int = 200,
64
+ logger: logging.Logger | None = None,
65
+ return_bytes: bool = False,
66
+ save_files: bool = True,
67
+ ) -> list[Path] | list[SignatureCrop]:
68
+ """Render each signature bounding box to a PNG image using PyMuPDF.
69
+
70
+ Set ``return_bytes=True`` to collect in-memory PNG bytes for each crop while also writing
71
+ the files to ``output_dir``. Set ``save_files=False`` to skip writing PNGs to disk.
72
+ """
31
73
 
32
74
  if fitz is None: # pragma: no cover - exercised when dependency absent
33
75
  raise SignatureCroppingUnavailable(
34
76
  "PyMuPDF is required for PNG crops. Install 'pymupdf' or 'sigdetect[pymupdf]'."
35
77
  )
78
+ if not save_files and not return_bytes:
79
+ raise ValueError("At least one of save_files or return_bytes must be True")
36
80
 
37
81
  pdf_path = Path(pdf_path)
38
82
  output_dir = Path(output_dir)
39
- output_dir.mkdir(parents=True, exist_ok=True)
40
- generated: list[Path] = []
83
+ if save_files:
84
+ output_dir.mkdir(parents=True, exist_ok=True)
85
+ generated_paths: list[Path] = []
86
+ generated_crops: list[SignatureCrop] = []
41
87
 
42
88
  with fitz.open(pdf_path) as document: # type: ignore[attr-defined]
43
89
  per_document_dir = output_dir / pdf_path.stem
44
- per_document_dir.mkdir(parents=True, exist_ok=True)
90
+ if save_files:
91
+ per_document_dir.mkdir(parents=True, exist_ok=True)
45
92
  scale = dpi / 72.0
46
93
  matrix = fitz.Matrix(scale, scale)
47
94
 
@@ -70,8 +117,12 @@ def crop_signatures(
70
117
  destination = per_document_dir / filename
71
118
 
72
119
  try:
120
+ image_bytes: bytes | None = None
73
121
  pixmap = page.get_pixmap(matrix=matrix, clip=clip, alpha=False)
74
- pixmap.save(destination)
122
+ if save_files:
123
+ pixmap.save(destination)
124
+ if return_bytes:
125
+ image_bytes = pixmap.tobytes("png")
75
126
  except Exception as exc: # pragma: no cover - defensive
76
127
  if logger:
77
128
  logger.warning(
@@ -85,10 +136,22 @@ def crop_signatures(
85
136
  )
86
137
  continue
87
138
 
88
- signature.CropPath = str(destination)
89
- generated.append(destination)
139
+ if save_files:
140
+ signature.CropPath = str(destination)
141
+ generated_paths.append(destination)
142
+ if return_bytes:
143
+ if image_bytes is None: # pragma: no cover - defensive
144
+ continue
145
+ generated_crops.append(
146
+ SignatureCrop(
147
+ path=destination,
148
+ image_bytes=image_bytes,
149
+ signature=signature,
150
+ saved_to_disk=save_files,
151
+ )
152
+ )
90
153
 
91
- return generated
154
+ return generated_crops if return_bytes else generated_paths
92
155
 
93
156
 
94
157
  def _to_clip_rect(page, bbox: tuple[float, float, float, float]):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sigdetect
3
- Version: 0.2.0
3
+ Version: 0.3.1
4
4
  Summary: Signature detection and role attribution for PDFs
5
5
  Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
6
6
  License: MIT
@@ -192,14 +192,23 @@ for res in ScanDirectory(
192
192
  # 3) Crop PNG snippets for FileResult objects (requires PyMuPDF)
193
193
  detector = get_detector(pdfRoot="/path/to/pdfs", profileName="hipaa")
194
194
  file_result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
195
- CropSignatureImages(
195
+ crops = CropSignatureImages(
196
196
  "/path/to/pdfs/example.pdf",
197
197
  file_result,
198
198
  outputDirectory="./signature_crops",
199
199
  dpi=200,
200
+ returnBytes=True, # also returns in-memory PNG bytes for each crop
201
+ # saveToDisk=False, # optional: skip writing PNGs to disk
200
202
  )
203
+
204
+ first_crop = crops[0]
205
+ print(first_crop.path, len(first_crop.image_bytes))
201
206
  ~~~
202
207
 
208
+ When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the destination path,
209
+ PNG bytes, the originating signature metadata, and a ``saved_to_disk`` flag indicating whether the file was written.
210
+ Pass ``saveToDisk=False`` together with ``returnBytes=True`` if you only want in-memory PNG bytes (no files on disk or ``crop_path`` updates); disabling both options at once raises ``ValueError``.
211
+
203
212
 
204
213
  ## Result schema
205
214
 
@@ -1,8 +1,8 @@
1
1
  sigdetect/__init__.py,sha256=LhY78mDZ1ClYVNTxW_qtE-vqJoN9N7N5ZcNRDUI_3ss,575
2
- sigdetect/api.py,sha256=F7bM0ctYmtczjqSbsl7MkUZQ28wkRnLAYt1WxfCtzk4,8518
2
+ sigdetect/api.py,sha256=6_CMSxcag9coHHzrpuRSVimHWSNtqQiWY9hdlqQ2IKY,9396
3
3
  sigdetect/cli.py,sha256=NctAnaB-TQrUAT9m-v8kj2_KTNs88kbFOCiX32tHZm8,7920
4
4
  sigdetect/config.py,sha256=S0NVKuJYiHJCocL-VNFdGJpasFcjTecavC4EthyS1DQ,5951
5
- sigdetect/cropping.py,sha256=89xPwXhWkJC5E0oW2e3_fDyERH5YGqyt4q4B-HSld4o,4084
5
+ sigdetect/cropping.py,sha256=dmJF4Q1tkmkfm0NaiwHddNOP8Sj9S4Lj_d5EBjodEkk,6015
6
6
  sigdetect/eda.py,sha256=S92G1Gjmepri__D0n_V6foq0lQgH-RXI9anW8A58jfw,4681
7
7
  sigdetect/logging_setup.py,sha256=LMF8ao_a-JwH0S522T6aYTFX3e8Ajjv_5ODS2YiBcHA,6404
8
8
  sigdetect/utils.py,sha256=T9rubLf5T9JmjOHYMOba1j34fhOJaWocAXccnGTxRUE,5198
@@ -16,8 +16,8 @@ sigdetect/detector/file_result_model.py,sha256=j2gTc9Sw3fJOHlexYsR_m5DiwHA8DzIzA
16
16
  sigdetect/detector/pymupdf_engine.py,sha256=iyp7JuPlUnydwohH5zbNg4MwH44mBmxbBWOS3ZmArBo,17339
17
17
  sigdetect/detector/pypdf2_engine.py,sha256=INWQH06kMLvto2VS-EdLC-EtMC6AG7JmdVYmNgx6_RU,47313
18
18
  sigdetect/detector/signature_model.py,sha256=mztb9V5wgv2oohQ5Cxzcv8_Bo6TyWAVIXteaeQ2rywQ,1076
19
- sigdetect-0.2.0.dist-info/METADATA,sha256=HzF-CmGBs48_Cqv9Dv9AdXo_UoztA-tLPxVMN1fXOH0,11866
20
- sigdetect-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
- sigdetect-0.2.0.dist-info/entry_points.txt,sha256=iqtfKjBU44-omM7Sh-idGz2ahw19oAvpvSyKZVArG3o,48
22
- sigdetect-0.2.0.dist-info/top_level.txt,sha256=PKlfwUobkRC0viwiSXmhtw83G26FSNpimWYC1Uy00FY,10
23
- sigdetect-0.2.0.dist-info/RECORD,,
19
+ sigdetect-0.3.1.dist-info/METADATA,sha256=whXGE4-9spAjlMcZz_owdsIiB4EobXL9_UOuAJeDVfA,12342
20
+ sigdetect-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ sigdetect-0.3.1.dist-info/entry_points.txt,sha256=iqtfKjBU44-omM7Sh-idGz2ahw19oAvpvSyKZVArG3o,48
22
+ sigdetect-0.3.1.dist-info/top_level.txt,sha256=PKlfwUobkRC0viwiSXmhtw83G26FSNpimWYC1Uy00FY,10
23
+ sigdetect-0.3.1.dist-info/RECORD,,