sigdetect 0.2.0__tar.gz → 0.3.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {sigdetect-0.2.0 → sigdetect-0.3.0}/PKG-INFO +9 -2
  2. {sigdetect-0.2.0 → sigdetect-0.3.0}/README.md +8 -1
  3. {sigdetect-0.2.0 → sigdetect-0.3.0}/pyproject.toml +1 -1
  4. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/api.py +28 -2
  5. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/cropping.py +59 -5
  6. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect.egg-info/PKG-INFO +9 -2
  7. {sigdetect-0.2.0 → sigdetect-0.3.0}/tests/test_cropping.py +42 -1
  8. {sigdetect-0.2.0 → sigdetect-0.3.0}/setup.cfg +0 -0
  9. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/__init__.py +0 -0
  10. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/cli.py +0 -0
  11. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/config.py +0 -0
  12. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/data/role_rules.retainer.yml +0 -0
  13. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/data/role_rules.yml +0 -0
  14. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/data/vendor_patterns.yml +0 -0
  15. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/detector/__init__.py +0 -0
  16. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/detector/base.py +0 -0
  17. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/detector/base_detector.py +0 -0
  18. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/detector/file_result_model.py +0 -0
  19. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/detector/pymupdf_engine.py +0 -0
  20. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/detector/pypdf2_engine.py +0 -0
  21. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/detector/signature_model.py +0 -0
  22. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/eda.py +0 -0
  23. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/logging_setup.py +0 -0
  24. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect/utils.py +0 -0
  25. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect.egg-info/SOURCES.txt +0 -0
  26. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect.egg-info/dependency_links.txt +0 -0
  27. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect.egg-info/entry_points.txt +0 -0
  28. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect.egg-info/requires.txt +0 -0
  29. {sigdetect-0.2.0 → sigdetect-0.3.0}/src/sigdetect.egg-info/top_level.txt +0 -0
  30. {sigdetect-0.2.0 → sigdetect-0.3.0}/tests/test_api.py +0 -0
  31. {sigdetect-0.2.0 → sigdetect-0.3.0}/tests/test_pymupdf_engine.py +0 -0
  32. {sigdetect-0.2.0 → sigdetect-0.3.0}/tests/test_widget_role_patient_smoke.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sigdetect
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Signature detection and role attribution for PDFs
5
5
  Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
6
6
  License: MIT
@@ -192,14 +192,21 @@ for res in ScanDirectory(
192
192
  # 3) Crop PNG snippets for FileResult objects (requires PyMuPDF)
193
193
  detector = get_detector(pdfRoot="/path/to/pdfs", profileName="hipaa")
194
194
  file_result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
195
- CropSignatureImages(
195
+ crops = CropSignatureImages(
196
196
  "/path/to/pdfs/example.pdf",
197
197
  file_result,
198
198
  outputDirectory="./signature_crops",
199
199
  dpi=200,
200
+ returnBytes=True, # also returns in-memory PNG bytes for each crop
200
201
  )
202
+
203
+ first_crop = crops[0]
204
+ print(first_crop.path, len(first_crop.image_bytes))
201
205
  ~~~
202
206
 
207
+ When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the saved path,
208
+ PNG bytes, and the originating signature metadata.
209
+
203
210
 
204
211
  ## Result schema
205
212
 
@@ -176,14 +176,21 @@ for res in ScanDirectory(
176
176
  # 3) Crop PNG snippets for FileResult objects (requires PyMuPDF)
177
177
  detector = get_detector(pdfRoot="/path/to/pdfs", profileName="hipaa")
178
178
  file_result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
179
- CropSignatureImages(
179
+ crops = CropSignatureImages(
180
180
  "/path/to/pdfs/example.pdf",
181
181
  file_result,
182
182
  outputDirectory="./signature_crops",
183
183
  dpi=200,
184
+ returnBytes=True, # also returns in-memory PNG bytes for each crop
184
185
  )
186
+
187
+ first_crop = crops[0]
188
+ print(first_crop.path, len(first_crop.image_bytes))
185
189
  ~~~
186
190
 
191
+ When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the saved path,
192
+ PNG bytes, and the originating signature metadata.
193
+
187
194
 
188
195
  ## Result schema
189
196
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sigdetect"
7
- version = "0.2.0"
7
+ version = "0.3.0"
8
8
  description = "Signature detection and role attribution for PDFs"
9
9
  readme = "README.md"
10
10
  authors = [{ name = "BT Asmamaw", email = "basmamaw@angeiongroup.com" }]
@@ -4,9 +4,10 @@ from __future__ import annotations
4
4
 
5
5
  from contextlib import contextmanager
6
6
  from pathlib import Path
7
- from typing import Any, Generator, Iterable, Iterator, Literal
7
+ from typing import Any, Generator, Iterable, Iterator, Literal, overload
8
8
 
9
9
  from sigdetect.config import DetectConfiguration
10
+ from sigdetect.cropping import SignatureCrop
10
11
  from sigdetect.detector import BuildDetector, Detector, FileResult, Signature
11
12
 
12
13
  EngineName = Literal["pypdf2", "pypdf", "pymupdf"]
@@ -191,17 +192,41 @@ def detector_context(**kwargs: Any) -> Generator[Detector, None, None]:
191
192
  pass
192
193
 
193
194
 
195
+ @overload
194
196
  def CropSignatureImages(
195
197
  pdfPath: str | Path,
196
198
  fileResult: FileResult | dict[str, Any],
197
199
  *,
198
200
  outputDirectory: str | Path,
199
201
  dpi: int = 200,
200
- ) -> list[Path]:
202
+ returnBytes: Literal[False] = False,
203
+ ) -> list[Path]: ...
204
+
205
+
206
+ @overload
207
+ def CropSignatureImages(
208
+ pdfPath: str | Path,
209
+ fileResult: FileResult | dict[str, Any],
210
+ *,
211
+ outputDirectory: str | Path,
212
+ dpi: int,
213
+ returnBytes: Literal[True],
214
+ ) -> list[SignatureCrop]: ...
215
+
216
+
217
+ def CropSignatureImages(
218
+ pdfPath: str | Path,
219
+ fileResult: FileResult | dict[str, Any],
220
+ *,
221
+ outputDirectory: str | Path,
222
+ dpi: int = 200,
223
+ returnBytes: bool = False,
224
+ ) -> list[Path] | list[SignatureCrop]:
201
225
  """Crop detected signature regions to PNG files.
202
226
 
203
227
  Accepts either a :class:`FileResult` instance or the ``dict`` returned by
204
228
  :func:`DetectPdf`. Requires the optional ``pymupdf`` dependency.
229
+ Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop.
205
230
  """
206
231
 
207
232
  from sigdetect.cropping import crop_signatures
@@ -212,6 +237,7 @@ def CropSignatureImages(
212
237
  file_result=file_result_obj,
213
238
  output_dir=Path(outputDirectory),
214
239
  dpi=dpi,
240
+ return_bytes=returnBytes,
215
241
  )
216
242
  if original_dict is not None:
217
243
  original_dict.clear()
@@ -4,7 +4,9 @@ from __future__ import annotations
4
4
 
5
5
  import logging
6
6
  import re
7
+ from dataclasses import dataclass
7
8
  from pathlib import Path
9
+ from typing import Literal, overload
8
10
 
9
11
  from .detector.file_result_model import FileResult
10
12
  from .detector.signature_model import Signature
@@ -19,6 +21,28 @@ class SignatureCroppingUnavailable(RuntimeError):
19
21
  """Raised when PNG cropping cannot be performed (e.g., PyMuPDF missing)."""
20
22
 
21
23
 
24
+ @dataclass(slots=True)
25
+ class SignatureCrop:
26
+ """PNG crop metadata and in-memory content."""
27
+
28
+ path: Path
29
+ image_bytes: bytes
30
+ signature: Signature
31
+
32
+
33
+ @overload
34
+ def crop_signatures(
35
+ pdf_path: Path,
36
+ file_result: FileResult,
37
+ *,
38
+ output_dir: Path,
39
+ dpi: int = 200,
40
+ logger: logging.Logger | None = None,
41
+ return_bytes: Literal[False] = False,
42
+ ) -> list[Path]: ...
43
+
44
+
45
+ @overload
22
46
  def crop_signatures(
23
47
  pdf_path: Path,
24
48
  file_result: FileResult,
@@ -26,8 +50,24 @@ def crop_signatures(
26
50
  output_dir: Path,
27
51
  dpi: int = 200,
28
52
  logger: logging.Logger | None = None,
29
- ) -> list[Path]:
30
- """Render each signature bounding box to a PNG image using PyMuPDF."""
53
+ return_bytes: Literal[True] = True,
54
+ ) -> list[SignatureCrop]: ...
55
+
56
+
57
+ def crop_signatures(
58
+ pdf_path: Path,
59
+ file_result: FileResult,
60
+ *,
61
+ output_dir: Path,
62
+ dpi: int = 200,
63
+ logger: logging.Logger | None = None,
64
+ return_bytes: bool = False,
65
+ ) -> list[Path] | list[SignatureCrop]:
66
+ """Render each signature bounding box to a PNG image using PyMuPDF.
67
+
68
+ Set ``return_bytes=True`` to collect in-memory PNG bytes for each crop while also writing
69
+ the files to ``output_dir``.
70
+ """
31
71
 
32
72
  if fitz is None: # pragma: no cover - exercised when dependency absent
33
73
  raise SignatureCroppingUnavailable(
@@ -37,7 +77,8 @@ def crop_signatures(
37
77
  pdf_path = Path(pdf_path)
38
78
  output_dir = Path(output_dir)
39
79
  output_dir.mkdir(parents=True, exist_ok=True)
40
- generated: list[Path] = []
80
+ generated_paths: list[Path] = []
81
+ generated_crops: list[SignatureCrop] = []
41
82
 
42
83
  with fitz.open(pdf_path) as document: # type: ignore[attr-defined]
43
84
  per_document_dir = output_dir / pdf_path.stem
@@ -70,8 +111,11 @@ def crop_signatures(
70
111
  destination = per_document_dir / filename
71
112
 
72
113
  try:
114
+ image_bytes: bytes | None = None
73
115
  pixmap = page.get_pixmap(matrix=matrix, clip=clip, alpha=False)
74
116
  pixmap.save(destination)
117
+ if return_bytes:
118
+ image_bytes = pixmap.tobytes("png")
75
119
  except Exception as exc: # pragma: no cover - defensive
76
120
  if logger:
77
121
  logger.warning(
@@ -86,9 +130,19 @@ def crop_signatures(
86
130
  continue
87
131
 
88
132
  signature.CropPath = str(destination)
89
- generated.append(destination)
133
+ generated_paths.append(destination)
134
+ if return_bytes:
135
+ if image_bytes is None: # pragma: no cover - defensive
136
+ continue
137
+ generated_crops.append(
138
+ SignatureCrop(
139
+ path=destination,
140
+ image_bytes=image_bytes,
141
+ signature=signature,
142
+ )
143
+ )
90
144
 
91
- return generated
145
+ return generated_crops if return_bytes else generated_paths
92
146
 
93
147
 
94
148
  def _to_clip_rect(page, bbox: tuple[float, float, float, float]):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sigdetect
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Signature detection and role attribution for PDFs
5
5
  Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
6
6
  License: MIT
@@ -192,14 +192,21 @@ for res in ScanDirectory(
192
192
  # 3) Crop PNG snippets for FileResult objects (requires PyMuPDF)
193
193
  detector = get_detector(pdfRoot="/path/to/pdfs", profileName="hipaa")
194
194
  file_result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
195
- CropSignatureImages(
195
+ crops = CropSignatureImages(
196
196
  "/path/to/pdfs/example.pdf",
197
197
  file_result,
198
198
  outputDirectory="./signature_crops",
199
199
  dpi=200,
200
+ returnBytes=True, # also returns in-memory PNG bytes for each crop
200
201
  )
202
+
203
+ first_crop = crops[0]
204
+ print(first_crop.path, len(first_crop.image_bytes))
201
205
  ~~~
202
206
 
207
+ When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the saved path,
208
+ PNG bytes, and the originating signature metadata.
209
+
203
210
 
204
211
  ## Result schema
205
212
 
@@ -6,7 +6,7 @@ from pypdf.generic import ArrayObject, DictionaryObject, NameObject, NumberObjec
6
6
 
7
7
  from sigdetect.api import CropSignatureImages, DetectPdf
8
8
  from sigdetect.config import DetectConfiguration
9
- from sigdetect.cropping import crop_signatures
9
+ from sigdetect.cropping import SignatureCrop, crop_signatures
10
10
  from sigdetect.detector.pypdf2_engine import PyPDF2Detector
11
11
 
12
12
  pytest.importorskip("fitz")
@@ -70,3 +70,44 @@ def test_crop_signature_images_accepts_dict(tmp_path: Path) -> None:
70
70
 
71
71
  assert paths
72
72
  assert result_dict["signatures"][0]["crop_path"] is not None
73
+
74
+
75
+ def test_crop_signature_images_returns_bytes(tmp_path: Path) -> None:
76
+ pdf_path = tmp_path / "doc.pdf"
77
+ _pdf_with_signature(pdf_path)
78
+
79
+ result_dict = DetectPdf(pdf_path, engineName="pymupdf")
80
+ out_dir = tmp_path / "dict_byte_crops"
81
+ crops = CropSignatureImages(
82
+ pdf_path,
83
+ result_dict,
84
+ outputDirectory=out_dir,
85
+ returnBytes=True,
86
+ )
87
+
88
+ assert crops
89
+ assert isinstance(crops[0], SignatureCrop)
90
+ assert crops[0].image_bytes
91
+ assert result_dict["signatures"][0]["crop_path"] is not None
92
+
93
+
94
+ def test_crop_signatures_returns_bytes(tmp_path: Path) -> None:
95
+ pdf_path = tmp_path / "doc.pdf"
96
+ _pdf_with_signature(pdf_path)
97
+
98
+ cfg = DetectConfiguration(pdf_root=tmp_path, out_dir=tmp_path, engine="pypdf2")
99
+ result = PyPDF2Detector(cfg).Detect(pdf_path)
100
+
101
+ out_dir = tmp_path / "byte_crops"
102
+ crops = crop_signatures(
103
+ pdf_path,
104
+ result,
105
+ output_dir=out_dir,
106
+ dpi=120,
107
+ return_bytes=True,
108
+ )
109
+
110
+ assert crops
111
+ assert isinstance(crops[0], SignatureCrop)
112
+ assert crops[0].path.exists()
113
+ assert crops[0].image_bytes
File without changes
File without changes