sigdetect 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sigdetect-0.3.0 → sigdetect-0.3.1}/PKG-INFO +3 -1
- {sigdetect-0.3.0 → sigdetect-0.3.1}/README.md +2 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/pyproject.toml +1 -1
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/api.py +6 -1
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/cropping.py +15 -6
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/PKG-INFO +3 -1
- {sigdetect-0.3.0 → sigdetect-0.3.1}/tests/test_cropping.py +41 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/setup.cfg +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/__init__.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/cli.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/config.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/data/role_rules.retainer.yml +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/data/role_rules.yml +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/data/vendor_patterns.yml +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/__init__.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/base.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/base_detector.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/file_result_model.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/pymupdf_engine.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/pypdf2_engine.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/signature_model.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/eda.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/logging_setup.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/utils.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/SOURCES.txt +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/dependency_links.txt +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/entry_points.txt +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/requires.txt +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/top_level.txt +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/tests/test_api.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/tests/test_pymupdf_engine.py +0 -0
- {sigdetect-0.3.0 → sigdetect-0.3.1}/tests/test_widget_role_patient_smoke.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sigdetect
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Signature detection and role attribution for PDFs
|
|
5
5
|
Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
|
|
6
6
|
License: MIT
|
|
@@ -198,6 +198,7 @@ crops = CropSignatureImages(
|
|
|
198
198
|
outputDirectory="./signature_crops",
|
|
199
199
|
dpi=200,
|
|
200
200
|
returnBytes=True, # also returns in-memory PNG bytes for each crop
|
|
201
|
+
# saveToDisk=False, # optional: skip writing PNGs to disk
|
|
201
202
|
)
|
|
202
203
|
|
|
203
204
|
first_crop = crops[0]
|
|
@@ -206,6 +207,7 @@ print(first_crop.path, len(first_crop.image_bytes))
|
|
|
206
207
|
|
|
207
208
|
When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the saved path,
|
|
208
209
|
PNG bytes, and the originating signature metadata.
|
|
210
|
+
Pass ``saveToDisk=False`` if you only want in-memory PNG bytes (no files on disk or ``crop_path`` updates).
|
|
209
211
|
|
|
210
212
|
|
|
211
213
|
## Result schema
|
|
@@ -182,6 +182,7 @@ crops = CropSignatureImages(
|
|
|
182
182
|
outputDirectory="./signature_crops",
|
|
183
183
|
dpi=200,
|
|
184
184
|
returnBytes=True, # also returns in-memory PNG bytes for each crop
|
|
185
|
+
# saveToDisk=False, # optional: skip writing PNGs to disk
|
|
185
186
|
)
|
|
186
187
|
|
|
187
188
|
first_crop = crops[0]
|
|
@@ -190,6 +191,7 @@ print(first_crop.path, len(first_crop.image_bytes))
|
|
|
190
191
|
|
|
191
192
|
When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the saved path,
|
|
192
193
|
PNG bytes, and the originating signature metadata.
|
|
194
|
+
Pass ``saveToDisk=False`` if you only want in-memory PNG bytes (no files on disk or ``crop_path`` updates).
|
|
193
195
|
|
|
194
196
|
|
|
195
197
|
## Result schema
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sigdetect"
|
|
7
|
-
version = "0.3.0"
|
|
7
|
+
version = "0.3.1"
|
|
8
8
|
description = "Signature detection and role attribution for PDFs"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [{ name = "BT Asmamaw", email = "basmamaw@angeiongroup.com" }]
|
|
@@ -200,6 +200,7 @@ def CropSignatureImages(
|
|
|
200
200
|
outputDirectory: str | Path,
|
|
201
201
|
dpi: int = 200,
|
|
202
202
|
returnBytes: Literal[False] = False,
|
|
203
|
+
saveToDisk: bool = True,
|
|
203
204
|
) -> list[Path]: ...
|
|
204
205
|
|
|
205
206
|
|
|
@@ -211,6 +212,7 @@ def CropSignatureImages(
|
|
|
211
212
|
outputDirectory: str | Path,
|
|
212
213
|
dpi: int,
|
|
213
214
|
returnBytes: Literal[True],
|
|
215
|
+
saveToDisk: bool,
|
|
214
216
|
) -> list[SignatureCrop]: ...
|
|
215
217
|
|
|
216
218
|
|
|
@@ -221,12 +223,14 @@ def CropSignatureImages(
|
|
|
221
223
|
outputDirectory: str | Path,
|
|
222
224
|
dpi: int = 200,
|
|
223
225
|
returnBytes: bool = False,
|
|
226
|
+
saveToDisk: bool = True,
|
|
224
227
|
) -> list[Path] | list[SignatureCrop]:
|
|
225
228
|
"""Crop detected signature regions to PNG files.
|
|
226
229
|
|
|
227
230
|
Accepts either a :class:`FileResult` instance or the ``dict`` returned by
|
|
228
231
|
:func:`DetectPdf`. Requires the optional ``pymupdf`` dependency.
|
|
229
|
-
Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop.
|
|
232
|
+
Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop. Set
|
|
233
|
+
``saveToDisk=False`` to skip writing PNG files while still returning in-memory data.
|
|
230
234
|
"""
|
|
231
235
|
|
|
232
236
|
from sigdetect.cropping import crop_signatures
|
|
@@ -238,6 +242,7 @@ def CropSignatureImages(
|
|
|
238
242
|
output_dir=Path(outputDirectory),
|
|
239
243
|
dpi=dpi,
|
|
240
244
|
return_bytes=returnBytes,
|
|
245
|
+
save_files=saveToDisk,
|
|
241
246
|
)
|
|
242
247
|
if original_dict is not None:
|
|
243
248
|
original_dict.clear()
|
|
@@ -28,6 +28,7 @@ class SignatureCrop:
|
|
|
28
28
|
path: Path
|
|
29
29
|
image_bytes: bytes
|
|
30
30
|
signature: Signature
|
|
31
|
+
saved_to_disk: bool = True
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
@overload
|
|
@@ -62,27 +63,32 @@ def crop_signatures(
|
|
|
62
63
|
dpi: int = 200,
|
|
63
64
|
logger: logging.Logger | None = None,
|
|
64
65
|
return_bytes: bool = False,
|
|
66
|
+
save_files: bool = True,
|
|
65
67
|
) -> list[Path] | list[SignatureCrop]:
|
|
66
68
|
"""Render each signature bounding box to a PNG image using PyMuPDF.
|
|
67
69
|
|
|
68
70
|
Set ``return_bytes=True`` to collect in-memory PNG bytes for each crop while also writing
|
|
69
|
-
the files to ``output_dir``.
|
|
71
|
+
the files to ``output_dir``. Set ``save_files=False`` to skip writing PNGs to disk.
|
|
70
72
|
"""
|
|
71
73
|
|
|
72
74
|
if fitz is None: # pragma: no cover - exercised when dependency absent
|
|
73
75
|
raise SignatureCroppingUnavailable(
|
|
74
76
|
"PyMuPDF is required for PNG crops. Install 'pymupdf' or 'sigdetect[pymupdf]'."
|
|
75
77
|
)
|
|
78
|
+
if not save_files and not return_bytes:
|
|
79
|
+
raise ValueError("At least one of save_files or return_bytes must be True")
|
|
76
80
|
|
|
77
81
|
pdf_path = Path(pdf_path)
|
|
78
82
|
output_dir = Path(output_dir)
|
|
79
|
-
|
|
83
|
+
if save_files:
|
|
84
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
80
85
|
generated_paths: list[Path] = []
|
|
81
86
|
generated_crops: list[SignatureCrop] = []
|
|
82
87
|
|
|
83
88
|
with fitz.open(pdf_path) as document: # type: ignore[attr-defined]
|
|
84
89
|
per_document_dir = output_dir / pdf_path.stem
|
|
85
|
-
|
|
90
|
+
if save_files:
|
|
91
|
+
per_document_dir.mkdir(parents=True, exist_ok=True)
|
|
86
92
|
scale = dpi / 72.0
|
|
87
93
|
matrix = fitz.Matrix(scale, scale)
|
|
88
94
|
|
|
@@ -113,7 +119,8 @@ def crop_signatures(
|
|
|
113
119
|
try:
|
|
114
120
|
image_bytes: bytes | None = None
|
|
115
121
|
pixmap = page.get_pixmap(matrix=matrix, clip=clip, alpha=False)
|
|
116
|
-
|
|
122
|
+
if save_files:
|
|
123
|
+
pixmap.save(destination)
|
|
117
124
|
if return_bytes:
|
|
118
125
|
image_bytes = pixmap.tobytes("png")
|
|
119
126
|
except Exception as exc: # pragma: no cover - defensive
|
|
@@ -129,8 +136,9 @@ def crop_signatures(
|
|
|
129
136
|
)
|
|
130
137
|
continue
|
|
131
138
|
|
|
132
|
-
|
|
133
|
-
|
|
139
|
+
if save_files:
|
|
140
|
+
signature.CropPath = str(destination)
|
|
141
|
+
generated_paths.append(destination)
|
|
134
142
|
if return_bytes:
|
|
135
143
|
if image_bytes is None: # pragma: no cover - defensive
|
|
136
144
|
continue
|
|
@@ -139,6 +147,7 @@ def crop_signatures(
|
|
|
139
147
|
path=destination,
|
|
140
148
|
image_bytes=image_bytes,
|
|
141
149
|
signature=signature,
|
|
150
|
+
saved_to_disk=save_files,
|
|
142
151
|
)
|
|
143
152
|
)
|
|
144
153
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sigdetect
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Signature detection and role attribution for PDFs
|
|
5
5
|
Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
|
|
6
6
|
License: MIT
|
|
@@ -198,6 +198,7 @@ crops = CropSignatureImages(
|
|
|
198
198
|
outputDirectory="./signature_crops",
|
|
199
199
|
dpi=200,
|
|
200
200
|
returnBytes=True, # also returns in-memory PNG bytes for each crop
|
|
201
|
+
# saveToDisk=False, # optional: skip writing PNGs to disk
|
|
201
202
|
)
|
|
202
203
|
|
|
203
204
|
first_crop = crops[0]
|
|
@@ -206,6 +207,7 @@ print(first_crop.path, len(first_crop.image_bytes))
|
|
|
206
207
|
|
|
207
208
|
When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the saved path,
|
|
208
209
|
PNG bytes, and the originating signature metadata.
|
|
210
|
+
Pass ``saveToDisk=False`` if you only want in-memory PNG bytes (no files on disk or ``crop_path`` updates).
|
|
209
211
|
|
|
210
212
|
|
|
211
213
|
## Result schema
|
|
@@ -91,6 +91,29 @@ def test_crop_signature_images_returns_bytes(tmp_path: Path) -> None:
|
|
|
91
91
|
assert result_dict["signatures"][0]["crop_path"] is not None
|
|
92
92
|
|
|
93
93
|
|
|
94
|
+
def test_crop_signature_images_can_skip_disk(tmp_path: Path) -> None:
|
|
95
|
+
pdf_path = tmp_path / "doc.pdf"
|
|
96
|
+
_pdf_with_signature(pdf_path)
|
|
97
|
+
|
|
98
|
+
result_dict = DetectPdf(pdf_path, engineName="pymupdf")
|
|
99
|
+
out_dir = tmp_path / "dict_byte_crops_no_disk"
|
|
100
|
+
crops = CropSignatureImages(
|
|
101
|
+
pdf_path,
|
|
102
|
+
result_dict,
|
|
103
|
+
outputDirectory=out_dir,
|
|
104
|
+
returnBytes=True,
|
|
105
|
+
saveToDisk=False,
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
assert crops
|
|
109
|
+
first_crop = crops[0]
|
|
110
|
+
assert isinstance(first_crop, SignatureCrop)
|
|
111
|
+
assert first_crop.image_bytes
|
|
112
|
+
assert first_crop.saved_to_disk is False
|
|
113
|
+
assert not first_crop.path.exists()
|
|
114
|
+
assert result_dict["signatures"][0]["crop_path"] is None
|
|
115
|
+
|
|
116
|
+
|
|
94
117
|
def test_crop_signatures_returns_bytes(tmp_path: Path) -> None:
|
|
95
118
|
pdf_path = tmp_path / "doc.pdf"
|
|
96
119
|
_pdf_with_signature(pdf_path)
|
|
@@ -111,3 +134,21 @@ def test_crop_signatures_returns_bytes(tmp_path: Path) -> None:
|
|
|
111
134
|
assert isinstance(crops[0], SignatureCrop)
|
|
112
135
|
assert crops[0].path.exists()
|
|
113
136
|
assert crops[0].image_bytes
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_crop_signatures_requires_save_or_bytes(tmp_path: Path) -> None:
|
|
140
|
+
pdf_path = tmp_path / "doc.pdf"
|
|
141
|
+
_pdf_with_signature(pdf_path)
|
|
142
|
+
|
|
143
|
+
cfg = DetectConfiguration(pdf_root=tmp_path, out_dir=tmp_path, engine="pypdf2")
|
|
144
|
+
result = PyPDF2Detector(cfg).Detect(pdf_path)
|
|
145
|
+
|
|
146
|
+
with pytest.raises(ValueError):
|
|
147
|
+
crop_signatures(
|
|
148
|
+
pdf_path,
|
|
149
|
+
result,
|
|
150
|
+
output_dir=tmp_path / "unused",
|
|
151
|
+
dpi=120,
|
|
152
|
+
save_files=False,
|
|
153
|
+
return_bytes=False,
|
|
154
|
+
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|