sigdetect 0.3.0__tar.gz → 0.3.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {sigdetect-0.3.0 → sigdetect-0.3.1}/PKG-INFO +3 -1
  2. {sigdetect-0.3.0 → sigdetect-0.3.1}/README.md +2 -0
  3. {sigdetect-0.3.0 → sigdetect-0.3.1}/pyproject.toml +1 -1
  4. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/api.py +6 -1
  5. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/cropping.py +15 -6
  6. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/PKG-INFO +3 -1
  7. {sigdetect-0.3.0 → sigdetect-0.3.1}/tests/test_cropping.py +41 -0
  8. {sigdetect-0.3.0 → sigdetect-0.3.1}/setup.cfg +0 -0
  9. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/__init__.py +0 -0
  10. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/cli.py +0 -0
  11. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/config.py +0 -0
  12. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/data/role_rules.retainer.yml +0 -0
  13. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/data/role_rules.yml +0 -0
  14. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/data/vendor_patterns.yml +0 -0
  15. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/__init__.py +0 -0
  16. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/base.py +0 -0
  17. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/base_detector.py +0 -0
  18. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/file_result_model.py +0 -0
  19. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/pymupdf_engine.py +0 -0
  20. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/pypdf2_engine.py +0 -0
  21. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/detector/signature_model.py +0 -0
  22. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/eda.py +0 -0
  23. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/logging_setup.py +0 -0
  24. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect/utils.py +0 -0
  25. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/SOURCES.txt +0 -0
  26. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/dependency_links.txt +0 -0
  27. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/entry_points.txt +0 -0
  28. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/requires.txt +0 -0
  29. {sigdetect-0.3.0 → sigdetect-0.3.1}/src/sigdetect.egg-info/top_level.txt +0 -0
  30. {sigdetect-0.3.0 → sigdetect-0.3.1}/tests/test_api.py +0 -0
  31. {sigdetect-0.3.0 → sigdetect-0.3.1}/tests/test_pymupdf_engine.py +0 -0
  32. {sigdetect-0.3.0 → sigdetect-0.3.1}/tests/test_widget_role_patient_smoke.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sigdetect
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Signature detection and role attribution for PDFs
5
5
  Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
6
6
  License: MIT
@@ -198,6 +198,7 @@ crops = CropSignatureImages(
198
198
  outputDirectory="./signature_crops",
199
199
  dpi=200,
200
200
  returnBytes=True, # also returns in-memory PNG bytes for each crop
201
+ # saveToDisk=False, # optional: skip writing PNGs to disk
201
202
  )
202
203
 
203
204
  first_crop = crops[0]
@@ -206,6 +207,7 @@ print(first_crop.path, len(first_crop.image_bytes))
206
207
 
207
208
  When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the saved path,
208
209
  PNG bytes, and the originating signature metadata.
210
+ Pass ``saveToDisk=False`` if you only want in-memory PNG bytes (no files on disk or ``crop_path`` updates).
209
211
 
210
212
 
211
213
  ## Result schema
@@ -182,6 +182,7 @@ crops = CropSignatureImages(
182
182
  outputDirectory="./signature_crops",
183
183
  dpi=200,
184
184
  returnBytes=True, # also returns in-memory PNG bytes for each crop
185
+ # saveToDisk=False, # optional: skip writing PNGs to disk
185
186
  )
186
187
 
187
188
  first_crop = crops[0]
@@ -190,6 +191,7 @@ print(first_crop.path, len(first_crop.image_bytes))
190
191
 
191
192
  When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the saved path,
192
193
  PNG bytes, and the originating signature metadata.
194
+ Pass ``saveToDisk=False`` if you only want in-memory PNG bytes (no files on disk or ``crop_path`` updates).
193
195
 
194
196
 
195
197
  ## Result schema
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sigdetect"
7
- version = "0.3.0"
7
+ version = "0.3.1"
8
8
  description = "Signature detection and role attribution for PDFs"
9
9
  readme = "README.md"
10
10
  authors = [{ name = "BT Asmamaw", email = "basmamaw@angeiongroup.com" }]
@@ -200,6 +200,7 @@ def CropSignatureImages(
200
200
  outputDirectory: str | Path,
201
201
  dpi: int = 200,
202
202
  returnBytes: Literal[False] = False,
203
+ saveToDisk: bool = True,
203
204
  ) -> list[Path]: ...
204
205
 
205
206
 
@@ -211,6 +212,7 @@ def CropSignatureImages(
211
212
  outputDirectory: str | Path,
212
213
  dpi: int,
213
214
  returnBytes: Literal[True],
215
+ saveToDisk: bool,
214
216
  ) -> list[SignatureCrop]: ...
215
217
 
216
218
 
@@ -221,12 +223,14 @@ def CropSignatureImages(
221
223
  outputDirectory: str | Path,
222
224
  dpi: int = 200,
223
225
  returnBytes: bool = False,
226
+ saveToDisk: bool = True,
224
227
  ) -> list[Path] | list[SignatureCrop]:
225
228
  """Crop detected signature regions to PNG files.
226
229
 
227
230
  Accepts either a :class:`FileResult` instance or the ``dict`` returned by
228
231
  :func:`DetectPdf`. Requires the optional ``pymupdf`` dependency.
229
- Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop.
232
+ Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop. Set
233
+ ``saveToDisk=False`` to skip writing PNG files while still returning in-memory data.
230
234
  """
231
235
 
232
236
  from sigdetect.cropping import crop_signatures
@@ -238,6 +242,7 @@ def CropSignatureImages(
238
242
  output_dir=Path(outputDirectory),
239
243
  dpi=dpi,
240
244
  return_bytes=returnBytes,
245
+ save_files=saveToDisk,
241
246
  )
242
247
  if original_dict is not None:
243
248
  original_dict.clear()
@@ -28,6 +28,7 @@ class SignatureCrop:
28
28
  path: Path
29
29
  image_bytes: bytes
30
30
  signature: Signature
31
+ saved_to_disk: bool = True
31
32
 
32
33
 
33
34
  @overload
@@ -62,27 +63,32 @@ def crop_signatures(
62
63
  dpi: int = 200,
63
64
  logger: logging.Logger | None = None,
64
65
  return_bytes: bool = False,
66
+ save_files: bool = True,
65
67
  ) -> list[Path] | list[SignatureCrop]:
66
68
  """Render each signature bounding box to a PNG image using PyMuPDF.
67
69
 
68
70
  Set ``return_bytes=True`` to collect in-memory PNG bytes for each crop while also writing
69
- the files to ``output_dir``.
71
+ the files to ``output_dir``. Set ``save_files=False`` to skip writing PNGs to disk.
70
72
  """
71
73
 
72
74
  if fitz is None: # pragma: no cover - exercised when dependency absent
73
75
  raise SignatureCroppingUnavailable(
74
76
  "PyMuPDF is required for PNG crops. Install 'pymupdf' or 'sigdetect[pymupdf]'."
75
77
  )
78
+ if not save_files and not return_bytes:
79
+ raise ValueError("At least one of save_files or return_bytes must be True")
76
80
 
77
81
  pdf_path = Path(pdf_path)
78
82
  output_dir = Path(output_dir)
79
- output_dir.mkdir(parents=True, exist_ok=True)
83
+ if save_files:
84
+ output_dir.mkdir(parents=True, exist_ok=True)
80
85
  generated_paths: list[Path] = []
81
86
  generated_crops: list[SignatureCrop] = []
82
87
 
83
88
  with fitz.open(pdf_path) as document: # type: ignore[attr-defined]
84
89
  per_document_dir = output_dir / pdf_path.stem
85
- per_document_dir.mkdir(parents=True, exist_ok=True)
90
+ if save_files:
91
+ per_document_dir.mkdir(parents=True, exist_ok=True)
86
92
  scale = dpi / 72.0
87
93
  matrix = fitz.Matrix(scale, scale)
88
94
 
@@ -113,7 +119,8 @@ def crop_signatures(
113
119
  try:
114
120
  image_bytes: bytes | None = None
115
121
  pixmap = page.get_pixmap(matrix=matrix, clip=clip, alpha=False)
116
- pixmap.save(destination)
122
+ if save_files:
123
+ pixmap.save(destination)
117
124
  if return_bytes:
118
125
  image_bytes = pixmap.tobytes("png")
119
126
  except Exception as exc: # pragma: no cover - defensive
@@ -129,8 +136,9 @@ def crop_signatures(
129
136
  )
130
137
  continue
131
138
 
132
- signature.CropPath = str(destination)
133
- generated_paths.append(destination)
139
+ if save_files:
140
+ signature.CropPath = str(destination)
141
+ generated_paths.append(destination)
134
142
  if return_bytes:
135
143
  if image_bytes is None: # pragma: no cover - defensive
136
144
  continue
@@ -139,6 +147,7 @@ def crop_signatures(
139
147
  path=destination,
140
148
  image_bytes=image_bytes,
141
149
  signature=signature,
150
+ saved_to_disk=save_files,
142
151
  )
143
152
  )
144
153
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sigdetect
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Signature detection and role attribution for PDFs
5
5
  Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
6
6
  License: MIT
@@ -198,6 +198,7 @@ crops = CropSignatureImages(
198
198
  outputDirectory="./signature_crops",
199
199
  dpi=200,
200
200
  returnBytes=True, # also returns in-memory PNG bytes for each crop
201
+ # saveToDisk=False, # optional: skip writing PNGs to disk
201
202
  )
202
203
 
203
204
  first_crop = crops[0]
@@ -206,6 +207,7 @@ print(first_crop.path, len(first_crop.image_bytes))
206
207
 
207
208
  When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the saved path,
208
209
  PNG bytes, and the originating signature metadata.
210
+ Pass ``saveToDisk=False`` if you only want in-memory PNG bytes (no files on disk or ``crop_path`` updates).
209
211
 
210
212
 
211
213
  ## Result schema
@@ -91,6 +91,29 @@ def test_crop_signature_images_returns_bytes(tmp_path: Path) -> None:
91
91
  assert result_dict["signatures"][0]["crop_path"] is not None
92
92
 
93
93
 
94
+ def test_crop_signature_images_can_skip_disk(tmp_path: Path) -> None:
95
+ pdf_path = tmp_path / "doc.pdf"
96
+ _pdf_with_signature(pdf_path)
97
+
98
+ result_dict = DetectPdf(pdf_path, engineName="pymupdf")
99
+ out_dir = tmp_path / "dict_byte_crops_no_disk"
100
+ crops = CropSignatureImages(
101
+ pdf_path,
102
+ result_dict,
103
+ outputDirectory=out_dir,
104
+ returnBytes=True,
105
+ saveToDisk=False,
106
+ )
107
+
108
+ assert crops
109
+ first_crop = crops[0]
110
+ assert isinstance(first_crop, SignatureCrop)
111
+ assert first_crop.image_bytes
112
+ assert first_crop.saved_to_disk is False
113
+ assert not first_crop.path.exists()
114
+ assert result_dict["signatures"][0]["crop_path"] is None
115
+
116
+
94
117
  def test_crop_signatures_returns_bytes(tmp_path: Path) -> None:
95
118
  pdf_path = tmp_path / "doc.pdf"
96
119
  _pdf_with_signature(pdf_path)
@@ -111,3 +134,21 @@ def test_crop_signatures_returns_bytes(tmp_path: Path) -> None:
111
134
  assert isinstance(crops[0], SignatureCrop)
112
135
  assert crops[0].path.exists()
113
136
  assert crops[0].image_bytes
137
+
138
+
139
+ def test_crop_signatures_requires_save_or_bytes(tmp_path: Path) -> None:
140
+ pdf_path = tmp_path / "doc.pdf"
141
+ _pdf_with_signature(pdf_path)
142
+
143
+ cfg = DetectConfiguration(pdf_root=tmp_path, out_dir=tmp_path, engine="pypdf2")
144
+ result = PyPDF2Detector(cfg).Detect(pdf_path)
145
+
146
+ with pytest.raises(ValueError):
147
+ crop_signatures(
148
+ pdf_path,
149
+ result,
150
+ output_dir=tmp_path / "unused",
151
+ dpi=120,
152
+ save_files=False,
153
+ return_bytes=False,
154
+ )
File without changes
File without changes