sigdetect 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sigdetect/api.py CHANGED
@@ -200,6 +200,7 @@ def CropSignatureImages(
200
200
  outputDirectory: str | Path,
201
201
  dpi: int = 200,
202
202
  returnBytes: Literal[False] = False,
203
+ saveToDisk: bool = True,
203
204
  ) -> list[Path]: ...
204
205
 
205
206
 
@@ -211,6 +212,7 @@ def CropSignatureImages(
211
212
  outputDirectory: str | Path,
212
213
  dpi: int,
213
214
  returnBytes: Literal[True],
215
+ saveToDisk: bool,
214
216
  ) -> list[SignatureCrop]: ...
215
217
 
216
218
 
@@ -221,12 +223,14 @@ def CropSignatureImages(
221
223
  outputDirectory: str | Path,
222
224
  dpi: int = 200,
223
225
  returnBytes: bool = False,
226
+ saveToDisk: bool = True,
224
227
  ) -> list[Path] | list[SignatureCrop]:
225
228
  """Crop detected signature regions to PNG files.
226
229
 
227
230
  Accepts either a :class:`FileResult` instance or the ``dict`` returned by
228
231
  :func:`DetectPdf`. Requires the optional ``pymupdf`` dependency.
229
- Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop.
232
+ Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop. Set
233
+ ``saveToDisk=False`` to skip writing PNG files while still returning in-memory data.
230
234
  """
231
235
 
232
236
  from sigdetect.cropping import crop_signatures
@@ -238,6 +242,7 @@ def CropSignatureImages(
238
242
  output_dir=Path(outputDirectory),
239
243
  dpi=dpi,
240
244
  return_bytes=returnBytes,
245
+ save_files=saveToDisk,
241
246
  )
242
247
  if original_dict is not None:
243
248
  original_dict.clear()
sigdetect/cropping.py CHANGED
@@ -28,6 +28,7 @@ class SignatureCrop:
28
28
  path: Path
29
29
  image_bytes: bytes
30
30
  signature: Signature
31
+ saved_to_disk: bool = True
31
32
 
32
33
 
33
34
  @overload
@@ -62,27 +63,32 @@ def crop_signatures(
62
63
  dpi: int = 200,
63
64
  logger: logging.Logger | None = None,
64
65
  return_bytes: bool = False,
66
+ save_files: bool = True,
65
67
  ) -> list[Path] | list[SignatureCrop]:
66
68
  """Render each signature bounding box to a PNG image using PyMuPDF.
67
69
 
68
70
  Set ``return_bytes=True`` to collect in-memory PNG bytes for each crop while also writing
69
- the files to ``output_dir``.
71
+ the files to ``output_dir``. Set ``save_files=False`` to skip writing PNGs to disk.
70
72
  """
71
73
 
72
74
  if fitz is None: # pragma: no cover - exercised when dependency absent
73
75
  raise SignatureCroppingUnavailable(
74
76
  "PyMuPDF is required for PNG crops. Install 'pymupdf' or 'sigdetect[pymupdf]'."
75
77
  )
78
+ if not save_files and not return_bytes:
79
+ raise ValueError("At least one of save_files or return_bytes must be True")
76
80
 
77
81
  pdf_path = Path(pdf_path)
78
82
  output_dir = Path(output_dir)
79
- output_dir.mkdir(parents=True, exist_ok=True)
83
+ if save_files:
84
+ output_dir.mkdir(parents=True, exist_ok=True)
80
85
  generated_paths: list[Path] = []
81
86
  generated_crops: list[SignatureCrop] = []
82
87
 
83
88
  with fitz.open(pdf_path) as document: # type: ignore[attr-defined]
84
89
  per_document_dir = output_dir / pdf_path.stem
85
- per_document_dir.mkdir(parents=True, exist_ok=True)
90
+ if save_files:
91
+ per_document_dir.mkdir(parents=True, exist_ok=True)
86
92
  scale = dpi / 72.0
87
93
  matrix = fitz.Matrix(scale, scale)
88
94
 
@@ -113,7 +119,8 @@ def crop_signatures(
113
119
  try:
114
120
  image_bytes: bytes | None = None
115
121
  pixmap = page.get_pixmap(matrix=matrix, clip=clip, alpha=False)
116
- pixmap.save(destination)
122
+ if save_files:
123
+ pixmap.save(destination)
117
124
  if return_bytes:
118
125
  image_bytes = pixmap.tobytes("png")
119
126
  except Exception as exc: # pragma: no cover - defensive
@@ -129,8 +136,9 @@ def crop_signatures(
129
136
  )
130
137
  continue
131
138
 
132
- signature.CropPath = str(destination)
133
- generated_paths.append(destination)
139
+ if save_files:
140
+ signature.CropPath = str(destination)
141
+ generated_paths.append(destination)
134
142
  if return_bytes:
135
143
  if image_bytes is None: # pragma: no cover - defensive
136
144
  continue
@@ -139,6 +147,7 @@ def crop_signatures(
139
147
  path=destination,
140
148
  image_bytes=image_bytes,
141
149
  signature=signature,
150
+ saved_to_disk=save_files,
142
151
  )
143
152
  )
144
153
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sigdetect
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Signature detection and role attribution for PDFs
5
5
  Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
6
6
  License: MIT
@@ -198,6 +198,7 @@ crops = CropSignatureImages(
198
198
  outputDirectory="./signature_crops",
199
199
  dpi=200,
200
200
  returnBytes=True, # also returns in-memory PNG bytes for each crop
201
+ # saveToDisk=False, # optional: skip writing PNGs to disk
201
202
  )
202
203
 
203
204
  first_crop = crops[0]
@@ -206,6 +207,7 @@ print(first_crop.path, len(first_crop.image_bytes))
206
207
 
207
208
  When ``returnBytes=True`` the helper returns ``SignatureCrop`` objects containing the saved path,
208
209
  PNG bytes, and the originating signature metadata.
210
+ Pass ``saveToDisk=False`` if you only want in-memory PNG bytes: no PNG files are written to disk and ``crop_path`` is not updated on the signatures.
209
211
 
210
212
 
211
213
  ## Result schema
@@ -1,8 +1,8 @@
1
1
  sigdetect/__init__.py,sha256=LhY78mDZ1ClYVNTxW_qtE-vqJoN9N7N5ZcNRDUI_3ss,575
2
- sigdetect/api.py,sha256=jIUaq6nslDdluNlRoDSdaX3Dx1lkIIZmIJPHn8Nk2Ko,9192
2
+ sigdetect/api.py,sha256=6_CMSxcag9coHHzrpuRSVimHWSNtqQiWY9hdlqQ2IKY,9396
3
3
  sigdetect/cli.py,sha256=NctAnaB-TQrUAT9m-v8kj2_KTNs88kbFOCiX32tHZm8,7920
4
4
  sigdetect/config.py,sha256=S0NVKuJYiHJCocL-VNFdGJpasFcjTecavC4EthyS1DQ,5951
5
- sigdetect/cropping.py,sha256=IyIcQAPH3z58tS6yeplglMDNu9F-iyQtpYQ1Ya2X_8o,5602
5
+ sigdetect/cropping.py,sha256=dmJF4Q1tkmkfm0NaiwHddNOP8Sj9S4Lj_d5EBjodEkk,6015
6
6
  sigdetect/eda.py,sha256=S92G1Gjmepri__D0n_V6foq0lQgH-RXI9anW8A58jfw,4681
7
7
  sigdetect/logging_setup.py,sha256=LMF8ao_a-JwH0S522T6aYTFX3e8Ajjv_5ODS2YiBcHA,6404
8
8
  sigdetect/utils.py,sha256=T9rubLf5T9JmjOHYMOba1j34fhOJaWocAXccnGTxRUE,5198
@@ -16,8 +16,8 @@ sigdetect/detector/file_result_model.py,sha256=j2gTc9Sw3fJOHlexYsR_m5DiwHA8DzIzA
16
16
  sigdetect/detector/pymupdf_engine.py,sha256=iyp7JuPlUnydwohH5zbNg4MwH44mBmxbBWOS3ZmArBo,17339
17
17
  sigdetect/detector/pypdf2_engine.py,sha256=INWQH06kMLvto2VS-EdLC-EtMC6AG7JmdVYmNgx6_RU,47313
18
18
  sigdetect/detector/signature_model.py,sha256=mztb9V5wgv2oohQ5Cxzcv8_Bo6TyWAVIXteaeQ2rywQ,1076
19
- sigdetect-0.3.0.dist-info/METADATA,sha256=i7rSqbNbViLWyNJFO5si0eghcM01mBdkLrFsVND7xZw,12171
20
- sigdetect-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
- sigdetect-0.3.0.dist-info/entry_points.txt,sha256=iqtfKjBU44-omM7Sh-idGz2ahw19oAvpvSyKZVArG3o,48
22
- sigdetect-0.3.0.dist-info/top_level.txt,sha256=PKlfwUobkRC0viwiSXmhtw83G26FSNpimWYC1Uy00FY,10
23
- sigdetect-0.3.0.dist-info/RECORD,,
19
+ sigdetect-0.3.1.dist-info/METADATA,sha256=whXGE4-9spAjlMcZz_owdsIiB4EobXL9_UOuAJeDVfA,12342
20
+ sigdetect-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ sigdetect-0.3.1.dist-info/entry_points.txt,sha256=iqtfKjBU44-omM7Sh-idGz2ahw19oAvpvSyKZVArG3o,48
22
+ sigdetect-0.3.1.dist-info/top_level.txt,sha256=PKlfwUobkRC0viwiSXmhtw83G26FSNpimWYC1Uy00FY,10
23
+ sigdetect-0.3.1.dist-info/RECORD,,