sigdetect 0.5.0__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sigdetect-0.5.0 → sigdetect-0.5.2}/PKG-INFO +11 -7
- {sigdetect-0.5.0 → sigdetect-0.5.2}/README.md +10 -6
- {sigdetect-0.5.0 → sigdetect-0.5.2}/pyproject.toml +1 -1
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/api.py +10 -4
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/cli.py +20 -7
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/config.py +12 -0
- sigdetect-0.5.2/src/sigdetect/cropping.py +488 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/detector/signature_model.py +4 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/wet_detection.py +48 -14
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect.egg-info/PKG-INFO +11 -7
- {sigdetect-0.5.0 → sigdetect-0.5.2}/tests/test_cli.py +2 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/tests/test_cropping.py +132 -7
- {sigdetect-0.5.0 → sigdetect-0.5.2}/tests/test_wet_detection.py +98 -0
- sigdetect-0.5.0/src/sigdetect/cropping.py +0 -246
- {sigdetect-0.5.0 → sigdetect-0.5.2}/setup.cfg +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/__init__.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/data/role_rules.retainer.yml +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/data/role_rules.yml +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/data/vendor_patterns.yml +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/detector/__init__.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/detector/base.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/detector/base_detector.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/detector/file_result_model.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/detector/pymupdf_engine.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/detector/pypdf2_engine.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/eda.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/logging_setup.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect/utils.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect.egg-info/SOURCES.txt +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect.egg-info/dependency_links.txt +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect.egg-info/entry_points.txt +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect.egg-info/requires.txt +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/src/sigdetect.egg-info/top_level.txt +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/tests/test_api.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/tests/test_detector_options.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/tests/test_pymupdf_engine.py +0 -0
- {sigdetect-0.5.0 → sigdetect-0.5.2}/tests/test_widget_role_patient_smoke.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sigdetect
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: Signature detection and role attribution for PDFs
|
|
5
5
|
Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
|
|
6
6
|
License: MIT
|
|
@@ -105,7 +105,7 @@ sigdetect detect \
|
|
|
105
105
|
- `retainer` → client / firm (prefers detecting two signatures)
|
|
106
106
|
- `--recursive/--no-recursive` toggles whether `sigdetect detect` descends into subdirectories when hunting for PDFs (recursive by default).
|
|
107
107
|
- Results output is disabled by default; set `write_results: true` or pass `--write-results` when you need `results.json` (for EDA).
|
|
108
|
-
- Cropping (`--crop-signatures`) writes
|
|
108
|
+
- Cropping (`--crop-signatures`) writes PNG crops to disk by default; enable `--crop-docx` to write DOCX files instead of PNGs. `--crop-bytes` embeds base64 PNG data in `signatures[].crop_bytes` and, when `--crop-docx` is enabled, embeds DOCX bytes in `signatures[].crop_docx_bytes`. PyMuPDF is required for crops, and `python-docx` is required for DOCX output.
|
|
109
109
|
- Wet detection runs automatically for non-e-sign PDFs when dependencies are available; missing OCR dependencies add a `ManualReview:*` hint instead of failing. PyMuPDF + Tesseract are required for wet detection.
|
|
110
110
|
- If the executable is not on `PATH`, you can always fall back to `python -m sigdetect.cli ...`.
|
|
111
111
|
|
|
@@ -142,7 +142,7 @@ result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
|
|
|
142
142
|
print(result.to_dict())
|
|
143
143
|
~~~
|
|
144
144
|
|
|
145
|
-
`Detect(Path)` returns a **FileResult** dataclass; call `.to_dict()` for the JSON-friendly representation (see [Result schema](#result-schema)). Each signature entry now exposes `bounding_box` coordinates (PDF points, origin bottom-left). When cropping is enabled, `crop_path` points at the generated
|
|
145
|
+
`Detect(Path)` returns a **FileResult** dataclass; call `.to_dict()` for the JSON-friendly representation (see [Result schema](#result-schema)). Each signature entry now exposes `bounding_box` coordinates (PDF points, origin bottom-left). When PNG cropping is enabled, `crop_path` points at the generated image; when DOCX cropping is enabled, `crop_docx_path` points at the generated doc. Use `Engine="auto"` if you want the single-pass defaults that prefer PyMuPDF (for geometry) when available.
|
|
146
146
|
|
|
147
147
|
---
|
|
148
148
|
|
|
@@ -194,7 +194,7 @@ for res in ScanDirectory(
|
|
|
194
194
|
# store in DB, print, etc.
|
|
195
195
|
pass
|
|
196
196
|
|
|
197
|
-
# 3)
|
|
197
|
+
# 3) Crop signature snippets for FileResult objects (requires PyMuPDF; DOCX needs python-docx)
|
|
198
198
|
detector = get_detector(pdfRoot="/path/to/pdfs", profileName="hipaa")
|
|
199
199
|
file_result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
|
|
200
200
|
CropSignatureImages(
|
|
@@ -233,7 +233,8 @@ High-level summary (per file):
|
|
|
233
233
|
"hint": "AcroSig:sig_patient",
|
|
234
234
|
"render_type": "typed",
|
|
235
235
|
"bounding_box": [10.0, 10.0, 150.0, 40.0],
|
|
236
|
-
"crop_path": "signature_crops/example/sig_01_patient.
|
|
236
|
+
"crop_path": "signature_crops/example/sig_01_patient.png",
|
|
237
|
+
"crop_docx_path": null
|
|
237
238
|
},
|
|
238
239
|
{
|
|
239
240
|
"page": null,
|
|
@@ -259,8 +260,10 @@ High-level summary (per file):
|
|
|
259
260
|
- **`roles`** summarizes unique non-`unknown` roles across signatures.
|
|
260
261
|
- In retainer profile, emitter prefers two signatures (client + firm), often on the same page.
|
|
261
262
|
- **`signatures[].bounding_box`** reports the widget rectangle in PDF points (origin bottom-left).
|
|
262
|
-
- **`signatures[].crop_path`** is populated when
|
|
263
|
+
- **`signatures[].crop_path`** is populated when PNG crops are generated (via CLI `--crop-signatures` or `CropSignatureImages`).
|
|
264
|
+
- **`signatures[].crop_docx_path`** is populated when DOCX crops are generated (`--crop-docx` or `docx=True`).
|
|
263
265
|
- **`signatures[].crop_bytes`** contains base64 PNG data when CLI `--crop-bytes` is enabled.
|
|
266
|
+
- **`signatures[].crop_docx_bytes`** contains base64 DOCX data when `--crop-docx` and `--crop-bytes` are enabled together.
|
|
264
267
|
|
|
265
268
|
---
|
|
266
269
|
|
|
@@ -287,7 +290,8 @@ write_results: false
|
|
|
287
290
|
pseudo_signatures: true
|
|
288
291
|
recurse_xobjects: true
|
|
289
292
|
profile: retainer # or: hipaa
|
|
290
|
-
crop_signatures: false # enable to write
|
|
293
|
+
crop_signatures: false # enable to write PNG crops (requires pymupdf)
|
|
294
|
+
crop_docx: false # enable to write DOCX crops instead of PNGs (requires python-docx)
|
|
291
295
|
# crop_output_dir: ./signature_crops
|
|
292
296
|
crop_image_dpi: 200
|
|
293
297
|
detect_wet_signatures: false # kept for compatibility; non-e-sign PDFs still trigger OCR
|
|
@@ -87,7 +87,7 @@ sigdetect detect \
|
|
|
87
87
|
- `retainer` → client / firm (prefers detecting two signatures)
|
|
88
88
|
- `--recursive/--no-recursive` toggles whether `sigdetect detect` descends into subdirectories when hunting for PDFs (recursive by default).
|
|
89
89
|
- Results output is disabled by default; set `write_results: true` or pass `--write-results` when you need `results.json` (for EDA).
|
|
90
|
-
- Cropping (`--crop-signatures`) writes
|
|
90
|
+
- Cropping (`--crop-signatures`) writes PNG crops to disk by default; enable `--crop-docx` to write DOCX files instead of PNGs. `--crop-bytes` embeds base64 PNG data in `signatures[].crop_bytes` and, when `--crop-docx` is enabled, embeds DOCX bytes in `signatures[].crop_docx_bytes`. PyMuPDF is required for crops, and `python-docx` is required for DOCX output.
|
|
91
91
|
- Wet detection runs automatically for non-e-sign PDFs when dependencies are available; missing OCR dependencies add a `ManualReview:*` hint instead of failing. PyMuPDF + Tesseract are required for wet detection.
|
|
92
92
|
- If the executable is not on `PATH`, you can always fall back to `python -m sigdetect.cli ...`.
|
|
93
93
|
|
|
@@ -124,7 +124,7 @@ result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
|
|
|
124
124
|
print(result.to_dict())
|
|
125
125
|
~~~
|
|
126
126
|
|
|
127
|
-
`Detect(Path)` returns a **FileResult** dataclass; call `.to_dict()` for the JSON-friendly representation (see [Result schema](#result-schema)). Each signature entry now exposes `bounding_box` coordinates (PDF points, origin bottom-left). When cropping is enabled, `crop_path` points at the generated
|
|
127
|
+
`Detect(Path)` returns a **FileResult** dataclass; call `.to_dict()` for the JSON-friendly representation (see [Result schema](#result-schema)). Each signature entry now exposes `bounding_box` coordinates (PDF points, origin bottom-left). When PNG cropping is enabled, `crop_path` points at the generated image; when DOCX cropping is enabled, `crop_docx_path` points at the generated doc. Use `Engine="auto"` if you want the single-pass defaults that prefer PyMuPDF (for geometry) when available.
|
|
128
128
|
|
|
129
129
|
---
|
|
130
130
|
|
|
@@ -176,7 +176,7 @@ for res in ScanDirectory(
|
|
|
176
176
|
# store in DB, print, etc.
|
|
177
177
|
pass
|
|
178
178
|
|
|
179
|
-
# 3)
|
|
179
|
+
# 3) Crop signature snippets for FileResult objects (requires PyMuPDF; DOCX needs python-docx)
|
|
180
180
|
detector = get_detector(pdfRoot="/path/to/pdfs", profileName="hipaa")
|
|
181
181
|
file_result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
|
|
182
182
|
CropSignatureImages(
|
|
@@ -215,7 +215,8 @@ High-level summary (per file):
|
|
|
215
215
|
"hint": "AcroSig:sig_patient",
|
|
216
216
|
"render_type": "typed",
|
|
217
217
|
"bounding_box": [10.0, 10.0, 150.0, 40.0],
|
|
218
|
-
"crop_path": "signature_crops/example/sig_01_patient.
|
|
218
|
+
"crop_path": "signature_crops/example/sig_01_patient.png",
|
|
219
|
+
"crop_docx_path": null
|
|
219
220
|
},
|
|
220
221
|
{
|
|
221
222
|
"page": null,
|
|
@@ -241,8 +242,10 @@ High-level summary (per file):
|
|
|
241
242
|
- **`roles`** summarizes unique non-`unknown` roles across signatures.
|
|
242
243
|
- In retainer profile, emitter prefers two signatures (client + firm), often on the same page.
|
|
243
244
|
- **`signatures[].bounding_box`** reports the widget rectangle in PDF points (origin bottom-left).
|
|
244
|
-
- **`signatures[].crop_path`** is populated when
|
|
245
|
+
- **`signatures[].crop_path`** is populated when PNG crops are generated (via CLI `--crop-signatures` or `CropSignatureImages`).
|
|
246
|
+
- **`signatures[].crop_docx_path`** is populated when DOCX crops are generated (`--crop-docx` or `docx=True`).
|
|
245
247
|
- **`signatures[].crop_bytes`** contains base64 PNG data when CLI `--crop-bytes` is enabled.
|
|
248
|
+
- **`signatures[].crop_docx_bytes`** contains base64 DOCX data when `--crop-docx` and `--crop-bytes` are enabled together.
|
|
246
249
|
|
|
247
250
|
---
|
|
248
251
|
|
|
@@ -269,7 +272,8 @@ write_results: false
|
|
|
269
272
|
pseudo_signatures: true
|
|
270
273
|
recurse_xobjects: true
|
|
271
274
|
profile: retainer # or: hipaa
|
|
272
|
-
crop_signatures: false # enable to write
|
|
275
|
+
crop_signatures: false # enable to write PNG crops (requires pymupdf)
|
|
276
|
+
crop_docx: false # enable to write DOCX crops instead of PNGs (requires python-docx)
|
|
273
277
|
# crop_output_dir: ./signature_crops
|
|
274
278
|
crop_image_dpi: 200
|
|
275
279
|
detect_wet_signatures: false # kept for compatibility; non-e-sign PDFs still trigger OCR
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sigdetect"
|
|
7
|
-
version = "0.5.0"
|
|
7
|
+
version = "0.5.2"
|
|
8
8
|
description = "Signature detection and role attribution for PDFs"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [{ name = "BT Asmamaw", email = "basmamaw@angeiongroup.com" }]
|
|
@@ -229,6 +229,7 @@ def CropSignatureImages(
|
|
|
229
229
|
dpi: int = 200,
|
|
230
230
|
returnBytes: Literal[False] = False,
|
|
231
231
|
saveToDisk: bool = True,
|
|
232
|
+
docx: bool = False,
|
|
232
233
|
) -> list[Path]: ...
|
|
233
234
|
|
|
234
235
|
|
|
@@ -241,6 +242,7 @@ def CropSignatureImages(
|
|
|
241
242
|
dpi: int,
|
|
242
243
|
returnBytes: Literal[True],
|
|
243
244
|
saveToDisk: bool,
|
|
245
|
+
docx: bool = False,
|
|
244
246
|
) -> list[SignatureCrop]: ...
|
|
245
247
|
|
|
246
248
|
|
|
@@ -252,16 +254,17 @@ def CropSignatureImages(
|
|
|
252
254
|
dpi: int = 200,
|
|
253
255
|
returnBytes: bool = False,
|
|
254
256
|
saveToDisk: bool = True,
|
|
257
|
+
docx: bool = False,
|
|
255
258
|
) -> list[Path] | list[SignatureCrop]:
|
|
256
|
-
"""Create
|
|
259
|
+
"""Create PNG files containing cropped signature images (or DOCX when enabled).
|
|
257
260
|
|
|
258
261
|
Accepts either a :class:`FileResult` instance or the ``dict`` returned by
|
|
259
262
|
:func:`DetectPdf`. Requires the optional ``pymupdf`` dependency.
|
|
260
263
|
Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop. Set
|
|
261
264
|
``saveToDisk=False`` to skip writing PNG files while still returning in-memory data.
|
|
262
|
-
When ``
|
|
263
|
-
|
|
264
|
-
|
|
265
|
+
When ``docx`` is True, DOCX files are written instead of PNG files. When ``returnBytes`` is
|
|
266
|
+
True and ``docx`` is enabled, the returned :class:`SignatureCrop` objects include
|
|
267
|
+
``docx_bytes``.
|
|
265
268
|
"""
|
|
266
269
|
|
|
267
270
|
from sigdetect.cropping import crop_signatures
|
|
@@ -274,6 +277,7 @@ def CropSignatureImages(
|
|
|
274
277
|
dpi=dpi,
|
|
275
278
|
return_bytes=returnBytes,
|
|
276
279
|
save_files=saveToDisk,
|
|
280
|
+
docx=docx,
|
|
277
281
|
)
|
|
278
282
|
if original_dict is not None:
|
|
279
283
|
original_dict.clear()
|
|
@@ -305,6 +309,8 @@ def _CoerceFileResult(
|
|
|
305
309
|
BoundingBox=tuple(bbox) if bbox else None,
|
|
306
310
|
CropPath=entry.get("crop_path"),
|
|
307
311
|
CropBytes=entry.get("crop_bytes"),
|
|
312
|
+
CropDocxPath=entry.get("crop_docx_path"),
|
|
313
|
+
CropDocxBytes=entry.get("crop_docx_bytes"),
|
|
308
314
|
)
|
|
309
315
|
)
|
|
310
316
|
|
|
@@ -64,13 +64,19 @@ def Detect(
|
|
|
64
64
|
cropSignatures: bool | None = typer.Option(
|
|
65
65
|
None,
|
|
66
66
|
"--crop-signatures/--no-crop-signatures",
|
|
67
|
-
help="Write
|
|
67
|
+
help="Write PNG crops for signature widgets (requires PyMuPDF)",
|
|
68
|
+
show_default=False,
|
|
69
|
+
),
|
|
70
|
+
cropDocx: bool | None = typer.Option(
|
|
71
|
+
None,
|
|
72
|
+
"--crop-docx/--no-crop-docx",
|
|
73
|
+
help="Write DOCX crops instead of PNG files (requires PyMuPDF + python-docx)",
|
|
68
74
|
show_default=False,
|
|
69
75
|
),
|
|
70
76
|
cropDirectory: Path | None = typer.Option(
|
|
71
77
|
None,
|
|
72
78
|
"--crop-dir",
|
|
73
|
-
help="Directory for signature
|
|
79
|
+
help="Directory for signature crops (defaults to out_dir/signature_crops)",
|
|
74
80
|
),
|
|
75
81
|
cropDpi: int | None = typer.Option(
|
|
76
82
|
None,
|
|
@@ -83,7 +89,7 @@ def Detect(
|
|
|
83
89
|
cropBytes: bool = typer.Option(
|
|
84
90
|
False,
|
|
85
91
|
"--crop-bytes/--no-crop-bytes",
|
|
86
|
-
help="Embed base64 PNG bytes
|
|
92
|
+
help="Embed base64 PNG bytes (and DOCX bytes when --crop-docx) in results JSON",
|
|
87
93
|
show_default=False,
|
|
88
94
|
),
|
|
89
95
|
detectWetSignatures: bool | None = typer.Option(
|
|
@@ -128,6 +134,8 @@ def Detect(
|
|
|
128
134
|
overrides["WriteResults"] = writeResults
|
|
129
135
|
if cropSignatures is not None:
|
|
130
136
|
overrides["CropSignatures"] = cropSignatures
|
|
137
|
+
if cropDocx is not None:
|
|
138
|
+
overrides["CropDocx"] = cropDocx
|
|
131
139
|
if cropDirectory is not None:
|
|
132
140
|
overrides["CropOutputDirectory"] = cropDirectory
|
|
133
141
|
if cropDpi is not None:
|
|
@@ -181,6 +189,7 @@ def Detect(
|
|
|
181
189
|
base_dir = configuration.OutputDirectory or configuration.PdfRoot
|
|
182
190
|
crop_dir = base_dir / "signature_crops"
|
|
183
191
|
cropping_enabled = configuration.CropSignatures
|
|
192
|
+
docx_enabled = configuration.CropDocx
|
|
184
193
|
cropping_available = True
|
|
185
194
|
cropping_attempted = False
|
|
186
195
|
|
|
@@ -199,6 +208,7 @@ def Detect(
|
|
|
199
208
|
logger=Logger,
|
|
200
209
|
return_bytes=crop_bytes_enabled,
|
|
201
210
|
save_files=cropping_enabled,
|
|
211
|
+
docx=docx_enabled,
|
|
202
212
|
)
|
|
203
213
|
cropping_attempted = True
|
|
204
214
|
if crop_bytes_enabled:
|
|
@@ -206,15 +216,18 @@ def Detect(
|
|
|
206
216
|
crop.signature.CropBytes = base64.b64encode(crop.image_bytes).decode(
|
|
207
217
|
"ascii"
|
|
208
218
|
)
|
|
219
|
+
if crop.docx_bytes:
|
|
220
|
+
crop.signature.CropDocxBytes = base64.b64encode(
|
|
221
|
+
crop.docx_bytes
|
|
222
|
+
).decode("ascii")
|
|
209
223
|
except SignatureCroppingUnavailable as exc:
|
|
210
224
|
cropping_available = False
|
|
211
225
|
Logger.warning("Signature cropping unavailable", extra={"error": str(exc)})
|
|
212
226
|
typer.echo(str(exc), err=True)
|
|
213
227
|
except Exception as exc: # pragma: no cover - defensive
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
)
|
|
228
|
+
cropping_available = False
|
|
229
|
+
Logger.warning("Signature cropping unavailable", extra={"error": str(exc)})
|
|
230
|
+
typer.echo(str(exc), err=True)
|
|
218
231
|
|
|
219
232
|
total_bboxes += sum(1 for sig in file_result.Signatures if sig.BoundingBox)
|
|
220
233
|
|
|
@@ -31,6 +31,7 @@ class DetectConfiguration(BaseModel):
|
|
|
31
31
|
PseudoSignatures: bool = Field(default=True, alias="pseudo_signatures")
|
|
32
32
|
RecurseXObjects: bool = Field(default=True, alias="recurse_xobjects")
|
|
33
33
|
CropSignatures: bool = Field(default=True, alias="crop_signatures")
|
|
34
|
+
CropDocx: bool = Field(default=False, alias="crop_docx")
|
|
34
35
|
CropOutputDirectory: Path | None = Field(default=None, alias="crop_output_dir")
|
|
35
36
|
CropImageDpi: int = Field(default=200, alias="crop_image_dpi", ge=72, le=600)
|
|
36
37
|
DetectWetSignatures: bool = Field(default=True, alias="detect_wet_signatures")
|
|
@@ -88,6 +89,10 @@ class DetectConfiguration(BaseModel):
|
|
|
88
89
|
def crop_signatures(self) -> bool: # pragma: no cover - simple passthrough
|
|
89
90
|
return self.CropSignatures
|
|
90
91
|
|
|
92
|
+
@property
|
|
93
|
+
def crop_docx(self) -> bool: # pragma: no cover - simple passthrough
|
|
94
|
+
return self.CropDocx
|
|
95
|
+
|
|
91
96
|
@property
|
|
92
97
|
def crop_output_dir(self) -> Path | None: # pragma: no cover - simple passthrough
|
|
93
98
|
return self.CropOutputDirectory
|
|
@@ -133,6 +138,7 @@ def LoadConfiguration(path: Path | None) -> DetectConfiguration:
|
|
|
133
138
|
env_out_dir = os.getenv("SIGDETECT_OUT_DIR")
|
|
134
139
|
env_profile = os.getenv("SIGDETECT_PROFILE")
|
|
135
140
|
env_crop = os.getenv("SIGDETECT_CROP_SIGNATURES")
|
|
141
|
+
env_crop_docx = os.getenv("SIGDETECT_CROP_DOCX")
|
|
136
142
|
env_crop_dir = os.getenv("SIGDETECT_CROP_DIR")
|
|
137
143
|
env_crop_dpi = os.getenv("SIGDETECT_CROP_DPI")
|
|
138
144
|
env_detect_wet = os.getenv("SIGDETECT_DETECT_WET")
|
|
@@ -159,6 +165,12 @@ def LoadConfiguration(path: Path | None) -> DetectConfiguration:
|
|
|
159
165
|
raw_data["crop_signatures"] = True
|
|
160
166
|
elif lowered in {"0", "false", "no", "off"}:
|
|
161
167
|
raw_data["crop_signatures"] = False
|
|
168
|
+
if env_crop_docx is not None:
|
|
169
|
+
lowered = env_crop_docx.lower()
|
|
170
|
+
if lowered in {"1", "true", "yes", "on"}:
|
|
171
|
+
raw_data["crop_docx"] = True
|
|
172
|
+
elif lowered in {"0", "false", "no", "off"}:
|
|
173
|
+
raw_data["crop_docx"] = False
|
|
162
174
|
if env_crop_dir:
|
|
163
175
|
raw_data["crop_output_dir"] = env_crop_dir
|
|
164
176
|
if env_crop_dpi:
|