sigdetect 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {sigdetect-0.1.0 → sigdetect-0.2.0}/PKG-INFO +37 -6
  2. {sigdetect-0.1.0 → sigdetect-0.2.0}/README.md +36 -4
  3. {sigdetect-0.1.0 → sigdetect-0.2.0}/pyproject.toml +1 -2
  4. sigdetect-0.2.0/src/sigdetect/api.py +261 -0
  5. sigdetect-0.2.0/src/sigdetect/cli.py +232 -0
  6. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/config.py +49 -9
  7. sigdetect-0.2.0/src/sigdetect/cropping.py +123 -0
  8. sigdetect-0.2.0/src/sigdetect/detector/pymupdf_engine.py +420 -0
  9. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/detector/pypdf2_engine.py +46 -8
  10. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/detector/signature_model.py +4 -0
  11. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect.egg-info/PKG-INFO +37 -6
  12. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect.egg-info/SOURCES.txt +6 -1
  13. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect.egg-info/requires.txt +0 -1
  14. sigdetect-0.2.0/tests/test_api.py +60 -0
  15. sigdetect-0.2.0/tests/test_cropping.py +72 -0
  16. sigdetect-0.2.0/tests/test_pymupdf_engine.py +87 -0
  17. sigdetect-0.2.0/tests/test_widget_role_patient_smoke.py +66 -0
  18. sigdetect-0.1.0/src/sigdetect/api.py +0 -139
  19. sigdetect-0.1.0/src/sigdetect/cli.py +0 -98
  20. sigdetect-0.1.0/src/sigdetect/detector/pymupdf_engine.py +0 -0
  21. {sigdetect-0.1.0 → sigdetect-0.2.0}/setup.cfg +0 -0
  22. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/__init__.py +0 -0
  23. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/data/role_rules.retainer.yml +0 -0
  24. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/data/role_rules.yml +0 -0
  25. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/data/vendor_patterns.yml +0 -0
  26. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/detector/__init__.py +0 -0
  27. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/detector/base.py +0 -0
  28. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/detector/base_detector.py +0 -0
  29. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/detector/file_result_model.py +0 -0
  30. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/eda.py +0 -0
  31. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/logging_setup.py +0 -0
  32. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect/utils.py +0 -0
  33. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect.egg-info/dependency_links.txt +0 -0
  34. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect.egg-info/entry_points.txt +0 -0
  35. {sigdetect-0.1.0 → sigdetect-0.2.0}/src/sigdetect.egg-info/top_level.txt +0 -0
@@ -1,13 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sigdetect
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Signature detection and role attribution for PDFs
5
5
  Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
6
6
  License: MIT
7
7
  Requires-Python: >=3.9
8
8
  Description-Content-Type: text/markdown
9
9
  Requires-Dist: pypdf>=4.0.0
10
- Requires-Dist: pandas>=2.0
11
10
  Requires-Dist: rich>=13.0
12
11
  Requires-Dist: typer>=0.12
13
12
  Requires-Dist: pydantic>=2.5
@@ -102,6 +101,8 @@ sigdetect detect \
102
101
  - `--profile` selects tuned role logic:
103
102
  - `hipaa` → patient / representative / attorney
104
103
  - `retainer` → client / firm (prefers detecting two signatures)
104
+ - `--recursive/--no-recursive` toggles whether `sigdetect detect` descends into subdirectories when hunting for PDFs (recursive by default).
105
+ - `--crop-signatures` enables PNG crops for each detected widget (requires installing the optional `pymupdf` dependency). Use `--crop-dir` to override the destination and `--crop-dpi` to choose rendering quality.
105
106
  - If the executable is not on `PATH`, you can always fall back to `python -m sigdetect.cli ...`.
106
107
 
107
108
  ### EDA (quick aggregate stats)
@@ -135,7 +136,7 @@ result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
135
136
  print(result.to_dict())
136
137
  ~~~
137
138
 
138
- `Detect(Path)` returns a **FileResult** dataclass; call `.to_dict()` for the JSON-friendly representation (see [Result schema](#result-schema)).
139
+ `Detect(Path)` returns a **FileResult** dataclass; call `.to_dict()` for the JSON-friendly representation (see [Result schema](#result-schema)). Each signature entry now exposes `bounding_box` coordinates (PDF points, origin bottom-left). When PNG cropping is enabled, `crop_path` points at the generated image.
139
140
 
140
141
  ---
141
142
 
@@ -146,7 +147,17 @@ Import from `sigdetect.api` and get plain dicts out (JSON-ready),
146
147
  with no I/O side effects by default:
147
148
 
148
149
  ~~~python
149
- from sigdetect.api import DetectPdf, DetectMany, ScanDirectory, ToCsvRow, Version
150
+ from pathlib import Path
151
+
152
+ from sigdetect.api import (
153
+ CropSignatureImages,
154
+ DetectMany,
155
+ DetectPdf,
156
+ ScanDirectory,
157
+ ToCsvRow,
158
+ Version,
159
+ get_detector,
160
+ )
150
161
 
151
162
  print("sigdetect", Version())
152
163
 
@@ -178,6 +189,15 @@ for res in ScanDirectory(
178
189
  # store in DB, print, etc.
179
190
  pass
180
191
 
192
+ # 3) Crop PNG snippets for FileResult objects (requires PyMuPDF)
193
+ detector = get_detector(pdfRoot="/path/to/pdfs", profileName="hipaa")
194
+ file_result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
195
+ CropSignatureImages(
196
+ "/path/to/pdfs/example.pdf",
197
+ file_result,
198
+ outputDirectory="./signature_crops",
199
+ dpi=200,
200
+ )
181
201
  ~~~
182
202
 
183
203
 
@@ -205,7 +225,10 @@ High-level summary (per file):
205
225
  "score": 5,
206
226
  "scores": { "field": 3, "page_label": 2 },
207
227
  "evidence": ["field:patient", "page_label:patient"],
208
- "hint": "AcroSig:sig_patient"
228
+ "hint": "AcroSig:sig_patient",
229
+ "render_type": "typed",
230
+ "bounding_box": [10.0, 10.0, 150.0, 40.0],
231
+ "crop_path": "signature_crops/example/sig_01_patient.png"
209
232
  },
210
233
  {
211
234
  "page": null,
@@ -214,7 +237,10 @@ High-level summary (per file):
214
237
  "score": 6,
215
238
  "scores": { "page_label": 4, "general": 2 },
216
239
  "evidence": ["page_label:representative(parent/guardian)", "pseudo:true"],
217
- "hint": "VendorOrAcroOnly"
240
+ "hint": "VendorOrAcroOnly",
241
+ "render_type": "unknown",
242
+ "bounding_box": null,
243
+ "crop_path": null
218
244
  }
219
245
  ]
220
246
  }
@@ -227,6 +253,8 @@ High-level summary (per file):
227
253
  - **`mixed`** means both `esign_found` and `scanned_pdf` are `true`.
228
254
  - **`roles`** summarizes unique non-`unknown` roles across signatures.
229
255
  - In retainer profile, emitter prefers two signatures (client + firm), often on the same page.
256
+ - **`signatures[].bounding_box`** reports the widget rectangle in PDF points (origin bottom-left).
257
+ - **`signatures[].crop_path`** is populated when PNG crops are generated (via CLI `--crop-signatures` or `CropSignatureImages`).
230
258
 
231
259
  ---
232
260
 
@@ -252,6 +280,9 @@ engine: pypdf2
252
280
  pseudo_signatures: true
253
281
  recurse_xobjects: true
254
282
  profile: retainer # or: hipaa
283
+ crop_signatures: false # enable to write PNG crops (requires pymupdf)
284
+ # crop_output_dir: ./signature_crops
285
+ crop_image_dpi: 200
255
286
  ~~~
256
287
 
257
288
  YAML files can be customized or loaded at runtime (see CLI `--config`, if available, or import and pass patterns into the engine).
@@ -85,6 +85,8 @@ sigdetect detect \
85
85
  - `--profile` selects tuned role logic:
86
86
  - `hipaa` → patient / representative / attorney
87
87
  - `retainer` → client / firm (prefers detecting two signatures)
88
+ - `--recursive/--no-recursive` toggles whether `sigdetect detect` descends into subdirectories when hunting for PDFs (recursive by default).
89
+ - `--crop-signatures` enables PNG crops for each detected widget (requires installing the optional `pymupdf` dependency). Use `--crop-dir` to override the destination and `--crop-dpi` to choose rendering quality.
88
90
  - If the executable is not on `PATH`, you can always fall back to `python -m sigdetect.cli ...`.
89
91
 
90
92
  ### EDA (quick aggregate stats)
@@ -118,7 +120,7 @@ result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
118
120
  print(result.to_dict())
119
121
  ~~~
120
122
 
121
- `Detect(Path)` returns a **FileResult** dataclass; call `.to_dict()` for the JSON-friendly representation (see [Result schema](#result-schema)).
123
+ `Detect(Path)` returns a **FileResult** dataclass; call `.to_dict()` for the JSON-friendly representation (see [Result schema](#result-schema)). Each signature entry now exposes `bounding_box` coordinates (PDF points, origin bottom-left). When PNG cropping is enabled, `crop_path` points at the generated image.
122
124
 
123
125
  ---
124
126
 
@@ -129,7 +131,17 @@ Import from `sigdetect.api` and get plain dicts out (JSON-ready),
129
131
  with no I/O side effects by default:
130
132
 
131
133
  ~~~python
132
- from sigdetect.api import DetectPdf, DetectMany, ScanDirectory, ToCsvRow, Version
134
+ from pathlib import Path
135
+
136
+ from sigdetect.api import (
137
+ CropSignatureImages,
138
+ DetectMany,
139
+ DetectPdf,
140
+ ScanDirectory,
141
+ ToCsvRow,
142
+ Version,
143
+ get_detector,
144
+ )
133
145
 
134
146
  print("sigdetect", Version())
135
147
 
@@ -161,6 +173,15 @@ for res in ScanDirectory(
161
173
  # store in DB, print, etc.
162
174
  pass
163
175
 
176
+ # 3) Crop PNG snippets for FileResult objects (requires PyMuPDF)
177
+ detector = get_detector(pdfRoot="/path/to/pdfs", profileName="hipaa")
178
+ file_result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
179
+ CropSignatureImages(
180
+ "/path/to/pdfs/example.pdf",
181
+ file_result,
182
+ outputDirectory="./signature_crops",
183
+ dpi=200,
184
+ )
164
185
  ~~~
165
186
 
166
187
 
@@ -188,7 +209,10 @@ High-level summary (per file):
188
209
  "score": 5,
189
210
  "scores": { "field": 3, "page_label": 2 },
190
211
  "evidence": ["field:patient", "page_label:patient"],
191
- "hint": "AcroSig:sig_patient"
212
+ "hint": "AcroSig:sig_patient",
213
+ "render_type": "typed",
214
+ "bounding_box": [10.0, 10.0, 150.0, 40.0],
215
+ "crop_path": "signature_crops/example/sig_01_patient.png"
192
216
  },
193
217
  {
194
218
  "page": null,
@@ -197,7 +221,10 @@ High-level summary (per file):
197
221
  "score": 6,
198
222
  "scores": { "page_label": 4, "general": 2 },
199
223
  "evidence": ["page_label:representative(parent/guardian)", "pseudo:true"],
200
- "hint": "VendorOrAcroOnly"
224
+ "hint": "VendorOrAcroOnly",
225
+ "render_type": "unknown",
226
+ "bounding_box": null,
227
+ "crop_path": null
201
228
  }
202
229
  ]
203
230
  }
@@ -210,6 +237,8 @@ High-level summary (per file):
210
237
  - **`mixed`** means both `esign_found` and `scanned_pdf` are `true`.
211
238
  - **`roles`** summarizes unique non-`unknown` roles across signatures.
212
239
  - In retainer profile, emitter prefers two signatures (client + firm), often on the same page.
240
+ - **`signatures[].bounding_box`** reports the widget rectangle in PDF points (origin bottom-left).
241
+ - **`signatures[].crop_path`** is populated when PNG crops are generated (via CLI `--crop-signatures` or `CropSignatureImages`).
213
242
 
214
243
  ---
215
244
 
@@ -235,6 +264,9 @@ engine: pypdf2
235
264
  pseudo_signatures: true
236
265
  recurse_xobjects: true
237
266
  profile: retainer # or: hipaa
267
+ crop_signatures: false # enable to write PNG crops (requires pymupdf)
268
+ # crop_output_dir: ./signature_crops
269
+ crop_image_dpi: 200
238
270
  ~~~
239
271
 
240
272
  YAML files can be customized or loaded at runtime (see CLI `--config`, if available, or import and pass patterns into the engine).
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sigdetect"
7
- version = "0.1.0"
7
+ version = "0.2.0"
8
8
  description = "Signature detection and role attribution for PDFs"
9
9
  readme = "README.md"
10
10
  authors = [{ name = "BT Asmamaw", email = "basmamaw@angeiongroup.com" }]
@@ -12,7 +12,6 @@ license = { text = "MIT" }
12
12
  requires-python = ">=3.9"
13
13
  dependencies = [
14
14
  "pypdf>=4.0.0",
15
- "pandas>=2.0",
16
15
  "rich>=13.0",
17
16
  "typer>=0.12",
18
17
  "pydantic>=2.5",
@@ -0,0 +1,261 @@
1
+ """Public helpers for programmatic use of the signature detection engine."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contextlib import contextmanager
6
+ from pathlib import Path
7
+ from typing import Any, Generator, Iterable, Iterator, Literal
8
+
9
+ from sigdetect.config import DetectConfiguration
10
+ from sigdetect.detector import BuildDetector, Detector, FileResult, Signature
11
+
12
+ EngineName = Literal["pypdf2", "pypdf", "pymupdf"]
13
+ ProfileName = Literal["hipaa", "retainer"]
14
+
15
+
16
def DetectPdf(
    pdfPath: str | Path,
    *,
    profileName: ProfileName = "hipaa",
    engineName: EngineName = "pypdf2",
    includePseudoSignatures: bool = True,
    recurseXObjects: bool = True,
    detector: Detector | None = None,
) -> dict[str, Any]:
    """Run signature detection on one PDF and return the result as a plain dict.

    If a (truthy) ``detector`` is passed it is used directly and the other
    keyword options are not consulted. Otherwise a detector is created through
    :func:`get_detector`, rooted at the PDF's parent directory and with no
    output directory.
    """

    targetPath = Path(pdfPath)
    if detector:
        chosenDetector = detector
    else:
        chosenDetector = get_detector(
            pdfRoot=targetPath.parent,
            profileName=profileName,
            engineName=engineName,
            includePseudoSignatures=includePseudoSignatures,
            recurseXObjects=recurseXObjects,
            outputDirectory=None,
        )

    return _ToPlainDictionary(chosenDetector.Detect(targetPath))
39
+
40
+
41
def get_detector(
    *,
    pdfRoot: str | Path | None = None,
    profileName: ProfileName = "hipaa",
    engineName: EngineName = "pypdf2",
    includePseudoSignatures: bool = True,
    recurseXObjects: bool = True,
    outputDirectory: str | Path | None = None,
) -> Detector:
    """Build a detector from the given options so it can be reused across PDFs.

    ``pdfRoot`` falls back to the current working directory when omitted;
    ``outputDirectory`` stays ``None`` when omitted.
    """

    rootPath = Path.cwd() if pdfRoot is None else Path(pdfRoot)
    outputPath = None if outputDirectory is None else Path(outputDirectory)
    return BuildDetector(
        DetectConfiguration(
            PdfRoot=rootPath,
            OutputDirectory=outputPath,
            Engine=engineName,
            PseudoSignatures=includePseudoSignatures,
            RecurseXObjects=recurseXObjects,
            Profile=profileName,
        )
    )
61
+
62
+
63
def _ToPlainDictionary(candidate: Any) -> dict[str, Any]:
    """Turn a result object into a plain dictionary.

    Conversion strategies are tried in this order: a ``to_dict()`` method, a
    ``model_dump()`` method, a ``dict()`` method, ``dataclasses.asdict``, and
    finally a real ``dict`` whose values are passed through
    :func:`_ToPlainValue`.

    Raises:
        TypeError: if none of the strategies applies.
    """

    # Method-based converters, checked in priority order.
    for converterName in ("to_dict", "model_dump", "dict"):
        if hasattr(candidate, converterName):
            return getattr(candidate, converterName)()

    # Best effort: any failure in the dataclass path falls through to the
    # remaining checks instead of propagating.
    try:
        import dataclasses

        if dataclasses.is_dataclass(candidate):
            return dataclasses.asdict(candidate)
    except Exception:
        pass

    if not isinstance(candidate, dict):
        raise TypeError(f"Unsupported result type: {type(candidate)!r}")
    return {name: _ToPlainValue(candidate[name]) for name in candidate}
82
+
83
+
84
def _ToPlainValue(value: Any) -> Any:
    """Recursively convert a nested value into plain Python containers.

    Objects with ``to_dict()`` are converted with it; objects with
    ``model_dump()`` or ``dict()`` go through :func:`_ToPlainDictionary`;
    dataclasses use ``dataclasses.asdict``. Lists, tuples and dicts are
    rebuilt with converted members. Anything else is returned unchanged.
    """

    if hasattr(value, "to_dict"):
        return value.to_dict()
    if any(hasattr(value, attribute) for attribute in ("model_dump", "dict")):
        return _ToPlainDictionary(value)

    # Best effort: a failing dataclass conversion falls through to the
    # container checks below rather than raising.
    try:
        import dataclasses

        if dataclasses.is_dataclass(value):
            return dataclasses.asdict(value)
    except Exception:
        pass

    if isinstance(value, list):
        return list(map(_ToPlainValue, value))
    if isinstance(value, tuple):
        return tuple(map(_ToPlainValue, value))
    if isinstance(value, dict):
        return {name: _ToPlainValue(member) for name, member in value.items()}
    return value
105
+
106
+
107
def DetectMany(
    pdfPaths: Iterable[str | Path],
    *,
    detector: Detector | None = None,
    **kwargs: Any,
) -> Iterator[dict[str, Any]]:
    """Lazily yield one detection result dict per entry of ``pdfPaths``.

    With an explicit ``detector`` every path is run through that instance and
    ``kwargs`` are not used. Without one, each path is delegated to
    :func:`DetectPdf` together with ``kwargs``.
    """

    if detector is None:
        for singlePath in pdfPaths:
            yield DetectPdf(singlePath, **kwargs)
    else:
        for singlePath in pdfPaths:
            yield _DetectWithDetector(detector, singlePath)
122
+
123
+
124
def ScanDirectory(
    pdfRoot: str | Path,
    *,
    globPattern: str = "**/*.pdf",
    detector: Detector | None = None,
    **kwargs: Any,
) -> Iterator[dict[str, Any]]:
    """Yield detection results for the PDFs found under ``pdfRoot``.

    The default pattern walks every file below the root. A custom pattern that
    starts with ``**/`` is searched recursively with that prefix stripped; any
    other pattern is applied with a plain ``glob``. Whatever the pattern, only
    regular files whose suffix is ``.pdf`` (compared case-insensitively) are
    passed on to :func:`DetectPdf`.
    """

    baseDirectory = Path(pdfRoot)
    if globPattern == "**/*.pdf":
        candidates = (entry for entry in baseDirectory.rglob("*") if entry.is_file())
    elif globPattern.startswith("**/"):
        candidates = baseDirectory.rglob(globPattern.replace("**/", "", 1))
    else:
        candidates = baseDirectory.glob(globPattern)

    for entry in candidates:
        if not (entry.is_file() and entry.suffix.lower() == ".pdf"):
            continue
        yield DetectPdf(entry, detector=detector, **kwargs)
146
+
147
+
148
def ToCsvRow(result: dict[str, Any]) -> dict[str, Any]:
    """Project a detection result onto the fixed set of CSV export columns.

    Columns missing from ``result`` are present in the row with value ``None``;
    keys outside the column set are dropped.
    """

    csvColumns = (
        "file",
        "size_kb",
        "pages",
        "esign_found",
        "scanned_pdf",
        "mixed",
        "sig_count",
        "sig_pages",
        "roles",
        "hints",
    )
    return {column: result.get(column) for column in csvColumns}
163
+
164
+
165
def Version() -> str:
    """Return the installed ``sigdetect`` version string.

    Falls back to ``"0.0.0-dev"`` when the version lookup fails for any
    reason (for example when the package metadata is not installed).
    """

    try:
        import importlib.metadata as packageMetadata

        installedVersion = packageMetadata.version("sigdetect")
    except Exception:
        return "0.0.0-dev"
    return installedVersion
174
+
175
+
176
def _DetectWithDetector(detector: Detector, pdfPath: str | Path) -> dict[str, Any]:
    """Run ``detector`` on ``pdfPath`` and convert its result to a plain dict."""

    detection = detector.Detect(Path(pdfPath))
    return _ToPlainDictionary(detection)
181
+
182
+
183
@contextmanager
def detector_context(**kwargs: Any) -> Generator[Detector, None, None]:
    """Yield a detector built by :func:`get_detector` from ``kwargs``.

    No cleanup is performed when the ``with`` block exits; exceptions raised
    inside the block propagate unchanged.
    """

    yield get_detector(**kwargs)
192
+
193
+
194
def CropSignatureImages(
    pdfPath: str | Path,
    fileResult: FileResult | dict[str, Any],
    *,
    outputDirectory: str | Path,
    dpi: int = 200,
) -> list[Path]:
    """Crop detected signature regions to PNG files.

    Accepts either a :class:`FileResult` instance or the ``dict`` returned by
    :func:`DetectPdf`. Requires the optional ``pymupdf`` dependency.

    When ``fileResult`` is a ``dict`` it is updated in place: after cropping,
    its contents are replaced with the ``to_dict()`` form of the coerced
    result. A :class:`FileResult` argument is handed to the cropper directly.

    Returns:
        Whatever ``sigdetect.cropping.crop_signatures`` returns for the
        request (annotated as a list of paths).

    Raises:
        TypeError: if ``fileResult`` is neither a ``FileResult`` nor a dict.
    """

    # Imported lazily so importing this module does not pull in the cropping
    # stack.
    from sigdetect.cropping import crop_signatures

    resultObject, sourceDict = _CoerceFileResult(fileResult)
    cropPaths = crop_signatures(
        pdf_path=Path(pdfPath),
        file_result=resultObject,
        output_dir=Path(outputDirectory),
        dpi=dpi,
    )
    if sourceDict is not None:
        # Serialize BEFORE touching the caller's dict: if ``to_dict()`` raises,
        # the caller keeps their original data instead of an emptied dict.
        refreshed = resultObject.to_dict()
        sourceDict.clear()
        sourceDict.update(refreshed)
    return cropPaths
220
+
221
+
222
def _CoerceFileResult(
    candidate: FileResult | dict[str, Any]
) -> tuple[FileResult, dict[str, Any] | None]:
    """Normalize ``candidate`` to a ``FileResult`` plus its source dict, if any.

    A ``FileResult`` is returned untouched, paired with ``None``. A dict is
    rebuilt into a new ``FileResult`` (missing or falsy fields get defaults)
    and returned together with that same dict object.

    Raises:
        TypeError: if ``candidate`` is neither a ``FileResult`` nor a dict.
    """

    if isinstance(candidate, FileResult):
        return candidate, None
    if not isinstance(candidate, dict):
        raise TypeError("fileResult must be FileResult or dict")

    def buildSignature(entry: dict[str, Any]) -> Signature:
        # Falsy/missing values collapse to the defaults spelled out below.
        box = entry.get("bounding_box")
        return Signature(
            Page=entry.get("page"),
            FieldName=str(entry.get("field_name") or ""),
            Role=str(entry.get("role") or "unknown"),
            Score=int(entry.get("score") or 0),
            Scores=dict(entry.get("scores") or {}),
            Evidence=list(entry.get("evidence") or []),
            Hint=str(entry.get("hint") or ""),
            RenderType=str(entry.get("render_type") or "unknown"),
            BoundingBox=tuple(box) if box else None,
            CropPath=entry.get("crop_path"),
        )

    rebuiltSignatures: list[Signature] = [
        buildSignature(entry) for entry in candidate.get("signatures") or []
    ]

    rebuiltResult = FileResult(
        File=str(candidate.get("file") or ""),
        SizeKilobytes=candidate.get("size_kb"),
        PageCount=int(candidate.get("pages") or 0),
        ElectronicSignatureFound=bool(candidate.get("esign_found")),
        ScannedPdf=candidate.get("scanned_pdf"),
        MixedContent=candidate.get("mixed"),
        # A missing or zero count falls back to the number of rebuilt entries.
        SignatureCount=int(candidate.get("sig_count") or len(rebuiltSignatures)),
        SignaturePages=str(candidate.get("sig_pages") or ""),
        Roles=str(candidate.get("roles") or "unknown"),
        Hints=str(candidate.get("hints") or ""),
        Signatures=rebuiltSignatures,
    )
    return rebuiltResult, candidate