sigdetect 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sigdetect/api.py CHANGED
@@ -9,6 +9,7 @@ from typing import Any, Generator, Iterable, Iterator, Literal, overload
9
9
  from sigdetect.config import DetectConfiguration
10
10
  from sigdetect.cropping import SignatureCrop
11
11
  from sigdetect.detector import BuildDetector, Detector, FileResult, Signature
12
+ from sigdetect.wet_detection import apply_wet_detection
12
13
 
13
14
  EngineName = Literal["pypdf2", "pypdf", "pymupdf", "auto"]
14
15
  ProfileName = Literal["hipaa", "retainer"]
@@ -21,9 +22,13 @@ def DetectPdf(
21
22
  engineName: EngineName = "auto",
22
23
  includePseudoSignatures: bool = True,
23
24
  recurseXObjects: bool = True,
25
+ runWetDetection: bool = True,
24
26
  detector: Detector | None = None,
25
27
  ) -> dict[str, Any]:
26
- """Detect signature evidence and assign roles for a single PDF."""
28
+ """Detect signature evidence and assign roles for a single PDF.
29
+
30
+ Wet detection runs by default for non-e-sign PDFs; pass ``runWetDetection=False`` to skip OCR.
31
+ """
27
32
 
28
33
  resolvedPath = Path(pdfPath)
29
34
  activeDetector = detector or get_detector(
@@ -36,6 +41,10 @@ def DetectPdf(
36
41
  )
37
42
 
38
43
  result = activeDetector.Detect(resolvedPath)
44
+ if runWetDetection:
45
+ configuration = _ResolveConfiguration(activeDetector)
46
+ if configuration is not None:
47
+ apply_wet_detection(resolvedPath, configuration, result)
39
48
  return _ToPlainDictionary(result)
40
49
 
41
50
 
@@ -48,7 +57,10 @@ def get_detector(
48
57
  recurseXObjects: bool = True,
49
58
  outputDirectory: str | Path | None = None,
50
59
  ) -> Detector:
51
- """Return a reusable detector instance configured with the supplied options."""
60
+ """Return a reusable detector instance configured with the supplied options.
61
+
62
+ Engine selection is forced to ``auto`` (prefers PyMuPDF when available).
63
+ """
52
64
 
53
65
  configuration = DetectConfiguration(
54
66
  PdfRoot=Path(pdfRoot) if pdfRoot is not None else Path.cwd(),
@@ -108,6 +120,7 @@ def _ToPlainValue(value: Any) -> Any:
108
120
  def DetectMany(
109
121
  pdfPaths: Iterable[str | Path],
110
122
  *,
123
+ runWetDetection: bool = True,
111
124
  detector: Detector | None = None,
112
125
  **kwargs: Any,
113
126
  ) -> Iterator[dict[str, Any]]:
@@ -115,17 +128,18 @@ def DetectMany(
115
128
 
116
129
  if detector is not None:
117
130
  for pdfPath in pdfPaths:
118
- yield _DetectWithDetector(detector, pdfPath)
131
+ yield _DetectWithDetector(detector, pdfPath, runWetDetection=runWetDetection)
119
132
  return
120
133
 
121
134
  for pdfPath in pdfPaths:
122
- yield DetectPdf(pdfPath, **kwargs)
135
+ yield DetectPdf(pdfPath, runWetDetection=runWetDetection, **kwargs)
123
136
 
124
137
 
125
138
  def ScanDirectory(
126
139
  pdfRoot: str | Path,
127
140
  *,
128
141
  globPattern: str = "**/*.pdf",
142
+ runWetDetection: bool = True,
129
143
  detector: Detector | None = None,
130
144
  **kwargs: Any,
131
145
  ) -> Iterator[dict[str, Any]]:
@@ -143,7 +157,7 @@ def ScanDirectory(
143
157
 
144
158
  for pdfPath in iterator:
145
159
  if pdfPath.is_file() and pdfPath.suffix.lower() == ".pdf":
146
- yield DetectPdf(pdfPath, detector=detector, **kwargs)
160
+ yield DetectPdf(pdfPath, detector=detector, runWetDetection=runWetDetection, **kwargs)
147
161
 
148
162
 
149
163
  def ToCsvRow(result: dict[str, Any]) -> dict[str, Any]:
@@ -174,11 +188,25 @@ def Version() -> str:
174
188
  return "0.0.0-dev"
175
189
 
176
190
 
177
- def _DetectWithDetector(detector: Detector, pdfPath: str | Path) -> dict[str, Any]:
191
+ def _DetectWithDetector(
192
+ detector: Detector, pdfPath: str | Path, *, runWetDetection: bool
193
+ ) -> dict[str, Any]:
178
194
  """Helper that runs ``detector`` and returns the plain dictionary result."""
179
195
 
180
196
  resolvedPath = Path(pdfPath)
181
- return _ToPlainDictionary(detector.Detect(resolvedPath))
197
+ result = detector.Detect(resolvedPath)
198
+ if runWetDetection:
199
+ configuration = _ResolveConfiguration(detector)
200
+ if configuration is not None:
201
+ apply_wet_detection(resolvedPath, configuration, result)
202
+ return _ToPlainDictionary(result)
203
+
204
+
205
+ def _ResolveConfiguration(detector: Detector) -> DetectConfiguration | None:
206
+ configuration = getattr(detector, "Configuration", None)
207
+ if isinstance(configuration, DetectConfiguration):
208
+ return configuration
209
+ return None
182
210
 
183
211
 
184
212
  @contextmanager
@@ -201,8 +229,8 @@ def CropSignatureImages(
201
229
  dpi: int = 200,
202
230
  returnBytes: Literal[False] = False,
203
231
  saveToDisk: bool = True,
204
- ) -> list[Path]:
205
- ...
232
+ docx: bool = False,
233
+ ) -> list[Path]: ...
206
234
 
207
235
 
208
236
  @overload
@@ -214,8 +242,8 @@ def CropSignatureImages(
214
242
  dpi: int,
215
243
  returnBytes: Literal[True],
216
244
  saveToDisk: bool,
217
- ) -> list[SignatureCrop]:
218
- ...
245
+ docx: bool = False,
246
+ ) -> list[SignatureCrop]: ...
219
247
 
220
248
 
221
249
  def CropSignatureImages(
@@ -226,13 +254,17 @@ def CropSignatureImages(
226
254
  dpi: int = 200,
227
255
  returnBytes: bool = False,
228
256
  saveToDisk: bool = True,
257
+ docx: bool = False,
229
258
  ) -> list[Path] | list[SignatureCrop]:
230
- """Crop detected signature regions to PNG files.
259
+ """Create PNG files containing cropped signature images (or DOCX when enabled).
231
260
 
232
261
  Accepts either a :class:`FileResult` instance or the ``dict`` returned by
233
262
  :func:`DetectPdf`. Requires the optional ``pymupdf`` dependency.
234
263
  Set ``returnBytes=True`` to also receive in-memory PNG bytes for each crop. Set
235
264
  ``saveToDisk=False`` to skip writing PNG files while still returning in-memory data.
265
+ When ``docx`` is True, DOCX files are written instead of PNG files. When ``returnBytes`` is
266
+ True and ``docx`` is enabled, the returned :class:`SignatureCrop` objects include
267
+ ``docx_bytes``.
236
268
  """
237
269
 
238
270
  from sigdetect.cropping import crop_signatures
@@ -245,6 +277,7 @@ def CropSignatureImages(
245
277
  dpi=dpi,
246
278
  return_bytes=returnBytes,
247
279
  save_files=saveToDisk,
280
+ docx=docx,
248
281
  )
249
282
  if original_dict is not None:
250
283
  original_dict.clear()
@@ -275,6 +308,9 @@ def _CoerceFileResult(
275
308
  RenderType=str(entry.get("render_type") or "unknown"),
276
309
  BoundingBox=tuple(bbox) if bbox else None,
277
310
  CropPath=entry.get("crop_path"),
311
+ CropBytes=entry.get("crop_bytes"),
312
+ CropDocxPath=entry.get("crop_docx_path"),
313
+ CropDocxBytes=entry.get("crop_docx_bytes"),
278
314
  )
279
315
  )
280
316
 
sigdetect/cli.py CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import base64
5
6
  import json
6
7
  from collections.abc import Iterator
7
8
  from dataclasses import asdict, is_dataclass
@@ -48,6 +49,12 @@ def Detect(
48
49
  configurationPath: Path | None = typer.Option(
49
50
  None, "--config", "-c", help="Path to YAML config"
50
51
  ),
52
+ writeResults: bool | None = typer.Option(
53
+ None,
54
+ "--write-results/--no-write-results",
55
+ help="Write results.json (or JSON to stdout when out_dir is none)",
56
+ show_default=False,
57
+ ),
51
58
  profileOverride: str | None = typer.Option(None, "--profile", "-p", help="hipaa or retainer"),
52
59
  recursive: bool = typer.Option(
53
60
  True,
@@ -57,13 +64,19 @@ def Detect(
57
64
  cropSignatures: bool | None = typer.Option(
58
65
  None,
59
66
  "--crop-signatures/--no-crop-signatures",
60
- help="Crop detected signature regions to PNG files (requires PyMuPDF)",
67
+ help="Write PNG crops for signature widgets (requires PyMuPDF)",
68
+ show_default=False,
69
+ ),
70
+ cropDocx: bool | None = typer.Option(
71
+ None,
72
+ "--crop-docx/--no-crop-docx",
73
+ help="Write DOCX crops instead of PNG files (requires PyMuPDF + python-docx)",
61
74
  show_default=False,
62
75
  ),
63
76
  cropDirectory: Path | None = typer.Option(
64
77
  None,
65
78
  "--crop-dir",
66
- help="Directory for signature PNG crops (defaults to out_dir/signature_crops)",
79
+ help="Directory for signature crops (defaults to out_dir/signature_crops)",
67
80
  ),
68
81
  cropDpi: int | None = typer.Option(
69
82
  None,
@@ -73,10 +86,16 @@ def Detect(
73
86
  help="Rendering DPI for signature crops",
74
87
  show_default=False,
75
88
  ),
89
+ cropBytes: bool = typer.Option(
90
+ False,
91
+ "--crop-bytes/--no-crop-bytes",
92
+ help="Embed base64 PNG bytes (and DOCX bytes when --crop-docx) in results JSON",
93
+ show_default=False,
94
+ ),
76
95
  detectWetSignatures: bool | None = typer.Option(
77
96
  None,
78
97
  "--detect-wet/--no-detect-wet",
79
- help="Run OCR-backed wet signature detection (requires PyMuPDF + Tesseract)",
98
+ help="Compatibility flag; non-e-sign PDFs always run OCR when deps are available",
80
99
  show_default=False,
81
100
  ),
82
101
  wetOcrDpi: int | None = typer.Option(
@@ -111,8 +130,12 @@ def Detect(
111
130
  configuration = configuration.model_copy(update={"Profile": normalized_profile})
112
131
 
113
132
  overrides: dict[str, object] = {}
133
+ if writeResults is not None:
134
+ overrides["WriteResults"] = writeResults
114
135
  if cropSignatures is not None:
115
136
  overrides["CropSignatures"] = cropSignatures
137
+ if cropDocx is not None:
138
+ overrides["CropDocx"] = cropDocx
116
139
  if cropDirectory is not None:
117
140
  overrides["CropOutputDirectory"] = cropDirectory
118
141
  if cropDpi is not None:
@@ -145,53 +168,66 @@ def Detect(
145
168
  except StopIteration:
146
169
  raise SystemExit(f"No PDFs found in {configuration.PdfRoot}") from None
147
170
 
148
- results_buffer: list[FileResult] | None = [] if configuration.OutputDirectory is None else None
171
+ write_results = configuration.WriteResults
172
+ results_buffer: list[FileResult] | None = (
173
+ [] if write_results and configuration.OutputDirectory is None else None
174
+ )
149
175
  json_handle = None
150
176
  json_path: Path | None = None
151
177
  wrote_first = False
152
178
 
153
- if configuration.OutputDirectory is not None:
179
+ if write_results and configuration.OutputDirectory is not None:
154
180
  outputDirectory = configuration.OutputDirectory
155
181
  outputDirectory.mkdir(parents=True, exist_ok=True)
156
182
  json_path = outputDirectory / "results.json"
157
183
  json_handle = open(json_path, "w", encoding="utf-8")
158
184
  json_handle.write("[")
159
185
 
186
+ crop_bytes_enabled = bool(cropBytes)
160
187
  crop_dir = configuration.CropOutputDirectory
188
+ if crop_dir is None:
189
+ base_dir = configuration.OutputDirectory or configuration.PdfRoot
190
+ crop_dir = base_dir / "signature_crops"
161
191
  cropping_enabled = configuration.CropSignatures
192
+ docx_enabled = configuration.CropDocx
162
193
  cropping_available = True
163
194
  cropping_attempted = False
164
- if configuration.CropSignatures and crop_dir is None:
165
- Logger.warning(
166
- "CropSignatures enabled without an output directory",
167
- extra={"pdf_root": str(configuration.PdfRoot)},
168
- )
169
- cropping_enabled = False
170
195
 
171
196
  total_bboxes = 0
172
197
 
173
198
  def _append_result(file_result: FileResult, source_pdf: Path) -> None:
174
199
  nonlocal wrote_first, json_handle, total_bboxes, cropping_available, cropping_attempted
175
200
 
176
- if cropping_enabled and cropping_available and crop_dir is not None:
201
+ if cropping_available and (cropping_enabled or crop_bytes_enabled) and crop_dir is not None:
177
202
  try:
178
- crop_signatures(
203
+ crops = crop_signatures(
179
204
  pdf_path=source_pdf,
180
205
  file_result=file_result,
181
206
  output_dir=crop_dir,
182
207
  dpi=configuration.CropImageDpi,
183
208
  logger=Logger,
209
+ return_bytes=crop_bytes_enabled,
210
+ save_files=cropping_enabled,
211
+ docx=docx_enabled,
184
212
  )
185
213
  cropping_attempted = True
214
+ if crop_bytes_enabled:
215
+ for crop in crops:
216
+ crop.signature.CropBytes = base64.b64encode(crop.image_bytes).decode(
217
+ "ascii"
218
+ )
219
+ if crop.docx_bytes:
220
+ crop.signature.CropDocxBytes = base64.b64encode(
221
+ crop.docx_bytes
222
+ ).decode("ascii")
186
223
  except SignatureCroppingUnavailable as exc:
187
224
  cropping_available = False
188
225
  Logger.warning("Signature cropping unavailable", extra={"error": str(exc)})
189
226
  typer.echo(str(exc), err=True)
190
227
  except Exception as exc: # pragma: no cover - defensive
191
- Logger.warning(
192
- "Unexpected error while cropping signatures",
193
- extra={"error": str(exc)},
194
- )
228
+ cropping_available = False
229
+ Logger.warning("Signature cropping unavailable", extra={"error": str(exc)})
230
+ typer.echo(str(exc), err=True)
195
231
 
196
232
  total_bboxes += sum(1 for sig in file_result.Signatures if sig.BoundingBox)
197
233
 
@@ -231,18 +267,24 @@ def Detect(
231
267
  json_handle.write(closing)
232
268
  json_handle.close()
233
269
 
234
- if json_handle is not None:
235
- typer.echo(f"Wrote {json_path}")
236
- else:
237
- payload = json.dumps(
238
- results_buffer or [], indent=2, ensure_ascii=False, default=_JsonSerializer
239
- )
240
- typer.echo(payload)
241
- typer.echo("Detection completed with output disabled (out_dir=none)")
242
-
243
- if cropping_enabled and cropping_available and cropping_attempted and total_bboxes == 0:
270
+ if write_results:
271
+ if json_handle is not None:
272
+ typer.echo(f"Wrote {json_path}")
273
+ else:
274
+ payload = json.dumps(
275
+ results_buffer or [], indent=2, ensure_ascii=False, default=_JsonSerializer
276
+ )
277
+ typer.echo(payload)
278
+ typer.echo("Detection completed with output disabled (out_dir=none)")
279
+
280
+ if (
281
+ (cropping_enabled or crop_bytes_enabled)
282
+ and cropping_available
283
+ and cropping_attempted
284
+ and total_bboxes == 0
285
+ ):
244
286
  Logger.warning(
245
- "No signature bounding boxes detected; try --engine pymupdf for crop-ready output",
287
+ "No signature bounding boxes detected; install PyMuPDF for crop-ready output",
246
288
  extra={"engine": configuration.Engine},
247
289
  )
248
290
 
sigdetect/config.py CHANGED
@@ -25,11 +25,13 @@ class DetectConfiguration(BaseModel):
25
25
 
26
26
  PdfRoot: Path = Field(default=Path("hipaa_results"), alias="pdf_root")
27
27
  OutputDirectory: Path | None = Field(default=Path("out"), alias="out_dir")
28
+ WriteResults: bool = Field(default=False, alias="write_results")
28
29
  Engine: EngineName = Field(default="auto", alias="engine")
29
30
  Profile: ProfileName = Field(default="hipaa", alias="profile")
30
31
  PseudoSignatures: bool = Field(default=True, alias="pseudo_signatures")
31
32
  RecurseXObjects: bool = Field(default=True, alias="recurse_xobjects")
32
33
  CropSignatures: bool = Field(default=True, alias="crop_signatures")
34
+ CropDocx: bool = Field(default=False, alias="crop_docx")
33
35
  CropOutputDirectory: Path | None = Field(default=None, alias="crop_output_dir")
34
36
  CropImageDpi: int = Field(default=200, alias="crop_image_dpi", ge=72, le=600)
35
37
  DetectWetSignatures: bool = Field(default=True, alias="detect_wet_signatures")
@@ -63,6 +65,10 @@ class DetectConfiguration(BaseModel):
63
65
  def out_dir(self) -> Path | None: # pragma: no cover - simple passthrough
64
66
  return self.OutputDirectory
65
67
 
68
+ @property
69
+ def write_results(self) -> bool: # pragma: no cover - simple passthrough
70
+ return self.WriteResults
71
+
66
72
  @property
67
73
  def engine(self) -> EngineName: # pragma: no cover - simple passthrough
68
74
  return self.Engine
@@ -83,6 +89,10 @@ class DetectConfiguration(BaseModel):
83
89
  def crop_signatures(self) -> bool: # pragma: no cover - simple passthrough
84
90
  return self.CropSignatures
85
91
 
92
+ @property
93
+ def crop_docx(self) -> bool: # pragma: no cover - simple passthrough
94
+ return self.CropDocx
95
+
86
96
  @property
87
97
  def crop_output_dir(self) -> Path | None: # pragma: no cover - simple passthrough
88
98
  return self.CropOutputDirectory
@@ -128,6 +138,7 @@ def LoadConfiguration(path: Path | None) -> DetectConfiguration:
128
138
  env_out_dir = os.getenv("SIGDETECT_OUT_DIR")
129
139
  env_profile = os.getenv("SIGDETECT_PROFILE")
130
140
  env_crop = os.getenv("SIGDETECT_CROP_SIGNATURES")
141
+ env_crop_docx = os.getenv("SIGDETECT_CROP_DOCX")
131
142
  env_crop_dir = os.getenv("SIGDETECT_CROP_DIR")
132
143
  env_crop_dpi = os.getenv("SIGDETECT_CROP_DPI")
133
144
  env_detect_wet = os.getenv("SIGDETECT_DETECT_WET")
@@ -154,6 +165,12 @@ def LoadConfiguration(path: Path | None) -> DetectConfiguration:
154
165
  raw_data["crop_signatures"] = True
155
166
  elif lowered in {"0", "false", "no", "off"}:
156
167
  raw_data["crop_signatures"] = False
168
+ if env_crop_docx is not None:
169
+ lowered = env_crop_docx.lower()
170
+ if lowered in {"1", "true", "yes", "on"}:
171
+ raw_data["crop_docx"] = True
172
+ elif lowered in {"0", "false", "no", "off"}:
173
+ raw_data["crop_docx"] = False
157
174
  if env_crop_dir:
158
175
  raw_data["crop_output_dir"] = env_crop_dir
159
176
  if env_crop_dpi:
sigdetect/cropping.py CHANGED
@@ -1,7 +1,8 @@
1
- """Helpers for converting signature bounding boxes into PNG crops."""
1
+ """Helpers for converting signature bounding boxes into PNG or DOCX crops."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import io
5
6
  import logging
6
7
  import re
7
8
  from dataclasses import dataclass
@@ -16,18 +17,28 @@ try: # pragma: no cover - optional dependency
16
17
  except Exception: # pragma: no cover - optional dependency
17
18
  fitz = None # type: ignore[misc]
18
19
 
20
+ try: # pragma: no cover - optional dependency
21
+ from docx import Document # type: ignore
22
+ except Exception: # pragma: no cover - optional dependency
23
+ Document = None # type: ignore[assignment]
24
+
19
25
 
20
26
  class SignatureCroppingUnavailable(RuntimeError):
21
27
  """Raised when PNG cropping cannot be performed (e.g., PyMuPDF missing)."""
22
28
 
23
29
 
30
+ class SignatureDocxUnavailable(SignatureCroppingUnavailable):
31
+ """Raised when DOCX creation cannot be performed (e.g., python-docx missing)."""
32
+
33
+
24
34
  @dataclass(slots=True)
25
35
  class SignatureCrop:
26
- """PNG crop metadata and in-memory content."""
36
+ """Crop metadata and in-memory content."""
27
37
 
28
38
  path: Path
29
39
  image_bytes: bytes
30
40
  signature: Signature
41
+ docx_bytes: bytes | None = None
31
42
  saved_to_disk: bool = True
32
43
 
33
44
 
@@ -41,8 +52,8 @@ def crop_signatures(
41
52
  logger: logging.Logger | None = None,
42
53
  return_bytes: Literal[False] = False,
43
54
  save_files: bool = True,
44
- ) -> list[Path]:
45
- ...
55
+ docx: bool = False,
56
+ ) -> list[Path]: ...
46
57
 
47
58
 
48
59
  @overload
@@ -55,8 +66,8 @@ def crop_signatures(
55
66
  logger: logging.Logger | None = None,
56
67
  return_bytes: Literal[True],
57
68
  save_files: bool = True,
58
- ) -> list[SignatureCrop]:
59
- ...
69
+ docx: bool = False,
70
+ ) -> list[SignatureCrop]: ...
60
71
 
61
72
 
62
73
  def crop_signatures(
@@ -68,16 +79,19 @@ def crop_signatures(
68
79
  logger: logging.Logger | None = None,
69
80
  return_bytes: bool = False,
70
81
  save_files: bool = True,
82
+ docx: bool = False,
71
83
  ) -> list[Path] | list[SignatureCrop]:
72
- """Render each signature bounding box to a PNG image using PyMuPDF.
84
+ """Render each signature bounding box to a PNG image and optionally wrap it in DOCX.
73
85
 
74
86
  Set ``return_bytes=True`` to collect in-memory PNG bytes for each crop while also writing
75
87
  the files to ``output_dir``. Set ``save_files=False`` to skip writing PNGs to disk.
88
+ When ``docx=True``, DOCX files are written instead of PNGs. When ``return_bytes`` is True
89
+ and ``docx=True``, ``SignatureCrop.docx_bytes`` will contain the DOCX payload.
76
90
  """
77
91
 
78
92
  if fitz is None: # pragma: no cover - exercised when dependency absent
79
93
  raise SignatureCroppingUnavailable(
80
- "PyMuPDF is required for PNG crops. Install 'pymupdf' or 'sigdetect[pymupdf]'."
94
+ "PyMuPDF is required for PNG crops. Install 'pymupdf' or add it to your environment."
81
95
  )
82
96
  if not save_files and not return_bytes:
83
97
  raise ValueError("At least one of save_files or return_bytes must be True")
@@ -89,6 +103,13 @@ def crop_signatures(
89
103
  generated_paths: list[Path] = []
90
104
  generated_crops: list[SignatureCrop] = []
91
105
 
106
+ docx_enabled = docx
107
+ docx_available = Document is not None
108
+ if docx_enabled and not docx_available:
109
+ raise SignatureDocxUnavailable(
110
+ "python-docx is required to generate DOCX outputs for signature crops."
111
+ )
112
+
92
113
  with fitz.open(pdf_path) as document: # type: ignore[attr-defined]
93
114
  per_document_dir = output_dir / pdf_path.stem
94
115
  if save_files:
@@ -118,14 +139,15 @@ def crop_signatures(
118
139
  continue
119
140
 
120
141
  filename = _build_filename(index, signature)
121
- destination = per_document_dir / filename
142
+ png_destination = per_document_dir / filename
143
+ docx_destination = png_destination.with_suffix(".docx")
122
144
 
123
145
  try:
124
146
  image_bytes: bytes | None = None
125
147
  pixmap = page.get_pixmap(matrix=matrix, clip=clip, alpha=False)
126
- if save_files:
127
- pixmap.save(destination)
128
- if return_bytes:
148
+ if save_files and not docx_enabled:
149
+ pixmap.save(png_destination)
150
+ if return_bytes or docx_enabled:
129
151
  image_bytes = pixmap.tobytes("png")
130
152
  except Exception as exc: # pragma: no cover - defensive
131
153
  if logger:
@@ -140,17 +162,46 @@ def crop_signatures(
140
162
  )
141
163
  continue
142
164
 
165
+ docx_bytes: bytes | None = None
166
+ if docx_enabled:
167
+ if image_bytes is None: # pragma: no cover - defensive
168
+ continue
169
+ try:
170
+ docx_bytes = _build_docx_bytes(image_bytes)
171
+ if save_files:
172
+ docx_destination.write_bytes(docx_bytes)
173
+ except SignatureDocxUnavailable as exc:
174
+ if logger:
175
+ logger.warning(
176
+ "Signature DOCX output unavailable",
177
+ extra={"error": str(exc)},
178
+ )
179
+ docx_available = False
180
+ except Exception as exc: # pragma: no cover - defensive
181
+ if logger:
182
+ logger.warning(
183
+ "Failed to write signature DOCX",
184
+ extra={"file": pdf_path.name, "error": str(exc)},
185
+ )
186
+
143
187
  if save_files:
144
- signature.CropPath = str(destination)
145
- generated_paths.append(destination)
188
+ if docx_enabled:
189
+ signature.CropPath = None
190
+ signature.CropDocxPath = str(docx_destination)
191
+ generated_paths.append(docx_destination)
192
+ else:
193
+ signature.CropDocxPath = None
194
+ signature.CropPath = str(png_destination)
195
+ generated_paths.append(png_destination)
146
196
  if return_bytes:
147
197
  if image_bytes is None: # pragma: no cover - defensive
148
198
  continue
149
199
  generated_crops.append(
150
200
  SignatureCrop(
151
- path=destination,
201
+ path=docx_destination if docx_enabled else png_destination,
152
202
  image_bytes=image_bytes,
153
203
  signature=signature,
204
+ docx_bytes=docx_bytes,
154
205
  saved_to_disk=save_files,
155
206
  )
156
207
  )
@@ -158,6 +209,18 @@ def crop_signatures(
158
209
  return generated_crops if return_bytes else generated_paths
159
210
 
160
211
 
212
+ def _build_docx_bytes(image_bytes: bytes) -> bytes:
213
+ if Document is None:
214
+ raise SignatureDocxUnavailable(
215
+ "python-docx is required to generate DOCX outputs for signature crops."
216
+ )
217
+ document = Document()
218
+ document.add_picture(io.BytesIO(image_bytes))
219
+ buffer = io.BytesIO()
220
+ document.save(buffer)
221
+ return buffer.getvalue()
222
+
223
+
161
224
  def _to_clip_rect(page, bbox: tuple[float, float, float, float]):
162
225
  width = float(page.rect.width)
163
226
  height = float(page.rect.height)
@@ -22,10 +22,13 @@ ENGINE_REGISTRY: dict[str, Type[Detector]] = {
22
22
  ENGINE_REGISTRY.setdefault("pypdf", PyPDF2Detector)
23
23
 
24
24
  try: # pragma: no cover - optional dependency
25
- from .pymupdf_engine import PyMuPDFDetector # type: ignore
25
+ from .pymupdf_engine import PyMuPDFDetector
26
+ from .pymupdf_engine import fitz as pymupdf_fitz # type: ignore
26
27
 
27
- if getattr(PyMuPDFDetector, "Name", None):
28
+ if pymupdf_fitz is not None and getattr(PyMuPDFDetector, "Name", None):
28
29
  ENGINE_REGISTRY[PyMuPDFDetector.Name] = PyMuPDFDetector
30
+ else:
31
+ PyMuPDFDetector = None # type: ignore
29
32
  except Exception:
30
33
  PyMuPDFDetector = None # type: ignore
31
34
 
@@ -33,17 +36,16 @@ except Exception:
33
36
  def BuildDetector(configuration: DetectConfiguration) -> Detector:
34
37
  """Instantiate the configured engine or raise a clear error."""
35
38
 
36
- engine_name = (
37
- getattr(configuration, "Engine", None)
38
- or getattr(configuration, "engine", None)
39
- or PyPDF2Detector.Name
40
- )
39
+ # Force geometry-capable engine selection (auto prefers PyMuPDF when available).
40
+ engine_name = "auto"
41
41
  normalized = str(engine_name).lower()
42
42
 
43
43
  if normalized == "auto":
44
44
  detector_cls: Type[Detector] | None = None
45
45
  if PyMuPDFDetector is not None:
46
- detector_cls = ENGINE_REGISTRY.get(getattr(PyMuPDFDetector, "Name", "")) or PyMuPDFDetector
46
+ detector_cls = (
47
+ ENGINE_REGISTRY.get(getattr(PyMuPDFDetector, "Name", "")) or PyMuPDFDetector
48
+ )
47
49
  if detector_cls is None:
48
50
  detector_cls = ENGINE_REGISTRY.get(PyPDF2Detector.Name) or ENGINE_REGISTRY.get("pypdf")
49
51
  warnings.warn(
@@ -30,8 +30,8 @@ class PyMuPDFDetector(PyPDF2Detector):
30
30
  def __init__(self, configuration):
31
31
  if fitz is None: # pragma: no cover - optional dependency
32
32
  raise ValueError(
33
- "PyMuPDF engine requires the optional 'pymupdf' dependency. Install via 'pip install "
34
- "sigdetect[pymupdf]' or add pymupdf to your environment."
33
+ "PyMuPDF engine requires the optional 'pymupdf' dependency. Install 'pymupdf' or add "
34
+ "it to your environment."
35
35
  )
36
36
  super().__init__(configuration)
37
37
 
@@ -20,6 +20,9 @@ class Signature:
20
20
  RenderType: str = "typed"
21
21
  BoundingBox: tuple[float, float, float, float] | None = None
22
22
  CropPath: str | None = None
23
+ CropBytes: str | None = None
24
+ CropDocxPath: str | None = None
25
+ CropDocxBytes: str | None = None
23
26
 
24
27
  def to_dict(self) -> dict[str, Any]:
25
28
  """Return the legacy snake_case representation used in JSON payloads."""
@@ -35,4 +38,7 @@ class Signature:
35
38
  "render_type": self.RenderType,
36
39
  "bounding_box": list(self.BoundingBox) if self.BoundingBox else None,
37
40
  "crop_path": self.CropPath,
41
+ "crop_bytes": self.CropBytes,
42
+ "crop_docx_path": self.CropDocxPath,
43
+ "crop_docx_bytes": self.CropDocxBytes,
38
44
  }
@@ -67,11 +67,7 @@ class OcrLine:
67
67
  def should_run_wet_pipeline(file_result: FileResult) -> bool:
68
68
  """Return ``True`` when the OCR pipeline should run for ``file_result``."""
69
69
 
70
- return (
71
- (not file_result.ElectronicSignatureFound or file_result.SignatureCount == 0)
72
- or (bool(file_result.ScannedPdf) and not file_result.ElectronicSignatureFound)
73
- or bool(file_result.MixedContent)
74
- )
70
+ return not bool(file_result.ElectronicSignatureFound)
75
71
 
76
72
 
77
73
  def apply_wet_detection(
@@ -83,8 +79,6 @@ def apply_wet_detection(
83
79
  ) -> bool:
84
80
  """Augment ``file_result`` with OCR-detected wet signatures when possible."""
85
81
 
86
- if not configuration.DetectWetSignatures:
87
- return False
88
82
  if not should_run_wet_pipeline(file_result):
89
83
  return False
90
84
 
@@ -96,6 +90,8 @@ def apply_wet_detection(
96
90
  logger.warning("Wet detection unavailable", extra={"error": str(exc)})
97
91
  return False
98
92
 
93
+ original_esign = file_result.ElectronicSignatureFound
94
+ original_mixed = file_result.MixedContent
99
95
  try:
100
96
  added = _detect(pdf_path, configuration, file_result, logger=logger)
101
97
  if not added:
@@ -106,6 +102,9 @@ def apply_wet_detection(
106
102
  if logger:
107
103
  logger.warning("Wet detection failed", extra={"error": str(exc)})
108
104
  return False
105
+ finally:
106
+ file_result.ElectronicSignatureFound = original_esign
107
+ file_result.MixedContent = original_mixed
109
108
 
110
109
 
111
110
  def _detect(
@@ -138,6 +137,7 @@ def _detect(
138
137
  )
139
138
  )
140
139
  candidates.extend(_image_candidates(page))
140
+ candidates = _filter_candidates_for_page(candidates)
141
141
  accepted = [
142
142
  candidate
143
143
  for candidate in candidates
@@ -157,7 +157,11 @@ def _detect(
157
157
  if not new_signatures:
158
158
  return False
159
159
 
160
- file_result.Signatures.extend(new_signatures)
160
+ filtered_signatures = _dedupe_wet_signatures(new_signatures)
161
+ if not filtered_signatures:
162
+ return False
163
+
164
+ file_result.Signatures.extend(filtered_signatures)
161
165
  _refresh_metadata(file_result)
162
166
  return True
163
167
  finally:
@@ -277,6 +281,31 @@ def _build_candidates(
277
281
  )
278
282
 
279
283
 
284
+ def _has_evidence(candidate: WetCandidate, token: str) -> bool:
285
+ return token in candidate.Evidence
286
+
287
+
288
+ def _is_image_candidate(candidate: WetCandidate) -> bool:
289
+ return _has_evidence(candidate, "image_signature:true")
290
+
291
+
292
+ def _has_stroke(candidate: WetCandidate) -> bool:
293
+ return _has_evidence(candidate, "stroke:yes")
294
+
295
+
296
+ def _filter_candidates_for_page(candidates: Sequence[WetCandidate]) -> list[WetCandidate]:
297
+ if not candidates:
298
+ return []
299
+ has_image = any(_is_image_candidate(candidate) for candidate in candidates)
300
+ if not has_image:
301
+ return list(candidates)
302
+ return [
303
+ candidate
304
+ for candidate in candidates
305
+ if _is_image_candidate(candidate) or _has_stroke(candidate)
306
+ ]
307
+
308
+
280
309
  def _infer_role(normalized_text: str) -> str:
281
310
  for role, keywords in ROLE_KEYWORDS.items():
282
311
  if any(keyword in normalized_text for keyword in keywords):
@@ -379,7 +408,7 @@ def _image_candidates(page) -> list[WetCandidate]:
379
408
  continue
380
409
  if hasattr(rect, "x0"):
381
410
  x0, y0, x1, y1 = float(rect.x0), float(rect.y0), float(rect.x1), float(rect.y1)
382
- elif isinstance(rect, (tuple, list)) and len(rect) == 4:
411
+ elif isinstance(rect, tuple | list) and len(rect) == 4:
383
412
  x0, y0, x1, y1 = map(float, rect)
384
413
  else:
385
414
  continue
@@ -422,7 +451,7 @@ def _infer_role_nearby(rect, words) -> str:
422
451
  proximity_x = 140.0
423
452
  if hasattr(rect, "x0"):
424
453
  rx0, ry0, rx1, ry1 = float(rect.x0), float(rect.y0), float(rect.x1), float(rect.y1)
425
- elif isinstance(rect, (tuple, list)) and len(rect) == 4:
454
+ elif isinstance(rect, tuple | list) and len(rect) == 4:
426
455
  rx0, ry0, rx1, ry1 = map(float, rect)
427
456
  else:
428
457
  return "unknown"
@@ -471,6 +500,29 @@ def _to_signatures(
471
500
  return signatures
472
501
 
473
502
 
503
+ def _signature_rank(signature: Signature) -> tuple[int, int, int]:
504
+ evidence = set(signature.Evidence or [])
505
+ if "image_signature:true" in evidence:
506
+ source_rank = 3
507
+ elif "stroke:yes" in evidence:
508
+ source_rank = 2
509
+ else:
510
+ source_rank = 1
511
+ return (source_rank, int(signature.Score or 0), int(signature.Page or 0))
512
+
513
+
514
+ def _dedupe_wet_signatures(signatures: Sequence[Signature]) -> list[Signature]:
515
+ best_by_role: dict[str, Signature] = {}
516
+ for signature in signatures:
517
+ role = (signature.Role or "unknown").strip().lower()
518
+ if role == "unknown":
519
+ continue
520
+ existing = best_by_role.get(role)
521
+ if existing is None or _signature_rank(signature) > _signature_rank(existing):
522
+ best_by_role[role] = signature
523
+ return sorted(best_by_role.values(), key=lambda sig: (int(sig.Page or 0), sig.Role or ""))
524
+
525
+
474
526
  def _mark_manual_review(file_result: FileResult, reason: str) -> None:
475
527
  hints = _split_hints(file_result.Hints)
476
528
  hints.add(f"ManualReview:{reason}")
@@ -485,9 +537,7 @@ def _refresh_metadata(file_result: FileResult) -> None:
485
537
  if roles:
486
538
  file_result.Roles = ";".join(roles)
487
539
  file_result.ElectronicSignatureFound = file_result.SignatureCount > 0
488
- file_result.MixedContent = (
489
- file_result.ElectronicSignatureFound and bool(file_result.ScannedPdf)
490
- )
540
+ file_result.MixedContent = file_result.ElectronicSignatureFound and bool(file_result.ScannedPdf)
491
541
  hints = _split_hints(file_result.Hints)
492
542
  hints |= {sig.Hint for sig in file_result.Signatures if sig.Hint}
493
543
  file_result.Hints = ";".join(sorted(hints))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sigdetect
3
- Version: 0.4.0
3
+ Version: 0.5.1
4
4
  Summary: Signature detection and role attribution for PDFs
5
5
  Author-email: BT Asmamaw <basmamaw@angeiongroup.com>
6
6
  License: MIT
@@ -10,9 +10,11 @@ Requires-Dist: pypdf>=4.0.0
10
10
  Requires-Dist: rich>=13.0
11
11
  Requires-Dist: typer>=0.12
12
12
  Requires-Dist: pydantic>=2.5
13
+ Requires-Dist: pillow>=10.0
14
+ Requires-Dist: python-docx>=1.1.0
15
+ Requires-Dist: pytesseract>=0.3.10
16
+ Requires-Dist: pymupdf>=1.23
13
17
  Requires-Dist: pyyaml>=6.0
14
- Provides-Extra: pymupdf
15
- Requires-Dist: pymupdf>=1.23; extra == "pymupdf"
16
18
 
17
19
  # CaseWorks.Automation.CaseDocumentIntake
18
20
 
@@ -95,14 +97,16 @@ sigdetect detect \
95
97
  ### Notes
96
98
 
97
99
  - The config file controls `pdf_root`, `out_dir`, `engine`, `pseudo_signatures`, `recurse_xobjects`, etc.
98
- - `--engine` accepts **auto** (default; prefers PyMuPDF when installed, falls back to PyPDF2), **pypdf2**, or **pymupdf**.
100
+ - Engine selection is forced to **auto** (prefers PyMuPDF for geometry, falls back to PyPDF2); any configured `engine` value is overridden.
99
101
  - `--pseudo-signatures` enables a vendor/Acro-only pseudo-signature when no actual `/Widget` is present (useful for DocuSign / Acrobat Sign receipts).
100
102
  - `--recurse-xobjects` allows scanning Form XObjects for vendor markers and labels embedded in page resources.
101
103
  - `--profile` selects tuned role logic:
102
104
  - `hipaa` → patient / representative / attorney
103
105
  - `retainer` → client / firm (prefers detecting two signatures)
104
106
  - `--recursive/--no-recursive` toggles whether `sigdetect detect` descends into subdirectories when hunting for PDFs (recursive by default).
105
- - Cropping (`--crop-signatures`) and wet detection (`--detect-wet`) are enabled by default for single-pass runs; disable them if you want a light, e-sign-only pass. PyMuPDF is required for crops; PyMuPDF + Tesseract are required for wet detection.
107
+ - Results output is disabled by default; set `write_results: true` or pass `--write-results` when you need `results.json` (for EDA).
108
+ - Cropping (`--crop-signatures`) writes PNG crops to disk by default; enable `--crop-docx` to write DOCX files instead of PNGs. `--crop-bytes` embeds base64 PNG data in `signatures[].crop_bytes` and, when `--crop-docx` is enabled, embeds DOCX bytes in `signatures[].crop_docx_bytes`. PyMuPDF is required for crops, and `python-docx` is required for DOCX output.
109
+ - Wet detection runs automatically for non-e-sign PDFs when dependencies are available; missing OCR dependencies add a `ManualReview:*` hint instead of failing. PyMuPDF + Tesseract are required for wet detection.
106
110
  - If the executable is not on `PATH`, you can always fall back to `python -m sigdetect.cli ...`.
107
111
 
108
112
  ### EDA (quick aggregate stats)
@@ -113,6 +117,8 @@ sigdetect eda \
113
117
 
114
118
  ~~~
115
119
 
120
+ `sigdetect eda` expects `results.json`; enable `write_results: true` when running detect.
121
+
116
122
  ---
117
123
 
118
124
  ## Library usage
@@ -136,13 +142,13 @@ result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
136
142
  print(result.to_dict())
137
143
  ~~~
138
144
 
139
- `Detect(Path)` returns a **FileResult** dataclass; call `.to_dict()` for the JSON-friendly representation (see [Result schema](#result-schema)). Each signature entry now exposes `bounding_box` coordinates (PDF points, origin bottom-left). When PNG cropping is enabled, `crop_path` points at the generated image. Use `Engine="auto"` if you want the single-pass defaults that prefer PyMuPDF (for geometry) when available.
145
+ `Detect(Path)` returns a **FileResult** dataclass; call `.to_dict()` for the JSON-friendly representation (see [Result schema](#result-schema)). Each signature entry now exposes `bounding_box` coordinates (PDF points, origin bottom-left). When PNG cropping is enabled, `crop_path` points at the generated image; when DOCX cropping is enabled, `crop_docx_path` points at the generated DOCX file. Use `Engine="auto"` if you want the single-pass defaults that prefer PyMuPDF (for geometry) when available.
140
146
 
141
147
  ---
142
148
 
143
149
  ## Library API (embed in another script)
144
150
 
145
- Minimal, plug-and-play API that returns plain dicts (JSON-ready) without side effects unless you opt into cropping:
151
+ Minimal, plug-and-play API that returns plain dicts (JSON-ready) without side effects unless you opt into cropping. Engine selection is forced to `auto` (PyMuPDF preferred) to ensure geometry. Wet detection runs automatically for non-e-sign PDFs; pass `runWetDetection=False` to skip OCR.
146
152
 
147
153
  ~~~python
148
154
  from pathlib import Path
@@ -165,6 +171,7 @@ result = DetectPdf(
165
171
  profileName="retainer",
166
172
  includePseudoSignatures=True,
167
173
  recurseXObjects=True,
174
+ # runWetDetection=False, # disable OCR-backed wet detection if desired
168
175
  )
169
176
  print(
170
177
  result["file"],
@@ -187,7 +194,7 @@ for res in ScanDirectory(
187
194
  # store in DB, print, etc.
188
195
  pass
189
196
 
190
- # 3) Crop PNG snippets for FileResult objects (requires PyMuPDF)
197
+ # 3) Crop signature snippets for FileResult objects (requires PyMuPDF; DOCX needs python-docx)
191
198
  detector = get_detector(pdfRoot="/path/to/pdfs", profileName="hipaa")
192
199
  file_result = detector.Detect(Path("/path/to/pdfs/example.pdf"))
193
200
  CropSignatureImages(
@@ -226,7 +233,8 @@ High-level summary (per file):
226
233
  "hint": "AcroSig:sig_patient",
227
234
  "render_type": "typed",
228
235
  "bounding_box": [10.0, 10.0, 150.0, 40.0],
229
- "crop_path": "signature_crops/example/sig_01_patient.png"
236
+ "crop_path": "signature_crops/example/sig_01_patient.png",
237
+ "crop_docx_path": null
230
238
  },
231
239
  {
232
240
  "page": null,
@@ -253,6 +261,9 @@ High-level summary (per file):
253
261
  - In retainer profile, emitter prefers two signatures (client + firm), often on the same page.
254
262
  - **`signatures[].bounding_box`** reports the widget rectangle in PDF points (origin bottom-left).
255
263
  - **`signatures[].crop_path`** is populated when PNG crops are generated (via CLI `--crop-signatures` or `CropSignatureImages`).
264
+ - **`signatures[].crop_docx_path`** is populated when DOCX crops are generated (`--crop-docx` or `docx=True`).
265
+ - **`signatures[].crop_bytes`** contains base64 PNG data when CLI `--crop-bytes` is enabled.
266
+ - **`signatures[].crop_docx_bytes`** contains base64 DOCX data when `--crop-docx` and `--crop-bytes` are enabled together.
256
267
 
257
268
  ---
258
269
 
@@ -274,14 +285,16 @@ You can keep one config YAML per dataset, e.g.:
274
285
  # ./sample_data/config.yml (example)
275
286
  pdf_root: ./pdfs
276
287
  out_dir: ./sigdetect_out
277
- engine: pypdf2
288
+ engine: auto
289
+ write_results: false
278
290
  pseudo_signatures: true
279
291
  recurse_xobjects: true
280
292
  profile: retainer # or: hipaa
281
293
  crop_signatures: false # enable to write PNG crops (requires pymupdf)
294
+ crop_docx: false # enable to write DOCX crops instead of PNGs (requires python-docx)
282
295
  # crop_output_dir: ./signature_crops
283
296
  crop_image_dpi: 200
284
- detect_wet_signatures: false # opt-in OCR wet detection (PyMuPDF + Tesseract)
297
+ detect_wet_signatures: false # kept for compatibility; non-e-sign PDFs still trigger OCR
285
298
  wet_ocr_dpi: 200
286
299
  wet_ocr_languages: eng
287
300
  wet_precision_threshold: 0.82
@@ -299,7 +312,7 @@ YAML files can be customized or load at runtime (see CLI `--config`, if availabl
299
312
  - Looks for client and firm labels/tokens; boosts pages with law-firm markers (LLP/LLC/PA/PC) and “By:” blocks.
300
313
  - Applies an anti-front-matter rule to reduce page-1 false positives (e.g., letterheads, firm mastheads).
301
314
  - When only vendor/Acro clues exist (no widgets), it will emit two pseudo signatures targeting likely pages.
302
- - **Wet detection (opt-in):** With `detect_wet_signatures: true`, the CLI runs an OCR-backed pass (PyMuPDF + pytesseract/Tesseract) after e-sign detection. It emits `RenderType="wet"` signatures for high-confidence label/stroke pairs in the lower page region. Missing OCR dependencies add a `ManualReview:*` hint instead of failing.
315
+ - **Wet detection (non-e-sign):** The CLI runs an OCR-backed pass (PyMuPDF + pytesseract/Tesseract) after e-sign detection whenever no e-sign evidence is found. It emits `RenderType="wet"` signatures for high-confidence label/stroke pairs in the lower page region. When an image-based signature is present on a page, label-only OCR candidates are suppressed unless a stroke is detected. Results are deduped to the top signature per role (dropping `unknown`). Missing OCR dependencies add a `ManualReview:*` hint instead of failing.
303
316
 
304
317
  ---
305
318
 
@@ -1,24 +1,24 @@
1
1
  sigdetect/__init__.py,sha256=YvnTwlC1jfq83EhQS_1JjiiHK7_wJCCU1JvHv5E1qWY,573
2
- sigdetect/api.py,sha256=qLCpbODLvw5AQMEAvpIP6kBNoc03h01ekjilg9tDxuw,9408
3
- sigdetect/cli.py,sha256=Zco3-r4MAlVEmyEatvPUOZLLamh5ELFrquAK6ovJVlw,9290
4
- sigdetect/config.py,sha256=-6GCUusdi0Ba-Rt6pwffB5MIz1ApPlBaXVKxpIppbKk,7678
5
- sigdetect/cropping.py,sha256=zwOXzkG8tt1ZPUaDhJMHfonFEZtVNZZmZOzYQ_4nUAI,6074
2
+ sigdetect/api.py,sha256=hDfa6z4SoHth1Dw9HDfSPiytMQrqu_oyBZlXBwSh9g4,11010
3
+ sigdetect/cli.py,sha256=X5GqZ-PK67vz4OHN5r7h-V0hO886ZblUiUdKDuFowtU,10930
4
+ sigdetect/config.py,sha256=3SP1rkcWBGXloCDFomBJRMRKZOvXuHQbhIBqpVrzYmY,8365
5
+ sigdetect/cropping.py,sha256=HfOJrV2Xv9Eo0lCIl3mukz49agKB6h2TML99B0qQJNc,8837
6
6
  sigdetect/eda.py,sha256=S92G1Gjmepri__D0n_V6foq0lQgH-RXI9anW8A58jfw,4681
7
7
  sigdetect/logging_setup.py,sha256=LMF8ao_a-JwH0S522T6aYTFX3e8Ajjv_5ODS2YiBcHA,6404
8
8
  sigdetect/utils.py,sha256=T9rubLf5T9JmjOHYMOba1j34fhOJaWocAXccnGTxRUE,5198
9
- sigdetect/wet_detection.py,sha256=6ciFxMQS3f1nF502w4KLTksoYmjdudzTekh7McfWiIg,16464
9
+ sigdetect/wet_detection.py,sha256=zvi11XUmm_xLZ4BLvxInwMQg8YLcyQzEYAM9QSdJOIs,18259
10
10
  sigdetect/data/role_rules.retainer.yml,sha256=IFdwKnDBXR2cTkdfrsZ6ku6CXD8S_dg5A3vKRKLW5h8,2532
11
11
  sigdetect/data/role_rules.yml,sha256=HuLKsZR_A6sD9XvY4NHiY_VG3dS5ERNCBF9-Mxawomw,2751
12
12
  sigdetect/data/vendor_patterns.yml,sha256=NRbZNQxcx_GuL6n1jAphBn6MM6ChCpeWGCsjbRx-PEo,384
13
- sigdetect/detector/__init__.py,sha256=pUVFLwqj65cVO1qjsZy6NJ9BVY5xrJ6sQe-8LAb9O_A,2421
13
+ sigdetect/detector/__init__.py,sha256=nT52mCI9s03Rso_RS86mm223rJfl5GlGDFsXwMJ3z3E,2548
14
14
  sigdetect/detector/base.py,sha256=L-iXWXqsTetDc4jRZo_wOdbNpKqOY20mX9FefrugdT0,263
15
15
  sigdetect/detector/base_detector.py,sha256=GmAgUWO_fQgIfnihZSoyhR3wpnwZ-X3hS0Kuyz4G6Ys,608
16
16
  sigdetect/detector/file_result_model.py,sha256=j2gTc9Sw3fJOHlexYsR_m5DiwHA8DzIzAMToESfvo4A,1767
17
- sigdetect/detector/pymupdf_engine.py,sha256=SGtJOStKFdfsdBrscoe5zg9u2KGJ_JTRYZ25adL_7Lw,17390
17
+ sigdetect/detector/pymupdf_engine.py,sha256=N6oxvUa-48VvvhjbMk0R0kfScsggNKS7u5FLSeBRfWw,17358
18
18
  sigdetect/detector/pypdf2_engine.py,sha256=kB8cIp_gMvCla0LIBi9sd19g0361Oc9TjCW_ZViUBJQ,47410
19
- sigdetect/detector/signature_model.py,sha256=sdfQiOJzxnrg0WkGJxZCebA0wHqgzZnLI0gOv6ipSZA,1074
20
- sigdetect-0.4.0.dist-info/METADATA,sha256=WA7OjyLtM3AH7OtdFRmliqBw0ucNlywoD2bykytlnPA,12475
21
- sigdetect-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
22
- sigdetect-0.4.0.dist-info/entry_points.txt,sha256=iqtfKjBU44-omM7Sh-idGz2ahw19oAvpvSyKZVArG3o,48
23
- sigdetect-0.4.0.dist-info/top_level.txt,sha256=PKlfwUobkRC0viwiSXmhtw83G26FSNpimWYC1Uy00FY,10
24
- sigdetect-0.4.0.dist-info/RECORD,,
19
+ sigdetect/detector/signature_model.py,sha256=T2Hmfkfz_hZsDzwOhepxfNmkedxQp3_XHdrP8yGKoCk,1322
20
+ sigdetect-0.5.1.dist-info/METADATA,sha256=_Jnyl9_A1yZUrKwWxUxVB-9rcMG3MdUqiN5WX_zlpqQ,14131
21
+ sigdetect-0.5.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
22
+ sigdetect-0.5.1.dist-info/entry_points.txt,sha256=iqtfKjBU44-omM7Sh-idGz2ahw19oAvpvSyKZVArG3o,48
23
+ sigdetect-0.5.1.dist-info/top_level.txt,sha256=PKlfwUobkRC0viwiSXmhtw83G26FSNpimWYC1Uy00FY,10
24
+ sigdetect-0.5.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5