dforge-cli 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dforge/loading.py ADDED
@@ -0,0 +1,19 @@
1
+ from rich.console import Console
2
+ from rich.status import Status
3
+
4
+ console = Console()
5
+
6
+
7
class Loader:
    """Context manager that runs a rich ``Status`` spinner around a block of work."""

    def __init__(self, text):
        # The caption is rendered in bold cyan next to a "dots" spinner.
        caption = f"[bold cyan]{text}[/bold cyan]"
        self.status = Status(caption, spinner="dots")

    def __enter__(self):
        """Start the spinner and return this loader."""
        self.status.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the spinner; returns None, so exceptions are not suppressed."""
        self.status.stop()
dforge/menu.py ADDED
@@ -0,0 +1,115 @@
1
+ import questionary
2
+
3
+
4
def show_menu(title: str, choices: list[str]):
    """Show a questionary select prompt and return the result of ``.ask()``.

    Args:
        title: Prompt text shown above the list.
        choices: Entries offered for selection.
    """
    prompt = questionary.select(title, choices=choices)
    return prompt.ask()
9
+
10
+
11
def main_menu():
    """Show the top-level menu and return whatever ``show_menu`` returns."""
    entries = [
        "📄 PDF Tools",
        "🔍 OCR",
        "🔄 Conversion",
        "🖼 Image Processing",
        "⚡ Batch Processing",
        "👀 Watch Folder",
        "⚙ Settings",
        "❌ Exit",
    ]
    return show_menu("What would you like to do?", entries)
25
+
26
+
27
def pdf_menu():
    """Show the PDF tools submenu and return whatever ``show_menu`` returns."""
    entries = [
        "Merge PDFs",
        "Split PDF",
        "Compress PDF",
        "Rotate PDF",
        "Extract Pages",
        "Watermark PDF",
        "Encrypt PDF",
        "Decrypt PDF",
        "⬅ Back",
    ]
    return show_menu("PDF Tools", entries)
42
def ocr_menu():
    """Show the OCR tools submenu and return whatever ``show_menu`` returns."""
    entries = [
        "OCR Image/PDF",
        "Searchable PDF",
        "Batch OCR",
        "OCR Folder",
        "Extract Tables",
        "OCR Settings",
        "⬅ Back",
    ]
    return show_menu("OCR Tools", entries)
55
def conversion_menu():
    """Show the conversion tools submenu and return whatever ``show_menu`` returns."""
    entries = [
        "Markdown → PDF",
        "Markdown → DOCX",
        "DOCX → PDF",
        "DOCX → Markdown",
        "Images → PDF",
        "PDF → Images",
        "⬅ Back",
    ]
    return show_menu("Conversion Tools", entries)
68
def extract_menu():
    """Show the extraction tools submenu and return whatever ``show_menu`` returns."""
    entries = [
        "Extract Text",
        "Extract Images",
        "Extract Metadata",
        "⬅ Back",
    ]
    return show_menu("Extract Tools", entries)
78
+
79
+
80
def batch_menu():
    """Show the batch tools submenu and return whatever ``show_menu`` returns."""
    entries = [
        "Batch Convert",
        "Batch Compress",
        "Batch OCR",
        "⬅ Back",
    ]
    return show_menu("Batch Tools", entries)
90
+
91
+
92
def automation_menu():
    """Show the automation tools submenu and return whatever ``show_menu`` returns."""
    entries = [
        "Watch Folder",
        "Auto OCR",
        "Auto Convert",
        "Scheduled Tasks",
        "⬅ Back",
    ]
    return show_menu("Automation Tools", entries)
103
+
104
+
105
def image_menu():
    """Show the image tools submenu and return whatever ``show_menu`` returns."""
    entries = [
        "Resize Images",
        "Convert Format",
        "Crop Images",
        "Watermark Images",
        "⬅ Back",
    ]
    return show_menu("Image Tools", entries)
dforge/operations.py ADDED
@@ -0,0 +1,314 @@
1
+ """
2
+ DForge PDF Operations
3
+ Handles: merge, split, compress, rotate, page extraction, watermark, encrypt, decrypt
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import subprocess
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import List, Optional
13
+
14
+ from rich.progress import Progress, SpinnerColumn, TextColumn
15
+
16
+ from dforge.utils import (
17
+ abort, console, ensure_parent, ghostscript_bin,
18
+ info, require_ghostscript, resolve_output, success, warn,
19
+ )
20
+ from dforge.config import DEFAULT_COMPRESS_PRESET
21
+
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Merge
25
+ # ---------------------------------------------------------------------------
26
+
27
def merge(inputs: List[Path], output: Path) -> None:
    """Merge multiple PDF files, in the order given, into one PDF.

    Args:
        inputs: Paths of the PDFs to concatenate.
        output: Destination path of the merged PDF.

    All inputs are checked for existence before any of them is read. A
    missing pypdf install, an empty input list, or a missing file is
    reported through ``abort`` (assumed not to return).
    """
    try:
        # Import both names once here rather than re-importing PdfReader on
        # every loop iteration outside the guarded block.
        from pypdf import PdfReader, PdfWriter
    except ImportError:
        abort("pypdf is required. Run: pip install pypdf")

    # Without this guard an empty list would write a zero-page PDF and
    # report success.
    if not inputs:
        abort("No input files provided.")

    for f in inputs:
        if not f.exists():
            abort(f"File not found: {f}")

    writer = PdfWriter()
    with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), console=console) as progress:
        task = progress.add_task("Merging PDFs...", total=len(inputs))
        for f in inputs:
            reader = PdfReader(str(f))
            for page in reader.pages:
                writer.add_page(page)
            # One progress step per input file (the task total is len(inputs)).
            progress.advance(task)

    ensure_parent(output)
    with open(output, "wb") as fh:
        writer.write(fh)
    success(f"Merged {len(inputs)} files -> [bold]{output}[/bold]")
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Split
56
+ # ---------------------------------------------------------------------------
57
+
58
def split(input_path: Path, output_dir: Optional[Path] = None) -> None:
    """Write each page of a PDF to its own single-page file.

    Args:
        input_path: PDF to split.
        output_dir: Directory for the page files. When falsy, a directory
            named ``<stem>_pages`` next to the input is used.

    Page files are named ``<stem>_page_NNNN.pdf`` with a 1-based,
    zero-padded page number. Problems are reported through ``abort``.
    """
    try:
        from pypdf import PdfReader, PdfWriter
    except ImportError:
        abort("pypdf is required. Run: pip install pypdf")

    if not input_path.exists():
        abort(f"File not found: {input_path}")

    if output_dir:
        dest = output_dir
    else:
        dest = input_path.parent / (input_path.stem + "_pages")
    dest.mkdir(parents=True, exist_ok=True)

    reader = PdfReader(str(input_path))
    total = len(reader.pages)

    columns = (SpinnerColumn(), TextColumn("[progress.description]{task.description}"))
    with Progress(*columns, console=console) as progress:
        task = progress.add_task(f"Splitting {total} pages...", total=total)
        page_no = 0
        for page in reader.pages:
            page_no += 1
            single = PdfWriter()
            single.add_page(page)
            target = dest / f"{input_path.stem}_page_{page_no:04d}.pdf"
            with open(target, "wb") as fh:
                single.write(fh)
            progress.advance(task)

    success(f"Split into {total} pages -> [bold]{dest}/[/bold]")
85
+
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # Compress
89
+ # ---------------------------------------------------------------------------
90
+
91
def compress(input_path: Path, output: Optional[Path] = None, preset: str = DEFAULT_COMPRESS_PRESET) -> None:
    """Re-write a PDF through Ghostscript's ``pdfwrite`` device to shrink it.

    Args:
        input_path: PDF to compress.
        output: Destination path. When falsy, ``resolve_output`` is asked for
            a path using the ``_compressed`` suffix.
        preset: Value placed after ``-dPDFSETTINGS=/`` on the command line.

    A non-zero Ghostscript exit code is reported through ``abort`` together
    with the captured stderr. On success the before/after sizes are printed.
    """
    require_ghostscript()

    if not input_path.exists():
        abort(f"File not found: {input_path}")

    out = output if output else resolve_output(input_path, None, "_compressed", ".pdf")
    ensure_parent(out)

    executable = ghostscript_bin()
    switches = [
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.4",
        f"-dPDFSETTINGS=/{preset}",
        "-dNOPAUSE",
        "-dQUIET",
        "-dBATCH",
        f"-sOutputFile={out}",
    ]
    cmd = [executable, *switches, str(input_path)]

    info(f"Compressing with preset '[bold]{preset}[/bold]'...")
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        abort(f"Ghostscript error:\n{proc.stderr}")

    original_kb = input_path.stat().st_size / 1024
    compressed_kb = out.stat().st_size / 1024
    # Guard the division for a zero-byte input.
    if original_kb > 0:
        ratio = (1 - compressed_kb / original_kb) * 100
    else:
        ratio = 0
    success(
        f"Compressed: {original_kb:.1f} KB -> {compressed_kb:.1f} KB "
        f"([green]{ratio:.1f}% reduction[/green]) -> [bold]{out}[/bold]"
    )
126
+
127
+
128
+ # ---------------------------------------------------------------------------
129
+ # Rotate
130
+ # ---------------------------------------------------------------------------
131
+
132
def rotate(input_path: Path, degrees: int, output: Optional[Path] = None) -> None:
    """Rotate every page of a PDF by 90, 180 or 270 degrees.

    Args:
        input_path: PDF to rotate.
        degrees: Rotation to apply; any value other than 90, 180 or 270 is
            rejected through ``abort``.
        output: Destination path. When falsy, ``resolve_output`` is asked for
            a path using the ``_rotated<degrees>`` suffix.
    """
    try:
        from pypdf import PdfReader, PdfWriter
    except ImportError:
        abort("pypdf is required.")

    allowed_angles = (90, 180, 270)
    if degrees not in allowed_angles:
        abort("Rotation must be 90, 180, or 270 degrees.")

    if not input_path.exists():
        abort(f"File not found: {input_path}")

    source = PdfReader(str(input_path))
    rotated = PdfWriter()
    for page in source.pages:
        page.rotate(degrees)
        rotated.add_page(page)

    out = output if output else resolve_output(input_path, None, f"_rotated{degrees}", ".pdf")
    ensure_parent(out)
    with open(out, "wb") as fh:
        rotated.write(fh)

    page_count = len(source.pages)
    success(f"Rotated {page_count} pages by {degrees} deg -> [bold]{out}[/bold]")
157
+
158
+
159
+ # ---------------------------------------------------------------------------
160
+ # Extract page range
161
+ # ---------------------------------------------------------------------------
162
+
163
def _parse_page_range(page_range: str) -> List[int]:
    """Expand a spec such as "1-5", "3" or "2,4,6" into 1-based page numbers.

    Raises:
        ValueError: If an entry is empty, is not an integer, or is a range
            whose start is greater than its end.
    """
    pages: List[int] = []
    for part in page_range.split(","):
        part = part.strip()
        if not part:
            raise ValueError("empty page entry")
        if "-" in part:
            start_s, end_s = part.split("-", 1)
            start, end = int(start_s), int(end_s)
            if start > end:
                # range(start, end + 1) would be empty and the entry would
                # vanish silently; report it instead.
                raise ValueError(f"range start {start} is after range end {end}")
            pages.extend(range(start, end + 1))
        else:
            pages.append(int(part))
    return pages


def extract_pages(input_path: Path, page_range: str, output: Optional[Path] = None) -> None:
    """
    Extract a page range from a PDF.

    Args:
        input_path: PDF to read pages from.
        page_range: 1-based page spec: "1-5" or "3" or "2,4,6". Pages are
            written in the order listed.
        output: Destination path. When falsy, ``resolve_output`` is asked for
            a path whose suffix is derived from ``page_range``.

    A malformed spec or a page outside the document is reported through
    ``abort`` rather than surfacing as an uncaught ``ValueError``.
    """
    try:
        from pypdf import PdfReader, PdfWriter
    except ImportError:
        abort("pypdf is required.")

    if not input_path.exists():
        abort(f"File not found: {input_path}")

    reader = PdfReader(str(input_path))
    total = len(reader.pages)

    try:
        pages = _parse_page_range(page_range)
    except ValueError as exc:
        abort(f"Invalid page range '{page_range}': {exc}")
        return  # only reached if abort() returns; avoids using an unset `pages`

    # Validate
    invalid = [p for p in pages if p < 1 or p > total]
    if invalid:
        abort(f"Page(s) out of range (document has {total} pages): {invalid}")

    writer = PdfWriter()
    for p in pages:
        # Page numbers are 1-based for the user, 0-based in reader.pages.
        writer.add_page(reader.pages[p - 1])

    out = output or resolve_output(input_path, None, f"_pages_{page_range.replace(',', '-')}", ".pdf")
    ensure_parent(out)
    with open(out, "wb") as fh:
        writer.write(fh)
    success(f"Extracted {len(pages)} pages -> [bold]{out}[/bold]")
204
+
205
+
206
+ # ---------------------------------------------------------------------------
207
+ # Watermark
208
+ # ---------------------------------------------------------------------------
209
+
210
def watermark(input_path: Path, watermark_file: Path, output: Optional[Path] = None) -> None:
    """Merge the first page of a watermark (PDF or image) onto every page.

    Args:
        input_path: PDF to watermark.
        watermark_file: Watermark source. Files with a .png/.jpg/.jpeg/.tiff/
            .bmp suffix are first converted to a temporary single-page PDF.
        output: Destination path. When falsy, ``resolve_output`` is asked for
            a path using the ``_watermarked`` suffix.

    The temporary PDF created for an image watermark is removed before the
    function returns, whether or not the merge succeeded.
    """
    try:
        from pypdf import PdfReader, PdfWriter
    except ImportError:
        abort("pypdf is required.")

    if not input_path.exists():
        abort(f"File not found: {input_path}")
    if not watermark_file.exists():
        abort(f"Watermark file not found: {watermark_file}")

    # If watermark is an image, convert it to a single-page PDF first
    wm_path = watermark_file
    temp_wm: Optional[Path] = None
    if watermark_file.suffix.lower() in {".png", ".jpg", ".jpeg", ".tiff", ".bmp"}:
        temp_wm = _image_to_pdf_watermark(watermark_file)
        wm_path = temp_wm

    try:
        wm_reader = PdfReader(str(wm_path))
        wm_page = wm_reader.pages[0]

        reader = PdfReader(str(input_path))
        writer = PdfWriter()

        for page in reader.pages:
            page.merge_page(wm_page)
            writer.add_page(page)

        out = output or resolve_output(input_path, None, "_watermarked", ".pdf")
        ensure_parent(out)
        with open(out, "wb") as fh:
            writer.write(fh)
    finally:
        # Delete the temporary watermark only after the output is written,
        # so the watermark page is never read from a removed file.
        if temp_wm is not None:
            try:
                temp_wm.unlink()
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # real outcome of the operation.
                pass

    success(f"Watermarked {len(reader.pages)} pages -> [bold]{out}[/bold]")
242
+
243
+
244
def _image_to_pdf_watermark(image_path: Path) -> Path:
    """Convert an image file to a temporary single-page PDF for use as a watermark.

    Args:
        image_path: Image to convert with ``img2pdf``.

    Returns:
        Path of the newly created temporary PDF. The caller owns the file and
        is responsible for deleting it.
    """
    import os
    import tempfile
    try:
        import img2pdf
        from PIL import Image  # noqa: F401 - imported only to verify Pillow is installed
    except ImportError:
        abort("img2pdf and Pillow are required for image watermarks.")

    # mkstemp creates the file atomically; the previously used mktemp is
    # deprecated because another process can claim the name before we open it.
    fd, tmp_name = tempfile.mkstemp(suffix=".pdf")
    tmp = Path(tmp_name)
    try:
        with os.fdopen(fd, "wb") as fh:
            fh.write(img2pdf.convert(str(image_path)))
    except Exception:
        # Do not leave an empty or partial temp file behind on failure.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
    return tmp
257
+
258
+
259
+ # ---------------------------------------------------------------------------
260
+ # Encrypt
261
+ # ---------------------------------------------------------------------------
262
+
263
def encrypt(input_path: Path, password: str, output: Optional[Path] = None) -> None:
    """Copy a PDF's pages into a new file protected with ``password``.

    Args:
        input_path: PDF to encrypt.
        password: Password handed to ``PdfWriter.encrypt``.
        output: Destination path. When falsy, ``resolve_output`` is asked for
            a path using the ``_encrypted`` suffix.
    """
    try:
        from pypdf import PdfReader, PdfWriter
    except ImportError:
        abort("pypdf is required.")

    if not input_path.exists():
        abort(f"File not found: {input_path}")

    source = PdfReader(str(input_path))
    protected = PdfWriter()
    for page in source.pages:
        protected.add_page(page)
    protected.encrypt(password)

    out = output if output else resolve_output(input_path, None, "_encrypted", ".pdf")
    ensure_parent(out)
    with open(out, "wb") as fh:
        protected.write(fh)
    success(f"Encrypted -> [bold]{out}[/bold]")
285
+
286
+
287
+ # ---------------------------------------------------------------------------
288
+ # Decrypt
289
+ # ---------------------------------------------------------------------------
290
+
291
def decrypt(input_path: Path, password: str, output: Optional[Path] = None) -> None:
    """Copy the pages of a password-protected PDF into an unencrypted file.

    Args:
        input_path: PDF to decrypt.
        password: Password tried only when the reader reports the file as
            encrypted; a falsy result from ``reader.decrypt`` is reported
            through ``abort``.
        output: Destination path. When falsy, ``resolve_output`` is asked for
            a path using the ``_decrypted`` suffix.
    """
    try:
        from pypdf import PdfReader, PdfWriter
    except ImportError:
        abort("pypdf is required.")

    if not input_path.exists():
        abort(f"File not found: {input_path}")

    source = PdfReader(str(input_path))
    # Short-circuit keeps decrypt() from being called on unencrypted files.
    if source.is_encrypted and not source.decrypt(password):
        abort("Incorrect password or unsupported encryption.")

    plain = PdfWriter()
    for page in source.pages:
        plain.add_page(page)

    out = output if output else resolve_output(input_path, None, "_decrypted", ".pdf")
    ensure_parent(out)
    with open(out, "wb") as fh:
        plain.write(fh)
    success(f"Decrypted -> [bold]{out}[/bold]")
dforge/processor.py ADDED
@@ -0,0 +1,251 @@
1
+ """
2
+ DForge Image Processing Module
3
+ Handles: enhance, deskew, denoise, resize, and the full OCR preprocessing pipeline
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from pathlib import Path
9
+ from typing import Optional, Tuple
10
+
11
+ from dforge.utils import abort, info, success, warn
12
+ from dforge.config import DEFAULT_COMPRESS_QUALITY
13
+
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # Internal helpers
17
+ # ---------------------------------------------------------------------------
18
+
19
def _load_pil(path: Path):
    """Open ``path`` with Pillow and return the image object.

    A missing Pillow install or a non-existent file is reported through
    ``abort``; the Pillow check runs first.
    """
    try:
        from PIL import Image
    except ImportError:
        abort("Pillow is required. Run: pip install Pillow")

    missing = not path.exists()
    if missing:
        abort(f"File not found: {path}")

    image = Image.open(str(path))
    return image
29
+
30
+
31
def _load_cv2(path: Path):
    """Read ``path`` with ``cv2.imread`` and return the resulting array.

    A failed cv2/numpy import, or an ``imread`` result of ``None``, is
    reported through ``abort``.
    """
    try:
        import cv2
        import numpy as np  # noqa: F401 - kept so a missing numpy is reported here too
    except ImportError:
        abort("opencv-python-headless is required. Run: pip install opencv-python-headless")

    pixels = cv2.imread(str(path))
    unreadable = pixels is None
    if unreadable:
        abort(f"Could not read image: {path}")
    return pixels
43
+
44
+
45
def _save_pil(img, output: Path, quality: int = DEFAULT_COMPRESS_QUALITY) -> None:
    """Save ``img`` to ``output``, passing ``quality`` only for .jpg/.jpeg targets."""
    is_jpeg = output.suffix.lower() in {".jpg", ".jpeg"}
    save_options = {"quality": quality} if is_jpeg else {}
    img.save(str(output), **save_options)
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Enhance
54
+ # ---------------------------------------------------------------------------
55
+
56
def enhance(input_path: Path, output: Optional[Path] = None) -> None:
    """
    Enhance an image for better readability.

    Applies, in order: auto-contrast (cutoff 1), a 1.5x sharpness boost and a
    1.3x contrast boost. Images whose mode is neither "RGB" nor "L" are
    converted to RGB first.

    Args:
        input_path: Image to enhance.
        output: Destination path. When falsy, ``<stem>_enhanced<suffix>``
            next to the input is used.
    """
    try:
        from PIL import Image, ImageEnhance, ImageOps
    except ImportError:
        abort("Pillow is required.")

    picture = _load_pil(input_path)

    # Convert to RGB if needed
    if picture.mode not in ("RGB", "L"):
        picture = picture.convert("RGB")

    picture = ImageOps.autocontrast(picture, cutoff=1)
    picture = ImageEnhance.Sharpness(picture).enhance(1.5)
    picture = ImageEnhance.Contrast(picture).enhance(1.3)

    if output:
        out = output
    else:
        out = input_path.with_name(f"{input_path.stem}_enhanced{input_path.suffix}")
    _save_pil(picture, out)
    success(f"Enhanced -> [bold]{out}[/bold]")
81
+
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # Deskew
85
+ # ---------------------------------------------------------------------------
86
+
87
def deskew(input_path: Path, output: Optional[Path] = None) -> None:
    """
    Detect and correct the skew angle of a scanned document image.

    The skew is estimated with ``cv2.minAreaRect`` over the pixels that
    survive an inverted Otsu threshold (not with Hough line detection); the
    image is then rotated about its centre by that angle.

    Args:
        input_path: Image to deskew.
        output: Destination path. When falsy, ``<stem>_deskewed<suffix>``
            next to the input is used.

    If no foreground pixels are found a warning is emitted and nothing is
    written.
    """
    try:
        import cv2
        import numpy as np
    except ImportError:
        abort("opencv-python-headless is required.")

    img = _load_cv2(input_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img
    # Invert so that (presumably dark) text becomes the non-zero foreground.
    gray = cv2.bitwise_not(gray)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    coords = np.column_stack(np.where(thresh > 0))
    if len(coords) == 0:
        warn("Could not detect text regions for deskewing.")
        return

    angle = cv2.minAreaRect(coords)[-1]
    # Fold the rectangle angle into [-45, 45]. Older OpenCV reports it in
    # [-90, 0) while 4.5.1+ reports (0, 90]; without the upper fold a
    # straight page (angle 90) would be rotated by a quarter turn.
    if angle < -45:
        angle = 90 + angle
    elif angle > 45:
        angle = angle - 90

    (h, w) = img.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    out = output or input_path.with_name(input_path.stem + "_deskewed" + input_path.suffix)
    # imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(str(out), rotated):
        abort(f"Could not write image: {out}")
    success(f"Deskewed (corrected {angle:.2f} deg) -> [bold]{out}[/bold]")
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # Denoise
124
+ # ---------------------------------------------------------------------------
125
+
126
def denoise(input_path: Path, output: Optional[Path] = None) -> None:
    """Remove noise from an image using OpenCV Non-Local Means.

    Args:
        input_path: Image to denoise.
        output: Destination path. When falsy, ``<stem>_denoised<suffix>``
            next to the input is used.

    Two-dimensional (grayscale) arrays go through ``fastNlMeansDenoising``;
    anything else goes through ``fastNlMeansDenoisingColored``. A failed
    write is reported through ``abort`` instead of being announced as a
    success.
    """
    try:
        import cv2
    except ImportError:
        abort("opencv-python-headless is required.")

    img = _load_cv2(input_path)

    if len(img.shape) == 2:  # grayscale
        denoised = cv2.fastNlMeansDenoising(img, h=10, templateWindowSize=7, searchWindowSize=21)
    else:
        denoised = cv2.fastNlMeansDenoisingColored(img, h=10, hColor=10, templateWindowSize=7, searchWindowSize=21)

    out = output or input_path.with_name(input_path.stem + "_denoised" + input_path.suffix)
    # imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(str(out), denoised):
        abort(f"Could not write image: {out}")
    success(f"Denoised -> [bold]{out}[/bold]")
143
+
144
+
145
+ # ---------------------------------------------------------------------------
146
+ # Resize
147
+ # ---------------------------------------------------------------------------
148
+
149
def resize(
    input_path: Path,
    width: Optional[int] = None,
    height: Optional[int] = None,
    scale: Optional[float] = None,
    output: Optional[Path] = None,
) -> None:
    """
    Resize an image.

    Provide either (width, height), one of them (maintains aspect ratio), or a scale factor.
    When ``scale`` is given it takes precedence over ``width``/``height``.

    Args:
        input_path: Image to resize.
        width: Target width in pixels; must be greater than zero if given.
        height: Target height in pixels; must be greater than zero if given.
        scale: Multiplier applied to both dimensions; must be greater than
            zero if given.
        output: Destination path. When falsy, ``<stem>_<w>x<h><suffix>``
            next to the input is used.

    Missing or non-positive size arguments are reported through ``abort``.
    """
    try:
        from PIL import Image
    except ImportError:
        abort("Pillow is required.")

    if width is None and height is None and scale is None:
        abort("Provide --width, --height, or --scale.")

    # Reject zero/negative values up front: they previously either reached
    # Pillow as an invalid size or (width=0 alone) fell through to a
    # ``None / int`` TypeError.
    for label, value in (("width", width), ("height", height), ("scale", scale)):
        if value is not None and value <= 0:
            abort(f"--{label} must be greater than zero.")

    img = _load_pil(input_path)
    orig_w, orig_h = img.size

    # Compare against None rather than relying on truthiness so that the
    # branch taken depends only on which arguments were supplied.
    if scale is not None:
        new_w = int(orig_w * scale)
        new_h = int(orig_h * scale)
    elif width is not None and height is not None:
        new_w, new_h = width, height
    elif width is not None:
        new_w = width
        new_h = int(orig_h * (width / orig_w))
    else:
        new_h = height
        new_w = int(orig_w * (height / orig_h))

    # int() truncation can collapse a very thin dimension to 0, which Pillow
    # rejects; keep at least one pixel.
    new_w = max(1, new_w)
    new_h = max(1, new_h)

    resized = img.resize((new_w, new_h), Image.LANCZOS)
    out = output or input_path.with_name(input_path.stem + f"_{new_w}x{new_h}" + input_path.suffix)
    _save_pil(resized, out)
    success(f"Resized {orig_w}x{orig_h} -> {new_w}x{new_h} -> [bold]{out}[/bold]")
188
+
189
+
190
+ # ---------------------------------------------------------------------------
191
+ # OCR Preprocessing Pipeline
192
+ # ---------------------------------------------------------------------------
193
+
194
def preprocess_for_ocr(input_path: Path, output: Optional[Path] = None) -> Path:
    """
    Full OCR preprocessing pipeline:
    1. Grayscale conversion and deskew (rotation by the ``minAreaRect`` angle
       of the thresholded foreground; logged as "Orientation detection")
    2. Contrast enhancement (auto-contrast, then a 1.4x contrast boost)
    3. Noise removal (Non-Local Means)
    4. Threshold binarization (adaptive Gaussian threshold)

    Args:
        input_path: Image to preprocess.
        output: Destination path. When falsy, ``<stem>_preprocessed.png``
            next to the input is used.

    Returns path to the preprocessed image.
    """
    try:
        import cv2
        import numpy as np
        from PIL import Image, ImageEnhance, ImageOps
    except ImportError:
        abort("opencv-python-headless and Pillow are required.")

    if not input_path.exists():
        abort(f"File not found: {input_path}")

    info("Step 1/4: Orientation detection...")
    img_cv = _load_cv2(input_path)
    gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY) if len(img_cv.shape) == 3 else img_cv

    # Deskew
    inv = cv2.bitwise_not(gray)
    thresh = cv2.threshold(inv, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    coords = cv2.findNonZero(thresh)
    if coords is not None:
        angle = cv2.minAreaRect(coords)[-1]
        # Fold the rectangle angle into [-45, 45]. Older OpenCV reports it in
        # [-90, 0) while 4.5.1+ reports (0, 90]; without the upper fold a
        # straight page (angle 90) would be rotated by a quarter turn.
        if angle < -45:
            angle = 90 + angle
        elif angle > 45:
            angle = angle - 90
        (h, w) = gray.shape[:2]
        M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
        gray = cv2.warpAffine(gray, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    info("Step 2/4: Contrast enhancement...")
    pil_img = Image.fromarray(gray)
    pil_img = ImageOps.autocontrast(pil_img, cutoff=2)
    enhancer = ImageEnhance.Contrast(pil_img)
    pil_img = enhancer.enhance(1.4)
    # Back to an array for the OpenCV steps; numpy is already imported as np.
    gray = np.array(pil_img)

    info("Step 3/4: Noise removal...")
    gray = cv2.fastNlMeansDenoising(gray, h=10, templateWindowSize=7, searchWindowSize=21)

    info("Step 4/4: Threshold binarization...")
    binary = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        blockSize=15,
        C=4,
    )

    out = output or input_path.with_name(input_path.stem + "_preprocessed.png")
    # imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(str(out), binary):
        abort(f"Could not write image: {out}")
    success(f"Preprocessing complete -> [bold]{out}[/bold]")
    return out