dforge-cli 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dforge/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
dforge/banner.py ADDED
@@ -0,0 +1,31 @@
1
+ from rich.console import Console
2
+ from rich.panel import Panel
3
+ from rich.align import Align
4
+
5
+ console = Console()
6
+
7
+
8
def show_banner():
    """Print the DFORGE startup banner on the module-level console.

    Emits, in order: an empty line, a cyan-bordered panel containing the
    centered bold-cyan title text, a centered dim tagline, and a final
    empty line. Returns None.
    """
    # NOTE(review): whitespace inside this literal is reproduced exactly as
    # visible in the reviewed text — confirm against the packaged file.
    banner_text = """
⚡ DFORGE

Fast Local Document Automation
"""
    styled_title = f"[bold cyan]{banner_text}[/bold cyan]"
    title_panel = Panel(
        Align.center(styled_title),
        border_style="cyan",
        padding=(1, 4),
    )
    tagline = Align.center("[dim]Forge your documents from your terminal[/dim]")

    console.print()
    console.print(title_panel)
    console.print(tagline)
    console.print()
dforge/batch.py ADDED
@@ -0,0 +1,156 @@
1
+ """
2
+ DForge Batch Processing
3
+ Handles: batch OCR, batch PDF compression, batch document conversion
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from pathlib import Path
10
+ from typing import Callable, Iterable
11
+
12
+ from rich.progress import Progress, TextColumn
13
+
14
+ from dforge.config import SUPPORTED_DOC_EXTS, SUPPORTED_IMAGE_EXTS, SUPPORTED_PDF_EXTS
15
+ from dforge.utils import (
16
+ abort,
17
+ collect_files,
18
+ console,
19
+ info,
20
+ success,
21
+ warn,
22
+ require_ghostscript,
23
+ require_pandoc,
24
+ require_tesseract,
25
+ )
26
+
27
+
28
def _run_parallel(
    label: str,
    files: list[Path],
    workers: int,
    handler: Callable[[Path], None],
) -> list[tuple[Path, str]]:
    """Apply ``handler`` to every path in ``files`` while showing progress.

    Args:
        label: Description displayed next to the ``completed/total`` counter.
        files: Paths to process.
        workers: Requested thread count. ``None``, ``0`` and negative values
            are treated as ``1``.
        handler: Callable invoked once per path; its return value is ignored.

    Returns:
        ``(path, message)`` pairs for every path whose handler raised an
        ``Exception``. With one worker the pairs follow input order; with
        several workers they follow completion order.
    """
    failures: list[tuple[Path, str]] = []
    worker_count = max(1, workers or 1)

    def record_failure(path: Path, exc: Exception) -> None:
        # Some exceptions stringify to "" (e.g. ``ValueError()``); fall back
        # to the class name so the report never shows a blank reason.
        failures.append((path, str(exc) or type(exc).__name__))

    with Progress(
        TextColumn("{task.description}"),
        TextColumn("{task.completed}/{task.total}"),
        console=console,
    ) as progress:
        task = progress.add_task(label, total=len(files))

        if worker_count == 1:
            # One worker: run on the calling thread, no executor involved.
            for path in files:
                try:
                    handler(path)
                except Exception as exc:
                    record_failure(path, exc)
                progress.advance(task)
            return failures

        with ThreadPoolExecutor(max_workers=worker_count) as executor:
            # Map each future back to its path so failures can be attributed.
            futures = {executor.submit(handler, path): path for path in files}
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as exc:
                    record_failure(futures[future], exc)
                progress.advance(task)

    return failures
64
+
65
+
66
def _report_errors(errors: list[tuple[Path, str]]) -> None:
    """Print a summary of failed files; do nothing when ``errors`` is empty.

    Args:
        errors: ``(path, message)`` pairs describing each failure.

    A warning with the failure count is issued through ``warn``, followed by
    one console line per failure showing the file name in red and its
    message.
    """
    if not errors:
        return

    # Function-scope import: rich is already a dependency of this module.
    from rich.markup import escape

    warn(f"{len(errors)} file(s) failed:")
    for path, err in errors:
        # File names and error text are arbitrary; escape them so bracketed
        # fragments are printed literally instead of being parsed as console
        # markup (which can drop text or raise a markup error).
        console.print(f" [red]{escape(path.name)}[/red]: {escape(err)}")
72
+
73
+
74
def batch_with_ocr(
    directory: Path,
    lang: str,
    fmt: str,
    recursive: bool = True,
    workers: int = 4,
) -> None:
    """Run OCR over every supported image and PDF found under ``directory``.

    Args:
        directory: Location to scan; a missing path is reported via ``abort``.
        lang: Language value forwarded to the OCR functions.
        fmt: Output format value forwarded to the OCR functions.
        recursive: Forwarded to ``collect_files``.
        workers: Worker count forwarded to ``_run_parallel``.

    Files with a ``.pdf`` suffix (case-insensitive) go to ``ocr_pdf``; all
    other collected files go to ``ocr_image``. Failures are listed and a
    final processed/total summary is printed.
    """
    if not directory.exists():
        abort(f"Directory not found: {directory}")

    require_tesseract()

    ocr_exts = SUPPORTED_IMAGE_EXTS | SUPPORTED_PDF_EXTS
    targets = collect_files(directory, ocr_exts, recursive=recursive)
    if not targets:
        warn(f"No supported files found in {directory}")
        return

    total = len(targets)
    info(f"Found {total} file(s) to OCR...")

    def ocr_one(path: Path) -> None:
        # The engine functions are imported lazily, only for the branch taken.
        if path.suffix.lower() == ".pdf":
            from dforge.engine import ocr_pdf
            ocr_pdf(path, lang=lang, fmt=fmt)
            return
        from dforge.engine import ocr_image
        ocr_image(path, lang=lang, fmt=fmt)

    failures = _run_parallel("Batch OCR", targets, workers, ocr_one)
    _report_errors(failures)
    succeeded = total - len(failures)
    success(f"Batch OCR complete. Processed {succeeded}/{total} file(s).")
104
+
105
+
106
def batch_compress(
    directory: Path,
    recursive: bool = True,
    workers: int = 4,
) -> None:
    """Compress every PDF found under ``directory``.

    Args:
        directory: Location to scan; a missing path is reported via ``abort``.
        recursive: Forwarded to ``collect_files``.
        workers: Worker count forwarded to ``_run_parallel``.

    Each collected file is passed to ``dforge.operations.compress``.
    Failures are listed and a final processed/total summary is printed.
    """
    if not directory.exists():
        abort(f"Directory not found: {directory}")

    require_ghostscript()

    pdfs = collect_files(directory, SUPPORTED_PDF_EXTS, recursive=recursive)
    if not pdfs:
        warn(f"No PDF files found in {directory}")
        return

    total = len(pdfs)
    info(f"Found {total} PDF(s) to compress...")

    def compress_one(path: Path) -> None:
        # Imported lazily, at the moment a file is actually processed.
        from dforge.operations import compress
        compress(path)

    failures = _run_parallel("Batch compress", pdfs, workers, compress_one)
    _report_errors(failures)
    succeeded = total - len(failures)
    success(f"Batch compress complete. Processed {succeeded}/{total} file(s).")
130
+
131
+
132
def batch_convert(
    directory: Path,
    target_format: str,
    recursive: bool = True,
    workers: int = 4,
) -> None:
    """Convert every supported document found under ``directory``.

    Args:
        directory: Location to scan; a missing path is reported via ``abort``.
        target_format: Format value forwarded to ``dforge.converter.convert``.
        recursive: Forwarded to ``collect_files``.
        workers: Worker count forwarded to ``_run_parallel``.

    Failures are listed and a final processed/total summary is printed.
    """
    if not directory.exists():
        abort(f"Directory not found: {directory}")

    require_pandoc()

    documents = collect_files(directory, SUPPORTED_DOC_EXTS, recursive=recursive)
    if not documents:
        warn(f"No convertible documents found in {directory}")
        return

    total = len(documents)
    info(f"Found {total} document(s) to convert...")

    def convert_one(path: Path) -> None:
        # Imported lazily, at the moment a file is actually processed.
        from dforge.converter import convert
        convert(path, target_format)

    failures = _run_parallel("Batch convert", documents, workers, convert_one)
    _report_errors(failures)
    succeeded = total - len(failures)
    success(f"Batch convert complete. Processed {succeeded}/{total} file(s).")