dforge-cli 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dforge/__init__.py +1 -0
- dforge/banner.py +31 -0
- dforge/batch.py +156 -0
- dforge/cli.py +525 -0
- dforge/config.py +38 -0
- dforge/config_manager.py +33 -0
- dforge/converter.py +167 -0
- dforge/dependencies.py +98 -0
- dforge/engine.py +236 -0
- dforge/extractor.py +201 -0
- dforge/loading.py +19 -0
- dforge/menu.py +115 -0
- dforge/operations.py +314 -0
- dforge/processor.py +251 -0
- dforge/setup.py +107 -0
- dforge/theme.py +12 -0
- dforge/utils.py +169 -0
- dforge/watcher.py +137 -0
- dforge/workflows/__init__.py +0 -0
- dforge/workflows/automation.py +21 -0
- dforge/workflows/batch.py +18 -0
- dforge/workflows/batch_ocr.py +61 -0
- dforge/workflows/common.py +133 -0
- dforge/workflows/compress.py +73 -0
- dforge/workflows/convert.py +148 -0
- dforge/workflows/decrypt.py +50 -0
- dforge/workflows/encrypt.py +50 -0
- dforge/workflows/extract.py +18 -0
- dforge/workflows/image.py +21 -0
- dforge/workflows/merge.py +109 -0
- dforge/workflows/ocr.py +104 -0
- dforge/workflows/ocr_folder.py +0 -0
- dforge/workflows/pages.py +57 -0
- dforge/workflows/rotate.py +53 -0
- dforge/workflows/searchable.py +51 -0
- dforge/workflows/settings.py +56 -0
- dforge/workflows/split.py +32 -0
- dforge/workflows/tables.py +45 -0
- dforge/workflows/watermark.py +54 -0
- dforge_cli-1.0.1.dist-info/METADATA +244 -0
- dforge_cli-1.0.1.dist-info/RECORD +44 -0
- dforge_cli-1.0.1.dist-info/WHEEL +5 -0
- dforge_cli-1.0.1.dist-info/entry_points.txt +2 -0
- dforge_cli-1.0.1.dist-info/top_level.txt +1 -0
dforge/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package version string; keep it in sync with the published distribution
# version (this release is packaged as 1.0.1).
__version__ = "1.0.1"
|
dforge/banner.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from rich.console import Console
|
|
2
|
+
from rich.panel import Panel
|
|
3
|
+
from rich.align import Align
|
|
4
|
+
|
|
5
|
+
# Module-level Rich console; show_banner() sends all of its output through it.
console = Console()
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def show_banner():
    """Print the DFORGE banner to the module-level console.

    Output, in order: an empty line, a cyan-bordered panel holding the
    centered bold-cyan heading, a centered dim tagline, and a closing
    empty line.
    """
    heading = """
⚡ DFORGE

Fast Local Document Automation
"""
    tagline = "[dim]Forge your documents from your terminal[/dim]"

    # Build the renderables first so the print calls below read top to bottom
    # exactly as the banner appears on screen.
    heading_panel = Panel(
        Align.center("[bold cyan]" + heading + "[/bold cyan]"),
        border_style="cyan",
        padding=(1, 4),
    )
    centered_tagline = Align.center(tagline)

    console.print()
    console.print(heading_panel)
    console.print(centered_tagline)
    console.print()
|
dforge/batch.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DForge Batch Processing
|
|
3
|
+
Handles: batch OCR, batch PDF compression, batch document conversion
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Callable, Iterable
|
|
11
|
+
|
|
12
|
+
from rich.progress import Progress, TextColumn
|
|
13
|
+
|
|
14
|
+
from dforge.config import SUPPORTED_DOC_EXTS, SUPPORTED_IMAGE_EXTS, SUPPORTED_PDF_EXTS
|
|
15
|
+
from dforge.utils import (
|
|
16
|
+
abort,
|
|
17
|
+
collect_files,
|
|
18
|
+
console,
|
|
19
|
+
info,
|
|
20
|
+
success,
|
|
21
|
+
warn,
|
|
22
|
+
require_ghostscript,
|
|
23
|
+
require_pandoc,
|
|
24
|
+
require_tesseract,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _run_parallel(
    label: str,
    files: list[Path],
    workers: int,
    handler: Callable[[Path], None],
) -> list[tuple[Path, str]]:
    """Run *handler* once per path in *files* behind a progress counter.

    Any ``Exception`` raised by *handler* is recorded instead of propagated,
    so one bad file never stops the rest of the batch.

    Args:
        label: Description shown next to the ``completed/total`` counter.
        files: Paths to process.
        workers: Requested number of worker threads. ``None``, ``0`` and
            negative values are treated as ``1``; with a single worker the
            files are processed sequentially on the calling thread.
        handler: Callable invoked with each path.

    Returns:
        ``(path, message)`` pairs for every file whose handler raised,
        ordered as the paths appear in *files*. The message is the exception
        text, or the exception class name when that text is empty.
    """
    worker_count = max(1, workers or 1)

    # Keyed by input position: threaded completions arrive in arbitrary
    # order, and the failure report should not change from run to run.
    failures: dict[int, tuple[Path, str]] = {}

    def record(index: int, path: Path, exc: Exception) -> None:
        # Some exceptions stringify to "", which would print a blank reason.
        failures[index] = (path, str(exc) or type(exc).__name__)

    with Progress(
        TextColumn("{task.description}"),
        TextColumn("{task.completed}/{task.total}"),
        console=console,
    ) as progress:
        task = progress.add_task(label, total=len(files))

        if worker_count == 1:
            for index, path in enumerate(files):
                try:
                    handler(path)
                except Exception as exc:
                    record(index, path, exc)
                progress.advance(task)
        else:
            with ThreadPoolExecutor(max_workers=worker_count) as executor:
                futures = {
                    executor.submit(handler, path): (index, path)
                    for index, path in enumerate(files)
                }
                for future in as_completed(futures):
                    index, path = futures[future]
                    try:
                        # result() re-raises whatever the handler raised.
                        future.result()
                    except Exception as exc:
                        record(index, path, exc)
                    progress.advance(task)

    return [failures[index] for index in sorted(failures)]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _report_errors(errors: list[tuple[Path, str]]) -> None:
    """Print a warning header followed by one line per failed file.

    Does nothing when *errors* is empty.

    Args:
        errors: ``(path, message)`` pairs, as returned by ``_run_parallel``.
    """
    if not errors:
        return

    # Function-scope import, matching the other lazy imports in this module;
    # ``rich`` is already a dependency via ``rich.progress``.
    from rich.markup import escape

    warn(f"{len(errors)} file(s) failed:")
    for path, err in errors:
        # File names and exception text are arbitrary. Anything in them that
        # looks like console markup (e.g. "[draft]" or "[/b]") must be escaped
        # so it is printed literally rather than parsed as a style tag.
        console.print(f" [red]{escape(path.name)}[/red]: {escape(err)}")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def batch_with_ocr(
    directory: Path,
    lang: str,
    fmt: str,
    recursive: bool = True,
    workers: int = 4,
) -> None:
    """OCR every supported image and PDF found in *directory*.

    Args:
        directory: Folder to scan. ``abort`` is called with a
            "Directory not found" message when the path does not exist.
        lang: Passed through to the OCR functions as ``lang``.
        fmt: Passed through to the OCR functions as ``fmt``.
        recursive: Forwarded to ``collect_files``.
        workers: Forwarded to ``_run_parallel``.

    Per-file failures are collected, listed by ``_report_errors``, and
    reflected in the final "Processed X/Y" message.
    """
    if not directory.exists():
        abort(f"Directory not found: {directory}")

    require_tesseract()

    ocr_extensions = SUPPORTED_IMAGE_EXTS | SUPPORTED_PDF_EXTS
    targets = collect_files(directory, ocr_extensions, recursive=recursive)
    if not targets:
        warn(f"No supported files found in {directory}")
        return

    total = len(targets)
    info(f"Found {total} file(s) to OCR...")

    def _ocr_one(source: Path) -> None:
        # PDFs and images go through different engine entry points; only the
        # one actually needed is imported, and only when a file is processed.
        is_pdf = source.suffix.lower() == ".pdf"
        if is_pdf:
            from dforge.engine import ocr_pdf as run_ocr
        else:
            from dforge.engine import ocr_image as run_ocr
        run_ocr(source, lang=lang, fmt=fmt)

    failures = _run_parallel("Batch OCR", targets, workers, _ocr_one)
    _report_errors(failures)

    succeeded = total - len(failures)
    success(f"Batch OCR complete. Processed {succeeded}/{total} file(s).")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def batch_compress(
    directory: Path,
    recursive: bool = True,
    workers: int = 4,
) -> None:
    """Compress every PDF found in *directory*.

    Args:
        directory: Folder to scan. ``abort`` is called with a
            "Directory not found" message when the path does not exist.
        recursive: Forwarded to ``collect_files``.
        workers: Forwarded to ``_run_parallel``.

    Per-file failures are collected, listed by ``_report_errors``, and
    reflected in the final "Processed X/Y" message.
    """
    if not directory.exists():
        abort(f"Directory not found: {directory}")

    require_ghostscript()

    pdfs = collect_files(directory, SUPPORTED_PDF_EXTS, recursive=recursive)
    if not pdfs:
        warn(f"No PDF files found in {directory}")
        return

    total = len(pdfs)
    info(f"Found {total} PDF(s) to compress...")

    def _compress_one(pdf: Path) -> None:
        # Imported when a file is processed, not at module import time.
        from dforge.operations import compress as compress_pdf

        compress_pdf(pdf)

    failures = _run_parallel("Batch compress", pdfs, workers, _compress_one)
    _report_errors(failures)

    succeeded = total - len(failures)
    success(f"Batch compress complete. Processed {succeeded}/{total} file(s).")
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def batch_convert(
    directory: Path,
    target_format: str,
    recursive: bool = True,
    workers: int = 4,
) -> None:
    """Convert every supported document found in *directory*.

    Args:
        directory: Folder to scan. ``abort`` is called with a
            "Directory not found" message when the path does not exist.
        target_format: Passed to ``convert`` as the output format for each file.
        recursive: Forwarded to ``collect_files``.
        workers: Forwarded to ``_run_parallel``.

    Per-file failures are collected, listed by ``_report_errors``, and
    reflected in the final "Processed X/Y" message.
    """
    if not directory.exists():
        abort(f"Directory not found: {directory}")

    require_pandoc()

    documents = collect_files(directory, SUPPORTED_DOC_EXTS, recursive=recursive)
    if not documents:
        warn(f"No convertible documents found in {directory}")
        return

    total = len(documents)
    info(f"Found {total} document(s) to convert...")

    def _convert_one(document: Path) -> None:
        # Imported when a file is processed, not at module import time.
        from dforge.converter import convert as convert_document

        convert_document(document, target_format)

    failures = _run_parallel("Batch convert", documents, workers, _convert_one)
    _report_errors(failures)

    succeeded = total - len(failures)
    success(f"Batch convert complete. Processed {succeeded}/{total} file(s).")
|