dforge-cli 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dforge/__init__.py +1 -0
- dforge/banner.py +31 -0
- dforge/batch.py +156 -0
- dforge/cli.py +525 -0
- dforge/config.py +38 -0
- dforge/config_manager.py +33 -0
- dforge/converter.py +167 -0
- dforge/dependencies.py +98 -0
- dforge/engine.py +236 -0
- dforge/extractor.py +201 -0
- dforge/loading.py +19 -0
- dforge/menu.py +115 -0
- dforge/operations.py +314 -0
- dforge/processor.py +251 -0
- dforge/setup.py +107 -0
- dforge/theme.py +12 -0
- dforge/utils.py +169 -0
- dforge/watcher.py +137 -0
- dforge/workflows/__init__.py +0 -0
- dforge/workflows/automation.py +21 -0
- dforge/workflows/batch.py +18 -0
- dforge/workflows/batch_ocr.py +61 -0
- dforge/workflows/common.py +133 -0
- dforge/workflows/compress.py +73 -0
- dforge/workflows/convert.py +148 -0
- dforge/workflows/decrypt.py +50 -0
- dforge/workflows/encrypt.py +50 -0
- dforge/workflows/extract.py +18 -0
- dforge/workflows/image.py +21 -0
- dforge/workflows/merge.py +109 -0
- dforge/workflows/ocr.py +104 -0
- dforge/workflows/ocr_folder.py +0 -0
- dforge/workflows/pages.py +57 -0
- dforge/workflows/rotate.py +53 -0
- dforge/workflows/searchable.py +51 -0
- dforge/workflows/settings.py +56 -0
- dforge/workflows/split.py +32 -0
- dforge/workflows/tables.py +45 -0
- dforge/workflows/watermark.py +54 -0
- dforge_cli-1.0.1.dist-info/METADATA +244 -0
- dforge_cli-1.0.1.dist-info/RECORD +44 -0
- dforge_cli-1.0.1.dist-info/WHEEL +5 -0
- dforge_cli-1.0.1.dist-info/entry_points.txt +2 -0
- dforge_cli-1.0.1.dist-info/top_level.txt +1 -0
dforge/setup.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
from shutil import which
|
|
3
|
+
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
|
|
6
|
+
from dforge.config_manager import set_tool_path
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
console = Console()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def setup_dependencies():
    """Install external tools through winget and record where they live.

    Runs ``winget install --id <package> -e`` for Poppler, Pandoc and MiKTeX,
    then looks up pdfinfo, pandoc, xelatex, tesseract and Ghostscript and
    stores every path that was found via ``set_tool_path``.
    """
    packages = [
        ("Poppler", "oschwartz10612.Poppler"),
        ("Pandoc", "JohnMacFarlane.Pandoc"),
        ("MiKTeX", "MiKTeX.MiKTeX"),
    ]

    # Without this guard subprocess.run raises FileNotFoundError on machines
    # that have no winget executable, aborting setup before tool discovery.
    if which("winget") is None:
        console.print(
            "[yellow]winget was not found on PATH; skipping installation.[/yellow]"
        )
    else:
        for name, package_id in packages:
            console.print(f"[cyan]Installing {name}...[/cyan]")
            try:
                result = subprocess.run(
                    ["winget", "install", "--id", package_id, "-e"],
                    check=False,
                )
            except OSError as exc:
                console.print(f"[red]Could not run winget for {name}:[/red] {exc}")
                continue
            if result.returncode != 0:
                # Reported, not fatal: discovery below may still find the tool.
                console.print(
                    f"[yellow]winget exited with code {result.returncode} "
                    f"for {name}.[/yellow]"
                )

    # Save discovered tools
    discovered = {
        "poppler": find_pdfinfo(),
        "pandoc": find_pandoc(),
        "xelatex": find_xelatex(),
        "tesseract": which("tesseract"),
        "ghostscript": which("gswin64c") or which("gswin32c") or which("gs"),
    }
    for tool, location in discovered.items():
        if location:
            set_tool_path(tool, location)

    console.print("\n[bold green]Setup complete.[/bold green]")
|
|
64
|
+
def _find_executable(exe_name, roots):
    """Return the first file named *exe_name* found below *roots*, else None.

    Roots are searched recursively in the order given; roots that do not
    exist are skipped.
    """
    for root in roots:
        if not root.exists():
            continue
        # next() ends the directory walk at the first hit instead of
        # collecting every match in a potentially huge tree first.
        match = next(root.rglob(exe_name), None)
        if match is not None:
            return str(match)
    return None


def find_pdfinfo():
    """Locate ``pdfinfo.exe`` under the WinGet package dir or Program Files."""
    return _find_executable(
        "pdfinfo.exe",
        [
            Path.home() / "AppData/Local/Microsoft/WinGet/Packages",
            Path("C:/Program Files"),
        ],
    )


def find_xelatex():
    """Locate ``xelatex.exe`` under the MiKTeX, AppData, WinGet or Program Files dirs."""
    return _find_executable(
        "xelatex.exe",
        [
            Path.home() / "AppData/Local/Programs/MiKTeX",
            Path.home() / "AppData/Local",
            Path.home() / "AppData/Local/Microsoft/WinGet/Packages",
            Path("C:/Program Files"),
        ],
    )


def find_pandoc():
    """Locate ``pandoc.exe`` under the Pandoc, Program Files or WinGet dirs."""
    return _find_executable(
        "pandoc.exe",
        [
            Path.home() / "AppData/Local/Pandoc",
            Path("C:/Program Files"),
            Path.home() / "AppData/Local/Microsoft/WinGet/Packages",
        ],
    )
|
dforge/theme.py
ADDED
dforge/utils.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DForge Utilities - Shared helpers used across all modules.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
import json
|
|
7
|
+
import shutil
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List, Optional
|
|
11
|
+
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
from rich.panel import Panel
|
|
14
|
+
from rich.text import Text
|
|
15
|
+
|
|
16
|
+
console = Console()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# JSON state file read and written by save_recent_folder / load_recent_folder.
# NOTE: Path.cwd() is evaluated once, when this module is imported, so the
# file is tied to the working directory at import time.
CONFIG_FILE = Path.cwd() / ".dforge.json"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def save_recent_folder(folder: str):
    """Store *folder* as the most recently used folder.

    Overwrites CONFIG_FILE with a JSON object whose only key is
    ``recent_folder``. Any failure is printed instead of raised.
    """
    try:
        payload = json.dumps({"recent_folder": folder}, indent=4)
        CONFIG_FILE.write_text(payload)
    except Exception as exc:
        print("ERROR:", exc)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def load_recent_folder():
    """Return the ``recent_folder`` value stored in CONFIG_FILE, or None.

    None is returned when the file is missing, cannot be read or parsed,
    or does not contain the key.
    """
    if not CONFIG_FILE.exists():
        return None

    try:
        raw = CONFIG_FILE.read_text()
        stored = json.loads(raw)
        return stored.get("recent_folder")
    except Exception:
        return None
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
# Output path helpers
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
def resolve_output(
    input_path: Path,
    output: Optional[str],
    suffix: str,
    ext: Optional[str] = None,
) -> Path:
    """
    Work out the path the output file should be written to.

    A non-empty `output` wins and is returned as a Path unchanged.
    Otherwise the name is built next to the input file as
    <input stem> + suffix + extension, where the extension is `ext`
    when given (even if empty) and the input's own suffix when `ext` is None.

    Example:
        resolve_output(Path("doc.pdf"), None, "_merged", ".pdf")
        -> Path("doc_merged.pdf")
    """
    if output:
        return Path(output)

    source = Path(input_path)
    extension = source.suffix if ext is None else ext
    new_name = f"{source.stem}{suffix}" + extension
    return source.with_name(new_name)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def ensure_parent(path: Path) -> None:
    """Make sure the directory that will contain *path* exists.

    Missing ancestors are created as well; an existing directory is fine.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# ---------------------------------------------------------------------------
|
|
80
|
+
# Dependency checks
|
|
81
|
+
# ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
def require_tool(tool: str, install_hint: str = "") -> None:
    """Exit with status 1 and an error panel when *tool* is not on PATH.

    *install_hint*, when non-empty, is shown dimmed on a second line of
    the panel. Returns normally when the tool is found.
    """
    if shutil.which(tool) is not None:
        return

    lines = [f"[bold red]Missing dependency:[/bold red] '{tool}' was not found on PATH."]
    if install_hint:
        lines.append(f"[dim]{install_hint}[/dim]")

    panel = Panel("\n".join(lines), title="[red]Dependency Error[/red]", border_style="red")
    console.print(panel)
    sys.exit(1)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def require_tesseract() -> None:
    """Run require_tool for ``tesseract`` with its installation link as hint."""
    hint = "Install Tesseract: https://tesseract-ocr.github.io/tessdoc/Installation.html"
    require_tool("tesseract", hint)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def require_ghostscript() -> None:
    """Exit with status 1 and an error panel unless Ghostscript is on PATH.

    Any of the executables ``gs``, ``gswin64c`` or ``gswin32c`` counts.
    """
    if any(shutil.which(name) for name in ("gs", "gswin64c", "gswin32c")):
        return

    message = (
        "[bold red]Missing dependency:[/bold red] 'Ghostscript' was not found on PATH.\n"
        "[dim]Install from https://ghostscript.com/releases/gsdnld.html[/dim]"
    )
    panel = Panel(message, title="[red]Dependency Error[/red]", border_style="red")
    console.print(panel)
    sys.exit(1)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def require_pandoc() -> None:
    """Run require_tool for ``pandoc`` with its installation link as hint."""
    hint = "Install Pandoc: https://pandoc.org/installing.html"
    require_tool("pandoc", hint)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def ghostscript_bin() -> str:
    """Return the name of the first Ghostscript executable found on PATH.

    Candidates are tried in the order ``gs``, ``gswin64c``, ``gswin32c``.
    ``"gs"` is returned as well when none of them is found.
    """
    names = ("gs", "gswin64c", "gswin32c")
    return next((name for name in names if shutil.which(name)), "gs")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# ---------------------------------------------------------------------------
|
|
131
|
+
# Pretty printing helpers
|
|
132
|
+
# ---------------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
def success(msg: str) -> None:
    """Print *msg* on the shared console behind a bold green ``OK`` tag."""
    tag = "[bold green]OK[/bold green]"
    console.print(f"{tag} {msg}")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def info(msg: str) -> None:
    """Print *msg* on the shared console behind a bold cyan ``INFO`` tag."""
    tag = "[bold cyan]INFO[/bold cyan]"
    console.print(f"{tag} {msg}")
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def warn(msg: str) -> None:
    """Print *msg* on the shared console behind a bold yellow ``WARN`` tag."""
    tag = "[bold yellow]WARN[/bold yellow]"
    console.print(f"{tag} {msg}")
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def error(msg: str) -> None:
    """Print *msg* on the shared console behind a bold red ``ERROR`` tag."""
    tag = "[bold red]ERROR[/bold red]"
    console.print(f"{tag} {msg}")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def abort(msg: str) -> None:
    """Report *msg* through error() and end the process with exit status 1."""
    error(msg)
    raise SystemExit(1)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# ---------------------------------------------------------------------------
|
|
156
|
+
# File collection helpers
|
|
157
|
+
# ---------------------------------------------------------------------------
|
|
158
|
+
|
|
159
|
+
def collect_files(
    directory: Path,
    extensions: set[str],
    recursive: bool = True,
) -> List[Path]:
    """Collect all files with the given extensions from a directory.

    Args:
        directory: Folder to search.
        extensions: File-name endings to accept, e.g. ``{".pdf", ".png"}``.
            Matching ignores case, so ``.PDF`` files are found on
            case-sensitive file systems too.
        recursive: Search sub-folders as well when True.

    Returns:
        A sorted list of unique paths. Only regular files are returned;
        a directory whose name happens to end in an extension is skipped.
    """
    wanted = tuple(ext.lower() for ext in extensions)
    if not wanted:
        return []

    pattern = "**/*" if recursive else "*"
    # One walk over the tree for all extensions instead of one walk each.
    matches = {
        candidate
        for candidate in Path(directory).glob(pattern)
        if candidate.name.lower().endswith(wanted) and candidate.is_file()
    }
    return sorted(matches)
|
dforge/watcher.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DForge Watch Mode
|
|
3
|
+
Monitors a directory for new files and automatically processes them.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from dforge.utils import abort, console, info, success, warn
|
|
12
|
+
from dforge.config import (
|
|
13
|
+
DEFAULT_OCR_LANG,
|
|
14
|
+
SUPPORTED_IMAGE_EXTS,
|
|
15
|
+
SUPPORTED_PDF_EXTS,
|
|
16
|
+
WATCH_DEBOUNCE_SECONDS,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Event handler
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
def _load_action(module_names, attr):
    """Return *attr* from the first module in *module_names* that exists.

    A module counts as missing only when the import fails because that
    module (or one of its parent packages) cannot be found; any other
    import failure is re-raised so the real cause stays visible.
    """
    import importlib

    for module_name in module_names:
        try:
            module = importlib.import_module(module_name)
        except ModuleNotFoundError as exc:
            missing = exc.name or ""
            if module_name == missing or module_name.startswith(missing + "."):
                continue
            raise
        return getattr(module, attr)
    raise ModuleNotFoundError(
        f"Cannot import {attr!r}; none of these modules exist: "
        + ", ".join(module_names)
    )


class _DForgeHandler:
    """Handles file-system events and dispatches to the correct action."""

    # The original import path is tried first, then the flat module layout.
    # NOTE(review): that the flat modules export these functions is inferred
    # from the file names only — confirm.
    _OCR_MODULES = ("dforge.ocr.engine", "dforge.engine")
    _PDF_MODULES = ("dforge.pdf.operations", "dforge.operations")
    _IMAGE_MODULES = ("dforge.image.processor", "dforge.processor")

    def __init__(self, action: str, lang: str, fmt: str):
        self.action = action
        self.lang = lang
        self.fmt = fmt
        # Paths already handed to dispatch(); used to ignore repeated events.
        # NOTE(review): entries are never removed, so a file that is deleted
        # and re-created under the same name is not processed again.
        self._seen: set = set()

    def dispatch(self, path: Path) -> None:
        """Process *path* once according to the configured action.

        Unsupported files and unknown actions only produce a warning;
        any exception raised while processing is printed, not propagated.
        """
        if path in self._seen:
            return
        self._seen.add(path)

        # Debounce: wait for the file to finish writing
        time.sleep(WATCH_DEBOUNCE_SECONDS)
        if not path.exists():
            return

        ext = path.suffix.lower()
        console.print(f"\n[bold cyan]-> Detected:[/bold cyan] {path.name}")

        try:
            if self.action == "ocr":
                if ext == ".pdf":
                    ocr_pdf = _load_action(self._OCR_MODULES, "ocr_pdf")
                    ocr_pdf(path, lang=self.lang, fmt=self.fmt)
                elif ext in SUPPORTED_IMAGE_EXTS:
                    ocr_image = _load_action(self._OCR_MODULES, "ocr_image")
                    ocr_image(path, lang=self.lang, fmt=self.fmt)
                else:
                    warn(f"Skipped (unsupported for OCR): {path.name}")

            elif self.action == "searchable":
                if ext == ".pdf":
                    make_searchable_pdf = _load_action(
                        self._OCR_MODULES, "make_searchable_pdf"
                    )
                    make_searchable_pdf(path, lang=self.lang)
                else:
                    warn(f"Skipped (not a PDF): {path.name}")

            elif self.action == "compress":
                if ext == ".pdf":
                    compress = _load_action(self._PDF_MODULES, "compress")
                    compress(path)
                else:
                    warn(f"Skipped (not a PDF): {path.name}")

            elif self.action == "preprocess":
                if ext in SUPPORTED_IMAGE_EXTS:
                    preprocess_for_ocr = _load_action(
                        self._IMAGE_MODULES, "preprocess_for_ocr"
                    )
                    preprocess_for_ocr(path)
                else:
                    warn(f"Skipped (not an image): {path.name}")

            else:
                warn(f"Unknown watch action: {self.action}")

        except Exception as exc:
            console.print(f"[red]Error processing {path.name}:[/red] {exc}")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# ---------------------------------------------------------------------------
|
|
86
|
+
# Watch entry point
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
|
|
89
|
+
def watch(
    directory: Path,
    action: str = "ocr",
    lang: str = DEFAULT_OCR_LANG,
    fmt: str = "txt",
) -> None:
    """
    Monitor a directory and process new files automatically.

    Created and moved-in files below `directory` (recursively) are handed
    to a _DForgeHandler. Blocks until interrupted with Ctrl+C. Aborts via
    abort() when watchdog is not installed or `directory` is not a directory.

    action: ocr | searchable | compress | preprocess
    """
    try:
        from watchdog.observers import Observer
        from watchdog.events import FileSystemEventHandler
    except ImportError:
        abort("watchdog is required. Run: pip install watchdog")

    directory = Path(directory)
    # is_dir() rather than exists(): a regular file cannot be watched as a folder.
    if not directory.is_dir():
        abort(f"Directory not found: {directory}")

    handler_state = _DForgeHandler(action=action, lang=lang, fmt=fmt)

    class _WatchdogBridge(FileSystemEventHandler):
        """Forwards watchdog file events to the shared handler."""

        def on_created(self, event):
            if not event.is_directory:
                handler_state.dispatch(Path(event.src_path))

        def on_moved(self, event):
            if not event.is_directory:
                handler_state.dispatch(Path(event.dest_path))

    observer = Observer()
    observer.schedule(_WatchdogBridge(), str(directory), recursive=True)
    observer.start()

    console.print(
        f"\n[bold green]Watching[/bold green] [bold]{directory}[/bold] "
        f"for new files (action: [cyan]{action}[/cyan])\n"
        "[dim]Press Ctrl+C to stop.[/dim]\n"
    )

    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        info("Watch mode stopped.")
    finally:
        # Shut the observer down on every exit path, not only on Ctrl+C.
        observer.stop()
        observer.join()
|
|
File without changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from dforge.menu import automation_menu
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def automation_workflow():
    """Show the automation menu repeatedly until the user leaves it.

    The entries "Watch Folder", "Auto OCR", "Auto Convert" and
    "Scheduled Tasks" are placeholders that currently do nothing.
    """
    while True:
        choice = automation_menu()

        # A None result matched no branch before, so the loop never ended.
        # Assumes automation_menu returns None when the prompt is cancelled
        # — TODO confirm against dforge.menu.
        if choice is None or choice == "⬅ Back":
            break

        if choice == "Watch Folder":
            pass  # not implemented yet

        elif choice == "Auto OCR":
            pass  # not implemented yet

        elif choice == "Auto Convert":
            pass  # not implemented yet

        elif choice == "Scheduled Tasks":
            pass  # not implemented yet
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from dforge.menu import batch_menu
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def batch_workflow():
    """Show the batch menu repeatedly until the user leaves it.

    The entries "Batch Convert", "Batch Compress" and "Batch OCR" are
    placeholders that currently do nothing.
    """
    while True:
        choice = batch_menu()

        # A None result matched no branch before, so the loop never ended.
        # Assumes batch_menu returns None when the prompt is cancelled
        # — TODO confirm against dforge.menu.
        if choice is None or choice == "⬅ Back":
            break

        if choice == "Batch Convert":
            pass  # not implemented yet

        elif choice == "Batch Compress":
            pass  # not implemented yet

        elif choice == "Batch OCR":
            pass  # not implemented yet
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import questionary
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from dforge.loading import Loader
|
|
6
|
+
from dforge.batch import batch_with_ocr
|
|
7
|
+
|
|
8
|
+
from dforge.workflows.common import (
|
|
9
|
+
select_folder,
|
|
10
|
+
success_screen,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def batch_ocr_workflow():
    """Interactively collect batch-OCR settings and run the batch.

    Asks for a folder, OCR language(s), output format and worker count,
    calls batch_with_ocr inside a Loader and shows a success panel.
    Returns without doing anything when a prompt is cancelled or the
    worker count is not a positive whole number.
    """
    console.print("\n[bold cyan]Batch OCR[/bold cyan]\n")

    folder = select_folder()
    if not folder:
        return

    # .ask() returns None when the user cancels a prompt.
    lang = questionary.text("OCR Language(s)", default="eng").ask()
    if lang is None:
        return

    fmt = questionary.select("Output Format", choices=["txt", "json", "md"]).ask()
    if fmt is None:
        return

    workers_answer = questionary.text("Workers", default="4").ask()
    if workers_answer is None:
        return

    # int() used to raise on cancelled or non-numeric input and crash the workflow.
    try:
        workers = int(workers_answer)
    except ValueError:
        workers = 0
    if workers < 1:
        console.print("[red]Workers must be a positive whole number.[/red]")
        return

    with Loader("Processing batch OCR..."):
        batch_with_ocr(
            Path(folder),
            lang,
            fmt,
            True,
            workers,
        )

    success_screen(
        "Batch OCR Complete",
        extra_lines=[
            f"Folder : {folder}",
            f"Workers : {workers}",
        ],
    )
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import questionary
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
|
|
7
|
+
from dforge.utils import save_recent_folder, load_recent_folder
|
|
8
|
+
|
|
9
|
+
console = Console()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def select_folder():
    """Ask the user which folder to work in.

    Offers the stored recent folder (when it still exists), the current
    folder, or a folder typed in by the user. A typed-in folder is saved
    as the new recent folder.

    Returns:
        The chosen folder as a Path, or None when a prompt is cancelled
        or the typed-in path is not an existing directory.
    """
    recent_folder = load_recent_folder()

    choices = []
    if recent_folder and Path(recent_folder).exists():
        choices.append(f"Recent Folder ({Path(recent_folder).name})")
    choices.extend(["Current Folder", "Choose Folder"])

    mode = questionary.select(
        "How would you like to select files?",
        choices=choices,
    ).ask()

    # A cancelled prompt used to fall through to the path prompt below.
    if mode is None:
        return None

    if mode.startswith("Recent Folder"):
        return Path(recent_folder)

    if mode == "Current Folder":
        return Path(".")

    folder_path = questionary.path("Folder containing PDFs:").ask()
    if not folder_path:
        return None

    folder = Path(folder_path)
    # Reject bad input here so a non-existent folder is never stored as "recent".
    if not folder.is_dir():
        console.print(f"[red]Folder not found:[/red] {folder}")
        return None

    save_recent_folder(str(folder))
    return folder
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def select_multiple_pdfs():
    """Let the user tick several PDFs from a folder picked via select_folder.

    Returns:
        ``(folder, names)`` where *names* are the selected file names, or
        ``(None, None)`` when no folder was chosen, the folder holds no
        ``*.pdf`` entries, or nothing was selected.
    """
    nothing = (None, None)

    folder = select_folder()
    if not folder:
        return nothing

    names = [pdf.name for pdf in sorted(folder.glob("*.pdf"))]
    if not names:
        console.print("[red]No PDF files found.[/red]")
        return nothing

    picked = questionary.checkbox("Select PDFs", choices=names).ask()
    return (folder, picked) if picked else nothing
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def select_single_pdf():
    """Let the user pick one PDF from a folder picked via select_folder.

    Returns:
        The full path of the selected file, or None when no folder was
        chosen, the folder holds no ``*.pdf`` entries, or nothing was picked.
    """
    folder = select_folder()
    if not folder:
        return None

    names = [pdf.name for pdf in sorted(folder.glob("*.pdf"))]
    if not names:
        console.print("[red]No PDF files found.[/red]")
        return None

    picked = questionary.select("Select PDF", choices=names).ask()
    return folder / picked if picked else None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def success_screen(
    title,
    output_file=None,
    extra_lines=None,
):
    """Print a green "Success" panel surrounded by blank lines.

    The panel starts with a check mark and *title*, followed by the
    output file (when given) and one line per entry in *extra_lines*.
    """
    parts = [f"✓ {title}\n"]

    if output_file:
        parts.append(f"\nOutput File : {output_file}")

    if extra_lines:
        parts.extend(f"\n{line}" for line in extra_lines)

    panel = Panel("".join(parts), title="Success", border_style="green")

    console.print()
    console.print(panel)
    console.print()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def get_output_name(default_name):
    """Prompt for an output file name, pre-filled with *default_name*.

    Returns whatever the questionary text prompt's ``ask()`` returns.
    """
    prompt = questionary.text("Output file:", default=default_name)
    return prompt.ask()
|