dforge-cli 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dforge/setup.py ADDED
@@ -0,0 +1,107 @@
1
+ import subprocess
2
+ from shutil import which
3
+
4
+ from rich.console import Console
5
+
6
+ from dforge.config_manager import set_tool_path
7
+ from pathlib import Path
8
+
9
+ console = Console()
10
+
11
+
12
def setup_dependencies():
    """Install external tools with winget and record where they were found.

    Runs ``winget install --id <package> -e`` for Poppler, Pandoc and MiKTeX,
    then looks up pdfinfo, pandoc, xelatex, tesseract and Ghostscript and
    stores every path that was found through ``set_tool_path``.

    Installation is skipped (with a warning) when ``winget`` is not on PATH,
    and a non-zero winget exit code is reported instead of being ignored.
    Tool discovery runs in either case.
    """
    packages = [
        ("Poppler", "oschwartz10612.Poppler"),
        ("Pandoc", "JohnMacFarlane.Pandoc"),
        ("MiKTeX", "MiKTeX.MiKTeX"),
    ]

    if which("winget") is None:
        # Without this guard subprocess.run raises FileNotFoundError.
        console.print(
            "[yellow]winget was not found on PATH; "
            "skipping package installation.[/yellow]"
        )
    else:
        for name, package_id in packages:
            console.print(f"[cyan]Installing {name}...[/cyan]")
            result = subprocess.run(
                ["winget", "install", "--id", package_id, "-e"]
            )
            if result.returncode != 0:
                console.print(
                    f"[yellow]winget exited with code {result.returncode} "
                    f"for {name}.[/yellow]"
                )

    # Save discovered tools (only those that were actually located).
    discovered = [
        ("poppler", find_pdfinfo()),
        ("pandoc", find_pandoc()),
        ("xelatex", find_xelatex()),
        ("tesseract", which("tesseract")),
        ("ghostscript", which("gswin64c") or which("gswin32c") or which("gs")),
    ]
    for tool_name, tool_path in discovered:
        if tool_path:
            set_tool_path(tool_name, tool_path)

    console.print("\n[bold green]Setup complete.[/bold green]")
64
def find_pdfinfo():
    """Return the path of the first ``pdfinfo.exe`` found, or ``None``.

    Searches, in order, the user's WinGet package directory and
    ``C:/Program Files``. Roots that do not exist are skipped.
    """
    search_roots = (
        Path.home() / "AppData/Local/Microsoft/WinGet/Packages",
        Path("C:/Program Files"),
    )

    for root in search_roots:
        if not root.exists():
            continue
        # Stop at the first hit instead of materialising the whole tree walk.
        hit = next(root.rglob("pdfinfo.exe"), None)
        if hit is not None:
            return str(hit)

    return None
77
+
78
def find_xelatex():
    """Return the path of the first ``xelatex.exe`` found, or ``None``.

    Searches, in order, the per-user MiKTeX directory, ``AppData/Local``,
    the user's WinGet package directory and ``C:/Program Files``. Roots that
    do not exist are skipped.
    """
    local_app_data = Path.home() / "AppData/Local"
    search_roots = (
        local_app_data / "Programs/MiKTeX",
        local_app_data,
        local_app_data / "Microsoft/WinGet/Packages",
        Path("C:/Program Files"),
    )

    for root in search_roots:
        if not root.exists():
            continue
        # Stop at the first hit instead of materialising the whole tree walk.
        hit = next(root.rglob("xelatex.exe"), None)
        if hit is not None:
            return str(hit)

    return None
93
+
94
def find_pandoc():
    """Return the path of the first ``pandoc.exe`` found, or ``None``.

    Searches, in order, the per-user Pandoc directory, ``C:/Program Files``
    and the user's WinGet package directory. Roots that do not exist are
    skipped.
    """
    search_roots = (
        Path.home() / "AppData/Local/Pandoc",
        Path("C:/Program Files"),
        Path.home() / "AppData/Local/Microsoft/WinGet/Packages",
    )

    for root in search_roots:
        if not root.exists():
            continue
        # Stop at the first hit instead of materialising the whole tree walk.
        hit = next(root.rglob("pandoc.exe"), None)
        if hit is not None:
            return str(hit)

    return None
dforge/theme.py ADDED
@@ -0,0 +1,12 @@
1
+ from rich.theme import Theme
2
+
3
# Named Rich styles for DForge console output.
DFORGE_THEME = Theme(
    dict(
        title="bold cyan",
        success="bold green",
        warning="bold yellow",
        error="bold red",
        menu="bold white",
        accent="cyan",
    )
)
dforge/utils.py ADDED
@@ -0,0 +1,169 @@
1
+ """
2
+ DForge Utilities - Shared helpers used across all modules.
3
+ """
4
+
5
+ from __future__ import annotations
6
+ import json
7
+ import shutil
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import List, Optional
11
+
12
+ from rich.console import Console
13
+ from rich.panel import Panel
14
+ from rich.text import Text
15
+
16
+ console = Console()
17
+
18
+
19
+
20
+
21
# JSON settings file; resolved against the working directory at import time.
CONFIG_FILE = Path.cwd().joinpath(".dforge.json")
22
+
23
+
24
def save_recent_folder(folder: str):
    """Store *folder* as the ``recent_folder`` entry of ``CONFIG_FILE``.

    The file is overwritten with a single-key JSON object. Any failure is
    printed to stdout and otherwise ignored.
    """
    try:
        payload = json.dumps({"recent_folder": folder}, indent=4)
        CONFIG_FILE.write_text(payload)
    except Exception as exc:
        print("ERROR:", exc)
35
+
36
+
37
def load_recent_folder():
    """Return the ``recent_folder`` value stored in ``CONFIG_FILE``.

    Returns ``None`` when the file does not exist, when the key is absent,
    or when reading/parsing the file fails for any reason.
    """
    if CONFIG_FILE.exists():
        try:
            stored = json.loads(CONFIG_FILE.read_text())
            return stored.get("recent_folder")
        except Exception:
            return None
    return None
47
+ # ---------------------------------------------------------------------------
48
+ # Output path helpers
49
+ # ---------------------------------------------------------------------------
50
+
51
def resolve_output(
    input_path: Path,
    output: Optional[str],
    suffix: str,
    ext: Optional[str] = None,
) -> Path:
    """
    Work out the path the output file should be written to.

    A truthy `output` wins and is returned as a Path. Otherwise the result
    sits next to `input_path` and is named <stem><suffix><extension>, where
    the extension is `ext` when given and the input's own suffix when not.

    Example:
        resolve_output(Path("doc.pdf"), None, "_merged", ".pdf")
        -> Path("doc_merged.pdf")
    """
    if not output:
        source = Path(input_path)
        extension = source.suffix if ext is None else ext
        return source.with_name("".join((source.stem, suffix, extension)))
    return Path(output)
72
+
73
+
74
def ensure_parent(path: Path) -> None:
    """Make sure the directory that will contain *path* exists."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
77
+
78
+
79
+ # ---------------------------------------------------------------------------
80
+ # Dependency checks
81
+ # ---------------------------------------------------------------------------
82
+
83
def require_tool(tool: str, install_hint: str = "") -> None:
    """Exit with status 1 and an error panel unless *tool* is on PATH.

    *install_hint*, when non-empty, is shown dimmed on a second line of the
    panel.
    """
    if shutil.which(tool) is not None:
        return

    lines = [f"[bold red]Missing dependency:[/bold red] '{tool}' was not found on PATH."]
    if install_hint:
        lines.append(f"[dim]{install_hint}[/dim]")
    panel = Panel("\n".join(lines), title="[red]Dependency Error[/red]", border_style="red")
    console.print(panel)
    sys.exit(1)
91
+
92
+
93
def require_tesseract() -> None:
    """Abort via ``require_tool`` unless ``tesseract`` is on PATH."""
    require_tool(
        tool="tesseract",
        install_hint="Install Tesseract: https://tesseract-ocr.github.io/tessdoc/Installation.html",
    )
98
+
99
+
100
def require_ghostscript() -> None:
    """Exit with status 1 and an error panel unless Ghostscript is on PATH.

    Any one of the executables ``gs``, ``gswin64c`` or ``gswin32c`` counts.
    """
    if any(shutil.which(name) for name in ("gs", "gswin64c", "gswin32c")):
        return

    message = (
        "[bold red]Missing dependency:[/bold red] 'Ghostscript' was not found on PATH.\n"
        "[dim]Install from https://ghostscript.com/releases/gsdnld.html[/dim]"
    )
    console.print(
        Panel(message, title="[red]Dependency Error[/red]", border_style="red")
    )
    sys.exit(1)
113
+
114
+
115
def require_pandoc() -> None:
    """Abort via ``require_tool`` unless ``pandoc`` is on PATH."""
    require_tool(
        tool="pandoc",
        install_hint="Install Pandoc: https://pandoc.org/installing.html",
    )
120
+
121
+
122
def ghostscript_bin() -> str:
    """Name of the first Ghostscript executable found on PATH.

    Candidates are tried in the order ``gs``, ``gswin64c``, ``gswin32c``.
    When none is found, ``"gs"`` is returned anyway so the failure surfaces
    when the caller tries to run it.
    """
    candidates = ("gs", "gswin64c", "gswin32c")
    return next((name for name in candidates if shutil.which(name)), "gs")
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # Pretty printing helpers
132
+ # ---------------------------------------------------------------------------
133
+
134
def success(msg: str) -> None:
    """Print *msg* on the shared console behind a bold green ``OK`` tag."""
    line = f"[bold green]OK[/bold green] {msg}"
    console.print(line)
136
+
137
+
138
def info(msg: str) -> None:
    """Print *msg* on the shared console behind a bold cyan ``INFO`` tag."""
    line = f"[bold cyan]INFO[/bold cyan] {msg}"
    console.print(line)
140
+
141
+
142
def warn(msg: str) -> None:
    """Print *msg* on the shared console behind a bold yellow ``WARN`` tag."""
    line = f"[bold yellow]WARN[/bold yellow] {msg}"
    console.print(line)
144
+
145
+
146
def error(msg: str) -> None:
    """Print *msg* on the shared console behind a bold red ``ERROR`` tag."""
    line = f"[bold red]ERROR[/bold red] {msg}"
    console.print(line)
148
+
149
+
150
def abort(msg: str) -> None:
    """Report *msg* through ``error`` and terminate with exit status 1."""
    error(msg)
    raise SystemExit(1)
153
+
154
+
155
+ # ---------------------------------------------------------------------------
156
+ # File collection helpers
157
+ # ---------------------------------------------------------------------------
158
+
159
def collect_files(
    directory: Path,
    extensions: set[str],
    recursive: bool = True,
) -> List[Path]:
    """Collect all files with the given extensions from a directory.

    Args:
        directory: Folder to scan.
        extensions: Name endings to accept, e.g. ``{".pdf", ".png"}``.
            Matching ignores case so ``scan.PDF`` is found on every platform.
        recursive: Descend into sub-directories when true; otherwise only
            direct children are considered.

    Returns:
        The matching regular files in sorted order. Directories are never
        returned, even if their name ends with one of the extensions.
    """
    pattern = "**/*" if recursive else "*"
    # str.endswith accepts a tuple, so one pass over the tree covers every
    # extension; an empty tuple matches nothing.
    wanted = tuple(ext.lower() for ext in extensions)
    return sorted(
        entry
        for entry in directory.glob(pattern)
        if entry.name.lower().endswith(wanted) and entry.is_file()
    )
dforge/watcher.py ADDED
@@ -0,0 +1,137 @@
1
+ """
2
+ DForge Watch Mode
3
+ Monitors a directory for new files and automatically processes them.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import time
9
+ from pathlib import Path
10
+
11
+ from dforge.utils import abort, console, info, success, warn
12
+ from dforge.config import (
13
+ DEFAULT_OCR_LANG,
14
+ SUPPORTED_IMAGE_EXTS,
15
+ SUPPORTED_PDF_EXTS,
16
+ WATCH_DEBOUNCE_SECONDS,
17
+ )
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Event handler
22
+ # ---------------------------------------------------------------------------
23
+
24
+ class _DForgeHandler:
25
+ """Handles file-system events and dispatches to the correct action."""
26
+
27
+ def __init__(self, action: str, lang: str, fmt: str):
28
+ self.action = action
29
+ self.lang = lang
30
+ self.fmt = fmt
31
+ self._seen: set = set()
32
+
33
+ def dispatch(self, path: Path) -> None:
34
+ if path in self._seen:
35
+ return
36
+ self._seen.add(path)
37
+
38
+ # Debounce: wait for the file to finish writing
39
+ time.sleep(WATCH_DEBOUNCE_SECONDS)
40
+ if not path.exists():
41
+ return
42
+
43
+ ext = path.suffix.lower()
44
+ console.print(f"\n[bold cyan]-> Detected:[/bold cyan] {path.name}")
45
+
46
+ try:
47
+ if self.action == "ocr":
48
+ if ext == ".pdf":
49
+ from dforge.ocr.engine import ocr_pdf
50
+ ocr_pdf(path, lang=self.lang, fmt=self.fmt)
51
+ elif ext in SUPPORTED_IMAGE_EXTS:
52
+ from dforge.ocr.engine import ocr_image
53
+ ocr_image(path, lang=self.lang, fmt=self.fmt)
54
+ else:
55
+ warn(f"Skipped (unsupported for OCR): {path.name}")
56
+
57
+ elif self.action == "searchable":
58
+ if ext == ".pdf":
59
+ from dforge.ocr.engine import make_searchable_pdf
60
+ make_searchable_pdf(path, lang=self.lang)
61
+ else:
62
+ warn(f"Skipped (not a PDF): {path.name}")
63
+
64
+ elif self.action == "compress":
65
+ if ext == ".pdf":
66
+ from dforge.pdf.operations import compress
67
+ compress(path)
68
+ else:
69
+ warn(f"Skipped (not a PDF): {path.name}")
70
+
71
+ elif self.action == "preprocess":
72
+ if ext in SUPPORTED_IMAGE_EXTS:
73
+ from dforge.image.processor import preprocess_for_ocr
74
+ preprocess_for_ocr(path)
75
+ else:
76
+ warn(f"Skipped (not an image): {path.name}")
77
+
78
+ else:
79
+ warn(f"Unknown watch action: {self.action}")
80
+
81
+ except Exception as exc:
82
+ console.print(f"[red]Error processing {path.name}:[/red] {exc}")
83
+
84
+
85
+ # ---------------------------------------------------------------------------
86
+ # Watch entry point
87
+ # ---------------------------------------------------------------------------
88
+
89
def watch(
    directory: Path,
    action: str = "ocr",
    lang: str = DEFAULT_OCR_LANG,
    fmt: str = "txt",
) -> None:
    """
    Monitor a directory and process new files automatically.

    action: ocr | searchable | compress | preprocess

    Created and moved-in files (at any depth) are passed to a
    ``_DForgeHandler``. Blocks until Ctrl+C. Aborts when ``watchdog`` is not
    installed or ``directory`` does not exist.
    """
    try:
        from watchdog.observers import Observer
        from watchdog.events import FileSystemEventHandler
    except ImportError:
        abort("watchdog is required. Run: pip install watchdog")

    if not directory.exists():
        abort(f"Directory not found: {directory}")

    handler_state = _DForgeHandler(action=action, lang=lang, fmt=fmt)

    class _WatchdogBridge(FileSystemEventHandler):
        """Forwards watchdog file events to ``handler_state``."""

        def on_created(self, event):
            if not event.is_directory:
                handler_state.dispatch(Path(event.src_path))

        def on_moved(self, event):
            # A move into place is treated like a new file at the destination.
            if not event.is_directory:
                handler_state.dispatch(Path(event.dest_path))

    observer = Observer()
    observer.schedule(_WatchdogBridge(), str(directory), recursive=True)
    observer.start()

    console.print(
        f"\n[bold green]Watching[/bold green] [bold]{directory}[/bold] "
        f"for new files (action: [cyan]{action}[/cyan])\n"
        "[dim]Press Ctrl+C to stop.[/dim]\n"
    )

    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        info("Watch mode stopped.")
    finally:
        # Shut the observer thread down on every exit path, not only Ctrl+C.
        observer.stop()
        observer.join()
File without changes
@@ -0,0 +1,21 @@
1
+ from dforge.menu import automation_menu
2
+
3
+
4
def automation_workflow():
    """Show the automation menu repeatedly until the user leaves it.

    The loop ends on "⬅ Back" or when the menu yields ``None``.
    The remaining entries are placeholders that currently do nothing.
    """
    # Menu entries that are accepted but not implemented yet.
    placeholders = (
        "Watch Folder",
        "Auto OCR",
        "Auto Convert",
        "Scheduled Tasks",
    )

    while True:
        choice = automation_menu()

        # NOTE(review): assumes automation_menu returns None when the prompt
        # is cancelled (questionary's ask() behaviour) — confirm. Without this
        # check a cancelled prompt would simply be shown again, forever.
        if choice is None or choice == "⬅ Back":
            break

        if choice in placeholders:
            continue
@@ -0,0 +1,18 @@
1
+ from dforge.menu import batch_menu
2
+
3
+
4
def batch_workflow():
    """Show the batch menu repeatedly until the user leaves it.

    The loop ends on "⬅ Back" or when the menu yields ``None``.
    The remaining entries are placeholders that currently do nothing.
    """
    # Menu entries that are accepted but not implemented yet.
    placeholders = (
        "Batch Convert",
        "Batch Compress",
        "Batch OCR",
    )

    while True:
        choice = batch_menu()

        # NOTE(review): assumes batch_menu returns None when the prompt is
        # cancelled (questionary's ask() behaviour) — confirm. Without this
        # check a cancelled prompt would simply be shown again, forever.
        if choice is None or choice == "⬅ Back":
            break

        if choice in placeholders:
            continue
@@ -0,0 +1,61 @@
1
+ from pathlib import Path
2
+
3
+ import questionary
4
+ from rich.console import Console
5
+ from dforge.loading import Loader
6
+ from dforge.batch import batch_with_ocr
7
+
8
+ from dforge.workflows.common import (
9
+ select_folder,
10
+ success_screen,
11
+ )
12
+
13
+ console = Console()
14
+
15
+
16
def batch_ocr_workflow():
    """Interactively collect batch-OCR settings and run the batch.

    Prompts for a folder, OCR language(s), output format and worker count,
    runs ``batch_with_ocr`` inside a ``Loader`` and shows a success panel.
    Returns early, doing nothing, when any prompt is cancelled or the worker
    count is not a positive whole number.
    """
    console.print("\n[bold cyan]Batch OCR[/bold cyan]\n")

    folder = select_folder()
    if not folder:
        return

    lang = questionary.text("OCR Language(s)", default="eng").ask()
    if lang is None:  # ask() yields None when the prompt is cancelled
        return

    fmt = questionary.select(
        "Output Format",
        choices=["txt", "json", "md"],
    ).ask()
    if fmt is None:
        return

    raw_workers = questionary.text("Workers", default="4").ask()
    if raw_workers is None:
        return
    try:
        workers = int(raw_workers)
    except ValueError:
        workers = 0
    if workers < 1:
        # Previously a non-numeric answer crashed with an uncaught ValueError.
        console.print("[red]Workers must be a positive whole number.[/red]")
        return

    with Loader("Processing batch OCR..."):
        # NOTE(review): the meaning of the positional True is defined by
        # batch_with_ocr, which is not visible here — confirm.
        batch_with_ocr(
            Path(folder),
            lang,
            fmt,
            True,
            workers,
        )

    success_screen(
        "Batch OCR Complete",
        extra_lines=[
            f"Folder : {folder}",
            f"Workers : {workers}",
        ],
    )
@@ -0,0 +1,133 @@
1
+ from pathlib import Path
2
+
3
+ import questionary
4
+ from rich.console import Console
5
+ from rich.panel import Panel
6
+
7
+ from dforge.utils import save_recent_folder, load_recent_folder
8
+
9
+ console = Console()
10
+
11
+
12
def select_folder():
    """Ask the user which folder to work in.

    Offers the remembered recent folder (only if it still exists), the
    current folder, or a free-form path prompt. A typed path is saved as the
    new recent folder.

    Returns:
        A ``Path`` for the chosen folder, or ``None`` when either prompt is
        cancelled or left empty.
    """
    recent_folder = load_recent_folder()

    choices = []
    if recent_folder and Path(recent_folder).exists():
        choices.append(f"Recent Folder ({Path(recent_folder).name})")
    choices.extend(["Current Folder", "Choose Folder"])

    mode = questionary.select(
        "How would you like to select files?",
        choices=choices,
    ).ask()

    if mode is None:
        # Cancelled: do not fall through to the path prompt.
        return None

    if mode.startswith("Recent Folder"):
        return Path(recent_folder)

    if mode == "Current Folder":
        return Path(".")

    folder_path = questionary.path("Folder containing PDFs:").ask()
    if not folder_path:
        return None

    folder = Path(folder_path)
    save_recent_folder(str(folder))
    return folder
50
+
51
+
52
def select_multiple_pdfs():
    """Let the user tick several PDFs from a chosen folder.

    Returns:
        ``(folder, names)`` where ``names`` are the selected file names, or
        ``(None, None)`` when no folder was chosen, the folder has no
        ``*.pdf`` entries, or nothing was selected.
    """
    nothing = (None, None)

    folder = select_folder()
    if not folder:
        return nothing

    pdf_names = [pdf.name for pdf in sorted(folder.glob("*.pdf"))]
    if not pdf_names:
        console.print("[red]No PDF files found.[/red]")
        return nothing

    picked = questionary.checkbox("Select PDFs", choices=pdf_names).ask()
    return (folder, picked) if picked else nothing
75
+
76
+
77
def select_single_pdf():
    """Let the user pick one PDF from a chosen folder.

    Returns:
        The path of the selected file (``folder / name``), or ``None`` when
        no folder was chosen, the folder has no ``*.pdf`` entries, or the
        selection was cancelled.
    """
    folder = select_folder()
    if not folder:
        return None

    pdf_names = [pdf.name for pdf in sorted(folder.glob("*.pdf"))]
    if not pdf_names:
        console.print("[red]No PDF files found.[/red]")
        return None

    picked = questionary.select("Select PDF", choices=pdf_names).ask()
    return folder / picked if picked else None
100
+
101
+
102
def success_screen(
    title,
    output_file=None,
    extra_lines=None,
):
    """Print a green "Success" panel, with a blank line before and after.

    The panel shows ``title`` behind a check mark, then the output file when
    one is given, then each entry of ``extra_lines`` on its own line.
    """
    parts = [f"✓ {title}\n"]

    if output_file:
        parts.append(f"\nOutput File : {output_file}")

    if extra_lines:
        parts.extend(f"\n{line}" for line in extra_lines)

    panel = Panel("".join(parts), title="Success", border_style="green")

    console.print()
    console.print(panel)
    console.print()
127
+
128
+
129
def get_output_name(default_name):
    """Prompt for an output file name, pre-filled with *default_name*.

    Returns whatever the prompt's ``ask()`` returns.
    """
    prompt = questionary.text("Output file:", default=default_name)
    return prompt.ask()