dforge-cli 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dforge/__init__.py +1 -0
- dforge/banner.py +31 -0
- dforge/batch.py +156 -0
- dforge/cli.py +525 -0
- dforge/config.py +38 -0
- dforge/config_manager.py +33 -0
- dforge/converter.py +167 -0
- dforge/dependencies.py +98 -0
- dforge/engine.py +236 -0
- dforge/extractor.py +201 -0
- dforge/loading.py +19 -0
- dforge/menu.py +115 -0
- dforge/operations.py +314 -0
- dforge/processor.py +251 -0
- dforge/setup.py +107 -0
- dforge/theme.py +12 -0
- dforge/utils.py +169 -0
- dforge/watcher.py +137 -0
- dforge/workflows/__init__.py +0 -0
- dforge/workflows/automation.py +21 -0
- dforge/workflows/batch.py +18 -0
- dforge/workflows/batch_ocr.py +61 -0
- dforge/workflows/common.py +133 -0
- dforge/workflows/compress.py +73 -0
- dforge/workflows/convert.py +148 -0
- dforge/workflows/decrypt.py +50 -0
- dforge/workflows/encrypt.py +50 -0
- dforge/workflows/extract.py +18 -0
- dforge/workflows/image.py +21 -0
- dforge/workflows/merge.py +109 -0
- dforge/workflows/ocr.py +104 -0
- dforge/workflows/ocr_folder.py +0 -0
- dforge/workflows/pages.py +57 -0
- dforge/workflows/rotate.py +53 -0
- dforge/workflows/searchable.py +51 -0
- dforge/workflows/settings.py +56 -0
- dforge/workflows/split.py +32 -0
- dforge/workflows/tables.py +45 -0
- dforge/workflows/watermark.py +54 -0
- dforge_cli-1.0.1.dist-info/METADATA +244 -0
- dforge_cli-1.0.1.dist-info/RECORD +44 -0
- dforge_cli-1.0.1.dist-info/WHEEL +5 -0
- dforge_cli-1.0.1.dist-info/entry_points.txt +2 -0
- dforge_cli-1.0.1.dist-info/top_level.txt +1 -0
dforge/cli.py
ADDED
|
@@ -0,0 +1,525 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DForge CLI - Forge your documents from your terminal.
|
|
3
|
+
|
|
4
|
+
Entry point for all commands.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List, Optional
|
|
11
|
+
|
|
12
|
+
import typer
|
|
13
|
+
from rich.console import Console
|
|
14
|
+
from dforge.theme import DFORGE_THEME
|
|
15
|
+
from rich.panel import Panel
|
|
16
|
+
from rich.text import Text
|
|
17
|
+
from dforge.setup import setup_dependencies
|
|
18
|
+
|
|
19
|
+
from dforge import __version__
|
|
20
|
+
from dforge.config import DEFAULT_OCR_LANG, DEFAULT_COMPRESS_PRESET, DEFAULT_BATCH_WORKERS
|
|
21
|
+
from dforge.dependencies import doctor as run_doctor
|
|
22
|
+
from dforge.banner import show_banner
|
|
23
|
+
from dforge.menu import main_menu, pdf_menu ,ocr_menu
|
|
24
|
+
|
|
25
|
+
from dforge.workflows.merge import merge_workflow
|
|
26
|
+
from dforge.workflows.compress import compress_workflow
|
|
27
|
+
from dforge.workflows.split import split_workflow
|
|
28
|
+
from dforge.workflows.rotate import rotate_workflow
|
|
29
|
+
from dforge.workflows.pages import pages_workflow
|
|
30
|
+
from dforge.workflows.watermark import watermark_workflow
|
|
31
|
+
from dforge.workflows.encrypt import encrypt_workflow
|
|
32
|
+
from dforge.workflows.decrypt import decrypt_workflow
|
|
33
|
+
from dforge.workflows.ocr import ocr_workflow
|
|
34
|
+
from dforge.workflows.searchable import searchable_workflow
|
|
35
|
+
from dforge.workflows.batch_ocr import batch_ocr_workflow
|
|
36
|
+
from dforge.workflows.tables import tables_workflow
|
|
37
|
+
from dforge.workflows.settings import settings_workflow
|
|
38
|
+
|
|
39
|
+
from dforge.workflows.extract import extract_workflow
|
|
40
|
+
from dforge.workflows.batch import batch_workflow
|
|
41
|
+
from dforge.workflows.automation import automation_workflow
|
|
42
|
+
from dforge.workflows.image import image_workflow
|
|
43
|
+
from dforge.workflows.convert import conversion_workflow
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Root Typer application. Every `@app.command(...)` further down registers a
# sub-command on this object; `no_args_is_help=False` lets the callback run
# when the program is started without a sub-command.
app = typer.Typer(
    no_args_is_help=False,
    rich_markup_mode="rich",
    add_completion=True,
    help="DForge - Unified Document Processing CLI. Forge your documents from your terminal.",
    name="dforge",
)

# Module-wide Rich console, styled with the project theme.
console = Console(theme=DFORGE_THEME)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _version_callback(value: bool):
    """Print the DForge version panel and exit when ``--version`` is set.

    Does nothing (returns ``None``) when *value* is falsy.

    Raises:
        typer.Exit: after the panel has been printed.
    """
    if not value:
        return

    version_text = Text(
        f"DForge v{__version__}",
        justify="center",
        style="bold green",
    )
    version_panel = Panel(
        version_text,
        subtitle="[dim]Forge your documents from your terminal[/dim]",
        border_style="green",
    )
    console.print(version_panel)
    raise typer.Exit()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _run_submenu(show_menu, handlers, unknown_message):
    """Drive one interactive sub-menu until the user selects "⬅ Back".

    Args:
        show_menu: zero-argument callable returning the selected label.
        handlers: mapping of menu label -> zero-argument workflow callable.
        unknown_message: Rich-markup template with a ``{selection}``
            placeholder, printed for labels that have no handler.
    """
    while True:
        selection = show_menu()
        if selection == "⬅ Back":
            return

        handler = handlers.get(selection)
        if handler is not None:
            handler()
        else:
            console.print(unknown_message.format(selection=selection))


@app.callback(invoke_without_command=True)
def main(
    ctx: typer.Context,
    version: Optional[bool] = typer.Option(
        None,
        "--version",
        "-v",
        callback=_version_callback,
        is_eager=True,
        help="Show DForge version.",
    ),
):
    # Application callback. When a sub-command was given, do nothing and let
    # Typer dispatch to it. Otherwise show the banner and run the interactive
    # menu loop until the user picks "❌ Exit" (raises typer.Exit).
    # `version` is consumed by _version_callback; it is not read here.
    if ctx.invoked_subcommand:
        return

    show_banner()

    pdf_handlers = {
        "Merge PDFs": merge_workflow,
        "Compress PDF": compress_workflow,
        "Split PDF": split_workflow,
        "Rotate PDF": rotate_workflow,
        "Extract Pages": pages_workflow,
        "Watermark PDF": watermark_workflow,
        "Encrypt PDF": encrypt_workflow,
        "Decrypt PDF": decrypt_workflow,
    }
    ocr_handlers = {
        "OCR Image/PDF": ocr_workflow,
        "Searchable PDF": searchable_workflow,
        "Batch OCR": batch_ocr_workflow,
        "Extract Tables": tables_workflow,
        "OCR Settings": settings_workflow,
    }

    def run_pdf_menu():
        _run_submenu(
            pdf_menu,
            pdf_handlers,
            "[yellow]{selection} workflow not implemented yet[/yellow]",
        )

    def run_ocr_menu():
        # Fix: the message now names the unhandled OCR entry. The previous
        # code interpolated the top-level choice, so it always printed
        # "🔍 OCR not implemented yet".
        _run_submenu(
            ocr_menu,
            ocr_handlers,
            "[yellow]{selection} not implemented yet[/yellow]",
        )

    top_level_handlers = {
        "📄 PDF Tools": run_pdf_menu,
        "🔍 OCR": run_ocr_menu,
        "🔄 Conversion": conversion_workflow,
        "📂 Extract": extract_workflow,
        "⚡ Batch Processing": batch_workflow,
        "👀 Watch Folder": automation_workflow,
        "🖼 Image Processing": image_workflow,
    }

    while True:
        choice = main_menu()

        if choice == "❌ Exit":
            raise typer.Exit()

        handler = top_level_handlers.get(choice)
        if handler is not None:
            handler()
        # Any other value is ignored and the main menu is shown again,
        # as before.
|
|
165
|
+
|
|
166
|
+
# ===========================================================================
|
|
167
|
+
# HELPER Commands
|
|
168
|
+
# ===========================================================================
|
|
169
|
+
|
|
170
|
+
@app.command("doctor")
def cmd_doctor():
    """Check external dependencies."""
    # Thin CLI shim: `run_doctor` is dforge.dependencies.doctor, imported
    # under that alias at the top of the module.
    run_doctor()
|
|
174
|
+
|
|
175
|
+
@app.command("setup")
def cmd_setup():
    """Install external dependencies."""
    # Thin CLI shim around dforge.setup.setup_dependencies (module-level import).
    setup_dependencies()
|
|
179
|
+
# ===========================================================================
|
|
180
|
+
# PDF Commands
|
|
181
|
+
# ===========================================================================
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@app.command("merge")
def cmd_merge(
    inputs: List[Path] = typer.Argument(
        ...,
        help="PDF files to merge (at least 2).",
    ),
    output: Optional[str] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
):
    """[bold]Merge[/bold] multiple PDF files into one."""
    # Validate before importing anything: fewer than two inputs is a usage
    # error reported on stderr with exit status 1.
    if len(inputs) < 2:
        typer.echo("Error: Provide at least 2 PDF files to merge.", err=True)
        raise typer.Exit(1)

    from dforge.operations import merge as merge_pdfs

    # Without -o/--output the result is written as "merged.pdf" in the same
    # directory as the first input file.
    if output:
        out_path = Path(output)
    else:
        out_path = Path(inputs[0]).with_name("merged.pdf")

    merge_pdfs(inputs, out_path)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@app.command("split")
def cmd_split(
    input_file: Path = typer.Argument(..., help="PDF file to split."),
    output_dir: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output-dir",
        help="Directory for output pages.",
    ),
):
    """[bold]Split[/bold] a PDF into individual page files."""
    # Implementation is imported inside the command body, then called with
    # the parsed arguments unchanged.
    from dforge.operations import split as split_pdf

    split_pdf(input_file, output_dir)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@app.command("compress")
def cmd_compress(
    input_file: Path = typer.Argument(..., help="PDF file to compress."),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
    preset: str = typer.Option(
        DEFAULT_COMPRESS_PRESET,
        "--preset",
        help="Ghostscript preset: screen|ebook|printer|prepress|default.",
    ),
):
    """[bold]Compress[/bold] a PDF using Ghostscript."""
    # `preset` is forwarded as given; it is not validated at this layer.
    from dforge.operations import compress as compress_pdf

    compress_pdf(input_file, output, preset)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
@app.command("rotate")
def cmd_rotate(
    input_file: Path = typer.Argument(..., help="PDF file to rotate."),
    degrees: int = typer.Argument(
        ...,
        help="Degrees to rotate: 90, 180, or 270.",
    ),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
):
    """[bold]Rotate[/bold] all pages of a PDF."""
    # `degrees` is passed through untouched; this wrapper does not check that
    # it is one of the values listed in the help text.
    from dforge.operations import rotate as rotate_pdf

    rotate_pdf(input_file, degrees, output)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
@app.command("pages")
def cmd_pages(
    input_file: Path = typer.Argument(..., help="PDF file."),
    page_range: str = typer.Argument(
        ...,
        help='Page range, e.g. "1-5", "3", or "1,3,5".',
    ),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
):
    """[bold]Extract[/bold] a range of pages from a PDF."""
    # The raw range string is handed to the implementation; no parsing
    # happens in this wrapper.
    from dforge.operations import extract_pages as extract_page_range

    extract_page_range(input_file, page_range, output)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
@app.command("watermark")
def cmd_watermark(
    input_file: Path = typer.Argument(..., help="PDF file to watermark."),
    watermark_file: Path = typer.Argument(
        ...,
        help="Watermark file (PDF or image).",
    ),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
):
    """[bold]Watermark[/bold] a PDF with an image or PDF overlay."""
    # Thin CLI shim: forwards (input, watermark, output) positionally.
    from dforge.operations import watermark as apply_watermark

    apply_watermark(input_file, watermark_file, output)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
@app.command("encrypt")
def cmd_encrypt(
    input_file: Path = typer.Argument(..., help="PDF file to encrypt."),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
    password: str = typer.Option(
        ...,
        prompt=True,
        hide_input=True,
        confirmation_prompt=True,
        help="Password.",
    ),
):
    """[bold]Encrypt[/bold] a PDF with a password."""
    # The password option is declared with a hidden, confirmed prompt.
    # Note the callee takes the password before the output path.
    from dforge.operations import encrypt as encrypt_pdf

    encrypt_pdf(input_file, password, output)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@app.command("decrypt")
def cmd_decrypt(
    input_file: Path = typer.Argument(..., help="Encrypted PDF file."),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
    password: str = typer.Option(
        ...,
        prompt=True,
        hide_input=True,
        help="Password.",
    ),
):
    """[bold]Decrypt[/bold] a password-protected PDF."""
    # Hidden prompt without confirmation (unlike `encrypt`).
    # Note the callee takes the password before the output path.
    from dforge.operations import decrypt as decrypt_pdf

    decrypt_pdf(input_file, password, output)
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
# ===========================================================================
|
|
275
|
+
# OCR Commands
|
|
276
|
+
# ===========================================================================
|
|
277
|
+
|
|
278
|
+
@app.command("ocr")
def cmd_ocr(
    input_file: Path = typer.Argument(
        ...,
        help="Image or PDF file to run OCR on.",
    ),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
    lang: str = typer.Option(
        DEFAULT_OCR_LANG,
        "--lang",
        help='Tesseract language(s), e.g. "eng" or "eng+hin".',
    ),
    fmt: str = typer.Option(
        "txt",
        "--fmt",
        help="Output format: txt | json | md.",
    ),
):
    """[bold]Run OCR[/bold] on an image or PDF file."""
    # Route on the (case-insensitive) file extension: ".pdf" goes to the PDF
    # engine, every other suffix is treated as an image.
    is_pdf = input_file.suffix.lower() == ".pdf"
    if is_pdf:
        from dforge.engine import ocr_pdf as run_ocr
    else:
        from dforge.engine import ocr_image as run_ocr

    run_ocr(input_file, output, lang, fmt)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
@app.command("searchable")
def cmd_searchable(
    input_file: Path = typer.Argument(
        ...,
        help="Scanned PDF to make searchable.",
    ),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
    lang: str = typer.Option(
        DEFAULT_OCR_LANG,
        "--lang",
        help="Tesseract language(s).",
    ),
    dpi: int = typer.Option(300, "--dpi", help="DPI for page rendering."),
):
    """[bold]Create a searchable PDF[/bold] from a scanned PDF."""
    # Thin CLI shim: forwards (input, output, lang, dpi) positionally.
    from dforge.engine import make_searchable_pdf as build_searchable_pdf

    build_searchable_pdf(input_file, output, lang, dpi)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
@app.command("batch-ocr")
def cmd_batch_ocr(
    directory: Path = typer.Argument(..., help="Directory to scan for files."),
    lang: str = typer.Option(
        DEFAULT_OCR_LANG,
        "--lang",
        help="Tesseract language(s).",
    ),
    fmt: str = typer.Option(
        "txt",
        "--fmt",
        help="Output format: txt | json | md.",
    ),
    workers: int = typer.Option(
        DEFAULT_BATCH_WORKERS,
        "--workers",
        help="Number of parallel workers.",
    ),
    no_recursive: bool = typer.Option(
        False,
        "--no-recursive",
        help="Disable recursive directory scan.",
    ),
):
    """[bold]Batch OCR[/bold] all images and PDFs in a directory."""
    from dforge.batch import batch_with_ocr as run_batch_ocr

    # The CLI exposes an opt-out flag; the callee receives the positive form.
    recursive = not no_recursive
    run_batch_ocr(directory, lang, fmt, recursive, workers)
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
# ===========================================================================
|
|
321
|
+
# Conversion Commands
|
|
322
|
+
# ===========================================================================
|
|
323
|
+
|
|
324
|
+
@app.command("convert")
def cmd_convert(
    input_file: Path = typer.Argument(..., help="Input document."),
    target_format: str = typer.Argument(
        ...,
        help="Target format: pdf | docx | md | html | txt | rst | odt | epub.",
    ),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
):
    """[bold]Convert[/bold] a document to another format using Pandoc."""
    # `target_format` is forwarded as typed; no validation at this layer.
    from dforge.converter import convert as convert_document

    convert_document(input_file, target_format, output)
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
@app.command("img2pdf")
def cmd_img2pdf(
    source: Path = typer.Argument(
        ...,
        help="Image file or directory of images.",
    ),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output PDF path.",
    ),
):
    """[bold]Combine images[/bold] into a single PDF."""
    # Thin CLI shim: forwards (source, output) positionally.
    from dforge.converter import images_to_pdf as build_pdf_from_images

    build_pdf_from_images(source, output)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
@app.command("pdf2img")
def cmd_pdf2img(
    input_file: Path = typer.Argument(..., help="PDF file to convert."),
    output_dir: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output-dir",
        help="Output directory.",
    ),
    dpi: int = typer.Option(200, "--dpi", help="Image DPI."),
    fmt: str = typer.Option(
        "png",
        "--fmt",
        help="Image format: png | jpeg | tiff.",
    ),
):
    """[bold]Convert PDF pages[/bold] to image files."""
    # Thin CLI shim: forwards (input, output_dir, dpi, fmt) positionally.
    from dforge.converter import pdf_to_images as render_pdf_pages

    render_pdf_pages(input_file, output_dir, dpi, fmt)
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
# ===========================================================================
|
|
358
|
+
# Extraction Commands
|
|
359
|
+
# ===========================================================================
|
|
360
|
+
|
|
361
|
+
@app.command("text")
def cmd_text(
    input_file: Path = typer.Argument(..., help="PDF file."),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output .txt file path.",
    ),
):
    """[bold]Extract text[/bold] from a PDF."""
    # Thin CLI shim around dforge.extractor.extract_text.
    from dforge.extractor import extract_text as extract_pdf_text

    extract_pdf_text(input_file, output)
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
@app.command("images")
def cmd_images(
    input_file: Path = typer.Argument(..., help="PDF file."),
    output_dir: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output-dir",
        help="Output directory.",
    ),
):
    """[bold]Extract embedded images[/bold] from a PDF."""
    # Thin CLI shim around dforge.extractor.extract_images.
    from dforge.extractor import extract_images as extract_pdf_images

    extract_pdf_images(input_file, output_dir)
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
@app.command("metadata")
def cmd_metadata(
    input_file: Path = typer.Argument(..., help="PDF file."),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Save metadata to JSON file.",
    ),
):
    """[bold]Display metadata[/bold] from a PDF."""
    # Thin CLI shim around dforge.extractor.extract_metadata.
    from dforge.extractor import extract_metadata as extract_pdf_metadata

    extract_pdf_metadata(input_file, output)
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
@app.command("tables")
def cmd_tables(
    input_file: Path = typer.Argument(..., help="PDF file."),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file or directory.",
    ),
    fmt: str = typer.Option(
        "csv",
        "--fmt",
        help="Export format: csv | xlsx | json.",
    ),
):
    """[bold]Extract tables[/bold] from a PDF."""
    # `fmt` is forwarded as typed; no validation at this layer.
    from dforge.extractor import extract_tables as extract_pdf_tables

    extract_pdf_tables(input_file, output, fmt)
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
# ===========================================================================
|
|
403
|
+
# Image Processing Commands
|
|
404
|
+
# ===========================================================================
|
|
405
|
+
|
|
406
|
+
@app.command("enhance")
def cmd_enhance(
    input_file: Path = typer.Argument(..., help="Image file to enhance."),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
):
    """[bold]Enhance[/bold] image contrast, brightness, and sharpness."""
    # Thin CLI shim around dforge.processor.enhance.
    from dforge.processor import enhance as enhance_image

    enhance_image(input_file, output)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
@app.command("deskew")
def cmd_deskew(
    input_file: Path = typer.Argument(..., help="Image file to deskew."),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
):
    """[bold]Correct the skew angle[/bold] of a scanned image."""
    # Thin CLI shim around dforge.processor.deskew.
    from dforge.processor import deskew as deskew_image

    deskew_image(input_file, output)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
@app.command("denoise")
def cmd_denoise(
    input_file: Path = typer.Argument(..., help="Image file to denoise."),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
):
    """[bold]Remove noise[/bold] from an image."""
    # Thin CLI shim around dforge.processor.denoise.
    from dforge.processor import denoise as denoise_image

    denoise_image(input_file, output)
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
@app.command("resize")
def cmd_resize(
    input_file: Path = typer.Argument(..., help="Image file to resize."),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
    width: Optional[int] = typer.Option(
        None,
        "--width",
        help="Target width in pixels.",
    ),
    height: Optional[int] = typer.Option(
        None,
        "--height",
        help="Target height in pixels.",
    ),
    scale: Optional[float] = typer.Option(
        None,
        "--scale",
        help="Scale factor, e.g. 0.5 for 50%.",
    ),
):
    """[bold]Resize[/bold] an image by width, height, or scale factor."""
    # All three sizing options are optional and forwarded as-is; how they
    # combine is decided by the callee. Note `output` is the LAST positional
    # argument here, unlike most sibling commands.
    from dforge.processor import resize as resize_image

    resize_image(input_file, width, height, scale, output)
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
@app.command("preprocess")
def cmd_preprocess(
    input_file: Path = typer.Argument(
        ...,
        help="Image to preprocess for OCR.",
    ),
    output: Optional[Path] = typer.Option(
        None,
        "-o",
        "--output",
        help="Output file path.",
    ),
):
    """[bold]OCR preprocessing pipeline[/bold]: orientation -> contrast -> denoise -> binarize."""
    # Thin CLI shim around dforge.processor.preprocess_for_ocr.
    from dforge.processor import preprocess_for_ocr as run_preprocess

    run_preprocess(input_file, output)
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
# ===========================================================================
|
|
460
|
+
# Batch Processing Command
|
|
461
|
+
# ===========================================================================
|
|
462
|
+
|
|
463
|
+
@app.command("batch")
def cmd_batch(
    directory: Path = typer.Argument(..., help="Directory to batch process."),
    ocr: bool = typer.Option(
        False,
        "--ocr",
        help="Run OCR on all images/PDFs.",
    ),
    compress: bool = typer.Option(
        False,
        "--compress",
        help="Compress all PDFs.",
    ),
    convert_to: Optional[str] = typer.Option(
        None,
        "--convert",
        help="Convert all documents to this format.",
    ),
    lang: str = typer.Option(
        DEFAULT_OCR_LANG,
        "--lang",
        help="OCR language(s).",
    ),
    fmt: str = typer.Option(
        "txt",
        "--fmt",
        help="OCR output format: txt | json | md.",
    ),
    workers: int = typer.Option(
        DEFAULT_BATCH_WORKERS,
        "--workers",
        help="Parallel workers.",
    ),
    no_recursive: bool = typer.Option(
        False,
        "--no-recursive",
        help="Disable recursive scan.",
    ),
):
    """[bold]Batch process[/bold] a directory of files."""
    from dforge.batch import batch_with_ocr, batch_compress, batch_convert

    recursive = not no_recursive

    # Exactly one action runs per invocation. When several flags are given,
    # the first match in the order --ocr, --compress, --convert wins.
    if ocr:
        batch_with_ocr(directory, lang, fmt, recursive, workers)
        return

    if compress:
        batch_compress(directory, recursive=recursive, workers=workers)
        return

    if convert_to:
        batch_convert(directory, convert_to, recursive=recursive, workers=workers)
        return

    # No action flag at all: tell the user and exit with status 1.
    console.print("[yellow]Specify an action: --ocr, --compress, or --convert <format>[/yellow]")
    raise typer.Exit(1)
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
# ===========================================================================
|
|
489
|
+
# Watch Command
|
|
490
|
+
# ===========================================================================
|
|
491
|
+
|
|
492
|
+
@app.command("watch")
def cmd_watch(
    directory: Path = typer.Argument(..., help="Directory to monitor."),
    ocr: bool = typer.Option(False, "--ocr", help="Run OCR on new files."),
    searchable: bool = typer.Option(
        False,
        "--searchable",
        help="Make new PDFs searchable.",
    ),
    compress: bool = typer.Option(
        False,
        "--compress",
        help="Compress new PDFs.",
    ),
    preprocess: bool = typer.Option(
        False,
        "--preprocess",
        help="Preprocess new images for OCR.",
    ),
    lang: str = typer.Option(
        DEFAULT_OCR_LANG,
        "--lang",
        help="OCR language(s).",
    ),
    fmt: str = typer.Option("txt", "--fmt", help="OCR output format."),
):
    """[bold]Watch[/bold] a directory and auto-process new files."""
    # Exactly one action is selected per invocation. When several flags are
    # given, the first enabled one in this fixed priority order wins.
    flag_to_action = (
        (ocr, "ocr"),
        (searchable, "searchable"),
        (compress, "compress"),
        (preprocess, "preprocess"),
    )
    action = next((name for enabled, name in flag_to_action if enabled), None)

    if action is None:
        console.print("[yellow]Specify an action: --ocr, --searchable, --compress, or --preprocess[/yellow]")
        raise typer.Exit(1)

    # Fix: `watch` was called without ever being imported in this module, so
    # the command failed with NameError. Imported here, after validation,
    # like every sibling command does with its implementation.
    # NOTE(review): assumes dforge/watcher.py exposes a callable named
    # `watch` taking (directory, action, lang, fmt) - confirm against
    # that module.
    from dforge.watcher import watch

    watch(directory, action, lang, fmt)
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
# ===========================================================================
|
|
521
|
+
# Entry point
|
|
522
|
+
# ===========================================================================
|
|
523
|
+
|
|
524
|
+
if __name__ == "__main__":
    # Direct execution of this file runs the Typer application.
    app()
|
dforge/config.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DForge Configuration
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Default output directory (same as input unless overridden)
DEFAULT_OUTPUT_DIR = None

# OCR defaults
DEFAULT_OCR_LANG = "eng"
DEFAULT_OCR_DPI = 300

# Image processing defaults
DEFAULT_IMAGE_DPI = 200
DEFAULT_COMPRESS_QUALITY = 85

# PDF compress preset (ghostscript)
# Options: screen, ebook, printer, prepress, default
DEFAULT_COMPRESS_PRESET = "ebook"

# Batch processing
DEFAULT_BATCH_WORKERS = 4
SUPPORTED_IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".webp"}
SUPPORTED_PDF_EXTS = {".pdf"}
SUPPORTED_DOC_EXTS = {".docx", ".odt", ".rtf", ".txt", ".md", ".html"}

# Watch mode
WATCH_DEBOUNCE_SECONDS = 2

# Paths
DFORGE_CONFIG_DIR = Path.home() / ".dforge"
DFORGE_TEMP_DIR = DFORGE_CONFIG_DIR / "tmp"

# Ensure dirs exist. This runs at import time, so it must not fail just
# because the home directory itself has not been created yet (fresh
# containers, service accounts): parents=True creates any missing ancestors.
DFORGE_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
DFORGE_TEMP_DIR.mkdir(parents=True, exist_ok=True)
|
dforge/config_manager.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
# Per-user settings live in ~/.dforge/config.json.
CONFIG_DIR = Path.home().joinpath(".dforge")
CONFIG_FILE = CONFIG_DIR.joinpath("config.json")
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_config():
    """Return the saved configuration as a dict.

    Reading is best-effort: a missing, unreadable or malformed file yields
    an empty dict rather than an exception.

    Returns:
        dict: the parsed JSON object, or ``{}`` when the file cannot be read
        or parsed, or when its top-level JSON value is not an object.
    """
    try:
        # Opening directly (no exists() pre-check) avoids a check-then-use
        # race; a missing file surfaces as FileNotFoundError, an OSError.
        # JSON is read as UTF-8 so non-ASCII paths in a hand-edited file do
        # not depend on the platform's locale encoding.
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except (OSError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}

    # Callers use .get() and item assignment on the result, so a valid but
    # non-object document (e.g. `[]` or `null`) is treated as "no config".
    if not isinstance(loaded, dict):
        return {}
    return loaded
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def save_config(config: dict):
    """Write *config* to the config file as 4-space-indented JSON.

    Args:
        config: JSON-serialisable mapping to persist.

    Raises:
        TypeError: if *config* contains a value json cannot serialise. The
            existing file is left untouched in that case.
        OSError: if the directory or file cannot be created or written.
    """
    # Serialise BEFORE opening the file: opening with "w" truncates it, so a
    # serialisation error halfway through would otherwise wipe the config.
    payload = json.dumps(config, indent=4)

    # parents=True: do not fail when the home directory itself is missing.
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)

    with open(CONFIG_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_tool_path(tool: str):
    """Return the value stored under *tool* in the config, or ``None`` if absent."""
    settings = load_config()
    return settings.get(tool)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def set_tool_path(tool: str, path: str):
    """Store *path* under the key *tool* and write the config back to disk.

    Performs a read-modify-write: the current config is loaded, the single
    key is set (added or overwritten), and the whole mapping is saved.
    """
    settings = load_config()
    settings.update({tool: path})
    save_config(settings)
|