dforge-cli 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ from pathlib import Path
2
+
3
+ import questionary
4
+ from rich.console import Console
5
+ from dforge.loading import Loader
6
+ from dforge.engine import make_searchable_pdf
7
+
8
+ from dforge.workflows.common import (
9
+ select_single_pdf,
10
+ success_screen,
11
+ get_output_name,
12
+ )
13
+
14
+ console = Console()
15
+
16
+
17
def searchable_workflow():
    """Run the interactive "Searchable PDF" workflow.

    Prompts for a source PDF, an OCR language string and an output file
    name, then calls ``make_searchable_pdf`` inside a ``Loader`` block and
    shows the success screen.  Cancelling any of the prompts ends the
    workflow without running the conversion.

    Returns:
        None.
    """
    console.print(
        "\n[bold cyan]Searchable PDF[/bold cyan]\n"
    )

    pdf = select_single_pdf()
    if not pdf:
        return

    lang = questionary.text(
        "OCR Language(s)",
        default="eng",
    ).ask()

    # ``ask()`` returns None when the prompt is cancelled (e.g. Ctrl-C);
    # stop here, like the other prompts do, instead of handing None to the
    # OCR step.
    if lang is None:
        return

    # A blank answer falls back to the prompt's own default.
    lang = lang.strip() or "eng"

    output = get_output_name(
        f"{pdf.stem}_searchable.pdf"
    )
    if not output:
        return

    output_path = Path(output)
    with Loader("Creating searchable PDF..."):
        make_searchable_pdf(
            pdf,
            output_path,
            lang,
            # presumably the render DPI -- TODO confirm against the
            # signature of make_searchable_pdf
            300,
        )

    success_screen(
        "Searchable PDF Created",
        output_file=output_path.name,
    )
@@ -0,0 +1,56 @@
1
+ import questionary
2
+
3
+ from dforge.config_manager import (
4
+ load_config,
5
+ save_config,
6
+ )
7
+
8
+
9
def _positive_int_or_message(text):
    """Return True if *text* parses as an integer >= 1, else an error message.

    Shaped for use as a questionary ``validate`` callable, which accepts
    either ``True`` or a message string explaining the rejection.
    """
    try:
        value = int(text)
    except (TypeError, ValueError):
        return "Please enter a whole number"
    if value < 1:
        return "Please enter a number greater than zero"
    return True


def settings_workflow():
    """Interactively edit the stored OCR settings and save them.

    Loads the configuration, asks for the default OCR language, the DPI and
    the worker count (each pre-filled from the current configuration), then
    writes the three values back under ``ocr_language``, ``ocr_dpi`` and
    ``ocr_workers`` and calls ``save_config``.

    Cancelling any prompt leaves the stored configuration untouched.

    Returns:
        None.
    """
    config = load_config()

    languages = [
        "eng",
        "hin",
        "tel",
        "tam",
        "jpn",
    ]

    # questionary rejects a ``default`` that is not one of the choices, so
    # a stored value outside the built-in list is either offered as an
    # extra choice (keeps a hand-edited setting selectable) or replaced by
    # "eng" when it is not a usable string.
    stored_lang = config.get("ocr_language", "eng")
    if stored_lang not in languages:
        if isinstance(stored_lang, str) and stored_lang:
            languages.insert(0, stored_lang)
        else:
            stored_lang = "eng"

    lang = questionary.select(
        "Default OCR language",
        choices=languages,
        default=stored_lang,
    ).ask()
    # ``ask()`` returns None when the prompt is cancelled.
    if lang is None:
        return

    dpi_text = questionary.text(
        "DPI",
        default=str(config.get("ocr_dpi", 300)),
        validate=_positive_int_or_message,
    ).ask()
    if dpi_text is None:
        return

    workers_text = questionary.text(
        "Workers",
        default=str(config.get("ocr_workers", 4)),
        validate=_positive_int_or_message,
    ).ask()
    if workers_text is None:
        return

    # The validator has already ensured both answers parse as integers.
    config["ocr_language"] = lang
    config["ocr_dpi"] = int(dpi_text)
    config["ocr_workers"] = int(workers_text)

    save_config(config)
@@ -0,0 +1,32 @@
1
+ from rich.console import Console
2
+
3
+ from dforge.operations import split
4
+
5
+ from dforge.workflows.common import (
6
+ select_single_pdf,
7
+ success_screen,
8
+ )
9
+
10
+ console = Console()
11
+
12
+
13
def split_workflow():
    """Run the interactive "Split PDF" workflow.

    Asks the user to pick one PDF, passes it to ``split`` and then shows a
    success screen naming the source file.  Does nothing further when no
    PDF is selected.
    """
    console.print("\n[bold cyan]Split PDF[/bold cyan]\n")

    source_pdf = select_single_pdf()
    if source_pdf:
        console.print("\n[bold cyan]Splitting PDF...[/bold cyan]\n")

        split(source_pdf)

        summary = [f"Source : {source_pdf.name}"]
        success_screen("Split Complete", extra_lines=summary)
@@ -0,0 +1,45 @@
1
+ from pathlib import Path
2
+
3
+ import questionary
4
+ from rich.console import Console
5
+
6
+ from dforge.extractor import extract_tables
7
+
8
+ from dforge.workflows.common import (
9
+ select_single_pdf,
10
+ success_screen,
11
+ )
12
+
13
+ console = Console()
14
+
15
+
16
def tables_workflow():
    """Run the interactive "Extract Tables" workflow.

    Asks for a source PDF and an output format, derives the output file
    name ``<pdf stem>_tables.<format>``, calls ``extract_tables`` and shows
    the success screen.  Cancelling either prompt ends the workflow without
    extracting anything.

    Returns:
        None.
    """
    console.print("\n[bold cyan]Extract Tables[/bold cyan]\n")

    pdf = select_single_pdf()
    if not pdf:
        return

    fmt = questionary.select(
        "Output format",
        choices=[
            "xlsx",
            "csv",
            "json",
        ],
    ).ask()

    # ``ask()`` returns None when the prompt is cancelled; without this
    # guard the output file would be named "<stem>_tables.None".
    if not fmt:
        return

    output = Path(
        f"{pdf.stem}_tables.{fmt}"
    )

    extract_tables(
        pdf,
        output,
    )

    success_screen(
        "Table Extraction Complete",
        output_file=output.name,
    )
@@ -0,0 +1,54 @@
1
+ from pathlib import Path
2
+
3
+ import questionary
4
+ from rich.console import Console
5
+
6
+ from dforge.operations import watermark
7
+
8
+ from dforge.workflows.common import (
9
+ select_single_pdf,
10
+ success_screen,
11
+ get_output_name,
12
+ )
13
+
14
+ console = Console()
15
+
16
+
17
def watermark_workflow():
    """Run the interactive "Watermark PDF" workflow.

    Collects three answers in order -- the source PDF, the watermark file
    path and the output file name -- and stops as soon as one of them is
    empty or cancelled.  With all three in hand it calls ``watermark`` and
    then shows the success screen with the output file's name.
    """
    console.print("\n[bold cyan]Watermark PDF[/bold cyan]\n")

    source_pdf = select_single_pdf()
    if not source_pdf:
        return

    stamp_prompt = questionary.path("Watermark file (PDF/Image):")
    stamp_answer = stamp_prompt.ask()
    if not stamp_answer:
        return

    suggested_name = f"{source_pdf.stem}_watermarked.pdf"
    chosen_name = get_output_name(suggested_name)
    if not chosen_name:
        return

    destination = Path(chosen_name)

    console.print("\n[bold cyan]Applying Watermark...[/bold cyan]\n")

    watermark(source_pdf, Path(stamp_answer), destination)

    success_screen("Watermark Complete", output_file=destination.name)
@@ -0,0 +1,244 @@
1
+ Metadata-Version: 2.4
2
+ Name: dforge-cli
3
+ Version: 1.0.1
4
+ Summary: DForge — Unified Document Processing CLI. Forge your documents from your terminal.
5
+ Author: Punith Naidu
6
+ License: MIT
7
+ Keywords: pdf,ocr,document,cli,conversion,tesseract,batch
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: End Users/Desktop
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Office/Business
19
+ Classifier: Topic :: Utilities
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown
22
+ Requires-Dist: typer[all]>=0.9.0
23
+ Requires-Dist: pypdf>=3.0.0
24
+ Requires-Dist: pikepdf>=8.0.0
25
+ Requires-Dist: Pillow>=10.0.0
26
+ Requires-Dist: opencv-python-headless>=4.8.0
27
+ Requires-Dist: img2pdf>=0.4.4
28
+ Requires-Dist: watchdog>=3.0.0
29
+ Requires-Dist: tqdm>=4.66.0
30
+ Requires-Dist: rich>=13.0.0
31
+ Requires-Dist: questionary>=2.0.0
32
+ Requires-Dist: prompt_toolkit>=3.0.0
33
+ Requires-Dist: pyfiglet>=1.0.2
34
+ Provides-Extra: ocr
35
+ Requires-Dist: pytesseract>=0.3.10; extra == "ocr"
36
+ Requires-Dist: pdf2image>=1.16.3; extra == "ocr"
37
+ Provides-Extra: tables
38
+ Requires-Dist: pdfplumber>=0.10.0; extra == "tables"
39
+ Requires-Dist: pandas>=2.0.0; extra == "tables"
40
+ Requires-Dist: openpyxl>=3.1.0; extra == "tables"
41
+ Provides-Extra: full
42
+ Requires-Dist: pytesseract>=0.3.10; extra == "full"
43
+ Requires-Dist: pdf2image>=1.16.3; extra == "full"
44
+ Requires-Dist: pdfplumber>=0.10.0; extra == "full"
45
+ Requires-Dist: pandas>=2.0.0; extra == "full"
46
+ Requires-Dist: openpyxl>=3.1.0; extra == "full"
47
+
48
+ # DForge — Forge your documents from your terminal.
49
+
50
+ A unified, offline-first Python CLI for all your document processing needs.
51
+
52
+ ---
53
+
54
+ ## Installation
55
+
56
+ ```bash
57
+ pip install dforge-cli
58
+ ```
59
+
60
+ ### External Dependencies
61
+
62
+ | Tool | Purpose | Install |
63
+ |------|---------|---------|
64
+ | Tesseract OCR | OCR engine | [Install guide](https://tesseract-ocr.github.io/tessdoc/Installation.html) |
65
+ | Ghostscript | PDF compression | [ghostscript.com](https://ghostscript.com/releases/gsdnld.html) |
66
+ | Pandoc | Document conversion | [pandoc.org](https://pandoc.org/installing.html) |
67
+ | Poppler | PDF → image (pdf2image) | `apt install poppler-utils` / `brew install poppler` |
68
+
69
+ ---
70
+
71
+ ## Quick Reference
72
+
73
+ ### PDF Operations
74
+
75
+ ```bash
76
+ # Merge PDFs
77
+ dforge merge a.pdf b.pdf c.pdf -o merged.pdf
78
+
79
+ # Split into pages
80
+ dforge split report.pdf
81
+
82
+ # Compress (uses Ghostscript)
83
+ dforge compress large.pdf --preset ebook
84
+
85
+ # Rotate pages
86
+ dforge rotate file.pdf 90
87
+
88
+ # Extract page range
89
+ dforge pages file.pdf 1-5
90
+
91
+ # Watermark
92
+ dforge watermark file.pdf logo.png
93
+
94
+ # Encrypt / Decrypt
95
+ dforge encrypt file.pdf
96
+ dforge decrypt protected.pdf
97
+ ```
98
+
99
+ ### OCR
100
+
101
+ ```bash
102
+ # OCR an image
103
+ dforge ocr scan.png
104
+
105
+ # OCR a PDF
106
+ dforge ocr scan.pdf
107
+
108
+ # Output as JSON or Markdown
109
+ dforge ocr scan.pdf --fmt json
110
+ dforge ocr scan.pdf --fmt md
111
+
112
+ # Multi-language OCR
113
+ dforge ocr scan.png --lang eng+hin
114
+
115
+ # Make a scanned PDF searchable
116
+ dforge searchable scan.pdf
117
+
118
+ # Batch OCR an entire folder
119
+ dforge batch-ocr invoices/
120
+ ```
121
+
122
+ ### Document Conversion
123
+
124
+ ```bash
125
+ # Convert DOCX → PDF
126
+ dforge convert report.docx pdf
127
+
128
+ # Convert Markdown → HTML
129
+ dforge convert notes.md html
130
+
131
+ # Combine images into a PDF
132
+ dforge img2pdf scans/
133
+
134
+ # Export PDF pages as images
135
+ dforge pdf2img report.pdf --dpi 300 --fmt png
136
+ ```
137
+
138
+ ### Content Extraction
139
+
140
+ ```bash
141
+ # Extract text
142
+ dforge text report.pdf
143
+
144
+ # Extract embedded images
145
+ dforge images report.pdf
146
+
147
+ # Show / save metadata
148
+ dforge metadata report.pdf
149
+ dforge metadata report.pdf -o meta.json
150
+
151
+ # Extract tables
152
+ dforge tables invoice.pdf --fmt xlsx
153
+ dforge tables invoice.pdf --fmt csv
154
+ dforge tables invoice.pdf --fmt json
155
+ ```
156
+
157
+ ### Image Processing
158
+
159
+ ```bash
160
+ # Enhance (contrast + sharpness)
161
+ dforge enhance scan.png
162
+
163
+ # Fix skewed scans
164
+ dforge deskew scan.png
165
+
166
+ # Remove noise
167
+ dforge denoise scan.png
168
+
169
+ # Resize
170
+ dforge resize photo.png --width 800
171
+ dforge resize photo.png --scale 0.5
172
+
173
+ # Full OCR preprocessing pipeline
174
+ dforge preprocess scan.png
175
+ ```
176
+
177
+ ### Batch Processing
178
+
179
+ ```bash
180
+ # Batch OCR with 8 workers
181
+ dforge batch ./documents --ocr --workers 8
182
+
183
+ # Batch compress
184
+ dforge batch ./pdfs --compress
185
+
186
+ # Batch convert to markdown
187
+ dforge batch ./docs --convert md
188
+ ```
189
+
190
+ ### Watch Mode
191
+
192
+ ```bash
193
+ # Auto-OCR new files dropped into a folder
194
+ dforge watch ./incoming --ocr
195
+
196
+ # Auto-make-searchable
197
+ dforge watch ./scans --searchable
198
+
199
+ # Auto-compress
200
+ dforge watch ./uploads --compress
201
+ ```
202
+
203
+ ---
204
+
205
+ ## Project Structure
206
+
207
+ ```
208
+ dforge/
209
+ ├── cli.py ← Typer CLI entry point
210
+ ├── config.py ← Global configuration
211
+ ├── utils.py ← Shared utilities
212
+ ├── pdf/
213
+ │ └── operations.py ← merge, split, compress, rotate, pages, watermark, encrypt, decrypt
214
+ ├── ocr/
215
+ │ └── engine.py ← ocr_image, ocr_pdf, make_searchable_pdf, batch_ocr
216
+ ├── convert/
217
+ │ └── converter.py ← convert, images_to_pdf, pdf_to_images
218
+ ├── extract/
219
+ │ └── extractor.py ← extract_text, extract_images, extract_metadata, extract_tables
220
+ ├── image/
221
+ │ └── processor.py ← enhance, deskew, denoise, resize, preprocess_for_ocr
222
+ ├── batch/
223
+ │ └── processor.py ← parallel batch processing
224
+ └── watch/
225
+ └── watcher.py ← watchdog-based directory monitor
226
+ ```
227
+
228
+ ---
229
+
230
+ ## Supported Formats
231
+
232
+ | Category | Formats |
233
+ |----------|---------|
234
+ | Input documents | PDF, DOCX, ODT, MD, HTML, TXT, RST, EPUB |
235
+ | Input images | PNG, JPG/JPEG, TIFF/TIF, BMP, WebP |
236
+ | OCR output | TXT, JSON, Markdown |
237
+ | Table export | CSV, XLSX, JSON |
238
+ | Image export | PNG, JPEG, TIFF |
239
+
240
+ ---
241
+
242
+ ## License
243
+
244
+ MIT License — DForge Contributors
@@ -0,0 +1,44 @@
1
+ dforge/__init__.py,sha256=Aj77VL1d5Mdku7sgCgKQmPuYavPpAHuZuJcy6bygQZE,21
2
+ dforge/banner.py,sha256=r4fjn46oY2vYKN8Dr397cQN0nM-7Nu69FN78WvYdisI,533
3
+ dforge/batch.py,sha256=d5ONPODryq-sMpIifGPNjuacLMXS_0F7v-Qj_o64V9U,4434
4
+ dforge/cli.py,sha256=9VVrHcuyyLEaTIEHwWOLhulxnYQ90mP5o6dvcTywhL0,19766
5
+ dforge/config.py,sha256=vGK-VuutSLfnDz3fPFuijH3P-p5y27nJEhaBe88oE9o,888
6
+ dforge/config_manager.py,sha256=PlUJaCBznaEvuTQFS5xDzcEqJUZ12Xfu98MZ5Z6ENdA,643
7
+ dforge/converter.py,sha256=XXUXr6paQRNLqqDJM4DeFxUMlg-nGMMLQNfRh4XA1As,5110
8
+ dforge/dependencies.py,sha256=3cfV2xb1IoOXe-RxoujqvyEaputtsENmO_hph6iZ49E,1955
9
+ dforge/engine.py,sha256=9oFbRoz7oNKEbp2iE6j25mdjCMqCq0kA5Q47idDLD3Q,7660
10
+ dforge/extractor.py,sha256=BYkNcnaxJXy1jz5fhMFz2gv7v8GckOygqDSpBtJd4Sk,6978
11
+ dforge/loading.py,sha256=u1-Svpvu4Dkooc8Oey8qcbBaxJ3ITTomYrqWa_tOYUA,396
12
+ dforge/menu.py,sha256=CQHGLNd2kGcgaqEQ-r_ALttw1JWY2ObiNiGO2jqS6f8,2295
13
+ dforge/operations.py,sha256=JbZCRFyZ1xfNk8ohxjIJBNrAjnyN9POFht5mH8f3FdM,10315
14
+ dforge/processor.py,sha256=3O3cVkEvAgSGjYnVBdbAEXEPw96EeJ6B6afsgdHFfgo,8201
15
+ dforge/setup.py,sha256=pV23z18-E25IG4tWULD4cZeAAQwYdNi9KqjNTzenfo4,2383
16
+ dforge/theme.py,sha256=F-W0HhyMIeYXWUG7wgl6LJJcAWHjB3oGNY8alJaWbfE,247
17
+ dforge/utils.py,sha256=KwZPQb59-B59nsI_V7M3rE3Kl_HufJe_dhf-qqQ5txc,4547
18
+ dforge/watcher.py,sha256=CeKlGFM3Ku7e8-Gbn_RFrvBBnkzvUd7KbyTDSKGC-F4,4322
19
+ dforge/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ dforge/workflows/automation.py,sha256=3NH1D0XrG7tYiDUDBDbQofPwnJNstQRVf1UB77i0Ldk,398
21
+ dforge/workflows/batch.py,sha256=YMiHgFh7U8seTUMB0jBwqAWCxqakGE9aAn9oBgzFLpI,327
22
+ dforge/workflows/batch_ocr.py,sha256=0Jh9O_PyejF5-x61kzwBS2vEIgx4LgCS6r6hV2rc2tk,1118
23
+ dforge/workflows/common.py,sha256=zRiwNEDlZHvn0KgW_P9Mj_j8c7mNMRKpqq5oF1iMCa8,2450
24
+ dforge/workflows/compress.py,sha256=PyEh_2rpSvk0rgG-bXtH5OAPenqrYq-gU37i3Vrm1bE,1373
25
+ dforge/workflows/convert.py,sha256=qd4YAdY6viTt8gnOlUwNoDWErIW_SZ7Ja5ArmVf7iU4,2481
26
+ dforge/workflows/decrypt.py,sha256=VaRrxA2Ohq69VTGRs3hw3I7SE3eY9EqbC0Ny2vx88W8,806
27
+ dforge/workflows/encrypt.py,sha256=ytVvSgmAH5MAFvLU2iIliIe9YGzwzsZAfc5d51P4MAY,806
28
+ dforge/workflows/extract.py,sha256=guWdlQjSx74mvvmaxzfQA5YyLaAUh3qtFBKfhAqcXS4,339
29
+ dforge/workflows/image.py,sha256=iQ8anZ9-IaF1TIeFAOvBX9ASazgk5v3phpAjTuLpRZc,390
30
+ dforge/workflows/merge.py,sha256=aMdTOAXB4GGVdDOr0PCCN5oQfSIU6OqCHeOm7Vy1e18,2242
31
+ dforge/workflows/ocr.py,sha256=FJRTpcQPcX9a89kLjjHNpEly_oFI3G6NjZZeeF6iing,1944
32
+ dforge/workflows/ocr_folder.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ dforge/workflows/pages.py,sha256=nXNc796ozKh1j7twAXBRokJpOGAwX1HvsfMCg4zrYMI,1001
34
+ dforge/workflows/rotate.py,sha256=kKESL8aNh1_haMSs71uWFxsT-fu19DUIMnjhugE5b1c,898
35
+ dforge/workflows/searchable.py,sha256=oPb81IHtNkZApEw4DPGYJN4xQEPlkOKgnEArPfQPP3w,947
36
+ dforge/workflows/settings.py,sha256=9hiJu-Oo3IYNpndE53diFM5S_ulhFNY5o5PjWyQxciQ,1016
37
+ dforge/workflows/split.py,sha256=BcZimQF9H4Daezu-1SlrwE-0I16WU6-Iz3_qCXYidiU,537
38
+ dforge/workflows/tables.py,sha256=1_qfa9gQPY353JnUtWTpEB4Mto5OOmt33vgdv97R5qA,748
39
+ dforge/workflows/watermark.py,sha256=bgQDMpKn2OnHQGNohaLBsEGuWUaLeVodY3oq6Kp7al4,938
40
+ dforge_cli-1.0.1.dist-info/METADATA,sha256=uxKJ-quM1Dc9qwvo6aPMMJI9rQ8z2liou6FJHko0sik,5861
41
+ dforge_cli-1.0.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
42
+ dforge_cli-1.0.1.dist-info/entry_points.txt,sha256=z9MCoKCrUNJzohLRdcdK2dq9NPoWBCts7voq5oInBVw,42
43
+ dforge_cli-1.0.1.dist-info/top_level.txt,sha256=QWUw5aSnP6-6lDz_7_h8tyxuyHMhdIKpaX8cYzpjuts,7
44
+ dforge_cli-1.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ dforge = dforge.cli:app
@@ -0,0 +1 @@
1
+ dforge