nougat-ocr-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ .venv/
25
+ venv/
26
+ ENV/
27
+ env/
28
+
29
+ # IDEs
30
+ .vscode/
31
+ .idea/
32
+ *.swp
33
+ *.swo
34
+
35
+ # Testing
36
+ .pytest_cache/
37
+ .coverage
38
+ htmlcov/
39
+ .tox/
40
+
41
+ # OS
42
+ .DS_Store
43
+ Thumbs.db
44
+
45
+ # UV
46
+ .uv-cache/
47
+
48
+ # Project specific
49
+ prompts/
50
+ ocr_output/
51
+ test.pdf
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Ruben Fernandez-Fuertes
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: nougat-ocr-cli
3
+ Version: 0.1.0
4
+ Summary: Simple CLI wrapper for Nougat OCR with GPU acceleration support
5
+ Project-URL: Homepage, https://github.com/rubenffuertes/nougat-ocr-cli
6
+ Project-URL: Repository, https://github.com/rubenffuertes/nougat-ocr-cli
7
+ Project-URL: Issues, https://github.com/rubenffuertes/nougat-ocr-cli/issues
8
+ Author-email: Ruben Fernandez-Fuertes <fernandezfuertesruben@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: cli,document,extraction,gpu,nougat,ocr,pdf
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: Topic :: Text Processing :: General
23
+ Classifier: Topic :: Utilities
24
+ Requires-Python: >=3.11
25
+ Requires-Dist: albumentations==1.3.1
26
+ Requires-Dist: nougat-ocr>=0.1.17
27
+ Requires-Dist: torch>=2.0.0
28
+ Requires-Dist: transformers>=4.30.0
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
31
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
32
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # Nougat OCR CLI
36
+
37
+ Simple, batteries-included CLI wrapper for [Nougat OCR](https://github.com/facebookresearch/nougat) with GPU acceleration.
38
+
39
+ ## Features
40
+
41
+ - GPU acceleration (CUDA & Apple Metal)
42
+ - Simple CLI interface
43
+ - Batch processing support
44
+ - Clean Markdown output
45
+ - Automatic model downloading
46
+ - Python API with type hints
47
+
48
+ ## Installation
49
+
50
+ ### From PyPI
51
+
52
+ ```bash
53
+ pip install nougat-ocr-cli
54
+ ```
55
+
56
+ ### From GitHub
57
+
58
+ ```bash
59
+ pip install git+https://github.com/rubenffuertes/nougat-ocr-cli.git
60
+ ```
61
+
62
+ ### From source
63
+
64
+ ```bash
65
+ git clone https://github.com/rubenffuertes/nougat-ocr-cli.git
66
+ cd nougat-ocr-cli
67
+ uv pip install -e .
68
+ ```
69
+
70
+ ## CLI Usage
71
+
72
+ ```bash
73
+ # Basic usage - outputs to current directory
74
+ nougat-ocr-cli document.pdf
75
+
76
+ # Specify output directory
77
+ nougat-ocr-cli document.pdf -o output/
78
+
79
+ # Process specific pages (zero-indexed; ranges are inclusive)
80
+ nougat-ocr-cli document.pdf --pages 0-5
81
+ nougat-ocr-cli document.pdf --pages 1,3,5,7
82
+
83
+ # Use smaller model for faster processing
84
+ nougat-ocr-cli document.pdf --model 0.1.0-small
85
+
86
+ # Use full precision (FP32) for better accuracy
87
+ nougat-ocr-cli document.pdf --full-precision
88
+
89
+ # Set batch size manually
90
+ nougat-ocr-cli document.pdf --batch-size 4
91
+ ```
92
+
93
+ ### CLI Options
94
+
95
+ | Option | Description |
96
+ |--------|-------------|
97
+ | `input` | Input PDF file to process |
98
+ | `-o, --output` | Output directory (default: current directory) |
99
+ | `--model` | Model version (default: 0.1.0-base) |
100
+ | `--batch-size` | Batch size for processing (auto-detected) |
101
+ | `--full-precision` | Use FP32 instead of BF16 |
102
+ | `--no-markdown` | Disable markdown post-processing |
103
+ | `--pages` | Zero-indexed pages to process; ranges are inclusive (e.g., '0-5' or '1,3,5') |
104
+
105
+ ## Python API
106
+
107
+ ```python
108
+ from nougat_wrapper import NougatOCR
109
+ from pathlib import Path
110
+
111
+ # Initialize (loads model to GPU automatically)
112
+ ocr = NougatOCR()
113
+
114
+ # Extract text from PDF
115
+ result = ocr.extract_text(Path("paper.pdf"))
116
+
117
+ print(f"Extracted {result.pages} pages")
118
+ print(f"Failed pages: {result.placeholder_pages}")
119
+ print(result.text) # Markdown output
120
+ ```
121
+
122
+ ### Advanced Usage
123
+
124
+ ```python
125
+ ocr = NougatOCR(
126
+ model_tag="0.1.0-small", # Use smaller model
127
+ batch_size=4, # Process 4 pages at once
128
+ full_precision=True, # Use FP32 instead of BF16
129
+ )
130
+
131
+ # Only OCR pages 0, 1, 2 (zero-indexed)
132
+ result = ocr.extract_text(Path("paper.pdf"), pages=[0, 1, 2])
133
+ ```
134
+
135
+ ## Requirements
136
+
137
+ - Python 3.11+
138
+ - GPU recommended (CUDA or Apple Metal)
139
+ - ~1.3 GB for model weights (auto-downloaded)
140
+
141
+ ## License
142
+
143
+ MIT
@@ -0,0 +1,109 @@
1
+ # Nougat OCR CLI
2
+
3
+ Simple, batteries-included CLI wrapper for [Nougat OCR](https://github.com/facebookresearch/nougat) with GPU acceleration.
4
+
5
+ ## Features
6
+
7
+ - GPU acceleration (CUDA & Apple Metal)
8
+ - Simple CLI interface
9
+ - Batch processing support
10
+ - Clean Markdown output
11
+ - Automatic model downloading
12
+ - Python API with type hints
13
+
14
+ ## Installation
15
+
16
+ ### From PyPI
17
+
18
+ ```bash
19
+ pip install nougat-ocr-cli
20
+ ```
21
+
22
+ ### From GitHub
23
+
24
+ ```bash
25
+ pip install git+https://github.com/rubenffuertes/nougat-ocr-cli.git
26
+ ```
27
+
28
+ ### From source
29
+
30
+ ```bash
31
+ git clone https://github.com/rubenffuertes/nougat-ocr-cli.git
32
+ cd nougat-ocr-cli
33
+ uv pip install -e .
34
+ ```
35
+
36
+ ## CLI Usage
37
+
38
+ ```bash
39
+ # Basic usage - outputs to current directory
40
+ nougat-ocr-cli document.pdf
41
+
42
+ # Specify output directory
43
+ nougat-ocr-cli document.pdf -o output/
44
+
45
+ # Process specific pages (zero-indexed; ranges are inclusive)
46
+ nougat-ocr-cli document.pdf --pages 0-5
47
+ nougat-ocr-cli document.pdf --pages 1,3,5,7
48
+
49
+ # Use smaller model for faster processing
50
+ nougat-ocr-cli document.pdf --model 0.1.0-small
51
+
52
+ # Use full precision (FP32) for better accuracy
53
+ nougat-ocr-cli document.pdf --full-precision
54
+
55
+ # Set batch size manually
56
+ nougat-ocr-cli document.pdf --batch-size 4
57
+ ```
58
+
59
+ ### CLI Options
60
+
61
+ | Option | Description |
62
+ |--------|-------------|
63
+ | `input` | Input PDF file to process |
64
+ | `-o, --output` | Output directory (default: current directory) |
65
+ | `--model` | Model version (default: 0.1.0-base) |
66
+ | `--batch-size` | Batch size for processing (auto-detected) |
67
+ | `--full-precision` | Use FP32 instead of BF16 |
68
+ | `--no-markdown` | Disable markdown post-processing |
69
+ | `--pages` | Zero-indexed pages to process; ranges are inclusive (e.g., '0-5' or '1,3,5') |
70
+
71
+ ## Python API
72
+
73
+ ```python
74
+ from nougat_wrapper import NougatOCR
75
+ from pathlib import Path
76
+
77
+ # Initialize (loads model to GPU automatically)
78
+ ocr = NougatOCR()
79
+
80
+ # Extract text from PDF
81
+ result = ocr.extract_text(Path("paper.pdf"))
82
+
83
+ print(f"Extracted {result.pages} pages")
84
+ print(f"Failed pages: {result.placeholder_pages}")
85
+ print(result.text) # Markdown output
86
+ ```
87
+
88
+ ### Advanced Usage
89
+
90
+ ```python
91
+ ocr = NougatOCR(
92
+ model_tag="0.1.0-small", # Use smaller model
93
+ batch_size=4, # Process 4 pages at once
94
+ full_precision=True, # Use FP32 instead of BF16
95
+ )
96
+
97
+ # Only OCR pages 0, 1, 2 (zero-indexed)
98
+ result = ocr.extract_text(Path("paper.pdf"), pages=[0, 1, 2])
99
+ ```
100
+
101
+ ## Requirements
102
+
103
+ - Python 3.11+
104
+ - GPU recommended (CUDA or Apple Metal)
105
+ - ~1.3 GB for model weights (auto-downloaded)
106
+
107
+ ## License
108
+
109
+ MIT
@@ -0,0 +1,39 @@
1
+ #!/usr/bin/env python3
2
+ """Basic usage example for nougat-ocr-cli."""
3
+
4
+ from nougat_wrapper import NougatOCR
5
+ from pathlib import Path
6
+ import sys
7
+
8
+
9
def main():
    """Run Nougat OCR on the PDF named on the command line and save Markdown.

    Reads the PDF path from ``sys.argv[1]``, extracts its text with
    ``NougatOCR``, prints a short summary, and writes the extracted text next
    to the input file with the suffix replaced by ``.md``.

    Exits with status 1 when no argument is given or when the argument is not
    an existing regular file.
    """
    if len(sys.argv) < 2:
        print("Usage: python basic_usage.py <pdf_file>")
        sys.exit(1)

    pdf_path = Path(sys.argv[1])
    # is_file() also rejects directories, which exists() would let through to
    # the OCR call.
    if not pdf_path.is_file():
        print(f"Error: {pdf_path} not found or not a file")
        sys.exit(1)

    print("Initializing Nougat OCR...")
    ocr = NougatOCR()

    print(f"Processing {pdf_path.name}...")
    result = ocr.extract_text(pdf_path)

    separator = "=" * 60
    print(f"\n{separator}")
    print("Results:")
    print(f" Pages processed: {result.pages}")
    print(f" Pages with issues: {result.placeholder_pages}")
    print(f" Text length: {len(result.text):,} characters")
    print(f"{separator}\n")

    # Save to markdown file. The encoding is explicit so the output does not
    # depend on the platform's default locale encoding.
    output_path = pdf_path.with_suffix(".md")
    output_path.write_text(result.text, encoding="utf-8")
    print(f"Saved to: {output_path}")
36
+
37
+
38
+ if __name__ == "__main__":
39
+ main()
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env python3
2
+ """Batch processing example for nougat-ocr-cli."""
3
+
4
+ from nougat_wrapper import NougatOCR
5
+ from pathlib import Path
6
+ import sys
7
+
8
+
9
def main():
    """OCR every ``*.pdf`` in a directory and write one Markdown file per PDF.

    Reads the directory from ``sys.argv[1]``, processes each PDF with a single
    shared ``NougatOCR`` instance, and writes ``<stem>.md`` files into an
    ``ocr_output`` sub-directory of the input directory. A failure on one file
    is reported and does not stop the batch.

    Exits with status 1 when no argument is given, when the argument is not a
    directory, or when the directory contains no ``*.pdf`` files.
    """
    if len(sys.argv) < 2:
        print("Usage: python batch_processing.py <directory>")
        print("Processes all PDF files in the specified directory.")
        sys.exit(1)

    input_dir = Path(sys.argv[1])
    if not input_dir.is_dir():
        print(f"Error: {input_dir} is not a directory")
        sys.exit(1)

    # Find all PDFs; sorted so the processing order is deterministic.
    pdf_files = sorted(input_dir.glob("*.pdf"))
    if not pdf_files:
        print(f"No PDF files found in {input_dir}")
        sys.exit(1)

    print(f"Found {len(pdf_files)} PDF files")
    print("Initializing Nougat OCR...")
    ocr = NougatOCR()

    # Create output directory
    output_dir = input_dir / "ocr_output"
    output_dir.mkdir(exist_ok=True)

    # Process each PDF
    total_pages = 0
    total_failed = 0
    files_ok = 0

    for i, pdf_path in enumerate(pdf_files, 1):
        print(f"\n[{i}/{len(pdf_files)}] Processing {pdf_path.name}...")

        try:
            result = ocr.extract_text(pdf_path)

            # Save output with an explicit encoding so the result does not
            # depend on the platform's default locale encoding.
            output_path = output_dir / f"{pdf_path.stem}.md"
            output_path.write_text(result.text, encoding="utf-8")
        except Exception as e:
            # Deliberate best-effort boundary: report and move on to the next
            # file instead of aborting the whole batch.
            print(f" ✗ Error: {e}")
            continue

        files_ok += 1
        total_pages += result.pages
        total_failed += result.placeholder_pages

        print(f" ✓ Pages: {result.pages}, Failed: {result.placeholder_pages}")
        print(f" Saved to: {output_path}")

    # Summary: report successful files against the total so files that raised
    # are not counted as processed.
    separator = "=" * 60
    print(f"\n{separator}")
    print("Batch Processing Summary:")
    print(f" Files processed: {files_ok}/{len(pdf_files)}")
    print(f" Total pages: {total_pages}")
    print(f" Failed pages: {total_failed}")
    print(f" Output directory: {output_dir}")
    print(separator)
66
+
67
+
68
+ if __name__ == "__main__":
69
+ main()
@@ -0,0 +1,73 @@
1
+ [project]
2
+ name = "nougat-ocr-cli"
3
+ version = "0.1.0"
4
+ description = "Simple CLI wrapper for Nougat OCR with GPU acceleration support"
5
+ authors = [
6
+ {name = "Ruben Fernandez-Fuertes", email = "fernandezfuertesruben@gmail.com"}
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = ">=3.11"
10
+ license = {text = "MIT"}
11
+ keywords = ["ocr", "pdf", "nougat", "document", "extraction", "cli", "gpu"]
12
+ classifiers = [
13
+ "Development Status :: 4 - Beta",
14
+ "Environment :: Console",
15
+ "Intended Audience :: Developers",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
23
+ "Topic :: Text Processing :: General",
24
+ "Topic :: Utilities",
25
+ ]
26
+
27
+ dependencies = [
28
+ "torch>=2.0.0",
29
+ "nougat-ocr>=0.1.17",
30
+ "transformers>=4.30.0",
31
+ "albumentations==1.3.1",
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ dev = [
36
+ "pytest>=7.0.0",
37
+ "pytest-cov>=4.0.0",
38
+ "ruff>=0.1.0",
39
+ ]
40
+
41
+ [project.scripts]
42
+ nougat-ocr-cli = "nougat_wrapper.cli:main"
43
+
44
+ [project.urls]
45
+ Homepage = "https://github.com/rubenffuertes/nougat-ocr-cli"
46
+ Repository = "https://github.com/rubenffuertes/nougat-ocr-cli"
47
+ Issues = "https://github.com/rubenffuertes/nougat-ocr-cli/issues"
48
+
49
+ [build-system]
50
+ requires = ["hatchling"]
51
+ build-backend = "hatchling.build"
52
+
53
+ [tool.hatch.build.targets.wheel]
54
+ packages = ["src/nougat_wrapper"]
55
+
56
+ [dependency-groups]
57
+ dev = [
58
+ "pytest>=7.0.0",
59
+ "pytest-cov>=4.0.0",
60
+ ]
61
+
62
+ [tool.ruff]
63
+ line-length = 100
64
+ target-version = "py311"
65
+
66
+ [tool.ruff.lint]
67
+ select = ["E", "F", "I", "N", "W"]
68
+ ignore = []
69
+
70
+ [tool.pytest.ini_options]
71
+ testpaths = ["tests"]
72
+ python_files = ["test_*.py"]
73
+ python_functions = ["test_*"]
@@ -0,0 +1,6 @@
1
+ """Simple wrapper for Nougat OCR with GPU acceleration support."""
2
+
3
+ from nougat_wrapper.core import NougatOCR, OCRResult
4
+
5
+ __version__ = "0.1.0"
6
+ __all__ = ["NougatOCR", "OCRResult", "__version__"]
@@ -0,0 +1,6 @@
1
+ """Allow running as `python -m nougat_wrapper`."""
2
+
3
+ from nougat_wrapper.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ raise SystemExit(main())
@@ -0,0 +1,109 @@
1
+ """Command-line interface for Nougat OCR."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ from nougat_wrapper.core import NougatOCR
10
+
11
+
12
def main(argv: list[str] | None = None) -> int:
    """Main entry point for the CLI.

    Parses the command line, loads the requested Nougat model, runs OCR on the
    input file, and writes the extracted text to
    ``<output dir>/<input stem>.md``.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, so the console
            script entry point keeps working unchanged.

    Returns:
        ``0`` on success; ``1`` when the input is not an existing file or the
        ``--pages`` value cannot be parsed. Argparse itself exits with status
        2 on malformed arguments.
    """
    parser = argparse.ArgumentParser(
        prog="nougat-ocr-cli",
        description="Extract text from PDFs using Nougat OCR with GPU acceleration.",
    )
    parser.add_argument(
        "input",
        type=Path,
        help="Input PDF file or image to process",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        help="Output directory for markdown files (default: current directory)",
        default=Path.cwd(),
    )
    parser.add_argument(
        "--model",
        type=str,
        default="0.1.0-base",
        help="Model version to use (default: 0.1.0-base)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=None,
        help="Batch size for processing (auto-detected if not specified)",
    )
    parser.add_argument(
        "--full-precision",
        action="store_true",
        help="Use FP32 instead of BF16 (slower but more accurate)",
    )
    parser.add_argument(
        "--no-markdown",
        action="store_true",
        help="Disable markdown post-processing",
    )
    parser.add_argument(
        "--pages",
        type=str,
        default=None,
        help="Page range to process (e.g., '0-5' or '1,3,5')",
    )

    args = parser.parse_args(argv)

    # Validate input. is_file() also rejects directories, which exists() would
    # let through to the OCR call.
    if not args.input.is_file():
        print(f"Error: Input file not found: {args.input}", file=sys.stderr)
        return 1

    # Parse pages if specified. A malformed value is reported as a normal CLI
    # error before the model is loaded, instead of surfacing as a traceback.
    pages = None
    if args.pages:
        try:
            pages = _parse_pages(args.pages)
        except ValueError as exc:
            print(f"Error: Invalid --pages value {args.pages!r}: {exc}", file=sys.stderr)
            return 1

    # Initialize OCR
    print(f"Loading Nougat model ({args.model})...")
    ocr = NougatOCR(
        model_tag=args.model,
        batch_size=args.batch_size,
        markdown=not args.no_markdown,
        full_precision=args.full_precision,
    )

    # Process the file
    print(f"Processing: {args.input}")
    result = ocr.extract_text(args.input, pages=pages)

    # Write output
    args.output.mkdir(parents=True, exist_ok=True)
    output_file = args.output / f"{args.input.stem}.md"
    output_file.write_text(result.text, encoding="utf-8")

    print(f"Extracted {result.pages} pages ({result.placeholder_pages} failed)")
    print(f"Output: {output_file}")

    return 0
93
+
94
+
95
+ def _parse_pages(pages_str: str) -> list[int]:
96
+ """Parse page specification string into list of page numbers."""
97
+ pages = []
98
+ for part in pages_str.split(","):
99
+ part = part.strip()
100
+ if "-" in part:
101
+ start, end = part.split("-", 1)
102
+ pages.extend(range(int(start), int(end) + 1))
103
+ else:
104
+ pages.append(int(part))
105
+ return pages
106
+
107
+
108
+ if __name__ == "__main__":
109
+ sys.exit(main())