natural-pdf 0.1.19__py3-none-any.whl → 0.1.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/analyzers/layout/layout_manager.py +86 -80
- natural_pdf/analyzers/layout/yolo.py +2 -2
- natural_pdf/cli.py +134 -0
- natural_pdf/exporters/__init__.py +1 -0
- natural_pdf/ocr/engine_paddle.py +1 -1
- natural_pdf/ocr/ocr_factory.py +9 -9
- natural_pdf/ocr/ocr_manager.py +1 -9
- {natural_pdf-0.1.19.dist-info → natural_pdf-0.1.20.dist-info}/METADATA +8 -21
- {natural_pdf-0.1.19.dist-info → natural_pdf-0.1.20.dist-info}/RECORD +13 -11
- natural_pdf-0.1.20.dist-info/entry_points.txt +3 -0
- {natural_pdf-0.1.19.dist-info → natural_pdf-0.1.20.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.19.dist-info → natural_pdf-0.1.20.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.19.dist-info → natural_pdf-0.1.20.dist-info}/top_level.txt +0 -0
@@ -5,43 +5,11 @@ from typing import Any, Dict, List, Optional, Type, Union
|
|
5
5
|
|
6
6
|
from PIL import Image
|
7
7
|
|
8
|
-
# --- Import
|
9
|
-
#
|
10
|
-
|
11
|
-
from .base import LayoutDetector
|
12
|
-
except ImportError:
|
13
|
-
LayoutDetector = type("LayoutDetector", (), {})
|
14
|
-
|
15
|
-
try:
|
16
|
-
from .yolo import YOLODocLayoutDetector
|
17
|
-
except ImportError:
|
18
|
-
YOLODocLayoutDetector = None
|
19
|
-
|
20
|
-
try:
|
21
|
-
from .tatr import TableTransformerDetector
|
22
|
-
except ImportError:
|
23
|
-
TableTransformerDetector = None
|
24
|
-
|
25
|
-
try:
|
26
|
-
from .paddle import PaddleLayoutDetector
|
27
|
-
except ImportError:
|
28
|
-
PaddleLayoutDetector = None
|
29
|
-
|
30
|
-
try:
|
31
|
-
from .surya import SuryaLayoutDetector
|
32
|
-
except ImportError:
|
33
|
-
SuryaLayoutDetector = None
|
34
|
-
|
35
|
-
try:
|
36
|
-
from .docling import DoclingLayoutDetector
|
37
|
-
except ImportError:
|
38
|
-
DoclingLayoutDetector = None
|
39
|
-
|
40
|
-
try:
|
41
|
-
from .gemini import GeminiLayoutDetector
|
42
|
-
except ImportError:
|
43
|
-
GeminiLayoutDetector = None
|
8
|
+
# --- Import lightweight components only ---
|
9
|
+
# Heavy detector implementations (paddle, yolo, etc.) are **not** imported at module load.
|
10
|
+
# Instead, we provide tiny helper functions that import them lazily **only when needed**.
|
44
11
|
|
12
|
+
from .base import LayoutDetector # Lightweight base class
|
45
13
|
from .layout_options import (
|
46
14
|
BaseLayoutOptions,
|
47
15
|
DoclingLayoutOptions,
|
@@ -53,6 +21,47 @@ from .layout_options import (
|
|
53
21
|
YOLOLayoutOptions,
|
54
22
|
)
|
55
23
|
|
24
|
+
# ------------------ Lazy import helpers ------------------ #
|
25
|
+
|
26
|
+
|
27
|
+
def _lazy_import_yolo_detector():
|
28
|
+
"""Import YOLO detector lazily to avoid heavy deps at import time."""
|
29
|
+
from .yolo import YOLODocLayoutDetector # Local import
|
30
|
+
|
31
|
+
return YOLODocLayoutDetector
|
32
|
+
|
33
|
+
|
34
|
+
def _lazy_import_tatr_detector():
|
35
|
+
from .tatr import TableTransformerDetector
|
36
|
+
|
37
|
+
return TableTransformerDetector
|
38
|
+
|
39
|
+
|
40
|
+
def _lazy_import_paddle_detector():
|
41
|
+
from .paddle import PaddleLayoutDetector
|
42
|
+
|
43
|
+
return PaddleLayoutDetector
|
44
|
+
|
45
|
+
|
46
|
+
def _lazy_import_surya_detector():
|
47
|
+
from .surya import SuryaLayoutDetector
|
48
|
+
|
49
|
+
return SuryaLayoutDetector
|
50
|
+
|
51
|
+
|
52
|
+
def _lazy_import_docling_detector():
|
53
|
+
from .docling import DoclingLayoutDetector
|
54
|
+
|
55
|
+
return DoclingLayoutDetector
|
56
|
+
|
57
|
+
|
58
|
+
def _lazy_import_gemini_detector():
|
59
|
+
from .gemini import GeminiLayoutDetector
|
60
|
+
|
61
|
+
return GeminiLayoutDetector
|
62
|
+
|
63
|
+
# --------------------------------------------------------- #
|
64
|
+
|
56
65
|
logger = logging.getLogger(__name__)
|
57
66
|
|
58
67
|
|
@@ -62,39 +71,34 @@ class LayoutManager:
|
|
62
71
|
# Registry mapping engine names to classes and default options
|
63
72
|
ENGINE_REGISTRY: Dict[str, Dict[str, Any]] = {}
|
64
73
|
|
65
|
-
# Populate registry
|
66
|
-
|
67
|
-
|
68
|
-
|
74
|
+
# Populate registry with lazy import callables. The heavy imports are executed only
|
75
|
+
# when the corresponding engine is first requested.
|
76
|
+
ENGINE_REGISTRY = {
|
77
|
+
"yolo": {
|
78
|
+
"class": _lazy_import_yolo_detector, # returns detector class when called
|
69
79
|
"options_class": YOLOLayoutOptions,
|
70
|
-
}
|
71
|
-
|
72
|
-
|
73
|
-
"class": TableTransformerDetector,
|
80
|
+
},
|
81
|
+
"tatr": {
|
82
|
+
"class": _lazy_import_tatr_detector,
|
74
83
|
"options_class": TATRLayoutOptions,
|
75
|
-
}
|
76
|
-
|
77
|
-
|
78
|
-
"class": PaddleLayoutDetector,
|
84
|
+
},
|
85
|
+
"paddle": {
|
86
|
+
"class": _lazy_import_paddle_detector,
|
79
87
|
"options_class": PaddleLayoutOptions,
|
80
|
-
}
|
81
|
-
|
82
|
-
|
83
|
-
"class": SuryaLayoutDetector,
|
88
|
+
},
|
89
|
+
"surya": {
|
90
|
+
"class": _lazy_import_surya_detector,
|
84
91
|
"options_class": SuryaLayoutOptions,
|
85
|
-
}
|
86
|
-
|
87
|
-
|
88
|
-
"class": DoclingLayoutDetector,
|
92
|
+
},
|
93
|
+
"docling": {
|
94
|
+
"class": _lazy_import_docling_detector,
|
89
95
|
"options_class": DoclingLayoutOptions,
|
90
|
-
}
|
91
|
-
|
92
|
-
|
93
|
-
if GeminiLayoutDetector:
|
94
|
-
ENGINE_REGISTRY["gemini"] = {
|
95
|
-
"class": GeminiLayoutDetector,
|
96
|
+
},
|
97
|
+
"gemini": {
|
98
|
+
"class": _lazy_import_gemini_detector,
|
96
99
|
"options_class": GeminiLayoutOptions,
|
97
|
-
}
|
100
|
+
},
|
101
|
+
}
|
98
102
|
|
99
103
|
def __init__(self):
|
100
104
|
"""Initializes the Layout Manager."""
|
@@ -114,25 +118,24 @@ class LayoutManager:
|
|
114
118
|
|
115
119
|
if engine_name not in self._detector_instances:
|
116
120
|
logger.info(f"Creating instance of layout engine: {engine_name}")
|
117
|
-
|
121
|
+
engine_class_or_factory = self.ENGINE_REGISTRY[engine_name]["class"]
|
122
|
+
# If the registry provides a callable (lazy import helper), call it to obtain the real class.
|
123
|
+
if callable(engine_class_or_factory) and not isinstance(engine_class_or_factory, type):
|
124
|
+
engine_class = engine_class_or_factory()
|
125
|
+
else:
|
126
|
+
engine_class = engine_class_or_factory
|
127
|
+
|
118
128
|
detector_instance = engine_class() # Instantiate
|
119
129
|
if not detector_instance.is_available():
|
120
130
|
# Check availability before storing
|
121
131
|
# Construct helpful error message with install hint
|
122
132
|
install_hint = ""
|
123
|
-
if engine_name
|
124
|
-
install_hint = "
|
133
|
+
if engine_name in {"yolo", "paddle", "surya", "docling"}:
|
134
|
+
install_hint = f"natural-pdf install {engine_name}"
|
125
135
|
elif engine_name == "tatr":
|
126
|
-
|
127
|
-
install_hint = "(should be installed with natural-pdf, check for import errors)"
|
128
|
-
elif engine_name == "paddle":
|
129
|
-
install_hint = "pip install paddleocr paddlepaddle"
|
130
|
-
elif engine_name == "surya":
|
131
|
-
install_hint = "pip install surya-ocr"
|
132
|
-
elif engine_name == "docling":
|
133
|
-
install_hint = "pip install docling"
|
136
|
+
install_hint = "(should be installed with natural-pdf core dependencies)"
|
134
137
|
elif engine_name == "gemini":
|
135
|
-
install_hint = "pip install openai"
|
138
|
+
install_hint = "pip install openai" # keep as-is for now
|
136
139
|
else:
|
137
140
|
install_hint = f"(Check installation requirements for {engine_name})"
|
138
141
|
|
@@ -201,14 +204,17 @@ class LayoutManager:
|
|
201
204
|
available = []
|
202
205
|
for name, registry_entry in self.ENGINE_REGISTRY.items():
|
203
206
|
try:
|
204
|
-
|
205
|
-
|
207
|
+
engine_class_or_factory = registry_entry["class"]
|
208
|
+
if callable(engine_class_or_factory) and not isinstance(engine_class_or_factory, type):
|
209
|
+
# Lazy factory – call it to obtain real class
|
210
|
+
engine_class = engine_class_or_factory()
|
211
|
+
else:
|
212
|
+
engine_class = engine_class_or_factory
|
213
|
+
|
206
214
|
if hasattr(engine_class, "is_available") and callable(engine_class.is_available):
|
207
|
-
|
208
|
-
if engine_class().is_available(): # Assumes instance needed for check
|
215
|
+
if engine_class().is_available():
|
209
216
|
available.append(name)
|
210
217
|
else:
|
211
|
-
# Assume available if class exists (less robust)
|
212
218
|
available.append(name)
|
213
219
|
except Exception as e:
|
214
220
|
logger.debug(f"Layout engine '{name}' check failed: {e}")
|
@@ -92,7 +92,7 @@ class YOLODocLayoutDetector(LayoutDetector):
|
|
92
92
|
"""Load the YOLOv10 model based on options."""
|
93
93
|
if not self.is_available():
|
94
94
|
raise RuntimeError(
|
95
|
-
"YOLO dependencies not installed. Please run:
|
95
|
+
"YOLO dependencies not installed. Please run: natural-pdf install yolo"
|
96
96
|
)
|
97
97
|
self.logger.info(f"Loading YOLO model: {options.model_repo}/{options.model_file}")
|
98
98
|
try:
|
@@ -108,7 +108,7 @@ class YOLODocLayoutDetector(LayoutDetector):
|
|
108
108
|
"""Detect layout elements in an image using YOLO."""
|
109
109
|
if not self.is_available():
|
110
110
|
raise RuntimeError(
|
111
|
-
"YOLO dependencies not installed. Please run:
|
111
|
+
"YOLO dependencies not installed. Please run: natural-pdf install yolo"
|
112
112
|
)
|
113
113
|
|
114
114
|
# Ensure options are the correct type, falling back to defaults if base type passed
|
natural_pdf/cli.py
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
import argparse
|
2
|
+
import subprocess
|
3
|
+
import sys
|
4
|
+
from importlib.metadata import distribution, PackageNotFoundError, version as get_version
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Dict
|
7
|
+
from packaging.requirements import Requirement
|
8
|
+
|
9
|
+
# ---------------------------------------------------------------------------
|
10
|
+
# Mapping: sub-command name -> list of pip requirement specifiers to install
|
11
|
+
# ---------------------------------------------------------------------------
|
12
|
+
INSTALL_RECIPES: Dict[str, list[str]] = {
|
13
|
+
# heavyweight stacks
|
14
|
+
"paddle": ["paddlepaddle>=3.0.0", "paddleocr>=3.0.1", "paddlex>=3.0.2"],
|
15
|
+
"surya": ["surya-ocr>=0.13.0"],
|
16
|
+
"yolo": ["doclayout_yolo", "huggingface_hub>=0.29.3"],
|
17
|
+
"docling": ["docling"],
|
18
|
+
# light helpers
|
19
|
+
"deskew": [f"{__package__.split('.')[0]}[deskew]"],
|
20
|
+
"search": [f"{__package__.split('.')[0]}[search]"],
|
21
|
+
"easyocr": ["easyocr"],
|
22
|
+
}
|
23
|
+
|
24
|
+
|
25
|
+
def _build_pip_install_args(requirements: list[str], upgrade: bool = True):
|
26
|
+
"""Return the pip command list to install/upgrade the given requirement strings."""
|
27
|
+
cmd = [sys.executable, "-m", "pip", "install"]
|
28
|
+
if upgrade:
|
29
|
+
cmd.append("--upgrade")
|
30
|
+
cmd.extend(requirements)
|
31
|
+
return cmd
|
32
|
+
|
33
|
+
|
34
|
+
def _run(cmd):
|
35
|
+
print("$", " ".join(cmd), flush=True)
|
36
|
+
subprocess.check_call(cmd)
|
37
|
+
|
38
|
+
|
39
|
+
def cmd_install(args):
|
40
|
+
for extra in args.extras:
|
41
|
+
group_key = extra.lower()
|
42
|
+
if group_key not in INSTALL_RECIPES:
|
43
|
+
print(
|
44
|
+
f"❌ Unknown extra '{group_key}'. Known extras: {', '.join(sorted(INSTALL_RECIPES))}",
|
45
|
+
file=sys.stderr,
|
46
|
+
)
|
47
|
+
continue
|
48
|
+
|
49
|
+
requirements = INSTALL_RECIPES[group_key]
|
50
|
+
|
51
|
+
# Skip paddlex upgrade if already satisfied
|
52
|
+
if group_key == "paddle":
|
53
|
+
try:
|
54
|
+
dist = distribution("paddlex")
|
55
|
+
from packaging.version import parse as V
|
56
|
+
if V(dist.version) >= V("3.0.2"):
|
57
|
+
print("✓ paddlex already ≥ 3.0.2 – nothing to do.")
|
58
|
+
continue
|
59
|
+
except PackageNotFoundError:
|
60
|
+
pass
|
61
|
+
|
62
|
+
# Special handling for paddle stack: install paddlepaddle & paddleocr first
|
63
|
+
# each in its own resolver run, then paddlex.
|
64
|
+
if group_key == "paddle":
|
65
|
+
base_reqs = [r for r in requirements if not r.startswith("paddlex")]
|
66
|
+
for req in base_reqs:
|
67
|
+
pip_cmd = _build_pip_install_args([req])
|
68
|
+
_run(pip_cmd)
|
69
|
+
|
70
|
+
# paddlex last to override the strict pin
|
71
|
+
pip_cmd = _build_pip_install_args(["paddlex==3.0.2"])
|
72
|
+
_run(pip_cmd)
|
73
|
+
print("✔ Paddle stack installed (paddlex upgraded to 3.0.2)")
|
74
|
+
else:
|
75
|
+
for req in requirements:
|
76
|
+
pip_cmd = _build_pip_install_args([req])
|
77
|
+
_run(pip_cmd)
|
78
|
+
print("✔ Finished installing extra dependencies for", group_key)
|
79
|
+
|
80
|
+
|
81
|
+
def main():
|
82
|
+
parser = argparse.ArgumentParser(
|
83
|
+
prog="npdf",
|
84
|
+
description="Utility CLI for the natural-pdf library",
|
85
|
+
)
|
86
|
+
subparsers = parser.add_subparsers(dest="command", required=True)
|
87
|
+
|
88
|
+
# install subcommand
|
89
|
+
install_p = subparsers.add_parser(
|
90
|
+
"install", help="Install optional dependency groups (e.g. paddle, surya)"
|
91
|
+
)
|
92
|
+
install_p.add_argument("extras", nargs="+", help="One or more extras to install (e.g. paddle surya)")
|
93
|
+
install_p.set_defaults(func=cmd_install)
|
94
|
+
|
95
|
+
# list subcommand -------------------------------------------------------
|
96
|
+
list_p = subparsers.add_parser("list", help="Show status of optional dependency groups")
|
97
|
+
list_p.set_defaults(func=cmd_list)
|
98
|
+
|
99
|
+
args = parser.parse_args()
|
100
|
+
args.func(args)
|
101
|
+
|
102
|
+
|
103
|
+
# ---------------------------------------------------------------------------
|
104
|
+
# List command implementation
|
105
|
+
# ---------------------------------------------------------------------------
|
106
|
+
|
107
|
+
|
108
|
+
def _pkg_version(pkg_name: str):
|
109
|
+
try:
|
110
|
+
return get_version(pkg_name)
|
111
|
+
except PackageNotFoundError:
|
112
|
+
return None
|
113
|
+
|
114
|
+
|
115
|
+
def cmd_list(args):
|
116
|
+
print("Optional dependency groups status:\n")
|
117
|
+
for extra, reqs in INSTALL_RECIPES.items():
|
118
|
+
installed_all = True
|
119
|
+
pieces = []
|
120
|
+
for req_str in reqs:
|
121
|
+
pkg_name = Requirement(req_str).name # strip version specifiers
|
122
|
+
ver = _pkg_version(pkg_name)
|
123
|
+
if ver is None:
|
124
|
+
installed_all = False
|
125
|
+
pieces.append(f"{pkg_name} (missing)")
|
126
|
+
else:
|
127
|
+
pieces.append(f"{pkg_name} {ver}")
|
128
|
+
status = "✓" if installed_all else "✗"
|
129
|
+
print(f"{status} {extra:<8} -> " + ", ".join(pieces))
|
130
|
+
print("\nLegend: ✓ group fully installed, ✗ some packages missing\n")
|
131
|
+
|
132
|
+
|
133
|
+
if __name__ == "__main__":
|
134
|
+
main()
|
@@ -4,6 +4,7 @@ from .base import FinetuneExporter
|
|
4
4
|
def _get_paddleocr_exporter():
|
5
5
|
"""Lazy import for PaddleOCRRecognitionExporter."""
|
6
6
|
from .paddleocr import PaddleOCRRecognitionExporter
|
7
|
+
|
7
8
|
return PaddleOCRRecognitionExporter
|
8
9
|
|
9
10
|
# Make PaddleOCRRecognitionExporter available through attribute access
|
natural_pdf/ocr/engine_paddle.py
CHANGED
@@ -127,7 +127,7 @@ class PaddleOCREngine(OCREngine):
|
|
127
127
|
except ImportError as e:
|
128
128
|
self.logger.error(f"Failed to import PaddleOCR/PaddlePaddle: {e}")
|
129
129
|
raise RuntimeError(
|
130
|
-
"paddleocr is not available.
|
130
|
+
"paddleocr is not available. Install via: natural-pdf install paddle"
|
131
131
|
) from e
|
132
132
|
|
133
133
|
paddle_options = options if isinstance(options, PaddleOCROptions) else PaddleOCROptions()
|
natural_pdf/ocr/ocr_factory.py
CHANGED
@@ -32,7 +32,7 @@ class OCRFactory:
|
|
32
32
|
return SuryaOCREngine(**kwargs)
|
33
33
|
except ImportError:
|
34
34
|
raise ImportError(
|
35
|
-
"Surya engine requires
|
35
|
+
"Surya engine requires additional dependencies. " "Install with: natural-pdf install surya"
|
36
36
|
)
|
37
37
|
elif engine_type == "easyocr":
|
38
38
|
try:
|
@@ -42,7 +42,7 @@ class OCRFactory:
|
|
42
42
|
except ImportError:
|
43
43
|
raise ImportError(
|
44
44
|
"EasyOCR engine requires the 'easyocr' package. "
|
45
|
-
"Install with: pip install easyocr"
|
45
|
+
"Install with: pip install easyocr (or natural-pdf install easyocr when available)"
|
46
46
|
)
|
47
47
|
elif engine_type == "paddle":
|
48
48
|
try:
|
@@ -52,7 +52,7 @@ class OCRFactory:
|
|
52
52
|
except ImportError:
|
53
53
|
raise ImportError(
|
54
54
|
"PaddleOCR engine requires 'paddleocr' and 'paddlepaddle'. "
|
55
|
-
"Install with:
|
55
|
+
"Install with: natural-pdf install paddle"
|
56
56
|
)
|
57
57
|
elif engine_type == "doctr":
|
58
58
|
try:
|
@@ -62,7 +62,7 @@ class OCRFactory:
|
|
62
62
|
except ImportError:
|
63
63
|
raise ImportError(
|
64
64
|
"Doctr engine requires the 'python-doctr' package. "
|
65
|
-
"Install with: pip install python-doctr[torch]
|
65
|
+
"Install with: pip install python-doctr[torch]"
|
66
66
|
)
|
67
67
|
else:
|
68
68
|
raise ValueError(f"Unknown engine type: {engine_type}")
|
@@ -137,9 +137,9 @@ class OCRFactory:
|
|
137
137
|
|
138
138
|
# If we get here, no engines are available
|
139
139
|
raise ImportError(
|
140
|
-
"No OCR engines
|
141
|
-
"-
|
142
|
-
"-
|
143
|
-
"-
|
144
|
-
"-
|
140
|
+
"No OCR engines are installed. You can add one via the natural-pdf installer, e.g.:\n"
|
141
|
+
" natural-pdf install easyocr # fastest to set up\n"
|
142
|
+
" natural-pdf install paddle # best Asian-language accuracy\n"
|
143
|
+
" natural-pdf install surya # Surya OCR engine\n"
|
144
|
+
" natural-pdf install yolo # Layout detection (YOLO)\n"
|
145
145
|
)
|
natural_pdf/ocr/ocr_manager.py
CHANGED
@@ -94,15 +94,7 @@ class OCRManager:
|
|
94
94
|
engine_instance = engine_class() # Instantiate first
|
95
95
|
if not engine_instance.is_available():
|
96
96
|
# Check availability before storing
|
97
|
-
install_hint = f"
|
98
|
-
if engine_name == "easyocr":
|
99
|
-
install_hint = "pip install easyocr"
|
100
|
-
elif engine_name == "paddle":
|
101
|
-
install_hint = "pip install paddleocr paddlepaddle"
|
102
|
-
elif engine_name == "surya":
|
103
|
-
install_hint = "pip install surya-ocr"
|
104
|
-
elif engine_name == "doctr":
|
105
|
-
install_hint = "pip install 'python-doctr[torch]'"
|
97
|
+
install_hint = f"natural-pdf install {engine_name}"
|
106
98
|
|
107
99
|
raise RuntimeError(
|
108
100
|
f"Engine '{engine_name}' is not available. Please install the required dependencies: {install_hint}"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: natural-pdf
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.20
|
4
4
|
Summary: A more intuitive interface for working with PDFs
|
5
5
|
Author-email: Jonathan Soma <jonathan.soma@gmail.com>
|
6
6
|
License-Expression: MIT
|
@@ -34,13 +34,6 @@ Provides-Extra: test
|
|
34
34
|
Requires-Dist: pytest; extra == "test"
|
35
35
|
Requires-Dist: pytest-xdist; extra == "test"
|
36
36
|
Requires-Dist: setuptools; extra == "test"
|
37
|
-
Provides-Extra: search
|
38
|
-
Requires-Dist: lancedb; extra == "search"
|
39
|
-
Requires-Dist: pyarrow; extra == "search"
|
40
|
-
Provides-Extra: favorites
|
41
|
-
Requires-Dist: natural-pdf[deskew]; extra == "favorites"
|
42
|
-
Requires-Dist: natural-pdf[ocr-export]; extra == "favorites"
|
43
|
-
Requires-Dist: natural-pdf[paddle]; extra == "favorites"
|
44
37
|
Provides-Extra: dev
|
45
38
|
Requires-Dist: black; extra == "dev"
|
46
39
|
Requires-Dist: isort; extra == "dev"
|
@@ -58,25 +51,19 @@ Requires-Dist: nbclient; extra == "dev"
|
|
58
51
|
Requires-Dist: ipykernel; extra == "dev"
|
59
52
|
Requires-Dist: pre-commit; extra == "dev"
|
60
53
|
Requires-Dist: setuptools; extra == "dev"
|
61
|
-
Provides-Extra: deskew
|
62
|
-
Requires-Dist: deskew>=1.5; extra == "deskew"
|
63
|
-
Requires-Dist: img2pdf; extra == "deskew"
|
64
54
|
Provides-Extra: all
|
65
55
|
Requires-Dist: natural-pdf[ocr-export]; extra == "all"
|
66
56
|
Requires-Dist: natural-pdf[deskew]; extra == "all"
|
67
57
|
Requires-Dist: natural-pdf[test]; extra == "all"
|
68
58
|
Requires-Dist: natural-pdf[search]; extra == "all"
|
69
|
-
Requires-Dist: natural-pdf[extras]; extra == "all"
|
70
59
|
Requires-Dist: natural-pdf[favorites]; extra == "all"
|
71
|
-
|
72
|
-
|
73
|
-
Requires-Dist:
|
74
|
-
Requires-Dist:
|
75
|
-
Provides-Extra:
|
76
|
-
Requires-Dist:
|
77
|
-
Requires-Dist:
|
78
|
-
Requires-Dist: easyocr; extra == "extras"
|
79
|
-
Requires-Dist: natural-pdf[paddle]; extra == "extras"
|
60
|
+
Requires-Dist: natural-pdf[export-extras]; extra == "all"
|
61
|
+
Provides-Extra: deskew
|
62
|
+
Requires-Dist: deskew>=1.5; extra == "deskew"
|
63
|
+
Requires-Dist: img2pdf; extra == "deskew"
|
64
|
+
Provides-Extra: search
|
65
|
+
Requires-Dist: lancedb; extra == "search"
|
66
|
+
Requires-Dist: pyarrow; extra == "search"
|
80
67
|
Provides-Extra: ocr-export
|
81
68
|
Requires-Dist: pikepdf; extra == "ocr-export"
|
82
69
|
Provides-Extra: export-extras
|
@@ -1,4 +1,5 @@
|
|
1
1
|
natural_pdf/__init__.py,sha256=qDFJNF8sbEDO-2WSFAxoWEM8updOUP6dB-ckya0kxfs,3275
|
2
|
+
natural_pdf/cli.py,sha256=0nAGVO2f_40E3G9c3Q0bfK5mhROyUJH5W25-YJVLMIo,4749
|
2
3
|
natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
|
3
4
|
natural_pdf/analyzers/shape_detection_mixin.py,sha256=HHefg-v7CJMxYiJHxdGOdqdtbWe9yk4OBoW3a_aRrjM,81798
|
4
5
|
natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
|
@@ -9,14 +10,14 @@ natural_pdf/analyzers/layout/base.py,sha256=bYawhmc_0xqKG-xbxUSiazIU1om-aBox5Jh8
|
|
9
10
|
natural_pdf/analyzers/layout/docling.py,sha256=4BJYyNVR6VegZGxyisvNIBBRvVk6YKPyDVs7ZdVfzEU,12676
|
10
11
|
natural_pdf/analyzers/layout/gemini.py,sha256=ldECVCQ5HNQA3Omjg2NOsTrJXslyYb0vErDncmLIiuE,10510
|
11
12
|
natural_pdf/analyzers/layout/layout_analyzer.py,sha256=n327Zjuf7aSzKQKChPHeiCVHinzeDGaWNyKiwQ-DkJk,15571
|
12
|
-
natural_pdf/analyzers/layout/layout_manager.py,sha256=
|
13
|
+
natural_pdf/analyzers/layout/layout_manager.py,sha256=sDnh7XE-Wx2EBmgjipbvaLZQ7VSG6MfjEKfNsNXPNHs,8583
|
13
14
|
natural_pdf/analyzers/layout/layout_options.py,sha256=-Nv6bcu4_pqSCN6uNhCZ9mvoCBtRDZIUkO6kjkuLXsg,7703
|
14
15
|
natural_pdf/analyzers/layout/paddle.py,sha256=tX2bI1yayAdmRhvsfZ_Ygs7zAG5e9eW-pLJkw4NUpBQ,21325
|
15
16
|
natural_pdf/analyzers/layout/pdfplumber_table_finder.py,sha256=Tk0Q7wv7nGYPo69lh6RoezjdepTnMl90SaNIrP29Pwc,5902
|
16
17
|
natural_pdf/analyzers/layout/surya.py,sha256=4RdnhRxSS3i3Ns5mFhOA9-P0xd7Ms19uZuKvUGQfEBI,9789
|
17
18
|
natural_pdf/analyzers/layout/table_structure_utils.py,sha256=nISZDBd46RPYkFHxbQyIHwg9WweG4DslpoYJ31OMJYA,2768
|
18
19
|
natural_pdf/analyzers/layout/tatr.py,sha256=cVr0ZyhY2mNLAKZ4DGMm-b7XNJpILKh8x8ZpyDeUhLk,15032
|
19
|
-
natural_pdf/analyzers/layout/yolo.py,sha256=
|
20
|
+
natural_pdf/analyzers/layout/yolo.py,sha256=Iw8qsIOHg2lUP7z9GsmkOm3c9kJ-Ywk01Oej50kZgDw,8360
|
20
21
|
natural_pdf/classification/manager.py,sha256=pzuTP-34W9N3im1ZFhCfQpOu37VSHEx4JHoHNxyy6o0,18894
|
21
22
|
natural_pdf/classification/mixin.py,sha256=_XtoqCMqj1nxZYskIV2RbVYiVVcEWzFwae4s5vpzC74,6566
|
22
23
|
natural_pdf/classification/results.py,sha256=El1dY7cBQVOB5lP-uj52dWgH6Y7TeQgJOVcZD-OLjes,2778
|
@@ -40,7 +41,7 @@ natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,
|
|
40
41
|
natural_pdf/elements/region.py,sha256=hBklYKcXJWyxayu9todYQOZ-d9KVDtqeV-CIt9IcSn8,123400
|
41
42
|
natural_pdf/elements/text.py,sha256=13HvVZGinj2Vm_fFCAnqi7hohtoKvnpCp3VCfkpeAbc,11146
|
42
43
|
natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
|
43
|
-
natural_pdf/exporters/__init__.py,sha256=
|
44
|
+
natural_pdf/exporters/__init__.py,sha256=g1WRPCDVzceaUUsm8dchPhzdHFSjYM0NfRyc8iN0mtE,644
|
44
45
|
natural_pdf/exporters/base.py,sha256=XhR1xlkHOh7suOuX7mWbsj1h2o1pZNet-OAS5YCJyeI,2115
|
45
46
|
natural_pdf/exporters/hocr.py,sha256=wksvJvWLSxuAfhYzg_0T2_W8eqDoMgAVC-gwZ9FoO_k,19969
|
46
47
|
natural_pdf/exporters/hocr_font.py,sha256=1wsGOMj6zoaRN2rxCwrv4MMLGawpNz984WgXpmWekgw,4574
|
@@ -62,10 +63,10 @@ natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2
|
|
62
63
|
natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
|
63
64
|
natural_pdf/ocr/engine_doctr.py,sha256=ptKrupMWoulZb-R93zr9btoe94JPWU7vlJuN7OBJEIM,17740
|
64
65
|
natural_pdf/ocr/engine_easyocr.py,sha256=bWz6kHUgAJfe3rqdnZBAF-IPvw3B35DlvX5KDdFUtzo,9888
|
65
|
-
natural_pdf/ocr/engine_paddle.py,sha256=
|
66
|
+
natural_pdf/ocr/engine_paddle.py,sha256=0vobobjnsM1G3zihYL7f1roLlKKZWRwioxkGkgIxEUA,16159
|
66
67
|
natural_pdf/ocr/engine_surya.py,sha256=PNjvpsHnBghAoa-df52HEyvXzfNI-gTFgKvs2LxHgKo,5051
|
67
|
-
natural_pdf/ocr/ocr_factory.py,sha256=
|
68
|
-
natural_pdf/ocr/ocr_manager.py,sha256=
|
68
|
+
natural_pdf/ocr/ocr_factory.py,sha256=GkODuBmqNVECg4u1-KW6ZMfBgVndLkK1W5GM15faf8M,5318
|
69
|
+
natural_pdf/ocr/ocr_manager.py,sha256=K2gpFo3e6RB1ouXOstlEAAYd14DbjBNt5RH6J7ZdDQY,14263
|
69
70
|
natural_pdf/ocr/ocr_options.py,sha256=l33QKu_93r-uwi3t_v8UH8pEgHo6HTVzP4tfmQFRF1w,5488
|
70
71
|
natural_pdf/ocr/utils.py,sha256=OxuHwDbHWj6setvnC0QYwMHrAjxGkhmLzWHpMqqGupA,4397
|
71
72
|
natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
|
@@ -90,8 +91,9 @@ natural_pdf/utils/text_extraction.py,sha256=z6Jhy11pakYCsEpkvh8ldw6DkUFsYF1hCL9Y
|
|
90
91
|
natural_pdf/utils/visualization.py,sha256=30pRWQdsRJh2pSObh-brKVsFgC1n8tHmSrta_UDnVPw,8989
|
91
92
|
natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
|
92
93
|
natural_pdf/widgets/viewer.py,sha256=2VUY1TzWMDe9I-IVNOosKZ2LaqpjLB62ftMAdk-s6_8,24952
|
93
|
-
natural_pdf-0.1.
|
94
|
-
natural_pdf-0.1.
|
95
|
-
natural_pdf-0.1.
|
96
|
-
natural_pdf-0.1.
|
97
|
-
natural_pdf-0.1.
|
94
|
+
natural_pdf-0.1.20.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
|
95
|
+
natural_pdf-0.1.20.dist-info/METADATA,sha256=iyT4zmi24PZugVNCIjoUYX2ShPejzxCPx0ZuuHxg-UU,6054
|
96
|
+
natural_pdf-0.1.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
97
|
+
natural_pdf-0.1.20.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
|
98
|
+
natural_pdf-0.1.20.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
|
99
|
+
natural_pdf-0.1.20.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|