natural-pdf 0.1.18__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,43 +5,11 @@ from typing import Any, Dict, List, Optional, Type, Union
5
5
 
6
6
  from PIL import Image
7
7
 
8
- # --- Import detector classes and options ---
9
- # Use try-except blocks for robustness if some detectors might be missing dependencies
10
- try:
11
- from .base import LayoutDetector
12
- except ImportError:
13
- LayoutDetector = type("LayoutDetector", (), {})
14
-
15
- try:
16
- from .yolo import YOLODocLayoutDetector
17
- except ImportError:
18
- YOLODocLayoutDetector = None
19
-
20
- try:
21
- from .tatr import TableTransformerDetector
22
- except ImportError:
23
- TableTransformerDetector = None
24
-
25
- try:
26
- from .paddle import PaddleLayoutDetector
27
- except ImportError:
28
- PaddleLayoutDetector = None
29
-
30
- try:
31
- from .surya import SuryaLayoutDetector
32
- except ImportError:
33
- SuryaLayoutDetector = None
34
-
35
- try:
36
- from .docling import DoclingLayoutDetector
37
- except ImportError:
38
- DoclingLayoutDetector = None
39
-
40
- try:
41
- from .gemini import GeminiLayoutDetector
42
- except ImportError:
43
- GeminiLayoutDetector = None
8
+ # --- Import lightweight components only ---
9
+ # Heavy detector implementations (paddle, yolo, etc.) are **not** imported at module load.
10
+ # Instead, we provide tiny helper functions that import them lazily **only when needed**.
44
11
 
12
+ from .base import LayoutDetector # Lightweight base class
45
13
  from .layout_options import (
46
14
  BaseLayoutOptions,
47
15
  DoclingLayoutOptions,
@@ -53,6 +21,47 @@ from .layout_options import (
53
21
  YOLOLayoutOptions,
54
22
  )
55
23
 
24
+ # ------------------ Lazy import helpers ------------------ #
25
+
26
+
27
def _lazy_import_yolo_detector():
    """Import YOLO detector lazily to avoid heavy deps at import time.

    Returns:
        The ``YOLODocLayoutDetector`` class. The ``.yolo`` module is imported
        only when this function is called, not when this module is loaded.
    """
    from .yolo import YOLODocLayoutDetector  # Local import

    return YOLODocLayoutDetector
32
+
33
+
34
def _lazy_import_tatr_detector():
    """Import the Table Transformer detector lazily.

    Returns:
        The ``TableTransformerDetector`` class. The ``.tatr`` module is
        imported only when this function is called.
    """
    from .tatr import TableTransformerDetector

    return TableTransformerDetector
38
+
39
+
40
def _lazy_import_paddle_detector():
    """Import the Paddle layout detector lazily.

    Returns:
        The ``PaddleLayoutDetector`` class. The ``.paddle`` module is imported
        only when this function is called.
    """
    from .paddle import PaddleLayoutDetector

    return PaddleLayoutDetector
44
+
45
+
46
def _lazy_import_surya_detector():
    """Import the Surya layout detector lazily.

    Returns:
        The ``SuryaLayoutDetector`` class. The ``.surya`` module is imported
        only when this function is called.
    """
    from .surya import SuryaLayoutDetector

    return SuryaLayoutDetector
50
+
51
+
52
def _lazy_import_docling_detector():
    """Import the Docling layout detector lazily.

    Returns:
        The ``DoclingLayoutDetector`` class. The ``.docling`` module is
        imported only when this function is called.
    """
    from .docling import DoclingLayoutDetector

    return DoclingLayoutDetector
56
+
57
+
58
def _lazy_import_gemini_detector():
    """Import the Gemini layout detector lazily.

    Returns:
        The ``GeminiLayoutDetector`` class. The ``.gemini`` module is imported
        only when this function is called.
    """
    from .gemini import GeminiLayoutDetector

    return GeminiLayoutDetector
62
+
63
+ # --------------------------------------------------------- #
64
+
56
65
  logger = logging.getLogger(__name__)
57
66
 
58
67
 
@@ -62,39 +71,34 @@ class LayoutManager:
62
71
  # Registry mapping engine names to classes and default options
63
72
  ENGINE_REGISTRY: Dict[str, Dict[str, Any]] = {}
64
73
 
65
- # Populate registry only with available detectors
66
- if YOLODocLayoutDetector:
67
- ENGINE_REGISTRY["yolo"] = {
68
- "class": YOLODocLayoutDetector,
74
+ # Populate registry with lazy import callables. The heavy imports are executed only
75
+ # when the corresponding engine is first requested.
76
+ ENGINE_REGISTRY = {
77
+ "yolo": {
78
+ "class": _lazy_import_yolo_detector, # returns detector class when called
69
79
  "options_class": YOLOLayoutOptions,
70
- }
71
- if TableTransformerDetector:
72
- ENGINE_REGISTRY["tatr"] = {
73
- "class": TableTransformerDetector,
80
+ },
81
+ "tatr": {
82
+ "class": _lazy_import_tatr_detector,
74
83
  "options_class": TATRLayoutOptions,
75
- }
76
- if PaddleLayoutDetector:
77
- ENGINE_REGISTRY["paddle"] = {
78
- "class": PaddleLayoutDetector,
84
+ },
85
+ "paddle": {
86
+ "class": _lazy_import_paddle_detector,
79
87
  "options_class": PaddleLayoutOptions,
80
- }
81
- if SuryaLayoutDetector:
82
- ENGINE_REGISTRY["surya"] = {
83
- "class": SuryaLayoutDetector,
88
+ },
89
+ "surya": {
90
+ "class": _lazy_import_surya_detector,
84
91
  "options_class": SuryaLayoutOptions,
85
- }
86
- if DoclingLayoutDetector:
87
- ENGINE_REGISTRY["docling"] = {
88
- "class": DoclingLayoutDetector,
92
+ },
93
+ "docling": {
94
+ "class": _lazy_import_docling_detector,
89
95
  "options_class": DoclingLayoutOptions,
90
- }
91
-
92
- # Add Gemini entry if available
93
- if GeminiLayoutDetector:
94
- ENGINE_REGISTRY["gemini"] = {
95
- "class": GeminiLayoutDetector,
96
+ },
97
+ "gemini": {
98
+ "class": _lazy_import_gemini_detector,
96
99
  "options_class": GeminiLayoutOptions,
97
- }
100
+ },
101
+ }
98
102
 
99
103
  def __init__(self):
100
104
  """Initializes the Layout Manager."""
@@ -114,25 +118,24 @@ class LayoutManager:
114
118
 
115
119
  if engine_name not in self._detector_instances:
116
120
  logger.info(f"Creating instance of layout engine: {engine_name}")
117
- engine_class = self.ENGINE_REGISTRY[engine_name]["class"]
121
+ engine_class_or_factory = self.ENGINE_REGISTRY[engine_name]["class"]
122
+ # If the registry provides a callable (lazy import helper), call it to obtain the real class.
123
+ if callable(engine_class_or_factory) and not isinstance(engine_class_or_factory, type):
124
+ engine_class = engine_class_or_factory()
125
+ else:
126
+ engine_class = engine_class_or_factory
127
+
118
128
  detector_instance = engine_class() # Instantiate
119
129
  if not detector_instance.is_available():
120
130
  # Check availability before storing
121
131
  # Construct helpful error message with install hint
122
132
  install_hint = ""
123
- if engine_name == "yolo":
124
- install_hint = "pip install doclayout_yolo"
133
+ if engine_name in {"yolo", "paddle", "surya", "docling"}:
134
+ install_hint = f"natural-pdf install {engine_name}"
125
135
  elif engine_name == "tatr":
126
- # This should now be installed with core dependencies
127
- install_hint = "(should be installed with natural-pdf, check for import errors)"
128
- elif engine_name == "paddle":
129
- install_hint = "pip install paddleocr paddlepaddle"
130
- elif engine_name == "surya":
131
- install_hint = "pip install surya-ocr"
132
- elif engine_name == "docling":
133
- install_hint = "pip install docling"
136
+ install_hint = "(should be installed with natural-pdf core dependencies)"
134
137
  elif engine_name == "gemini":
135
- install_hint = "pip install openai"
138
+ install_hint = "pip install openai" # keep as-is for now
136
139
  else:
137
140
  install_hint = f"(Check installation requirements for {engine_name})"
138
141
 
@@ -201,14 +204,17 @@ class LayoutManager:
201
204
  available = []
202
205
  for name, registry_entry in self.ENGINE_REGISTRY.items():
203
206
  try:
204
- engine_class = registry_entry["class"]
205
- # Check availability without full instantiation if possible
207
+ engine_class_or_factory = registry_entry["class"]
208
+ if callable(engine_class_or_factory) and not isinstance(engine_class_or_factory, type):
209
+ # Lazy factory – call it to obtain real class
210
+ engine_class = engine_class_or_factory()
211
+ else:
212
+ engine_class = engine_class_or_factory
213
+
206
214
  if hasattr(engine_class, "is_available") and callable(engine_class.is_available):
207
- # Create temporary instance only for check if needed, or use classmethod
208
- if engine_class().is_available(): # Assumes instance needed for check
215
+ if engine_class().is_available():
209
216
  available.append(name)
210
217
  else:
211
- # Assume available if class exists (less robust)
212
218
  available.append(name)
213
219
  except Exception as e:
214
220
  logger.debug(f"Layout engine '{name}' check failed: {e}")
@@ -92,7 +92,7 @@ class YOLODocLayoutDetector(LayoutDetector):
92
92
  """Load the YOLOv10 model based on options."""
93
93
  if not self.is_available():
94
94
  raise RuntimeError(
95
- "YOLO dependencies not installed. Please run: pip install 'natural-pdf[layout_yolo]'"
95
+ "YOLO dependencies not installed. Please run: natural-pdf install yolo"
96
96
  )
97
97
  self.logger.info(f"Loading YOLO model: {options.model_repo}/{options.model_file}")
98
98
  try:
@@ -108,7 +108,7 @@ class YOLODocLayoutDetector(LayoutDetector):
108
108
  """Detect layout elements in an image using YOLO."""
109
109
  if not self.is_available():
110
110
  raise RuntimeError(
111
- "YOLO dependencies not installed. Please run: pip install 'natural-pdf[layout_yolo]'"
111
+ "YOLO dependencies not installed. Please run: natural-pdf install yolo"
112
112
  )
113
113
 
114
114
  # Ensure options are the correct type, falling back to defaults if base type passed
natural_pdf/cli.py ADDED
@@ -0,0 +1,134 @@
1
+ import argparse
2
+ import subprocess
3
+ import sys
4
+ from importlib.metadata import distribution, PackageNotFoundError, version as get_version
5
+ from pathlib import Path
6
+ from typing import Dict
7
+ from packaging.requirements import Requirement
8
+
9
+ # ---------------------------------------------------------------------------
10
+ # Mapping: sub-command name -> list of pip requirement specifiers to install
11
+ # ---------------------------------------------------------------------------
12
# Name of the top-level import package, used to build "<package>[extra]"
# requirement strings below.  ``__package__`` is ``None`` when this file is
# executed directly as a script and may be an empty string for a top-level
# module, so fall back to the known package name instead of crashing on
# ``None.split`` or producing a nameless "[extra]" requirement.
_ROOT_PACKAGE = (__package__ or "natural_pdf").split(".")[0]

INSTALL_RECIPES: Dict[str, list[str]] = {
    # heavyweight stacks
    "paddle": ["paddlepaddle>=3.0.0", "paddleocr>=3.0.1", "paddlex>=3.0.2"],
    "surya": ["surya-ocr>=0.13.0"],
    "yolo": ["doclayout_yolo", "huggingface_hub>=0.29.3"],
    "docling": ["docling"],
    # light helpers (delegated to this package's own extras)
    "deskew": [f"{_ROOT_PACKAGE}[deskew]"],
    "search": [f"{_ROOT_PACKAGE}[search]"],
    "easyocr": ["easyocr"],
}
23
+
24
+
25
def _build_pip_install_args(requirements: list[str], upgrade: bool = True):
    """Assemble the ``python -m pip install`` argument list for *requirements*.

    Args:
        requirements: Requirement strings appended to the command as-is.
        upgrade: When true, ``--upgrade`` is placed before the requirements.

    Returns:
        The command as a list, starting with the current interpreter path.
    """
    upgrade_flag = ["--upgrade"] if upgrade else []
    return [sys.executable, "-m", "pip", "install", *upgrade_flag, *requirements]
32
+
33
+
34
def _run(cmd):
    """Echo *cmd* in shell syntax, then execute it.

    Args:
        cmd: The command as a list of argument strings.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (raised by ``subprocess.check_call``).
    """
    import shlex  # local import: only needed for the echo below

    # Quote each argument so the echoed line can be pasted into a shell.
    # A plain " ".join would print e.g. ``paddlepaddle>=3.0.0`` unquoted,
    # which a shell interprets as an output redirection.
    print("$", shlex.join(cmd), flush=True)
    subprocess.check_call(cmd)
37
+
38
+
39
def _requirements_satisfied(requirements):
    """Return True when every requirement string is installed at an accepted version."""
    for req_str in requirements:
        req = Requirement(req_str)
        try:
            installed = distribution(req.name).version
        except PackageNotFoundError:
            return False
        # prereleases=True: an installed pre-release still counts as present.
        if not req.specifier.contains(installed, prereleases=True):
            return False
    return True


def cmd_install(args):
    """Install the optional dependency groups named in ``args.extras``.

    Each name is lower-cased and looked up in ``INSTALL_RECIPES``.  Every
    requirement of a group is installed in its own pip invocation.  The
    ``paddle`` group is special-cased: paddlepaddle and paddleocr go first,
    then ``paddlex==3.0.2`` is installed last to override the strict pin.

    Args:
        args: Parsed CLI namespace; only ``args.extras`` is read.

    Raises:
        SystemExit: With status 1, after all known groups were processed, if
            any requested name is not a known group.
        subprocess.CalledProcessError: If a pip invocation fails.
    """
    unknown = []
    for extra in args.extras:
        group_key = extra.lower()
        if group_key not in INSTALL_RECIPES:
            print(
                f"❌ Unknown extra '{group_key}'. Known extras: {', '.join(sorted(INSTALL_RECIPES))}",
                file=sys.stderr,
            )
            unknown.append(group_key)
            continue

        requirements = INSTALL_RECIPES[group_key]

        if group_key == "paddle":
            # Skip only when the WHOLE stack is present.  Checking paddlex
            # alone would leave a missing paddlepaddle/paddleocr uninstalled.
            if _requirements_satisfied(requirements):
                print("✓ Paddle stack already installed – nothing to do.")
                continue

            # paddlepaddle & paddleocr first, each in its own resolver run.
            for req in requirements:
                if not req.startswith("paddlex"):
                    _run(_build_pip_install_args([req]))

            # paddlex last to override the strict pin
            _run(_build_pip_install_args(["paddlex==3.0.2"]))
            print("✔ Paddle stack installed (paddlex upgraded to 3.0.2)")
        else:
            for req in requirements:
                _run(_build_pip_install_args([req]))
            print("✔ Finished installing extra dependencies for", group_key)

    if unknown:
        # Report the failure through the exit status so scripts and CI notice
        # a mistyped group name instead of seeing a silent success.
        raise SystemExit(1)
79
+
80
+
81
def main(argv=None):
    """Parse command-line arguments and dispatch to the chosen sub-command.

    Args:
        argv: Argument list to parse.  ``None`` (the default, which is what
            the console-script entry points use) makes argparse read
            ``sys.argv[1:]``; passing a list allows programmatic invocation.
    """
    parser = argparse.ArgumentParser(
        prog="npdf",
        description="Utility CLI for the natural-pdf library",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    # install subcommand
    install_p = subparsers.add_parser(
        "install", help="Install optional dependency groups (e.g. paddle, surya)"
    )
    install_p.add_argument(
        "extras", nargs="+", help="One or more extras to install (e.g. paddle surya)"
    )
    install_p.set_defaults(func=cmd_install)

    # list subcommand
    list_p = subparsers.add_parser("list", help="Show status of optional dependency groups")
    list_p.set_defaults(func=cmd_list)

    args = parser.parse_args(argv)
    # Each sub-parser stored its handler under ``func`` via set_defaults.
    args.func(args)
101
+
102
+
103
+ # ---------------------------------------------------------------------------
104
+ # List command implementation
105
+ # ---------------------------------------------------------------------------
106
+
107
+
108
def _pkg_version(pkg_name: str):
    """Look up the installed version of *pkg_name*.

    Returns:
        The version string reported by ``importlib.metadata``, or ``None``
        when no such distribution is installed.
    """
    try:
        found = get_version(pkg_name)
    except PackageNotFoundError:
        found = None
    return found
113
+
114
+
115
def _extra_member_names(req):
    """Return the distribution names that *req*'s extras pull in.

    Reads the ``Requires-Dist`` metadata of the installed distribution
    ``req.name`` and keeps the entries whose marker is true for one of
    ``req.extras`` but false without any extra.  Returns an empty list when
    the distribution is not installed or declares no such entries.
    """
    try:
        declared = distribution(req.name).requires or []
    except PackageNotFoundError:
        return []

    members = []
    for dep_str in declared:
        dep = Requirement(dep_str)
        if dep.marker is None:
            continue  # unconditional dependency, not tied to an extra
        selected = any(dep.marker.evaluate({"extra": extra}) for extra in req.extras)
        if selected and not dep.marker.evaluate({"extra": ""}):
            members.append(dep.name)
    return members


def cmd_list(args):
    """Print, for every group in ``INSTALL_RECIPES``, which packages are installed.

    A recipe written as ``<package>[extra]`` is expanded to the packages that
    extra declares.  Checking only the requirement's own name would report
    this package's version and mark the group as installed even when the
    extra's dependencies are absent.  If the expansion yields nothing, the
    requirement's own name is checked instead.

    Args:
        args: Parsed CLI namespace (unused).
    """
    print("Optional dependency groups status:\n")
    for extra, reqs in INSTALL_RECIPES.items():
        pkg_names = []
        for req_str in reqs:
            req = Requirement(req_str)  # strip version specifiers
            expanded = _extra_member_names(req) if req.extras else []
            pkg_names.extend(expanded or [req.name])

        installed_all = True
        pieces = []
        for pkg_name in pkg_names:
            ver = _pkg_version(pkg_name)
            if ver is None:
                installed_all = False
                pieces.append(f"{pkg_name} (missing)")
            else:
                pieces.append(f"{pkg_name} {ver}")
        status = "✓" if installed_all else "✗"
        print(f"{status} {extra:<8} -> " + ", ".join(pieces))
    print("\nLegend: ✓ group fully installed, ✗ some packages missing\n")
131
+
132
+
133
# Run the CLI when this file is executed as a script or with ``python -m``.
if __name__ == "__main__":
    main()
@@ -4,6 +4,7 @@ from .base import FinetuneExporter
4
4
  def _get_paddleocr_exporter():
5
5
  """Lazy import for PaddleOCRRecognitionExporter."""
6
6
  from .paddleocr import PaddleOCRRecognitionExporter
7
+
7
8
  return PaddleOCRRecognitionExporter
8
9
 
9
10
  # Make PaddleOCRRecognitionExporter available through attribute access
@@ -127,7 +127,7 @@ class PaddleOCREngine(OCREngine):
127
127
  except ImportError as e:
128
128
  self.logger.error(f"Failed to import PaddleOCR/PaddlePaddle: {e}")
129
129
  raise RuntimeError(
130
- "paddleocr is not available. Please install it and paddlepaddle with: pip install -U paddlepaddle paddleocr"
130
+ "paddleocr is not available. Install via: natural-pdf install paddle"
131
131
  ) from e
132
132
 
133
133
  paddle_options = options if isinstance(options, PaddleOCROptions) else PaddleOCROptions()
@@ -32,7 +32,7 @@ class OCRFactory:
32
32
  return SuryaOCREngine(**kwargs)
33
33
  except ImportError:
34
34
  raise ImportError(
35
- "Surya engine requires the 'surya' package. " "Install with: pip install surya"
35
+ "Surya engine requires additional dependencies. " "Install with: natural-pdf install surya"
36
36
  )
37
37
  elif engine_type == "easyocr":
38
38
  try:
@@ -42,7 +42,7 @@ class OCRFactory:
42
42
  except ImportError:
43
43
  raise ImportError(
44
44
  "EasyOCR engine requires the 'easyocr' package. "
45
- "Install with: pip install easyocr"
45
+ "Install with: pip install easyocr (or natural-pdf install easyocr when available)"
46
46
  )
47
47
  elif engine_type == "paddle":
48
48
  try:
@@ -52,7 +52,7 @@ class OCRFactory:
52
52
  except ImportError:
53
53
  raise ImportError(
54
54
  "PaddleOCR engine requires 'paddleocr' and 'paddlepaddle'. "
55
- "Install with: pip install paddleocr paddlepaddle"
55
+ "Install with: natural-pdf install paddle"
56
56
  )
57
57
  elif engine_type == "doctr":
58
58
  try:
@@ -62,7 +62,7 @@ class OCRFactory:
62
62
  except ImportError:
63
63
  raise ImportError(
64
64
  "Doctr engine requires the 'python-doctr' package. "
65
- "Install with: pip install python-doctr[torch] or python-doctr[tf]"
65
+ "Install with: pip install python-doctr[torch]"
66
66
  )
67
67
  else:
68
68
  raise ValueError(f"Unknown engine type: {engine_type}")
@@ -137,9 +137,9 @@ class OCRFactory:
137
137
 
138
138
  # If we get here, no engines are available
139
139
  raise ImportError(
140
- "No OCR engines available. Please install at least one of: \n"
141
- "- EasyOCR (recommended): pip install easyocr\n"
142
- "- Doctr: pip install python-doctr[torch] or python-doctr[tf]\n"
143
- "- PaddleOCR: pip install paddleocr paddlepaddle\n"
144
- "- Surya OCR: pip install surya"
140
+ "No OCR engines are installed. You can add one via the natural-pdf installer, e.g.:\n"
141
+ " natural-pdf install easyocr # fastest to set up\n"
142
+ " natural-pdf install paddle # best Asian-language accuracy\n"
143
+ " natural-pdf install surya # Surya OCR engine\n"
144
+ " natural-pdf install yolo # Layout detection (YOLO)\n"
145
145
  )
@@ -94,15 +94,7 @@ class OCRManager:
94
94
  engine_instance = engine_class() # Instantiate first
95
95
  if not engine_instance.is_available():
96
96
  # Check availability before storing
97
- install_hint = f"pip install 'natural-pdf[{engine_name}]'"
98
- if engine_name == "easyocr":
99
- install_hint = "pip install easyocr"
100
- elif engine_name == "paddle":
101
- install_hint = "pip install paddleocr paddlepaddle"
102
- elif engine_name == "surya":
103
- install_hint = "pip install surya-ocr"
104
- elif engine_name == "doctr":
105
- install_hint = "pip install 'python-doctr[torch]'"
97
+ install_hint = f"natural-pdf install {engine_name}"
106
98
 
107
99
  raise RuntimeError(
108
100
  f"Engine '{engine_name}' is not available. Please install the required dependencies: {install_hint}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.18
3
+ Version: 0.1.20
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -34,14 +34,6 @@ Provides-Extra: test
34
34
  Requires-Dist: pytest; extra == "test"
35
35
  Requires-Dist: pytest-xdist; extra == "test"
36
36
  Requires-Dist: setuptools; extra == "test"
37
- Provides-Extra: search
38
- Requires-Dist: lancedb; extra == "search"
39
- Requires-Dist: pyarrow; extra == "search"
40
- Provides-Extra: favorites
41
- Requires-Dist: natural-pdf[deskew]; extra == "favorites"
42
- Requires-Dist: natural-pdf[ocr-export]; extra == "favorites"
43
- Requires-Dist: natural-pdf[search]; extra == "favorites"
44
- Requires-Dist: surya-ocr; extra == "favorites"
45
37
  Provides-Extra: dev
46
38
  Requires-Dist: black; extra == "dev"
47
39
  Requires-Dist: isort; extra == "dev"
@@ -59,25 +51,19 @@ Requires-Dist: nbclient; extra == "dev"
59
51
  Requires-Dist: ipykernel; extra == "dev"
60
52
  Requires-Dist: pre-commit; extra == "dev"
61
53
  Requires-Dist: setuptools; extra == "dev"
62
- Provides-Extra: deskew
63
- Requires-Dist: deskew>=1.5; extra == "deskew"
64
- Requires-Dist: img2pdf; extra == "deskew"
65
54
  Provides-Extra: all
66
55
  Requires-Dist: natural-pdf[ocr-export]; extra == "all"
67
56
  Requires-Dist: natural-pdf[deskew]; extra == "all"
68
57
  Requires-Dist: natural-pdf[test]; extra == "all"
69
58
  Requires-Dist: natural-pdf[search]; extra == "all"
70
- Requires-Dist: natural-pdf[extras]; extra == "all"
71
59
  Requires-Dist: natural-pdf[favorites]; extra == "all"
72
- Provides-Extra: paddle
73
- Requires-Dist: paddlepaddle>=3.0.0; extra == "paddle"
74
- Requires-Dist: paddleocr>=3.0.1; extra == "paddle"
75
- Requires-Dist: paddlex>=3.0.1; extra == "paddle"
76
- Provides-Extra: extras
77
- Requires-Dist: surya-ocr; extra == "extras"
78
- Requires-Dist: doclayout_yolo; extra == "extras"
79
- Requires-Dist: easyocr; extra == "extras"
80
- Requires-Dist: natural-pdf[paddle]; extra == "extras"
60
+ Requires-Dist: natural-pdf[export-extras]; extra == "all"
61
+ Provides-Extra: deskew
62
+ Requires-Dist: deskew>=1.5; extra == "deskew"
63
+ Requires-Dist: img2pdf; extra == "deskew"
64
+ Provides-Extra: search
65
+ Requires-Dist: lancedb; extra == "search"
66
+ Requires-Dist: pyarrow; extra == "search"
81
67
  Provides-Extra: ocr-export
82
68
  Requires-Dist: pikepdf; extra == "ocr-export"
83
69
  Provides-Extra: export-extras
@@ -1,4 +1,5 @@
1
1
  natural_pdf/__init__.py,sha256=qDFJNF8sbEDO-2WSFAxoWEM8updOUP6dB-ckya0kxfs,3275
2
+ natural_pdf/cli.py,sha256=0nAGVO2f_40E3G9c3Q0bfK5mhROyUJH5W25-YJVLMIo,4749
2
3
  natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
3
4
  natural_pdf/analyzers/shape_detection_mixin.py,sha256=HHefg-v7CJMxYiJHxdGOdqdtbWe9yk4OBoW3a_aRrjM,81798
4
5
  natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
@@ -9,14 +10,14 @@ natural_pdf/analyzers/layout/base.py,sha256=bYawhmc_0xqKG-xbxUSiazIU1om-aBox5Jh8
9
10
  natural_pdf/analyzers/layout/docling.py,sha256=4BJYyNVR6VegZGxyisvNIBBRvVk6YKPyDVs7ZdVfzEU,12676
10
11
  natural_pdf/analyzers/layout/gemini.py,sha256=ldECVCQ5HNQA3Omjg2NOsTrJXslyYb0vErDncmLIiuE,10510
11
12
  natural_pdf/analyzers/layout/layout_analyzer.py,sha256=n327Zjuf7aSzKQKChPHeiCVHinzeDGaWNyKiwQ-DkJk,15571
12
- natural_pdf/analyzers/layout/layout_manager.py,sha256=NkOZJCFiaZuvDN4Jgu1GsYRtlSJEy2tU_I-v-4EQQqU,8322
13
+ natural_pdf/analyzers/layout/layout_manager.py,sha256=sDnh7XE-Wx2EBmgjipbvaLZQ7VSG6MfjEKfNsNXPNHs,8583
13
14
  natural_pdf/analyzers/layout/layout_options.py,sha256=-Nv6bcu4_pqSCN6uNhCZ9mvoCBtRDZIUkO6kjkuLXsg,7703
14
15
  natural_pdf/analyzers/layout/paddle.py,sha256=tX2bI1yayAdmRhvsfZ_Ygs7zAG5e9eW-pLJkw4NUpBQ,21325
15
16
  natural_pdf/analyzers/layout/pdfplumber_table_finder.py,sha256=Tk0Q7wv7nGYPo69lh6RoezjdepTnMl90SaNIrP29Pwc,5902
16
17
  natural_pdf/analyzers/layout/surya.py,sha256=4RdnhRxSS3i3Ns5mFhOA9-P0xd7Ms19uZuKvUGQfEBI,9789
17
18
  natural_pdf/analyzers/layout/table_structure_utils.py,sha256=nISZDBd46RPYkFHxbQyIHwg9WweG4DslpoYJ31OMJYA,2768
18
19
  natural_pdf/analyzers/layout/tatr.py,sha256=cVr0ZyhY2mNLAKZ4DGMm-b7XNJpILKh8x8ZpyDeUhLk,15032
19
- natural_pdf/analyzers/layout/yolo.py,sha256=ANo2U4EZgeN2eYKM1bZIuysiuJLgwl4JeQchrRxOKwA,8388
20
+ natural_pdf/analyzers/layout/yolo.py,sha256=Iw8qsIOHg2lUP7z9GsmkOm3c9kJ-Ywk01Oej50kZgDw,8360
20
21
  natural_pdf/classification/manager.py,sha256=pzuTP-34W9N3im1ZFhCfQpOu37VSHEx4JHoHNxyy6o0,18894
21
22
  natural_pdf/classification/mixin.py,sha256=_XtoqCMqj1nxZYskIV2RbVYiVVcEWzFwae4s5vpzC74,6566
22
23
  natural_pdf/classification/results.py,sha256=El1dY7cBQVOB5lP-uj52dWgH6Y7TeQgJOVcZD-OLjes,2778
@@ -40,7 +41,7 @@ natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,
40
41
  natural_pdf/elements/region.py,sha256=hBklYKcXJWyxayu9todYQOZ-d9KVDtqeV-CIt9IcSn8,123400
41
42
  natural_pdf/elements/text.py,sha256=13HvVZGinj2Vm_fFCAnqi7hohtoKvnpCp3VCfkpeAbc,11146
42
43
  natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
43
- natural_pdf/exporters/__init__.py,sha256=XG0ckcKHgG7IVma75syORUme6wEItUvDA46aCZzGqrU,639
44
+ natural_pdf/exporters/__init__.py,sha256=g1WRPCDVzceaUUsm8dchPhzdHFSjYM0NfRyc8iN0mtE,644
44
45
  natural_pdf/exporters/base.py,sha256=XhR1xlkHOh7suOuX7mWbsj1h2o1pZNet-OAS5YCJyeI,2115
45
46
  natural_pdf/exporters/hocr.py,sha256=wksvJvWLSxuAfhYzg_0T2_W8eqDoMgAVC-gwZ9FoO_k,19969
46
47
  natural_pdf/exporters/hocr_font.py,sha256=1wsGOMj6zoaRN2rxCwrv4MMLGawpNz984WgXpmWekgw,4574
@@ -62,10 +63,10 @@ natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2
62
63
  natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
63
64
  natural_pdf/ocr/engine_doctr.py,sha256=ptKrupMWoulZb-R93zr9btoe94JPWU7vlJuN7OBJEIM,17740
64
65
  natural_pdf/ocr/engine_easyocr.py,sha256=bWz6kHUgAJfe3rqdnZBAF-IPvw3B35DlvX5KDdFUtzo,9888
65
- natural_pdf/ocr/engine_paddle.py,sha256=ZUtyjso_UjjAPnJt5ac-AtOpR6PfOhO76iOyjngGzr0,16198
66
+ natural_pdf/ocr/engine_paddle.py,sha256=0vobobjnsM1G3zihYL7f1roLlKKZWRwioxkGkgIxEUA,16159
66
67
  natural_pdf/ocr/engine_surya.py,sha256=PNjvpsHnBghAoa-df52HEyvXzfNI-gTFgKvs2LxHgKo,5051
67
- natural_pdf/ocr/ocr_factory.py,sha256=gBFXdFs7E4aCynHz06sQsAhaO3s8yhgoFgN5nyxtg9c,5221
68
- natural_pdf/ocr/ocr_manager.py,sha256=M1GRAThzWl5iMkQJ41j84G6cJ7XruQD_HoPPzWf7nUk,14742
68
+ natural_pdf/ocr/ocr_factory.py,sha256=GkODuBmqNVECg4u1-KW6ZMfBgVndLkK1W5GM15faf8M,5318
69
+ natural_pdf/ocr/ocr_manager.py,sha256=K2gpFo3e6RB1ouXOstlEAAYd14DbjBNt5RH6J7ZdDQY,14263
69
70
  natural_pdf/ocr/ocr_options.py,sha256=l33QKu_93r-uwi3t_v8UH8pEgHo6HTVzP4tfmQFRF1w,5488
70
71
  natural_pdf/ocr/utils.py,sha256=OxuHwDbHWj6setvnC0QYwMHrAjxGkhmLzWHpMqqGupA,4397
71
72
  natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
@@ -90,8 +91,9 @@ natural_pdf/utils/text_extraction.py,sha256=z6Jhy11pakYCsEpkvh8ldw6DkUFsYF1hCL9Y
90
91
  natural_pdf/utils/visualization.py,sha256=30pRWQdsRJh2pSObh-brKVsFgC1n8tHmSrta_UDnVPw,8989
91
92
  natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
92
93
  natural_pdf/widgets/viewer.py,sha256=2VUY1TzWMDe9I-IVNOosKZ2LaqpjLB62ftMAdk-s6_8,24952
93
- natural_pdf-0.1.18.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
94
- natural_pdf-0.1.18.dist-info/METADATA,sha256=aU8IC02yZuy1aUrHhtDCHEp5igjwaUGP1NDnFDsOTL8,6684
95
- natural_pdf-0.1.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
96
- natural_pdf-0.1.18.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
97
- natural_pdf-0.1.18.dist-info/RECORD,,
94
+ natural_pdf-0.1.20.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
95
+ natural_pdf-0.1.20.dist-info/METADATA,sha256=iyT4zmi24PZugVNCIjoUYX2ShPejzxCPx0ZuuHxg-UU,6054
96
+ natural_pdf-0.1.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
97
+ natural_pdf-0.1.20.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
98
+ natural_pdf-0.1.20.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
99
+ natural_pdf-0.1.20.dist-info/RECORD,,
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ natural-pdf = natural_pdf.cli:main
3
+ npdf = natural_pdf.cli:main