natural-pdf 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/api/index.md +386 -0
- docs/assets/favicon.png +3 -0
- docs/assets/favicon.svg +3 -0
- docs/assets/javascripts/custom.js +17 -0
- docs/assets/logo.svg +3 -0
- docs/assets/sample-screen.png +0 -0
- docs/assets/social-preview.png +17 -0
- docs/assets/social-preview.svg +17 -0
- docs/assets/stylesheets/custom.css +65 -0
- docs/document-qa/index.ipynb +435 -0
- docs/document-qa/index.md +79 -0
- docs/element-selection/index.ipynb +915 -0
- docs/element-selection/index.md +229 -0
- docs/index.md +170 -0
- docs/installation/index.md +69 -0
- docs/interactive-widget/index.ipynb +962 -0
- docs/interactive-widget/index.md +12 -0
- docs/layout-analysis/index.ipynb +818 -0
- docs/layout-analysis/index.md +185 -0
- docs/ocr/index.md +209 -0
- docs/pdf-navigation/index.ipynb +314 -0
- docs/pdf-navigation/index.md +97 -0
- docs/regions/index.ipynb +816 -0
- docs/regions/index.md +294 -0
- docs/tables/index.ipynb +658 -0
- docs/tables/index.md +144 -0
- docs/text-analysis/index.ipynb +370 -0
- docs/text-analysis/index.md +105 -0
- docs/text-extraction/index.ipynb +1478 -0
- docs/text-extraction/index.md +292 -0
- docs/tutorials/01-loading-and-extraction.ipynb +1710 -0
- docs/tutorials/01-loading-and-extraction.md +95 -0
- docs/tutorials/02-finding-elements.ipynb +340 -0
- docs/tutorials/02-finding-elements.md +149 -0
- docs/tutorials/03-extracting-blocks.ipynb +147 -0
- docs/tutorials/03-extracting-blocks.md +48 -0
- docs/tutorials/04-table-extraction.ipynb +114 -0
- docs/tutorials/04-table-extraction.md +50 -0
- docs/tutorials/05-excluding-content.ipynb +270 -0
- docs/tutorials/05-excluding-content.md +109 -0
- docs/tutorials/06-document-qa.ipynb +332 -0
- docs/tutorials/06-document-qa.md +91 -0
- docs/tutorials/07-layout-analysis.ipynb +288 -0
- docs/tutorials/07-layout-analysis.md +66 -0
- docs/tutorials/07-working-with-regions.ipynb +413 -0
- docs/tutorials/07-working-with-regions.md +151 -0
- docs/tutorials/08-spatial-navigation.ipynb +508 -0
- docs/tutorials/08-spatial-navigation.md +190 -0
- docs/tutorials/09-section-extraction.ipynb +2434 -0
- docs/tutorials/09-section-extraction.md +256 -0
- docs/tutorials/10-form-field-extraction.ipynb +512 -0
- docs/tutorials/10-form-field-extraction.md +201 -0
- docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
- docs/tutorials/11-enhanced-table-processing.md +9 -0
- docs/tutorials/12-ocr-integration.ipynb +604 -0
- docs/tutorials/12-ocr-integration.md +175 -0
- docs/tutorials/13-semantic-search.ipynb +1328 -0
- docs/tutorials/13-semantic-search.md +77 -0
- docs/visual-debugging/index.ipynb +2970 -0
- docs/visual-debugging/index.md +157 -0
- docs/visual-debugging/region.png +0 -0
- natural_pdf/__init__.py +50 -33
- natural_pdf/analyzers/__init__.py +2 -1
- natural_pdf/analyzers/layout/base.py +32 -24
- natural_pdf/analyzers/layout/docling.py +131 -72
- natural_pdf/analyzers/layout/gemini.py +264 -0
- natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
- natural_pdf/analyzers/layout/layout_manager.py +125 -58
- natural_pdf/analyzers/layout/layout_options.py +43 -17
- natural_pdf/analyzers/layout/paddle.py +152 -95
- natural_pdf/analyzers/layout/surya.py +164 -92
- natural_pdf/analyzers/layout/tatr.py +149 -84
- natural_pdf/analyzers/layout/yolo.py +89 -45
- natural_pdf/analyzers/text_options.py +22 -15
- natural_pdf/analyzers/text_structure.py +131 -85
- natural_pdf/analyzers/utils.py +30 -23
- natural_pdf/collections/pdf_collection.py +146 -97
- natural_pdf/core/__init__.py +1 -1
- natural_pdf/core/element_manager.py +419 -337
- natural_pdf/core/highlighting_service.py +268 -196
- natural_pdf/core/page.py +1044 -521
- natural_pdf/core/pdf.py +516 -313
- natural_pdf/elements/__init__.py +1 -1
- natural_pdf/elements/base.py +307 -225
- natural_pdf/elements/collections.py +805 -543
- natural_pdf/elements/line.py +39 -36
- natural_pdf/elements/rect.py +32 -30
- natural_pdf/elements/region.py +889 -879
- natural_pdf/elements/text.py +127 -99
- natural_pdf/exporters/__init__.py +0 -1
- natural_pdf/exporters/searchable_pdf.py +261 -102
- natural_pdf/ocr/__init__.py +57 -35
- natural_pdf/ocr/engine.py +150 -46
- natural_pdf/ocr/engine_easyocr.py +146 -150
- natural_pdf/ocr/engine_paddle.py +118 -175
- natural_pdf/ocr/engine_surya.py +78 -141
- natural_pdf/ocr/ocr_factory.py +114 -0
- natural_pdf/ocr/ocr_manager.py +122 -124
- natural_pdf/ocr/ocr_options.py +16 -20
- natural_pdf/ocr/utils.py +98 -0
- natural_pdf/qa/__init__.py +1 -1
- natural_pdf/qa/document_qa.py +119 -111
- natural_pdf/search/__init__.py +37 -31
- natural_pdf/search/haystack_search_service.py +312 -189
- natural_pdf/search/haystack_utils.py +186 -122
- natural_pdf/search/search_options.py +25 -14
- natural_pdf/search/search_service_protocol.py +12 -6
- natural_pdf/search/searchable_mixin.py +261 -176
- natural_pdf/selectors/__init__.py +2 -1
- natural_pdf/selectors/parser.py +159 -316
- natural_pdf/templates/__init__.py +1 -1
- natural_pdf/templates/spa/css/style.css +334 -0
- natural_pdf/templates/spa/index.html +31 -0
- natural_pdf/templates/spa/js/app.js +472 -0
- natural_pdf/templates/spa/words.txt +235976 -0
- natural_pdf/utils/debug.py +32 -0
- natural_pdf/utils/highlighting.py +8 -2
- natural_pdf/utils/identifiers.py +29 -0
- natural_pdf/utils/packaging.py +418 -0
- natural_pdf/utils/reading_order.py +65 -63
- natural_pdf/utils/text_extraction.py +195 -0
- natural_pdf/utils/visualization.py +70 -61
- natural_pdf/widgets/__init__.py +2 -3
- natural_pdf/widgets/viewer.py +749 -718
- {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/METADATA +53 -17
- natural_pdf-0.1.6.dist-info/RECORD +141 -0
- {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/WHEEL +1 -1
- natural_pdf-0.1.6.dist-info/top_level.txt +4 -0
- notebooks/Examples.ipynb +1293 -0
- pdfs/.gitkeep +0 -0
- pdfs/01-practice.pdf +543 -0
- pdfs/0500000US42001.pdf +0 -0
- pdfs/0500000US42007.pdf +0 -0
- pdfs/2014 Statistics.pdf +0 -0
- pdfs/2019 Statistics.pdf +0 -0
- pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- pdfs/needs-ocr.pdf +0 -0
- natural_pdf/templates/ocr_debug.html +0 -517
- natural_pdf-0.1.4.dist-info/RECORD +0 -61
- natural_pdf-0.1.4.dist-info/top_level.txt +0 -1
- {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/licenses/LICENSE +0 -0
@@ -1,15 +1,16 @@
|
|
1
1
|
# layout_manager.py
|
2
|
+
import copy
|
2
3
|
import logging
|
3
|
-
from typing import Dict, List,
|
4
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
5
|
+
|
4
6
|
from PIL import Image
|
5
|
-
import copy
|
6
7
|
|
7
8
|
# --- Import detector classes and options ---
|
8
9
|
# Use try-except blocks for robustness if some detectors might be missing dependencies
|
9
10
|
try:
|
10
11
|
from .base import LayoutDetector
|
11
12
|
except ImportError:
|
12
|
-
LayoutDetector = type(
|
13
|
+
LayoutDetector = type("LayoutDetector", (), {})
|
13
14
|
|
14
15
|
try:
|
15
16
|
from .yolo import YOLODocLayoutDetector
|
@@ -35,14 +36,26 @@ try:
|
|
35
36
|
from .docling import DoclingLayoutDetector
|
36
37
|
except ImportError:
|
37
38
|
DoclingLayoutDetector = None
|
38
|
-
|
39
|
+
|
40
|
+
try:
|
41
|
+
from .gemini import GeminiLayoutDetector
|
42
|
+
except ImportError:
|
43
|
+
GeminiLayoutDetector = None
|
44
|
+
|
39
45
|
from .layout_options import (
|
40
|
-
BaseLayoutOptions,
|
41
|
-
|
46
|
+
BaseLayoutOptions,
|
47
|
+
DoclingLayoutOptions,
|
48
|
+
GeminiLayoutOptions,
|
49
|
+
LayoutOptions,
|
50
|
+
PaddleLayoutOptions,
|
51
|
+
SuryaLayoutOptions,
|
52
|
+
TATRLayoutOptions,
|
53
|
+
YOLOLayoutOptions,
|
42
54
|
)
|
43
55
|
|
44
56
|
logger = logging.getLogger(__name__)
|
45
57
|
|
58
|
+
|
46
59
|
class LayoutManager:
|
47
60
|
"""Manages layout detector selection, configuration, and execution."""
|
48
61
|
|
@@ -50,46 +63,91 @@ class LayoutManager:
|
|
50
63
|
ENGINE_REGISTRY: Dict[str, Dict[str, Any]] = {}
|
51
64
|
|
52
65
|
# Populate registry only with available detectors
|
53
|
-
if YOLODocLayoutDetector:
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
66
|
+
if YOLODocLayoutDetector:
|
67
|
+
ENGINE_REGISTRY["yolo"] = {
|
68
|
+
"class": YOLODocLayoutDetector,
|
69
|
+
"options_class": YOLOLayoutOptions,
|
70
|
+
}
|
71
|
+
if TableTransformerDetector:
|
72
|
+
ENGINE_REGISTRY["tatr"] = {
|
73
|
+
"class": TableTransformerDetector,
|
74
|
+
"options_class": TATRLayoutOptions,
|
75
|
+
}
|
76
|
+
if PaddleLayoutDetector:
|
77
|
+
ENGINE_REGISTRY["paddle"] = {
|
78
|
+
"class": PaddleLayoutDetector,
|
79
|
+
"options_class": PaddleLayoutOptions,
|
80
|
+
}
|
81
|
+
if SuryaLayoutDetector:
|
82
|
+
ENGINE_REGISTRY["surya"] = {
|
83
|
+
"class": SuryaLayoutDetector,
|
84
|
+
"options_class": SuryaLayoutOptions,
|
85
|
+
}
|
86
|
+
if DoclingLayoutDetector:
|
87
|
+
ENGINE_REGISTRY["docling"] = {
|
88
|
+
"class": DoclingLayoutDetector,
|
89
|
+
"options_class": DoclingLayoutOptions,
|
90
|
+
}
|
91
|
+
|
92
|
+
# Add Gemini entry if available
|
93
|
+
if GeminiLayoutDetector:
|
94
|
+
ENGINE_REGISTRY["gemini"] = {
|
95
|
+
"class": GeminiLayoutDetector,
|
96
|
+
"options_class": GeminiLayoutOptions,
|
97
|
+
}
|
58
98
|
|
59
99
|
# Define the limited set of kwargs allowed for the simple analyze_layout call
|
60
|
-
SIMPLE_MODE_ALLOWED_KWARGS = {
|
61
|
-
'engine', 'confidence', 'classes', 'exclude_classes', 'device'
|
62
|
-
}
|
100
|
+
SIMPLE_MODE_ALLOWED_KWARGS = {"engine", "confidence", "classes", "exclude_classes", "device"}
|
63
101
|
|
64
102
|
def __init__(self):
|
65
103
|
"""Initializes the Layout Manager."""
|
66
104
|
# Cache for detector instances (different from model cache inside detector)
|
67
105
|
self._detector_instances: Dict[str, LayoutDetector] = {}
|
68
|
-
logger.info(
|
106
|
+
logger.info(
|
107
|
+
f"LayoutManager initialized. Available engines: {list(self.ENGINE_REGISTRY.keys())}"
|
108
|
+
)
|
69
109
|
|
70
110
|
def _get_engine_instance(self, engine_name: str) -> LayoutDetector:
|
71
111
|
"""Retrieves or creates an instance of the specified layout detector."""
|
72
112
|
engine_name = engine_name.lower()
|
73
113
|
if engine_name not in self.ENGINE_REGISTRY:
|
74
|
-
raise ValueError(
|
114
|
+
raise ValueError(
|
115
|
+
f"Unknown layout engine: '{engine_name}'. Available: {list(self.ENGINE_REGISTRY.keys())}"
|
116
|
+
)
|
75
117
|
|
76
118
|
if engine_name not in self._detector_instances:
|
77
119
|
logger.info(f"Creating instance of layout engine: {engine_name}")
|
78
|
-
engine_class = self.ENGINE_REGISTRY[engine_name][
|
79
|
-
detector_instance = engine_class()
|
120
|
+
engine_class = self.ENGINE_REGISTRY[engine_name]["class"]
|
121
|
+
detector_instance = engine_class() # Instantiate
|
80
122
|
if not detector_instance.is_available():
|
81
|
-
|
82
|
-
|
83
|
-
|
123
|
+
# Check availability before storing
|
124
|
+
# Construct helpful error message with install hint
|
125
|
+
install_hint = ""
|
126
|
+
if engine_name == "yolo":
|
127
|
+
install_hint = "pip install 'natural-pdf[layout_yolo]'"
|
128
|
+
elif engine_name == "tatr":
|
129
|
+
install_hint = "pip install 'natural-pdf[core-ml]'"
|
130
|
+
elif engine_name == "paddle":
|
131
|
+
install_hint = "pip install 'natural-pdf[paddle]'"
|
132
|
+
elif engine_name == "surya":
|
133
|
+
install_hint = "pip install 'natural-pdf[surya]'"
|
134
|
+
# Add other engines like docling if they become optional extras
|
135
|
+
else:
|
136
|
+
install_hint = f"(Check installation requirements for {engine_name})"
|
137
|
+
|
138
|
+
raise RuntimeError(
|
139
|
+
f"Layout engine '{engine_name}' is not available. Please install the required dependencies: {install_hint}"
|
140
|
+
)
|
141
|
+
self._detector_instances[engine_name] = detector_instance # Store if available
|
84
142
|
|
85
143
|
return self._detector_instances[engine_name]
|
86
144
|
|
87
145
|
def analyze_layout(
|
88
146
|
self,
|
89
147
|
image: Image.Image,
|
90
|
-
engine: Optional[str] = None,
|
148
|
+
engine: Optional[str] = None, # Default engine handled below
|
91
149
|
options: Optional[LayoutOptions] = None,
|
92
|
-
**kwargs
|
150
|
+
**kwargs,
|
93
151
|
) -> List[Dict[str, Any]]:
|
94
152
|
"""
|
95
153
|
Analyzes layout of a single image using simple args or an options object.
|
@@ -109,11 +167,11 @@ class LayoutManager:
|
|
109
167
|
selected_engine_name: str
|
110
168
|
|
111
169
|
if not isinstance(image, Image.Image):
|
112
|
-
|
170
|
+
raise TypeError("Input 'image' must be a PIL Image.")
|
113
171
|
|
114
172
|
available_engines = self.get_available_engines()
|
115
173
|
if not available_engines:
|
116
|
-
|
174
|
+
raise RuntimeError("No layout engines are available. Please check dependencies.")
|
117
175
|
|
118
176
|
# Determine default engine if not specified
|
119
177
|
default_engine = engine if engine else available_engines[0]
|
@@ -123,46 +181,55 @@ class LayoutManager:
|
|
123
181
|
# Advanced Mode: An options object was provided directly (or constructed by LayoutAnalyzer)
|
124
182
|
# Use this object directly, do not deep copy or reconstruct.
|
125
183
|
logger.debug(f"LayoutManager: Using provided options object: {type(options).__name__}")
|
126
|
-
final_options = options
|
184
|
+
final_options = options # Use the provided object directly
|
127
185
|
found_engine = False
|
128
186
|
for name, registry_entry in self.ENGINE_REGISTRY.items():
|
129
|
-
if isinstance(options, registry_entry[
|
187
|
+
if isinstance(options, registry_entry["options_class"]):
|
130
188
|
selected_engine_name = name
|
131
189
|
found_engine = True
|
132
190
|
break
|
133
191
|
if not found_engine:
|
134
|
-
|
192
|
+
raise TypeError(
|
193
|
+
f"Provided options object type '{type(options).__name__}' does not match any registered layout engine options."
|
194
|
+
)
|
135
195
|
# Ignore simple kwargs if options object is present
|
136
196
|
if kwargs:
|
137
|
-
logger.warning(
|
197
|
+
logger.warning(
|
198
|
+
f"Keyword arguments {list(kwargs.keys())} were provided alongside an 'options' object and will be ignored."
|
199
|
+
)
|
138
200
|
else:
|
139
|
-
# Simple Mode: No options object provided initially.
|
201
|
+
# Simple Mode: No options object provided initially.
|
140
202
|
# Determine engine from kwargs or default, then construct options.
|
141
203
|
selected_engine_name = default_engine.lower()
|
142
|
-
logger.debug(
|
204
|
+
logger.debug(
|
205
|
+
f"LayoutManager: Using simple mode. Engine: '{selected_engine_name}', kwargs: {kwargs}"
|
206
|
+
)
|
143
207
|
|
144
208
|
if selected_engine_name not in self.ENGINE_REGISTRY:
|
145
|
-
|
209
|
+
raise ValueError(
|
210
|
+
f"Unknown or unavailable layout engine: '{selected_engine_name}'. Available: {available_engines}"
|
211
|
+
)
|
146
212
|
|
147
213
|
unexpected_kwargs = set(kwargs.keys()) - self.SIMPLE_MODE_ALLOWED_KWARGS
|
148
214
|
if unexpected_kwargs:
|
149
|
-
raise TypeError(
|
215
|
+
raise TypeError(
|
216
|
+
f"Got unexpected keyword arguments in simple mode: {list(unexpected_kwargs)}. Use the 'options' parameter for detailed configuration."
|
217
|
+
)
|
150
218
|
|
151
|
-
options_class = self.ENGINE_REGISTRY[selected_engine_name][
|
219
|
+
options_class = self.ENGINE_REGISTRY[selected_engine_name]["options_class"]
|
152
220
|
# Use BaseLayoutOptions defaults unless overridden by kwargs
|
153
221
|
base_defaults = BaseLayoutOptions()
|
154
222
|
simple_args = {
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
223
|
+
"confidence": kwargs.get("confidence", base_defaults.confidence),
|
224
|
+
"classes": kwargs.get("classes"),
|
225
|
+
"exclude_classes": kwargs.get("exclude_classes"),
|
226
|
+
"device": kwargs.get("device", base_defaults.device),
|
159
227
|
}
|
160
228
|
# Filter out None values before passing to constructor
|
161
229
|
simple_args_filtered = {k: v for k, v in simple_args.items() if v is not None}
|
162
230
|
final_options = options_class(**simple_args_filtered)
|
163
231
|
logger.debug(f"LayoutManager: Constructed options for simple mode: {final_options}")
|
164
232
|
|
165
|
-
|
166
233
|
# --- Get Engine Instance and Process ---
|
167
234
|
try:
|
168
235
|
engine_instance = self._get_engine_instance(selected_engine_name)
|
@@ -175,29 +242,29 @@ class LayoutManager:
|
|
175
242
|
return detections
|
176
243
|
|
177
244
|
except (ImportError, RuntimeError, ValueError, TypeError) as e:
|
178
|
-
|
179
|
-
|
245
|
+
logger.error(
|
246
|
+
f"Layout analysis failed for engine '{selected_engine_name}': {e}", exc_info=True
|
247
|
+
)
|
248
|
+
raise # Re-raise expected errors
|
180
249
|
except Exception as e:
|
181
|
-
|
182
|
-
|
183
|
-
|
250
|
+
logger.error(f"An unexpected error occurred during layout analysis: {e}", exc_info=True)
|
251
|
+
raise # Re-raise unexpected errors
|
184
252
|
|
185
253
|
def get_available_engines(self) -> List[str]:
|
186
254
|
"""Returns a list of registered layout engine names that are currently available."""
|
187
255
|
available = []
|
188
256
|
for name, registry_entry in self.ENGINE_REGISTRY.items():
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
257
|
+
try:
|
258
|
+
engine_class = registry_entry["class"]
|
259
|
+
# Check availability without full instantiation if possible
|
260
|
+
if hasattr(engine_class, "is_available") and callable(engine_class.is_available):
|
261
|
+
# Create temporary instance only for check if needed, or use classmethod
|
262
|
+
if engine_class().is_available(): # Assumes instance needed for check
|
263
|
+
available.append(name)
|
264
|
+
else:
|
265
|
+
# Assume available if class exists (less robust)
|
266
|
+
available.append(name)
|
267
|
+
except Exception as e:
|
268
|
+
logger.debug(f"Layout engine '{name}' check failed: {e}")
|
269
|
+
pass
|
202
270
|
return available
|
203
|
-
|
@@ -1,32 +1,40 @@
|
|
1
1
|
# layout_options.py
|
2
2
|
import logging
|
3
3
|
from dataclasses import dataclass, field
|
4
|
-
from typing import
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
5
5
|
|
6
6
|
logger = logging.getLogger(__name__)
|
7
7
|
|
8
|
+
|
8
9
|
# --- Base Layout Options ---
|
9
10
|
@dataclass
|
10
11
|
class BaseLayoutOptions:
|
11
12
|
"""Base options for layout detection engines."""
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
|
14
|
+
confidence: float = 0.5 # Minimum confidence threshold for detections
|
15
|
+
classes: Optional[List[str]] = None # Specific classes to detect (None for all)
|
16
|
+
exclude_classes: Optional[List[str]] = None # Classes to exclude
|
17
|
+
device: Optional[str] = "cpu" # Preferred device ('cpu', 'cuda', 'mps', etc.)
|
18
|
+
extra_args: Dict[str, Any] = field(
|
19
|
+
default_factory=dict
|
20
|
+
) # For engine-specific args not yet fields
|
21
|
+
|
17
22
|
|
18
23
|
# --- YOLO Specific Options ---
|
19
24
|
@dataclass
|
20
25
|
class YOLOLayoutOptions(BaseLayoutOptions):
|
21
26
|
"""Options specific to YOLO-based layout detection."""
|
27
|
+
|
22
28
|
model_repo: str = "juliozhao/DocLayout-YOLO-DocStructBench"
|
23
29
|
model_file: str = "doclayout_yolo_docstructbench_imgsz1024.pt"
|
24
|
-
image_size: int = 1024
|
30
|
+
image_size: int = 1024 # Input image size for the model
|
31
|
+
|
25
32
|
|
26
33
|
# --- TATR Specific Options ---
|
27
34
|
@dataclass
|
28
35
|
class TATRLayoutOptions(BaseLayoutOptions):
|
29
36
|
"""Options specific to Table Transformer (TATR) layout detection."""
|
37
|
+
|
30
38
|
# Which models to use (can be local paths or HF identifiers)
|
31
39
|
detection_model: str = "microsoft/table-transformer-detection"
|
32
40
|
structure_model: str = "microsoft/table-transformer-structure-recognition-v1.1-all"
|
@@ -36,35 +44,52 @@ class TATRLayoutOptions(BaseLayoutOptions):
|
|
36
44
|
# Whether to create cell regions (can be slow)
|
37
45
|
create_cells: bool = True
|
38
46
|
|
47
|
+
|
39
48
|
# --- Paddle Specific Options ---
|
40
49
|
@dataclass
|
41
50
|
class PaddleLayoutOptions(BaseLayoutOptions):
|
42
51
|
"""Options specific to PaddlePaddle PP-Structure layout detection."""
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
52
|
+
|
53
|
+
lang: str = "en" # Language ('en', 'ch', etc.)
|
54
|
+
use_angle_cls: bool = False # Use text angle classification?
|
55
|
+
enable_table: bool = True # Enable table structure detection?
|
56
|
+
show_log: bool = False # Show Paddle internal logs?
|
57
|
+
detect_text: bool = True # Also detect raw text boxes using PaddleOCR?
|
58
|
+
verbose: bool = False # Verbose logging for the detector class
|
59
|
+
|
49
60
|
|
50
61
|
# --- Surya Specific Options ---
|
51
62
|
@dataclass
|
52
63
|
class SuryaLayoutOptions(BaseLayoutOptions):
|
53
64
|
"""Options specific to Surya layout detection."""
|
54
|
-
|
55
|
-
|
65
|
+
|
66
|
+
model_name: str = "default" # Placeholder if different models become available
|
67
|
+
recognize_table_structure: bool = True # Automatically run table structure recognition?
|
68
|
+
|
56
69
|
|
57
70
|
# --- Docling Specific Options ---
|
58
71
|
@dataclass
|
59
72
|
class DoclingLayoutOptions(BaseLayoutOptions):
|
60
73
|
"""Options specific to Docling layout detection."""
|
74
|
+
|
61
75
|
# Pass kwargs directly to Docling's DocumentConverter via extra_args
|
62
76
|
# Common examples shown here for documentation, add others as needed to extra_args
|
63
77
|
# model_name: str = "ds4sd/SmolDocling-256M-preview" # Example model (pass via extra_args)
|
64
78
|
# prompt_text: Optional[str] = None # Optional prompt (pass via extra_args)
|
65
|
-
verbose: bool = False
|
79
|
+
verbose: bool = False # Verbose logging for the detector class
|
66
80
|
# Other kwargs like 'device', 'batch_size' can go in extra_args
|
67
81
|
|
82
|
+
|
83
|
+
# --- Gemini Specific Options ---
|
84
|
+
@dataclass
|
85
|
+
class GeminiLayoutOptions(BaseLayoutOptions):
|
86
|
+
"""Options specific to Gemini-based layout detection (using OpenAI compatibility)."""
|
87
|
+
|
88
|
+
model_name: str = "gemini-2.0-flash"
|
89
|
+
# Removed: prompt_template, temperature, top_p, max_output_tokens
|
90
|
+
# These are typically passed directly to the chat completion call or via extra_args
|
91
|
+
|
92
|
+
|
68
93
|
# --- Union Type ---
|
69
94
|
LayoutOptions = Union[
|
70
95
|
YOLOLayoutOptions,
|
@@ -72,5 +97,6 @@ LayoutOptions = Union[
|
|
72
97
|
PaddleLayoutOptions,
|
73
98
|
SuryaLayoutOptions,
|
74
99
|
DoclingLayoutOptions,
|
75
|
-
|
100
|
+
GeminiLayoutOptions,
|
101
|
+
BaseLayoutOptions, # Include base for typing flexibility
|
76
102
|
]
|