natural-pdf 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/__init__.py +31 -0
- natural_pdf/analyzers/layout/gemini.py +137 -162
- natural_pdf/analyzers/layout/layout_manager.py +9 -5
- natural_pdf/analyzers/layout/layout_options.py +77 -7
- natural_pdf/analyzers/layout/paddle.py +318 -165
- natural_pdf/analyzers/layout/table_structure_utils.py +78 -0
- natural_pdf/analyzers/shape_detection_mixin.py +770 -405
- natural_pdf/classification/mixin.py +2 -8
- natural_pdf/collections/pdf_collection.py +25 -30
- natural_pdf/core/highlighting_service.py +47 -32
- natural_pdf/core/page.py +226 -70
- natural_pdf/core/pdf.py +19 -22
- natural_pdf/elements/base.py +9 -9
- natural_pdf/elements/collections.py +105 -50
- natural_pdf/elements/region.py +320 -113
- natural_pdf/exporters/paddleocr.py +38 -13
- natural_pdf/flows/__init__.py +3 -3
- natural_pdf/flows/collections.py +303 -132
- natural_pdf/flows/element.py +277 -132
- natural_pdf/flows/flow.py +33 -16
- natural_pdf/flows/region.py +142 -79
- natural_pdf/ocr/engine_doctr.py +37 -4
- natural_pdf/ocr/engine_easyocr.py +23 -3
- natural_pdf/ocr/engine_paddle.py +281 -30
- natural_pdf/ocr/engine_surya.py +8 -3
- natural_pdf/ocr/ocr_manager.py +75 -76
- natural_pdf/ocr/ocr_options.py +52 -87
- natural_pdf/search/__init__.py +25 -12
- natural_pdf/search/lancedb_search_service.py +91 -54
- natural_pdf/search/numpy_search_service.py +86 -65
- natural_pdf/search/searchable_mixin.py +2 -2
- natural_pdf/selectors/parser.py +125 -81
- natural_pdf/widgets/__init__.py +1 -1
- natural_pdf/widgets/viewer.py +205 -449
- {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/METADATA +27 -45
- {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/RECORD +39 -38
- {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/top_level.txt +0 -0
natural_pdf/__init__.py
CHANGED
@@ -37,6 +37,36 @@ def configure_logging(level=logging.INFO, handler=None):
|
|
37
37
|
logger.propagate = False
|
38
38
|
|
39
39
|
|
40
|
+
# Global options system
|
41
|
+
class ConfigSection:
|
42
|
+
"""A configuration section that holds key-value option pairs."""
|
43
|
+
|
44
|
+
def __init__(self, **defaults):
|
45
|
+
self.__dict__.update(defaults)
|
46
|
+
|
47
|
+
def __repr__(self):
|
48
|
+
items = [f"{k}={v!r}" for k, v in self.__dict__.items()]
|
49
|
+
return f"{self.__class__.__name__}({', '.join(items)})"
|
50
|
+
|
51
|
+
|
52
|
+
class Options:
|
53
|
+
"""Global options for natural-pdf, similar to pandas options."""
|
54
|
+
|
55
|
+
def __init__(self):
|
56
|
+
# Image rendering defaults
|
57
|
+
self.image = ConfigSection(width=None, resolution=150)
|
58
|
+
|
59
|
+
# OCR defaults
|
60
|
+
self.ocr = ConfigSection(engine="easyocr", languages=["en"], min_confidence=0.5)
|
61
|
+
|
62
|
+
# Text extraction defaults (empty for now)
|
63
|
+
self.text = ConfigSection()
|
64
|
+
|
65
|
+
|
66
|
+
# Create global options instance
|
67
|
+
options = Options()
|
68
|
+
|
69
|
+
|
40
70
|
# Version
|
41
71
|
__version__ = "0.1.1"
|
42
72
|
|
@@ -90,6 +120,7 @@ __all__ = [
|
|
90
120
|
"MultiModalSearchOptions",
|
91
121
|
"BaseSearchOptions",
|
92
122
|
"configure_logging",
|
123
|
+
"options",
|
93
124
|
]
|
94
125
|
|
95
126
|
# Add QA components to __all__ if available
|
@@ -9,16 +9,6 @@ from typing import Any, Dict, List, Optional
|
|
9
9
|
from PIL import Image
|
10
10
|
from pydantic import BaseModel, Field
|
11
11
|
|
12
|
-
# Use OpenAI library for interaction
|
13
|
-
try:
|
14
|
-
from openai import OpenAI
|
15
|
-
from openai.types.chat import ChatCompletion
|
16
|
-
|
17
|
-
# Import OpenAIError for exception handling if needed
|
18
|
-
except ImportError:
|
19
|
-
OpenAI = None
|
20
|
-
ChatCompletion = None
|
21
|
-
|
22
12
|
try:
|
23
13
|
from .base import LayoutDetector
|
24
14
|
from .layout_options import BaseLayoutOptions, GeminiLayoutOptions
|
@@ -58,22 +48,28 @@ class DetectedRegion(BaseModel):
|
|
58
48
|
|
59
49
|
|
60
50
|
class GeminiLayoutDetector(LayoutDetector):
|
61
|
-
"""
|
51
|
+
"""
|
52
|
+
GeminiLayoutDetector: Layout analysis using Gemini via OpenAI-compatible API.
|
62
53
|
|
63
|
-
|
64
|
-
|
54
|
+
To use this detector, you must provide a compatible OpenAI client (e.g., from the openai package) via GeminiLayoutOptions.client.
|
55
|
+
See the documentation for an example of how to use Gemini layout analysis with natural-pdf.
|
56
|
+
"""
|
65
57
|
|
66
58
|
def __init__(self):
|
67
59
|
super().__init__()
|
68
60
|
self.supported_classes = set() # Indicate dynamic nature
|
69
61
|
|
70
62
|
def is_available(self) -> bool:
|
71
|
-
"""
|
72
|
-
if
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
63
|
+
"""
|
64
|
+
Check if the Gemini detector is available.
|
65
|
+
|
66
|
+
Since this detector expects users to provide their own compatible OpenAI client,
|
67
|
+
the detector itself is always available. Users must ensure they have a compatible
|
68
|
+
client (e.g., from the openai package) and provide it via GeminiLayoutOptions.client.
|
69
|
+
|
70
|
+
Returns:
|
71
|
+
True - the detector is always available, but requires a compatible client.
|
72
|
+
"""
|
77
73
|
return True
|
78
74
|
|
79
75
|
def _get_cache_key(self, options: GeminiLayoutOptions) -> str:
|
@@ -87,21 +83,13 @@ class GeminiLayoutDetector(LayoutDetector):
|
|
87
83
|
|
88
84
|
def _load_model_from_options(self, options: GeminiLayoutOptions) -> Any:
|
89
85
|
"""Validate options and return the model name."""
|
90
|
-
if not self.is_available():
|
91
|
-
raise RuntimeError("OpenAI library not installed. Please run: pip install openai")
|
92
|
-
|
93
86
|
if not isinstance(options, GeminiLayoutOptions):
|
94
87
|
raise TypeError("Incorrect options type provided for Gemini model loading.")
|
95
|
-
|
96
88
|
# Model loading is deferred to detect() based on whether a client is provided
|
97
89
|
return options.model_name
|
98
90
|
|
99
91
|
def detect(self, image: Image.Image, options: BaseLayoutOptions) -> List[Dict[str, Any]]:
|
100
92
|
"""Detect layout elements in an image using Gemini via OpenAI library."""
|
101
|
-
if not self.is_available():
|
102
|
-
# The is_available check now only confirms library presence
|
103
|
-
raise RuntimeError("OpenAI library not installed. Please run: pip install openai")
|
104
|
-
|
105
93
|
# Ensure options are the correct type
|
106
94
|
final_options: GeminiLayoutOptions
|
107
95
|
if isinstance(options, GeminiLayoutOptions):
|
@@ -124,160 +112,147 @@ class GeminiLayoutDetector(LayoutDetector):
|
|
124
112
|
model_name = self._get_model(final_options)
|
125
113
|
detections = []
|
126
114
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
):
|
134
|
-
client = final_options.client
|
135
|
-
logger.debug("Using provided client instance.")
|
136
|
-
else:
|
137
|
-
logger.error(
|
138
|
-
"Provided client does not seem compatible (missing beta.chat.completions.parse)."
|
139
|
-
)
|
140
|
-
raise TypeError(
|
141
|
-
"Provided client is not compatible with the expected OpenAI interface."
|
142
|
-
)
|
115
|
+
# --- 1. Initialize OpenAI Client ---
|
116
|
+
client = getattr(final_options, "client", None)
|
117
|
+
if client is None:
|
118
|
+
raise RuntimeError(
|
119
|
+
"No client provided. Please provide a compatible OpenAI client via GeminiLayoutOptions.client."
|
120
|
+
)
|
143
121
|
|
144
|
-
|
145
|
-
|
146
|
-
|
122
|
+
if not (
|
123
|
+
hasattr(client, "beta")
|
124
|
+
and hasattr(getattr(client.beta, "chat", None), "completions")
|
125
|
+
and hasattr(getattr(client.beta.chat.completions, "parse", None), "__call__")
|
126
|
+
):
|
127
|
+
raise RuntimeError(
|
128
|
+
"Provided client is not compatible with the expected OpenAI interface."
|
129
|
+
)
|
130
|
+
logger.debug("Using provided client instance.")
|
147
131
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
132
|
+
# --- 2. Prepare Input for OpenAI API ---
|
133
|
+
if not final_options.classes:
|
134
|
+
logger.error("Gemini layout detection requires a list of classes to find.")
|
135
|
+
return []
|
152
136
|
|
153
|
-
|
137
|
+
width, height = image.size
|
138
|
+
|
139
|
+
# Convert image to base64
|
140
|
+
buffered = io.BytesIO()
|
141
|
+
image.save(buffered, format="PNG")
|
142
|
+
img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
143
|
+
image_url = f"data:image/png;base64,{img_base64}"
|
144
|
+
|
145
|
+
class_list_str = ", ".join(f"`{c}`" for c in final_options.classes)
|
146
|
+
prompt_text = (
|
147
|
+
f"Analyze the provided image of a document page ({width}x{height}). "
|
148
|
+
f"Identify all regions corresponding to the following types: {class_list_str}. "
|
149
|
+
f"Return ONLY the structured data requested as formatted JSON."
|
150
|
+
)
|
151
|
+
|
152
|
+
messages = [
|
153
|
+
{
|
154
|
+
"role": "user",
|
155
|
+
"content": [
|
156
|
+
{"type": "text", "text": prompt_text},
|
157
|
+
{
|
158
|
+
"type": "image_url",
|
159
|
+
"image_url": {"url": image_url},
|
160
|
+
},
|
161
|
+
],
|
162
|
+
}
|
163
|
+
]
|
154
164
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
159
|
-
image_url = f"data:image/png;base64,{img_base64}"
|
165
|
+
logger.debug(
|
166
|
+
f"Running Gemini detection via OpenAI lib (Model: {model_name}). Asking for classes: {final_options.classes}"
|
167
|
+
)
|
160
168
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
f"Return ONLY the structured data requested as formatted JSON."
|
166
|
-
)
|
169
|
+
completion_kwargs = {
|
170
|
+
"temperature": final_options.extra_args.get("temperature", 0.0), # Default to low temp
|
171
|
+
"max_tokens": final_options.extra_args.get("max_tokens", 4096),
|
172
|
+
}
|
167
173
|
|
168
|
-
|
169
|
-
{
|
170
|
-
"role": "user",
|
171
|
-
"content": [
|
172
|
-
{"type": "text", "text": prompt_text},
|
173
|
-
{
|
174
|
-
"type": "image_url",
|
175
|
-
"image_url": {"url": image_url},
|
176
|
-
},
|
177
|
-
],
|
178
|
-
}
|
179
|
-
]
|
174
|
+
completion_kwargs = {k: v for k, v in completion_kwargs.items() if v is not None}
|
180
175
|
|
181
|
-
|
182
|
-
|
183
|
-
)
|
176
|
+
class ImageContents(BaseModel):
|
177
|
+
regions: List[DetectedRegion]
|
184
178
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
179
|
+
completion: "ChatCompletion" = client.beta.chat.completions.parse(
|
180
|
+
model=model_name,
|
181
|
+
messages=messages,
|
182
|
+
response_format=ImageContents,
|
183
|
+
**completion_kwargs,
|
184
|
+
)
|
191
185
|
|
192
|
-
|
186
|
+
logger.debug(f"Gemini response received via OpenAI lib.")
|
193
187
|
|
194
|
-
|
195
|
-
|
188
|
+
# --- 4. Process Parsed Response ---
|
189
|
+
if not completion.choices:
|
190
|
+
logger.error("Gemini response (via OpenAI lib) contained no choices.")
|
191
|
+
return []
|
196
192
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
193
|
+
# Get the parsed Pydantic objects
|
194
|
+
parsed_results = completion.choices[0].message.parsed.regions
|
195
|
+
if not parsed_results or not isinstance(parsed_results, list):
|
196
|
+
logger.error(
|
197
|
+
f"Gemini response (via OpenAI lib) did not contain a valid list of parsed regions. Found: {type(parsed_results)}"
|
202
198
|
)
|
199
|
+
return []
|
203
200
|
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
201
|
+
# --- 5. Convert to Detections & Filter ---
|
202
|
+
normalized_classes_req = {self._normalize_class_name(c) for c in final_options.classes}
|
203
|
+
normalized_classes_excl = (
|
204
|
+
{self._normalize_class_name(c) for c in final_options.exclude_classes}
|
205
|
+
if final_options.exclude_classes
|
206
|
+
else set()
|
207
|
+
)
|
208
|
+
|
209
|
+
for item in parsed_results:
|
210
|
+
# The item is already a validated DetectedRegion Pydantic object
|
211
|
+
# Access fields directly
|
212
|
+
label = item.label
|
213
|
+
bbox_raw = item.bbox
|
214
|
+
confidence_score = item.confidence
|
215
|
+
|
216
|
+
# Coordinates should already be floats, but ensure tuple format
|
217
|
+
xmin, ymin, xmax, ymax = tuple(bbox_raw)
|
218
|
+
|
219
|
+
# --- Apply Filtering ---
|
220
|
+
normalized_class = self._normalize_class_name(label)
|
221
|
+
|
222
|
+
# Check against requested classes (Should be guaranteed by schema, but doesn't hurt)
|
223
|
+
if normalized_class not in normalized_classes_req:
|
224
|
+
logger.warning(
|
225
|
+
f"Gemini (via OpenAI) returned unexpected class '{label}' despite schema. Skipping."
|
216
226
|
)
|
217
|
-
|
218
|
-
|
219
|
-
# --- 5. Convert to Detections & Filter ---
|
220
|
-
normalized_classes_req = {self._normalize_class_name(c) for c in final_options.classes}
|
221
|
-
normalized_classes_excl = (
|
222
|
-
{self._normalize_class_name(c) for c in final_options.exclude_classes}
|
223
|
-
if final_options.exclude_classes
|
224
|
-
else set()
|
225
|
-
)
|
227
|
+
continue
|
226
228
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
# --- Apply Filtering ---
|
238
|
-
normalized_class = self._normalize_class_name(label)
|
239
|
-
|
240
|
-
# Check against requested classes (Should be guaranteed by schema, but doesn't hurt)
|
241
|
-
if normalized_class not in normalized_classes_req:
|
242
|
-
logger.warning(
|
243
|
-
f"Gemini (via OpenAI) returned unexpected class '{label}' despite schema. Skipping."
|
244
|
-
)
|
245
|
-
continue
|
246
|
-
|
247
|
-
# Check against excluded classes
|
248
|
-
if normalized_class in normalized_classes_excl:
|
249
|
-
logger.debug(
|
250
|
-
f"Skipping excluded class '{label}' (normalized: {normalized_class})."
|
251
|
-
)
|
252
|
-
continue
|
253
|
-
|
254
|
-
# Check against base confidence threshold from options
|
255
|
-
if confidence_score < final_options.confidence:
|
256
|
-
logger.debug(
|
257
|
-
f"Skipping item with confidence {confidence_score:.3f} below threshold {final_options.confidence}."
|
258
|
-
)
|
259
|
-
continue
|
260
|
-
|
261
|
-
# Add detection
|
262
|
-
detections.append(
|
263
|
-
{
|
264
|
-
"bbox": (xmin, ymin, xmax, ymax),
|
265
|
-
"class": label, # Use original label from LLM
|
266
|
-
"confidence": confidence_score,
|
267
|
-
"normalized_class": normalized_class,
|
268
|
-
"source": "layout",
|
269
|
-
"model": "gemini", # Keep model name generic as gemini
|
270
|
-
}
|
229
|
+
# Check against excluded classes
|
230
|
+
if normalized_class in normalized_classes_excl:
|
231
|
+
logger.debug(f"Skipping excluded class '{label}' (normalized: {normalized_class}).")
|
232
|
+
continue
|
233
|
+
|
234
|
+
# Check against base confidence threshold from options
|
235
|
+
if confidence_score < final_options.confidence:
|
236
|
+
logger.debug(
|
237
|
+
f"Skipping item with confidence {confidence_score:.3f} below threshold {final_options.confidence}."
|
271
238
|
)
|
239
|
+
continue
|
272
240
|
|
273
|
-
|
274
|
-
|
241
|
+
# Add detection
|
242
|
+
detections.append(
|
243
|
+
{
|
244
|
+
"bbox": (xmin, ymin, xmax, ymax),
|
245
|
+
"class": label, # Use original label from LLM
|
246
|
+
"confidence": confidence_score,
|
247
|
+
"normalized_class": normalized_class,
|
248
|
+
"source": "layout",
|
249
|
+
"model": "gemini", # Keep model name generic as gemini
|
250
|
+
}
|
275
251
|
)
|
276
252
|
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
return []
|
253
|
+
self.logger.info(
|
254
|
+
f"Gemini (via OpenAI lib) processed response. Detected {len(detections)} layout elements matching criteria."
|
255
|
+
)
|
281
256
|
|
282
257
|
return detections
|
283
258
|
|
@@ -121,14 +121,18 @@ class LayoutManager:
|
|
121
121
|
# Construct helpful error message with install hint
|
122
122
|
install_hint = ""
|
123
123
|
if engine_name == "yolo":
|
124
|
-
install_hint = "pip install
|
124
|
+
install_hint = "pip install doclayout_yolo"
|
125
125
|
elif engine_name == "tatr":
|
126
|
-
|
126
|
+
# This should now be installed with core dependencies
|
127
|
+
install_hint = "(should be installed with natural-pdf, check for import errors)"
|
127
128
|
elif engine_name == "paddle":
|
128
|
-
install_hint = "pip install
|
129
|
+
install_hint = "pip install paddleocr paddlepaddle"
|
129
130
|
elif engine_name == "surya":
|
130
|
-
install_hint = "pip install
|
131
|
-
|
131
|
+
install_hint = "pip install surya-ocr"
|
132
|
+
elif engine_name == "docling":
|
133
|
+
install_hint = "pip install docling"
|
134
|
+
elif engine_name == "gemini":
|
135
|
+
install_hint = "pip install openai"
|
132
136
|
else:
|
133
137
|
install_hint = f"(Check installation requirements for {engine_name})"
|
134
138
|
|
@@ -54,14 +54,84 @@ class TATRLayoutOptions(BaseLayoutOptions):
|
|
54
54
|
# --- Paddle Specific Options ---
|
55
55
|
@dataclass
|
56
56
|
class PaddleLayoutOptions(BaseLayoutOptions):
|
57
|
-
"""
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
57
|
+
"""
|
58
|
+
Options specific to PaddlePaddle PP-StructureV3 layout detection.
|
59
|
+
See: https://paddlepaddle.github.io/PaddleOCR/latest/en/version3.x/pipeline_usage/PP-StructureV3.html
|
60
|
+
"""
|
61
|
+
# Model paths and names
|
62
|
+
layout_detection_model_name: Optional[str] = None
|
63
|
+
layout_detection_model_dir: Optional[str] = None
|
64
|
+
layout_threshold: Optional[float] = None
|
65
|
+
layout_nms: Optional[bool] = None
|
66
|
+
layout_unclip_ratio: Optional[float] = None
|
67
|
+
layout_merge_bboxes_mode: Optional[str] = None
|
68
|
+
chart_recognition_model_name: Optional[str] = None
|
69
|
+
chart_recognition_model_dir: Optional[str] = None
|
70
|
+
chart_recognition_batch_size: Optional[int] = None
|
71
|
+
region_detection_model_name: Optional[str] = None
|
72
|
+
region_detection_model_dir: Optional[str] = None
|
73
|
+
doc_orientation_classify_model_name: Optional[str] = None
|
74
|
+
doc_orientation_classify_model_dir: Optional[str] = None
|
75
|
+
doc_unwarping_model_name: Optional[str] = None
|
76
|
+
doc_unwarping_model_dir: Optional[str] = None
|
77
|
+
text_detection_model_name: Optional[str] = None
|
78
|
+
text_detection_model_dir: Optional[str] = None
|
79
|
+
text_det_limit_side_len: Optional[int] = None
|
80
|
+
text_det_limit_type: Optional[str] = None
|
81
|
+
text_det_thresh: Optional[float] = None
|
82
|
+
text_det_box_thresh: Optional[float] = None
|
83
|
+
text_det_unclip_ratio: Optional[float] = None
|
84
|
+
textline_orientation_model_name: Optional[str] = None
|
85
|
+
textline_orientation_model_dir: Optional[str] = None
|
86
|
+
textline_orientation_batch_size: Optional[int] = None
|
87
|
+
text_recognition_model_name: Optional[str] = None
|
88
|
+
text_recognition_model_dir: Optional[str] = None
|
89
|
+
text_recognition_batch_size: Optional[int] = None
|
90
|
+
text_rec_score_thresh: Optional[float] = None
|
91
|
+
table_classification_model_name: Optional[str] = None
|
92
|
+
table_classification_model_dir: Optional[str] = None
|
93
|
+
wired_table_structure_recognition_model_name: Optional[str] = None
|
94
|
+
wired_table_structure_recognition_model_dir: Optional[str] = None
|
95
|
+
wireless_table_structure_recognition_model_name: Optional[str] = None
|
96
|
+
wireless_table_structure_recognition_model_dir: Optional[str] = None
|
97
|
+
wired_table_cells_detection_model_name: Optional[str] = None
|
98
|
+
wired_table_cells_detection_model_dir: Optional[str] = None
|
99
|
+
wireless_table_cells_detection_model_name: Optional[str] = None
|
100
|
+
wireless_table_cells_detection_model_dir: Optional[str] = None
|
101
|
+
seal_text_detection_model_name: Optional[str] = None
|
102
|
+
seal_text_detection_model_dir: Optional[str] = None
|
103
|
+
seal_det_limit_side_len: Optional[int] = None
|
104
|
+
seal_det_limit_type: Optional[str] = None
|
105
|
+
seal_det_thresh: Optional[float] = None
|
106
|
+
seal_det_box_thresh: Optional[float] = None
|
107
|
+
seal_det_unclip_ratio: Optional[float] = None
|
108
|
+
seal_text_recognition_model_name: Optional[str] = None
|
109
|
+
seal_text_recognition_model_dir: Optional[str] = None
|
110
|
+
seal_text_recognition_batch_size: Optional[int] = None
|
111
|
+
seal_rec_score_thresh: Optional[float] = None
|
112
|
+
formula_recognition_model_name: Optional[str] = None
|
113
|
+
formula_recognition_model_dir: Optional[str] = None
|
114
|
+
formula_recognition_batch_size: Optional[int] = None
|
115
|
+
# Module usage flags
|
116
|
+
use_doc_orientation_classify: Optional[bool] = True
|
117
|
+
use_doc_unwarping: Optional[bool] = True
|
118
|
+
use_textline_orientation: Optional[bool] = True
|
119
|
+
use_seal_recognition: Optional[bool] = False
|
120
|
+
use_table_recognition: Optional[bool] = True
|
121
|
+
use_formula_recognition: Optional[bool] = False
|
122
|
+
use_chart_recognition: Optional[bool] = True
|
123
|
+
use_region_detection: Optional[bool] = True
|
124
|
+
# General parameters
|
125
|
+
device: Optional[str] = None
|
126
|
+
enable_hpi: Optional[bool] = None
|
127
|
+
use_tensorrt: Optional[bool] = None
|
128
|
+
precision: Optional[str] = None
|
129
|
+
enable_mkldnn: Optional[bool] = False
|
130
|
+
cpu_threads: Optional[int] = None
|
131
|
+
paddlex_config: Optional[str] = None
|
132
|
+
lang: Optional[str] = None # For English model selection
|
64
133
|
verbose: bool = False # Verbose logging for the detector class
|
134
|
+
create_cells: Optional[bool] = True
|
65
135
|
|
66
136
|
|
67
137
|
# --- Surya Specific Options ---
|