natural-pdf 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. natural_pdf/__init__.py +31 -0
  2. natural_pdf/analyzers/layout/gemini.py +137 -162
  3. natural_pdf/analyzers/layout/layout_manager.py +9 -5
  4. natural_pdf/analyzers/layout/layout_options.py +77 -7
  5. natural_pdf/analyzers/layout/paddle.py +318 -165
  6. natural_pdf/analyzers/layout/table_structure_utils.py +78 -0
  7. natural_pdf/analyzers/shape_detection_mixin.py +770 -405
  8. natural_pdf/classification/mixin.py +2 -8
  9. natural_pdf/collections/pdf_collection.py +25 -30
  10. natural_pdf/core/highlighting_service.py +47 -32
  11. natural_pdf/core/page.py +226 -70
  12. natural_pdf/core/pdf.py +19 -22
  13. natural_pdf/elements/base.py +9 -9
  14. natural_pdf/elements/collections.py +105 -50
  15. natural_pdf/elements/region.py +320 -113
  16. natural_pdf/exporters/paddleocr.py +38 -13
  17. natural_pdf/flows/__init__.py +3 -3
  18. natural_pdf/flows/collections.py +303 -132
  19. natural_pdf/flows/element.py +277 -132
  20. natural_pdf/flows/flow.py +33 -16
  21. natural_pdf/flows/region.py +142 -79
  22. natural_pdf/ocr/engine_doctr.py +37 -4
  23. natural_pdf/ocr/engine_easyocr.py +23 -3
  24. natural_pdf/ocr/engine_paddle.py +281 -30
  25. natural_pdf/ocr/engine_surya.py +8 -3
  26. natural_pdf/ocr/ocr_manager.py +75 -76
  27. natural_pdf/ocr/ocr_options.py +52 -87
  28. natural_pdf/search/__init__.py +25 -12
  29. natural_pdf/search/lancedb_search_service.py +91 -54
  30. natural_pdf/search/numpy_search_service.py +86 -65
  31. natural_pdf/search/searchable_mixin.py +2 -2
  32. natural_pdf/selectors/parser.py +125 -81
  33. natural_pdf/widgets/__init__.py +1 -1
  34. natural_pdf/widgets/viewer.py +205 -449
  35. {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/METADATA +27 -45
  36. {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/RECORD +39 -38
  37. {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/WHEEL +0 -0
  38. {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/licenses/LICENSE +0 -0
  39. {natural_pdf-0.1.14.dist-info → natural_pdf-0.1.16.dist-info}/top_level.txt +0 -0
natural_pdf/__init__.py CHANGED
@@ -37,6 +37,36 @@ def configure_logging(level=logging.INFO, handler=None):
37
37
  logger.propagate = False
38
38
 
39
39
 
40
+ # Global options system
41
+ class ConfigSection:
42
+ """A configuration section that holds key-value option pairs."""
43
+
44
+ def __init__(self, **defaults):
45
+ self.__dict__.update(defaults)
46
+
47
+ def __repr__(self):
48
+ items = [f"{k}={v!r}" for k, v in self.__dict__.items()]
49
+ return f"{self.__class__.__name__}({', '.join(items)})"
50
+
51
+
52
+ class Options:
53
+ """Global options for natural-pdf, similar to pandas options."""
54
+
55
+ def __init__(self):
56
+ # Image rendering defaults
57
+ self.image = ConfigSection(width=None, resolution=150)
58
+
59
+ # OCR defaults
60
+ self.ocr = ConfigSection(engine="easyocr", languages=["en"], min_confidence=0.5)
61
+
62
+ # Text extraction defaults (empty for now)
63
+ self.text = ConfigSection()
64
+
65
+
66
+ # Create global options instance
67
+ options = Options()
68
+
69
+
40
70
  # Version
41
71
  __version__ = "0.1.1"
42
72
 
@@ -90,6 +120,7 @@ __all__ = [
90
120
  "MultiModalSearchOptions",
91
121
  "BaseSearchOptions",
92
122
  "configure_logging",
123
+ "options",
93
124
  ]
94
125
 
95
126
  # Add QA components to __all__ if available
@@ -9,16 +9,6 @@ from typing import Any, Dict, List, Optional
9
9
  from PIL import Image
10
10
  from pydantic import BaseModel, Field
11
11
 
12
- # Use OpenAI library for interaction
13
- try:
14
- from openai import OpenAI
15
- from openai.types.chat import ChatCompletion
16
-
17
- # Import OpenAIError for exception handling if needed
18
- except ImportError:
19
- OpenAI = None
20
- ChatCompletion = None
21
-
22
12
  try:
23
13
  from .base import LayoutDetector
24
14
  from .layout_options import BaseLayoutOptions, GeminiLayoutOptions
@@ -58,22 +48,28 @@ class DetectedRegion(BaseModel):
58
48
 
59
49
 
60
50
  class GeminiLayoutDetector(LayoutDetector):
61
- """Document layout detector using Google's Gemini models via OpenAI compatibility layer."""
51
+ """
52
+ GeminiLayoutDetector: Layout analysis using Gemini via OpenAI-compatible API.
62
53
 
63
- # Base URL for the Gemini OpenAI-compatible endpoint
64
- GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
54
+ To use this detector, you must provide a compatible OpenAI client (e.g., from the openai package) via GeminiLayoutOptions.client.
55
+ See the documentation for an example of how to use Gemini layout analysis with natural-pdf.
56
+ """
65
57
 
66
58
  def __init__(self):
67
59
  super().__init__()
68
60
  self.supported_classes = set() # Indicate dynamic nature
69
61
 
70
62
  def is_available(self) -> bool:
71
- """Check if openai library is installed."""
72
- if OpenAI is None:
73
- logger.warning(
74
- "openai package not found. Gemini detector (via OpenAI lib) will not be available. Run: pip install openai"
75
- )
76
- return False
63
+ """
64
+ Check if the Gemini detector is available.
65
+
66
+ Since this detector expects users to provide their own compatible OpenAI client,
67
+ the detector itself is always available. Users must ensure they have a compatible
68
+ client (e.g., from the openai package) and provide it via GeminiLayoutOptions.client.
69
+
70
+ Returns:
71
+ True - the detector is always available, but requires a compatible client.
72
+ """
77
73
  return True
78
74
 
79
75
  def _get_cache_key(self, options: GeminiLayoutOptions) -> str:
@@ -87,21 +83,13 @@ class GeminiLayoutDetector(LayoutDetector):
87
83
 
88
84
  def _load_model_from_options(self, options: GeminiLayoutOptions) -> Any:
89
85
  """Validate options and return the model name."""
90
- if not self.is_available():
91
- raise RuntimeError("OpenAI library not installed. Please run: pip install openai")
92
-
93
86
  if not isinstance(options, GeminiLayoutOptions):
94
87
  raise TypeError("Incorrect options type provided for Gemini model loading.")
95
-
96
88
  # Model loading is deferred to detect() based on whether a client is provided
97
89
  return options.model_name
98
90
 
99
91
  def detect(self, image: Image.Image, options: BaseLayoutOptions) -> List[Dict[str, Any]]:
100
92
  """Detect layout elements in an image using Gemini via OpenAI library."""
101
- if not self.is_available():
102
- # The is_available check now only confirms library presence
103
- raise RuntimeError("OpenAI library not installed. Please run: pip install openai")
104
-
105
93
  # Ensure options are the correct type
106
94
  final_options: GeminiLayoutOptions
107
95
  if isinstance(options, GeminiLayoutOptions):
@@ -124,160 +112,147 @@ class GeminiLayoutDetector(LayoutDetector):
124
112
  model_name = self._get_model(final_options)
125
113
  detections = []
126
114
 
127
- try:
128
- # --- 1. Initialize OpenAI Client ---
129
- client: Optional[OpenAI] = None
130
- # Use the provided client instance
131
- if hasattr(final_options.client, "beta") and hasattr(
132
- final_options.client.beta.chat.completions, "parse"
133
- ):
134
- client = final_options.client
135
- logger.debug("Using provided client instance.")
136
- else:
137
- logger.error(
138
- "Provided client does not seem compatible (missing beta.chat.completions.parse)."
139
- )
140
- raise TypeError(
141
- "Provided client is not compatible with the expected OpenAI interface."
142
- )
115
+ # --- 1. Initialize OpenAI Client ---
116
+ client = getattr(final_options, "client", None)
117
+ if client is None:
118
+ raise RuntimeError(
119
+ "No client provided. Please provide a compatible OpenAI client via GeminiLayoutOptions.client."
120
+ )
143
121
 
144
- if not client:
145
- # This should not happen if logic above is correct, but as a safeguard
146
- raise RuntimeError("Failed to obtain a valid client for Gemini detection.")
122
+ if not (
123
+ hasattr(client, "beta")
124
+ and hasattr(getattr(client.beta, "chat", None), "completions")
125
+ and hasattr(getattr(client.beta.chat.completions, "parse", None), "__call__")
126
+ ):
127
+ raise RuntimeError(
128
+ "Provided client is not compatible with the expected OpenAI interface."
129
+ )
130
+ logger.debug("Using provided client instance.")
147
131
 
148
- # --- 2. Prepare Input for OpenAI API ---
149
- if not final_options.classes:
150
- logger.error("Gemini layout detection requires a list of classes to find.")
151
- return []
132
+ # --- 2. Prepare Input for OpenAI API ---
133
+ if not final_options.classes:
134
+ logger.error("Gemini layout detection requires a list of classes to find.")
135
+ return []
152
136
 
153
- width, height = image.size
137
+ width, height = image.size
138
+
139
+ # Convert image to base64
140
+ buffered = io.BytesIO()
141
+ image.save(buffered, format="PNG")
142
+ img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
143
+ image_url = f"data:image/png;base64,{img_base64}"
144
+
145
+ class_list_str = ", ".join(f"`{c}`" for c in final_options.classes)
146
+ prompt_text = (
147
+ f"Analyze the provided image of a document page ({width}x{height}). "
148
+ f"Identify all regions corresponding to the following types: {class_list_str}. "
149
+ f"Return ONLY the structured data requested as formatted JSON."
150
+ )
151
+
152
+ messages = [
153
+ {
154
+ "role": "user",
155
+ "content": [
156
+ {"type": "text", "text": prompt_text},
157
+ {
158
+ "type": "image_url",
159
+ "image_url": {"url": image_url},
160
+ },
161
+ ],
162
+ }
163
+ ]
154
164
 
155
- # Convert image to base64
156
- buffered = io.BytesIO()
157
- image.save(buffered, format="PNG")
158
- img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
159
- image_url = f"data:image/png;base64,{img_base64}"
165
+ logger.debug(
166
+ f"Running Gemini detection via OpenAI lib (Model: {model_name}). Asking for classes: {final_options.classes}"
167
+ )
160
168
 
161
- class_list_str = ", ".join(f"`{c}`" for c in final_options.classes)
162
- prompt_text = (
163
- f"Analyze the provided image of a document page ({width}x{height}). "
164
- f"Identify all regions corresponding to the following types: {class_list_str}. "
165
- f"Return ONLY the structured data requested as formatted JSON."
166
- )
169
+ completion_kwargs = {
170
+ "temperature": final_options.extra_args.get("temperature", 0.0), # Default to low temp
171
+ "max_tokens": final_options.extra_args.get("max_tokens", 4096),
172
+ }
167
173
 
168
- messages = [
169
- {
170
- "role": "user",
171
- "content": [
172
- {"type": "text", "text": prompt_text},
173
- {
174
- "type": "image_url",
175
- "image_url": {"url": image_url},
176
- },
177
- ],
178
- }
179
- ]
174
+ completion_kwargs = {k: v for k, v in completion_kwargs.items() if v is not None}
180
175
 
181
- logger.debug(
182
- f"Running Gemini detection via OpenAI lib (Model: {model_name}). Asking for classes: {final_options.classes}"
183
- )
176
+ class ImageContents(BaseModel):
177
+ regions: List[DetectedRegion]
184
178
 
185
- completion_kwargs = {
186
- "temperature": final_options.extra_args.get(
187
- "temperature", 0.0
188
- ), # Default to low temp
189
- "max_tokens": final_options.extra_args.get("max_tokens", 4096),
190
- }
179
+ completion: "ChatCompletion" = client.beta.chat.completions.parse(
180
+ model=model_name,
181
+ messages=messages,
182
+ response_format=ImageContents,
183
+ **completion_kwargs,
184
+ )
191
185
 
192
- completion_kwargs = {k: v for k, v in completion_kwargs.items() if v is not None}
186
+ logger.debug(f"Gemini response received via OpenAI lib.")
193
187
 
194
- class ImageContents(BaseModel):
195
- regions: List[DetectedRegion]
188
+ # --- 4. Process Parsed Response ---
189
+ if not completion.choices:
190
+ logger.error("Gemini response (via OpenAI lib) contained no choices.")
191
+ return []
196
192
 
197
- completion: ChatCompletion = client.beta.chat.completions.parse(
198
- model=model_name,
199
- messages=messages,
200
- response_format=ImageContents,
201
- **completion_kwargs,
193
+ # Get the parsed Pydantic objects
194
+ parsed_results = completion.choices[0].message.parsed.regions
195
+ if not parsed_results or not isinstance(parsed_results, list):
196
+ logger.error(
197
+ f"Gemini response (via OpenAI lib) did not contain a valid list of parsed regions. Found: {type(parsed_results)}"
202
198
  )
199
+ return []
203
200
 
204
- logger.debug(f"Gemini response received via OpenAI lib.")
205
-
206
- # --- 4. Process Parsed Response ---
207
- if not completion.choices:
208
- logger.error("Gemini response (via OpenAI lib) contained no choices.")
209
- return []
210
-
211
- # Get the parsed Pydantic objects
212
- parsed_results = completion.choices[0].message.parsed.regions
213
- if not parsed_results or not isinstance(parsed_results, list):
214
- logger.error(
215
- f"Gemini response (via OpenAI lib) did not contain a valid list of parsed regions. Found: {type(parsed_results)}"
201
+ # --- 5. Convert to Detections & Filter ---
202
+ normalized_classes_req = {self._normalize_class_name(c) for c in final_options.classes}
203
+ normalized_classes_excl = (
204
+ {self._normalize_class_name(c) for c in final_options.exclude_classes}
205
+ if final_options.exclude_classes
206
+ else set()
207
+ )
208
+
209
+ for item in parsed_results:
210
+ # The item is already a validated DetectedRegion Pydantic object
211
+ # Access fields directly
212
+ label = item.label
213
+ bbox_raw = item.bbox
214
+ confidence_score = item.confidence
215
+
216
+ # Coordinates should already be floats, but ensure tuple format
217
+ xmin, ymin, xmax, ymax = tuple(bbox_raw)
218
+
219
+ # --- Apply Filtering ---
220
+ normalized_class = self._normalize_class_name(label)
221
+
222
+ # Check against requested classes (Should be guaranteed by schema, but doesn't hurt)
223
+ if normalized_class not in normalized_classes_req:
224
+ logger.warning(
225
+ f"Gemini (via OpenAI) returned unexpected class '{label}' despite schema. Skipping."
216
226
  )
217
- return []
218
-
219
- # --- 5. Convert to Detections & Filter ---
220
- normalized_classes_req = {self._normalize_class_name(c) for c in final_options.classes}
221
- normalized_classes_excl = (
222
- {self._normalize_class_name(c) for c in final_options.exclude_classes}
223
- if final_options.exclude_classes
224
- else set()
225
- )
227
+ continue
226
228
 
227
- for item in parsed_results:
228
- # The item is already a validated DetectedRegion Pydantic object
229
- # Access fields directly
230
- label = item.label
231
- bbox_raw = item.bbox
232
- confidence_score = item.confidence
233
-
234
- # Coordinates should already be floats, but ensure tuple format
235
- xmin, ymin, xmax, ymax = tuple(bbox_raw)
236
-
237
- # --- Apply Filtering ---
238
- normalized_class = self._normalize_class_name(label)
239
-
240
- # Check against requested classes (Should be guaranteed by schema, but doesn't hurt)
241
- if normalized_class not in normalized_classes_req:
242
- logger.warning(
243
- f"Gemini (via OpenAI) returned unexpected class '{label}' despite schema. Skipping."
244
- )
245
- continue
246
-
247
- # Check against excluded classes
248
- if normalized_class in normalized_classes_excl:
249
- logger.debug(
250
- f"Skipping excluded class '{label}' (normalized: {normalized_class})."
251
- )
252
- continue
253
-
254
- # Check against base confidence threshold from options
255
- if confidence_score < final_options.confidence:
256
- logger.debug(
257
- f"Skipping item with confidence {confidence_score:.3f} below threshold {final_options.confidence}."
258
- )
259
- continue
260
-
261
- # Add detection
262
- detections.append(
263
- {
264
- "bbox": (xmin, ymin, xmax, ymax),
265
- "class": label, # Use original label from LLM
266
- "confidence": confidence_score,
267
- "normalized_class": normalized_class,
268
- "source": "layout",
269
- "model": "gemini", # Keep model name generic as gemini
270
- }
229
+ # Check against excluded classes
230
+ if normalized_class in normalized_classes_excl:
231
+ logger.debug(f"Skipping excluded class '{label}' (normalized: {normalized_class}).")
232
+ continue
233
+
234
+ # Check against base confidence threshold from options
235
+ if confidence_score < final_options.confidence:
236
+ logger.debug(
237
+ f"Skipping item with confidence {confidence_score:.3f} below threshold {final_options.confidence}."
271
238
  )
239
+ continue
272
240
 
273
- self.logger.info(
274
- f"Gemini (via OpenAI lib) processed response. Detected {len(detections)} layout elements matching criteria."
241
+ # Add detection
242
+ detections.append(
243
+ {
244
+ "bbox": (xmin, ymin, xmax, ymax),
245
+ "class": label, # Use original label from LLM
246
+ "confidence": confidence_score,
247
+ "normalized_class": normalized_class,
248
+ "source": "layout",
249
+ "model": "gemini", # Keep model name generic as gemini
250
+ }
275
251
  )
276
252
 
277
- except Exception as e:
278
- # Catch potential OpenAI API errors or other issues
279
- self.logger.error(f"Error during Gemini detection (via OpenAI lib): {e}", exc_info=True)
280
- return []
253
+ self.logger.info(
254
+ f"Gemini (via OpenAI lib) processed response. Detected {len(detections)} layout elements matching criteria."
255
+ )
281
256
 
282
257
  return detections
283
258
 
@@ -121,14 +121,18 @@ class LayoutManager:
121
121
  # Construct helpful error message with install hint
122
122
  install_hint = ""
123
123
  if engine_name == "yolo":
124
- install_hint = "pip install 'natural-pdf[layout_yolo]'"
124
+ install_hint = "pip install doclayout_yolo"
125
125
  elif engine_name == "tatr":
126
- install_hint = "pip install 'natural-pdf[core-ml]'"
126
+ # This should now be installed with core dependencies
127
+ install_hint = "(should be installed with natural-pdf, check for import errors)"
127
128
  elif engine_name == "paddle":
128
- install_hint = "pip install 'natural-pdf[paddle]'"
129
+ install_hint = "pip install paddleocr paddlepaddle"
129
130
  elif engine_name == "surya":
130
- install_hint = "pip install 'natural-pdf[surya]'"
131
- # Add other engines like docling if they become optional extras
131
+ install_hint = "pip install surya-ocr"
132
+ elif engine_name == "docling":
133
+ install_hint = "pip install docling"
134
+ elif engine_name == "gemini":
135
+ install_hint = "pip install openai"
132
136
  else:
133
137
  install_hint = f"(Check installation requirements for {engine_name})"
134
138
 
@@ -54,14 +54,84 @@ class TATRLayoutOptions(BaseLayoutOptions):
54
54
  # --- Paddle Specific Options ---
55
55
  @dataclass
56
56
  class PaddleLayoutOptions(BaseLayoutOptions):
57
- """Options specific to PaddlePaddle PP-Structure layout detection."""
58
-
59
- lang: str = "en" # Language ('en', 'ch', etc.)
60
- use_angle_cls: bool = False # Use text angle classification?
61
- enable_table: bool = True # Enable table structure detection?
62
- show_log: bool = False # Show Paddle internal logs?
63
- detect_text: bool = True # Also detect raw text boxes using PaddleOCR?
57
+ """
58
+ Options specific to PaddlePaddle PP-StructureV3 layout detection.
59
+ See: https://paddlepaddle.github.io/PaddleOCR/latest/en/version3.x/pipeline_usage/PP-StructureV3.html
60
+ """
61
+ # Model paths and names
62
+ layout_detection_model_name: Optional[str] = None
63
+ layout_detection_model_dir: Optional[str] = None
64
+ layout_threshold: Optional[float] = None
65
+ layout_nms: Optional[bool] = None
66
+ layout_unclip_ratio: Optional[float] = None
67
+ layout_merge_bboxes_mode: Optional[str] = None
68
+ chart_recognition_model_name: Optional[str] = None
69
+ chart_recognition_model_dir: Optional[str] = None
70
+ chart_recognition_batch_size: Optional[int] = None
71
+ region_detection_model_name: Optional[str] = None
72
+ region_detection_model_dir: Optional[str] = None
73
+ doc_orientation_classify_model_name: Optional[str] = None
74
+ doc_orientation_classify_model_dir: Optional[str] = None
75
+ doc_unwarping_model_name: Optional[str] = None
76
+ doc_unwarping_model_dir: Optional[str] = None
77
+ text_detection_model_name: Optional[str] = None
78
+ text_detection_model_dir: Optional[str] = None
79
+ text_det_limit_side_len: Optional[int] = None
80
+ text_det_limit_type: Optional[str] = None
81
+ text_det_thresh: Optional[float] = None
82
+ text_det_box_thresh: Optional[float] = None
83
+ text_det_unclip_ratio: Optional[float] = None
84
+ textline_orientation_model_name: Optional[str] = None
85
+ textline_orientation_model_dir: Optional[str] = None
86
+ textline_orientation_batch_size: Optional[int] = None
87
+ text_recognition_model_name: Optional[str] = None
88
+ text_recognition_model_dir: Optional[str] = None
89
+ text_recognition_batch_size: Optional[int] = None
90
+ text_rec_score_thresh: Optional[float] = None
91
+ table_classification_model_name: Optional[str] = None
92
+ table_classification_model_dir: Optional[str] = None
93
+ wired_table_structure_recognition_model_name: Optional[str] = None
94
+ wired_table_structure_recognition_model_dir: Optional[str] = None
95
+ wireless_table_structure_recognition_model_name: Optional[str] = None
96
+ wireless_table_structure_recognition_model_dir: Optional[str] = None
97
+ wired_table_cells_detection_model_name: Optional[str] = None
98
+ wired_table_cells_detection_model_dir: Optional[str] = None
99
+ wireless_table_cells_detection_model_name: Optional[str] = None
100
+ wireless_table_cells_detection_model_dir: Optional[str] = None
101
+ seal_text_detection_model_name: Optional[str] = None
102
+ seal_text_detection_model_dir: Optional[str] = None
103
+ seal_det_limit_side_len: Optional[int] = None
104
+ seal_det_limit_type: Optional[str] = None
105
+ seal_det_thresh: Optional[float] = None
106
+ seal_det_box_thresh: Optional[float] = None
107
+ seal_det_unclip_ratio: Optional[float] = None
108
+ seal_text_recognition_model_name: Optional[str] = None
109
+ seal_text_recognition_model_dir: Optional[str] = None
110
+ seal_text_recognition_batch_size: Optional[int] = None
111
+ seal_rec_score_thresh: Optional[float] = None
112
+ formula_recognition_model_name: Optional[str] = None
113
+ formula_recognition_model_dir: Optional[str] = None
114
+ formula_recognition_batch_size: Optional[int] = None
115
+ # Module usage flags
116
+ use_doc_orientation_classify: Optional[bool] = True
117
+ use_doc_unwarping: Optional[bool] = True
118
+ use_textline_orientation: Optional[bool] = True
119
+ use_seal_recognition: Optional[bool] = False
120
+ use_table_recognition: Optional[bool] = True
121
+ use_formula_recognition: Optional[bool] = False
122
+ use_chart_recognition: Optional[bool] = True
123
+ use_region_detection: Optional[bool] = True
124
+ # General parameters
125
+ device: Optional[str] = None
126
+ enable_hpi: Optional[bool] = None
127
+ use_tensorrt: Optional[bool] = None
128
+ precision: Optional[str] = None
129
+ enable_mkldnn: Optional[bool] = False
130
+ cpu_threads: Optional[int] = None
131
+ paddlex_config: Optional[str] = None
132
+ lang: Optional[str] = None # For English model selection
64
133
  verbose: bool = False # Verbose logging for the detector class
134
+ create_cells: Optional[bool] = True
65
135
 
66
136
 
67
137
  # --- Surya Specific Options ---