natural-pdf 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. docs/api/index.md +386 -0
  2. docs/assets/favicon.png +3 -0
  3. docs/assets/favicon.svg +3 -0
  4. docs/assets/javascripts/custom.js +17 -0
  5. docs/assets/logo.svg +3 -0
  6. docs/assets/sample-screen.png +0 -0
  7. docs/assets/social-preview.png +17 -0
  8. docs/assets/social-preview.svg +17 -0
  9. docs/assets/stylesheets/custom.css +65 -0
  10. docs/document-qa/index.ipynb +435 -0
  11. docs/document-qa/index.md +79 -0
  12. docs/element-selection/index.ipynb +915 -0
  13. docs/element-selection/index.md +229 -0
  14. docs/index.md +170 -0
  15. docs/installation/index.md +69 -0
  16. docs/interactive-widget/index.ipynb +962 -0
  17. docs/interactive-widget/index.md +12 -0
  18. docs/layout-analysis/index.ipynb +818 -0
  19. docs/layout-analysis/index.md +185 -0
  20. docs/ocr/index.md +222 -0
  21. docs/pdf-navigation/index.ipynb +314 -0
  22. docs/pdf-navigation/index.md +97 -0
  23. docs/regions/index.ipynb +816 -0
  24. docs/regions/index.md +294 -0
  25. docs/tables/index.ipynb +658 -0
  26. docs/tables/index.md +144 -0
  27. docs/text-analysis/index.ipynb +370 -0
  28. docs/text-analysis/index.md +105 -0
  29. docs/text-extraction/index.ipynb +1478 -0
  30. docs/text-extraction/index.md +292 -0
  31. docs/tutorials/01-loading-and-extraction.ipynb +1696 -0
  32. docs/tutorials/01-loading-and-extraction.md +95 -0
  33. docs/tutorials/02-finding-elements.ipynb +340 -0
  34. docs/tutorials/02-finding-elements.md +149 -0
  35. docs/tutorials/03-extracting-blocks.ipynb +147 -0
  36. docs/tutorials/03-extracting-blocks.md +48 -0
  37. docs/tutorials/04-table-extraction.ipynb +114 -0
  38. docs/tutorials/04-table-extraction.md +50 -0
  39. docs/tutorials/05-excluding-content.ipynb +270 -0
  40. docs/tutorials/05-excluding-content.md +109 -0
  41. docs/tutorials/06-document-qa.ipynb +332 -0
  42. docs/tutorials/06-document-qa.md +91 -0
  43. docs/tutorials/07-layout-analysis.ipynb +260 -0
  44. docs/tutorials/07-layout-analysis.md +66 -0
  45. docs/tutorials/07-working-with-regions.ipynb +409 -0
  46. docs/tutorials/07-working-with-regions.md +151 -0
  47. docs/tutorials/08-spatial-navigation.ipynb +508 -0
  48. docs/tutorials/08-spatial-navigation.md +190 -0
  49. docs/tutorials/09-section-extraction.ipynb +2434 -0
  50. docs/tutorials/09-section-extraction.md +256 -0
  51. docs/tutorials/10-form-field-extraction.ipynb +484 -0
  52. docs/tutorials/10-form-field-extraction.md +201 -0
  53. docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
  54. docs/tutorials/11-enhanced-table-processing.md +9 -0
  55. docs/tutorials/12-ocr-integration.ipynb +586 -0
  56. docs/tutorials/12-ocr-integration.md +188 -0
  57. docs/tutorials/13-semantic-search.ipynb +1888 -0
  58. docs/tutorials/13-semantic-search.md +77 -0
  59. docs/visual-debugging/index.ipynb +2970 -0
  60. docs/visual-debugging/index.md +157 -0
  61. docs/visual-debugging/region.png +0 -0
  62. natural_pdf/__init__.py +39 -20
  63. natural_pdf/analyzers/__init__.py +2 -1
  64. natural_pdf/analyzers/layout/base.py +32 -24
  65. natural_pdf/analyzers/layout/docling.py +131 -72
  66. natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
  67. natural_pdf/analyzers/layout/layout_manager.py +98 -58
  68. natural_pdf/analyzers/layout/layout_options.py +32 -17
  69. natural_pdf/analyzers/layout/paddle.py +152 -95
  70. natural_pdf/analyzers/layout/surya.py +164 -92
  71. natural_pdf/analyzers/layout/tatr.py +149 -84
  72. natural_pdf/analyzers/layout/yolo.py +84 -44
  73. natural_pdf/analyzers/text_options.py +22 -15
  74. natural_pdf/analyzers/text_structure.py +131 -85
  75. natural_pdf/analyzers/utils.py +30 -23
  76. natural_pdf/collections/pdf_collection.py +126 -98
  77. natural_pdf/core/__init__.py +1 -1
  78. natural_pdf/core/element_manager.py +416 -337
  79. natural_pdf/core/highlighting_service.py +268 -196
  80. natural_pdf/core/page.py +910 -516
  81. natural_pdf/core/pdf.py +387 -289
  82. natural_pdf/elements/__init__.py +1 -1
  83. natural_pdf/elements/base.py +302 -214
  84. natural_pdf/elements/collections.py +714 -514
  85. natural_pdf/elements/line.py +39 -36
  86. natural_pdf/elements/rect.py +32 -30
  87. natural_pdf/elements/region.py +854 -883
  88. natural_pdf/elements/text.py +122 -99
  89. natural_pdf/exporters/__init__.py +0 -1
  90. natural_pdf/exporters/searchable_pdf.py +261 -102
  91. natural_pdf/ocr/__init__.py +23 -14
  92. natural_pdf/ocr/engine.py +17 -8
  93. natural_pdf/ocr/engine_easyocr.py +63 -47
  94. natural_pdf/ocr/engine_paddle.py +97 -68
  95. natural_pdf/ocr/engine_surya.py +54 -44
  96. natural_pdf/ocr/ocr_manager.py +88 -62
  97. natural_pdf/ocr/ocr_options.py +16 -10
  98. natural_pdf/qa/__init__.py +1 -1
  99. natural_pdf/qa/document_qa.py +119 -111
  100. natural_pdf/search/__init__.py +37 -31
  101. natural_pdf/search/haystack_search_service.py +312 -189
  102. natural_pdf/search/haystack_utils.py +186 -122
  103. natural_pdf/search/search_options.py +25 -14
  104. natural_pdf/search/search_service_protocol.py +12 -6
  105. natural_pdf/search/searchable_mixin.py +261 -176
  106. natural_pdf/selectors/__init__.py +2 -1
  107. natural_pdf/selectors/parser.py +159 -316
  108. natural_pdf/templates/__init__.py +1 -1
  109. natural_pdf/utils/highlighting.py +8 -2
  110. natural_pdf/utils/reading_order.py +65 -63
  111. natural_pdf/utils/text_extraction.py +195 -0
  112. natural_pdf/utils/visualization.py +70 -61
  113. natural_pdf/widgets/__init__.py +2 -3
  114. natural_pdf/widgets/viewer.py +749 -718
  115. {natural_pdf-0.1.3.dist-info → natural_pdf-0.1.5.dist-info}/METADATA +29 -15
  116. natural_pdf-0.1.5.dist-info/RECORD +134 -0
  117. natural_pdf-0.1.5.dist-info/top_level.txt +5 -0
  118. notebooks/Examples.ipynb +1293 -0
  119. pdfs/.gitkeep +0 -0
  120. pdfs/01-practice.pdf +543 -0
  121. pdfs/0500000US42001.pdf +0 -0
  122. pdfs/0500000US42007.pdf +0 -0
  123. pdfs/2014 Statistics.pdf +0 -0
  124. pdfs/2019 Statistics.pdf +0 -0
  125. pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  126. pdfs/needs-ocr.pdf +0 -0
  127. tests/test_loading.py +50 -0
  128. tests/test_optional_deps.py +298 -0
  129. natural_pdf-0.1.3.dist-info/RECORD +0 -61
  130. natural_pdf-0.1.3.dist-info/top_level.txt +0 -1
  131. {natural_pdf-0.1.3.dist-info → natural_pdf-0.1.5.dist-info}/WHEEL +0 -0
  132. {natural_pdf-0.1.3.dist-info → natural_pdf-0.1.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,34 +1,42 @@
1
+ import copy
1
2
  import logging
2
- from typing import List, Dict, Any, Optional, Union
3
+ from typing import Any, Dict, List, Optional, Union
4
+
3
5
  from PIL import Image
4
- import copy
5
6
 
6
- from natural_pdf.elements.region import Region
7
7
  from natural_pdf.analyzers.layout.layout_manager import LayoutManager
8
- from natural_pdf.analyzers.layout.layout_options import LayoutOptions, TATRLayoutOptions, BaseLayoutOptions
8
+ from natural_pdf.analyzers.layout.layout_options import (
9
+ BaseLayoutOptions,
10
+ LayoutOptions,
11
+ TATRLayoutOptions,
12
+ )
13
+ from natural_pdf.elements.region import Region
9
14
 
10
15
  logger = logging.getLogger(__name__)
11
16
 
17
+
12
18
  class LayoutAnalyzer:
13
19
  """
14
20
  Handles layout analysis for PDF pages, including image rendering,
15
21
  coordinate scaling, region creation, and result storage.
16
22
  """
17
-
23
+
18
24
  def __init__(self, page, layout_manager: Optional[LayoutManager] = None):
19
25
  """
20
26
  Initialize the layout analyzer.
21
-
27
+
22
28
  Args:
23
29
  page: The Page object to analyze
24
30
  layout_manager: Optional LayoutManager instance. If None, will try to get from page's parent.
25
31
  """
26
32
  self._page = page
27
- self._layout_manager = layout_manager or getattr(page._parent, '_layout_manager', None)
28
-
33
+ self._layout_manager = layout_manager or getattr(page._parent, "_layout_manager", None)
34
+
29
35
  if not self._layout_manager:
30
- logger.warning(f"LayoutManager not available for page {page.number}. Layout analysis will fail.")
31
-
36
+ logger.warning(
37
+ f"LayoutManager not available for page {page.number}. Layout analysis will fail."
38
+ )
39
+
32
40
  def analyze_layout(
33
41
  self,
34
42
  engine: Optional[str] = None,
@@ -38,14 +46,14 @@ class LayoutAnalyzer:
38
46
  exclude_classes: Optional[List[str]] = None,
39
47
  device: Optional[str] = None,
40
48
  existing: str = "replace",
41
- **kwargs
49
+ **kwargs,
42
50
  ) -> List[Region]:
43
51
  """
44
52
  Analyze the page layout using the configured LayoutManager.
45
-
46
- This method constructs the final options object, including internal context,
53
+
54
+ This method constructs the final options object, including internal context,
47
55
  and passes it to the LayoutManager.
48
-
56
+
49
57
  Args:
50
58
  engine: Name of the layout engine (e.g., 'yolo', 'tatr'). Uses manager's default if None and no options object given.
51
59
  options: Specific LayoutOptions object for advanced configuration. If provided, simple args (confidence, etc.) are ignored.
@@ -60,122 +68,149 @@ class LayoutAnalyzer:
60
68
  List of created Region objects.
61
69
  """
62
70
  if not self._layout_manager:
63
- logger.error(f"Page {self._page.number}: LayoutManager not available. Cannot analyze layout.")
71
+ logger.error(
72
+ f"Page {self._page.number}: LayoutManager not available. Cannot analyze layout."
73
+ )
64
74
  return []
65
75
 
66
- logger.info(f"Page {self._page.number}: Analyzing layout (Engine: {engine or 'default'}, Options provided: {options is not None})...")
76
+ logger.info(
77
+ f"Page {self._page.number}: Analyzing layout (Engine: {engine or 'default'}, Options provided: {options is not None})..."
78
+ )
67
79
 
68
80
  # --- Render Page Image (Standard Resolution) ---
69
- logger.debug(f" Rendering page {self._page.number} to image for initial layout detection...")
81
+ logger.debug(
82
+ f" Rendering page {self._page.number} to image for initial layout detection..."
83
+ )
70
84
  try:
71
- layout_scale = getattr(self._page._parent, '_config', {}).get('layout_image_scale', 1.5)
85
+ layout_scale = getattr(self._page._parent, "_config", {}).get("layout_image_scale", 1.5)
72
86
  layout_resolution = layout_scale * 72
73
- std_res_page_image = self._page.to_image(resolution=layout_resolution, include_highlights=False)
87
+ std_res_page_image = self._page.to_image(
88
+ resolution=layout_resolution, include_highlights=False
89
+ )
74
90
  if not std_res_page_image:
75
91
  raise ValueError("Initial page rendering returned None")
76
- logger.debug(f" Initial rendered image size: {std_res_page_image.width}x{std_res_page_image.height}")
92
+ logger.debug(
93
+ f" Initial rendered image size: {std_res_page_image.width}x{std_res_page_image.height}"
94
+ )
77
95
  except Exception as e:
78
96
  logger.error(f" Failed to render initial page image: {e}", exc_info=True)
79
97
  return []
80
-
98
+
81
99
  # --- Calculate Scaling Factors (Standard Res Image <-> PDF) ---
82
100
  if std_res_page_image.width == 0 or std_res_page_image.height == 0:
83
- logger.error(f"Page {self._page.number}: Invalid initial rendered image dimensions. Cannot scale results.")
101
+ logger.error(
102
+ f"Page {self._page.number}: Invalid initial rendered image dimensions. Cannot scale results."
103
+ )
84
104
  return []
85
105
  img_scale_x = self._page.width / std_res_page_image.width
86
106
  img_scale_y = self._page.height / std_res_page_image.height
87
107
  logger.debug(f" StdRes Image -> PDF Scaling: x={img_scale_x:.4f}, y={img_scale_y:.4f}")
88
108
 
89
- # --- Construct Final Options Object ---
109
+ # --- Construct Final Options Object ---
90
110
  final_options: BaseLayoutOptions
91
-
111
+
92
112
  if options is not None:
93
- # User provided a complete options object, use it directly
94
- logger.debug("Using user-provided options object.")
95
- final_options = copy.deepcopy(options) # Copy to avoid modifying original user object
96
- if kwargs:
97
- logger.warning(f"Ignoring kwargs {list(kwargs.keys())} because a full options object was provided.")
98
- # Infer engine from options type if engine arg wasn't provided
99
- if engine is None:
100
- for name, registry_entry in self._layout_manager.ENGINE_REGISTRY.items():
101
- if isinstance(final_options, registry_entry['options_class']):
102
- engine = name
103
- logger.debug(f"Inferred engine '{engine}' from options type.")
104
- break
105
- if engine is None:
106
- logger.warning("Could not infer engine from provided options object.")
113
+ # User provided a complete options object, use it directly
114
+ logger.debug("Using user-provided options object.")
115
+ final_options = copy.deepcopy(options) # Copy to avoid modifying original user object
116
+ if kwargs:
117
+ logger.warning(
118
+ f"Ignoring kwargs {list(kwargs.keys())} because a full options object was provided."
119
+ )
120
+ # Infer engine from options type if engine arg wasn't provided
121
+ if engine is None:
122
+ for name, registry_entry in self._layout_manager.ENGINE_REGISTRY.items():
123
+ if isinstance(final_options, registry_entry["options_class"]):
124
+ engine = name
125
+ logger.debug(f"Inferred engine '{engine}' from options type.")
126
+ break
127
+ if engine is None:
128
+ logger.warning("Could not infer engine from provided options object.")
107
129
  else:
108
- # Construct options from simple args (engine, confidence, classes, etc.)
109
- logger.debug("Constructing options from simple arguments.")
110
- selected_engine = engine or self._layout_manager.get_available_engines()[0] # Use provided or first available
111
- engine_lower = selected_engine.lower()
112
- registry = self._layout_manager.ENGINE_REGISTRY
113
-
114
- if engine_lower not in registry:
115
- raise ValueError(f"Unknown or unavailable engine: '{selected_engine}'. Available: {list(registry.keys())}")
116
-
117
- options_class = registry[engine_lower]['options_class']
118
-
119
- # Get base defaults
120
- base_defaults = BaseLayoutOptions()
121
-
122
- # Prepare args for constructor, prioritizing explicit args over defaults
123
- constructor_args = {
124
- 'confidence': confidence if confidence is not None else base_defaults.confidence,
125
- 'classes': classes, # Pass None if not provided
126
- 'exclude_classes': exclude_classes, # Pass None if not provided
127
- 'device': device if device is not None else base_defaults.device,
128
- 'extra_args': kwargs # Pass other kwargs here
129
- }
130
- # Remove None values unless they are valid defaults (like classes=None)
131
- # We can pass all to the dataclass constructor; it handles defaults
132
-
133
- try:
134
- final_options = options_class(**constructor_args)
135
- logger.debug(f"Constructed options: {final_options}")
136
- except TypeError as e:
137
- logger.error(f"Failed to construct options object {options_class.__name__} with args {constructor_args}: {e}")
138
- # Filter kwargs to only include fields defined in the specific options class? Complex.
139
- # Re-raise for now, indicates programming error or invalid kwarg.
140
- raise e
141
-
142
- # --- Add Internal Context to extra_args (ALWAYS) ---
143
- if not hasattr(final_options, 'extra_args') or final_options.extra_args is None:
144
- final_options.extra_args = {}
145
- final_options.extra_args['_page_ref'] = self._page
146
- final_options.extra_args['_img_scale_x'] = img_scale_x
147
- final_options.extra_args['_img_scale_y'] = img_scale_y
148
- logger.debug(f"Added internal context to final_options.extra_args: {final_options.extra_args}")
149
-
150
- # --- Call Layout Manager with the Final Options ---
130
+ # Construct options from simple args (engine, confidence, classes, etc.)
131
+ logger.debug("Constructing options from simple arguments.")
132
+ selected_engine = (
133
+ engine or self._layout_manager.get_available_engines()[0]
134
+ ) # Use provided or first available
135
+ engine_lower = selected_engine.lower()
136
+ registry = self._layout_manager.ENGINE_REGISTRY
137
+
138
+ if engine_lower not in registry:
139
+ raise ValueError(
140
+ f"Unknown or unavailable engine: '{selected_engine}'. Available: {list(registry.keys())}"
141
+ )
142
+
143
+ options_class = registry[engine_lower]["options_class"]
144
+
145
+ # Get base defaults
146
+ base_defaults = BaseLayoutOptions()
147
+
148
+ # Prepare args for constructor, prioritizing explicit args over defaults
149
+ constructor_args = {
150
+ "confidence": confidence if confidence is not None else base_defaults.confidence,
151
+ "classes": classes, # Pass None if not provided
152
+ "exclude_classes": exclude_classes, # Pass None if not provided
153
+ "device": device if device is not None else base_defaults.device,
154
+ "extra_args": kwargs, # Pass other kwargs here
155
+ }
156
+ # Remove None values unless they are valid defaults (like classes=None)
157
+ # We can pass all to the dataclass constructor; it handles defaults
158
+
159
+ try:
160
+ final_options = options_class(**constructor_args)
161
+ logger.debug(f"Constructed options: {final_options}")
162
+ except TypeError as e:
163
+ logger.error(
164
+ f"Failed to construct options object {options_class.__name__} with args {constructor_args}: {e}"
165
+ )
166
+ # Filter kwargs to only include fields defined in the specific options class? Complex.
167
+ # Re-raise for now, indicates programming error or invalid kwarg.
168
+ raise e
169
+
170
+ # --- Add Internal Context to extra_args (ALWAYS) ---
171
+ if not hasattr(final_options, "extra_args") or final_options.extra_args is None:
172
+ final_options.extra_args = {}
173
+ final_options.extra_args["_page_ref"] = self._page
174
+ final_options.extra_args["_img_scale_x"] = img_scale_x
175
+ final_options.extra_args["_img_scale_y"] = img_scale_y
176
+ logger.debug(
177
+ f"Added internal context to final_options.extra_args: {final_options.extra_args}"
178
+ )
179
+
180
+ # --- Call Layout Manager with the Final Options ---
151
181
  logger.debug(f"Calling Layout Manager with final options object.")
152
182
  try:
153
183
  # Pass only image and the constructed options object
154
184
  detections = self._layout_manager.analyze_layout(
155
- image=std_res_page_image,
156
- options=final_options
185
+ image=std_res_page_image,
186
+ options=final_options,
157
187
  # No engine, confidence, classes etc. passed here directly
158
188
  )
159
189
  logger.info(f" Layout Manager returned {len(detections)} detections.")
190
+ # Specifically let errors about unknown/unavailable engines propagate
191
+ except (ValueError, RuntimeError) as engine_error:
192
+ logger.error(f"Layout analysis failed: {engine_error}")
193
+ raise engine_error # Re-raise the specific error
160
194
  except Exception as e:
161
- logger.error(f" Layout analysis failed: {e}", exc_info=True)
162
- return []
195
+ # Catch other unexpected errors during analysis execution
196
+ logger.error(f" Layout analysis failed with unexpected error: {e}", exc_info=True)
197
+ return [] # Return empty list for other runtime errors
163
198
 
164
199
  # --- Process Detections (Convert to Regions, Scale Coords from Image to PDF) ---
165
200
  layout_regions = []
166
- docling_id_to_region = {} # For hierarchy if using Docling
201
+ docling_id_to_region = {} # For hierarchy if using Docling
167
202
 
168
203
  for detection in detections:
169
204
  try:
170
205
  # bbox is relative to std_res_page_image
171
- x_min, y_min, x_max, y_max = detection['bbox']
206
+ x_min, y_min, x_max, y_max = detection["bbox"]
172
207
 
173
208
  # Convert coordinates from image to PDF space
174
209
  pdf_x0 = x_min * img_scale_x
175
210
  pdf_y0 = y_min * img_scale_y
176
211
  pdf_x1 = x_max * img_scale_x
177
212
  pdf_y1 = y_max * img_scale_y
178
-
213
+
179
214
  # Ensure PDF coords are valid
180
215
  pdf_x0, pdf_x1 = min(pdf_x0, pdf_x1), max(pdf_x0, pdf_x1)
181
216
  pdf_y0, pdf_y1 = min(pdf_y0, pdf_y1), max(pdf_y0, pdf_y1)
@@ -186,21 +221,24 @@ class LayoutAnalyzer:
186
221
 
187
222
  # Create a Region object with PDF coordinates
188
223
  region = Region(self._page, (pdf_x0, pdf_y0, pdf_x1, pdf_y1))
189
- region.region_type = detection.get('class', 'unknown')
190
- region.normalized_type = detection.get('normalized_class', 'unknown')
191
- region.confidence = detection.get('confidence', 0.0)
192
- region.model = detection.get('model', engine or 'unknown')
193
- region.source = 'detected'
194
-
224
+ region.region_type = detection.get("class", "unknown")
225
+ region.normalized_type = detection.get("normalized_class", "unknown")
226
+ region.confidence = detection.get("confidence", 0.0)
227
+ region.model = detection.get("model", engine or "unknown")
228
+ region.source = "detected"
229
+
195
230
  # Add extra info if available
196
- if 'text' in detection: region.text_content = detection['text']
197
- if 'docling_id' in detection: region.docling_id = detection['docling_id']
198
- if 'parent_id' in detection: region.parent_id = detection['parent_id']
231
+ if "text" in detection:
232
+ region.text_content = detection["text"]
233
+ if "docling_id" in detection:
234
+ region.docling_id = detection["docling_id"]
235
+ if "parent_id" in detection:
236
+ region.parent_id = detection["parent_id"]
199
237
 
200
238
  layout_regions.append(region)
201
239
 
202
240
  # Track Docling IDs for hierarchy
203
- if hasattr(region, 'docling_id') and region.docling_id:
241
+ if hasattr(region, "docling_id") and region.docling_id:
204
242
  docling_id_to_region[region.docling_id] = region
205
243
 
206
244
  except (KeyError, IndexError, TypeError, ValueError) as e:
@@ -211,10 +249,10 @@ class LayoutAnalyzer:
211
249
  if docling_id_to_region:
212
250
  logger.debug("Building Docling region hierarchy...")
213
251
  for region in layout_regions:
214
- if hasattr(region, 'parent_id') and region.parent_id:
252
+ if hasattr(region, "parent_id") and region.parent_id:
215
253
  parent_region = docling_id_to_region.get(region.parent_id)
216
254
  if parent_region:
217
- if hasattr(parent_region, 'add_child'):
255
+ if hasattr(parent_region, "add_child"):
218
256
  parent_region.add_child(region)
219
257
  else:
220
258
  logger.warning("Region object missing add_child method for hierarchy.")
@@ -222,34 +260,39 @@ class LayoutAnalyzer:
222
260
  # --- Store Results ---
223
261
  logger.debug(f"Storing {len(layout_regions)} processed layout regions (mode: {existing}).")
224
262
  # Handle existing regions based on mode
225
- if existing.lower() == 'append':
226
- if 'detected' not in self._page._regions: self._page._regions['detected'] = []
227
- self._page._regions['detected'].extend(layout_regions)
228
- else: # Default is 'replace'
229
- self._page._regions['detected'] = layout_regions
263
+ if existing.lower() == "append":
264
+ if "detected" not in self._page._regions:
265
+ self._page._regions["detected"] = []
266
+ self._page._regions["detected"].extend(layout_regions)
267
+ else: # Default is 'replace'
268
+ self._page._regions["detected"] = layout_regions
230
269
 
231
270
  # Add regions to the element manager
232
271
  for region in layout_regions:
233
272
  self._page._element_mgr.add_region(region)
234
273
 
235
274
  # Store layout regions in a dedicated attribute for easier access
236
- self._page.detected_layout_regions = self._page._regions['detected']
275
+ self._page.detected_layout_regions = self._page._regions["detected"]
237
276
  logger.info(f"Layout analysis complete for page {self._page.number}.")
238
-
277
+
239
278
  # --- Auto-create cells if requested by TATR options ---
240
279
  if isinstance(final_options, TATRLayoutOptions) and final_options.create_cells:
241
- logger.info(f" Option create_cells=True detected for TATR. Attempting cell creation...")
280
+ logger.info(
281
+ f" Option create_cells=True detected for TATR. Attempting cell creation..."
282
+ )
242
283
  created_cell_count = 0
243
284
  for region in layout_regions:
244
285
  # Only attempt on regions identified as tables by the TATR model
245
- if region.model == 'tatr' and region.region_type == 'table':
286
+ if region.model == "tatr" and region.region_type == "table":
246
287
  try:
247
288
  # create_cells now modifies the page elements directly and returns self
248
289
  region.create_cells()
249
- # We could potentially count cells created here if needed,
290
+ # We could potentially count cells created here if needed,
250
291
  # but the method logs its own count.
251
292
  except Exception as cell_error:
252
- logger.warning(f" Error calling create_cells for table region {region.bbox}: {cell_error}")
293
+ logger.warning(
294
+ f" Error calling create_cells for table region {region.bbox}: {cell_error}"
295
+ )
253
296
  logger.info(f" Finished cell creation process triggered by options.")
254
-
255
- return layout_regions
297
+
298
+ return layout_regions