PyPI - natural-pdf - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

natural-pdf 0.1.0 (py3-none-any.whl) → 0.1.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

natural_pdf/__init__.py +1 -1
natural_pdf/analyzers/layout/layout_analyzer.py +133 -44
natural_pdf/analyzers/layout/layout_manager.py +9 -6
natural_pdf/analyzers/layout/layout_options.py +2 -4
natural_pdf/analyzers/layout/surya.py +199 -91
natural_pdf/core/highlighting_service.py +48 -17
natural_pdf/core/page.py +92 -27
natural_pdf/core/pdf.py +11 -0
natural_pdf/elements/base.py +99 -14
natural_pdf/elements/collections.py +56 -0
natural_pdf/elements/region.py +56 -131
natural_pdf/qa/document_qa.py +4 -3
natural_pdf/selectors/parser.py +215 -1
natural_pdf/utils/visualization.py +2 -2
natural_pdf-0.1.2.dist-info/METADATA +124 -0
{natural_pdf-0.1.0.dist-info → natural_pdf-0.1.2.dist-info}/RECORD +19 -19
natural_pdf-0.1.0.dist-info/METADATA +0 -295
{natural_pdf-0.1.0.dist-info → natural_pdf-0.1.2.dist-info}/WHEEL +0 -0
{natural_pdf-0.1.0.dist-info → natural_pdf-0.1.2.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.1.0.dist-info → natural_pdf-0.1.2.dist-info}/top_level.txt +0 -0

natural_pdf/__init__.py CHANGED Viewed

@@ -47,7 +47,7 @@ try:
 except ImportError:
     HAS_QA = False
-__version__ = "0.1.0"
+__version__ = "0.1.1"
 if HAS_QA:
     __all__ = ["PDF", "Page", "Region", "ElementCollection", "configure_logging", "DocumentQA", "get_qa_engine"]

natural_pdf/analyzers/layout/layout_analyzer.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import logging
 from typing import List, Dict, Any, Optional, Union
 from PIL import Image
+import copy
 from natural_pdf.elements.region import Region
 from natural_pdf.analyzers.layout.layout_manager import LayoutManager
-from natural_pdf.analyzers.layout.layout_options import LayoutOptions
+from natural_pdf.analyzers.layout.layout_options import LayoutOptions, TATRLayoutOptions, BaseLayoutOptions
 logger = logging.getLogger(__name__)
@@ -36,20 +37,25 @@ class LayoutAnalyzer:
         classes: Optional[List[str]] = None,
         exclude_classes: Optional[List[str]] = None,
         device: Optional[str] = None,
-        existing: str = "replace"
+        existing: str = "replace",
+        **kwargs
     ) -> List[Region]:
         """
         Analyze the page layout using the configured LayoutManager.
+        This method constructs the final options object, including internal context,
+        and passes it to the LayoutManager.
         Args:
-            engine: Name of the layout engine (e.g., 'yolo', 'tatr'). Uses manager's default if None.
-            options: Specific LayoutOptions object for advanced configuration.
+            engine: Name of the layout engine (e.g., 'yolo', 'tatr'). Uses manager's default if None and no options object given.
+            options: Specific LayoutOptions object for advanced configuration. If provided, simple args (confidence, etc.) are ignored.
             confidence: Minimum confidence threshold (simple mode).
             classes: Specific classes to detect (simple mode).
             exclude_classes: Classes to exclude (simple mode).
             device: Device for inference (simple mode).
             existing: How to handle existing detected regions: 'replace' (default) or 'append'.
+            **kwargs: Additional engine-specific arguments (added to options.extra_args or used by constructor if options=None).
         Returns:
             List of created Region objects.
         """
@@ -57,72 +63,139 @@ class LayoutAnalyzer:
             logger.error(f"Page {self._page.number}: LayoutManager not available. Cannot analyze layout.")
             return []
-        logger.info(f"Page {self._page.number}: Analyzing layout (Engine: {engine or 'default'}, Options: {options is not None})...")
+        logger.info(f"Page {self._page.number}: Analyzing layout (Engine: {engine or 'default'}, Options provided: {options is not None})...")
-        # --- Render Page Image ---
-        logger.debug(f"  Rendering page {self._page.number} to image for layout analysis...")
+        # --- Render Page Image (Standard Resolution) ---
+        logger.debug(f"  Rendering page {self._page.number} to image for initial layout detection...")
         try:
-            # Use a resolution suitable for layout analysis, potentially configurable
-            layout_scale = getattr(self._page._parent, '_config', {}).get('layout_image_scale', 1.5) # ~108 DPI default
+            layout_scale = getattr(self._page._parent, '_config', {}).get('layout_image_scale', 1.5)
             layout_resolution = layout_scale * 72
-            # Render without existing highlights to avoid interference
-            page_image = self._page.to_image(resolution=layout_resolution, include_highlights=False)
-            logger.debug(f"  Rendered image size: {page_image.width}x{page_image.height}")
+            std_res_page_image = self._page.to_image(resolution=layout_resolution, include_highlights=False)
+            if not std_res_page_image:
+                raise ValueError("Initial page rendering returned None")
+            logger.debug(f"  Initial rendered image size: {std_res_page_image.width}x{std_res_page_image.height}")
         except Exception as e:
-            logger.error(f"  Failed to render page {self._page.number} to image: {e}", exc_info=True)
+            logger.error(f"  Failed to render initial page image: {e}", exc_info=True)
             return []
+        # --- Calculate Scaling Factors (Standard Res Image <-> PDF) ---
+        if std_res_page_image.width == 0 or std_res_page_image.height == 0:
+            logger.error(f"Page {self._page.number}: Invalid initial rendered image dimensions. Cannot scale results.")
+            return []
+        img_scale_x = self._page.width / std_res_page_image.width
+        img_scale_y = self._page.height / std_res_page_image.height
+        logger.debug(f"  StdRes Image -> PDF Scaling: x={img_scale_x:.4f}, y={img_scale_y:.4f}")
-        # --- Prepare Arguments for Layout Manager ---
-        manager_args = {'image': page_image, 'options': options, 'engine': engine}
-        if confidence is not None: manager_args['confidence'] = confidence
-        if classes is not None: manager_args['classes'] = classes
-        if exclude_classes is not None: manager_args['exclude_classes'] = exclude_classes
-        if device is not None: manager_args['device'] = device
-        # --- Call Layout Manager ---
-        logger.debug(f"  Calling Layout Manager...")
+        # --- Construct Final Options Object ---
+        final_options: BaseLayoutOptions
+        if options is not None:
+             # User provided a complete options object, use it directly
+             logger.debug("Using user-provided options object.")
+             final_options = copy.deepcopy(options) # Copy to avoid modifying original user object
+             if kwargs:
+                  logger.warning(f"Ignoring kwargs {list(kwargs.keys())} because a full options object was provided.")
+             # Infer engine from options type if engine arg wasn't provided
+             if engine is None:
+                  for name, registry_entry in self._layout_manager.ENGINE_REGISTRY.items():
+                       if isinstance(final_options, registry_entry['options_class']):
+                            engine = name
+                            logger.debug(f"Inferred engine '{engine}' from options type.")
+                            break
+                  if engine is None:
+                       logger.warning("Could not infer engine from provided options object.")
+        else:
+             # Construct options from simple args (engine, confidence, classes, etc.)
+             logger.debug("Constructing options from simple arguments.")
+             selected_engine = engine or self._layout_manager.get_available_engines()[0] # Use provided or first available
+             engine_lower = selected_engine.lower()
+             registry = self._layout_manager.ENGINE_REGISTRY
+             if engine_lower not in registry:
+                  raise ValueError(f"Unknown or unavailable engine: '{selected_engine}'. Available: {list(registry.keys())}")
+             options_class = registry[engine_lower]['options_class']
+             # Get base defaults
+             base_defaults = BaseLayoutOptions()
+             # Prepare args for constructor, prioritizing explicit args over defaults
+             constructor_args = {
+                 'confidence': confidence if confidence is not None else base_defaults.confidence,
+                 'classes': classes, # Pass None if not provided
+                 'exclude_classes': exclude_classes, # Pass None if not provided
+                 'device': device if device is not None else base_defaults.device,
+                 'extra_args': kwargs # Pass other kwargs here
+             }
+             # Remove None values unless they are valid defaults (like classes=None)
+             # We can pass all to the dataclass constructor; it handles defaults
+             try:
+                  final_options = options_class(**constructor_args)
+                  logger.debug(f"Constructed options: {final_options}")
+             except TypeError as e:
+                  logger.error(f"Failed to construct options object {options_class.__name__} with args {constructor_args}: {e}")
+                  # Filter kwargs to only include fields defined in the specific options class? Complex.
+                  # Re-raise for now, indicates programming error or invalid kwarg.
+                  raise e
+        # --- Add Internal Context to extra_args (ALWAYS) ---
+        if not hasattr(final_options, 'extra_args') or final_options.extra_args is None:
+             final_options.extra_args = {}
+        final_options.extra_args['_page_ref'] = self._page
+        final_options.extra_args['_img_scale_x'] = img_scale_x
+        final_options.extra_args['_img_scale_y'] = img_scale_y
+        logger.debug(f"Added internal context to final_options.extra_args: {final_options.extra_args}")
+        # --- Call Layout Manager with the Final Options ---
+        logger.debug(f"Calling Layout Manager with final options object.")
         try:
-            detections = self._layout_manager.analyze_layout(**manager_args)
+            # Pass only image and the constructed options object
+            detections = self._layout_manager.analyze_layout(
+                image=std_res_page_image,
+                options=final_options
+                # No engine, confidence, classes etc. passed here directly
+            )
             logger.info(f"  Layout Manager returned {len(detections)} detections.")
         except Exception as e:
             logger.error(f"  Layout analysis failed: {e}", exc_info=True)
             return []
-        # --- Process Detections (Convert to Regions, Scale Coords) ---
-        # Calculate scale factor to convert from image back to PDF coordinates
-        if page_image.width == 0 or page_image.height == 0:
-            logger.error(f"Page {self._page.number}: Invalid rendered image dimensions ({page_image.width}x{page_image.height}). Cannot scale layout results.")
-            return []
-        scale_x = self._page.width / page_image.width
-        scale_y = self._page.height / page_image.height
-        logger.debug(f"  Scaling factors: x={scale_x:.4f}, y={scale_y:.4f}")
+        # --- Process Detections (Convert to Regions, Scale Coords from Image to PDF) ---
         layout_regions = []
         docling_id_to_region = {} # For hierarchy if using Docling
         for detection in detections:
             try:
+                # bbox is relative to std_res_page_image
                 x_min, y_min, x_max, y_max = detection['bbox']
                 # Convert coordinates from image to PDF space
-                pdf_x0 = x_min * scale_x
-                pdf_y0 = y_min * scale_y
-                pdf_x1 = x_max * scale_x
-                pdf_y1 = y_max * scale_y
-                # Create a Region object
+                pdf_x0 = x_min * img_scale_x
+                pdf_y0 = y_min * img_scale_y
+                pdf_x1 = x_max * img_scale_x
+                pdf_y1 = y_max * img_scale_y
+                # Ensure PDF coords are valid
+                pdf_x0, pdf_x1 = min(pdf_x0, pdf_x1), max(pdf_x0, pdf_x1)
+                pdf_y0, pdf_y1 = min(pdf_y0, pdf_y1), max(pdf_y0, pdf_y1)
+                pdf_x0 = max(0, pdf_x0)
+                pdf_y0 = max(0, pdf_y0)
+                pdf_x1 = min(self._page.width, pdf_x1)
+                pdf_y1 = min(self._page.height, pdf_y1)
+                # Create a Region object with PDF coordinates
                 region = Region(self._page, (pdf_x0, pdf_y0, pdf_x1, pdf_y1))
-                region.region_type = detection.get('class', 'unknown') # Original class name
-                region.normalized_type = detection.get('normalized_class', 'unknown') # Hyphenated name
+                region.region_type = detection.get('class', 'unknown')
+                region.normalized_type = detection.get('normalized_class', 'unknown')
                 region.confidence = detection.get('confidence', 0.0)
-                region.model = detection.get('model', engine or 'unknown') # Store model name
+                region.model = detection.get('model', engine or 'unknown')
                 region.source = 'detected'
                 # Add extra info if available
                 if 'text' in detection: region.text_content = detection['text']
                 if 'docling_id' in detection: region.docling_id = detection['docling_id']
                 if 'parent_id' in detection: region.parent_id = detection['parent_id']
-                # Add other fields like polygon, position, row/col index if needed
                 layout_regions.append(region)
@@ -163,4 +236,20 @@ class LayoutAnalyzer:
         self._page.detected_layout_regions = self._page._regions['detected']
         logger.info(f"Layout analysis complete for page {self._page.number}.")
+        # --- Auto-create cells if requested by TATR options ---
+        if isinstance(final_options, TATRLayoutOptions) and final_options.create_cells:
+            logger.info(f"  Option create_cells=True detected for TATR. Attempting cell creation...")
+            created_cell_count = 0
+            for region in layout_regions:
+                # Only attempt on regions identified as tables by the TATR model
+                if region.model == 'tatr' and region.region_type == 'table':
+                    try:
+                        # create_cells now modifies the page elements directly and returns self
+                        region.create_cells()
+                        # We could potentially count cells created here if needed,
+                        # but the method logs its own count.
+                    except Exception as cell_error:
+                        logger.warning(f"    Error calling create_cells for table region {region.bbox}: {cell_error}")
+            logger.info(f"  Finished cell creation process triggered by options.")
         return layout_regions

natural_pdf/analyzers/layout/layout_manager.py CHANGED Viewed

@@ -120,9 +120,10 @@ class LayoutManager:
         # --- Determine Options and Engine ---
         if options is not None:
-            # Advanced Mode
-            logger.debug(f"LayoutManager: Using advanced mode with options object: {type(options).__name__}")
-            final_options = copy.deepcopy(options) # Use copy
+            # Advanced Mode: An options object was provided directly (or constructed by LayoutAnalyzer)
+            # Use this object directly, do not deep copy or reconstruct.
+            logger.debug(f"LayoutManager: Using provided options object: {type(options).__name__}")
+            final_options = options # Use the provided object directly
             found_engine = False
             for name, registry_entry in self.ENGINE_REGISTRY.items():
                 if isinstance(options, registry_entry['options_class']):
@@ -131,12 +132,14 @@ class LayoutManager:
                     break
             if not found_engine:
                  raise TypeError(f"Provided options object type '{type(options).__name__}' does not match any registered layout engine options.")
+            # Ignore simple kwargs if options object is present
             if kwargs:
-                logger.warning(f"Keyword arguments {list(kwargs.keys())} were provided alongside 'options' and will be ignored.")
+                logger.warning(f"Keyword arguments {list(kwargs.keys())} were provided alongside an 'options' object and will be ignored.")
         else:
-            # Simple Mode
+            # Simple Mode: No options object provided initially.
+            # Determine engine from kwargs or default, then construct options.
             selected_engine_name = default_engine.lower()
-            logger.debug(f"LayoutManager: Using simple mode with engine: '{selected_engine_name}' and kwargs: {kwargs}")
+            logger.debug(f"LayoutManager: Using simple mode. Engine: '{selected_engine_name}', kwargs: {kwargs}")
             if selected_engine_name not in self.ENGINE_REGISTRY:
                  raise ValueError(f"Unknown or unavailable layout engine: '{selected_engine_name}'. Available: {available_engines}")

natural_pdf/analyzers/layout/layout_options.py CHANGED Viewed

@@ -34,7 +34,7 @@ class TATRLayoutOptions(BaseLayoutOptions):
     max_detection_size: int = 800
     max_structure_size: int = 1000
     # Whether to create cell regions (can be slow)
-    create_cells: bool = False # Keep the flag for cell creation control
+    create_cells: bool = True
 # --- Paddle Specific Options ---
 @dataclass
@@ -51,10 +51,8 @@ class PaddleLayoutOptions(BaseLayoutOptions):
 @dataclass
 class SuryaLayoutOptions(BaseLayoutOptions):
     """Options specific to Surya layout detection."""
-    # Surya doesn't seem to have many config options based on the example,
-    # but we can add placeholders if needed. Device is handled by BaseLayoutOptions.
     model_name: str = "default" # Placeholder if different models become available
-    verbose: bool = False # Verbose logging for the detector class
+    recognize_table_structure: bool = True # Automatically run table structure recognition?
 # --- Docling Specific Options ---
 @dataclass

natural_pdf/analyzers/layout/surya.py CHANGED Viewed

@@ -3,6 +3,7 @@ import logging
 import importlib.util
 import os
 import tempfile
+import copy
 from typing import List, Dict, Any, Optional, Tuple
 from PIL import Image
@@ -11,20 +12,23 @@ from .layout_options import SuryaLayoutOptions, BaseLayoutOptions
 logger = logging.getLogger(__name__)
-# Check for dependency
+# Check for dependencies
 surya_spec = importlib.util.find_spec("surya")
 LayoutPredictor = None
+TableRecPredictor = None
 if surya_spec:
     try:
         from surya.layout import LayoutPredictor
+        from surya.table_rec import TableRecPredictor
     except ImportError as e:
-        logger.warning(f"Could not import Surya dependencies: {e}")
+        logger.warning(f"Could not import Surya dependencies (layout and/or table_rec): {e}")
 else:
     logger.warning("surya not found. SuryaLayoutDetector will not be available.")
 class SuryaLayoutDetector(LayoutDetector):
-    """Document layout detector using Surya models."""
+    """Document layout and table structure detector using Surya models."""
     def __init__(self):
         super().__init__()
@@ -32,120 +36,224 @@ class SuryaLayoutDetector(LayoutDetector):
             'text', 'pageheader', 'pagefooter', 'sectionheader',
             'table', 'tableofcontents', 'picture', 'caption',
             'heading', 'title', 'list', 'listitem', 'code',
-            'textinlinemath', 'mathformula', 'form'
+            'textinlinemath', 'mathformula', 'form',
+            'table-row', 'table-column'
         }
-        # Predictor instance is cached via _get_model
+        self._page_ref = None # To store page reference from options
     def is_available(self) -> bool:
-        """Check if surya is installed."""
-        return LayoutPredictor is not None
+        return LayoutPredictor is not None and TableRecPredictor is not None
     def _get_cache_key(self, options: BaseLayoutOptions) -> str:
-        """Generate cache key based on model name and device."""
         if not isinstance(options, SuryaLayoutOptions):
-             options = SuryaLayoutOptions(device=options.device) # Use base device
+             options = SuryaLayoutOptions(device=options.device)
         device_key = str(options.device).lower() if options.device else 'default_device'
-        # Include model_name if it affects loading, otherwise device might be enough
         model_key = options.model_name
         return f"{self.__class__.__name__}_{device_key}_{model_key}"
-    def _load_model_from_options(self, options: BaseLayoutOptions) -> Any:
-        """Load the Surya LayoutPredictor model."""
+    def _load_model_from_options(self, options: BaseLayoutOptions) -> Dict[str, Any]:
         if not self.is_available():
-            raise RuntimeError("Surya dependency (surya-ocr) not installed.")
+            raise RuntimeError("Surya dependencies (surya.layout and surya.table_rec) not installed.")
         if not isinstance(options, SuryaLayoutOptions):
             raise TypeError("Incorrect options type provided for Surya model loading.")
-        self.logger.info(f"Loading Surya LayoutPredictor (device={options.device})...")
-        try:
-            # Pass device and potentially other init args from options.extra_args
-            predictor_args = {'device': options.device} if options.device else {}
-            predictor_args.update(options.extra_args) # Add any extra init args
-            predictor = LayoutPredictor(**predictor_args)
-            self.logger.info("Surya LayoutPredictor loaded.")
-            return predictor
+        self.logger.info(f"Loading Surya models (device={options.device})...")
+        models = {}
+        try:
+            models['layout'] = LayoutPredictor()
+            models['table_rec'] = TableRecPredictor()
+            self.logger.info("Surya LayoutPredictor and TableRecPredictor loaded.")
+            return models
         except Exception as e:
-            self.logger.error(f"Failed to load Surya LayoutPredictor: {e}", exc_info=True)
+            self.logger.error(f"Failed to load Surya models: {e}", exc_info=True)
             raise
+    def _expand_bbox(self, bbox: Tuple[float, float, float, float],
+                     padding: int, max_width: int, max_height: int) -> Tuple[int, int, int, int]:
+        """Expand bbox by padding, clamping to max dimensions."""
+        x0, y0, x1, y1 = bbox
+        x0 = max(0, int(x0 - padding))
+        y0 = max(0, int(y0 - padding))
+        x1 = min(max_width, int(x1 + padding))
+        y1 = min(max_height, int(y1 + padding))
+        return x0, y0, x1, y1
     def detect(self, image: Image.Image, options: BaseLayoutOptions) -> List[Dict[str, Any]]:
-        """Detect layout elements in an image using Surya."""
+        """Detect layout elements and optionally table structure in an image using Surya."""
         if not self.is_available():
-            raise RuntimeError("Surya dependency (surya-ocr) not installed.")
+            raise RuntimeError("Surya dependencies (layout and table_rec) not installed.")
         if not isinstance(options, SuryaLayoutOptions):
              self.logger.warning("Received BaseLayoutOptions, expected SuryaLayoutOptions. Using defaults.")
              options = SuryaLayoutOptions(
                  confidence=options.confidence, classes=options.classes,
                  exclude_classes=options.exclude_classes, device=options.device,
-                 extra_args=options.extra_args
+                 extra_args=options.extra_args,
+                 recognize_table_structure=True
              )
+        # Extract page reference and scaling factors from extra_args (passed by LayoutAnalyzer)
+        self._page_ref = options.extra_args.get('_page_ref')
+        img_scale_x = options.extra_args.get('_img_scale_x')
+        img_scale_y = options.extra_args.get('_img_scale_y')
+        # We still need this check, otherwise later steps that need these vars will fail
+        can_do_table_rec = options.recognize_table_structure and self._page_ref and img_scale_x is not None and img_scale_y is not None
+        if options.recognize_table_structure and not can_do_table_rec:
+             logger.warning("Surya table recognition cannot proceed without page reference and scaling factors. Disabling.")
+             options.recognize_table_structure = False
-        self.validate_classes(options.classes or [])
-        if options.exclude_classes:
-            self.validate_classes(options.exclude_classes)
-        # Get the cached/loaded predictor instance
-        layout_predictor = self._get_model(options)
-        # Surya predictor takes a list of images
-        input_image_list = [image.convert("RGB")] # Ensure RGB
-        detections = []
-        try:
-            self.logger.debug("Running Surya layout prediction...")
-            # Call the predictor (returns a list of LayoutResult objects)
-            layout_predictions = layout_predictor(input_image_list)
-            self.logger.debug(f"Surya prediction returned {len(layout_predictions)} results.")
-            if not layout_predictions:
-                self.logger.warning("Surya returned empty predictions list.")
-                return []
-            # Process results for the first (and only) image
-            prediction = layout_predictions[0] # LayoutResult object
-            # Prepare normalized class filters once
-            normalized_classes_req = {self._normalize_class_name(c) for c in options.classes} if options.classes else None
-            normalized_classes_excl = {self._normalize_class_name(c) for c in options.exclude_classes} if options.exclude_classes else set()
-            for layout_box in prediction.bboxes:
-                # Extract the class name and normalize it
-                class_name_orig = layout_box.label
-                normalized_class = self._normalize_class_name(class_name_orig)
-                score = float(layout_box.confidence)
-                # Apply confidence threshold
-                if score < options.confidence: continue
-                # Apply class filtering
-                if normalized_classes_req and normalized_class not in normalized_classes_req: continue
-                if normalized_class in normalized_classes_excl: continue
-                # Extract bbox coordinates (Surya provides [x_min, y_min, x_max, y_max])
-                x_min, y_min, x_max, y_max = map(float, layout_box.bbox)
-                # Add detection
-                detection_data = {
-                    'bbox': (x_min, y_min, x_max, y_max),
-                    'class': class_name_orig,
-                    'confidence': score,
-                    'normalized_class': normalized_class,
-                    'source': 'layout',
-                    'model': 'surya'
-                    # Add polygon etc. if needed, check attributes on layout_box
-                    # 'polygon': layout_box.polygon if hasattr(layout_box, 'polygon') else None,
-                }
-                detections.append(detection_data)
-            self.logger.info(f"Surya detected {len(detections)} layout elements matching criteria.")
+        # Validate classes
+        if options.classes: self.validate_classes(options.classes)
+        if options.exclude_classes: self.validate_classes(options.exclude_classes)
-        except Exception as e:
-            self.logger.error(f"Error during Surya layout detection: {e}", exc_info=True)
-            raise
+        models = self._get_model(options)
+        layout_predictor = models['layout']
+        table_rec_predictor = models['table_rec']
+        input_image = image.convert("RGB")
+        input_image_list = [input_image]
+        initial_layout_detections = [] # Detections relative to input_image
+        tables_to_process = []
+        # --- Initial Layout Detection ---
+        self.logger.debug("Running Surya layout prediction...")
+        layout_predictions = layout_predictor(input_image_list)
+        self.logger.debug(f"Surya prediction returned {len(layout_predictions)} results.")
+        if not layout_predictions: return []
+        prediction = layout_predictions[0]
+        normalized_classes_req = {self._normalize_class_name(c) for c in options.classes} if options.classes else None
+        normalized_classes_excl = {self._normalize_class_name(c) for c in options.exclude_classes} if options.exclude_classes else set()
+        for layout_box in prediction.bboxes:
+            class_name_orig = layout_box.label
+            normalized_class = self._normalize_class_name(class_name_orig)
+            score = float(layout_box.confidence)
+            if score < options.confidence: continue
+            if normalized_classes_req and normalized_class not in normalized_classes_req: continue
+            if normalized_class in normalized_classes_excl: continue
+            x_min, y_min, x_max, y_max = map(float, layout_box.bbox)
+            detection_data = {
+                'bbox': (x_min, y_min, x_max, y_max),
+                'class': class_name_orig,
+                'confidence': score,
+                'normalized_class': normalized_class,
+                'source': 'layout',
+                'model': 'surya'
+            }
+            initial_layout_detections.append(detection_data)
+            if options.recognize_table_structure and normalized_class in ('table', 'tableofcontents'):
+                tables_to_process.append(detection_data)
+        self.logger.info(f"Surya initially detected {len(initial_layout_detections)} layout elements matching criteria.")
+        # --- Table Structure Recognition (Optional) ---
+        if not options.recognize_table_structure or not tables_to_process:
+            self.logger.debug("Skipping Surya table structure recognition (disabled or no tables found).")
+            return initial_layout_detections
+        self.logger.info(f"Attempting Surya table structure recognition for {len(tables_to_process)} tables...")
+        high_res_crops = []
+        pdf_offsets = [] # Store (pdf_x0, pdf_y0) for each crop
+        high_res_dpi = getattr(self._page_ref._parent, '_config', {}).get('surya_table_rec_dpi', 192)
+        bbox_padding = getattr(self._page_ref._parent, '_config', {}).get('surya_table_bbox_padding', 10)
+        pdf_to_highres_scale = high_res_dpi / 72.0
+        # Render high-res page ONCE
+        self.logger.debug(f"Rendering page {self._page_ref.number} at {high_res_dpi} DPI for table recognition...")
+        high_res_page_image = self._page_ref.to_image(resolution=high_res_dpi, include_highlights=False)
+        if not high_res_page_image:
+            raise RuntimeError(f"Failed to render page {self._page_ref.number} at high resolution.")
+        self.logger.debug(f"  High-res image size: {high_res_page_image.width}x{high_res_page_image.height}")
+        for i, table_detection in enumerate(tables_to_process):
+            img_x0, img_y0, img_x1, img_y1 = table_detection['bbox']
+            # PDF coords
+            pdf_x0 = img_x0 * img_scale_x
+            pdf_y0 = img_y0 * img_scale_y
+            pdf_x1 = img_x1 * img_scale_x
+            pdf_y1 = img_y1 * img_scale_y
+            pdf_x0 = max(0, pdf_x0)
+            pdf_y0 = max(0, pdf_y0)
+            pdf_x1 = min(self._page_ref.width, pdf_x1)
+            pdf_y1 = min(self._page_ref.height, pdf_y1)
+            # High-res image coords
+            hr_x0 = pdf_x0 * pdf_to_highres_scale
+            hr_y0 = pdf_y0 * pdf_to_highres_scale
+            hr_x1 = pdf_x1 * pdf_to_highres_scale
+            hr_y1 = pdf_y1 * pdf_to_highres_scale
+            # Expand high-res bbox
+            hr_x0_exp, hr_y0_exp, hr_x1_exp, hr_y1_exp = self._expand_bbox(
+                (hr_x0, hr_y0, hr_x1, hr_y1),
+                padding=bbox_padding,
+                max_width=high_res_page_image.width,
+                max_height=high_res_page_image.height
+            )
+            crop = high_res_page_image.crop((hr_x0_exp, hr_y0_exp, hr_x1_exp, hr_y1_exp))
+            high_res_crops.append(crop)
+            pdf_offsets.append((pdf_x0, pdf_y0))
+        if not high_res_crops:
+            self.logger.info("No valid high-resolution table crops generated.")
+            return initial_layout_detections
+        structure_detections = [] # Detections relative to std_res input_image
+        # --- Run Table Recognition (will raise error on failure) ---
+        self.logger.debug(f"Running Surya table recognition on {len(high_res_crops)} high-res images...")
+        table_predictions = table_rec_predictor(high_res_crops)
+        self.logger.debug(f"Surya table recognition returned {len(table_predictions)} results.")
+        # --- Process Results ---
+        if len(table_predictions) != len(pdf_offsets):
+             # This case is less likely if predictor didn't error, but good sanity check
+             raise RuntimeError(f"Mismatch between table inputs ({len(pdf_offsets)}) and predictions ({len(table_predictions)}).")
+        for table_pred, (offset_pdf_x0, offset_pdf_y0) in zip(table_predictions, pdf_offsets):
+            # Process Rows
+            for row_box in table_pred.rows:
+                crop_rx0, crop_ry0, crop_rx1, crop_ry1 = map(float, row_box.bbox)
+                pdf_row_x0 = offset_pdf_x0 + crop_rx0 / pdf_to_highres_scale
+                pdf_row_y0 = offset_pdf_y0 + crop_ry0 / pdf_to_highres_scale
+                pdf_row_x1 = offset_pdf_x0 + crop_rx1 / pdf_to_highres_scale
+                pdf_row_y1 = offset_pdf_y0 + crop_ry1 / pdf_to_highres_scale
+                img_row_x0 = pdf_row_x0 / img_scale_x
+                img_row_y0 = pdf_row_y0 / img_scale_y
+                img_row_x1 = pdf_row_x1 / img_scale_x
+                img_row_y1 = pdf_row_y1 / img_scale_y
+                structure_detections.append({
+                    'bbox': (img_row_x0, img_row_y0, img_row_x1, img_row_y1),
+                    'class': 'table-row', 'confidence': 1.0, 'normalized_class': 'table-row',
+                    'source': 'layout', 'model': 'surya'
+                })
+            # Process Columns
+            for col_box in table_pred.cols:
+                crop_cx0, crop_cy0, crop_cx1, crop_cy1 = map(float, col_box.bbox)
+                pdf_col_x0 = offset_pdf_x0 + crop_cx0 / pdf_to_highres_scale
+                pdf_col_y0 = offset_pdf_y0 + crop_cy0 / pdf_to_highres_scale
+                pdf_col_x1 = offset_pdf_x0 + crop_cx1 / pdf_to_highres_scale
+                pdf_col_y1 = offset_pdf_y0 + crop_cy1 / pdf_to_highres_scale
+                img_col_x0 = pdf_col_x0 / img_scale_x
+                img_col_y0 = pdf_col_y0 / img_scale_y
+                img_col_x1 = pdf_col_x1 / img_scale_x
+                img_col_y1 = pdf_col_y1 / img_scale_y
+                structure_detections.append({
+                    'bbox': (img_col_x0, img_col_y0, img_col_x1, img_col_y1),
+                    'class': 'table-column', 'confidence': 1.0, 'normalized_class': 'table-column',
+                    'source': 'layout', 'model': 'surya'
+                })
+        self.logger.info(f"Added {len(structure_detections)} table structure elements.")
-        return detections
+        return initial_layout_detections + structure_detections

natural-pdf 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

natural-pdf 0.1.0py3-none-any.whl → 0.1.2py3-none-any.whl