PyPI - natural-pdf - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl - Mend

natural-pdf 0.1.4 (py3-none-any.whl) → 0.1.6 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (141) hide show

docs/api/index.md +386 -0
docs/assets/favicon.png +3 -0
docs/assets/favicon.svg +3 -0
docs/assets/javascripts/custom.js +17 -0
docs/assets/logo.svg +3 -0
docs/assets/sample-screen.png +0 -0
docs/assets/social-preview.png +17 -0
docs/assets/social-preview.svg +17 -0
docs/assets/stylesheets/custom.css +65 -0
docs/document-qa/index.ipynb +435 -0
docs/document-qa/index.md +79 -0
docs/element-selection/index.ipynb +915 -0
docs/element-selection/index.md +229 -0
docs/index.md +170 -0
docs/installation/index.md +69 -0
docs/interactive-widget/index.ipynb +962 -0
docs/interactive-widget/index.md +12 -0
docs/layout-analysis/index.ipynb +818 -0
docs/layout-analysis/index.md +185 -0
docs/ocr/index.md +209 -0
docs/pdf-navigation/index.ipynb +314 -0
docs/pdf-navigation/index.md +97 -0
docs/regions/index.ipynb +816 -0
docs/regions/index.md +294 -0
docs/tables/index.ipynb +658 -0
docs/tables/index.md +144 -0
docs/text-analysis/index.ipynb +370 -0
docs/text-analysis/index.md +105 -0
docs/text-extraction/index.ipynb +1478 -0
docs/text-extraction/index.md +292 -0
docs/tutorials/01-loading-and-extraction.ipynb +1710 -0
docs/tutorials/01-loading-and-extraction.md +95 -0
docs/tutorials/02-finding-elements.ipynb +340 -0
docs/tutorials/02-finding-elements.md +149 -0
docs/tutorials/03-extracting-blocks.ipynb +147 -0
docs/tutorials/03-extracting-blocks.md +48 -0
docs/tutorials/04-table-extraction.ipynb +114 -0
docs/tutorials/04-table-extraction.md +50 -0
docs/tutorials/05-excluding-content.ipynb +270 -0
docs/tutorials/05-excluding-content.md +109 -0
docs/tutorials/06-document-qa.ipynb +332 -0
docs/tutorials/06-document-qa.md +91 -0
docs/tutorials/07-layout-analysis.ipynb +288 -0
docs/tutorials/07-layout-analysis.md +66 -0
docs/tutorials/07-working-with-regions.ipynb +413 -0
docs/tutorials/07-working-with-regions.md +151 -0
docs/tutorials/08-spatial-navigation.ipynb +508 -0
docs/tutorials/08-spatial-navigation.md +190 -0
docs/tutorials/09-section-extraction.ipynb +2434 -0
docs/tutorials/09-section-extraction.md +256 -0
docs/tutorials/10-form-field-extraction.ipynb +512 -0
docs/tutorials/10-form-field-extraction.md +201 -0
docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
docs/tutorials/11-enhanced-table-processing.md +9 -0
docs/tutorials/12-ocr-integration.ipynb +604 -0
docs/tutorials/12-ocr-integration.md +175 -0
docs/tutorials/13-semantic-search.ipynb +1328 -0
docs/tutorials/13-semantic-search.md +77 -0
docs/visual-debugging/index.ipynb +2970 -0
docs/visual-debugging/index.md +157 -0
docs/visual-debugging/region.png +0 -0
natural_pdf/__init__.py +50 -33
natural_pdf/analyzers/__init__.py +2 -1
natural_pdf/analyzers/layout/base.py +32 -24
natural_pdf/analyzers/layout/docling.py +131 -72
natural_pdf/analyzers/layout/gemini.py +264 -0
natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
natural_pdf/analyzers/layout/layout_manager.py +125 -58
natural_pdf/analyzers/layout/layout_options.py +43 -17
natural_pdf/analyzers/layout/paddle.py +152 -95
natural_pdf/analyzers/layout/surya.py +164 -92
natural_pdf/analyzers/layout/tatr.py +149 -84
natural_pdf/analyzers/layout/yolo.py +89 -45
natural_pdf/analyzers/text_options.py +22 -15
natural_pdf/analyzers/text_structure.py +131 -85
natural_pdf/analyzers/utils.py +30 -23
natural_pdf/collections/pdf_collection.py +146 -97
natural_pdf/core/__init__.py +1 -1
natural_pdf/core/element_manager.py +419 -337
natural_pdf/core/highlighting_service.py +268 -196
natural_pdf/core/page.py +1044 -521
natural_pdf/core/pdf.py +516 -313
natural_pdf/elements/__init__.py +1 -1
natural_pdf/elements/base.py +307 -225
natural_pdf/elements/collections.py +805 -543
natural_pdf/elements/line.py +39 -36
natural_pdf/elements/rect.py +32 -30
natural_pdf/elements/region.py +889 -879
natural_pdf/elements/text.py +127 -99
natural_pdf/exporters/__init__.py +0 -1
natural_pdf/exporters/searchable_pdf.py +261 -102
natural_pdf/ocr/__init__.py +57 -35
natural_pdf/ocr/engine.py +150 -46
natural_pdf/ocr/engine_easyocr.py +146 -150
natural_pdf/ocr/engine_paddle.py +118 -175
natural_pdf/ocr/engine_surya.py +78 -141
natural_pdf/ocr/ocr_factory.py +114 -0
natural_pdf/ocr/ocr_manager.py +122 -124
natural_pdf/ocr/ocr_options.py +16 -20
natural_pdf/ocr/utils.py +98 -0
natural_pdf/qa/__init__.py +1 -1
natural_pdf/qa/document_qa.py +119 -111
natural_pdf/search/__init__.py +37 -31
natural_pdf/search/haystack_search_service.py +312 -189
natural_pdf/search/haystack_utils.py +186 -122
natural_pdf/search/search_options.py +25 -14
natural_pdf/search/search_service_protocol.py +12 -6
natural_pdf/search/searchable_mixin.py +261 -176
natural_pdf/selectors/__init__.py +2 -1
natural_pdf/selectors/parser.py +159 -316
natural_pdf/templates/__init__.py +1 -1
natural_pdf/templates/spa/css/style.css +334 -0
natural_pdf/templates/spa/index.html +31 -0
natural_pdf/templates/spa/js/app.js +472 -0
natural_pdf/templates/spa/words.txt +235976 -0
natural_pdf/utils/debug.py +32 -0
natural_pdf/utils/highlighting.py +8 -2
natural_pdf/utils/identifiers.py +29 -0
natural_pdf/utils/packaging.py +418 -0
natural_pdf/utils/reading_order.py +65 -63
natural_pdf/utils/text_extraction.py +195 -0
natural_pdf/utils/visualization.py +70 -61
natural_pdf/widgets/__init__.py +2 -3
natural_pdf/widgets/viewer.py +749 -718
{natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/METADATA +53 -17
natural_pdf-0.1.6.dist-info/RECORD +141 -0
{natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/WHEEL +1 -1
natural_pdf-0.1.6.dist-info/top_level.txt +4 -0
notebooks/Examples.ipynb +1293 -0
pdfs/.gitkeep +0 -0
pdfs/01-practice.pdf +543 -0
pdfs/0500000US42001.pdf +0 -0
pdfs/0500000US42007.pdf +0 -0
pdfs/2014 Statistics.pdf +0 -0
pdfs/2019 Statistics.pdf +0 -0
pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
pdfs/needs-ocr.pdf +0 -0
natural_pdf/templates/ocr_debug.html +0 -517
natural_pdf-0.1.4.dist-info/RECORD +0 -61
natural_pdf-0.1.4.dist-info/top_level.txt +0 -1
{natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/licenses/LICENSE +0 -0

natural_pdf/analyzers/layout/tatr.py CHANGED Viewed

@@ -1,14 +1,15 @@
 # layout_detector_tatr.py
-import logging
 import importlib.util
+import logging
 import os
 import tempfile
-from typing import List, Dict, Any, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 from PIL import Image
 # Assuming base class and options are importable
 from .base import LayoutDetector
-from .layout_options import TATRLayoutOptions, BaseLayoutOptions
+from .layout_options import BaseLayoutOptions, TATRLayoutOptions
 logger = logging.getLogger(__name__)
@@ -26,9 +27,13 @@ if torch_spec and torchvision_spec and transformers_spec:
         from torchvision import transforms
         from transformers import AutoModelForObjectDetection
     except ImportError as e:
-        logger.warning(f"Could not import TATR dependencies (torch, torchvision, transformers): {e}")
+        logger.warning(
+            f"Could not import TATR dependencies (torch, torchvision, transformers): {e}"
+        )
 else:
-    logger.warning("torch, torchvision, or transformers not found. TableTransformerDetector will not be available.")
+    logger.warning(
+        "torch, torchvision, or transformers not found. TableTransformerDetector will not be available."
+    )
 class TableTransformerDetector(LayoutDetector):
@@ -36,26 +41,36 @@ class TableTransformerDetector(LayoutDetector):
     # Custom resize transform (keep as nested class or move outside)
     class MaxResize(object):
-        def __init__(self, max_size=800):
+        def __init__(self, max_size=2000):
             self.max_size = max_size
         def __call__(self, image):
             width, height = image.size
             current_max_size = max(width, height)
             scale = self.max_size / current_max_size
             # Use LANCZOS for resizing
-            resized_image = image.resize((int(round(scale*width)), int(round(scale*height))), Image.Resampling.LANCZOS)
+            resized_image = image.resize(
+                (int(round(scale * width)), int(round(scale * height))), Image.Resampling.LANCZOS
+            )
             return resized_image
     def __init__(self):
         super().__init__()
         self.supported_classes = {
-            'table', 'table row', 'table column', 'table column header', 'table projected row header', 'table spanning cell' # Add others if supported by models used
+            "table",
+            "table row",
+            "table column",
+            "table column header",
+            "table projected row header",
+            "table spanning cell",  # Add others if supported by models used
         }
         # Models are loaded via _get_model
     def is_available(self) -> bool:
         """Check if dependencies are installed."""
-        return torch is not None and transforms is not None and AutoModelForObjectDetection is not None
+        return (
+            torch is not None and transforms is not None and AutoModelForObjectDetection is not None
+        )
     def _get_cache_key(self, options: TATRLayoutOptions) -> str:
         """Generate cache key based on model IDs and device."""
@@ -63,26 +78,30 @@ class TableTransformerDetector(LayoutDetector):
             options = TATRLayoutOptions(device=options.device)
         device_key = str(options.device).lower()
-        det_model_key = options.detection_model.replace('/','_')
-        struct_model_key = options.structure_model.replace('/','_')
+        det_model_key = options.detection_model.replace("/", "_")
+        struct_model_key = options.structure_model.replace("/", "_")
         return f"{self.__class__.__name__}_{device_key}_{det_model_key}_{struct_model_key}"
     def _load_model_from_options(self, options: TATRLayoutOptions) -> Dict[str, Any]:
         """Load the TATR detection and structure models."""
         if not self.is_available():
-             raise RuntimeError("TATR dependencies (torch, torchvision, transformers) not installed.")
+            raise RuntimeError(
+                "TATR dependencies (torch, torchvision, transformers) not installed."
+            )
         device = options.device or ("cuda" if torch.cuda.is_available() else "cpu")
-        self.logger.info(f"Loading TATR models: Detection='{options.detection_model}', Structure='{options.structure_model}' onto device='{device}'")
+        self.logger.info(
+            f"Loading TATR models: Detection='{options.detection_model}', Structure='{options.structure_model}' onto device='{device}'"
+        )
         try:
             detection_model = AutoModelForObjectDetection.from_pretrained(
-                options.detection_model, revision="no_timm" # Important revision for some versions
+                options.detection_model, revision="no_timm"  # Important revision for some versions
             ).to(device)
             structure_model = AutoModelForObjectDetection.from_pretrained(
                 options.structure_model
             ).to(device)
             self.logger.info("TATR models loaded.")
-            return {'detection': detection_model, 'structure': structure_model}
+            return {"detection": detection_model, "structure": structure_model}
         except Exception as e:
             self.logger.error(f"Failed to load TATR models: {e}", exc_info=True)
             raise
@@ -97,19 +116,21 @@ class TableTransformerDetector(LayoutDetector):
     def rescale_bboxes(self, out_bbox, size):
         img_w, img_h = size
         boxes = self.box_cxcywh_to_xyxy(out_bbox)
-        boxes = boxes * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32).to(out_bbox.device) # Ensure tensor on correct device
+        boxes = boxes * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32).to(
+            out_bbox.device
+        )  # Ensure tensor on correct device
         return boxes
     def outputs_to_objects(self, outputs, img_size, id2label):
         logits = outputs.logits
         bboxes = outputs.pred_boxes
         # Use softmax activation function
-        prob = logits.softmax(-1)[0, :, :-1] # Exclude the "no object" class
+        prob = logits.softmax(-1)[0, :, :-1]  # Exclude the "no object" class
         scores, labels = prob.max(-1)
         # Convert to absolute coordinates
         img_w, img_h = img_size
-        boxes = self.rescale_bboxes(bboxes[0, ...], (img_w, img_h)) # Pass tuple size
+        boxes = self.rescale_bboxes(bboxes[0, ...], (img_w, img_h))  # Pass tuple size
         # Move results to CPU for list comprehension
         scores = scores.cpu().tolist()
@@ -118,49 +139,62 @@ class TableTransformerDetector(LayoutDetector):
         objects = []
         for score, label_idx, bbox in zip(scores, labels, boxes):
-            class_label = id2label.get(label_idx, 'unknown') # Use get with default
-            if class_label != 'no object' and class_label != 'unknown':
-                objects.append({
-                    'label': class_label,
-                    'score': float(score),
-                    'bbox': [round(float(c), 2) for c in bbox] # Round coordinates
-                })
+            class_label = id2label.get(label_idx, "unknown")  # Use get with default
+            if class_label != "no object" and class_label != "unknown":
+                objects.append(
+                    {
+                        "label": class_label,
+                        "score": float(score),
+                        "bbox": [round(float(c), 2) for c in bbox],  # Round coordinates
+                    }
+                )
         return objects
     # --- End Helper Methods ---
     def detect(self, image: Image.Image, options: BaseLayoutOptions) -> List[Dict[str, Any]]:
         """Detect tables and their structure in an image."""
         if not self.is_available():
-            raise RuntimeError("TATR dependencies (torch, torchvision, transformers) not installed.")
+            raise RuntimeError(
+                "TATR dependencies (torch, torchvision, transformers) not installed."
+            )
         if not isinstance(options, TATRLayoutOptions):
-             self.logger.warning("Received BaseLayoutOptions, expected TATRLayoutOptions. Using defaults.")
-             options = TATRLayoutOptions(
-                 confidence=options.confidence, classes=options.classes,
-                 exclude_classes=options.exclude_classes, device=options.device,
-                 extra_args=options.extra_args
-             )
+            self.logger.warning(
+                "Received BaseLayoutOptions, expected TATRLayoutOptions. Using defaults."
+            )
+            options = TATRLayoutOptions(
+                confidence=options.confidence,
+                classes=options.classes,
+                exclude_classes=options.exclude_classes,
+                device=options.device,
+                extra_args=options.extra_args,
+            )
         self.validate_classes(options.classes or [])
         if options.exclude_classes:
             self.validate_classes(options.exclude_classes)
         models = self._get_model(options)
-        detection_model = models['detection']
-        structure_model = models['structure']
+        detection_model = models["detection"]
+        structure_model = models["structure"]
         device = options.device or ("cuda" if torch.cuda.is_available() else "cpu")
         # Prepare transforms based on options
-        detection_transform = transforms.Compose([
-            self.MaxResize(options.max_detection_size),
-            transforms.ToTensor(),
-            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
-        ])
-        structure_transform = transforms.Compose([
-            self.MaxResize(options.max_structure_size),
-            transforms.ToTensor(),
-            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
-        ])
+        detection_transform = transforms.Compose(
+            [
+                self.MaxResize(options.max_detection_size),
+                transforms.ToTensor(),
+                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+            ]
+        )
+        structure_transform = transforms.Compose(
+            [
+                self.MaxResize(options.max_structure_size),
+                transforms.ToTensor(),
+                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+            ]
+        )
         # --- Detect Tables ---
         self.logger.debug("Running TATR table detection...")
@@ -169,38 +203,60 @@ class TableTransformerDetector(LayoutDetector):
             outputs = detection_model(pixel_values)
         id2label_det = detection_model.config.id2label
-        id2label_det[detection_model.config.num_labels] = "no object" # Add no object class
+        id2label_det[detection_model.config.num_labels] = "no object"  # Add no object class
         tables = self.outputs_to_objects(outputs, image.size, id2label_det)
-        tables = [t for t in tables if t['score'] >= options.confidence and t['label'] == 'table'] # Filter for tables
+        tables = [
+            t for t in tables if t["score"] >= options.confidence and t["label"] == "table"
+        ]  # Filter for tables
         self.logger.debug(f"Detected {len(tables)} table regions.")
         all_detections = []
         # Add table detections if requested
-        normalized_classes_req = {self._normalize_class_name(c) for c in options.classes} if options.classes else None
-        normalized_classes_excl = {self._normalize_class_name(c) for c in options.exclude_classes} if options.exclude_classes else set()
-        if normalized_classes_req is None or 'table' in normalized_classes_req:
-             if 'table' not in normalized_classes_excl:
-                 for table in tables:
-                     all_detections.append({
-                         'bbox': tuple(table['bbox']),
-                         'class': 'table',
-                         'confidence': float(table['score']),
-                         'normalized_class': 'table',
-                         'source': 'layout',
-                         'model': 'tatr'
-                     })
+        normalized_classes_req = (
+            {self._normalize_class_name(c) for c in options.classes} if options.classes else None
+        )
+        normalized_classes_excl = (
+            {self._normalize_class_name(c) for c in options.exclude_classes}
+            if options.exclude_classes
+            else set()
+        )
+        if normalized_classes_req is None or "table" in normalized_classes_req:
+            if "table" not in normalized_classes_excl:
+                for table in tables:
+                    all_detections.append(
+                        {
+                            "bbox": tuple(table["bbox"]),
+                            "class": "table",
+                            "confidence": float(table["score"]),
+                            "normalized_class": "table",
+                            "source": "layout",
+                            "model": "tatr",
+                        }
+                    )
         # --- Process Structure ---
-        structure_class_names = {'table row', 'table column', 'table column header', 'table projected row header', 'table spanning cell'}
-        normalized_structure_classes = {self._normalize_class_name(c) for c in structure_class_names}
+        structure_class_names = {
+            "table row",
+            "table column",
+            "table column header",
+            "table projected row header",
+            "table spanning cell",
+        }
+        normalized_structure_classes = {
+            self._normalize_class_name(c) for c in structure_class_names
+        }
         needed_structure = False
-        if normalized_classes_req is None: # If no specific classes requested
-             needed_structure = any(norm_cls not in normalized_classes_excl for norm_cls in normalized_structure_classes)
-        else: # Specific classes requested
-             needed_structure = any(norm_cls in normalized_classes_req for norm_cls in normalized_structure_classes)
+        if normalized_classes_req is None:  # If no specific classes requested
+            needed_structure = any(
+                norm_cls not in normalized_classes_excl for norm_cls in normalized_structure_classes
+            )
+        else:  # Specific classes requested
+            needed_structure = any(
+                norm_cls in normalized_classes_req for norm_cls in normalized_structure_classes
+            )
         if needed_structure and tables:
             self.logger.debug("Running TATR structure recognition...")
@@ -208,44 +264,53 @@ class TableTransformerDetector(LayoutDetector):
             id2label_struct[structure_model.config.num_labels] = "no object"
             for table in tables:
-                x_min, y_min, x_max, y_max = map(int, table['bbox'])
+                x_min, y_min, x_max, y_max = map(int, table["bbox"])
                 # Ensure coordinates are within image bounds
                 x_min, y_min = max(0, x_min), max(0, y_min)
                 x_max, y_max = min(image.width, x_max), min(image.height, y_max)
-                if x_max <= x_min or y_max <= y_min: continue # Skip invalid crop
+                if x_max <= x_min or y_max <= y_min:
+                    continue  # Skip invalid crop
                 cropped_table = image.crop((x_min, y_min, x_max, y_max))
-                if cropped_table.width == 0 or cropped_table.height == 0: continue # Skip empty crop
+                if cropped_table.width == 0 or cropped_table.height == 0:
+                    continue  # Skip empty crop
                 pixel_values_struct = structure_transform(cropped_table).unsqueeze(0).to(device)
                 with torch.no_grad():
                     outputs_struct = structure_model(pixel_values_struct)
-                structure_elements = self.outputs_to_objects(outputs_struct, cropped_table.size, id2label_struct)
-                structure_elements = [e for e in structure_elements if e['score'] >= options.confidence]
+                structure_elements = self.outputs_to_objects(
+                    outputs_struct, cropped_table.size, id2label_struct
+                )
+                structure_elements = [
+                    e for e in structure_elements if e["score"] >= options.confidence
+                ]
                 for element in structure_elements:
-                    element_class_orig = element['label']
+                    element_class_orig = element["label"]
                     normalized_class = self._normalize_class_name(element_class_orig)
                     # Apply class filtering
-                    if normalized_classes_req and normalized_class not in normalized_classes_req: continue
-                    if normalized_class in normalized_classes_excl: continue
+                    if normalized_classes_req and normalized_class not in normalized_classes_req:
+                        continue
+                    if normalized_class in normalized_classes_excl:
+                        continue
                     # Adjust coordinates
-                    ex0, ey0, ex1, ey1 = element['bbox']
+                    ex0, ey0, ex1, ey1 = element["bbox"]
                     adj_bbox = (ex0 + x_min, ey0 + y_min, ex1 + x_min, ey1 + y_min)
-                    all_detections.append({
-                        'bbox': adj_bbox,
-                        'class': element_class_orig,
-                        'confidence': float(element['score']),
-                        'normalized_class': normalized_class,
-                        'source': 'layout',
-                        'model': 'tatr'
-                    })
+                    all_detections.append(
+                        {
+                            "bbox": adj_bbox,
+                            "class": element_class_orig,
+                            "confidence": float(element["score"]),
+                            "normalized_class": normalized_class,
+                            "source": "layout",
+                            "model": "tatr",
+                        }
+                    )
             self.logger.debug(f"Added {len(all_detections) - len(tables)} structure elements.")
         self.logger.info(f"TATR detected {len(all_detections)} layout elements matching criteria.")
         return all_detections

natural_pdf/analyzers/layout/yolo.py CHANGED Viewed

@@ -1,24 +1,38 @@
 # layout_detector_yolo.py
-import logging
 import importlib.util
+import logging
 import os
 import tempfile
-from typing import List, Dict, Any, Optional
+from typing import Any, Dict, List, Optional
 from PIL import Image
 # Assuming base class and options are importable
 try:
     from .base import LayoutDetector
-    from .layout_options import YOLOLayoutOptions, BaseLayoutOptions
+    from .layout_options import BaseLayoutOptions, YOLOLayoutOptions
 except ImportError:
     # Placeholders if run standalone or imports fail
-    class BaseLayoutOptions: pass
-    class YOLOLayoutOptions(BaseLayoutOptions): pass
+    class BaseLayoutOptions:
+        pass
+    class YOLOLayoutOptions(BaseLayoutOptions):
+        pass
     class LayoutDetector:
-         def __init__(self): self.logger=logging.getLogger(); self.supported_classes=set()
-         def _get_model(self, options): raise NotImplementedError
-         def _normalize_class_name(self, n): return n
-         def validate_classes(self, c): pass
+        def __init__(self):
+            self.logger = logging.getLogger()
+            self.supported_classes = set()
+        def _get_model(self, options):
+            raise NotImplementedError
+        def _normalize_class_name(self, n):
+            return n
+        def validate_classes(self, c):
+            pass
     logging.basicConfig()
 logger = logging.getLogger(__name__)
@@ -36,7 +50,9 @@ if yolo_spec and hf_spec:
     except ImportError as e:
         logger.warning(f"Could not import YOLO dependencies: {e}")
 else:
-    logger.warning("doclayout_yolo or huggingface_hub not found. YOLODocLayoutDetector will not be available.")
+    logger.warning(
+        "doclayout_yolo or huggingface_hub not found. YOLODocLayoutDetector will not be available."
+    )
 class YOLODocLayoutDetector(LayoutDetector):
@@ -45,9 +61,16 @@ class YOLODocLayoutDetector(LayoutDetector):
     def __init__(self):
         super().__init__()
         self.supported_classes = {
-            'title', 'plain text', 'abandon', 'figure', 'figure_caption',
-            'table', 'table_caption', 'table_footnote', 'isolate_formula',
-            'formula_caption'
+            "title",
+            "plain text",
+            "abandon",
+            "figure",
+            "figure_caption",
+            "table",
+            "table_caption",
+            "table_footnote",
+            "isolate_formula",
+            "formula_caption",
         }
     def is_available(self) -> bool:
@@ -58,8 +81,8 @@ class YOLODocLayoutDetector(LayoutDetector):
         """Generate cache key based on model repo/file and device."""
         # Ensure options is the correct type
         if not isinstance(options, YOLOLayoutOptions):
-             # This shouldn't happen if called correctly, but handle defensively
-             options = YOLOLayoutOptions(device=options.device) # Use base device
+            # This shouldn't happen if called correctly, but handle defensively
+            options = YOLOLayoutOptions(device=options.device)  # Use base device
         device_key = str(options.device).lower()
         model_key = f"{options.model_repo.replace('/','_')}_{options.model_file}"
@@ -68,7 +91,9 @@ class YOLODocLayoutDetector(LayoutDetector):
     def _load_model_from_options(self, options: YOLOLayoutOptions) -> Any:
         """Load the YOLOv10 model based on options."""
         if not self.is_available():
-             raise RuntimeError("YOLO dependencies (doclayout_yolo, huggingface_hub) not installed.")
+            raise RuntimeError(
+                "YOLO dependencies not installed. Please run: pip install 'natural-pdf[layout_yolo]'"
+            )
         self.logger.info(f"Loading YOLO model: {options.model_repo}/{options.model_file}")
         try:
             model_path = hf_hub_download(repo_id=options.model_repo, filename=options.model_file)
@@ -82,16 +107,22 @@ class YOLODocLayoutDetector(LayoutDetector):
     def detect(self, image: Image.Image, options: BaseLayoutOptions) -> List[Dict[str, Any]]:
         """Detect layout elements in an image using YOLO."""
         if not self.is_available():
-            raise RuntimeError("YOLO dependencies (doclayout_yolo, huggingface_hub) not installed.")
+            raise RuntimeError(
+                "YOLO dependencies not installed. Please run: pip install 'natural-pdf[layout_yolo]'"
+            )
         # Ensure options are the correct type, falling back to defaults if base type passed
         if not isinstance(options, YOLOLayoutOptions):
-             self.logger.warning("Received BaseLayoutOptions, expected YOLOLayoutOptions. Using defaults.")
-             options = YOLOLayoutOptions(
-                 confidence=options.confidence, classes=options.classes,
-                 exclude_classes=options.exclude_classes, device=options.device,
-                 extra_args=options.extra_args
-             )
+            self.logger.warning(
+                "Received BaseLayoutOptions, expected YOLOLayoutOptions. Using defaults."
+            )
+            options = YOLOLayoutOptions(
+                confidence=options.confidence,
+                classes=options.classes,
+                exclude_classes=options.exclude_classes,
+                device=options.device,
+                extra_args=options.extra_args,
+            )
         # Validate classes before proceeding
         self.validate_classes(options.classes or [])
@@ -108,58 +139,71 @@ class YOLODocLayoutDetector(LayoutDetector):
             temp_image_path = os.path.join(temp_dir, "temp_layout_image.png")
             try:
                 self.logger.debug(f"Saving temporary image for YOLO detector to: {temp_image_path}")
-                image.convert("RGB").save(temp_image_path) # Ensure RGB
+                image.convert("RGB").save(temp_image_path)  # Ensure RGB
                 # Run model prediction
-                self.logger.debug(f"Running YOLO prediction (imgsz={options.image_size}, conf={options.confidence}, device={options.device})...")
+                self.logger.debug(
+                    f"Running YOLO prediction (imgsz={options.image_size}, conf={options.confidence}, device={options.device})..."
+                )
                 results = model.predict(
                     temp_image_path,
                     imgsz=options.image_size,
                     conf=options.confidence,
-                    device=options.device or 'cpu' # Default to cpu if None
+                    device=options.device or "cpu",  # Default to cpu if None
                     # Add other predict args from options.extra_args if needed
                     # **options.extra_args
                 )
                 self.logger.debug(f"YOLO prediction returned {len(results)} result objects.")
                 # Process results into standardized format
-                img_width, img_height = image.size # Get original image size for context if needed
+                img_width, img_height = image.size  # Get original image size for context if needed
                 for result in results:
-                    if result.boxes is None: continue
+                    if result.boxes is None:
+                        continue
                     boxes = result.boxes.xyxy
                     labels = result.boxes.cls
                     scores = result.boxes.conf
-                    class_names = result.names # Dictionary mapping index to name
+                    class_names = result.names  # Dictionary mapping index to name
                     for box, label_idx_tensor, score_tensor in zip(boxes, labels, scores):
                         x_min, y_min, x_max, y_max = map(float, box.tolist())
-                        label_idx = int(label_idx_tensor.item()) # Get int index
-                        score = float(score_tensor.item()) # Get float score
+                        label_idx = int(label_idx_tensor.item())  # Get int index
+                        score = float(score_tensor.item())  # Get float score
                         if label_idx not in class_names:
-                             self.logger.warning(f"Label index {label_idx} not found in model names dict. Skipping.")
-                             continue
+                            self.logger.warning(
+                                f"Label index {label_idx} not found in model names dict. Skipping."
+                            )
+                            continue
                         label_name = class_names[label_idx]
                         normalized_class = self._normalize_class_name(label_name)
                         # Apply class filtering (using normalized names)
-                        if options.classes and normalized_class not in [self._normalize_class_name(c) for c in options.classes]:
+                        if options.classes and normalized_class not in [
+                            self._normalize_class_name(c) for c in options.classes
+                        ]:
                             continue
-                        if options.exclude_classes and normalized_class in [self._normalize_class_name(c) for c in options.exclude_classes]:
+                        if options.exclude_classes and normalized_class in [
+                            self._normalize_class_name(c) for c in options.exclude_classes
+                        ]:
                             continue
-                        detections.append({
-                            'bbox': (x_min, y_min, x_max, y_max),
-                            'class': label_name,
-                            'confidence': score,
-                            'normalized_class': normalized_class,
-                            'source': 'layout',
-                            'model': 'yolo'
-                        })
-                self.logger.info(f"YOLO detected {len(detections)} layout elements matching criteria.")
+                        detections.append(
+                            {
+                                "bbox": (x_min, y_min, x_max, y_max),
+                                "class": label_name,
+                                "confidence": score,
+                                "normalized_class": normalized_class,
+                                "source": "layout",
+                                "model": "yolo",
+                            }
+                        )
+                self.logger.info(
+                    f"YOLO detected {len(detections)} layout elements matching criteria."
+                )
             except Exception as e:
                 self.logger.error(f"Error during YOLO detection: {e}", exc_info=True)
-                raise # Re-raise the exception
+                raise  # Re-raise the exception
         return detections

natural-pdf 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

natural-pdf 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl