PyPI - natural-pdf - Versions diffs - 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl - Mend

natural-pdf 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134) hide show

natural_pdf/__init__.py +1 -0
natural_pdf/analyzers/layout/base.py +1 -5
natural_pdf/analyzers/layout/gemini.py +61 -51
natural_pdf/analyzers/layout/layout_analyzer.py +40 -11
natural_pdf/analyzers/layout/layout_manager.py +26 -84
natural_pdf/analyzers/layout/layout_options.py +7 -0
natural_pdf/analyzers/layout/pdfplumber_table_finder.py +142 -0
natural_pdf/analyzers/layout/surya.py +46 -123
natural_pdf/analyzers/layout/tatr.py +51 -4
natural_pdf/analyzers/text_structure.py +3 -5
natural_pdf/analyzers/utils.py +3 -3
natural_pdf/classification/manager.py +241 -158
natural_pdf/classification/mixin.py +52 -38
natural_pdf/classification/results.py +71 -45
natural_pdf/collections/mixins.py +85 -20
natural_pdf/collections/pdf_collection.py +245 -100
natural_pdf/core/element_manager.py +30 -14
natural_pdf/core/highlighting_service.py +13 -22
natural_pdf/core/page.py +423 -101
natural_pdf/core/pdf.py +694 -195
natural_pdf/elements/base.py +134 -40
natural_pdf/elements/collections.py +610 -134
natural_pdf/elements/region.py +659 -90
natural_pdf/elements/text.py +1 -1
natural_pdf/export/mixin.py +137 -0
natural_pdf/exporters/base.py +3 -3
natural_pdf/exporters/paddleocr.py +4 -3
natural_pdf/extraction/manager.py +50 -49
natural_pdf/extraction/mixin.py +90 -57
natural_pdf/extraction/result.py +9 -23
natural_pdf/ocr/__init__.py +5 -5
natural_pdf/ocr/engine_doctr.py +346 -0
natural_pdf/ocr/ocr_factory.py +24 -4
natural_pdf/ocr/ocr_manager.py +61 -25
natural_pdf/ocr/ocr_options.py +70 -10
natural_pdf/ocr/utils.py +6 -4
natural_pdf/search/__init__.py +20 -34
natural_pdf/search/haystack_search_service.py +309 -265
natural_pdf/search/haystack_utils.py +99 -75
natural_pdf/search/search_service_protocol.py +11 -12
natural_pdf/selectors/parser.py +219 -143
natural_pdf/utils/debug.py +3 -3
natural_pdf/utils/identifiers.py +1 -1
natural_pdf/utils/locks.py +1 -1
natural_pdf/utils/packaging.py +8 -6
natural_pdf/utils/text_extraction.py +24 -16
natural_pdf/utils/tqdm_utils.py +18 -10
natural_pdf/utils/visualization.py +18 -0
natural_pdf/widgets/viewer.py +4 -25
{natural_pdf-0.1.8.dist-info → natural_pdf-0.1.10.dist-info}/METADATA +12 -3
natural_pdf-0.1.10.dist-info/RECORD +80 -0
{natural_pdf-0.1.8.dist-info → natural_pdf-0.1.10.dist-info}/WHEEL +1 -1
{natural_pdf-0.1.8.dist-info → natural_pdf-0.1.10.dist-info}/top_level.txt +0 -2
docs/api/index.md +0 -386
docs/assets/favicon.png +0 -3
docs/assets/favicon.svg +0 -3
docs/assets/javascripts/custom.js +0 -17
docs/assets/logo.svg +0 -3
docs/assets/sample-screen.png +0 -0
docs/assets/social-preview.png +0 -17
docs/assets/social-preview.svg +0 -17
docs/assets/stylesheets/custom.css +0 -65
docs/categorizing-documents/index.md +0 -168
docs/data-extraction/index.md +0 -87
docs/document-qa/index.ipynb +0 -435
docs/document-qa/index.md +0 -79
docs/element-selection/index.ipynb +0 -969
docs/element-selection/index.md +0 -249
docs/finetuning/index.md +0 -176
docs/index.md +0 -189
docs/installation/index.md +0 -69
docs/interactive-widget/index.ipynb +0 -962
docs/interactive-widget/index.md +0 -12
docs/layout-analysis/index.ipynb +0 -818
docs/layout-analysis/index.md +0 -185
docs/ocr/index.md +0 -256
docs/pdf-navigation/index.ipynb +0 -314
docs/pdf-navigation/index.md +0 -97
docs/regions/index.ipynb +0 -816
docs/regions/index.md +0 -294
docs/tables/index.ipynb +0 -658
docs/tables/index.md +0 -144
docs/text-analysis/index.ipynb +0 -370
docs/text-analysis/index.md +0 -105
docs/text-extraction/index.ipynb +0 -1478
docs/text-extraction/index.md +0 -292
docs/tutorials/01-loading-and-extraction.ipynb +0 -1873
docs/tutorials/01-loading-and-extraction.md +0 -95
docs/tutorials/02-finding-elements.ipynb +0 -417
docs/tutorials/02-finding-elements.md +0 -149
docs/tutorials/03-extracting-blocks.ipynb +0 -152
docs/tutorials/03-extracting-blocks.md +0 -48
docs/tutorials/04-table-extraction.ipynb +0 -119
docs/tutorials/04-table-extraction.md +0 -50
docs/tutorials/05-excluding-content.ipynb +0 -275
docs/tutorials/05-excluding-content.md +0 -109
docs/tutorials/06-document-qa.ipynb +0 -337
docs/tutorials/06-document-qa.md +0 -91
docs/tutorials/07-layout-analysis.ipynb +0 -293
docs/tutorials/07-layout-analysis.md +0 -66
docs/tutorials/07-working-with-regions.ipynb +0 -414
docs/tutorials/07-working-with-regions.md +0 -151
docs/tutorials/08-spatial-navigation.ipynb +0 -513
docs/tutorials/08-spatial-navigation.md +0 -190
docs/tutorials/09-section-extraction.ipynb +0 -2439
docs/tutorials/09-section-extraction.md +0 -256
docs/tutorials/10-form-field-extraction.ipynb +0 -517
docs/tutorials/10-form-field-extraction.md +0 -201
docs/tutorials/11-enhanced-table-processing.ipynb +0 -59
docs/tutorials/11-enhanced-table-processing.md +0 -9
docs/tutorials/12-ocr-integration.ipynb +0 -3712
docs/tutorials/12-ocr-integration.md +0 -137
docs/tutorials/13-semantic-search.ipynb +0 -1718
docs/tutorials/13-semantic-search.md +0 -77
docs/visual-debugging/index.ipynb +0 -2970
docs/visual-debugging/index.md +0 -157
docs/visual-debugging/region.png +0 -0
natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -420
natural_pdf/templates/spa/css/style.css +0 -334
natural_pdf/templates/spa/index.html +0 -31
natural_pdf/templates/spa/js/app.js +0 -472
natural_pdf/templates/spa/words.txt +0 -235976
natural_pdf/widgets/frontend/viewer.js +0 -88
natural_pdf-0.1.8.dist-info/RECORD +0 -156
notebooks/Examples.ipynb +0 -1293
pdfs/.gitkeep +0 -0
pdfs/01-practice.pdf +0 -543
pdfs/0500000US42001.pdf +0 -0
pdfs/0500000US42007.pdf +0 -0
pdfs/2014 Statistics.pdf +0 -0
pdfs/2019 Statistics.pdf +0 -0
pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
pdfs/needs-ocr.pdf +0 -0
{natural_pdf-0.1.8.dist-info → natural_pdf-0.1.10.dist-info}/licenses/LICENSE +0 -0

natural_pdf/__init__.py CHANGED Viewed

@@ -4,6 +4,7 @@ Natural PDF - A more intuitive interface for working with PDFs.
 import logging
 import os
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # Create library logger

natural_pdf/analyzers/layout/base.py CHANGED Viewed

@@ -98,7 +98,7 @@ class LayoutDetector(ABC):
                 self.logger.error(f"Failed to load model for key {cache_key}: {e}", exc_info=True)
                 # Remove potentially corrupted cache entry
                 self._model_cache.pop(cache_key, None)
-                raise  # Re-raise exception after logging
+                raise
         else:
             self.logger.debug(f"Using cached model for key: {cache_key}")
         return self._model_cache[cache_key]
@@ -135,7 +135,6 @@ class LayoutDetector(ABC):
             return
         if classes:
-            # Normalize both requested and supported classes for comparison
             normalized_supported = {self._normalize_class_name(c) for c in self.supported_classes}
             normalized_requested = {self._normalize_class_name(c) for c in classes}
             unsupported_normalized = normalized_requested - normalized_supported
@@ -153,7 +152,4 @@ class LayoutDetector(ABC):
     def __del__(self):
         """Cleanup resources."""
         self.logger.info(f"Cleaning up {self.__class__.__name__} resources.")
-        # Clear model cache to free up memory/GPU resources if models are large
-        # Consider implications if models are shared or expensive to reload
-        # del self._model_cache # Optional: uncomment if models should be released aggressively
         self._model_cache.clear()

natural_pdf/analyzers/layout/gemini.py CHANGED Viewed

@@ -1,13 +1,13 @@
 # layout_detector_gemini.py
+import base64
 import importlib.util
+import io
 import logging
 import os
 from typing import Any, Dict, List, Optional
-import base64
-import io
-from pydantic import BaseModel, Field
 from PIL import Image
+from pydantic import BaseModel, Field
 # Use OpenAI library for interaction
 try:
@@ -53,10 +53,8 @@ logger = logging.getLogger(__name__)
 # This is used by the openai library's `response_format`
 class DetectedRegion(BaseModel):
     label: str = Field(description="The identified class name.")
-    bbox: List[float] = Field(
-        description="Bounding box coordinates [xmin, ymin, xmax, ymax].", min_items=4, max_items=4
-    )
-    confidence: float = Field(description="Confidence score [0.0, 1.0].", ge=0.0, le=1.0)
+    bbox: List[float] = Field(description="Bounding box coordinates [xmin, ymin, xmax, ymax].")
+    confidence: float = Field(description="Confidence score [0.0, 1.0].")
 class GeminiLayoutDetector(LayoutDetector):
@@ -70,16 +68,10 @@ class GeminiLayoutDetector(LayoutDetector):
         self.supported_classes = set()  # Indicate dynamic nature
     def is_available(self) -> bool:
-        """Check if openai library is installed and GOOGLE_API_KEY is available."""
-        api_key = os.environ.get("GOOGLE_API_KEY")
-        if not api_key:
-            logger.warning(
-                "GOOGLE_API_KEY environment variable not set. Gemini detector (via OpenAI lib) will not be available."
-            )
-            return False
+        """Check if openai library is installed."""
         if OpenAI is None:
             logger.warning(
-                "openai package not found. Gemini detector (via OpenAI lib) will not be available."
+                "openai package not found. Gemini detector (via OpenAI lib) will not be available. Run: pip install openai"
             )
             return False
         return True
@@ -96,44 +88,65 @@ class GeminiLayoutDetector(LayoutDetector):
     def _load_model_from_options(self, options: GeminiLayoutOptions) -> Any:
         """Validate options and return the model name."""
         if not self.is_available():
-            raise RuntimeError(
-                "OpenAI library not installed or GOOGLE_API_KEY not set. Please run: pip install openai"
-            )
+            raise RuntimeError("OpenAI library not installed. Please run: pip install openai")
         if not isinstance(options, GeminiLayoutOptions):
             raise TypeError("Incorrect options type provided for Gemini model loading.")
-        # Simply return the model name, client is created in detect()
+        # Model loading is deferred to detect() based on whether a client is provided
         return options.model_name
     def detect(self, image: Image.Image, options: BaseLayoutOptions) -> List[Dict[str, Any]]:
         """Detect layout elements in an image using Gemini via OpenAI library."""
         if not self.is_available():
-            raise RuntimeError("OpenAI library not installed or GOOGLE_API_KEY not set.")
+            # The is_available check now only confirms library presence
+            raise RuntimeError("OpenAI library not installed. Please run: pip install openai")
         # Ensure options are the correct type
-        if not isinstance(options, GeminiLayoutOptions):
+        final_options: GeminiLayoutOptions
+        if isinstance(options, GeminiLayoutOptions):
+            final_options = options
+        else:
+            # If base options are passed, try to convert, keeping extra_args
+            # Note: This won't transfer a 'client' if it was somehow attached to BaseLayoutOptions
             self.logger.warning(
-                "Received BaseLayoutOptions, expected GeminiLayoutOptions. Using defaults."
+                "Received BaseLayoutOptions, expected GeminiLayoutOptions. Converting and using defaults."
             )
-            options = GeminiLayoutOptions(
+            final_options = GeminiLayoutOptions(
                 confidence=options.confidence,
                 classes=options.classes,
                 exclude_classes=options.exclude_classes,
-                device=options.device,
+                device=options.device,  # device is not used by Gemini detector currently
                 extra_args=options.extra_args,
+                # client will be None here, forcing default client creation below
             )
-        model_name = self._get_model(options)
-        api_key = os.environ.get("GOOGLE_API_KEY")
+        model_name = self._get_model(final_options)
         detections = []
         try:
-            # --- 1. Initialize OpenAI Client for Gemini ---
-            client = OpenAI(api_key=api_key, base_url=self.GEMINI_BASE_URL)
+            # --- 1. Initialize OpenAI Client ---
+            client: Optional[OpenAI] = None
+            # Use the provided client instance
+            if hasattr(final_options.client, "beta") and hasattr(
+                final_options.client.beta.chat.completions, "parse"
+            ):
+                client = final_options.client
+                logger.debug("Using provided client instance.")
+            else:
+                logger.error(
+                    "Provided client does not seem compatible (missing beta.chat.completions.parse)."
+                )
+                raise TypeError(
+                    "Provided client is not compatible with the expected OpenAI interface."
+                )
+            if not client:
+                # This should not happen if logic above is correct, but as a safeguard
+                raise RuntimeError("Failed to obtain a valid client for Gemini detection.")
             # --- 2. Prepare Input for OpenAI API ---
-            if not options.classes:
+            if not final_options.classes:
                 logger.error("Gemini layout detection requires a list of classes to find.")
                 return []
@@ -145,15 +158,13 @@ class GeminiLayoutDetector(LayoutDetector):
             img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
             image_url = f"data:image/png;base64,{img_base64}"
-            # Construct the prompt text
-            class_list_str = ", ".join(f"`{c}`" for c in options.classes)
+            class_list_str = ", ".join(f"`{c}`" for c in final_options.classes)
             prompt_text = (
                 f"Analyze the provided image of a document page ({width}x{height}). "
                 f"Identify all regions corresponding to the following types: {class_list_str}. "
-                f"Return ONLY the structured data requested."
+                f"Return ONLY the structured data requested as formatted JSON."
             )
-            # Prepare messages for chat completions endpoint
             messages = [
                 {
                     "role": "user",
@@ -167,27 +178,26 @@ class GeminiLayoutDetector(LayoutDetector):
                 }
             ]
-            # --- 3. Call OpenAI API using .parse for structured output ---
             logger.debug(
-                f"Running Gemini detection via OpenAI lib (Model: {model_name}). Asking for classes: {options.classes}"
+                f"Running Gemini detection via OpenAI lib (Model: {model_name}). Asking for classes: {final_options.classes}"
             )
-            # Extract relevant generation parameters from extra_args if provided
-            # Mapping common names: temperature, top_p, max_tokens
             completion_kwargs = {
-                "temperature": options.extra_args.get("temperature", 0.2),  # Default to low temp
-                "top_p": options.extra_args.get("top_p"),
-                "max_tokens": options.extra_args.get(
-                    "max_tokens", 4096
-                ),  # Map from max_output_tokens
+                "temperature": final_options.extra_args.get(
+                    "temperature", 0.0
+                ),  # Default to low temp
+                "max_tokens": final_options.extra_args.get("max_tokens", 4096),
             }
-            # Filter out None values
             completion_kwargs = {k: v for k, v in completion_kwargs.items() if v is not None}
+            class ImageContents(BaseModel):
+                regions: List[DetectedRegion]
             completion: ChatCompletion = client.beta.chat.completions.parse(
                 model=model_name,
                 messages=messages,
-                response_format=List[DetectedRegion],  # Pass the Pydantic model list
+                response_format=ImageContents,
                 **completion_kwargs,
             )
@@ -199,7 +209,7 @@ class GeminiLayoutDetector(LayoutDetector):
                 return []
             # Get the parsed Pydantic objects
-            parsed_results = completion.choices[0].message.parsed
+            parsed_results = completion.choices[0].message.parsed.regions
             if not parsed_results or not isinstance(parsed_results, list):
                 logger.error(
                     f"Gemini response (via OpenAI lib) did not contain a valid list of parsed regions. Found: {type(parsed_results)}"
@@ -207,10 +217,10 @@ class GeminiLayoutDetector(LayoutDetector):
                 return []
             # --- 5. Convert to Detections & Filter ---
-            normalized_classes_req = {self._normalize_class_name(c) for c in options.classes}
+            normalized_classes_req = {self._normalize_class_name(c) for c in final_options.classes}
             normalized_classes_excl = (
-                {self._normalize_class_name(c) for c in options.exclude_classes}
-                if options.exclude_classes
+                {self._normalize_class_name(c) for c in final_options.exclude_classes}
+                if final_options.exclude_classes
                 else set()
             )
@@ -242,9 +252,9 @@ class GeminiLayoutDetector(LayoutDetector):
                     continue
                 # Check against base confidence threshold from options
-                if confidence_score < options.confidence:
+                if confidence_score < final_options.confidence:
                     logger.debug(
-                        f"Skipping item with confidence {confidence_score:.3f} below threshold {options.confidence}."
+                        f"Skipping item with confidence {confidence_score:.3f} below threshold {final_options.confidence}."
                     )
                     continue

natural_pdf/analyzers/layout/layout_analyzer.py CHANGED Viewed

@@ -7,6 +7,7 @@ from PIL import Image
 from natural_pdf.analyzers.layout.layout_manager import LayoutManager
 from natural_pdf.analyzers.layout.layout_options import (
     BaseLayoutOptions,
+    GeminiLayoutOptions,
     LayoutOptions,
     TATRLayoutOptions,
 )
@@ -82,10 +83,10 @@ class LayoutAnalyzer:
             f"  Rendering page {self._page.number} to image for initial layout detection..."
         )
         try:
-            layout_scale = getattr(self._page._parent, "_config", {}).get("layout_image_scale", 1.5)
+            layout_scale = getattr(self._page._parent, "_config", {}).get("layout_image_scale", 1.0)
             layout_resolution = layout_scale * 72
             std_res_page_image = self._page.to_image(
-                resolution=layout_resolution, include_highlights=False
+                resolution=layout_resolution, include_highlights=False, scale=1.0
             )
             if not std_res_page_image:
                 raise ValueError("Initial page rendering returned None")
@@ -110,12 +111,11 @@ class LayoutAnalyzer:
         final_options: BaseLayoutOptions
         if options is not None:
-            # User provided a complete options object, use it directly
             logger.debug("Using user-provided options object.")
             final_options = copy.deepcopy(options)  # Copy to avoid modifying original user object
             if kwargs:
                 logger.warning(
-                    f"Ignoring kwargs {list(kwargs.keys())} because a full options object was provided."
+                    f"Ignoring simple mode keyword arguments {list(kwargs.keys())} because a full options object was provided."
                 )
             # Infer engine from options type if engine arg wasn't provided
             if engine is None:
@@ -145,16 +145,39 @@ class LayoutAnalyzer:
             # Get base defaults
             base_defaults = BaseLayoutOptions()
+            # Separate client from other kwargs
+            client_instance = kwargs.pop("client", None)  # Get client, remove from kwargs
+            # Separate model_name if provided for Gemini
+            model_name_kwarg = None
+            if issubclass(options_class, GeminiLayoutOptions):
+                model_name_kwarg = kwargs.pop("model_name", None)
             # Prepare args for constructor, prioritizing explicit args over defaults
             constructor_args = {
                 "confidence": confidence if confidence is not None else base_defaults.confidence,
                 "classes": classes,  # Pass None if not provided
                 "exclude_classes": exclude_classes,  # Pass None if not provided
                 "device": device if device is not None else base_defaults.device,
-                "extra_args": kwargs,  # Pass other kwargs here
+                # Pass client explicitly if constructing Gemini options
+                # Note: We check issubclass *before* calling constructor
+                **(
+                    {"client": client_instance}
+                    if client_instance and issubclass(options_class, GeminiLayoutOptions)
+                    else {}
+                ),
+                # Pass model_name explicitly if constructing Gemini options and it was provided
+                **(
+                    {"model_name": model_name_kwarg}
+                    if model_name_kwarg and issubclass(options_class, GeminiLayoutOptions)
+                    else {}
+                ),
+                "extra_args": kwargs,  # Pass REMAINING kwargs here
             }
             # Remove None values unless they are valid defaults (like classes=None)
             # We can pass all to the dataclass constructor; it handles defaults
+            # **Filter constructor_args to remove None values that aren't defaults?**
+            # For simplicity, let dataclass handle it for now.
             try:
                 final_options = options_class(**constructor_args)
@@ -167,24 +190,30 @@ class LayoutAnalyzer:
                 # Re-raise for now, indicates programming error or invalid kwarg.
                 raise e
-        # --- Add Internal Context to extra_args (ALWAYS) ---
+        # --- Add Internal Context to extra_args (Applies to the final_options object) ---
         if not hasattr(final_options, "extra_args") or final_options.extra_args is None:
+            # Ensure extra_args exists, potentially overwriting if needed
+            final_options.extra_args = {}
+        elif not isinstance(final_options.extra_args, dict):
+            logger.warning(
+                f"final_options.extra_args was not a dict ({type(final_options.extra_args)}), replacing with internal context."
+            )
             final_options.extra_args = {}
         final_options.extra_args["_page_ref"] = self._page
         final_options.extra_args["_img_scale_x"] = img_scale_x
         final_options.extra_args["_img_scale_y"] = img_scale_y
         logger.debug(
-            f"Added internal context to final_options.extra_args: {final_options.extra_args}"
+            f"Added/updated internal context in final_options.extra_args: {final_options.extra_args}"
         )
-        # --- Call Layout Manager with the Final Options ---
+        # --- Call Layout Manager (ALWAYS with options object) ---
         logger.debug(f"Calling Layout Manager with final options object.")
         try:
-            # Pass only image and the constructed options object
+            # ALWAYS pass the constructed/modified options object
             detections = self._layout_manager.analyze_layout(
                 image=std_res_page_image,
-                options=final_options,
-                # No engine, confidence, classes etc. passed here directly
+                options=final_options,  # Pass the final object with internal context
             )
             logger.info(f"  Layout Manager returned {len(detections)} detections.")
         # Specifically let errors about unknown/unavailable engines propagate

natural_pdf/analyzers/layout/layout_manager.py CHANGED Viewed

@@ -96,9 +96,6 @@ class LayoutManager:
             "options_class": GeminiLayoutOptions,
         }
-    # Define the limited set of kwargs allowed for the simple analyze_layout call
-    SIMPLE_MODE_ALLOWED_KWARGS = {"engine", "confidence", "classes", "exclude_classes", "device"}
     def __init__(self):
         """Initializes the Layout Manager."""
         # Cache for detector instances (different from model cache inside detector)
@@ -145,109 +142,54 @@ class LayoutManager:
     def analyze_layout(
         self,
         image: Image.Image,
-        engine: Optional[str] = None,  # Default engine handled below
-        options: Optional[LayoutOptions] = None,
-        **kwargs,
+        options: LayoutOptions,
     ) -> List[Dict[str, Any]]:
         """
-        Analyzes layout of a single image using simple args or an options object.
+        Analyzes layout of a single image using a specific options object.
         Args:
             image: The PIL Image to analyze.
-            engine: Name of the engine (e.g., 'yolo', 'tatr'). Ignored if 'options' provided.
-                    Defaults to the first available engine if None.
-            options: Specific LayoutOptions object for advanced configuration.
-            **kwargs: For simple mode, accepts: 'confidence', 'classes',
-                      'exclude_classes', 'device'.
+            options: Specific LayoutOptions object containing configuration and context.
+                     This object MUST be provided.
         Returns:
             A list of standardized detection dictionaries.
         """
-        final_options: BaseLayoutOptions
-        selected_engine_name: str
-        if not isinstance(image, Image.Image):
-            raise TypeError("Input 'image' must be a PIL Image.")
-        available_engines = self.get_available_engines()
-        if not available_engines:
-            raise RuntimeError("No layout engines are available. Please check dependencies.")
-        # Determine default engine if not specified
-        default_engine = engine if engine else available_engines[0]
-        # --- Determine Options and Engine ---
-        if options is not None:
-            # Advanced Mode: An options object was provided directly (or constructed by LayoutAnalyzer)
-            # Use this object directly, do not deep copy or reconstruct.
-            logger.debug(f"LayoutManager: Using provided options object: {type(options).__name__}")
-            final_options = options  # Use the provided object directly
-            found_engine = False
-            for name, registry_entry in self.ENGINE_REGISTRY.items():
-                if isinstance(options, registry_entry["options_class"]):
-                    selected_engine_name = name
-                    found_engine = True
-                    break
-            if not found_engine:
-                raise TypeError(
-                    f"Provided options object type '{type(options).__name__}' does not match any registered layout engine options."
-                )
-            # Ignore simple kwargs if options object is present
-            if kwargs:
-                logger.warning(
-                    f"Keyword arguments {list(kwargs.keys())} were provided alongside an 'options' object and will be ignored."
-                )
-        else:
-            # Simple Mode: No options object provided initially.
-            # Determine engine from kwargs or default, then construct options.
-            selected_engine_name = default_engine.lower()
-            logger.debug(
-                f"LayoutManager: Using simple mode. Engine: '{selected_engine_name}', kwargs: {kwargs}"
+        selected_engine_name: Optional[str] = None
+        found_engine = False
+        for name, registry_entry in self.ENGINE_REGISTRY.items():
+            if isinstance(options, registry_entry["options_class"]):
+                selected_engine_name = name
+                found_engine = True
+                break
+        if not found_engine or selected_engine_name is None:
+            available_options_types = [
+                reg["options_class"].__name__ for reg in self.ENGINE_REGISTRY.values()
+            ]
+            raise TypeError(
+                f"Provided options object type '{type(options).__name__}' does not match any registered layout engine options: {available_options_types}"
             )
-            if selected_engine_name not in self.ENGINE_REGISTRY:
-                raise ValueError(
-                    f"Unknown or unavailable layout engine: '{selected_engine_name}'. Available: {available_engines}"
-                )
-            unexpected_kwargs = set(kwargs.keys()) - self.SIMPLE_MODE_ALLOWED_KWARGS
-            if unexpected_kwargs:
-                raise TypeError(
-                    f"Got unexpected keyword arguments in simple mode: {list(unexpected_kwargs)}. Use the 'options' parameter for detailed configuration."
-                )
-            options_class = self.ENGINE_REGISTRY[selected_engine_name]["options_class"]
-            # Use BaseLayoutOptions defaults unless overridden by kwargs
-            base_defaults = BaseLayoutOptions()
-            simple_args = {
-                "confidence": kwargs.get("confidence", base_defaults.confidence),
-                "classes": kwargs.get("classes"),
-                "exclude_classes": kwargs.get("exclude_classes"),
-                "device": kwargs.get("device", base_defaults.device),
-            }
-            # Filter out None values before passing to constructor
-            simple_args_filtered = {k: v for k, v in simple_args.items() if v is not None}
-            final_options = options_class(**simple_args_filtered)
-            logger.debug(f"LayoutManager: Constructed options for simple mode: {final_options}")
-        # --- Get Engine Instance and Process ---
         try:
             engine_instance = self._get_engine_instance(selected_engine_name)
             logger.info(f"Analyzing layout with engine '{selected_engine_name}'...")
-            # Call the engine's detect method
-            detections = engine_instance.detect(image, final_options)
+            detections = engine_instance.detect(image, options)  # Pass options directly
             logger.info(f"Layout analysis complete. Found {len(detections)} regions.")
             return detections
         except (ImportError, RuntimeError, ValueError, TypeError) as e:
-            logger.error(
-                f"Layout analysis failed for engine '{selected_engine_name}': {e}", exc_info=True
-            )
+            # Add engine name to error message if possible
+            engine_context = f" for engine '{selected_engine_name}'" if selected_engine_name else ""
+            logger.error(f"Layout analysis failed{engine_context}: {e}", exc_info=True)
             raise  # Re-raise expected errors
         except Exception as e:
-            logger.error(f"An unexpected error occurred during layout analysis: {e}", exc_info=True)
+            engine_context = f" for engine '{selected_engine_name}'" if selected_engine_name else ""
+            logger.error(
+                f"An unexpected error occurred during layout analysis{engine_context}: {e}",
+                exc_info=True,
+            )
             raise  # Re-raise unexpected errors
     def get_available_engines(self) -> List[str]:

natural_pdf/analyzers/layout/layout_options.py CHANGED Viewed

@@ -43,6 +43,12 @@ class TATRLayoutOptions(BaseLayoutOptions):
     max_structure_size: int = 1000
     # Whether to create cell regions (can be slow)
     create_cells: bool = True
+    # Image enhancement options
+    enhance_contrast: float = 1.5  # Contrast enhancement factor (1.0 = no change)
+    # Special thresholds for specific elements
+    column_threshold: Optional[float] = (
+        None  # Lower threshold for columns (default: confidence * 0.8)
+    )
 # --- Paddle Specific Options ---
@@ -86,6 +92,7 @@ class GeminiLayoutOptions(BaseLayoutOptions):
     """Options specific to Gemini-based layout detection (using OpenAI compatibility)."""
     model_name: str = "gemini-2.0-flash"
+    client: Optional[Any] = None  # Allow passing a pre-configured client
     # Removed: prompt_template, temperature, top_p, max_output_tokens
     # These are typically passed directly to the chat completion call or via extra_args

natural-pdf 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

natural-pdf 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl