PyPI - docling - Versions diffs - 2.11.0__tar.gz → 2.12.0__tar.gz - Mend

docling 2.11.0.tar.gz → 2.12.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

{docling-2.11.0 → docling-2.12.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 2.11.0
+Version: 2.12.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Home-page: https://github.com/DS4SD/docling
 License: MIT
@@ -27,7 +27,7 @@ Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
 Requires-Dist: certifi (>=2024.7.4)
 Requires-Dist: deepsearch-glm (>=1.0.0,<2.0.0)
 Requires-Dist: docling-core[chunking] (>=2.9.0,<3.0.0)
-Requires-Dist: docling-ibm-models (>=2.0.6,<3.0.0)
+Requires-Dist: docling-ibm-models (>=3.1.0,<4.0.0)
 Requires-Dist: docling-parse (>=3.0.0,<4.0.0)
 Requires-Dist: easyocr (>=1.7,<2.0)
 Requires-Dist: filetype (>=1.2.0,<2.0.0)

{docling-2.11.0 → docling-2.12.0}/docling/cli/main.py RENAMED Viewed

@@ -26,6 +26,8 @@ from docling.datamodel.base_models import (
 )
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
     EasyOcrOptions,
     OcrEngine,
     OcrMacOptions,
@@ -257,6 +259,10 @@ def convert(
             help="The timeout for processing each document, in seconds.",
         ),
     ] = None,
+    num_threads: Annotated[int, typer.Option(..., help="Number of threads")] = 4,
+    device: Annotated[
+        AcceleratorDevice, typer.Option(..., help="Accelerator device")
+    ] = AcceleratorDevice.AUTO,
 ):
     if verbose == 0:
         logging.basicConfig(level=logging.WARNING)
@@ -336,7 +342,9 @@ def convert(
         if ocr_lang_list is not None:
             ocr_options.lang = ocr_lang_list
+        accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
         pipeline_options = PdfPipelineOptions(
+            accelerator_options=accelerator_options,
             do_ocr=ocr,
             ocr_options=ocr_options,
             do_table_structure=True,

{docling-2.11.0 → docling-2.12.0}/docling/datamodel/pipeline_options.py RENAMED Viewed

@@ -1,8 +1,66 @@
+import logging
+import os
+import warnings
 from enum import Enum
 from pathlib import Path
-from typing import List, Literal, Optional, Union
+from typing import Annotated, Any, Dict, List, Literal, Optional, Tuple, Type, Union
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+from pydantic_settings import (
+    BaseSettings,
+    PydanticBaseSettingsSource,
+    SettingsConfigDict,
+)
+from typing_extensions import deprecated
+_log = logging.getLogger(__name__)
+class AcceleratorDevice(str, Enum):
+    """Devices to run model inference"""
+    AUTO = "auto"
+    CPU = "cpu"
+    CUDA = "cuda"
+    MPS = "mps"
+class AcceleratorOptions(BaseSettings):
+    model_config = SettingsConfigDict(
+        env_prefix="DOCLING_", env_nested_delimiter="_", populate_by_name=True
+    )
+    num_threads: int = 4
+    device: AcceleratorDevice = AcceleratorDevice.AUTO
+    @model_validator(mode="before")
+    @classmethod
+    def check_alternative_envvars(cls, data: Any) -> Any:
+        r"""
+        Set num_threads from the "alternative" envvar OMP_NUM_THREADS.
+        The alternative envvar is used only if it is valid and the regular envvar is not set.
+        Notice: The standard pydantic settings mechanism with parameter "aliases" does not provide
+        the same functionality. In case the alias envvar is set and the user tries to override the
+        parameter in settings initialization, Pydantic treats the parameter provided in __init__()
+        as an extra input instead of simply overwriting the envvar value for that parameter.
+        """
+        if isinstance(data, dict):
+            input_num_threads = data.get("num_threads")
+            # Decide whether to take num_threads from the alternative envvar
+            if input_num_threads is None:
+                docling_num_threads = os.getenv("DOCLING_NUM_THREADS")
+                omp_num_threads = os.getenv("OMP_NUM_THREADS")
+                if docling_num_threads is None and omp_num_threads is not None:
+                    try:
+                        data["num_threads"] = int(omp_num_threads)
+                    except ValueError:
+                        _log.error(
+                            "Ignoring misformatted envvar OMP_NUM_THREADS '%s'",
+                            omp_num_threads,
+                        )
+        return data
 class TableFormerMode(str, Enum):
@@ -78,9 +136,11 @@ class EasyOcrOptions(OcrOptions):
     kind: Literal["easyocr"] = "easyocr"
     lang: List[str] = ["fr", "de", "es", "en"]
-    use_gpu: bool = True  # same default as easyocr.Reader
+    use_gpu: Optional[bool] = None
     model_storage_directory: Optional[str] = None
-    download_enabled: bool = True  # same default as easyocr.Reader
+    download_enabled: bool = True
     model_config = ConfigDict(
         extra="forbid",
@@ -153,6 +213,7 @@ class PipelineOptions(BaseModel):
         True  # This default will be set to False on a future version of docling
     )
     document_timeout: Optional[float] = None
+    accelerator_options: AcceleratorOptions = AcceleratorOptions()
 class PdfPipelineOptions(PipelineOptions):

{docling-2.11.0 → docling-2.12.0}/docling/models/easyocr_model.py RENAMED Viewed

@@ -1,4 +1,5 @@
 import logging
+import warnings
 from typing import Iterable
 import numpy
@@ -7,16 +8,26 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling.datamodel.base_models import Cell, OcrCell, Page
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import EasyOcrOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    EasyOcrOptions,
+)
 from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder
 _log = logging.getLogger(__name__)
 class EasyOcrModel(BaseOcrModel):
-    def __init__(self, enabled: bool, options: EasyOcrOptions):
+    def __init__(
+        self,
+        enabled: bool,
+        options: EasyOcrOptions,
+        accelerator_options: AcceleratorOptions,
+    ):
         super().__init__(enabled=enabled, options=options)
         self.options: EasyOcrOptions
@@ -31,11 +42,32 @@ class EasyOcrModel(BaseOcrModel):
                     "Alternatively, Docling has support for other OCR engines. See the documentation."
                 )
+            if self.options.use_gpu is None:
+                device = decide_device(accelerator_options.device)
+                # Enable easyocr GPU if running on CUDA, MPS
+                use_gpu = any(
+                    [
+                        device.startswith(x)
+                        for x in [
+                            AcceleratorDevice.CUDA.value,
+                            AcceleratorDevice.MPS.value,
+                        ]
+                    ]
+                )
+            else:
+                warnings.warn(
+                    "Deprecated field. Better to set the `accelerator_options.device` in `pipeline_options`. "
+                    "When `use_gpu and accelerator_options.device == AcceleratorDevice.CUDA` the GPU is used "
+                    "to run EasyOCR. Otherwise, EasyOCR runs in CPU."
+                )
+                use_gpu = self.options.use_gpu
             self.reader = easyocr.Reader(
                 lang_list=self.options.lang,
-                gpu=self.options.use_gpu,
+                gpu=use_gpu,
                 model_storage_directory=self.options.model_storage_directory,
                 download_enabled=self.options.download_enabled,
+                verbose=False,
             )
     def __call__(

{docling-2.11.0 → docling-2.12.0}/docling/models/layout_model.py RENAMED Viewed

@@ -9,6 +9,7 @@ from docling_core.types.doc import CoordOrigin, DocItemLabel
 from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
 from PIL import ImageDraw
+import docling.utils.layout_utils as lu
 from docling.datamodel.base_models import (
     BoundingBox,
     Cell,
@@ -17,9 +18,10 @@ from docling.datamodel.base_models import (
     Page,
 )
 from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import AcceleratorDevice, AcceleratorOptions
 from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
-from docling.utils import layout_utils as lu
+from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder
 _log = logging.getLogger(__name__)
@@ -46,8 +48,16 @@ class LayoutModel(BasePageModel):
     FIGURE_LABEL = DocItemLabel.PICTURE
     FORMULA_LABEL = DocItemLabel.FORMULA
-    def __init__(self, artifacts_path: Path):
-        self.layout_predictor = LayoutPredictor(artifacts_path)  # TODO temporary
+    def __init__(self, artifacts_path: Path, accelerator_options: AcceleratorOptions):
+        device = decide_device(accelerator_options.device)
+        self.layout_predictor = LayoutPredictor(
+            artifact_path=str(artifacts_path),
+            device=device,
+            num_threads=accelerator_options.num_threads,
+            base_threshold=0.6,
+            blacklist_classes={"Form", "Key-Value Region"},
+        )
     def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):
         MIN_INTERSECTION = 0.2

{docling-2.11.0 → docling-2.12.0}/docling/models/rapid_ocr_model.py RENAMED Viewed

@@ -6,16 +6,26 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling.datamodel.base_models import OcrCell, Page
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import RapidOcrOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    RapidOcrOptions,
+)
 from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder
 _log = logging.getLogger(__name__)
 class RapidOcrModel(BaseOcrModel):
-    def __init__(self, enabled: bool, options: RapidOcrOptions):
+    def __init__(
+        self,
+        enabled: bool,
+        options: RapidOcrOptions,
+        accelerator_options: AcceleratorOptions,
+    ):
         super().__init__(enabled=enabled, options=options)
         self.options: RapidOcrOptions
@@ -30,52 +40,21 @@ class RapidOcrModel(BaseOcrModel):
                     "Alternatively, Docling has support for other OCR engines. See the documentation."
                 )
-            # This configuration option will be revamped while introducing device settings for all models.
-            # For the moment we will default to auto and let onnx-runtime pick the best.
-            cls_use_cuda = True
-            rec_use_cuda = True
-            det_use_cuda = True
-            det_use_dml = True
-            cls_use_dml = True
-            rec_use_dml = True
-            # # Same as Defaults in RapidOCR
-            # cls_use_cuda = False
-            # rec_use_cuda = False
-            # det_use_cuda = False
-            # det_use_dml = False
-            # cls_use_dml = False
-            # rec_use_dml = False
-            # # If we set everything to true onnx-runtime would automatically choose the fastest accelerator
-            # if self.options.device == self.options.Device.AUTO:
-            #     cls_use_cuda = True
-            #     rec_use_cuda = True
-            #     det_use_cuda = True
-            #     det_use_dml = True
-            #     cls_use_dml = True
-            #     rec_use_dml = True
-            # # If we set use_cuda to true onnx would use the cuda device available in runtime if no cuda device is available it would run on CPU.
-            # elif self.options.device == self.options.Device.CUDA:
-            #     cls_use_cuda = True
-            #     rec_use_cuda = True
-            #     det_use_cuda = True
-            # # If we set use_dml to true onnx would use the dml device available in runtime if no dml device is available it would work on CPU.
-            # elif self.options.device == self.options.Device.DIRECTML:
-            #     det_use_dml = True
-            #     cls_use_dml = True
-            #     rec_use_dml = True
+            # Decide the accelerator devices
+            device = decide_device(accelerator_options.device)
+            use_cuda = str(AcceleratorDevice.CUDA.value).lower() in device
+            use_dml = accelerator_options.device == AcceleratorDevice.AUTO
+            intra_op_num_threads = accelerator_options.num_threads
             self.reader = RapidOCR(
                 text_score=self.options.text_score,
-                cls_use_cuda=cls_use_cuda,
-                rec_use_cuda=rec_use_cuda,
-                det_use_cuda=det_use_cuda,
-                det_use_dml=det_use_dml,
-                cls_use_dml=cls_use_dml,
-                rec_use_dml=rec_use_dml,
+                cls_use_cuda=use_cuda,
+                rec_use_cuda=use_cuda,
+                det_use_cuda=use_cuda,
+                det_use_dml=use_dml,
+                cls_use_dml=use_dml,
+                rec_use_dml=use_dml,
+                intra_op_num_threads=intra_op_num_threads,
                 print_verbose=self.options.print_verbose,
                 det_model_path=self.options.det_model_path,
                 cls_model_path=self.options.cls_model_path,

{docling-2.11.0 → docling-2.12.0}/docling/models/table_structure_model.py RENAMED Viewed

@@ -9,15 +9,25 @@ from PIL import ImageDraw
 from docling.datamodel.base_models import Page, Table, TableStructurePrediction
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    TableFormerMode,
+    TableStructureOptions,
+)
 from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
+from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder
 class TableStructureModel(BasePageModel):
     def __init__(
-        self, enabled: bool, artifacts_path: Path, options: TableStructureOptions
+        self,
+        enabled: bool,
+        artifacts_path: Path,
+        options: TableStructureOptions,
+        accelerator_options: AcceleratorOptions,
     ):
         self.options = options
         self.do_cell_matching = self.options.do_cell_matching
@@ -26,16 +36,26 @@ class TableStructureModel(BasePageModel):
         self.enabled = enabled
         if self.enabled:
             if self.mode == TableFormerMode.ACCURATE:
-                artifacts_path = artifacts_path / "fat"
+                artifacts_path = artifacts_path / "accurate"
+            else:
+                artifacts_path = artifacts_path / "fast"
             # Third Party
             import docling_ibm_models.tableformer.common as c
+            device = decide_device(accelerator_options.device)
+            # Disable MPS here, until we know why it makes things slower.
+            if device == AcceleratorDevice.MPS.value:
+                device = AcceleratorDevice.CPU.value
             self.tm_config = c.read_config(f"{artifacts_path}/tm_config.json")
             self.tm_config["model"]["save_dir"] = artifacts_path
             self.tm_model_type = self.tm_config["model"]["type"]
-            self.tf_predictor = TFPredictor(self.tm_config)
+            self.tf_predictor = TFPredictor(
+                self.tm_config, device, accelerator_options.num_threads
+            )
             self.scale = 2.0  # Scale up table input images to 144 dpi
     def draw_table_and_cells(

{docling-2.11.0 → docling-2.12.0}/docling/pipeline/standard_pdf_pipeline.py RENAMED Viewed

@@ -38,7 +38,7 @@ _log = logging.getLogger(__name__)
 class StandardPdfPipeline(PaginatedPipeline):
-    _layout_model_path = "model_artifacts/layout/beehive_v0.0.5_pt"
+    _layout_model_path = "model_artifacts/layout"
     _table_model_path = "model_artifacts/tableformer"
     def __init__(self, pipeline_options: PdfPipelineOptions):
@@ -75,7 +75,8 @@ class StandardPdfPipeline(PaginatedPipeline):
             # Layout model
             LayoutModel(
                 artifacts_path=self.artifacts_path
-                / StandardPdfPipeline._layout_model_path
+                / StandardPdfPipeline._layout_model_path,
+                accelerator_options=pipeline_options.accelerator_options,
             ),
             # Table structure model
             TableStructureModel(
@@ -83,6 +84,7 @@ class StandardPdfPipeline(PaginatedPipeline):
                 artifacts_path=self.artifacts_path
                 / StandardPdfPipeline._table_model_path,
                 options=pipeline_options.table_structure_options,
+                accelerator_options=pipeline_options.accelerator_options,
             ),
             # Page assemble
             PageAssembleModel(options=PageAssembleOptions(keep_images=keep_images)),
@@ -104,7 +106,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             repo_id="ds4sd/docling-models",
             force_download=force,
             local_dir=local_dir,
-            revision="v2.0.1",
+            revision="v2.1.0",
         )
         return Path(download_path)
@@ -114,6 +116,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             return EasyOcrModel(
                 enabled=self.pipeline_options.do_ocr,
                 options=self.pipeline_options.ocr_options,
+                accelerator_options=self.pipeline_options.accelerator_options,
             )
         elif isinstance(self.pipeline_options.ocr_options, TesseractCliOcrOptions):
             return TesseractOcrCliModel(
@@ -129,6 +132,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             return RapidOcrModel(
                 enabled=self.pipeline_options.do_ocr,
                 options=self.pipeline_options.ocr_options,
+                accelerator_options=self.pipeline_options.accelerator_options,
             )
         elif isinstance(self.pipeline_options.ocr_options, OcrMacOptions):
             if "darwin" != sys.platform:

docling-2.12.0/docling/utils/accelerator_utils.py ADDED Viewed

@@ -0,0 +1,42 @@
+import logging
+import torch
+from docling.datamodel.pipeline_options import AcceleratorDevice
+_log = logging.getLogger(__name__)
+def decide_device(accelerator_device: AcceleratorDevice) -> str:
+    r"""
+    Resolve the device based on the acceleration options and the available devices in the system
+    Rules:
+    1. AUTO: Check for the best available device on the system.
+    2. User-defined: Check if the device actually exists, otherwise fall back to CPU
+    """
+    cuda_index = 0
+    device = "cpu"
+    has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
+    has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    if accelerator_device == AcceleratorDevice.AUTO:
+        if has_cuda:
+            device = f"cuda:{cuda_index}"
+        elif has_mps:
+            device = "mps"
+    else:
+        if accelerator_device == AcceleratorDevice.CUDA:
+            if has_cuda:
+                device = f"cuda:{cuda_index}"
+            else:
+                _log.warning("CUDA is not available in the system. Fall back to 'CPU'")
+        elif accelerator_device == AcceleratorDevice.MPS:
+            if has_mps:
+                device = "mps"
+            else:
+                _log.warning("MPS is not available in the system. Fall back to 'CPU'")
+    _log.info("Accelerator device: '%s'", device)
+    return device

{docling-2.11.0 → docling-2.12.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "docling"
-version = "2.11.0"  # DO NOT EDIT, updated automatically
+version = "2.12.0"  # DO NOT EDIT, updated automatically
 description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
 authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Panos Vagenas <pva@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
 license = "MIT"
@@ -27,8 +27,9 @@ packages = [{include = "docling"}]
 python = "^3.9"
 docling-core = { version = "^2.9.0", extras = ["chunking"] }
 pydantic = "^2.0.0"
-docling-ibm-models = "^2.0.6"
+docling-ibm-models = "^3.1.0"
 deepsearch-glm = "^1.0.0"
+docling-parse = "^3.0.0"
 filetype = "^1.2.0"
 pypdfium2 = "^4.30.0"
 pydantic-settings = "^2.3.0"
@@ -36,7 +37,6 @@ huggingface_hub = ">=0.23,<1"
 requests = "^2.32.3"
 easyocr = "^1.7"
 tesserocr = { version = "^2.7.1", optional = true }
-docling-parse = "^3.0.0"
 certifi = ">=2024.7.4"
 rtree = "^1.3.0"
 scipy = "^1.6.0"