PyPI - docling - Versions diffs - 2.39.0__tar.gz → 2.40.0__tar.gz - Mend

docling 2.39.0.tar.gz → 2.40.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (132) hide show

{docling-2.39.0 → docling-2.40.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.39.0
+Version: 2.40.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -27,8 +27,8 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: pydantic<3.0.0,>=2.0.0
 Requires-Dist: docling-core[chunking]<3.0.0,>=2.39.0
-Requires-Dist: docling-ibm-models<4.0.0,>=3.4.4
 Requires-Dist: docling-parse<5.0.0,>=4.0.0
+Requires-Dist: docling-ibm-models<4,>=3.6.0
 Requires-Dist: filetype<2.0.0,>=1.2.0
 Requires-Dist: pypdfium2<5.0.0,>=4.30.0
 Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
@@ -57,7 +57,7 @@ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrm
 Provides-Extra: vlm
 Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
 Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
-Requires-Dist: mlx-vlm>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
+Requires-Dist: mlx-vlm<0.2,>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
 Provides-Extra: rapidocr
 Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
 Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"

{docling-2.39.0 → docling-2.40.0}/docling/backend/docling_parse_v4_backend.py RENAMED Viewed

@@ -187,7 +187,17 @@ class DoclingParseV4DocumentBackend(PdfDocumentBackend):
     def unload(self):
         super().unload()
-        self.dp_doc.unload()
-        with pypdfium2_lock:
-            self._pdoc.close()
-        self._pdoc = None
+        # Unload docling-parse document first
+        if self.dp_doc is not None:
+            self.dp_doc.unload()
+            self.dp_doc = None
+        # Then close pypdfium2 document with proper locking
+        if self._pdoc is not None:
+            with pypdfium2_lock:
+                try:
+                    self._pdoc.close()
+                except Exception:
+                    # Ignore cleanup errors
+                    pass
+            self._pdoc = None

{docling-2.39.0 → docling-2.40.0}/docling/backend/msexcel_backend.py RENAMED Viewed

@@ -337,10 +337,17 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
         # Collect the data within the bounds
         data = []
         visited_cells: set[tuple[int, int]] = set()
-        for ri in range(start_row, max_row + 1):
-            for rj in range(start_col, max_col + 1):
-                cell = sheet.cell(row=ri + 1, column=rj + 1)  # 1-based indexing
+        for ri, row in enumerate(
+            sheet.iter_rows(
+                min_row=start_row + 1,  # start_row is 0-based but iter_rows is 1-based
+                max_row=max_row + 1,
+                min_col=start_col + 1,
+                max_col=max_col + 1,
+                values_only=False,
+            ),
+            start_row,
+        ):
+            for rj, cell in enumerate(row, start_col):
                 # Check if the cell belongs to a merged range
                 row_span = 1
                 col_span = 1
@@ -397,10 +404,16 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
         """
         max_row: int = start_row
-        while max_row < sheet.max_row - 1:
-            # Get the cell value or check if it is part of a merged cell
-            cell = sheet.cell(row=max_row + 2, column=start_col + 1)
+        for ri, (cell,) in enumerate(
+            sheet.iter_rows(
+                min_row=start_row + 2,
+                max_row=sheet.max_row,
+                min_col=start_col + 1,
+                max_col=start_col + 1,
+                values_only=False,
+            ),
+            start_row + 1,
+        ):
             # Check if the cell is part of a merged range
             merged_range = next(
                 (mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr),
@@ -414,7 +427,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
             if merged_range:
                 max_row = max(max_row, merged_range.max_row - 1)
             else:
-                max_row += 1
+                max_row = ri
         return max_row
@@ -433,10 +446,16 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
         """
         max_col: int = start_col
-        while max_col < sheet.max_column - 1:
-            # Get the cell value or check if it is part of a merged cell
-            cell = sheet.cell(row=start_row + 1, column=max_col + 2)
+        for rj, (cell,) in enumerate(
+            sheet.iter_cols(
+                min_row=start_row + 1,
+                max_row=start_row + 1,
+                min_col=start_col + 2,
+                max_col=sheet.max_column,
+                values_only=False,
+            ),
+            start_col + 1,
+        ):
             # Check if the cell is part of a merged range
             merged_range = next(
                 (mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr),
@@ -450,7 +469,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
             if merged_range:
                 max_col = max(max_col, merged_range.max_col - 1)
             else:
-                max_col += 1
+                max_col = rj
         return max_col

{docling-2.39.0 → docling-2.40.0}/docling/datamodel/pipeline_options.py RENAMED Viewed

@@ -1,4 +1,5 @@
 import logging
+from datetime import datetime
 from enum import Enum
 from pathlib import Path
 from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
@@ -265,6 +266,12 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
     )
+class LayoutOptions(BaseModel):
+    """Options for layout processing."""
+    create_orphan_clusters: bool = True  # Whether to create clusters for orphaned cells
 class AsrPipelineOptions(PipelineOptions):
     asr_options: Union[InlineAsrOptions] = asr_model_specs.WHISPER_TINY
     artifacts_path: Optional[Union[Path, str]] = None
@@ -289,6 +296,7 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
     picture_description_options: PictureDescriptionBaseOptions = (
         smolvlm_picture_description
     )
+    layout_options: LayoutOptions = LayoutOptions()
     images_scale: float = 1.0
     generate_page_images: bool = False

{docling-2.39.0 → docling-2.40.0}/docling/models/base_ocr_model.py RENAMED Viewed

@@ -3,14 +3,13 @@ import logging
 from abc import abstractmethod
 from collections.abc import Iterable
 from pathlib import Path
-from typing import List, Optional, Type
+from typing import TYPE_CHECKING, List, Optional, Type
 import numpy as np
 from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling_core.types.doc.page import TextCell
 from PIL import Image, ImageDraw
 from rtree import index
-from scipy.ndimage import binary_dilation, find_objects, label
 from docling.datamodel.accelerator_options import AcceleratorOptions
 from docling.datamodel.base_models import Page
@@ -31,11 +30,16 @@ class BaseOcrModel(BasePageModel, BaseModelWithOptions):
         options: OcrOptions,
         accelerator_options: AcceleratorOptions,
     ):
+        # Make sure any delay/error from import occurs on ocr model init and not first use
+        from scipy.ndimage import binary_dilation, find_objects, label
         self.enabled = enabled
         self.options = options
     # Computes the optimum amount and coordinates of rectangles to OCR on a given page
     def get_ocr_rects(self, page: Page) -> List[BoundingBox]:
+        from scipy.ndimage import binary_dilation, find_objects, label
         BITMAP_COVERAGE_TRESHOLD = 0.75
         assert page.size is not None

{docling-2.39.0 → docling-2.40.0}/docling/models/layout_model.py RENAMED Viewed

@@ -7,12 +7,12 @@ from typing import Optional
 import numpy as np
 from docling_core.types.doc import DocItemLabel
-from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
 from PIL import Image
 from docling.datamodel.accelerator_options import AcceleratorOptions
 from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
 from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import LayoutOptions
 from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
 from docling.models.utils.hf_model_download import download_hf_model
@@ -49,8 +49,15 @@ class LayoutModel(BasePageModel):
     CONTAINER_LABELS = [DocItemLabel.FORM, DocItemLabel.KEY_VALUE_REGION]
     def __init__(
-        self, artifacts_path: Optional[Path], accelerator_options: AcceleratorOptions
+        self,
+        artifacts_path: Optional[Path],
+        accelerator_options: AcceleratorOptions,
+        options: LayoutOptions,
     ):
+        from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
+        self.options = options
         device = decide_device(accelerator_options.device)
         if artifacts_path is None:
@@ -176,7 +183,7 @@ class LayoutModel(BasePageModel):
                     # Apply postprocessing
                     processed_clusters, processed_cells = LayoutPostprocessor(
-                        page, clusters
+                        page, clusters, self.options
                     ).postprocess()
                     # Note: LayoutPostprocessor updates page.cells and page.parsed_page internally

{docling-2.39.0 → docling-2.40.0}/docling/models/picture_description_vlm_model.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import threading
 from collections.abc import Iterable
 from pathlib import Path
 from typing import Optional, Type, Union
@@ -15,6 +16,9 @@ from docling.models.utils.hf_model_download import (
 )
 from docling.utils.accelerator_utils import decide_device
+# Global lock for model initialization to prevent threading issues
+_model_init_lock = threading.Lock()
 class PictureDescriptionVlmModel(
     PictureDescriptionBaseModel, HuggingFaceModelDownloadMixin
@@ -57,17 +61,18 @@ class PictureDescriptionVlmModel(
                 )
             # Initialize processor and model
-            self.processor = AutoProcessor.from_pretrained(artifacts_path)
-            self.model = AutoModelForVision2Seq.from_pretrained(
-                artifacts_path,
-                torch_dtype=torch.bfloat16,
-                _attn_implementation=(
-                    "flash_attention_2"
-                    if self.device.startswith("cuda")
-                    and accelerator_options.cuda_use_flash_attention2
-                    else "eager"
-                ),
-            ).to(self.device)
+            with _model_init_lock:
+                self.processor = AutoProcessor.from_pretrained(artifacts_path)
+                self.model = AutoModelForVision2Seq.from_pretrained(
+                    artifacts_path,
+                    torch_dtype=torch.bfloat16,
+                    _attn_implementation=(
+                        "flash_attention_2"
+                        if self.device.startswith("cuda")
+                        and accelerator_options.cuda_use_flash_attention2
+                        else "eager"
+                    ),
+                ).to(self.device)
             self.provenance = f"{self.options.repo_id}"

docling-2.40.0/docling/models/plugins/defaults.py ADDED Viewed

@@ -0,0 +1,28 @@
+def ocr_engines():
+    from docling.models.easyocr_model import EasyOcrModel
+    from docling.models.ocr_mac_model import OcrMacModel
+    from docling.models.rapid_ocr_model import RapidOcrModel
+    from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
+    from docling.models.tesseract_ocr_model import TesseractOcrModel
+    return {
+        "ocr_engines": [
+            EasyOcrModel,
+            OcrMacModel,
+            RapidOcrModel,
+            TesseractOcrModel,
+            TesseractOcrCliModel,
+        ]
+    }
+def picture_description():
+    from docling.models.picture_description_api_model import PictureDescriptionApiModel
+    from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
+    return {
+        "picture_description": [
+            PictureDescriptionVlmModel,
+            PictureDescriptionApiModel,
+        ]
+    }

{docling-2.39.0 → docling-2.40.0}/docling/models/readingorder_model.py RENAMED Viewed

@@ -12,6 +12,9 @@ from docling_core.types.doc import (
     TableData,
 )
 from docling_core.types.doc.document import ContentLayer
+from docling_ibm_models.list_item_normalizer.list_marker_processor import (
+    ListItemMarkerProcessor,
+)
 from docling_ibm_models.reading_order.reading_order_rb import (
     PageElement as ReadingOrderPageElement,
     ReadingOrderPredictor,
@@ -40,6 +43,7 @@ class ReadingOrderModel:
     def __init__(self, options: ReadingOrderOptions):
         self.options = options
         self.ro_model = ReadingOrderPredictor()
+        self.list_item_processor = ListItemMarkerProcessor()
     def _assembled_to_readingorder_elements(
         self, conv_res: ConversionResult
@@ -92,7 +96,8 @@ class ReadingOrderModel:
             )
             if c_label == DocItemLabel.LIST_ITEM:
                 # TODO: Infer if this is a numbered or a bullet list item
-                doc.add_list_item(parent=doc_item, text=c_text, prov=c_prov)
+                l_item = doc.add_list_item(parent=doc_item, text=c_text, prov=c_prov)
+                self.list_item_processor.process_list_item(l_item)
             elif c_label == DocItemLabel.SECTION_HEADER:
                 doc.add_heading(parent=doc_item, text=c_text, prov=c_prov)
             else:
@@ -301,6 +306,8 @@ class ReadingOrderModel:
             new_item = out_doc.add_list_item(
                 text=cap_text, enumerated=False, prov=prov, parent=current_list
             )
+            self.list_item_processor.process_list_item(new_item)
         elif label == DocItemLabel.SECTION_HEADER:
             current_list = None

{docling-2.39.0 → docling-2.40.0}/docling/models/table_structure_model.py RENAMED Viewed

@@ -10,7 +10,6 @@ from docling_core.types.doc.page import (
     BoundingRectangle,
     TextCellUnit,
 )
-from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
 from PIL import ImageDraw
 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
@@ -70,6 +69,9 @@ class TableStructureModel(BasePageModel):
             # Third Party
             import docling_ibm_models.tableformer.common as c
+            from docling_ibm_models.tableformer.data_management.tf_predictor import (
+                TFPredictor,
+            )
             device = decide_device(accelerator_options.device)

{docling-2.39.0 → docling-2.40.0}/docling/models/tesseract_ocr_model.py RENAMED Viewed

@@ -144,7 +144,10 @@ class TesseractOcrModel(BaseOcrModel):
                         local_reader = self.reader
                         self.osd_reader.SetImage(high_res_image)
+                        doc_orientation = 0
                         osd = self.osd_reader.DetectOrientationScript()
                         # No text, or Orientation and Script detection failure
                         if osd is None:
                             _log.error(
@@ -158,11 +161,14 @@ class TesseractOcrModel(BaseOcrModel):
                             # to OCR in the hope OCR will succeed while OSD failed
                             if self._is_auto:
                                 continue
-                        doc_orientation = parse_tesseract_orientation(osd["orient_deg"])
-                        if doc_orientation != 0:
-                            high_res_image = high_res_image.rotate(
-                                -doc_orientation, expand=True
+                        else:
+                            doc_orientation = parse_tesseract_orientation(
+                                osd["orient_deg"]
                             )
+                            if doc_orientation != 0:
+                                high_res_image = high_res_image.rotate(
+                                    -doc_orientation, expand=True
+                                )
                         if self._is_auto:
                             script = osd["script_name"]
                             script = map_tesseract_script(script)

{docling-2.39.0 → docling-2.40.0}/docling/pipeline/standard_pdf_pipeline.py RENAMED Viewed

@@ -80,6 +80,7 @@ class StandardPdfPipeline(PaginatedPipeline):
             LayoutModel(
                 artifacts_path=artifacts_path,
                 accelerator_options=pipeline_options.accelerator_options,
+                options=pipeline_options.layout_options,
             ),
             # Table structure model
             TableStructureModel(

{docling-2.39.0 → docling-2.40.0}/docling/utils/accelerator_utils.py RENAMED Viewed

@@ -1,8 +1,6 @@
 import logging
 from typing import List, Optional
-import torch
 from docling.datamodel.accelerator_options import AcceleratorDevice
 _log = logging.getLogger(__name__)
@@ -18,6 +16,8 @@ def decide_device(
     1. AUTO: Check for the best available device on the system.
     2. User-defined: Check if the device actually exists, otherwise fall-back to CPU
     """
+    import torch
     device = "cpu"
     has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()

{docling-2.39.0 → docling-2.40.0}/docling/utils/layout_postprocessor.py RENAMED Viewed

@@ -9,6 +9,7 @@ from docling_core.types.doc.page import TextCell
 from rtree import index
 from docling.datamodel.base_models import BoundingBox, Cluster, Page
+from docling.datamodel.pipeline_options import LayoutOptions
 _log = logging.getLogger(__name__)
@@ -194,12 +195,16 @@ class LayoutPostprocessor:
         DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
     }
-    def __init__(self, page: Page, clusters: List[Cluster]) -> None:
+    def __init__(
+        self, page: Page, clusters: List[Cluster], options: LayoutOptions
+    ) -> None:
         """Initialize processor with page and clusters."""
         self.cells = page.cells
         self.page = page
         self.page_size = page.size
         self.all_clusters = clusters
+        self.options = options
         self.regular_clusters = [
             c for c in clusters if c.label not in self.SPECIAL_TYPES
         ]
@@ -267,7 +272,7 @@ class LayoutPostprocessor:
         # Handle orphaned cells
         unassigned = self._find_unassigned_cells(clusters)
-        if unassigned:
+        if unassigned and self.options.create_orphan_clusters:
             next_id = max((c.id for c in self.all_clusters), default=0) + 1
             orphan_clusters = []
             for i, cell in enumerate(unassigned):

{docling-2.39.0 → docling-2.40.0}/docling.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.39.0
+Version: 2.40.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -27,8 +27,8 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: pydantic<3.0.0,>=2.0.0
 Requires-Dist: docling-core[chunking]<3.0.0,>=2.39.0
-Requires-Dist: docling-ibm-models<4.0.0,>=3.4.4
 Requires-Dist: docling-parse<5.0.0,>=4.0.0
+Requires-Dist: docling-ibm-models<4,>=3.6.0
 Requires-Dist: filetype<2.0.0,>=1.2.0
 Requires-Dist: pypdfium2<5.0.0,>=4.30.0
 Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
@@ -57,7 +57,7 @@ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrm
 Provides-Extra: vlm
 Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
 Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
-Requires-Dist: mlx-vlm>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
+Requires-Dist: mlx-vlm<0.2,>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
 Provides-Extra: rapidocr
 Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
 Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"

{docling-2.39.0 → docling-2.40.0}/docling.egg-info/requires.txt RENAMED Viewed

@@ -1,7 +1,7 @@
 pydantic<3.0.0,>=2.0.0
 docling-core[chunking]<3.0.0,>=2.39.0
-docling-ibm-models<4.0.0,>=3.4.4
 docling-parse<5.0.0,>=4.0.0
+docling-ibm-models<4,>=3.6.0
 filetype<2.0.0,>=1.2.0
 pypdfium2<5.0.0,>=4.30.0
 pydantic-settings<3.0.0,>=2.3.0
@@ -46,4 +46,4 @@ transformers<5.0.0,>=4.46.0
 accelerate<2.0.0,>=1.2.1
 [vlm:python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"]
-mlx-vlm>=0.1.22
+mlx-vlm<0.2,>=0.1.22

{docling-2.39.0 → docling-2.40.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "docling"
-version = "2.39.0"  # DO NOT EDIT, updated automatically
+version = "2.40.0"  # DO NOT EDIT, updated automatically
 description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
 license = "MIT"
 keywords = [
@@ -45,8 +45,8 @@ requires-python = '>=3.9,<4.0'
 dependencies = [
   'pydantic (>=2.0.0,<3.0.0)',
   'docling-core[chunking] (>=2.39.0,<3.0.0)',
-  'docling-ibm-models (>=3.4.4,<4.0.0)',
   'docling-parse (>=4.0.0,<5.0.0)',
+  'docling-ibm-models (>=3.6.0,<4)',
   'filetype (>=1.2.0,<2.0.0)',
   'pypdfium2 (>=4.30.0,<5.0.0)',
   'pydantic-settings (>=2.3.0,<3.0.0)',
@@ -91,7 +91,7 @@ ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
 vlm = [
   'transformers (>=4.46.0,<5.0.0)',
   'accelerate (>=1.2.1,<2.0.0)',
-  'mlx-vlm >=0.1.22 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
+  'mlx-vlm (>=0.1.22,<0.2) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
 ]
 rapidocr = [
   'rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; python_version < "3.13"',

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_docling_parse_v4.py RENAMED Viewed

@@ -46,6 +46,12 @@ def test_text_cell_counts():
                 )
             last_cell_count = len(cells)
+            # Clean up page backend after each iteration
+            page_backend.unload()
+    # Explicitly clean up document backend to prevent race conditions in CI
+    doc_backend.unload()
 def test_get_text_from_rect(test_doc_path):
     doc_backend = _get_backend(test_doc_path)
@@ -59,6 +65,10 @@ def test_get_text_from_rect(test_doc_path):
     assert textpiece.strip() == ref
+    # Explicitly clean up resources
+    page_backend.unload()
+    doc_backend.unload()
 def test_crop_page_image(test_doc_path):
     doc_backend = _get_backend(test_doc_path)
@@ -70,7 +80,14 @@ def test_crop_page_image(test_doc_path):
     )
     # im.show()
+    # Explicitly clean up resources
+    page_backend.unload()
+    doc_backend.unload()
 def test_num_pages(test_doc_path):
     doc_backend = _get_backend(test_doc_path)
     doc_backend.page_count() == 9
+    # Explicitly clean up resources to prevent race conditions in CI
+    doc_backend.unload()

docling-2.39.0/docling/models/plugins/defaults.py DELETED Viewed

@@ -1,28 +0,0 @@
-from docling.models.easyocr_model import EasyOcrModel
-from docling.models.ocr_mac_model import OcrMacModel
-from docling.models.picture_description_api_model import PictureDescriptionApiModel
-from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
-from docling.models.rapid_ocr_model import RapidOcrModel
-from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
-from docling.models.tesseract_ocr_model import TesseractOcrModel
-def ocr_engines():
-    return {
-        "ocr_engines": [
-            EasyOcrModel,
-            OcrMacModel,
-            RapidOcrModel,
-            TesseractOcrModel,
-            TesseractOcrCliModel,
-        ]
-    }
-def picture_description():
-    return {
-        "picture_description": [
-            PictureDescriptionVlmModel,
-            PictureDescriptionApiModel,
-        ]
-    }

{docling-2.39.0 → docling-2.40.0}/LICENSE RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/README.md RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/abstract_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/asciidoc_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/csv_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/docling_parse_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/docling_parse_v2_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/docx/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/docx/latex/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/docx/latex/latex_dict.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/docx/latex/omml.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/html_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/json/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/json/docling_json_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/md_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/mspowerpoint_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/msword_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/noop_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/pdf_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/pypdfium2_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/xml/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/xml/jats_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/backend/xml/uspto_backend.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/chunking/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/cli/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/cli/main.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/cli/models.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/cli/tools.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/datamodel/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/datamodel/accelerator_options.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/datamodel/asr_model_specs.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/datamodel/base_models.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/datamodel/document.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/datamodel/pipeline_options_asr_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/datamodel/pipeline_options_vlm_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/datamodel/settings.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/datamodel/vlm_model_specs.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/document_converter.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/exceptions.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/api_vlm_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/base_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/code_formula_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/document_picture_classifier.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/easyocr_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/factories/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/factories/base_factory.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/factories/ocr_factory.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/factories/picture_description_factory.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/ocr_mac_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/page_assemble_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/page_preprocessing_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/picture_description_api_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/picture_description_base_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/plugins/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/rapid_ocr_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/tesseract_ocr_cli_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/utils/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/utils/hf_model_download.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/vlm_models_inline/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/vlm_models_inline/hf_transformers_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/models/vlm_models_inline/mlx_model.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/pipeline/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/pipeline/asr_pipeline.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/pipeline/base_pipeline.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/pipeline/simple_pipeline.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/pipeline/vlm_pipeline.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/py.typed RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/__init__.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/api_image_request.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/export.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/glm_utils.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/locks.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/model_downloader.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/ocr_utils.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/orientation.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/profiling.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/utils.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling/utils/visualization.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling.egg-info/entry_points.txt RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/docling.egg-info/top_level.txt RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/setup.cfg RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_asr_pipeline.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_asciidoc.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_csv.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_docling_json.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_docling_parse.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_docling_parse_v2.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_html.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_jats.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_markdown.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_msexcel.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_msword.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_patent_uspto.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_pdfium.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_pptx.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_backend_webp.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_cli.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_code_formula.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_data_gen_flag.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_document_picture_classifier.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_e2e_conversion.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_e2e_ocr_conversion.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_input_doc.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_interfaces.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_invalid_input.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_legacy_format_transform.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_options.py RENAMED Viewed

File without changes

{docling-2.39.0 → docling-2.40.0}/tests/test_settings_load.py RENAMED Viewed

File without changes

docling 2.39.0__tar.gz → 2.40.0__tar.gz

docling 2.39.0__tar.gz → 2.40.0__tar.gz