docling 2.58.0__tar.gz → 2.60.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of docling has been flagged as potentially problematic; consult the registry's advisory page for details.

Files changed (153)
  1. {docling-2.58.0 → docling-2.60.0}/PKG-INFO +9 -8
  2. {docling-2.58.0 → docling-2.60.0}/docling/backend/msexcel_backend.py +6 -2
  3. {docling-2.58.0 → docling-2.60.0}/docling/backend/pypdfium2_backend.py +4 -4
  4. {docling-2.58.0 → docling-2.60.0}/docling/cli/main.py +19 -8
  5. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/base_models.py +2 -0
  6. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/pipeline_options.py +13 -10
  7. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/pipeline_options_vlm_model.py +1 -0
  8. {docling-2.58.0 → docling-2.60.0}/docling/models/api_vlm_model.py +5 -3
  9. {docling-2.58.0 → docling-2.60.0}/docling/models/layout_model.py +4 -0
  10. {docling-2.58.0 → docling-2.60.0}/docling/models/picture_description_vlm_model.py +5 -1
  11. {docling-2.58.0 → docling-2.60.0}/docling/models/vlm_models_inline/hf_transformers_model.py +13 -3
  12. {docling-2.58.0 → docling-2.60.0}/docling/models/vlm_models_inline/mlx_model.py +9 -3
  13. {docling-2.58.0 → docling-2.60.0}/docling/models/vlm_models_inline/nuextract_transformers_model.py +13 -3
  14. {docling-2.58.0 → docling-2.60.0}/docling/models/vlm_models_inline/vllm_model.py +42 -8
  15. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/asr_pipeline.py +10 -3
  16. docling-2.58.0/docling/pipeline/standard_pdf_pipeline.py → docling-2.60.0/docling/pipeline/legacy_standard_pdf_pipeline.py +2 -2
  17. docling-2.58.0/docling/pipeline/threaded_standard_pdf_pipeline.py → docling-2.60.0/docling/pipeline/standard_pdf_pipeline.py +101 -19
  18. docling-2.60.0/docling/pipeline/threaded_standard_pdf_pipeline.py +5 -0
  19. {docling-2.58.0 → docling-2.60.0}/docling/utils/api_image_request.py +17 -6
  20. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/PKG-INFO +9 -8
  21. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/SOURCES.txt +1 -0
  22. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/requires.txt +8 -6
  23. {docling-2.58.0 → docling-2.60.0}/pyproject.toml +11 -10
  24. {docling-2.58.0 → docling-2.60.0}/tests/test_asr_pipeline.py +6 -0
  25. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_msexcel.py +90 -0
  26. {docling-2.58.0 → docling-2.60.0}/tests/test_e2e_ocr_conversion.py +10 -4
  27. {docling-2.58.0 → docling-2.60.0}/tests/test_threaded_pipeline.py +23 -1
  28. {docling-2.58.0 → docling-2.60.0}/LICENSE +0 -0
  29. {docling-2.58.0 → docling-2.60.0}/README.md +0 -0
  30. {docling-2.58.0 → docling-2.60.0}/docling/__init__.py +0 -0
  31. {docling-2.58.0 → docling-2.60.0}/docling/backend/__init__.py +0 -0
  32. {docling-2.58.0 → docling-2.60.0}/docling/backend/abstract_backend.py +0 -0
  33. {docling-2.58.0 → docling-2.60.0}/docling/backend/asciidoc_backend.py +0 -0
  34. {docling-2.58.0 → docling-2.60.0}/docling/backend/csv_backend.py +0 -0
  35. {docling-2.58.0 → docling-2.60.0}/docling/backend/docling_parse_backend.py +0 -0
  36. {docling-2.58.0 → docling-2.60.0}/docling/backend/docling_parse_v2_backend.py +0 -0
  37. {docling-2.58.0 → docling-2.60.0}/docling/backend/docling_parse_v4_backend.py +0 -0
  38. {docling-2.58.0 → docling-2.60.0}/docling/backend/docx/__init__.py +0 -0
  39. {docling-2.58.0 → docling-2.60.0}/docling/backend/docx/drawingml/utils.py +0 -0
  40. {docling-2.58.0 → docling-2.60.0}/docling/backend/docx/latex/__init__.py +0 -0
  41. {docling-2.58.0 → docling-2.60.0}/docling/backend/docx/latex/latex_dict.py +0 -0
  42. {docling-2.58.0 → docling-2.60.0}/docling/backend/docx/latex/omml.py +0 -0
  43. {docling-2.58.0 → docling-2.60.0}/docling/backend/html_backend.py +0 -0
  44. {docling-2.58.0 → docling-2.60.0}/docling/backend/json/__init__.py +0 -0
  45. {docling-2.58.0 → docling-2.60.0}/docling/backend/json/docling_json_backend.py +0 -0
  46. {docling-2.58.0 → docling-2.60.0}/docling/backend/md_backend.py +0 -0
  47. {docling-2.58.0 → docling-2.60.0}/docling/backend/mets_gbs_backend.py +0 -0
  48. {docling-2.58.0 → docling-2.60.0}/docling/backend/mspowerpoint_backend.py +0 -0
  49. {docling-2.58.0 → docling-2.60.0}/docling/backend/msword_backend.py +0 -0
  50. {docling-2.58.0 → docling-2.60.0}/docling/backend/noop_backend.py +0 -0
  51. {docling-2.58.0 → docling-2.60.0}/docling/backend/pdf_backend.py +0 -0
  52. {docling-2.58.0 → docling-2.60.0}/docling/backend/webvtt_backend.py +0 -0
  53. {docling-2.58.0 → docling-2.60.0}/docling/backend/xml/__init__.py +0 -0
  54. {docling-2.58.0 → docling-2.60.0}/docling/backend/xml/jats_backend.py +0 -0
  55. {docling-2.58.0 → docling-2.60.0}/docling/backend/xml/uspto_backend.py +0 -0
  56. {docling-2.58.0 → docling-2.60.0}/docling/chunking/__init__.py +0 -0
  57. {docling-2.58.0 → docling-2.60.0}/docling/cli/__init__.py +0 -0
  58. {docling-2.58.0 → docling-2.60.0}/docling/cli/models.py +0 -0
  59. {docling-2.58.0 → docling-2.60.0}/docling/cli/tools.py +0 -0
  60. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/__init__.py +0 -0
  61. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/accelerator_options.py +0 -0
  62. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/asr_model_specs.py +0 -0
  63. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/backend_options.py +0 -0
  64. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/document.py +0 -0
  65. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/extraction.py +0 -0
  66. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/layout_model_specs.py +0 -0
  67. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/pipeline_options_asr_model.py +0 -0
  68. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/settings.py +0 -0
  69. {docling-2.58.0 → docling-2.60.0}/docling/datamodel/vlm_model_specs.py +0 -0
  70. {docling-2.58.0 → docling-2.60.0}/docling/document_converter.py +0 -0
  71. {docling-2.58.0 → docling-2.60.0}/docling/document_extractor.py +0 -0
  72. {docling-2.58.0 → docling-2.60.0}/docling/exceptions.py +0 -0
  73. {docling-2.58.0 → docling-2.60.0}/docling/models/__init__.py +0 -0
  74. {docling-2.58.0 → docling-2.60.0}/docling/models/auto_ocr_model.py +0 -0
  75. {docling-2.58.0 → docling-2.60.0}/docling/models/base_model.py +0 -0
  76. {docling-2.58.0 → docling-2.60.0}/docling/models/base_ocr_model.py +0 -0
  77. {docling-2.58.0 → docling-2.60.0}/docling/models/code_formula_model.py +0 -0
  78. {docling-2.58.0 → docling-2.60.0}/docling/models/document_picture_classifier.py +0 -0
  79. {docling-2.58.0 → docling-2.60.0}/docling/models/easyocr_model.py +0 -0
  80. {docling-2.58.0 → docling-2.60.0}/docling/models/factories/__init__.py +0 -0
  81. {docling-2.58.0 → docling-2.60.0}/docling/models/factories/base_factory.py +0 -0
  82. {docling-2.58.0 → docling-2.60.0}/docling/models/factories/ocr_factory.py +0 -0
  83. {docling-2.58.0 → docling-2.60.0}/docling/models/factories/picture_description_factory.py +0 -0
  84. {docling-2.58.0 → docling-2.60.0}/docling/models/ocr_mac_model.py +0 -0
  85. {docling-2.58.0 → docling-2.60.0}/docling/models/page_assemble_model.py +0 -0
  86. {docling-2.58.0 → docling-2.60.0}/docling/models/page_preprocessing_model.py +0 -0
  87. {docling-2.58.0 → docling-2.60.0}/docling/models/picture_description_api_model.py +0 -0
  88. {docling-2.58.0 → docling-2.60.0}/docling/models/picture_description_base_model.py +0 -0
  89. {docling-2.58.0 → docling-2.60.0}/docling/models/plugins/__init__.py +0 -0
  90. {docling-2.58.0 → docling-2.60.0}/docling/models/plugins/defaults.py +0 -0
  91. {docling-2.58.0 → docling-2.60.0}/docling/models/rapid_ocr_model.py +0 -0
  92. {docling-2.58.0 → docling-2.60.0}/docling/models/readingorder_model.py +0 -0
  93. {docling-2.58.0 → docling-2.60.0}/docling/models/table_structure_model.py +0 -0
  94. {docling-2.58.0 → docling-2.60.0}/docling/models/tesseract_ocr_cli_model.py +0 -0
  95. {docling-2.58.0 → docling-2.60.0}/docling/models/tesseract_ocr_model.py +0 -0
  96. {docling-2.58.0 → docling-2.60.0}/docling/models/utils/__init__.py +0 -0
  97. {docling-2.58.0 → docling-2.60.0}/docling/models/utils/generation_utils.py +0 -0
  98. {docling-2.58.0 → docling-2.60.0}/docling/models/utils/hf_model_download.py +0 -0
  99. {docling-2.58.0 → docling-2.60.0}/docling/models/vlm_models_inline/__init__.py +0 -0
  100. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/__init__.py +0 -0
  101. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/base_extraction_pipeline.py +0 -0
  102. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/base_pipeline.py +0 -0
  103. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/extraction_vlm_pipeline.py +0 -0
  104. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/simple_pipeline.py +0 -0
  105. {docling-2.58.0 → docling-2.60.0}/docling/pipeline/vlm_pipeline.py +0 -0
  106. {docling-2.58.0 → docling-2.60.0}/docling/py.typed +0 -0
  107. {docling-2.58.0 → docling-2.60.0}/docling/utils/__init__.py +0 -0
  108. {docling-2.58.0 → docling-2.60.0}/docling/utils/accelerator_utils.py +0 -0
  109. {docling-2.58.0 → docling-2.60.0}/docling/utils/export.py +0 -0
  110. {docling-2.58.0 → docling-2.60.0}/docling/utils/glm_utils.py +0 -0
  111. {docling-2.58.0 → docling-2.60.0}/docling/utils/layout_postprocessor.py +0 -0
  112. {docling-2.58.0 → docling-2.60.0}/docling/utils/locks.py +0 -0
  113. {docling-2.58.0 → docling-2.60.0}/docling/utils/model_downloader.py +0 -0
  114. {docling-2.58.0 → docling-2.60.0}/docling/utils/ocr_utils.py +0 -0
  115. {docling-2.58.0 → docling-2.60.0}/docling/utils/orientation.py +0 -0
  116. {docling-2.58.0 → docling-2.60.0}/docling/utils/profiling.py +0 -0
  117. {docling-2.58.0 → docling-2.60.0}/docling/utils/utils.py +0 -0
  118. {docling-2.58.0 → docling-2.60.0}/docling/utils/visualization.py +0 -0
  119. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/dependency_links.txt +0 -0
  120. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/entry_points.txt +0 -0
  121. {docling-2.58.0 → docling-2.60.0}/docling.egg-info/top_level.txt +0 -0
  122. {docling-2.58.0 → docling-2.60.0}/setup.cfg +0 -0
  123. {docling-2.58.0 → docling-2.60.0}/tests/test_asr_mlx_whisper.py +0 -0
  124. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_asciidoc.py +0 -0
  125. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_csv.py +0 -0
  126. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_docling_json.py +0 -0
  127. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_docling_parse.py +0 -0
  128. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_docling_parse_v2.py +0 -0
  129. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_docling_parse_v4.py +0 -0
  130. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_html.py +0 -0
  131. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_jats.py +0 -0
  132. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_markdown.py +0 -0
  133. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_mets_gbs.py +0 -0
  134. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_msword.py +0 -0
  135. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_patent_uspto.py +0 -0
  136. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_pdfium.py +0 -0
  137. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_pptx.py +0 -0
  138. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_vtt.py +0 -0
  139. {docling-2.58.0 → docling-2.60.0}/tests/test_backend_webp.py +0 -0
  140. {docling-2.58.0 → docling-2.60.0}/tests/test_cli.py +0 -0
  141. {docling-2.58.0 → docling-2.60.0}/tests/test_code_formula.py +0 -0
  142. {docling-2.58.0 → docling-2.60.0}/tests/test_data_gen_flag.py +0 -0
  143. {docling-2.58.0 → docling-2.60.0}/tests/test_document_picture_classifier.py +0 -0
  144. {docling-2.58.0 → docling-2.60.0}/tests/test_e2e_conversion.py +0 -0
  145. {docling-2.58.0 → docling-2.60.0}/tests/test_extraction.py +0 -0
  146. {docling-2.58.0 → docling-2.60.0}/tests/test_input_doc.py +0 -0
  147. {docling-2.58.0 → docling-2.60.0}/tests/test_interfaces.py +0 -0
  148. {docling-2.58.0 → docling-2.60.0}/tests/test_invalid_input.py +0 -0
  149. {docling-2.58.0 → docling-2.60.0}/tests/test_legacy_format_transform.py +0 -0
  150. {docling-2.58.0 → docling-2.60.0}/tests/test_ocr_utils.py +0 -0
  151. {docling-2.58.0 → docling-2.60.0}/tests/test_options.py +0 -0
  152. {docling-2.58.0 → docling-2.60.0}/tests/test_pdf_password.py +0 -0
  153. {docling-2.58.0 → docling-2.60.0}/tests/test_settings_load.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.58.0
3
+ Version: 2.60.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
22
22
  Classifier: Programming Language :: Python :: 3.11
23
23
  Classifier: Programming Language :: Python :: 3.12
24
24
  Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
25
26
  Requires-Python: <4.0,>=3.9
26
27
  Description-Content-Type: text/markdown
27
28
  License-File: LICENSE
@@ -45,7 +46,7 @@ Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
45
46
  Requires-Dist: pandas<3.0.0,>=2.1.4
46
47
  Requires-Dist: marko<3.0.0,>=2.1.2
47
48
  Requires-Dist: openpyxl<4.0.0,>=3.1.5
48
- Requires-Dist: lxml<6.0.0,>=4.0.0
49
+ Requires-Dist: lxml<7.0.0,>=4.0.0
49
50
  Requires-Dist: pillow<12.0.0,>=10.0.0
50
51
  Requires-Dist: tqdm<5.0.0,>=4.65.0
51
52
  Requires-Dist: pluggy<2.0.0,>=1.0.0
@@ -62,15 +63,15 @@ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrm
62
63
  Provides-Extra: vlm
63
64
  Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
64
65
  Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
65
- Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
66
- Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
66
+ Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
67
+ Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
67
68
  Requires-Dist: qwen-vl-utils>=0.0.11; extra == "vlm"
68
69
  Provides-Extra: rapidocr
69
- Requires-Dist: rapidocr<4.0.0,>=3.3; python_version < "3.14" and extra == "rapidocr"
70
- Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
70
+ Requires-Dist: rapidocr<4.0.0,>=3.3; extra == "rapidocr"
71
+ Requires-Dist: onnxruntime<2.0.0,>=1.7.0; python_version < "3.14" and extra == "rapidocr"
71
72
  Provides-Extra: asr
72
- Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
73
- Requires-Dist: openai-whisper>=20250625; extra == "asr"
73
+ Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
74
+ Requires-Dist: openai-whisper>=20250625; python_version < "3.14" and extra == "asr"
74
75
  Dynamic: license-file
75
76
 
76
77
  <p align="center">
@@ -139,10 +139,14 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
139
139
  self.workbook = None
140
140
  try:
141
141
  if isinstance(self.path_or_stream, BytesIO):
142
- self.workbook = load_workbook(filename=self.path_or_stream)
142
+ self.workbook = load_workbook(
143
+ filename=self.path_or_stream, data_only=True
144
+ )
143
145
 
144
146
  elif isinstance(self.path_or_stream, Path):
145
- self.workbook = load_workbook(filename=str(self.path_or_stream))
147
+ self.workbook = load_workbook(
148
+ filename=str(self.path_or_stream), data_only=True
149
+ )
146
150
 
147
151
  self.valid = self.workbook is not None
148
152
  except Exception as e:
@@ -229,10 +229,10 @@ class PyPdfiumPageBackend(PdfPageBackend):
229
229
  b=max(cell.rect.to_bounding_box().b for cell in group),
230
230
  )
231
231
 
232
- assert self._ppage is not None
233
- self.text_page = self._ppage.get_textpage()
232
+ assert self.text_page is not None
234
233
  bbox = merged_bbox.to_bottom_left_origin(page_size.height)
235
- merged_text = self.text_page.get_text_bounded(*bbox.as_tuple())
234
+ with pypdfium2_lock:
235
+ merged_text = self.text_page.get_text_bounded(*bbox.as_tuple())
236
236
 
237
237
  return TextCell(
238
238
  index=group[0].index,
@@ -255,9 +255,9 @@ class PyPdfiumPageBackend(PdfPageBackend):
255
255
  def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
256
256
  AREA_THRESHOLD = 0 # 32 * 32
257
257
  page_size = self.get_size()
258
- rotation = self._ppage.get_rotation()
259
258
 
260
259
  with pypdfium2_lock:
260
+ rotation = self._ppage.get_rotation()
261
261
  for obj in self._ppage.get_objects(filter=[pdfium_c.FPDF_PAGEOBJ_IMAGE]):
262
262
  pos = obj.get_pos()
263
263
  if rotation == 90:
@@ -738,10 +738,15 @@ def convert( # noqa: C901
738
738
 
739
739
  pipeline_options.vlm_options = SMOLDOCLING_MLX
740
740
  except ImportError:
741
- _log.warning(
742
- "To run SmolDocling faster, please install mlx-vlm:\n"
743
- "pip install mlx-vlm"
744
- )
741
+ if sys.version_info < (3, 14):
742
+ _log.warning(
743
+ "To run SmolDocling faster, please install mlx-vlm:\n"
744
+ "pip install mlx-vlm"
745
+ )
746
+ else:
747
+ _log.warning(
748
+ "You can run SmolDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
749
+ )
745
750
 
746
751
  elif vlm_model == VlmModelType.GRANITEDOCLING:
747
752
  pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS
@@ -751,10 +756,16 @@ def convert( # noqa: C901
751
756
 
752
757
  pipeline_options.vlm_options = GRANITEDOCLING_MLX
753
758
  except ImportError:
754
- _log.warning(
755
- "To run GraniteDocling faster, please install mlx-vlm:\n"
756
- "pip install mlx-vlm"
757
- )
759
+ if sys.version_info < (3, 14):
760
+ _log.warning(
761
+ "To run GraniteDocling faster, please install mlx-vlm:\n"
762
+ "pip install mlx-vlm"
763
+ )
764
+ else:
765
+ _log.warning(
766
+ "You can run GraniteDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
767
+ )
768
+
758
769
  elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
759
770
  pipeline_options.vlm_options = SMOLDOCLING_VLLM
760
771
 
@@ -207,6 +207,8 @@ class VlmPrediction(BaseModel):
207
207
  text: str = ""
208
208
  generated_tokens: list[VlmPredictionToken] = []
209
209
  generation_time: float = -1
210
+ num_tokens: Optional[int] = None
211
+ stop_reason: Optional[str] = None # todo define an enum for possible stop reasons
210
212
 
211
213
 
212
214
  class ContainerElement(
@@ -361,15 +361,7 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
361
361
 
362
362
  generate_parsed_pages: bool = False
363
363
 
364
-
365
- class ProcessingPipeline(str, Enum):
366
- STANDARD = "standard"
367
- VLM = "vlm"
368
- ASR = "asr"
369
-
370
-
371
- class ThreadedPdfPipelineOptions(PdfPipelineOptions):
372
- """Pipeline options for the threaded PDF pipeline with batching and backpressure control"""
364
+ ### Arguments for threaded PDF pipeline with batching and backpressure control
373
365
 
374
366
  # Batch sizes for different stages
375
367
  ocr_batch_size: int = 4
@@ -377,7 +369,18 @@ class ThreadedPdfPipelineOptions(PdfPipelineOptions):
377
369
  table_batch_size: int = 4
378
370
 
379
371
  # Timing control
380
- batch_timeout_seconds: float = 2.0
372
+ batch_polling_interval_seconds: float = 0.5
381
373
 
382
374
  # Backpressure and queue control
383
375
  queue_max_size: int = 100
376
+
377
+
378
+ class ProcessingPipeline(str, Enum):
379
+ LEGACY = "legacy"
380
+ STANDARD = "standard"
381
+ VLM = "vlm"
382
+ ASR = "asr"
383
+
384
+
385
+ class ThreadedPdfPipelineOptions(PdfPipelineOptions):
386
+ """Pipeline options for the threaded PDF pipeline with batching and backpressure control"""
@@ -82,6 +82,7 @@ class InlineVlmOptions(BaseVlmOptions):
82
82
 
83
83
  use_kv_cache: bool = True
84
84
  max_new_tokens: int = 4096
85
+ track_generated_tokens: bool = False
85
86
 
86
87
  @property
87
88
  def repo_cache_folder(self) -> str:
@@ -73,7 +73,7 @@ class ApiVlmModel(BasePageModel):
73
73
  # Skip non-GenerationStopper criteria (should have been caught in validation)
74
74
 
75
75
  # Streaming path with early abort support
76
- page_tags = api_image_request_streaming(
76
+ page_tags, num_tokens = api_image_request_streaming(
77
77
  image=hi_res_image,
78
78
  prompt=prompt,
79
79
  url=self.vlm_options.url,
@@ -84,7 +84,7 @@ class ApiVlmModel(BasePageModel):
84
84
  )
85
85
  else:
86
86
  # Non-streaming fallback (existing behavior)
87
- page_tags = api_image_request(
87
+ page_tags, num_tokens = api_image_request(
88
88
  image=hi_res_image,
89
89
  prompt=prompt,
90
90
  url=self.vlm_options.url,
@@ -94,7 +94,9 @@ class ApiVlmModel(BasePageModel):
94
94
  )
95
95
 
96
96
  page_tags = self.vlm_options.decode_response(page_tags)
97
- page.predictions.vlm_response = VlmPrediction(text=page_tags)
97
+ page.predictions.vlm_response = VlmPrediction(
98
+ text=page_tags, num_tokens=num_tokens
99
+ )
98
100
  return page
99
101
 
100
102
  with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
@@ -167,6 +167,10 @@ class LayoutModel(BasePageModel):
167
167
  valid_pages.append(page)
168
168
  valid_page_images.append(page_image)
169
169
 
170
+ print(f"{len(pages)=}, {pages[0].page_no}-{pages[-1].page_no}")
171
+ print(f"{len(valid_pages)=}")
172
+ print(f"{len(valid_page_images)=}")
173
+
170
174
  # Process all valid pages with batch prediction
171
175
  batch_predictions = []
172
176
  if valid_page_images:
@@ -1,3 +1,4 @@
1
+ import sys
1
2
  import threading
2
3
  from collections.abc import Iterable
3
4
  from pathlib import Path
@@ -75,7 +76,10 @@ class PictureDescriptionVlmModel(
75
76
  else "sdpa"
76
77
  ),
77
78
  )
78
- self.model = torch.compile(self.model) # type: ignore
79
+ if sys.version_info < (3, 14):
80
+ self.model = torch.compile(self.model) # type: ignore
81
+ else:
82
+ self.model.eval()
79
83
 
80
84
  self.provenance = f"{self.options.repo_id}"
81
85
 
@@ -1,5 +1,6 @@
1
1
  import importlib.metadata
2
2
  import logging
3
+ import sys
3
4
  import time
4
5
  from collections.abc import Iterable
5
6
  from pathlib import Path
@@ -129,7 +130,10 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload
129
130
  trust_remote_code=vlm_options.trust_remote_code,
130
131
  revision=vlm_options.revision,
131
132
  )
132
- self.vlm_model = torch.compile(self.vlm_model) # type: ignore
133
+ if sys.version_info < (3, 14):
134
+ self.vlm_model = torch.compile(self.vlm_model) # type: ignore
135
+ else:
136
+ self.vlm_model.eval()
133
137
 
134
138
  # Load generation config
135
139
  self.generation_config = GenerationConfig.from_pretrained(
@@ -363,13 +367,19 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload
363
367
  decoded_texts = [text.rstrip(pad_token) for text in decoded_texts]
364
368
 
365
369
  # -- Optional logging
370
+ num_tokens = None
366
371
  if generated_ids.shape[0] > 0:
372
+ num_tokens = int(generated_ids[0].shape[0])
367
373
  _log.debug(
368
- f"Generated {int(generated_ids[0].shape[0])} tokens in {generation_time:.2f}s "
374
+ f"Generated {num_tokens} tokens in {generation_time:.2f}s "
369
375
  f"for batch size {generated_ids.shape[0]}."
370
376
  )
371
377
 
372
378
  for text in decoded_texts:
373
379
  # Apply decode_response to the output text
374
380
  decoded_text = self.vlm_options.decode_response(text)
375
- yield VlmPrediction(text=decoded_text, generation_time=generation_time)
381
+ yield VlmPrediction(
382
+ text=decoded_text,
383
+ generation_time=generation_time,
384
+ num_tokens=num_tokens,
385
+ )
@@ -50,9 +50,14 @@ class HuggingFaceMlxModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
50
50
  from mlx_vlm.prompt_utils import apply_chat_template # type: ignore
51
51
  from mlx_vlm.utils import load_config # type: ignore
52
52
  except ImportError:
53
- raise ImportError(
54
- "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
55
- )
53
+ if sys.version_info < (3, 14):
54
+ raise ImportError(
55
+ "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
56
+ )
57
+ else:
58
+ raise ImportError(
59
+ "mlx-vlm is not installed. It is not yet available on Python 3.14."
60
+ )
56
61
 
57
62
  repo_cache_folder = vlm_options.repo_id.replace("/", "--")
58
63
 
@@ -313,5 +318,6 @@ class HuggingFaceMlxModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
313
318
  text=decoded_output,
314
319
  generation_time=generation_time,
315
320
  generated_tokens=tokens,
321
+ num_tokens=len(tokens),
316
322
  )
317
323
  _log.debug("MLX model: Released global lock")
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import sys
2
3
  import time
3
4
  from collections.abc import Iterable
4
5
  from pathlib import Path
@@ -153,7 +154,10 @@ class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
153
154
  ),
154
155
  trust_remote_code=vlm_options.trust_remote_code,
155
156
  )
156
- self.vlm_model = torch.compile(self.vlm_model) # type: ignore
157
+ if sys.version_info < (3, 14):
158
+ self.vlm_model = torch.compile(self.vlm_model) # type: ignore
159
+ else:
160
+ self.vlm_model.eval()
157
161
 
158
162
  # Load generation config
159
163
  self.generation_config = GenerationConfig.from_pretrained(artifacts_path)
@@ -278,13 +282,19 @@ class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
278
282
  )
279
283
 
280
284
  # Optional logging
285
+ num_tokens = None
281
286
  if generated_ids.shape[0] > 0: # type: ignore
287
+ num_tokens = int(generated_ids[0].shape[0])
282
288
  _log.debug(
283
- f"Generated {int(generated_ids[0].shape[0])} tokens in {generation_time:.2f}s "
289
+ f"Generated {num_tokens} tokens in {generation_time:.2f}s "
284
290
  f"for batch size {generated_ids.shape[0]}." # type: ignore
285
291
  )
286
292
 
287
293
  for text in decoded_texts:
288
294
  # Apply decode_response to the output text
289
295
  decoded_text = self.vlm_options.decode_response(text)
290
- yield VlmPrediction(text=decoded_text, generation_time=generation_time)
296
+ yield VlmPrediction(
297
+ text=decoded_text,
298
+ generation_time=generation_time,
299
+ num_tokens=num_tokens,
300
+ )
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import sys
2
3
  import time
3
4
  from collections.abc import Iterable
4
5
  from pathlib import Path
@@ -8,7 +9,7 @@ import numpy as np
8
9
  from PIL.Image import Image
9
10
 
10
11
  from docling.datamodel.accelerator_options import AcceleratorOptions
11
- from docling.datamodel.base_models import Page, VlmPrediction
12
+ from docling.datamodel.base_models import Page, VlmPrediction, VlmPredictionToken
12
13
  from docling.datamodel.document import ConversionResult
13
14
  from docling.datamodel.pipeline_options_vlm_model import (
14
15
  InlineVlmOptions,
@@ -87,7 +88,7 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
87
88
  vlm_options: InlineVlmOptions,
88
89
  ):
89
90
  self.enabled = enabled
90
- self.vlm_options = vlm_options
91
+ self.vlm_options: InlineVlmOptions = vlm_options
91
92
 
92
93
  self.llm = None
93
94
  self.sampling_params = None
@@ -100,7 +101,18 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
100
101
  return
101
102
 
102
103
  from transformers import AutoProcessor
103
- from vllm import LLM, SamplingParams
104
+
105
+ try:
106
+ from vllm import LLM, SamplingParams
107
+ except ImportError:
108
+ if sys.version_info < (3, 14):
109
+ raise ImportError(
110
+ "vllm is not installed. Please install it via `pip install vllm`."
111
+ )
112
+ else:
113
+ raise ImportError(
114
+ "vllm is not installed. It is not yet available on Python 3.14."
115
+ )
104
116
 
105
117
  # Device selection
106
118
  self.device = decide_device(
@@ -222,7 +234,8 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
222
234
  pages_with_images.append(page)
223
235
 
224
236
  if images:
225
- predictions = list(self.process_images(images, user_prompts))
237
+ with TimeRecorder(conv_res, "vlm_inference"):
238
+ predictions = list(self.process_images(images, user_prompts))
226
239
  for page, prediction in zip(pages_with_images, predictions):
227
240
  page.predictions.vlm_response = prediction
228
241
 
@@ -288,13 +301,34 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
288
301
  # Optional debug
289
302
  if outputs:
290
303
  try:
291
- num_tokens = len(outputs[0].outputs[0].token_ids)
292
- _log.debug(f"Generated {num_tokens} tokens in {generation_time:.2f}s.")
304
+ num_tokens_within_batch = len(outputs[0].outputs[0].token_ids)
305
+ _log.debug(
306
+ f"Generated {num_tokens_within_batch} tokens for batch in {generation_time:.2f}s."
307
+ )
293
308
  except Exception:
294
- pass
309
+ num_tokens_within_batch = 0
295
310
 
296
311
  # Emit predictions
297
312
  for output in outputs:
298
313
  text = output.outputs[0].text if output.outputs else ""
314
+ stop_reason = output.outputs[0].stop_reason if output.outputs else ""
315
+ generated_tokens = [
316
+ VlmPredictionToken(token=int(p)) for p in output.outputs[0].token_ids
317
+ ]
318
+ num_tokens = len(generated_tokens)
299
319
  decoded_text = self.vlm_options.decode_response(text)
300
- yield VlmPrediction(text=decoded_text, generation_time=generation_time)
320
+ if self.vlm_options.track_generated_tokens:
321
+ yield VlmPrediction(
322
+ text=decoded_text,
323
+ generation_time=generation_time,
324
+ num_tokens=num_tokens,
325
+ stop_reason=stop_reason,
326
+ generated_tokens=generated_tokens,
327
+ )
328
+ else:
329
+ yield VlmPrediction(
330
+ text=decoded_text,
331
+ generation_time=generation_time,
332
+ num_tokens=num_tokens,
333
+ stop_reason=stop_reason,
334
+ )
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  import os
3
3
  import re
4
+ import sys
4
5
  import tempfile
5
6
  from io import BytesIO
6
7
  from pathlib import Path
@@ -117,9 +118,15 @@ class _NativeWhisperModel:
117
118
  try:
118
119
  import whisper # type: ignore
119
120
  except ImportError:
120
- raise ImportError(
121
- "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
122
- )
121
+ if sys.version_info < (3, 14):
122
+ raise ImportError(
123
+ "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
124
+ )
125
+ else:
126
+ raise ImportError(
127
+ "whisper is not installed. Unfortunately its dependencies are not yet available for Python 3.14."
128
+ )
129
+
123
130
  self.asr_options = asr_options
124
131
  self.max_tokens = asr_options.max_new_tokens
125
132
  self.temperature = asr_options.temperature
@@ -31,7 +31,7 @@ from docling.utils.profiling import ProfilingScope, TimeRecorder
31
31
  _log = logging.getLogger(__name__)
32
32
 
33
33
 
34
- class StandardPdfPipeline(PaginatedPipeline):
34
+ class LegacyStandardPdfPipeline(PaginatedPipeline):
35
35
  def __init__(self, pipeline_options: PdfPipelineOptions):
36
36
  super().__init__(pipeline_options)
37
37
  self.pipeline_options: PdfPipelineOptions
@@ -102,7 +102,7 @@ class StandardPdfPipeline(PaginatedPipeline):
102
102
  local_dir: Optional[Path] = None, force: bool = False
103
103
  ) -> Path:
104
104
  warnings.warn(
105
- "The usage of StandardPdfPipeline.download_models_hf() is deprecated "
105
+ "The usage of LegacyStandardPdfPipeline.download_models_hf() is deprecated "
106
106
  "use instead the utility `docling-tools models download`, or "
107
107
  "the upstream method docling.utils.models_downloader.download_all()",
108
108
  DeprecationWarning,