docling 2.58.0__tar.gz → 2.59.0__tar.gz

This diff compares the content of two publicly released versions of the package as they appear in their public registry. The information is provided for informational purposes only.

Potentially problematic release: this version of docling might be problematic (see the registry listing for details).

Files changed (152)
  1. {docling-2.58.0 → docling-2.59.0}/PKG-INFO +9 -8
  2. {docling-2.58.0 → docling-2.59.0}/docling/backend/msexcel_backend.py +6 -2
  3. {docling-2.58.0 → docling-2.59.0}/docling/cli/main.py +19 -8
  4. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/base_models.py +2 -0
  5. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/pipeline_options_vlm_model.py +1 -0
  6. {docling-2.58.0 → docling-2.59.0}/docling/models/api_vlm_model.py +5 -3
  7. {docling-2.58.0 → docling-2.59.0}/docling/models/picture_description_vlm_model.py +5 -1
  8. {docling-2.58.0 → docling-2.59.0}/docling/models/vlm_models_inline/hf_transformers_model.py +13 -3
  9. {docling-2.58.0 → docling-2.59.0}/docling/models/vlm_models_inline/mlx_model.py +9 -3
  10. {docling-2.58.0 → docling-2.59.0}/docling/models/vlm_models_inline/nuextract_transformers_model.py +13 -3
  11. {docling-2.58.0 → docling-2.59.0}/docling/models/vlm_models_inline/vllm_model.py +42 -8
  12. {docling-2.58.0 → docling-2.59.0}/docling/pipeline/asr_pipeline.py +10 -3
  13. {docling-2.58.0 → docling-2.59.0}/docling/utils/api_image_request.py +17 -6
  14. {docling-2.58.0 → docling-2.59.0}/docling.egg-info/PKG-INFO +9 -8
  15. {docling-2.58.0 → docling-2.59.0}/docling.egg-info/requires.txt +8 -6
  16. {docling-2.58.0 → docling-2.59.0}/pyproject.toml +11 -10
  17. {docling-2.58.0 → docling-2.59.0}/tests/test_asr_pipeline.py +6 -0
  18. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_msexcel.py +90 -0
  19. {docling-2.58.0 → docling-2.59.0}/tests/test_e2e_ocr_conversion.py +10 -4
  20. {docling-2.58.0 → docling-2.59.0}/LICENSE +0 -0
  21. {docling-2.58.0 → docling-2.59.0}/README.md +0 -0
  22. {docling-2.58.0 → docling-2.59.0}/docling/__init__.py +0 -0
  23. {docling-2.58.0 → docling-2.59.0}/docling/backend/__init__.py +0 -0
  24. {docling-2.58.0 → docling-2.59.0}/docling/backend/abstract_backend.py +0 -0
  25. {docling-2.58.0 → docling-2.59.0}/docling/backend/asciidoc_backend.py +0 -0
  26. {docling-2.58.0 → docling-2.59.0}/docling/backend/csv_backend.py +0 -0
  27. {docling-2.58.0 → docling-2.59.0}/docling/backend/docling_parse_backend.py +0 -0
  28. {docling-2.58.0 → docling-2.59.0}/docling/backend/docling_parse_v2_backend.py +0 -0
  29. {docling-2.58.0 → docling-2.59.0}/docling/backend/docling_parse_v4_backend.py +0 -0
  30. {docling-2.58.0 → docling-2.59.0}/docling/backend/docx/__init__.py +0 -0
  31. {docling-2.58.0 → docling-2.59.0}/docling/backend/docx/drawingml/utils.py +0 -0
  32. {docling-2.58.0 → docling-2.59.0}/docling/backend/docx/latex/__init__.py +0 -0
  33. {docling-2.58.0 → docling-2.59.0}/docling/backend/docx/latex/latex_dict.py +0 -0
  34. {docling-2.58.0 → docling-2.59.0}/docling/backend/docx/latex/omml.py +0 -0
  35. {docling-2.58.0 → docling-2.59.0}/docling/backend/html_backend.py +0 -0
  36. {docling-2.58.0 → docling-2.59.0}/docling/backend/json/__init__.py +0 -0
  37. {docling-2.58.0 → docling-2.59.0}/docling/backend/json/docling_json_backend.py +0 -0
  38. {docling-2.58.0 → docling-2.59.0}/docling/backend/md_backend.py +0 -0
  39. {docling-2.58.0 → docling-2.59.0}/docling/backend/mets_gbs_backend.py +0 -0
  40. {docling-2.58.0 → docling-2.59.0}/docling/backend/mspowerpoint_backend.py +0 -0
  41. {docling-2.58.0 → docling-2.59.0}/docling/backend/msword_backend.py +0 -0
  42. {docling-2.58.0 → docling-2.59.0}/docling/backend/noop_backend.py +0 -0
  43. {docling-2.58.0 → docling-2.59.0}/docling/backend/pdf_backend.py +0 -0
  44. {docling-2.58.0 → docling-2.59.0}/docling/backend/pypdfium2_backend.py +0 -0
  45. {docling-2.58.0 → docling-2.59.0}/docling/backend/webvtt_backend.py +0 -0
  46. {docling-2.58.0 → docling-2.59.0}/docling/backend/xml/__init__.py +0 -0
  47. {docling-2.58.0 → docling-2.59.0}/docling/backend/xml/jats_backend.py +0 -0
  48. {docling-2.58.0 → docling-2.59.0}/docling/backend/xml/uspto_backend.py +0 -0
  49. {docling-2.58.0 → docling-2.59.0}/docling/chunking/__init__.py +0 -0
  50. {docling-2.58.0 → docling-2.59.0}/docling/cli/__init__.py +0 -0
  51. {docling-2.58.0 → docling-2.59.0}/docling/cli/models.py +0 -0
  52. {docling-2.58.0 → docling-2.59.0}/docling/cli/tools.py +0 -0
  53. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/__init__.py +0 -0
  54. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/accelerator_options.py +0 -0
  55. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/asr_model_specs.py +0 -0
  56. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/backend_options.py +0 -0
  57. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/document.py +0 -0
  58. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/extraction.py +0 -0
  59. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/layout_model_specs.py +0 -0
  60. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/pipeline_options.py +0 -0
  61. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/pipeline_options_asr_model.py +0 -0
  62. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/settings.py +0 -0
  63. {docling-2.58.0 → docling-2.59.0}/docling/datamodel/vlm_model_specs.py +0 -0
  64. {docling-2.58.0 → docling-2.59.0}/docling/document_converter.py +0 -0
  65. {docling-2.58.0 → docling-2.59.0}/docling/document_extractor.py +0 -0
  66. {docling-2.58.0 → docling-2.59.0}/docling/exceptions.py +0 -0
  67. {docling-2.58.0 → docling-2.59.0}/docling/models/__init__.py +0 -0
  68. {docling-2.58.0 → docling-2.59.0}/docling/models/auto_ocr_model.py +0 -0
  69. {docling-2.58.0 → docling-2.59.0}/docling/models/base_model.py +0 -0
  70. {docling-2.58.0 → docling-2.59.0}/docling/models/base_ocr_model.py +0 -0
  71. {docling-2.58.0 → docling-2.59.0}/docling/models/code_formula_model.py +0 -0
  72. {docling-2.58.0 → docling-2.59.0}/docling/models/document_picture_classifier.py +0 -0
  73. {docling-2.58.0 → docling-2.59.0}/docling/models/easyocr_model.py +0 -0
  74. {docling-2.58.0 → docling-2.59.0}/docling/models/factories/__init__.py +0 -0
  75. {docling-2.58.0 → docling-2.59.0}/docling/models/factories/base_factory.py +0 -0
  76. {docling-2.58.0 → docling-2.59.0}/docling/models/factories/ocr_factory.py +0 -0
  77. {docling-2.58.0 → docling-2.59.0}/docling/models/factories/picture_description_factory.py +0 -0
  78. {docling-2.58.0 → docling-2.59.0}/docling/models/layout_model.py +0 -0
  79. {docling-2.58.0 → docling-2.59.0}/docling/models/ocr_mac_model.py +0 -0
  80. {docling-2.58.0 → docling-2.59.0}/docling/models/page_assemble_model.py +0 -0
  81. {docling-2.58.0 → docling-2.59.0}/docling/models/page_preprocessing_model.py +0 -0
  82. {docling-2.58.0 → docling-2.59.0}/docling/models/picture_description_api_model.py +0 -0
  83. {docling-2.58.0 → docling-2.59.0}/docling/models/picture_description_base_model.py +0 -0
  84. {docling-2.58.0 → docling-2.59.0}/docling/models/plugins/__init__.py +0 -0
  85. {docling-2.58.0 → docling-2.59.0}/docling/models/plugins/defaults.py +0 -0
  86. {docling-2.58.0 → docling-2.59.0}/docling/models/rapid_ocr_model.py +0 -0
  87. {docling-2.58.0 → docling-2.59.0}/docling/models/readingorder_model.py +0 -0
  88. {docling-2.58.0 → docling-2.59.0}/docling/models/table_structure_model.py +0 -0
  89. {docling-2.58.0 → docling-2.59.0}/docling/models/tesseract_ocr_cli_model.py +0 -0
  90. {docling-2.58.0 → docling-2.59.0}/docling/models/tesseract_ocr_model.py +0 -0
  91. {docling-2.58.0 → docling-2.59.0}/docling/models/utils/__init__.py +0 -0
  92. {docling-2.58.0 → docling-2.59.0}/docling/models/utils/generation_utils.py +0 -0
  93. {docling-2.58.0 → docling-2.59.0}/docling/models/utils/hf_model_download.py +0 -0
  94. {docling-2.58.0 → docling-2.59.0}/docling/models/vlm_models_inline/__init__.py +0 -0
  95. {docling-2.58.0 → docling-2.59.0}/docling/pipeline/__init__.py +0 -0
  96. {docling-2.58.0 → docling-2.59.0}/docling/pipeline/base_extraction_pipeline.py +0 -0
  97. {docling-2.58.0 → docling-2.59.0}/docling/pipeline/base_pipeline.py +0 -0
  98. {docling-2.58.0 → docling-2.59.0}/docling/pipeline/extraction_vlm_pipeline.py +0 -0
  99. {docling-2.58.0 → docling-2.59.0}/docling/pipeline/simple_pipeline.py +0 -0
  100. {docling-2.58.0 → docling-2.59.0}/docling/pipeline/standard_pdf_pipeline.py +0 -0
  101. {docling-2.58.0 → docling-2.59.0}/docling/pipeline/threaded_standard_pdf_pipeline.py +0 -0
  102. {docling-2.58.0 → docling-2.59.0}/docling/pipeline/vlm_pipeline.py +0 -0
  103. {docling-2.58.0 → docling-2.59.0}/docling/py.typed +0 -0
  104. {docling-2.58.0 → docling-2.59.0}/docling/utils/__init__.py +0 -0
  105. {docling-2.58.0 → docling-2.59.0}/docling/utils/accelerator_utils.py +0 -0
  106. {docling-2.58.0 → docling-2.59.0}/docling/utils/export.py +0 -0
  107. {docling-2.58.0 → docling-2.59.0}/docling/utils/glm_utils.py +0 -0
  108. {docling-2.58.0 → docling-2.59.0}/docling/utils/layout_postprocessor.py +0 -0
  109. {docling-2.58.0 → docling-2.59.0}/docling/utils/locks.py +0 -0
  110. {docling-2.58.0 → docling-2.59.0}/docling/utils/model_downloader.py +0 -0
  111. {docling-2.58.0 → docling-2.59.0}/docling/utils/ocr_utils.py +0 -0
  112. {docling-2.58.0 → docling-2.59.0}/docling/utils/orientation.py +0 -0
  113. {docling-2.58.0 → docling-2.59.0}/docling/utils/profiling.py +0 -0
  114. {docling-2.58.0 → docling-2.59.0}/docling/utils/utils.py +0 -0
  115. {docling-2.58.0 → docling-2.59.0}/docling/utils/visualization.py +0 -0
  116. {docling-2.58.0 → docling-2.59.0}/docling.egg-info/SOURCES.txt +0 -0
  117. {docling-2.58.0 → docling-2.59.0}/docling.egg-info/dependency_links.txt +0 -0
  118. {docling-2.58.0 → docling-2.59.0}/docling.egg-info/entry_points.txt +0 -0
  119. {docling-2.58.0 → docling-2.59.0}/docling.egg-info/top_level.txt +0 -0
  120. {docling-2.58.0 → docling-2.59.0}/setup.cfg +0 -0
  121. {docling-2.58.0 → docling-2.59.0}/tests/test_asr_mlx_whisper.py +0 -0
  122. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_asciidoc.py +0 -0
  123. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_csv.py +0 -0
  124. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_docling_json.py +0 -0
  125. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_docling_parse.py +0 -0
  126. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_docling_parse_v2.py +0 -0
  127. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_docling_parse_v4.py +0 -0
  128. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_html.py +0 -0
  129. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_jats.py +0 -0
  130. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_markdown.py +0 -0
  131. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_mets_gbs.py +0 -0
  132. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_msword.py +0 -0
  133. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_patent_uspto.py +0 -0
  134. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_pdfium.py +0 -0
  135. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_pptx.py +0 -0
  136. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_vtt.py +0 -0
  137. {docling-2.58.0 → docling-2.59.0}/tests/test_backend_webp.py +0 -0
  138. {docling-2.58.0 → docling-2.59.0}/tests/test_cli.py +0 -0
  139. {docling-2.58.0 → docling-2.59.0}/tests/test_code_formula.py +0 -0
  140. {docling-2.58.0 → docling-2.59.0}/tests/test_data_gen_flag.py +0 -0
  141. {docling-2.58.0 → docling-2.59.0}/tests/test_document_picture_classifier.py +0 -0
  142. {docling-2.58.0 → docling-2.59.0}/tests/test_e2e_conversion.py +0 -0
  143. {docling-2.58.0 → docling-2.59.0}/tests/test_extraction.py +0 -0
  144. {docling-2.58.0 → docling-2.59.0}/tests/test_input_doc.py +0 -0
  145. {docling-2.58.0 → docling-2.59.0}/tests/test_interfaces.py +0 -0
  146. {docling-2.58.0 → docling-2.59.0}/tests/test_invalid_input.py +0 -0
  147. {docling-2.58.0 → docling-2.59.0}/tests/test_legacy_format_transform.py +0 -0
  148. {docling-2.58.0 → docling-2.59.0}/tests/test_ocr_utils.py +0 -0
  149. {docling-2.58.0 → docling-2.59.0}/tests/test_options.py +0 -0
  150. {docling-2.58.0 → docling-2.59.0}/tests/test_pdf_password.py +0 -0
  151. {docling-2.58.0 → docling-2.59.0}/tests/test_settings_load.py +0 -0
  152. {docling-2.58.0 → docling-2.59.0}/tests/test_threaded_pipeline.py +0 -0

{docling-2.58.0 → docling-2.59.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.58.0
+Version: 2.59.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Requires-Python: <4.0,>=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
@@ -45,7 +46,7 @@ Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
 Requires-Dist: pandas<3.0.0,>=2.1.4
 Requires-Dist: marko<3.0.0,>=2.1.2
 Requires-Dist: openpyxl<4.0.0,>=3.1.5
-Requires-Dist: lxml<6.0.0,>=4.0.0
+Requires-Dist: lxml<7.0.0,>=4.0.0
 Requires-Dist: pillow<12.0.0,>=10.0.0
 Requires-Dist: tqdm<5.0.0,>=4.65.0
 Requires-Dist: pluggy<2.0.0,>=1.0.0
@@ -62,15 +63,15 @@ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrm
 Provides-Extra: vlm
 Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
 Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
-Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
-Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
+Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
+Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
 Requires-Dist: qwen-vl-utils>=0.0.11; extra == "vlm"
 Provides-Extra: rapidocr
-Requires-Dist: rapidocr<4.0.0,>=3.3; python_version < "3.14" and extra == "rapidocr"
-Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
+Requires-Dist: rapidocr<4.0.0,>=3.3; extra == "rapidocr"
+Requires-Dist: onnxruntime<2.0.0,>=1.7.0; python_version < "3.14" and extra == "rapidocr"
 Provides-Extra: asr
-Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
-Requires-Dist: openai-whisper>=20250625; extra == "asr"
+Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
+Requires-Dist: openai-whisper>=20250625; python_version < "3.14" and extra == "asr"
 Dynamic: license-file
 
 <p align="center">

{docling-2.58.0 → docling-2.59.0}/docling/backend/msexcel_backend.py

@@ -139,10 +139,14 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
         self.workbook = None
         try:
             if isinstance(self.path_or_stream, BytesIO):
-                self.workbook = load_workbook(filename=self.path_or_stream)
+                self.workbook = load_workbook(
+                    filename=self.path_or_stream, data_only=True
+                )
 
             elif isinstance(self.path_or_stream, Path):
-                self.workbook = load_workbook(filename=str(self.path_or_stream))
+                self.workbook = load_workbook(
+                    filename=str(self.path_or_stream), data_only=True
+                )
 
             self.valid = self.workbook is not None
         except Exception as e:
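
With `data_only=True`, openpyxl returns the cached values that the spreadsheet application last computed for formula cells instead of the formula strings themselves, which is what the conversion pipeline actually wants to extract. A minimal sketch of the difference (the workbook path and cell contents are placeholders):

```python
from openpyxl import load_workbook

# Hypothetical workbook in which cell A3 contains the formula "=A1+A2".
path = "report.xlsx"

wb_formulas = load_workbook(filename=path)                # default: data_only=False
wb_values = load_workbook(filename=path, data_only=True)  # as docling now loads workbooks

print(wb_formulas.active["A3"].value)  # "=A1+A2" (the formula string)
print(wb_values.active["A3"].value)    # e.g. 42, or None if no cached value was ever saved
```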

{docling-2.58.0 → docling-2.59.0}/docling/cli/main.py

@@ -738,10 +738,15 @@ def convert( # noqa: C901
 
                         pipeline_options.vlm_options = SMOLDOCLING_MLX
                     except ImportError:
-                        _log.warning(
-                            "To run SmolDocling faster, please install mlx-vlm:\n"
-                            "pip install mlx-vlm"
-                        )
+                        if sys.version_info < (3, 14):
+                            _log.warning(
+                                "To run SmolDocling faster, please install mlx-vlm:\n"
+                                "pip install mlx-vlm"
+                            )
+                        else:
+                            _log.warning(
+                                "You can run SmolDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
+                            )
 
             elif vlm_model == VlmModelType.GRANITEDOCLING:
                 pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS
@@ -751,10 +756,16 @@ def convert( # noqa: C901
 
                         pipeline_options.vlm_options = GRANITEDOCLING_MLX
                     except ImportError:
-                        _log.warning(
-                            "To run GraniteDocling faster, please install mlx-vlm:\n"
-                            "pip install mlx-vlm"
-                        )
+                        if sys.version_info < (3, 14):
+                            _log.warning(
+                                "To run GraniteDocling faster, please install mlx-vlm:\n"
+                                "pip install mlx-vlm"
+                            )
+                        else:
+                            _log.warning(
+                                "You can run GraniteDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
+                            )
+
             elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
                 pipeline_options.vlm_options = SMOLDOCLING_VLLM
 

{docling-2.58.0 → docling-2.59.0}/docling/datamodel/base_models.py

@@ -207,6 +207,8 @@ class VlmPrediction(BaseModel):
     text: str = ""
     generated_tokens: list[VlmPredictionToken] = []
     generation_time: float = -1
+    num_tokens: Optional[int] = None
+    stop_reason: Optional[str] = None  # todo define an enum for possible stop reasons
 
 
 class ContainerElement(
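
`VlmPrediction` now carries basic generation accounting next to the text: `num_tokens` and `stop_reason` both default to `None` and are only filled in when the backend reports them. A small illustrative example (the values are made up):

```python
from docling.datamodel.base_models import VlmPrediction

pred = VlmPrediction(
    text="<doctag>...</doctag>",
    generation_time=1.7,   # seconds
    num_tokens=512,        # reported by the backend, may be None
    stop_reason="stop",    # free-form string for now; an enum is still a TODO upstream
)

if pred.num_tokens is not None and pred.generation_time > 0:
    print(f"~{pred.num_tokens / pred.generation_time:.0f} tokens/s, stop reason: {pred.stop_reason}")
```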

{docling-2.58.0 → docling-2.59.0}/docling/datamodel/pipeline_options_vlm_model.py

@@ -82,6 +82,7 @@ class InlineVlmOptions(BaseVlmOptions):
 
     use_kv_cache: bool = True
     max_new_tokens: int = 4096
+    track_generated_tokens: bool = False
 
     @property
     def repo_cache_folder(self) -> str:

{docling-2.58.0 → docling-2.59.0}/docling/models/api_vlm_model.py

@@ -73,7 +73,7 @@ class ApiVlmModel(BasePageModel):
                         # Skip non-GenerationStopper criteria (should have been caught in validation)
 
                         # Streaming path with early abort support
-                        page_tags = api_image_request_streaming(
+                        page_tags, num_tokens = api_image_request_streaming(
                             image=hi_res_image,
                             prompt=prompt,
                             url=self.vlm_options.url,
@@ -84,7 +84,7 @@ class ApiVlmModel(BasePageModel):
                         )
                     else:
                         # Non-streaming fallback (existing behavior)
-                        page_tags = api_image_request(
+                        page_tags, num_tokens = api_image_request(
                             image=hi_res_image,
                             prompt=prompt,
                             url=self.vlm_options.url,
@@ -94,7 +94,9 @@ class ApiVlmModel(BasePageModel):
                         )
 
                     page_tags = self.vlm_options.decode_response(page_tags)
-                    page.predictions.vlm_response = VlmPrediction(text=page_tags)
+                    page.predictions.vlm_response = VlmPrediction(
+                        text=page_tags, num_tokens=num_tokens
+                    )
                 return page
 
         with ThreadPoolExecutor(max_workers=self.concurrency) as executor:

{docling-2.58.0 → docling-2.59.0}/docling/models/picture_description_vlm_model.py

@@ -1,3 +1,4 @@
+import sys
 import threading
 from collections.abc import Iterable
 from pathlib import Path
@@ -75,7 +76,10 @@ class PictureDescriptionVlmModel(
                     else "sdpa"
                 ),
             )
-            self.model = torch.compile(self.model)  # type: ignore
+            if sys.version_info < (3, 14):
+                self.model = torch.compile(self.model)  # type: ignore
+            else:
+                self.model.eval()
 
             self.provenance = f"{self.options.repo_id}"
 

{docling-2.58.0 → docling-2.59.0}/docling/models/vlm_models_inline/hf_transformers_model.py

@@ -1,5 +1,6 @@
 import importlib.metadata
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path
@@ -129,7 +130,10 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload
                 trust_remote_code=vlm_options.trust_remote_code,
                 revision=vlm_options.revision,
             )
-            self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+            if sys.version_info < (3, 14):
+                self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+            else:
+                self.vlm_model.eval()
 
             # Load generation config
             self.generation_config = GenerationConfig.from_pretrained(
@@ -363,13 +367,19 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload
         decoded_texts = [text.rstrip(pad_token) for text in decoded_texts]
 
         # -- Optional logging
+        num_tokens = None
         if generated_ids.shape[0] > 0:
+            num_tokens = int(generated_ids[0].shape[0])
             _log.debug(
-                f"Generated {int(generated_ids[0].shape[0])} tokens in {generation_time:.2f}s "
+                f"Generated {num_tokens} tokens in {generation_time:.2f}s "
                 f"for batch size {generated_ids.shape[0]}."
             )
 
         for text in decoded_texts:
             # Apply decode_response to the output text
             decoded_text = self.vlm_options.decode_response(text)
-            yield VlmPrediction(text=decoded_text, generation_time=generation_time)
+            yield VlmPrediction(
+                text=decoded_text,
+                generation_time=generation_time,
+                num_tokens=num_tokens,
+            )
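
The same guard appears in the picture-description model above and in the NuExtract model below: `torch.compile` is skipped on Python 3.14, presumably because the compile toolchain does not support that interpreter yet, and the model is simply put into eval mode instead. A standalone sketch of the pattern, using a stand-in module:

```python
import sys

import torch
from torch import nn

model = nn.Linear(8, 8)  # stand-in for the loaded VLM / picture-description model

if sys.version_info < (3, 14):
    # Compiled graph execution where the toolchain supports it.
    model = torch.compile(model)  # type: ignore
else:
    # Fall back to eager inference on Python 3.14.
    model.eval()
```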

{docling-2.58.0 → docling-2.59.0}/docling/models/vlm_models_inline/mlx_model.py

@@ -50,9 +50,14 @@ class HuggingFaceMlxModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
                 from mlx_vlm.prompt_utils import apply_chat_template  # type: ignore
                 from mlx_vlm.utils import load_config  # type: ignore
             except ImportError:
-                raise ImportError(
-                    "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
-                )
+                if sys.version_info < (3, 14):
+                    raise ImportError(
+                        "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
+                    )
+                else:
+                    raise ImportError(
+                        "mlx-vlm is not installed. It is not yet available on Python 3.14."
+                    )
 
             repo_cache_folder = vlm_options.repo_id.replace("/", "--")
 
@@ -313,5 +318,6 @@ class HuggingFaceMlxModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
                     text=decoded_output,
                     generation_time=generation_time,
                     generated_tokens=tokens,
+                    num_tokens=len(tokens),
                 )
                 _log.debug("MLX model: Released global lock")

{docling-2.58.0 → docling-2.59.0}/docling/models/vlm_models_inline/nuextract_transformers_model.py

@@ -1,4 +1,5 @@
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path
@@ -153,7 +154,10 @@ class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
                 ),
                 trust_remote_code=vlm_options.trust_remote_code,
             )
-            self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+            if sys.version_info < (3, 14):
+                self.vlm_model = torch.compile(self.vlm_model)  # type: ignore
+            else:
+                self.vlm_model.eval()
 
             # Load generation config
             self.generation_config = GenerationConfig.from_pretrained(artifacts_path)
@@ -278,13 +282,19 @@ class NuExtractTransformersModel(BaseVlmModel, HuggingFaceModelDownloadMixin):
         )
 
         # Optional logging
+        num_tokens = None
         if generated_ids.shape[0] > 0:  # type: ignore
+            num_tokens = int(generated_ids[0].shape[0])
             _log.debug(
-                f"Generated {int(generated_ids[0].shape[0])} tokens in {generation_time:.2f}s "
+                f"Generated {num_tokens} tokens in {generation_time:.2f}s "
                 f"for batch size {generated_ids.shape[0]}."  # type: ignore
             )
 
         for text in decoded_texts:
             # Apply decode_response to the output text
             decoded_text = self.vlm_options.decode_response(text)
-            yield VlmPrediction(text=decoded_text, generation_time=generation_time)
+            yield VlmPrediction(
+                text=decoded_text,
+                generation_time=generation_time,
+                num_tokens=num_tokens,
+            )

{docling-2.58.0 → docling-2.59.0}/docling/models/vlm_models_inline/vllm_model.py

@@ -1,4 +1,5 @@
 import logging
+import sys
 import time
 from collections.abc import Iterable
 from pathlib import Path
@@ -8,7 +9,7 @@ import numpy as np
 from PIL.Image import Image
 
 from docling.datamodel.accelerator_options import AcceleratorOptions
-from docling.datamodel.base_models import Page, VlmPrediction
+from docling.datamodel.base_models import Page, VlmPrediction, VlmPredictionToken
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options_vlm_model import (
     InlineVlmOptions,
@@ -87,7 +88,7 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
         vlm_options: InlineVlmOptions,
     ):
         self.enabled = enabled
-        self.vlm_options = vlm_options
+        self.vlm_options: InlineVlmOptions = vlm_options
 
         self.llm = None
         self.sampling_params = None
@@ -100,7 +101,18 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
             return
 
         from transformers import AutoProcessor
-        from vllm import LLM, SamplingParams
+
+        try:
+            from vllm import LLM, SamplingParams
+        except ImportError:
+            if sys.version_info < (3, 14):
+                raise ImportError(
+                    "vllm is not installed. Please install it via `pip install vllm`."
+                )
+            else:
+                raise ImportError(
+                    "vllm is not installed. It is not yet available on Python 3.14."
+                )
 
         # Device selection
         self.device = decide_device(
@@ -222,7 +234,8 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
                 pages_with_images.append(page)
 
             if images:
-                predictions = list(self.process_images(images, user_prompts))
+                with TimeRecorder(conv_res, "vlm_inference"):
+                    predictions = list(self.process_images(images, user_prompts))
                 for page, prediction in zip(pages_with_images, predictions):
                     page.predictions.vlm_response = prediction
 
@@ -288,13 +301,34 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
         # Optional debug
         if outputs:
             try:
-                num_tokens = len(outputs[0].outputs[0].token_ids)
-                _log.debug(f"Generated {num_tokens} tokens in {generation_time:.2f}s.")
+                num_tokens_within_batch = len(outputs[0].outputs[0].token_ids)
+                _log.debug(
+                    f"Generated {num_tokens_within_batch} tokens for batch in {generation_time:.2f}s."
+                )
             except Exception:
-                pass
+                num_tokens_within_batch = 0
 
         # Emit predictions
         for output in outputs:
             text = output.outputs[0].text if output.outputs else ""
+            stop_reason = output.outputs[0].stop_reason if output.outputs else ""
+            generated_tokens = [
+                VlmPredictionToken(token=int(p)) for p in output.outputs[0].token_ids
+            ]
+            num_tokens = len(generated_tokens)
             decoded_text = self.vlm_options.decode_response(text)
-            yield VlmPrediction(text=decoded_text, generation_time=generation_time)
+            if self.vlm_options.track_generated_tokens:
+                yield VlmPrediction(
+                    text=decoded_text,
+                    generation_time=generation_time,
+                    num_tokens=num_tokens,
+                    stop_reason=stop_reason,
+                    generated_tokens=generated_tokens,
+                )
+            else:
+                yield VlmPrediction(
+                    text=decoded_text,
+                    generation_time=generation_time,
+                    num_tokens=num_tokens,
+                    stop_reason=stop_reason,
+                )
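
The new `track_generated_tokens` flag on `InlineVlmOptions` decides whether the vLLM backend also attaches the full `VlmPredictionToken` list to each prediction; `num_tokens` and `stop_reason` are populated either way. A hedged sketch of enabling it, assuming the `SMOLDOCLING_VLLM` spec referenced in the CLI change is exported from `docling.datamodel.vlm_model_specs`:

```python
from docling.datamodel.vlm_model_specs import SMOLDOCLING_VLLM  # assumed location of the spec

# Work on a copy so the shared spec object is not mutated.
vlm_options = SMOLDOCLING_VLLM.model_copy()
vlm_options.track_generated_tokens = True  # keep per-token output (off by default)
```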

{docling-2.58.0 → docling-2.59.0}/docling/pipeline/asr_pipeline.py

@@ -1,6 +1,7 @@
 import logging
 import os
 import re
+import sys
 import tempfile
 from io import BytesIO
 from pathlib import Path
@@ -117,9 +118,15 @@ class _NativeWhisperModel:
         try:
             import whisper  # type: ignore
         except ImportError:
-            raise ImportError(
-                "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
-            )
+            if sys.version_info < (3, 14):
+                raise ImportError(
+                    "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
+                )
+            else:
+                raise ImportError(
+                    "whisper is not installed. Unfortunately its dependencies are not yet available for Python 3.14."
+                )
+
         self.asr_options = asr_options
         self.max_tokens = asr_options.max_new_tokens
         self.temperature = asr_options.temperature
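
The CLI, MLX, vLLM, and ASR changes above all follow the same convention: optional accelerator packages (mlx-vlm, vllm, openai-whisper) cannot be installed on Python 3.14 yet, so a failed import produces a version-aware message instead of unconditionally telling the user to `pip install` something that will not build there. A generic sketch of that pattern (the package name is illustrative, not part of docling):

```python
import sys


def load_optional_backend():
    """Import an optional accelerator package with a version-aware error message."""
    try:
        import some_optional_backend  # hypothetical optional dependency
    except ImportError:
        if sys.version_info < (3, 14):
            raise ImportError(
                "some_optional_backend is not installed. "
                "Install it via `pip install some-optional-backend`."
            )
        raise ImportError(
            "some_optional_backend is not yet available on Python 3.14."
        )
    return some_optional_backend
```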

{docling-2.58.0 → docling-2.59.0}/docling/utils/api_image_request.py

@@ -2,7 +2,7 @@ import base64
 import json
 import logging
 from io import BytesIO
-from typing import Optional
+from typing import Dict, List, Optional, Tuple
 
 import requests
 from PIL import Image
@@ -21,7 +21,7 @@ def api_image_request(
     timeout: float = 20,
     headers: Optional[dict[str, str]] = None,
     **params,
-) -> str:
+) -> Tuple[str, Optional[int]]:
     img_io = BytesIO()
     image.save(img_io, "PNG")
     image_base64 = base64.b64encode(img_io.getvalue()).decode("utf-8")
@@ -60,7 +60,8 @@
 
     api_resp = OpenAiApiResponse.model_validate_json(r.text)
     generated_text = api_resp.choices[0].message.content.strip()
-    return generated_text
+    num_tokens = api_resp.usage.total_tokens
+    return generated_text, num_tokens
 
 
 def api_image_request_streaming(
@@ -72,7 +73,7 @@ def api_image_request_streaming(
     headers: Optional[dict[str, str]] = None,
     generation_stoppers: list[GenerationStopper] = [],
     **params,
-) -> str:
+) -> Tuple[str, Optional[int]]:
     """
     Stream a chat completion from an OpenAI-compatible server (e.g., vLLM).
     Parses SSE lines: 'data: {json}\\n\\n', terminated by 'data: [DONE]'.
@@ -150,6 +151,16 @@
                 _log.debug("Unexpected SSE chunk shape: %s", e)
                 piece = ""
 
+            # Try to extract token count
+            num_tokens = None
+            try:
+                if "usage" in obj:
+                    usage = obj["usage"]
+                    num_tokens = usage.get("total_tokens")
+            except Exception as e:
+                num_tokens = None
+                _log.debug("Usage key not included in response: %s", e)
+
             if piece:
                 full_text.append(piece)
                 for stopper in generation_stoppers:
@@ -162,6 +173,6 @@
                     # closing the connection when we exit the 'with' block.
                     # vLLM/OpenAI-compatible servers will detect the client disconnect
                    # and abort the request server-side.
-                    return "".join(full_text)
+                    return "".join(full_text), num_tokens
 
-    return "".join(full_text)
+    return "".join(full_text), num_tokens
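
Both helpers now return a `(text, num_tokens)` tuple, where `num_tokens` comes from the OpenAI-style `usage.total_tokens` field and may be `None` when the server does not report usage, so direct callers have to unpack the result. A hedged usage sketch against an OpenAI-compatible endpoint (URL, image path, and model name are placeholders; extra keyword arguments are assumed to be forwarded with the request as before):

```python
from PIL import Image

from docling.utils.api_image_request import api_image_request

image = Image.open("page.png")  # placeholder page image

text, num_tokens = api_image_request(
    image=image,
    prompt="Convert this page to DocTags.",
    url="http://localhost:8000/v1/chat/completions",  # placeholder endpoint
    timeout=60,
    model="granite-docling",  # forwarded via **params
)

print(f"{len(text)} characters generated, reported tokens: {num_tokens}")
```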

{docling-2.58.0 → docling-2.59.0}/docling.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.58.0
+Version: 2.59.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Requires-Python: <4.0,>=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
@@ -45,7 +46,7 @@ Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
 Requires-Dist: pandas<3.0.0,>=2.1.4
 Requires-Dist: marko<3.0.0,>=2.1.2
 Requires-Dist: openpyxl<4.0.0,>=3.1.5
-Requires-Dist: lxml<6.0.0,>=4.0.0
+Requires-Dist: lxml<7.0.0,>=4.0.0
 Requires-Dist: pillow<12.0.0,>=10.0.0
 Requires-Dist: tqdm<5.0.0,>=4.65.0
 Requires-Dist: pluggy<2.0.0,>=1.0.0
@@ -62,15 +63,15 @@ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrm
 Provides-Extra: vlm
 Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
 Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
-Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
-Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
+Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
+Requires-Dist: vllm<1.0.0,>=0.10.0; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64") and extra == "vlm"
 Requires-Dist: qwen-vl-utils>=0.0.11; extra == "vlm"
 Provides-Extra: rapidocr
-Requires-Dist: rapidocr<4.0.0,>=3.3; python_version < "3.14" and extra == "rapidocr"
-Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
+Requires-Dist: rapidocr<4.0.0,>=3.3; extra == "rapidocr"
+Requires-Dist: onnxruntime<2.0.0,>=1.7.0; python_version < "3.14" and extra == "rapidocr"
 Provides-Extra: asr
-Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
-Requires-Dist: openai-whisper>=20250625; extra == "asr"
+Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
+Requires-Dist: openai-whisper>=20250625; python_version < "3.14" and extra == "asr"
 Dynamic: license-file
 
 <p align="center">

{docling-2.58.0 → docling-2.59.0}/docling.egg-info/requires.txt

@@ -16,7 +16,7 @@ beautifulsoup4<5.0.0,>=4.12.3
 pandas<3.0.0,>=2.1.4
 marko<3.0.0,>=2.1.2
 openpyxl<4.0.0,>=3.1.5
-lxml<6.0.0,>=4.0.0
+lxml<7.0.0,>=4.0.0
 pillow<12.0.0,>=10.0.0
 tqdm<5.0.0,>=4.65.0
 pluggy<2.0.0,>=1.0.0
@@ -32,9 +32,11 @@ rapidocr<4.0.0,>=3.3
 ocrmac<2.0.0,>=1.0.0
 
 [asr]
+
+[asr:python_version < "3.14"]
 openai-whisper>=20250625
 
-[asr:python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"]
+[asr:python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"]
 mlx-whisper>=0.4.3
 
 [easyocr]
@@ -46,10 +48,10 @@ easyocr<2.0,>=1.7
 ocrmac<2.0.0,>=1.0.0
 
 [rapidocr]
-onnxruntime<2.0.0,>=1.7.0
+rapidocr<4.0.0,>=3.3
 
 [rapidocr:python_version < "3.14"]
-rapidocr<4.0.0,>=3.3
+onnxruntime<2.0.0,>=1.7.0
 
 [tesserocr]
 tesserocr<3.0.0,>=2.7.1
@@ -59,8 +61,8 @@ transformers<5.0.0,>=4.46.0
 accelerate<2.0.0,>=1.2.1
 qwen-vl-utils>=0.0.11
 
-[vlm:python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"]
+[vlm:python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"]
 mlx-vlm<1.0.0,>=0.3.0
 
-[vlm:python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"]
+[vlm:python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64"]
 vllm<1.0.0,>=0.10.0
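
All of the dependency reshuffling above is expressed through PEP 508 environment markers: on Python 3.14 the rapidocr extra still installs rapidocr but skips onnxruntime, and the asr/vlm extras drop the packages that do not build there yet. The packaging library can be used to check how such a marker evaluates on the current interpreter:

```python
from packaging.markers import Marker

onnx_marker = Marker('python_version < "3.14"')
vllm_marker = Marker(
    'python_version >= "3.10" and python_version < "3.14" '
    'and sys_platform == "linux" and platform_machine == "x86_64"'
)

# True or False depending on the interpreter and platform running this snippet.
print("onnxruntime would be installed:", onnx_marker.evaluate())
print("vllm would be installed:", vllm_marker.evaluate())
```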

{docling-2.58.0 → docling-2.59.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "docling"
-version = "2.58.0" # DO NOT EDIT, updated automatically
+version = "2.59.0" # DO NOT EDIT, updated automatically
 description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
 license = "MIT"
 keywords = [
@@ -30,6 +30,7 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
 ]
 readme = "README.md"
 authors = [
@@ -63,7 +64,7 @@ dependencies = [
     'pandas (>=2.1.4,<3.0.0)',
     'marko (>=2.1.2,<3.0.0)',
     'openpyxl (>=3.1.5,<4.0.0)',
-    'lxml (>=4.0.0,<6.0.0)',
+    'lxml (>=4.0.0,<7.0.0)',
     'pillow (>=10.0.0,<12.0.0)',
     'tqdm (>=4.65.0,<5.0.0)',
     'pluggy (>=1.0.0,<2.0.0)',
@@ -95,19 +96,19 @@ ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
 vlm = [
     'transformers (>=4.46.0,<5.0.0)',
     'accelerate (>=1.2.1,<2.0.0)',
-    'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
-    'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"',
+    'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"',
+    'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64"',
     "qwen-vl-utils>=0.0.11",
 ]
 rapidocr = [
-    'rapidocr (>=3.3,<4.0.0) ; python_version < "3.14"',
-    'onnxruntime (>=1.7.0,<2.0.0)',
+    'rapidocr (>=3.3,<4.0.0)',
+    'onnxruntime (>=1.7.0,<2.0.0) ; python_version < "3.14"',
     # 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
     # 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
 ]
 asr = [
-    'mlx-whisper>=0.4.3 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
-    "openai-whisper>=20250625",
+    'mlx-whisper>=0.4.3 ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "darwin" and platform_machine == "arm64"',
+    'openai-whisper>=20250625 ; python_version < "3.14"',
 ]
 
 [dependency-groups]
@@ -146,10 +147,10 @@ examples = [
     "langchain-milvus~=0.1",
     "langchain-text-splitters~=0.2",
     "modelscope>=1.29.0",
-    "gliner>=0.2.21",
+    'gliner>=0.2.21 ; python_version < "3.14"', # gliner depends on onnxruntime which is not available on py3.14
 ]
 constraints = [
-    'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
+    'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10" and python_version < "3.14"',
     'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
 ]
 

{docling-2.58.0 → docling-2.59.0}/tests/test_asr_pipeline.py

@@ -1,3 +1,4 @@
+import sys
 from pathlib import Path
 from unittest.mock import Mock, patch
 
@@ -10,6 +11,11 @@ from docling.datamodel.pipeline_options import AsrPipelineOptions
 from docling.document_converter import AudioFormatOption, DocumentConverter
 from docling.pipeline.asr_pipeline import AsrPipeline
 
+pytestmark = pytest.mark.skipif(
+    sys.version_info >= (3, 14),
+    reason="Python 3.14 is not yet supported by whisper dependencies.",
+)
+
 
 @pytest.fixture
 def test_audio_path():