PyPI - docling - Versions diffs - 2.52.0__py3-none-any.whl → 2.54.0__py3-none-any.whl - Mend

docling 2.52.0__py3-none-any.whl → 2.54.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of docling might be problematic. Click here for more details.

Files changed (18) hide show

docling/backend/msword_backend.py +176 -75
docling/backend/webvtt_backend.py +572 -0
docling/cli/main.py +15 -1
docling/cli/models.py +4 -0
docling/datamodel/base_models.py +23 -23
docling/datamodel/document.py +2 -0
docling/datamodel/pipeline_options.py +7 -3
docling/datamodel/vlm_model_specs.py +30 -0
docling/document_converter.py +4 -0
docling/models/rapid_ocr_model.py +40 -25
docling/models/table_structure_model.py +3 -3
docling/utils/model_downloader.py +22 -0
{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/METADATA +7 -7
{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/RECORD +18 -17
{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/WHEEL +0 -0
{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/entry_points.txt +0 -0
{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/licenses/LICENSE +0 -0
{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/top_level.txt +0 -0

docling/datamodel/base_models.py CHANGED Viewed

@@ -1,7 +1,6 @@
-import math
 from collections import defaultdict
 from enum import Enum
-from typing import TYPE_CHECKING, Dict, List, Optional, Type, Union
+from typing import TYPE_CHECKING, Optional, Type, Union
 import numpy as np
 from docling_core.types.doc import (
@@ -14,9 +13,7 @@ from docling_core.types.doc import (
 )
 from docling_core.types.doc.base import PydanticSerCtxKey, round_pydantic_float
 from docling_core.types.doc.page import SegmentedPdfPage, TextCell
-from docling_core.types.io import (
-    DocumentStream,
-)
+from docling_core.types.io import DocumentStream
 # DO NOT REMOVE; explicitly exposed from this location
 from PIL.Image import Image
@@ -71,6 +68,7 @@ class InputFormat(str, Enum):
     METS_GBS = "mets_gbs"
     JSON_DOCLING = "json_docling"
     AUDIO = "audio"
+    VTT = "vtt"
 class OutputFormat(str, Enum):
@@ -82,7 +80,7 @@ class OutputFormat(str, Enum):
     DOCTAGS = "doctags"
-FormatToExtensions: Dict[InputFormat, List[str]] = {
+FormatToExtensions: dict[InputFormat, list[str]] = {
     InputFormat.DOCX: ["docx", "dotx", "docm", "dotm"],
     InputFormat.PPTX: ["pptx", "potx", "ppsx", "pptm", "potm", "ppsm"],
     InputFormat.PDF: ["pdf"],
@@ -97,9 +95,10 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
     InputFormat.METS_GBS: ["tar.gz"],
     InputFormat.JSON_DOCLING: ["json"],
     InputFormat.AUDIO: ["wav", "mp3"],
+    InputFormat.VTT: ["vtt"],
 }
-FormatToMimeType: Dict[InputFormat, List[str]] = {
+FormatToMimeType: dict[InputFormat, list[str]] = {
     InputFormat.DOCX: [
         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
         "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
@@ -130,6 +129,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
     InputFormat.METS_GBS: ["application/mets+xml"],
     InputFormat.JSON_DOCLING: ["application/json"],
     InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
+    InputFormat.VTT: ["text/vtt"],
 }
 MimeTypeToFormat: dict[str, list[InputFormat]] = {
@@ -162,8 +162,8 @@ class Cluster(BaseModel):
     label: DocItemLabel
     bbox: BoundingBox
     confidence: float = 1.0
-    cells: List[TextCell] = []
-    children: List["Cluster"] = []  # Add child cluster support
+    cells: list[TextCell] = []
+    children: list["Cluster"] = []  # Add child cluster support
     @field_serializer("confidence")
     def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
@@ -179,7 +179,7 @@ class BasePageElement(BaseModel):
 class LayoutPrediction(BaseModel):
-    clusters: List[Cluster] = []
+    clusters: list[Cluster] = []
 class VlmPredictionToken(BaseModel):
@@ -201,14 +201,14 @@ class ContainerElement(
 class Table(BasePageElement):
-    otsl_seq: List[str]
+    otsl_seq: list[str]
     num_rows: int = 0
     num_cols: int = 0
-    table_cells: List[TableCell]
+    table_cells: list[TableCell]
 class TableStructurePrediction(BaseModel):
-    table_map: Dict[int, Table] = {}
+    table_map: dict[int, Table] = {}
 class TextElement(BasePageElement):
@@ -216,7 +216,7 @@ class TextElement(BasePageElement):
 class FigureElement(BasePageElement):
-    annotations: List[PictureDataType] = []
+    annotations: list[PictureDataType] = []
     provenance: Optional[str] = None
     predicted_class: Optional[str] = None
     confidence: Optional[float] = None
@@ -234,12 +234,12 @@ class FigureElement(BasePageElement):
 class FigureClassificationPrediction(BaseModel):
     figure_count: int = 0
-    figure_map: Dict[int, FigureElement] = {}
+    figure_map: dict[int, FigureElement] = {}
 class EquationPrediction(BaseModel):
     equation_count: int = 0
-    equation_map: Dict[int, TextElement] = {}
+    equation_map: dict[int, TextElement] = {}
 class PagePredictions(BaseModel):
@@ -254,9 +254,9 @@ PageElement = Union[TextElement, Table, FigureElement, ContainerElement]
 class AssembledUnit(BaseModel):
-    elements: List[PageElement] = []
-    body: List[PageElement] = []
-    headers: List[PageElement] = []
+    elements: list[PageElement] = []
+    body: list[PageElement] = []
+    headers: list[PageElement] = []
 class ItemAndImageEnrichmentElement(BaseModel):
@@ -280,12 +280,12 @@ class Page(BaseModel):
         None  # Internal PDF backend. By default it is cleared during assembling.
     )
     _default_image_scale: float = 1.0  # Default image scale for external usage.
-    _image_cache: Dict[
+    _image_cache: dict[
         float, Image
     ] = {}  # Cache of images in different scales. By default it is cleared during assembling.
     @property
-    def cells(self) -> List[TextCell]:
+    def cells(self) -> list[TextCell]:
         """Return text cells as a read-only view of parsed_page.textline_cells."""
         if self.parsed_page is not None:
             return self.parsed_page.textline_cells
@@ -354,7 +354,7 @@ class OpenAiApiResponse(BaseModel):
     id: str
     model: Optional[str] = None  # returned by openai
-    choices: List[OpenAiResponseChoice]
+    choices: list[OpenAiResponseChoice]
     created: int
     usage: OpenAiResponseUsage
@@ -430,7 +430,7 @@ class PageConfidenceScores(BaseModel):
 class ConfidenceReport(PageConfidenceScores):
-    pages: Dict[int, PageConfidenceScores] = Field(
+    pages: dict[int, PageConfidenceScores] = Field(
         default_factory=lambda: defaultdict(PageConfidenceScores)
     )

docling/datamodel/document.py CHANGED Viewed

@@ -394,6 +394,8 @@ class _DocumentConversionInput(BaseModel):
             mime = FormatToMimeType[InputFormat.PPTX][0]
         elif ext in FormatToExtensions[InputFormat.XLSX]:
             mime = FormatToMimeType[InputFormat.XLSX][0]
+        elif ext in FormatToExtensions[InputFormat.VTT]:
+            mime = FormatToMimeType[InputFormat.VTT][0]
         return mime

docling/datamodel/pipeline_options.py CHANGED Viewed

@@ -12,7 +12,7 @@ from pydantic import (
 )
 from typing_extensions import deprecated
-from docling.datamodel import asr_model_specs
+from docling.datamodel import asr_model_specs, vlm_model_specs
 # Import the following for backwards compatibility
 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
@@ -114,7 +114,11 @@ class RapidOcrOptions(OcrOptions):
     cls_model_path: Optional[str] = None  # same default as rapidocr
     rec_model_path: Optional[str] = None  # same default as rapidocr
     rec_keys_path: Optional[str] = None  # same default as rapidocr
-    rec_font_path: Optional[str] = None  # same default as rapidocr
+    rec_font_path: Optional[str] = None  # Deprecated, please use font_path instead
+    font_path: Optional[str] = None  # same default as rapidocr
+    # Dictionary to overwrite or pass-through additional parameters
+    rapidocr_params: Dict[str, Any] = Field(default_factory=dict)
     model_config = ConfigDict(
         extra="forbid",
@@ -286,7 +290,7 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
     )
     # If True, text from backend will be used instead of generated text
     vlm_options: Union[InlineVlmOptions, ApiVlmOptions] = (
-        smoldocling_vlm_conversion_options
+        vlm_model_specs.GRANITEDOCLING_TRANSFORMERS
     )

docling/datamodel/vlm_model_specs.py CHANGED Viewed

@@ -18,6 +18,35 @@ from docling.datamodel.pipeline_options_vlm_model import (
 _log = logging.getLogger(__name__)
+# Granite-Docling
+GRANITEDOCLING_TRANSFORMERS = InlineVlmOptions(
+    repo_id="ibm-granite/granite-docling-258M",
+    prompt="Convert this page to docling.",
+    response_format=ResponseFormat.DOCTAGS,
+    inference_framework=InferenceFramework.TRANSFORMERS,
+    transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
+    supported_devices=[
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.CUDA,
+    ],
+    scale=2.0,
+    temperature=0.0,
+    max_new_tokens=8192,
+    stop_strings=["</doctag>", "<|end_of_text|>"],
+)
+GRANITEDOCLING_MLX = InlineVlmOptions(
+    repo_id="ibm-granite/granite-docling-258M-mlx",
+    prompt="Convert this page to docling.",
+    response_format=ResponseFormat.DOCTAGS,
+    inference_framework=InferenceFramework.MLX,
+    supported_devices=[AcceleratorDevice.MPS],
+    scale=2.0,
+    temperature=0.0,
+    max_new_tokens=8192,
+    stop_strings=["</doctag>", "<|end_of_text|>"],
+)
 # SmolDocling
 SMOLDOCLING_MLX = InlineVlmOptions(
     repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
@@ -272,3 +301,4 @@ class VlmModelType(str, Enum):
     GRANITE_VISION_VLLM = "granite_vision_vllm"
     GRANITE_VISION_OLLAMA = "granite_vision_ollama"
     GOT_OCR_2 = "got_ocr_2"
+    GRANITEDOCLING = "granite_docling"

docling/document_converter.py CHANGED Viewed

@@ -25,6 +25,7 @@ from docling.backend.msexcel_backend import MsExcelDocumentBackend
 from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
 from docling.backend.msword_backend import MsWordDocumentBackend
 from docling.backend.noop_backend import NoOpBackend
+from docling.backend.webvtt_backend import WebVTTDocumentBackend
 from docling.backend.xml.jats_backend import JatsDocumentBackend
 from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
 from docling.datamodel.base_models import (
@@ -170,6 +171,9 @@ def _get_default_option(format: InputFormat) -> FormatOption:
             pipeline_cls=SimplePipeline, backend=DoclingJSONBackend
         ),
         InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=NoOpBackend),
+        InputFormat.VTT: FormatOption(
+            pipeline_cls=SimplePipeline, backend=WebVTTDocumentBackend
+        ),
     }
     if (options := format_to_default_options.get(format)) is not None:
         return options

docling/models/rapid_ocr_model.py CHANGED Viewed

@@ -62,32 +62,44 @@ class RapidOcrModel(BaseOcrModel):
             }
             backend_enum = _ALIASES.get(self.options.backend, EngineType.ONNXRUNTIME)
+            params = {
+                # Global settings (these are still correct)
+                "Global.text_score": self.options.text_score,
+                "Global.font_path": self.options.font_path,
+                # "Global.verbose": self.options.print_verbose,
+                # Detection model settings
+                "Det.model_path": self.options.det_model_path,
+                "Det.use_cuda": use_cuda,
+                "Det.use_dml": use_dml,
+                "Det.intra_op_num_threads": intra_op_num_threads,
+                # Classification model settings
+                "Cls.model_path": self.options.cls_model_path,
+                "Cls.use_cuda": use_cuda,
+                "Cls.use_dml": use_dml,
+                "Cls.intra_op_num_threads": intra_op_num_threads,
+                # Recognition model settings
+                "Rec.model_path": self.options.rec_model_path,
+                "Rec.font_path": self.options.rec_font_path,
+                "Rec.keys_path": self.options.rec_keys_path,
+                "Rec.use_cuda": use_cuda,
+                "Rec.use_dml": use_dml,
+                "Rec.intra_op_num_threads": intra_op_num_threads,
+                "Det.engine_type": backend_enum,
+                "Cls.engine_type": backend_enum,
+                "Rec.engine_type": backend_enum,
+            }
+            if self.options.rec_font_path is not None:
+                _log.warning(
+                    "The 'rec_font_path' option for RapidOCR is deprecated. Please use 'font_path' instead."
+                )
+            user_params = self.options.rapidocr_params
+            if user_params:
+                _log.debug("Overwriting RapidOCR params with user-provided values.")
+                params.update(user_params)
             self.reader = RapidOCR(
-                params={
-                    # Global settings (these are still correct)
-                    "Global.text_score": self.options.text_score,
-                    # "Global.verbose": self.options.print_verbose,
-                    # Detection model settings
-                    "Det.model_path": self.options.det_model_path,
-                    "Det.use_cuda": use_cuda,
-                    "Det.use_dml": use_dml,
-                    "Det.intra_op_num_threads": intra_op_num_threads,
-                    # Classification model settings
-                    "Cls.model_path": self.options.cls_model_path,
-                    "Cls.use_cuda": use_cuda,
-                    "Cls.use_dml": use_dml,
-                    "Cls.intra_op_num_threads": intra_op_num_threads,
-                    # Recognition model settings
-                    "Rec.model_path": self.options.rec_model_path,
-                    "Rec.font_path": self.options.rec_font_path,
-                    "Rec.keys_path": self.options.rec_keys_path,
-                    "Rec.use_cuda": use_cuda,
-                    "Rec.use_dml": use_dml,
-                    "Rec.intra_op_num_threads": intra_op_num_threads,
-                    "Det.engine_type": backend_enum,
-                    "Cls.engine_type": backend_enum,
-                    "Rec.engine_type": backend_enum,
-                }
+                params=params,
             )
     def __call__(
@@ -120,6 +132,9 @@ class RapidOcrModel(BaseOcrModel):
                             use_cls=self.options.use_cls,
                             use_rec=self.options.use_rec,
                         )
+                        if result is None or result.boxes is None:
+                            _log.warning("RapidOCR returned empty result!")
+                            continue
                         result = list(
                             zip(result.boxes.tolist(), result.txts, result.scores)
                         )

docling/models/table_structure_model.py CHANGED Viewed

@@ -121,7 +121,7 @@ class TableStructureModel(BasePageModel):
         for table_element in tbl_list:
             x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
-            y0 *= scale_x
+            y0 *= scale_y
             y1 *= scale_y
             x0 *= scale_x
             x1 *= scale_x
@@ -132,7 +132,7 @@ class TableStructureModel(BasePageModel):
                 x0, y0, x1, y1 = cell.rect.to_bounding_box().as_tuple()
                 x0 *= scale_x
                 x1 *= scale_x
-                y0 *= scale_x
+                y0 *= scale_y
                 y1 *= scale_y
                 draw.rectangle([(x0, y0), (x1, y1)], outline="green")
@@ -142,7 +142,7 @@ class TableStructureModel(BasePageModel):
                     x0, y0, x1, y1 = tc.bbox.as_tuple()
                     x0 *= scale_x
                     x1 *= scale_x
-                    y0 *= scale_x
+                    y0 *= scale_y
                     y1 *= scale_y
                     if tc.column_header:

docling/utils/model_downloader.py CHANGED Viewed

@@ -10,6 +10,8 @@ from docling.datamodel.pipeline_options import (
 )
 from docling.datamodel.settings import settings
 from docling.datamodel.vlm_model_specs import (
+    GRANITEDOCLING_MLX,
+    GRANITEDOCLING_TRANSFORMERS,
     SMOLDOCLING_MLX,
     SMOLDOCLING_TRANSFORMERS,
 )
@@ -34,6 +36,8 @@ def download_models(
     with_code_formula: bool = True,
     with_picture_classifier: bool = True,
     with_smolvlm: bool = False,
+    with_granitedocling: bool = False,
+    with_granitedocling_mlx: bool = False,
     with_smoldocling: bool = False,
     with_smoldocling_mlx: bool = False,
     with_granite_vision: bool = False,
@@ -86,6 +90,24 @@ def download_models(
             progress=progress,
         )
+    if with_granitedocling:
+        _log.info("Downloading GraniteDocling model...")
+        download_hf_model(
+            repo_id=GRANITEDOCLING_TRANSFORMERS.repo_id,
+            local_dir=output_dir / GRANITEDOCLING_TRANSFORMERS.repo_cache_folder,
+            force=force,
+            progress=progress,
+        )
+    if with_granitedocling_mlx:
+        _log.info("Downloading GraniteDocling MLX model...")
+        download_hf_model(
+            repo_id=GRANITEDOCLING_MLX.repo_id,
+            local_dir=output_dir / GRANITEDOCLING_MLX.repo_cache_folder,
+            force=force,
+            progress=progress,
+        )
     if with_smoldocling:
         _log.info("Downloading SmolDocling model...")
         download_hf_model(

{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.52.0
+Version: 2.54.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -26,7 +26,7 @@ Requires-Python: <4.0,>=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: pydantic<3.0.0,>=2.0.0
-Requires-Dist: docling-core[chunking]<3.0.0,>=2.48.0
+Requires-Dist: docling-core[chunking]<3.0.0,>=2.48.2
 Requires-Dist: docling-parse<5.0.0,>=4.4.0
 Requires-Dist: docling-ibm-models<4,>=3.9.1
 Requires-Dist: filetype<2.0.0,>=1.2.0
@@ -101,14 +101,14 @@ Docling simplifies document processing, parsing diverse formats — including ad
 ## Features
-* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, images (PNG, TIFF, JPEG, ...), and more
+* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, VTT, images (PNG, TIFF, JPEG, ...), and more
 * 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
 * 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
 * ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
 * 🔒 Local execution capabilities for sensitive data and air-gapped environments
 * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
 * 🔍 Extensive OCR support for scanned PDFs and images
-* 👓 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
+* 👓 Support of several Visual Language Models ([GraniteDocling](https://huggingface.co/ibm-granite/granite-docling-258M))
 * 🎙️ Audio support with Automatic Speech Recognition (ASR) models
 * 🔌 Connect to any agent using the [MCP server](https://docling-project.github.io/docling/usage/mcp/)
 * 💻 Simple and convenient CLI
@@ -117,13 +117,13 @@ Docling simplifies document processing, parsing diverse formats — including ad
 * 📤 Structured [information extraction][extraction] \[🧪 beta\]
 * 📑 New layout model (**Heron**) by default, for faster PDF parsing
 * 🔌 [MCP server](https://docling-project.github.io/docling/usage/mcp/) for agentic applications
+* 💬 Parsing of Web Video Text Tracks (WebVTT) files
 ### Coming soon
 * 📝 Metadata extraction, including title, authors, references & language
 * 📝 Chart understanding (Barchart, Piechart, LinePlot, etc)
 * 📝 Complex chemistry understanding (Molecular structures)
-* 📝 Parsing of Web Video Text Tracks (WebVTT) files
 ## Installation
@@ -160,9 +160,9 @@ Docling has a built-in CLI to run conversions.
 docling https://arxiv.org/pdf/2206.01062
 ```
-You can also use 🥚[SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview) and other VLMs via Docling CLI:
+You can also use 🥚[GraniteDocling](https://huggingface.co/ibm-granite/granite-docling-258M) and other VLMs via Docling CLI:
 ```bash
-docling --pipeline vlm --vlm-model smoldocling https://arxiv.org/pdf/2206.01062
+docling --pipeline vlm --vlm-model granite_docling https://arxiv.org/pdf/2206.01062
 ```
 This will use MLX acceleration on supported Apple Silicon hardware.

{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/document_converter.py,sha256=CKMlobhTt8Y5yZ_tQOnPAP7_otBiddQ_klRGT5Bgwyo,15827
+docling/document_converter.py,sha256=gPyBrNegMgeBGxN7iebrjqEDm7zQQOmFNm8hVi-pFEQ,16013
 docling/document_extractor.py,sha256=-RbQRvLWLXF15HYqBbV_lJhh08Zl487UEQKhP-_FR8k,11969
 docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
 docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
@@ -15,10 +15,11 @@ docling/backend/md_backend.py,sha256=qCI7SD9hnWWGrkG_drpzQv2Z7DVBG4Tsq3hhTsYV790
 docling/backend/mets_gbs_backend.py,sha256=EA8sY6tbmGiysKGYPPZiNlK-i7Adn8bLTo-7Ym15hTU,12774
 docling/backend/msexcel_backend.py,sha256=5JRbPwOjR1r45AMeIts1rj6InbOgLBf_CtAhvNPVmsQ,19157
 docling/backend/mspowerpoint_backend.py,sha256=wJgB2JStEPfD7MPpWQlpPN7bffPxaHFUnKD4wj8SLxU,15114
-docling/backend/msword_backend.py,sha256=fKeAMGGR5ABimedo_ofCQAybzdqmqWA3A3mpLl7X6qY,49129
+docling/backend/msword_backend.py,sha256=kQI9hrx_lvHn__KdxW8MbvB78snoVzA_m4jXx6f_LJ8,54419
 docling/backend/noop_backend.py,sha256=EOPbD86FzZPX-K_DpNrJh0_lC0bZz--4DpG-OagDNGY,1688
 docling/backend/pdf_backend.py,sha256=Wcd1NSrAMjXK8VicTki5p-j-JLofklt07eF0kIG17_0,3361
 docling/backend/pypdfium2_backend.py,sha256=AYhWs9S8W_TkAK0-OkRmUNf4HUZl26FP7-XYjwU5zDk,14209
+docling/backend/webvtt_backend.py,sha256=9xPcfWVLuqhEAFrkv8aU36qHnSgjeINZAXT_C9C6XJA,19165
 docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/docx/latex/latex_dict.py,sha256=tFJp4ScT_AkY2ON7nLEa560p601Jq2glcZvMKxxjn7w,6593
@@ -30,21 +31,21 @@ docling/backend/xml/jats_backend.py,sha256=LPj33EFdi2MRCakkLWrRLlUAc-B-949f8zp5g
 docling/backend/xml/uspto_backend.py,sha256=nyAMr5ht7dclxkVDwsKNeiOhLQrUtRLS8JdscB2AVJg,70924
 docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
 docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/cli/main.py,sha256=K4m7dtnLUM2gqU8n_Mntpc_ODrwWtrjBPTUZakQ8erg,32111
-docling/cli/models.py,sha256=5C3CZz3HZXoCrBl92Is62KMCtUqsZK-oygj1hqzJ8vo,6008
+docling/cli/main.py,sha256=J_hXHclzT-uNu-cuKNdlc3vwCnyDRxXrJ5L2LJofzeo,32729
+docling/cli/models.py,sha256=rw_2JfeJ-k_iOLpz3JfgL1QbJY__W9nE23nHdov6VfU,6252
 docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
 docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/datamodel/accelerator_options.py,sha256=wv6dOFTVAwr9onkE-0pfUqX_fDb6gX53iPPE6o8nKjI,2511
 docling/datamodel/asr_model_specs.py,sha256=Wg7z3zm_wXIWu122iPVy0RMECsA_JCFHrlFF-xxHoVQ,2187
-docling/datamodel/base_models.py,sha256=vOt895z0GsFirHkkI3hM23e9oyUuz9RXfcGFtoINLtw,12334
-docling/datamodel/document.py,sha256=ElY7G6FYJ6Bayyw433_tbnxyE47fnQRoBG_mygvOBrA,17370
+docling/datamodel/base_models.py,sha256=CQ6eThPzVeVD2Gq7BNz9Q5RDLwhe4NgMzk7tdLtk1c8,12382
+docling/datamodel/document.py,sha256=HyO3kdJcXIJ3wL95sPoL3zvsO4Rww3-qHH6IkL4I0q4,17483
 docling/datamodel/extraction.py,sha256=7dgvtK5SuvgfB8LHAwS1FwrW1kcMQJuJG0ol8uAQgoQ,1323
 docling/datamodel/layout_model_specs.py,sha256=GSkJ-Z_0PVgwWGi7C7TsxbzRjlrWS9ZrHJjHumv-Z5U,2339
-docling/datamodel/pipeline_options.py,sha256=N9g-3FA4hFU8A0uGvPmcy1emBBT4JH6u7CUzl3D-Ta0,11049
+docling/datamodel/pipeline_options.py,sha256=28opZ3woXA8IKaG2-BHM-lmmi-gyuScCMHGxhlxGOsk,11290
 docling/datamodel/pipeline_options_asr_model.py,sha256=7X068xl-qpbyPxC7-TwX7Q6tLyZXGT5h1osZ_xLNLM0,1454
 docling/datamodel/pipeline_options_vlm_model.py,sha256=AcqqThSW74hwQ6x7pazzm57LnJiUqB7gQi5wFayGlbk,2628
 docling/datamodel/settings.py,sha256=c0MTw6pO5be_BKxHKYl4SaBJAw_qL-aapxp-g5HHj1A,2084
-docling/datamodel/vlm_model_specs.py,sha256=8D-bF95EoaD-Wd29lVX094HPJT1gYN393aFmzv7RipQ,8713
+docling/datamodel/vlm_model_specs.py,sha256=UMXiTzWCXcx2BtF5slYfWhjRXAx0s1oiAvE-vCzrATo,9686
 docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/models/api_vlm_model.py,sha256=-zisU32pgDRbychyG6-neB0qweNbPaYnLXwiGT7SEdI,2859
 docling/models/base_model.py,sha256=beMGyrpl-yYX3YnLzQkLfxMLxwmDWnbcFhkjbUlWJSU,7146
@@ -59,9 +60,9 @@ docling/models/page_preprocessing_model.py,sha256=EmusNexws5ZmR93js_saVU0BedqZ_H
 docling/models/picture_description_api_model.py,sha256=o3EkV5aHW_6WzE_fdj_VRnNCrS_btclO_ZCLAUqrfl0,2377
 docling/models/picture_description_base_model.py,sha256=kLthLhdlgwhootQ4_xhhcAk6A-vso5-qcsFJ3TcYfO0,2991
 docling/models/picture_description_vlm_model.py,sha256=Uja_BQSk7F-U1J2hm4yeLguirUzKYv1K8zRyw1IYomY,4150
-docling/models/rapid_ocr_model.py,sha256=7yZC7I1qoC9xC8xJIjTk2c8VFm89RfB6Vr7IDOnr5gs,7102
+docling/models/rapid_ocr_model.py,sha256=anUVUwaj9Wubgu4FnHdYMuOVkQP_hJiLY1qRToelBoc,7700
 docling/models/readingorder_model.py,sha256=bZoXHaSwUsa8niSmJrbCuy784ixCeBXT-RQBUfgHJ4A,14925
-docling/models/table_structure_model.py,sha256=7vO8LisdoqCTsY8X8lsk9d-oD2hVjUtdaWlkMTQxEg0,12518
+docling/models/table_structure_model.py,sha256=7g_mFf1YzfF8PXQfefNu6XYZu7TzJAn86zKb6IEUdCg,12518
 docling/models/tesseract_ocr_cli_model.py,sha256=I3Gn28Y-LD8OfvyCElN9fLiNgpo2sT0uMkVt258253s,12881
 docling/models/tesseract_ocr_model.py,sha256=GdI5Cjfi87qcehVbM3wdKRvKkl_F9A4bwTUbjXZCJYA,10745
 docling/models/factories/__init__.py,sha256=x_EM5dDg_A3HBcBYzOoqwmA2AFLtJ1IzYDPX-R1A-Sg,868
@@ -93,15 +94,15 @@ docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
 docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
 docling/utils/layout_postprocessor.py,sha256=sE9UR3Nv4iOk26uoIsN3bFioE7ScfAjj0orDBDneLXg,25166
 docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
-docling/utils/model_downloader.py,sha256=lAIyevIC6dyv1TS0ElRSAGNylB5n_V8pWs1PhxH8wAQ,4104
+docling/utils/model_downloader.py,sha256=kFIxr5KUQbisQH0h8yP9GZMqsRJD3Xo1uOIiLiB1T78,4869
 docling/utils/ocr_utils.py,sha256=nmresYyfin0raanpQc_GGeU3WoLsfExf6SEXNIQ7Djg,2325
 docling/utils/orientation.py,sha256=jTyLxyT31FlOodZoBMlADHNQK2lAWKYVs5z7pXd_6Cg,1842
 docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
 docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
 docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
-docling-2.52.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
-docling-2.52.0.dist-info/METADATA,sha256=EhUePtqwKQJTgkU9pCtvpWT7wtU-84KXkc48XExkRSQ,11233
-docling-2.52.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-docling-2.52.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
-docling-2.52.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
-docling-2.52.0.dist-info/RECORD,,
+docling-2.54.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling-2.54.0.dist-info/METADATA,sha256=_GsdUYyPCv8XKeLeSO9Y0euAH8Eanr5i_y5kLvDEb1g,11252
+docling-2.54.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+docling-2.54.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
+docling-2.54.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
+docling-2.54.0.dist-info/RECORD,,

{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{docling-2.52.0.dist-info → docling-2.54.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

docling 2.52.0__py3-none-any.whl → 2.54.0__py3-none-any.whl

Potentially problematic release.

docling 2.52.0__py3-none-any.whl → 2.54.0__py3-none-any.whl