PyPI - docling - Versions diffs - 2.37.0__py3-none-any.whl → 2.38.1__py3-none-any.whl - Mend

docling 2.37.0-py3-none-any.whl → 2.38.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

docling/backend/md_backend.py +185 -80
docling/backend/msword_backend.py +76 -63
docling/backend/noop_backend.py +51 -0
docling/cli/main.py +82 -14
docling/datamodel/asr_model_specs.py +92 -0
docling/datamodel/base_models.py +12 -2
docling/datamodel/document.py +3 -1
docling/datamodel/pipeline_options.py +13 -2
docling/datamodel/pipeline_options_asr_model.py +57 -0
docling/datamodel/pipeline_options_vlm_model.py +2 -3
docling/document_converter.py +8 -0
docling/models/api_vlm_model.py +3 -1
docling/models/base_model.py +1 -1
docling/models/readingorder_model.py +1 -1
docling/models/vlm_models_inline/hf_transformers_model.py +3 -1
docling/models/vlm_models_inline/mlx_model.py +3 -1
docling/pipeline/asr_pipeline.py +253 -0
docling/pipeline/base_pipeline.py +11 -0
{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/METADATA +7 -4
{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/RECORD +24 -20
{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/WHEEL +0 -0
{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/entry_points.txt +0 -0
{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/licenses/LICENSE +0 -0
{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/top_level.txt +0 -0

docling/pipeline/asr_pipeline.py ADDED Viewed

@@ -0,0 +1,253 @@
+import logging
+import os
+import re
+from io import BytesIO
+from pathlib import Path
+from typing import List, Optional, Union, cast
+from docling_core.types.doc import DoclingDocument, DocumentOrigin
+# import whisper  # type: ignore
+# import librosa
+# import numpy as np
+# import soundfile as sf  # type: ignore
+from docling_core.types.doc.labels import DocItemLabel
+from pydantic import BaseModel, Field, validator
+from docling.backend.abstract_backend import AbstractDocumentBackend
+from docling.backend.noop_backend import NoOpBackend
+# from pydub import AudioSegment  # type: ignore
+# from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
+from docling.datamodel.accelerator_options import (
+    AcceleratorOptions,
+)
+from docling.datamodel.base_models import (
+    ConversionStatus,
+    FormatToMimeType,
+)
+from docling.datamodel.document import ConversionResult, InputDocument
+from docling.datamodel.pipeline_options import (
+    AsrPipelineOptions,
+)
+from docling.datamodel.pipeline_options_asr_model import (
+    InlineAsrNativeWhisperOptions,
+    # AsrResponseFormat,
+    InlineAsrOptions,
+)
+from docling.datamodel.pipeline_options_vlm_model import (
+    InferenceFramework,
+)
+from docling.datamodel.settings import settings
+from docling.pipeline.base_pipeline import BasePipeline
+from docling.utils.accelerator_utils import decide_device
+from docling.utils.profiling import ProfilingScope, TimeRecorder
+_log = logging.getLogger(__name__)
+class _ConversationWord(BaseModel):
+    # A single transcribed word together with its optional time-stamps.
+    # Both time-stamps default to None; only `end_time` carries a `ge=0` bound.
+    text: str
+    start_time: Optional[float] = Field(
+        default=None,
+        description="Start time in seconds from video start",
+    )
+    end_time: Optional[float] = Field(
+        default=None,
+        ge=0,
+        description="End time in seconds from video start",
+    )
+class _ConversationItem(BaseModel):
+    # One transcribed segment of a conversation: its text plus optional
+    # time-stamps, speaker information and per-word timings.
+    # NOTE: `<` and `==` compare `start_time` only, not the text or speaker.
+    text: str
+    start_time: Optional[float] = Field(
+        None, description="Start time in seconds from video start"
+    )
+    end_time: Optional[float] = Field(
+        None, ge=0, description="End time in seconds from video start"
+    )
+    speaker_id: Optional[int] = Field(None, description="Numeric speaker identifier")
+    speaker: Optional[str] = Field(
+        None, description="Speaker name, defaults to speaker-{speaker_id}"
+    )
+    words: Optional[list[_ConversationWord]] = Field(
+        None, description="Individual words with time-stamps"
+    )
+    def __lt__(self, other):
+        """Order items by start time; items without a start time sort first."""
+        if not isinstance(other, _ConversationItem):
+            return NotImplemented
+        if self.start_time is None or other.start_time is None:
+            # `None < float` raises TypeError, and `start_time` defaults to None.
+            # Untimed items are placed before timed ones so sorting never crashes.
+            return self.start_time is None and other.start_time is not None
+        return self.start_time < other.start_time
+    def __eq__(self, other):
+        """Two items are equal when their start times are equal (None == None)."""
+        if not isinstance(other, _ConversationItem):
+            return NotImplemented
+        return self.start_time == other.start_time
+    def to_string(self) -> str:
+        """Format the conversation entry as a string.
+        The optional `[time: start-end] ` prefix is emitted only when both
+        time-stamps are set; the optional `[speaker:name] ` prefix only when a
+        speaker name is set. The segment text always comes last.
+        """
+        prefixes: list[str] = []
+        if self.start_time is not None and self.end_time is not None:
+            prefixes.append(f"[time: {self.start_time}-{self.end_time}] ")
+        if self.speaker is not None:
+            prefixes.append(f"[speaker:{self.speaker}] ")
+        return "".join(prefixes) + self.text
+class _NativeWhisperModel:
+    """Wrapper around a natively loaded Whisper model that transcribes audio files."""
+    def __init__(
+        self,
+        enabled: bool,
+        artifacts_path: Optional[Path],
+        accelerator_options: AcceleratorOptions,
+        asr_options: InlineAsrNativeWhisperOptions,
+    ):
+        """
+        Transcriber using native Whisper.
+        Args:
+            enabled: When False no model is loaded and only `self.enabled` is set,
+                so `run`/`transcribe` must not be expected to succeed.
+            artifacts_path: Optional directory handed to Whisper as `download_root`.
+            accelerator_options: Supplies the requested device.
+            asr_options: Supplies the model name (`repo_id`), supported devices,
+                and the `verbose`, `timestamps` and `word_timestamps` flags.
+        Raises:
+            ImportError: If enabled and the `whisper` package cannot be imported.
+        """
+        self.enabled = enabled
+        _log.info(f"artifacts-path: {artifacts_path}")
+        _log.info(f"accelerator_options: {accelerator_options}")
+        if self.enabled:
+            try:
+                import whisper  # type: ignore
+            except ImportError as exc:
+                # Chain the original error so the real import failure stays visible.
+                raise ImportError(
+                    "whisper is not installed. Please install it via `pip install openai-whisper` or do `uv sync --extra asr`."
+                ) from exc
+            self.asr_options = asr_options
+            self.max_tokens = asr_options.max_new_tokens
+            self.temperature = asr_options.temperature
+            # presumably picks the requested device if it is among the supported ones
+            # — confirm against `decide_device`.
+            self.device = decide_device(
+                accelerator_options.device,
+                supported_devices=asr_options.supported_devices,
+            )
+            _log.info(f"Available device for Whisper: {self.device}")
+            self.model_name = asr_options.repo_id
+            _log.info(f"loading _NativeWhisperModel({self.model_name})")
+            if artifacts_path is not None:
+                _log.info(f"loading {self.model_name} from {artifacts_path}")
+                self.model = whisper.load_model(
+                    name=self.model_name,
+                    device=self.device,
+                    download_root=str(artifacts_path),
+                )
+            else:
+                self.model = whisper.load_model(
+                    name=self.model_name, device=self.device
+                )
+            self.verbose = asr_options.verbose
+            self.timestamps = asr_options.timestamps
+            self.word_timestamps = asr_options.word_timestamps
+    def run(self, conv_res: ConversionResult) -> ConversionResult:
+        """Transcribe the input file of `conv_res` and store the result as its document.
+        On success the document holds one TEXT item per transcribed segment and
+        the status is SUCCESS. Any exception during transcription or document
+        construction is logged and turned into status FAILURE instead of being raised.
+        Returns:
+            The same `conv_res` object, updated in place.
+        """
+        audio_path: Path = Path(conv_res.input.file).resolve()
+        try:
+            conversation = self.transcribe(audio_path)
+            # Ensure we have a proper DoclingDocument
+            # NOTE(review): the mimetype is fixed to WAV regardless of the input file.
+            origin = DocumentOrigin(
+                filename=conv_res.input.file.name or "audio.wav",
+                mimetype="audio/x-wav",
+                binary_hash=conv_res.input.document_hash,
+            )
+            conv_res.document = DoclingDocument(
+                name=conv_res.input.file.stem or "audio.wav", origin=origin
+            )
+            for citem in conversation:
+                conv_res.document.add_text(
+                    label=DocItemLabel.TEXT, text=citem.to_string()
+                )
+        except Exception as exc:
+            # Best-effort conversion: report the failure through the status rather
+            # than raising, but keep the traceback in the log for diagnosis.
+            _log.exception(f"Audio transcription has an error: {exc}")
+            conv_res.status = ConversionStatus.FAILURE
+            return conv_res
+        conv_res.status = ConversionStatus.SUCCESS
+        return conv_res
+    def transcribe(self, fpath: Path) -> list[_ConversationItem]:
+        """Run Whisper on `fpath` and convert its segments into conversation items.
+        Every item gets the segment's start, end and text. Word-level entries are
+        filled in only when `self.word_timestamps` is set and the segment contains
+        a "words" entry; otherwise `words` is an empty list.
+        """
+        result = self.model.transcribe(
+            str(fpath), verbose=self.verbose, word_timestamps=self.word_timestamps
+        )
+        convo: list[_ConversationItem] = []
+        for segment in result["segments"]:
+            words: list[_ConversationWord] = []
+            if self.word_timestamps and "words" in segment:
+                words = [
+                    _ConversationWord(
+                        start_time=word["start"],
+                        end_time=word["end"],
+                        text=word["word"],
+                    )
+                    for word in segment["words"]
+                ]
+            convo.append(
+                _ConversationItem(
+                    start_time=segment["start"],
+                    end_time=segment["end"],
+                    text=segment["text"],
+                    words=words,
+                )
+            )
+        return convo
+class AsrPipeline(BasePipeline):
+    """Pipeline that builds a document from an audio input by running an ASR model."""
+    def __init__(self, pipeline_options: AsrPipelineOptions):
+        """Resolve the artifacts folder and load the ASR model selected by the options.
+        Raises:
+            RuntimeError: If an artifacts path is configured but is not a directory.
+        """
+        super().__init__(pipeline_options)
+        self.keep_backend = True
+        self.pipeline_options: AsrPipelineOptions = pipeline_options
+        # Stays None when the configured ASR options are not supported, so that
+        # `_build_document` can fail with a clear message instead of an AttributeError.
+        self._model: Optional[_NativeWhisperModel] = None
+        # The pipeline-level artifacts path takes precedence over the global setting.
+        artifacts_path: Optional[Path] = None
+        if pipeline_options.artifacts_path is not None:
+            artifacts_path = Path(pipeline_options.artifacts_path).expanduser()
+        elif settings.artifacts_path is not None:
+            artifacts_path = Path(settings.artifacts_path).expanduser()
+        if artifacts_path is not None and not artifacts_path.is_dir():
+            raise RuntimeError(
+                f"The value of {artifacts_path=} is not valid. "
+                "When defined, it must point to a folder containing all models required by the pipeline."
+            )
+        if isinstance(self.pipeline_options.asr_options, InlineAsrNativeWhisperOptions):
+            asr_options: InlineAsrNativeWhisperOptions = (
+                self.pipeline_options.asr_options
+            )
+            self._model = _NativeWhisperModel(
+                enabled=True,  # must be always enabled for this pipeline to make sense.
+                artifacts_path=artifacts_path,
+                accelerator_options=pipeline_options.accelerator_options,
+                asr_options=asr_options,
+            )
+        else:
+            _log.error(f"No model support for {self.pipeline_options.asr_options}")
+    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
+        """Return FAILURE if the model run marked the result as failed, else SUCCESS."""
+        # `_NativeWhisperModel.run` reports transcription errors through
+        # `conv_res.status`; returning SUCCESS unconditionally would hide them.
+        # NOTE(review): assumes the base pipeline assigns the final status from
+        # this hook — confirm against BasePipeline.
+        if conv_res.status == ConversionStatus.FAILURE:
+            return ConversionStatus.FAILURE
+        return ConversionStatus.SUCCESS
+    @classmethod
+    def get_default_options(cls) -> AsrPipelineOptions:
+        """Return a default-constructed `AsrPipelineOptions`."""
+        return AsrPipelineOptions()
+    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
+        """Run the ASR model on `conv_res` inside a "doc_build" timing scope.
+        Raises:
+            RuntimeError: If no model was loaded because the ASR options are unsupported.
+        """
+        _log.info(f"start _build_document in AsrPipeline: {conv_res.input.file}")
+        if self._model is None:
+            raise RuntimeError(
+                f"No model support for {self.pipeline_options.asr_options}"
+            )
+        with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
+            self._model.run(conv_res=conv_res)
+        return conv_res
+    @classmethod
+    def is_backend_supported(cls, backend: AbstractDocumentBackend):
+        """Only the no-op backend is supported by this pipeline."""
+        return isinstance(backend, NoOpBackend)

docling/pipeline/base_pipeline.py CHANGED Viewed

@@ -193,6 +193,17 @@ class PaginatedPipeline(BasePipeline):  # TODO this is a bad name.
                 )
                 raise e
+            # Filter out uninitialized pages (those with size=None) that may remain
+            # after timeout or processing failures to prevent assertion errors downstream
+            initial_page_count = len(conv_res.pages)
+            conv_res.pages = [page for page in conv_res.pages if page.size is not None]
+            if len(conv_res.pages) < initial_page_count:
+                _log.info(
+                    f"Filtered out {initial_page_count - len(conv_res.pages)} uninitialized pages "
+                    f"due to timeout or processing failures"
+                )
         return conv_res
     def _unload(self, conv_res: ConversionResult) -> ConversionResult:

{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.37.0
+Version: 2.38.1
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -61,6 +61,8 @@ Requires-Dist: mlx-vlm>=0.1.22; (python_version >= "3.10" and sys_platform == "d
 Provides-Extra: rapidocr
 Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
 Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
+Provides-Extra: asr
+Requires-Dist: openai-whisper>=20240930; extra == "asr"
 Dynamic: license-file
 <p align="center">
@@ -93,14 +95,15 @@ Docling simplifies document processing, parsing diverse formats — including ad
 ## Features
-* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, XLSX, HTML, images, and more
+* 🗂️  Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, images (PNG, TIFF, JPEG, ...), and more
 * 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
 * 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
-* ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, and lossless JSON
+* ↪️  Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
 * 🔒 Local execution capabilities for sensitive data and air-gapped environments
 * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
 * 🔍 Extensive OCR support for scanned PDFs and images
-* 🥚 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
+* 👓 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
+* 🎙️  Support for Audio with Automatic Speech Recognition (ASR) models
 * 💻 Simple and convenient CLI
 ### Coming soon

{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/document_converter.py,sha256=bnUA9k1LCuCfNwCsneGQiGCvFdnX8W-vbpnu6U_fuuI,14003
+docling/document_converter.py,sha256=3jWywP_TLy-1PMvjJBUlnTM9FNzpBLRCHYA1RKFvGR4,14333
 docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
 docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -10,10 +10,11 @@ docling/backend/docling_parse_backend.py,sha256=9rUo1vPxX6QLzGqF-2B2iEYglZg6YQ3U
 docling/backend/docling_parse_v2_backend.py,sha256=3ckTfke8IICjaImlIzc3TRhG7KDuxDDba0AuCEcjA-M,9500
 docling/backend/docling_parse_v4_backend.py,sha256=7tQvpCwpYoq98PNszDkrXaFhy5eWmQqMP4RjWWPLPgw,6197
 docling/backend/html_backend.py,sha256=3K-l5SUAAyqISNEb7nPst_I51xzYOVOkgmwXh3lv9sw,21063
-docling/backend/md_backend.py,sha256=JkY1qTvQFXjKSZGfD-83d-fZelorUG_l6mpJdYGqvX8,17210
+docling/backend/md_backend.py,sha256=kSQ7dn_IrAmt53kL_0Z5LnpE2fWif9RkBAGtqzgfQaM,20514
 docling/backend/msexcel_backend.py,sha256=3j0WQfqDpgPXdPMCguefdv7arcNVDedPD6gl54cmLn8,18110
 docling/backend/mspowerpoint_backend.py,sha256=0lsb8ZeQFxbDt7jZpSQyk5wYHYa3SP2T2y2dMI-o30o,15216
-docling/backend/msword_backend.py,sha256=GCwUnebgRgvHlF6z1RP8RUb1nhHheJ5bpiVeTfNGsBU,44694
+docling/backend/msword_backend.py,sha256=xj009k1s7uzmNx3yGZZelsSgxa6ylaJ1yYHxYfHVLOo,44975
+docling/backend/noop_backend.py,sha256=EOPbD86FzZPX-K_DpNrJh0_lC0bZz--4DpG-OagDNGY,1688
 docling/backend/pdf_backend.py,sha256=KE9TMuFO5WX-o5A_DAd4tEaLi4HMZ4XjKdpllItVkWM,2238
 docling/backend/pypdfium2_backend.py,sha256=8dVniLHgiTdJuDbYr66kPp6Ccv5ZDlqDMEbA2xIfS7U,13370
 docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -27,20 +28,22 @@ docling/backend/xml/jats_backend.py,sha256=ghGi9bHjx3BvaOtmzLw86-wZy4UxpQPOPQL4e
 docling/backend/xml/uspto_backend.py,sha256=nyAMr5ht7dclxkVDwsKNeiOhLQrUtRLS8JdscB2AVJg,70924
 docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
 docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/cli/main.py,sha256=fDGjepShl6KO_BdA6qUNyNBoCjqZUKRnmmkzesGtvVU,27202
+docling/cli/main.py,sha256=D2gEoArnQ2yQ9BesH9CkxZbYQyhZRGgjjNWYqmRRUtU,29617
 docling/cli/models.py,sha256=9yLGp6QRJGpR86U3SjmWAXDt3MvBaJLLY4xDVdsu3O8,4160
 docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
 docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/datamodel/accelerator_options.py,sha256=wv6dOFTVAwr9onkE-0pfUqX_fDb6gX53iPPE6o8nKjI,2511
-docling/datamodel/base_models.py,sha256=iHkzAgWXPyvYwhqrcsgHTY1YoKoQZQO3eNvIjxagRp0,10818
-docling/datamodel/document.py,sha256=vPwiVU5zWCKbVYMq-TSmb7LTjijrqJq0FyAgDBa0XGA,16154
-docling/datamodel/pipeline_options.py,sha256=NCldcrDjmV_N1PUtK4FfpxVQaKj4f0IdSIbXf5nZYVY,9155
-docling/datamodel/pipeline_options_vlm_model.py,sha256=kivUljsC97CQGb7VEJ5nqC-d26q9Kj_2uRdInH1YTX4,2052
+docling/datamodel/asr_model_specs.py,sha256=L7ETXsUKVbPsVcPLhEIMxQjd4UzMGZBVsy74CLsZBkU,2181
+docling/datamodel/base_models.py,sha256=67o1ptOTT8tW7i-g6gM2JKEX_1CDbmKEMQ_B9ZYM2z0,11156
+docling/datamodel/document.py,sha256=CA_dgt4V_phze5HXpfgfKNBKd1cPC1o3WE_IENX63EM,16252
+docling/datamodel/pipeline_options.py,sha256=7mKv1IThXYpu3osggp_Y2h7E5C8nbxJLQXS7JJPMvYQ,9479
+docling/datamodel/pipeline_options_asr_model.py,sha256=7X068xl-qpbyPxC7-TwX7Q6tLyZXGT5h1osZ_xLNLM0,1454
+docling/datamodel/pipeline_options_vlm_model.py,sha256=rtDMVtKFZbgQD269w8FvHMXEhdRBrsA4rVYk6A-M-b4,2063
 docling/datamodel/settings.py,sha256=ajMz7Ao2m0ZGYkfArqTDDbiF89O408mtgeh06PUi0MA,1900
 docling/datamodel/vlm_model_specs.py,sha256=--jZexGeu-s_lWp7y_WwWEf6CD1J4XqADrS1-OY_pWM,4737
 docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/models/api_vlm_model.py,sha256=w3P1wOsr3JvZsawbK1Z4uwnD5ehUMbcKGkyhcX83Okc,2738
-docling/models/base_model.py,sha256=Zx_nByGYkubTvvYiQxwiB6P8lc7wOD4ZTC2QIw6vCEg,2950
+docling/models/api_vlm_model.py,sha256=GDDJGAia4SJjK7JFxsZy5oEU-D8yQo8Kb3NvvPbTvT0,2820
+docling/models/base_model.py,sha256=NNjIapqCruAEAWR-CCdsNgXc2QkwiPYAcaQ_ZYe1W28,2978
 docling/models/base_ocr_model.py,sha256=HtrefTq9Zy4UnUInMchPv0tbobiA7CQU5VUauKJD7IU,8006
 docling/models/code_formula_model.py,sha256=5uWh-eI-Ejmv3DujKJoKKgJBuvPLokt7AJ_ybt8VHEw,11373
 docling/models/document_picture_classifier.py,sha256=fkJLV7pMy3v6iNwOzVb6zdBU1dGtBM1ARHLIRPfoAG4,6124
@@ -53,7 +56,7 @@ docling/models/picture_description_api_model.py,sha256=o3EkV5aHW_6WzE_fdj_VRnNCr
 docling/models/picture_description_base_model.py,sha256=kLthLhdlgwhootQ4_xhhcAk6A-vso5-qcsFJ3TcYfO0,2991
 docling/models/picture_description_vlm_model.py,sha256=7LeCx9ZdPxsmWJ468OtxCdAkH48A1HD0iwH9cs_7-1Q,3800
 docling/models/rapid_ocr_model.py,sha256=AMdc66s_iWO4p6nQ0LNjQMUYVxrDSxMyLNPpjPYt6N8,5916
-docling/models/readingorder_model.py,sha256=46ZYGJrRIp2ueJAQPmqXHjEw-5LcNtVUECSd4yIcHnM,14582
+docling/models/readingorder_model.py,sha256=QHb5fyiqmxU8lg4W5IzdukqHPh6V7rNw_57O4-z-Az4,14615
 docling/models/table_structure_model.py,sha256=dQf6u_zn5fHCkHzmTwYfCbRtZCBddsyAM0WNVBUUQzk,12473
 docling/models/tesseract_ocr_cli_model.py,sha256=qcM3-n7Z_dm1CGBhVUcNr2XT41iXnU32zk4RqKHBl9I,12775
 docling/models/tesseract_ocr_model.py,sha256=9DPAE7XP7smej7HYhr7mdwpuxSjAcv_GPrYZG3bb1RA,10587
@@ -66,10 +69,11 @@ docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurH
 docling/models/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/models/utils/hf_model_download.py,sha256=scBEfsM4yl7xPzqe7UtPvDh9RfQZQnuOhqQKilYBHls,984
 docling/models/vlm_models_inline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/models/vlm_models_inline/hf_transformers_model.py,sha256=4o1_G2__4opIl3J1HzujmdGyZaabqtEGgTmkraZYsXo,7343
-docling/models/vlm_models_inline/mlx_model.py,sha256=CFe1UNxQufZd5K4iaOW3HsplQBPb_1cENf3KIwWUSWw,5702
+docling/models/vlm_models_inline/hf_transformers_model.py,sha256=w9_N4ccjmYYK5yYQou0LSMGaj6gs8l0hULvXbkfYXSQ,7425
+docling/models/vlm_models_inline/mlx_model.py,sha256=qpyi6fGHm0vPqW2yeTsRBKOTTshNJ1LAPbH1SBDp8Y8,5784
 docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/pipeline/base_pipeline.py,sha256=DnuxAf7EQusdSRae0QUVth-0f2mSff8JZjX-2vazk00,8751
+docling/pipeline/asr_pipeline.py,sha256=tQkhu9fXdkSuYIL22xzV2YRUlQh-9qktHBbs2qeXhJI,9070
+docling/pipeline/base_pipeline.py,sha256=14yQrDjsojl4RgbBjKFSEfVBYR_sULZfBI1uDzFLi8Y,9331
 docling/pipeline/simple_pipeline.py,sha256=TXZOwR7hZRji462ZTIpte0VJjzbxvNVE8dbLFANDhSU,2253
 docling/pipeline/standard_pdf_pipeline.py,sha256=2Hqg2wnAXfbZbLUOQrRus8PMEuZ549jR1mfR86-CAB4,12659
 docling/pipeline/vlm_pipeline.py,sha256=IrjDbajCPmUPep_jATKNiABST4tQ8mvpkQz9mtBQ8qQ,15279
@@ -86,9 +90,9 @@ docling/utils/orientation.py,sha256=xXlOfowL54FKwjsTFrM7y3ogk1wChLNn_-u74tYIf1s,
 docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
 docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
 docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
-docling-2.37.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
-docling-2.37.0.dist-info/METADATA,sha256=MvNxmbh1_bNJ8Z2_GG3EoJHH2S-5rbOEBpM6x4LQeoA,10036
-docling-2.37.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-docling-2.37.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
-docling-2.37.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
-docling-2.37.0.dist-info/RECORD,,
+docling-2.38.1.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling-2.38.1.dist-info/METADATA,sha256=14E9MwQXlyuB4nWa31ZTjW6vvv5p2eCs2xxVTE4-qT4,10273
+docling-2.38.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+docling-2.38.1.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
+docling-2.38.1.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
+docling-2.38.1.dist-info/RECORD,,

{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{docling-2.37.0.dist-info → docling-2.38.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

docling 2.37.0__py3-none-any.whl → 2.38.1__py3-none-any.whl

docling 2.37.0-py3-none-any.whl → 2.38.1-py3-none-any.whl