docling 2.56.1__py3-none-any.whl → 2.58.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of docling might be problematic.
- docling/backend/abstract_backend.py +24 -3
- docling/backend/asciidoc_backend.py +3 -3
- docling/backend/docling_parse_v4_backend.py +15 -4
- docling/backend/docx/drawingml/utils.py +131 -0
- docling/backend/html_backend.py +130 -20
- docling/backend/md_backend.py +27 -5
- docling/backend/msexcel_backend.py +115 -27
- docling/backend/mspowerpoint_backend.py +2 -2
- docling/backend/msword_backend.py +104 -29
- docling/backend/pdf_backend.py +9 -2
- docling/backend/pypdfium2_backend.py +12 -3
- docling/cli/main.py +85 -30
- docling/datamodel/asr_model_specs.py +408 -6
- docling/datamodel/backend_options.py +82 -0
- docling/datamodel/base_models.py +17 -2
- docling/datamodel/document.py +81 -48
- docling/datamodel/pipeline_options_asr_model.py +21 -1
- docling/document_converter.py +37 -45
- docling/document_extractor.py +12 -11
- docling/models/readingorder_model.py +6 -7
- docling/pipeline/asr_pipeline.py +139 -3
- docling/pipeline/vlm_pipeline.py +53 -33
- docling/utils/api_image_request.py +4 -4
- docling/utils/layout_postprocessor.py +23 -24
- {docling-2.56.1.dist-info → docling-2.58.0.dist-info}/METADATA +4 -2
- {docling-2.56.1.dist-info → docling-2.58.0.dist-info}/RECORD +30 -28
- {docling-2.56.1.dist-info → docling-2.58.0.dist-info}/WHEEL +0 -0
- {docling-2.56.1.dist-info → docling-2.58.0.dist-info}/entry_points.txt +0 -0
- {docling-2.56.1.dist-info → docling-2.58.0.dist-info}/licenses/LICENSE +0 -0
- {docling-2.56.1.dist-info → docling-2.58.0.dist-info}/top_level.txt +0 -0
docling/pipeline/asr_pipeline.py
CHANGED

```diff
@@ -4,7 +4,7 @@ import re
 import tempfile
 from io import BytesIO
 from pathlib import Path
-from typing import List, Optional, Union, cast
+from typing import TYPE_CHECKING, List, Optional, Union, cast
 
 from docling_core.types.doc import DoclingDocument, DocumentOrigin
 
@@ -32,6 +32,7 @@ from docling.datamodel.pipeline_options import (
     AsrPipelineOptions,
 )
 from docling.datamodel.pipeline_options_asr_model import (
+    InlineAsrMlxWhisperOptions,
     InlineAsrNativeWhisperOptions,
     # AsrResponseFormat,
     InlineAsrOptions,
@@ -228,22 +229,157 @@ class _NativeWhisperModel:
         return convo
 
 
+class _MlxWhisperModel:
+    def __init__(
+        self,
+        enabled: bool,
+        artifacts_path: Optional[Path],
+        accelerator_options: AcceleratorOptions,
+        asr_options: InlineAsrMlxWhisperOptions,
+    ):
+        """
+        Transcriber using MLX Whisper for Apple Silicon optimization.
+        """
+        self.enabled = enabled
+
+        _log.info(f"artifacts-path: {artifacts_path}")
+        _log.info(f"accelerator_options: {accelerator_options}")
+
+        if self.enabled:
+            try:
+                import mlx_whisper  # type: ignore
+            except ImportError:
+                raise ImportError(
+                    "mlx-whisper is not installed. Please install it via `pip install mlx-whisper` or do `uv sync --extra asr`."
+                )
+            self.asr_options = asr_options
+            self.mlx_whisper = mlx_whisper
+
+            self.device = decide_device(
+                accelerator_options.device,
+                supported_devices=asr_options.supported_devices,
+            )
+            _log.info(f"Available device for MLX Whisper: {self.device}")
+
+            self.model_name = asr_options.repo_id
+            _log.info(f"loading _MlxWhisperModel({self.model_name})")
+
+            # MLX Whisper models are loaded differently - they use HuggingFace repos
+            self.model_path = self.model_name
+
+            # Store MLX-specific options
+            self.language = asr_options.language
+            self.task = asr_options.task
+            self.word_timestamps = asr_options.word_timestamps
+            self.no_speech_threshold = asr_options.no_speech_threshold
+            self.logprob_threshold = asr_options.logprob_threshold
+            self.compression_ratio_threshold = asr_options.compression_ratio_threshold
+
+    def run(self, conv_res: ConversionResult) -> ConversionResult:
+        audio_path: Path = Path(conv_res.input.file).resolve()
+
+        try:
+            conversation = self.transcribe(audio_path)
+
+            # Ensure we have a proper DoclingDocument
+            origin = DocumentOrigin(
+                filename=conv_res.input.file.name or "audio.wav",
+                mimetype="audio/x-wav",
+                binary_hash=conv_res.input.document_hash,
+            )
+            conv_res.document = DoclingDocument(
+                name=conv_res.input.file.stem or "audio.wav", origin=origin
+            )
+
+            for citem in conversation:
+                conv_res.document.add_text(
+                    label=DocItemLabel.TEXT, text=citem.to_string()
+                )
+
+            conv_res.status = ConversionStatus.SUCCESS
+            return conv_res
+
+        except Exception as exc:
+            _log.error(f"MLX Audio transcription has an error: {exc}")
+
+            conv_res.status = ConversionStatus.FAILURE
+            return conv_res
+
+    def transcribe(self, fpath: Path) -> list[_ConversationItem]:
+        """
+        Transcribe audio using MLX Whisper.
+
+        Args:
+            fpath: Path to audio file
+
+        Returns:
+            List of conversation items with timestamps
+        """
+        result = self.mlx_whisper.transcribe(
+            str(fpath),
+            path_or_hf_repo=self.model_path,
+            language=self.language,
+            task=self.task,
+            word_timestamps=self.word_timestamps,
+            no_speech_threshold=self.no_speech_threshold,
+            logprob_threshold=self.logprob_threshold,
+            compression_ratio_threshold=self.compression_ratio_threshold,
+        )
+
+        convo: list[_ConversationItem] = []
+
+        # MLX Whisper returns segments similar to native Whisper
+        for segment in result.get("segments", []):
+            item = _ConversationItem(
+                start_time=segment.get("start"),
+                end_time=segment.get("end"),
+                text=segment.get("text", "").strip(),
+                words=[],
+            )
+
+            # Add word-level timestamps if available
+            if self.word_timestamps and "words" in segment:
+                item.words = []
+                for word_data in segment["words"]:
+                    item.words.append(
+                        _ConversationWord(
+                            start_time=word_data.get("start"),
+                            end_time=word_data.get("end"),
+                            text=word_data.get("word", ""),
+                        )
+                    )
+            convo.append(item)
+
+        return convo
+
+
 class AsrPipeline(BasePipeline):
     def __init__(self, pipeline_options: AsrPipelineOptions):
         super().__init__(pipeline_options)
         self.keep_backend = True
 
         self.pipeline_options: AsrPipelineOptions = pipeline_options
+        self._model: Union[_NativeWhisperModel, _MlxWhisperModel]
 
         if isinstance(self.pipeline_options.asr_options, InlineAsrNativeWhisperOptions):
-            asr_options: InlineAsrNativeWhisperOptions = (
+            native_asr_options: InlineAsrNativeWhisperOptions = (
                 self.pipeline_options.asr_options
             )
             self._model = _NativeWhisperModel(
                 enabled=True,  # must be always enabled for this pipeline to make sense.
                 artifacts_path=self.artifacts_path,
                 accelerator_options=pipeline_options.accelerator_options,
-                asr_options=asr_options,
+                asr_options=native_asr_options,
+            )
+        elif isinstance(self.pipeline_options.asr_options, InlineAsrMlxWhisperOptions):
+            mlx_asr_options: InlineAsrMlxWhisperOptions = (
+                self.pipeline_options.asr_options
+            )
+            self._model = _MlxWhisperModel(
+                enabled=True,  # must be always enabled for this pipeline to make sense.
+                artifacts_path=self.artifacts_path,
+                accelerator_options=pipeline_options.accelerator_options,
+                asr_options=mlx_asr_options,
            )
        else:
            _log.error(f"No model support for {self.pipeline_options.asr_options}")
```
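The pipeline dispatches purely on the runtime type of `pipeline_options.asr_options`: an `InlineAsrMlxWhisperOptions` instance selects `_MlxWhisperModel`, while native Whisper options keep selecting `_NativeWhisperModel`. A minimal sketch of opting into the MLX branch, assuming `InlineAsrMlxWhisperOptions` accepts as constructor fields the attributes read in `__init__` above (`repo_id`, `language`, `task`, `word_timestamps`, ...); the model repo shown is only an example:

```python
# Sketch: field names are assumed from the attribute reads in the diff above.
from docling.datamodel.pipeline_options import AsrPipelineOptions
from docling.datamodel.pipeline_options_asr_model import InlineAsrMlxWhisperOptions
from docling.pipeline.asr_pipeline import AsrPipeline

mlx_options = InlineAsrMlxWhisperOptions(
    repo_id="mlx-community/whisper-large-v3-turbo",  # hypothetical HF repo id
    language="en",
    task="transcribe",
    word_timestamps=True,
)

# AsrPipeline dispatches on the options type, so this selects _MlxWhisperModel
# (on Apple Silicon, per the supported_devices check in decide_device).
pipeline = AsrPipeline(AsrPipelineOptions(asr_options=mlx_options))
```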
docling/pipeline/vlm_pipeline.py
CHANGED

```diff
@@ -6,6 +6,7 @@ from typing import List, Optional, Union, cast
 
 from docling_core.types.doc import (
     BoundingBox,
+    ContentLayer,
     DocItem,
     DoclingDocument,
     ImageRef,
@@ -251,9 +252,9 @@ class VlmPipeline(PaginatedPipeline):
             # No code blocks found, return original text
             return text
 
-        for pg_idx, page in enumerate(conv_res.pages):
-            page_no = pg_idx + 1  # FIXME: might be incorrect
+        page_docs = []
 
+        for pg_idx, page in enumerate(conv_res.pages):
             predicted_text = ""
             if page.predictions.vlm_response:
                 predicted_text = page.predictions.vlm_response.text + "\n\n"
@@ -273,6 +274,24 @@ class VlmPipeline(PaginatedPipeline):
             )
             page_doc = backend.convert()
 
+            # Modify provenance in place for all items in the page document
+            for item, level in page_doc.iterate_items(
+                with_groups=True,
+                traverse_pictures=True,
+                included_content_layers=set(ContentLayer),
+            ):
+                if isinstance(item, DocItem):
+                    item.prov = [
+                        ProvenanceItem(
+                            page_no=pg_idx + 1,
+                            bbox=BoundingBox(
+                                t=0.0, b=0.0, l=0.0, r=0.0
+                            ),  # FIXME: would be nice not to have to "fake" it
+                            charspan=[0, 0],
+                        )
+                    ]
+
+            # Add page metadata to the page document before concatenation
             if page.image is not None:
                 pg_width = page.image.width
                 pg_height = page.image.height
@@ -280,27 +299,18 @@ class VlmPipeline(PaginatedPipeline):
                 pg_width = 1
                 pg_height = 1
 
-            conv_res.document.add_page(
-                page_no=page_no,
+            page_doc.add_page(
+                page_no=pg_idx + 1,
                 size=Size(width=pg_width, height=pg_height),
                 image=ImageRef.from_pil(image=page.image, dpi=72)
                 if page.image
                 else None,
             )
 
-            for item, level in page_doc.iterate_items():
-                item.prov = [
-                    ProvenanceItem(
-                        page_no=pg_idx + 1,
-                        bbox=BoundingBox(
-                            t=0.0, b=0.0, l=0.0, r=0.0
-                        ),  # FIXME: would be nice not to have to "fake" it
-                        charspan=[0, 0],
-                    )
-                ]
-                conv_res.document.append_child_item(child=item)
+            page_docs.append(page_doc)
 
-        return conv_res.document
+        final_doc = DoclingDocument.concatenate(docs=page_docs)
+        return final_doc
 
     def _turn_html_into_doc(self, conv_res):
         def _extract_html_code(text):
@@ -328,9 +338,9 @@ class VlmPipeline(PaginatedPipeline):
             # No code blocks found, return original text
             return text
 
-        for pg_idx, page in enumerate(conv_res.pages):
-            page_no = pg_idx + 1  # FIXME: might be incorrect
+        page_docs = []
 
+        for pg_idx, page in enumerate(conv_res.pages):
             predicted_text = ""
             if page.predictions.vlm_response:
                 predicted_text = page.predictions.vlm_response.text + "\n\n"
@@ -341,7 +351,7 @@ class VlmPipeline(PaginatedPipeline):
             out_doc = InputDocument(
                 path_or_stream=response_bytes,
                 filename=conv_res.input.file.name,
-                format=InputFormat.
+                format=InputFormat.HTML,
                 backend=HTMLDocumentBackend,
             )
             backend = HTMLDocumentBackend(
@@ -350,6 +360,24 @@ class VlmPipeline(PaginatedPipeline):
             )
             page_doc = backend.convert()
 
+            # Modify provenance in place for all items in the page document
+            for item, level in page_doc.iterate_items(
+                with_groups=True,
+                traverse_pictures=True,
+                included_content_layers=set(ContentLayer),
+            ):
+                if isinstance(item, DocItem):
+                    item.prov = [
+                        ProvenanceItem(
+                            page_no=pg_idx + 1,
+                            bbox=BoundingBox(
+                                t=0.0, b=0.0, l=0.0, r=0.0
+                            ),  # FIXME: would be nice not to have to "fake" it
+                            charspan=[0, 0],
+                        )
+                    ]
+
+            # Add page metadata to the page document before concatenation
             if page.image is not None:
                 pg_width = page.image.width
                 pg_height = page.image.height
@@ -357,27 +385,19 @@ class VlmPipeline(PaginatedPipeline):
                 pg_width = 1
                 pg_height = 1
 
-            conv_res.document.add_page(
-                page_no=page_no,
+            page_doc.add_page(
+                page_no=pg_idx + 1,
                 size=Size(width=pg_width, height=pg_height),
                 image=ImageRef.from_pil(image=page.image, dpi=72)
                 if page.image
                 else None,
             )
 
-            for item, level in page_doc.iterate_items():
-                item.prov = [
-                    ProvenanceItem(
-                        page_no=pg_idx + 1,
-                        bbox=BoundingBox(
-                            t=0.0, b=0.0, l=0.0, r=0.0
-                        ),  # FIXME: would be nice not to have to "fake" it
-                        charspan=[0, 0],
-                    )
-                ]
-                conv_res.document.append_child_item(child=item)
+            page_docs.append(page_doc)
 
-        return conv_res.document
+        # Concatenate all page documents to preserve hierarchy
+        final_doc = DoclingDocument.concatenate(docs=page_docs)
+        return final_doc
 
    @classmethod
    def get_default_options(cls) -> VlmPipelineOptions:
```
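Both `_turn_md_into_doc` and `_turn_html_into_doc` now follow the same shape: build one `DoclingDocument` per page from the VLM response, stamp every `DocItem` with a single-page provenance entry, register the page metadata, and merge at the end. A minimal sketch of that assembly pattern, using only the `DoclingDocument` calls that appear in this diff (the labels and sizes here are placeholders):

```python
from docling_core.types.doc import DocItemLabel, DoclingDocument, Size

page_docs: list[DoclingDocument] = []
for pg_idx, page_text in enumerate(["first page", "second page"]):
    page_doc = DoclingDocument(name=f"page_{pg_idx + 1}")
    page_doc.add_text(label=DocItemLabel.TEXT, text=page_text)
    # Register the page itself so page numbers survive the merge.
    page_doc.add_page(page_no=pg_idx + 1, size=Size(width=1, height=1))
    page_docs.append(page_doc)

# concatenate() merges the per-page trees into one document; this is what
# replaces the old per-item append_child_item() loop.
final_doc = DoclingDocument.concatenate(docs=page_docs)
```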
docling/utils/api_image_request.py
CHANGED

```diff
@@ -2,7 +2,7 @@ import base64
 import json
 import logging
 from io import BytesIO
-from typing import Dict, List, Optional
+from typing import Optional
 
 import requests
 from PIL import Image
@@ -19,7 +19,7 @@ def api_image_request(
     prompt: str,
     url: AnyUrl,
     timeout: float = 20,
-    headers: Optional[Dict[str, str]] = None,
+    headers: Optional[dict[str, str]] = None,
     **params,
 ) -> str:
     img_io = BytesIO()
@@ -69,8 +69,8 @@ def api_image_request_streaming(
     url: AnyUrl,
     *,
     timeout: float = 20,
-    headers: Optional[Dict[str, str]] = None,
-    generation_stoppers: List[GenerationStopper] = [],
+    headers: Optional[dict[str, str]] = None,
+    generation_stoppers: list[GenerationStopper] = [],
     **params,
 ) -> str:
     """
```
docling/utils/layout_postprocessor.py
CHANGED

```diff
@@ -2,7 +2,6 @@ import bisect
 import logging
 import sys
 from collections import defaultdict
-from typing import Dict, List, Set, Tuple
 
 from docling_core.types.doc import DocItemLabel, Size
 from docling_core.types.doc.page import TextCell
@@ -39,7 +38,7 @@ class UnionFind:
             self.parent[root_y] = root_x
             self.rank[root_x] += 1
 
-    def get_groups(self) -> Dict[int, List[int]]:
+    def get_groups(self) -> dict[int, list[int]]:
         """Returns groups as {root: [elements]}."""
         groups = defaultdict(list)
         for elem in self.parent:
@@ -50,13 +49,13 @@ class UnionFind:
 class SpatialClusterIndex:
     """Efficient spatial indexing for clusters using R-tree and interval trees."""
 
-    def __init__(self, clusters: List[Cluster]):
+    def __init__(self, clusters: list[Cluster]):
         p = index.Property()
         p.dimension = 2
         self.spatial_index = index.Index(properties=p)
         self.x_intervals = IntervalTree()
         self.y_intervals = IntervalTree()
-        self.clusters_by_id: Dict[int, Cluster] = {}
+        self.clusters_by_id: dict[int, Cluster] = {}
 
         for cluster in clusters:
             self.add_cluster(cluster)
@@ -72,7 +71,7 @@ class SpatialClusterIndex:
         self.spatial_index.delete(cluster.id, cluster.bbox.as_tuple())
         del self.clusters_by_id[cluster.id]
 
-    def find_candidates(self, bbox: BoundingBox) -> Set[int]:
+    def find_candidates(self, bbox: BoundingBox) -> set[int]:
         """Find potential overlapping cluster IDs using all indexes."""
         spatial = set(self.spatial_index.intersection(bbox.as_tuple()))
         x_candidates = self.x_intervals.find_containing(
@@ -123,13 +122,13 @@ class IntervalTree:
     """Memory-efficient interval tree for 1D overlap queries."""
 
     def __init__(self):
-        self.intervals: List[Interval] = []  # Sorted by min_val
+        self.intervals: list[Interval] = []  # Sorted by min_val
 
     def insert(self, min_val: float, max_val: float, id: int):
         interval = Interval(min_val, max_val, id)
         bisect.insort(self.intervals, interval)
 
-    def find_containing(self, point: float) -> Set[int]:
+    def find_containing(self, point: float) -> set[int]:
         """Find all intervals containing the point."""
         pos = bisect.bisect_left(self.intervals, point)
         result = set()
@@ -196,7 +195,7 @@ class LayoutPostprocessor:
     }
 
     def __init__(
-        self, page: Page, clusters: List[Cluster], options: LayoutOptions
+        self, page: Page, clusters: list[Cluster], options: LayoutOptions
     ) -> None:
         """Initialize processor with page and clusters."""
 
@@ -219,7 +218,7 @@ class LayoutPostprocessor:
             [c for c in self.special_clusters if c.label in self.WRAPPER_TYPES]
         )
 
-    def postprocess(self) -> Tuple[List[Cluster], List[TextCell]]:
+    def postprocess(self) -> tuple[list[Cluster], list[TextCell]]:
         """Main processing pipeline."""
         self.regular_clusters = self._process_regular_clusters()
         self.special_clusters = self._process_special_clusters()
@@ -254,7 +253,7 @@ class LayoutPostprocessor:
 
         return final_clusters, self.cells
 
-    def _process_regular_clusters(self) -> List[Cluster]:
+    def _process_regular_clusters(self) -> list[Cluster]:
         """Process regular clusters with iterative refinement."""
         clusters = [
             c
@@ -311,7 +310,7 @@ class LayoutPostprocessor:
 
         return clusters
 
-    def _process_special_clusters(self) -> List[Cluster]:
+    def _process_special_clusters(self) -> list[Cluster]:
         special_clusters = [
             c
             for c in self.special_clusters
@@ -381,7 +380,7 @@ class LayoutPostprocessor:
 
         return picture_clusters + wrapper_clusters
 
-    def _handle_cross_type_overlaps(self, special_clusters) -> List[Cluster]:
+    def _handle_cross_type_overlaps(self, special_clusters) -> list[Cluster]:
         """Handle overlaps between regular and wrapper clusters before child assignment.
 
         In particular, KEY_VALUE_REGION proposals that are almost identical to a TABLE
@@ -454,7 +453,7 @@ class LayoutPostprocessor:
 
     def _select_best_cluster_from_group(
         self,
-        group_clusters: List[Cluster],
+        group_clusters: list[Cluster],
         params: dict,
     ) -> Cluster:
         """Select best cluster from a group of overlapping clusters based on all rules."""
@@ -487,11 +486,11 @@ class LayoutPostprocessor:
 
     def _remove_overlapping_clusters(
         self,
-        clusters: List[Cluster],
+        clusters: list[Cluster],
         cluster_type: str,
         overlap_threshold: float = 0.8,
         containment_threshold: float = 0.8,
-    ) -> List[Cluster]:
+    ) -> list[Cluster]:
         if not clusters:
             return []
 
@@ -544,7 +543,7 @@ class LayoutPostprocessor:
 
     def _select_best_cluster(
         self,
-        clusters: List[Cluster],
+        clusters: list[Cluster],
         area_threshold: float,
         conf_threshold: float,
     ) -> Cluster:
@@ -572,7 +571,7 @@ class LayoutPostprocessor:
 
         return current_best if current_best else clusters[0]
 
-    def _deduplicate_cells(self, cells: List[TextCell]) -> List[TextCell]:
+    def _deduplicate_cells(self, cells: list[TextCell]) -> list[TextCell]:
         """Ensure each cell appears only once, maintaining order of first appearance."""
         seen_ids = set()
         unique_cells = []
@@ -583,8 +582,8 @@ class LayoutPostprocessor:
         return unique_cells
 
     def _assign_cells_to_clusters(
-        self, clusters: List[Cluster], min_overlap: float = 0.2
-    ) -> List[Cluster]:
+        self, clusters: list[Cluster], min_overlap: float = 0.2
+    ) -> list[Cluster]:
         """Assign cells to best overlapping cluster."""
         for cluster in clusters:
             cluster.cells = []
@@ -616,7 +615,7 @@ class LayoutPostprocessor:
 
         return clusters
 
-    def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[TextCell]:
+    def _find_unassigned_cells(self, clusters: list[Cluster]) -> list[TextCell]:
         """Find cells not assigned to any cluster."""
         assigned = {cell.index for cluster in clusters for cell in cluster.cells}
         return [
@@ -625,7 +624,7 @@ class LayoutPostprocessor:
             if cell.index not in assigned and cell.text.strip()
         ]
 
-    def _adjust_cluster_bboxes(self, clusters: List[Cluster]) -> List[Cluster]:
+    def _adjust_cluster_bboxes(self, clusters: list[Cluster]) -> list[Cluster]:
         """Adjust cluster bounding boxes to contain their cells."""
         for cluster in clusters:
             if not cluster.cells:
@@ -651,13 +650,13 @@ class LayoutPostprocessor:
 
         return clusters
 
-    def _sort_cells(self, cells: List[TextCell]) -> List[TextCell]:
+    def _sort_cells(self, cells: list[TextCell]) -> list[TextCell]:
         """Sort cells in native reading order."""
         return sorted(cells, key=lambda c: (c.index))
 
     def _sort_clusters(
-        self, clusters: List[Cluster], mode: str = "id"
-    ) -> List[Cluster]:
+        self, clusters: list[Cluster], mode: str = "id"
+    ) -> list[Cluster]:
         """Sort clusters in reading order (top-to-bottom, left-to-right)."""
         if mode == "id":  # sort in the order the cells are printed in the PDF.
             return sorted(
```
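Every hunk in this file applies the same mechanical change: the deprecated `typing.Dict`/`List`/`Set`/`Tuple` aliases become the PEP 585 builtin generics, letting the `typing` import be dropped. The pattern in isolation, as a simplified stand-in for `UnionFind.get_groups` (not the class's exact implementation):

```python
from collections import defaultdict

# Before: from typing import Dict, List
#         def get_groups(self) -> Dict[int, List[int]]: ...

# After: builtin generics, valid on Python 3.9+ with no typing import.
def get_groups(parent: dict[int, int]) -> dict[int, list[int]]:
    """Group each element under its root, as {root: [elements]}."""
    groups: dict[int, list[int]] = defaultdict(list)
    for elem, root in parent.items():
        groups[root].append(elem)
    return dict(groups)
```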
{docling-2.56.1.dist-info → docling-2.58.0.dist-info}/METADATA
CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.56.1
+Version: 2.58.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -27,7 +27,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: pydantic<3.0.0,>=2.0.0
 Requires-Dist: docling-core[chunking]<3.0.0,>=2.48.2
-Requires-Dist: docling-parse<5.0.0,>=4.
+Requires-Dist: docling-parse<5.0.0,>=4.7.0
 Requires-Dist: docling-ibm-models<4,>=3.9.1
 Requires-Dist: filetype<2.0.0,>=1.2.0
 Requires-Dist: pypdfium2!=4.30.1,<5.0.0,>=4.30.0
@@ -69,6 +69,7 @@ Provides-Extra: rapidocr
 Requires-Dist: rapidocr<4.0.0,>=3.3; python_version < "3.14" and extra == "rapidocr"
 Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
 Provides-Extra: asr
+Requires-Dist: mlx-whisper>=0.4.3; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "asr"
 Requires-Dist: openai-whisper>=20250625; extra == "asr"
 Dynamic: license-file
 
@@ -96,6 +97,7 @@ Dynamic: license-file
 [](https://pepy.tech/projects/docling)
 [](https://apify.com/vancura/docling)
 [](https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github)
+[](https://docling.ai/discord)
 [](https://www.bestpractices.dev/projects/10101)
 [](https://lfaidata.foundation/projects/)
```