PyPI - docling - Versions diffs - 2.34.0__py3-none-any.whl → 2.36.0__py3-none-any.whl - Mend

docling-2.34.0-py3-none-any.whl → docling-2.36.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

docling/backend/xml/jats_backend.py +0 -0
docling/cli/main.py +48 -18
docling/datamodel/accelerator_options.py +68 -0
docling/datamodel/base_models.py +10 -8
docling/datamodel/document.py +7 -2
docling/datamodel/pipeline_options.py +29 -161
docling/datamodel/pipeline_options_vlm_model.py +81 -0
docling/datamodel/vlm_model_specs.py +144 -0
docling/document_converter.py +5 -0
docling/models/api_vlm_model.py +1 -1
docling/models/base_ocr_model.py +2 -1
docling/models/code_formula_model.py +6 -11
docling/models/document_picture_classifier.py +6 -11
docling/models/easyocr_model.py +1 -2
docling/models/layout_model.py +22 -17
docling/models/ocr_mac_model.py +1 -1
docling/models/page_preprocessing_model.py +11 -6
docling/models/picture_description_api_model.py +1 -1
docling/models/picture_description_base_model.py +1 -1
docling/models/picture_description_vlm_model.py +7 -22
docling/models/rapid_ocr_model.py +1 -2
docling/models/table_structure_model.py +6 -12
docling/models/tesseract_ocr_cli_model.py +1 -1
docling/models/tesseract_ocr_model.py +1 -1
docling/models/utils/__init__.py +0 -0
docling/models/utils/hf_model_download.py +40 -0
docling/models/vlm_models_inline/__init__.py +0 -0
docling/models/vlm_models_inline/hf_transformers_model.py +194 -0
docling/models/{hf_mlx_model.py → vlm_models_inline/mlx_model.py} +56 -44
docling/pipeline/standard_pdf_pipeline.py +69 -57
docling/pipeline/vlm_pipeline.py +228 -61
docling/utils/accelerator_utils.py +17 -2
docling/utils/model_downloader.py +13 -12
{docling-2.34.0.dist-info → docling-2.36.0.dist-info}/METADATA +54 -55
{docling-2.34.0.dist-info → docling-2.36.0.dist-info}/RECORD +48 -41
{docling-2.34.0.dist-info → docling-2.36.0.dist-info}/WHEEL +2 -1
docling-2.36.0.dist-info/entry_points.txt +6 -0
docling-2.36.0.dist-info/top_level.txt +1 -0
docling/models/hf_vlm_model.py +0 -182
docling-2.34.0.dist-info/entry_points.txt +0 -7
{docling-2.34.0.dist-info → docling-2.36.0.dist-info/licenses}/LICENSE +0 -0

docling/pipeline/vlm_pipeline.py CHANGED Viewed

@@ -1,29 +1,46 @@
 import logging
+import re
 from io import BytesIO
 from pathlib import Path
 from typing import List, Optional, Union, cast
-from docling_core.types import DoclingDocument
-from docling_core.types.doc import BoundingBox, DocItem, ImageRef, PictureItem, TextItem
+from docling_core.types.doc import (
+    BoundingBox,
+    DocItem,
+    DoclingDocument,
+    ImageRef,
+    PictureItem,
+    ProvenanceItem,
+    TextItem,
+)
+from docling_core.types.doc.base import (
+    BoundingBox,
+    Size,
+)
 from docling_core.types.doc.document import DocTagsDocument
 from PIL import Image as PILImage
 from docling.backend.abstract_backend import AbstractDocumentBackend
+from docling.backend.html_backend import HTMLDocumentBackend
 from docling.backend.md_backend import MarkdownDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import InputFormat, Page
 from docling.datamodel.document import ConversionResult, InputDocument
 from docling.datamodel.pipeline_options import (
+    VlmPipelineOptions,
+)
+from docling.datamodel.pipeline_options_vlm_model import (
     ApiVlmOptions,
-    HuggingFaceVlmOptions,
     InferenceFramework,
+    InlineVlmOptions,
     ResponseFormat,
-    VlmPipelineOptions,
 )
 from docling.datamodel.settings import settings
 from docling.models.api_vlm_model import ApiVlmModel
-from docling.models.hf_mlx_model import HuggingFaceMlxModel
-from docling.models.hf_vlm_model import HuggingFaceVlmModel
+from docling.models.vlm_models_inline.hf_transformers_model import (
+    HuggingFaceTransformersVlmModel,
+)
+from docling.models.vlm_models_inline.mlx_model import HuggingFaceMlxModel
 from docling.pipeline.base_pipeline import PaginatedPipeline
 from docling.utils.profiling import ProfilingScope, TimeRecorder
@@ -66,8 +83,8 @@ class VlmPipeline(PaginatedPipeline):
                     vlm_options=cast(ApiVlmOptions, self.pipeline_options.vlm_options),
                 ),
             ]
-        elif isinstance(self.pipeline_options.vlm_options, HuggingFaceVlmOptions):
-            vlm_options = cast(HuggingFaceVlmOptions, self.pipeline_options.vlm_options)
+        elif isinstance(self.pipeline_options.vlm_options, InlineVlmOptions):
+            vlm_options = cast(InlineVlmOptions, self.pipeline_options.vlm_options)
             if vlm_options.inference_framework == InferenceFramework.MLX:
                 self.build_pipe = [
                     HuggingFaceMlxModel(
@@ -77,15 +94,19 @@ class VlmPipeline(PaginatedPipeline):
                         vlm_options=vlm_options,
                     ),
                 ]
-            else:
+            elif vlm_options.inference_framework == InferenceFramework.TRANSFORMERS:
                 self.build_pipe = [
-                    HuggingFaceVlmModel(
+                    HuggingFaceTransformersVlmModel(
                         enabled=True,  # must be always enabled for this pipeline to make sense.
                         artifacts_path=artifacts_path,
                         accelerator_options=pipeline_options.accelerator_options,
                         vlm_options=vlm_options,
                     ),
                 ]
+            else:
+                raise ValueError(
+                    f"Could not instantiate the right type of VLM pipeline: {vlm_options.inference_framework}"
+                )
         self.enrichment_pipe = [
             # Other models working on `NodeItem` elements in the DoclingDocument
@@ -116,49 +137,19 @@ class VlmPipeline(PaginatedPipeline):
                 self.pipeline_options.vlm_options.response_format
                 == ResponseFormat.DOCTAGS
             ):
-                doctags_list = []
-                image_list = []
-                for page in conv_res.pages:
-                    predicted_doctags = ""
-                    img = PILImage.new("RGB", (1, 1), "rgb(255,255,255)")
-                    if page.predictions.vlm_response:
-                        predicted_doctags = page.predictions.vlm_response.text
-                    if page.image:
-                        img = page.image
-                    image_list.append(img)
-                    doctags_list.append(predicted_doctags)
-                doctags_list_c = cast(List[Union[Path, str]], doctags_list)
-                image_list_c = cast(List[Union[Path, PILImage.Image]], image_list)
-                doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(
-                    doctags_list_c, image_list_c
-                )
-                conv_res.document = DoclingDocument.load_from_doctags(doctags_doc)
-                # If forced backend text, replace model predicted text with backend one
-                if self.force_backend_text:
-                    scale = self.pipeline_options.images_scale
-                    for element, _level in conv_res.document.iterate_items():
-                        if not isinstance(element, TextItem) or len(element.prov) == 0:
-                            continue
-                        page_ix = element.prov[0].page_no - 1
-                        page = conv_res.pages[page_ix]
-                        if not page.size:
-                            continue
-                        crop_bbox = (
-                            element.prov[0]
-                            .bbox.scaled(scale=scale)
-                            .to_top_left_origin(page_height=page.size.height * scale)
-                        )
-                        txt = self.extract_text_from_backend(page, crop_bbox)
-                        element.text = txt
-                        element.orig = txt
+                conv_res.document = self._turn_dt_into_doc(conv_res)
             elif (
                 self.pipeline_options.vlm_options.response_format
                 == ResponseFormat.MARKDOWN
             ):
                 conv_res.document = self._turn_md_into_doc(conv_res)
+            elif (
+                self.pipeline_options.vlm_options.response_format == ResponseFormat.HTML
+            ):
+                conv_res.document = self._turn_html_into_doc(conv_res)
             else:
                 raise RuntimeError(
                     f"Unsupported VLM response format {self.pipeline_options.vlm_options.response_format}"
@@ -192,23 +183,199 @@ class VlmPipeline(PaginatedPipeline):
         return conv_res
-    def _turn_md_into_doc(self, conv_res):
-        predicted_text = ""
-        for pg_idx, page in enumerate(conv_res.pages):
+    def _turn_dt_into_doc(self, conv_res) -> DoclingDocument:
+        """Build the DoclingDocument from the per-page DocTags predictions.
+
+        Each page contributes its predicted DocTags string (empty when the page
+        has no VLM response) and its page image (a 1x1 white placeholder when
+        the page has no image). When ``self.force_backend_text`` is set, the
+        text of every ``TextItem`` with provenance is replaced by the result of
+        ``self.extract_text_from_backend`` for the page referenced by the
+        item's first provenance entry.
+
+        Args:
+            conv_res: Conversion result whose ``pages`` carry the predictions.
+
+        Returns:
+            The assembled document, which is also stored on
+            ``conv_res.document``.
+        """
+        doctags_list = []
+        image_list = []
+        for page in conv_res.pages:
+            predicted_doctags = ""
+            img = PILImage.new("RGB", (1, 1), "rgb(255,255,255)")
             if page.predictions.vlm_response:
-                predicted_text += page.predictions.vlm_response.text + "\n\n"
-        response_bytes = BytesIO(predicted_text.encode("utf8"))
-        out_doc = InputDocument(
-            path_or_stream=response_bytes,
-            filename=conv_res.input.file.name,
-            format=InputFormat.MD,
-            backend=MarkdownDocumentBackend,
+                predicted_doctags = page.predictions.vlm_response.text
+            if page.image:
+                img = page.image
+            image_list.append(img)
+            doctags_list.append(predicted_doctags)
+        doctags_list_c = cast(List[Union[Path, str]], doctags_list)
+        image_list_c = cast(List[Union[Path, PILImage.Image]], image_list)
+        doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(
+            doctags_list_c, image_list_c
         )
-        backend = MarkdownDocumentBackend(
-            in_doc=out_doc,
-            path_or_stream=response_bytes,
+        conv_res.document = DoclingDocument.load_from_doctags(
+            doctag_document=doctags_doc
         )
-        return backend.convert()
+        # If forced backend text, replace model predicted text with backend one
+        if self.force_backend_text:
+            scale = self.pipeline_options.images_scale
+            for element, _level in conv_res.document.iterate_items():
+                if not isinstance(element, TextItem) or len(element.prov) == 0:
+                    continue
+                # Resolve the page this element belongs to. Reusing the `page`
+                # variable left over from the loop above would crop every
+                # element against the last page and would fail outright when
+                # there are no pages at all.
+                page_ix = element.prov[0].page_no - 1
+                page = conv_res.pages[page_ix]
+                if not page.size:
+                    continue
+                crop_bbox = (
+                    element.prov[0]
+                    .bbox.scaled(scale=scale)
+                    .to_top_left_origin(page_height=page.size.height * scale)
+                )
+                txt = self.extract_text_from_backend(page, crop_bbox)
+                element.text = txt
+                element.orig = txt
+        return conv_res.document
+    def _turn_md_into_doc(self, conv_res):
+        """Append each page's Markdown prediction to ``conv_res.document``.
+
+        For every page, the VLM response text (empty when there is none) is
+        unwrapped from an enclosing fenced code block if present, converted
+        with ``MarkdownDocumentBackend``, and the resulting items are appended
+        to ``conv_res.document`` with a placeholder provenance entry.
+
+        Args:
+            conv_res: Conversion result providing ``pages``, ``input`` and the
+                target ``document``.
+
+        Returns:
+            ``conv_res.document`` after all pages have been processed.
+        """
+
+        def _extract_markdown_code(text):
+            """
+            Extracts text from a markdown code block (enclosed in triple backticks).
+            The pattern is anchored, so it only matches when the whole text is
+            one fenced block, optionally followed by newlines.
+            If no such block is found, returns the original text.
+            Args:
+                text (str): Input text that may be wrapped in a markdown code block
+            Returns:
+                str: Extracted code if a code block exists, otherwise original text
+            """
+            # Regex pattern to match content between triple backticks
+            # This handles multiline content and optional language specifier
+            pattern = r"^```(?:\w*\n)?(.*?)```(\n)*$"
+            # Search with DOTALL flag to match across multiple lines
+            mtch = re.search(pattern, text, re.DOTALL)
+            if mtch:
+                # Return only the content of the first capturing group
+                return mtch.group(1)
+            else:
+                # No code blocks found, return original text
+                return text
+        for pg_idx, page in enumerate(conv_res.pages):
+            page_no = pg_idx + 1  # FIXME: might be incorrect
+            predicted_text = ""
+            if page.predictions.vlm_response:
+                predicted_text = page.predictions.vlm_response.text + "\n\n"
+            predicted_text = _extract_markdown_code(text=predicted_text)
+            response_bytes = BytesIO(predicted_text.encode("utf8"))
+            out_doc = InputDocument(
+                path_or_stream=response_bytes,
+                filename=conv_res.input.file.name,
+                format=InputFormat.MD,
+                backend=MarkdownDocumentBackend,
+            )
+            backend = MarkdownDocumentBackend(
+                in_doc=out_doc,
+                path_or_stream=response_bytes,
+            )
+            page_doc = backend.convert()
+            # Pages without an image get a 1x1 placeholder size and no image ref.
+            if page.image is not None:
+                pg_width = page.image.width
+                pg_height = page.image.height
+                page_image = ImageRef.from_pil(image=page.image, dpi=72)
+            else:
+                pg_width = 1
+                pg_height = 1
+                page_image = None
+            conv_res.document.add_page(
+                page_no=page_no,
+                size=Size(width=pg_width, height=pg_height),
+                image=page_image,
+            )
+            for item, _level in page_doc.iterate_items():
+                item.prov = [
+                    ProvenanceItem(
+                        # Reuse page_no so the page entry and the provenance
+                        # cannot drift apart if the numbering above is changed.
+                        page_no=page_no,
+                        bbox=BoundingBox(
+                            t=0.0, b=0.0, l=0.0, r=0.0
+                        ),  # FIXME: would be nice not to have to "fake" it
+                        charspan=[0, 0],
+                    )
+                ]
+                conv_res.document.append_child_item(child=item)
+        return conv_res.document
+    def _turn_html_into_doc(self, conv_res):
+        """Append each page's HTML prediction to ``conv_res.document``.
+
+        For every page, the VLM response text (empty when there is none) is
+        unwrapped from an enclosing fenced code block if present, converted
+        with ``HTMLDocumentBackend``, and the resulting items are appended to
+        ``conv_res.document`` with a placeholder provenance entry.
+
+        Args:
+            conv_res: Conversion result providing ``pages``, ``input`` and the
+                target ``document``.
+
+        Returns:
+            ``conv_res.document`` after all pages have been processed.
+        """
+
+        def _extract_html_code(text):
+            """
+            Extracts HTML that the model wrapped in a fenced code block
+            (enclosed in triple backticks).
+            The pattern is anchored, so it only matches when the whole text is
+            one fenced block, optionally followed by newlines.
+            If no such block is found, returns the original text.
+            Args:
+                text (str): Input text that may be wrapped in a fenced code block
+            Returns:
+                str: Extracted content if a code block exists, otherwise original text
+            """
+            # Regex pattern to match content between triple backticks
+            # This handles multiline content and optional language specifier
+            pattern = r"^```(?:\w*\n)?(.*?)```(\n)*$"
+            # Search with DOTALL flag to match across multiple lines
+            mtch = re.search(pattern, text, re.DOTALL)
+            if mtch:
+                # Return only the content of the first capturing group
+                return mtch.group(1)
+            else:
+                # No code blocks found, return original text
+                return text
+        for pg_idx, page in enumerate(conv_res.pages):
+            page_no = pg_idx + 1  # FIXME: might be incorrect
+            predicted_text = ""
+            if page.predictions.vlm_response:
+                predicted_text = page.predictions.vlm_response.text + "\n\n"
+            predicted_text = _extract_html_code(text=predicted_text)
+            response_bytes = BytesIO(predicted_text.encode("utf8"))
+            # NOTE(review): format is InputFormat.MD although the backend is
+            # HTMLDocumentBackend; this looks carried over from the Markdown
+            # variant. Confirm whether InputFormat.HTML is intended.
+            out_doc = InputDocument(
+                path_or_stream=response_bytes,
+                filename=conv_res.input.file.name,
+                format=InputFormat.MD,
+                backend=HTMLDocumentBackend,
+            )
+            backend = HTMLDocumentBackend(
+                in_doc=out_doc,
+                path_or_stream=response_bytes,
+            )
+            page_doc = backend.convert()
+            # Pages without an image get a 1x1 placeholder size and no image ref.
+            if page.image is not None:
+                pg_width = page.image.width
+                pg_height = page.image.height
+                page_image = ImageRef.from_pil(image=page.image, dpi=72)
+            else:
+                pg_width = 1
+                pg_height = 1
+                page_image = None
+            conv_res.document.add_page(
+                page_no=page_no,
+                size=Size(width=pg_width, height=pg_height),
+                image=page_image,
+            )
+            for item, _level in page_doc.iterate_items():
+                item.prov = [
+                    ProvenanceItem(
+                        # Reuse page_no so the page entry and the provenance
+                        # cannot drift apart if the numbering above is changed.
+                        page_no=page_no,
+                        bbox=BoundingBox(
+                            t=0.0, b=0.0, l=0.0, r=0.0
+                        ),  # FIXME: would be nice not to have to "fake" it
+                        charspan=[0, 0],
+                    )
+                ]
+                conv_res.document.append_child_item(child=item)
+        return conv_res.document
     @classmethod
     def get_default_options(cls) -> VlmPipelineOptions:

docling/utils/accelerator_utils.py CHANGED Viewed

@@ -1,13 +1,16 @@
 import logging
+from typing import List, Optional
 import torch
-from docling.datamodel.pipeline_options import AcceleratorDevice
+from docling.datamodel.accelerator_options import AcceleratorDevice
 _log = logging.getLogger(__name__)
-def decide_device(accelerator_device: str) -> str:
+def decide_device(
+    accelerator_device: str, supported_devices: Optional[List[AcceleratorDevice]] = None
+) -> str:
     r"""
     Resolve the device based on the acceleration options and the available devices in the system.
@@ -20,6 +23,18 @@ def decide_device(accelerator_device: str) -> str:
     has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
     has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    if supported_devices is not None:
+        if has_cuda and AcceleratorDevice.CUDA not in supported_devices:
+            _log.info(
+                f"Removing CUDA from available devices because it is not in {supported_devices=}"
+            )
+            has_cuda = False
+        if has_mps and AcceleratorDevice.MPS not in supported_devices:
+            _log.info(
+                f"Removing MPS from available devices because it is not in {supported_devices=}"
+            )
+            has_mps = False
     if accelerator_device == AcceleratorDevice.AUTO.value:  # Handle 'auto'
         if has_cuda:
             device = "cuda:0"

docling/utils/model_downloader.py CHANGED Viewed

@@ -4,18 +4,20 @@ from typing import Optional
 from docling.datamodel.pipeline_options import (
     granite_picture_description,
-    smoldocling_vlm_conversion_options,
-    smoldocling_vlm_mlx_conversion_options,
     smolvlm_picture_description,
 )
 from docling.datamodel.settings import settings
+from docling.datamodel.vlm_model_specs import (
+    SMOLDOCLING_MLX,
+    SMOLDOCLING_TRANSFORMERS,
+)
 from docling.models.code_formula_model import CodeFormulaModel
 from docling.models.document_picture_classifier import DocumentPictureClassifier
 from docling.models.easyocr_model import EasyOcrModel
-from docling.models.hf_vlm_model import HuggingFaceVlmModel
 from docling.models.layout_model import LayoutModel
 from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
 from docling.models.table_structure_model import TableStructureModel
+from docling.models.utils.hf_model_download import download_hf_model
 _log = logging.getLogger(__name__)
@@ -75,7 +77,7 @@ def download_models(
     if with_smolvlm:
         _log.info("Downloading SmolVlm model...")
-        PictureDescriptionVlmModel.download_models(
+        download_hf_model(
             repo_id=smolvlm_picture_description.repo_id,
             local_dir=output_dir / smolvlm_picture_description.repo_cache_folder,
             force=force,
@@ -84,26 +86,25 @@ def download_models(
     if with_smoldocling:
         _log.info("Downloading SmolDocling model...")
-        HuggingFaceVlmModel.download_models(
-            repo_id=smoldocling_vlm_conversion_options.repo_id,
-            local_dir=output_dir / smoldocling_vlm_conversion_options.repo_cache_folder,
+        download_hf_model(
+            repo_id=SMOLDOCLING_TRANSFORMERS.repo_id,
+            local_dir=output_dir / SMOLDOCLING_TRANSFORMERS.repo_cache_folder,
             force=force,
             progress=progress,
         )
     if with_smoldocling_mlx:
         _log.info("Downloading SmolDocling MLX model...")
-        HuggingFaceVlmModel.download_models(
-            repo_id=smoldocling_vlm_mlx_conversion_options.repo_id,
-            local_dir=output_dir
-            / smoldocling_vlm_mlx_conversion_options.repo_cache_folder,
+        download_hf_model(
+            repo_id=SMOLDOCLING_MLX.repo_id,
+            local_dir=output_dir / SMOLDOCLING_MLX.repo_cache_folder,
             force=force,
             progress=progress,
         )
     if with_granite_vision:
         _log.info("Downloading Granite Vision model...")
-        PictureDescriptionVlmModel.download_models(
+        download_hf_model(
             repo_id=granite_picture_description.repo_id,
             local_dir=output_dir / granite_picture_description.repo_cache_folder,
             force=force,

{docling-2.34.0.dist-info → docling-2.36.0.dist-info}/METADATA RENAMED Viewed

@@ -1,67 +1,68 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: docling
-Version: 2.34.0
+Version: 2.36.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
-Home-page: https://github.com/docling-project/docling
-License: MIT
+Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
+License-Expression: MIT
+Project-URL: homepage, https://github.com/docling-project/docling
+Project-URL: repository, https://github.com/docling-project/docling
+Project-URL: issues, https://github.com/docling-project/docling/issues
+Project-URL: changelog, https://github.com/docling-project/docling/blob/main/CHANGELOG.md
 Keywords: docling,convert,document,pdf,docx,html,markdown,layout model,segmentation,table structure,table former
-Author: Christoph Auer
-Author-email: cau@zurich.ibm.com
-Requires-Python: >=3.9,<4.0
+Classifier: Operating System :: MacOS :: MacOS X
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Operating System :: Microsoft :: Windows
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: MacOS :: MacOS X
-Classifier: Operating System :: POSIX :: Linux
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Provides-Extra: ocrmac
-Provides-Extra: rapidocr
+Requires-Python: <4.0,>=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pydantic<3.0.0,>=2.0.0
+Requires-Dist: docling-core[chunking]<3.0.0,>=2.29.0
+Requires-Dist: docling-ibm-models<4.0.0,>=3.4.4
+Requires-Dist: docling-parse<5.0.0,>=4.0.0
+Requires-Dist: filetype<2.0.0,>=1.2.0
+Requires-Dist: pypdfium2<5.0.0,>=4.30.0
+Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
+Requires-Dist: huggingface_hub<1,>=0.23
+Requires-Dist: requests<3.0.0,>=2.32.2
+Requires-Dist: easyocr<2.0,>=1.7
+Requires-Dist: certifi>=2024.7.4
+Requires-Dist: rtree<2.0.0,>=1.3.0
+Requires-Dist: typer<0.16.0,>=0.12.5
+Requires-Dist: python-docx<2.0.0,>=1.1.2
+Requires-Dist: python-pptx<2.0.0,>=1.0.2
+Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
+Requires-Dist: pandas<3.0.0,>=2.1.4
+Requires-Dist: marko<3.0.0,>=2.1.2
+Requires-Dist: openpyxl<4.0.0,>=3.1.5
+Requires-Dist: lxml<6.0.0,>=4.0.0
+Requires-Dist: pillow<12.0.0,>=10.0.0
+Requires-Dist: tqdm<5.0.0,>=4.65.0
+Requires-Dist: pluggy<2.0.0,>=1.0.0
+Requires-Dist: pylatexenc<3.0,>=2.10
+Requires-Dist: click<8.2.0
+Requires-Dist: scipy<2.0.0,>=1.6.0
 Provides-Extra: tesserocr
+Requires-Dist: tesserocr<3.0.0,>=2.7.1; extra == "tesserocr"
+Provides-Extra: ocrmac
+Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrmac"
 Provides-Extra: vlm
-Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
-Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
-Requires-Dist: certifi (>=2024.7.4)
-Requires-Dist: click (<8.2.0)
-Requires-Dist: docling-core[chunking] (>=2.29.0,<3.0.0)
-Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
-Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
-Requires-Dist: easyocr (>=1.7,<2.0)
-Requires-Dist: filetype (>=1.2.0,<2.0.0)
-Requires-Dist: huggingface_hub (>=0.23,<1)
-Requires-Dist: lxml (>=4.0.0,<6.0.0)
-Requires-Dist: marko (>=2.1.2,<3.0.0)
-Requires-Dist: ocrmac (>=1.0.0,<2.0.0) ; (sys_platform == "darwin") and (extra == "ocrmac")
-Requires-Dist: onnxruntime (>=1.7.0,<1.20.0) ; (python_version < "3.10") and (extra == "rapidocr")
-Requires-Dist: onnxruntime (>=1.7.0,<2.0.0) ; (python_version >= "3.10") and (extra == "rapidocr")
-Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
-Requires-Dist: pandas (>=2.1.4,<3.0.0)
-Requires-Dist: pillow (>=10.0.0,<12.0.0)
-Requires-Dist: pluggy (>=1.0.0,<2.0.0)
-Requires-Dist: pydantic (>=2.0.0,<3.0.0)
-Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
-Requires-Dist: pylatexenc (>=2.10,<3.0)
-Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
-Requires-Dist: python-docx (>=1.1.2,<2.0.0)
-Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
-Requires-Dist: rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; (python_version < "3.13") and (extra == "rapidocr")
-Requires-Dist: requests (>=2.32.2,<3.0.0)
-Requires-Dist: rtree (>=1.3.0,<2.0.0)
-Requires-Dist: scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"
-Requires-Dist: scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"
-Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
-Requires-Dist: tqdm (>=4.65.0,<5.0.0)
-Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
-Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
-Requires-Dist: typer (>=0.12.5,<0.16.0)
-Project-URL: Repository, https://github.com/docling-project/docling
-Description-Content-Type: text/markdown
+Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
+Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
+Requires-Dist: mlx-vlm>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
+Provides-Extra: rapidocr
+Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
+Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"
+Dynamic: license-file
 <p align="center">
   <a href="https://github.com/docling-project/docling">
@@ -79,9 +80,8 @@ Description-Content-Type: text/markdown
 [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://docling-project.github.io/docling/)
 [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
 [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/)
-[![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
-[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
-[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
+[![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
 [![License MIT](https://img.shields.io/github/license/docling-project/docling)](https://opensource.org/licenses/MIT)
@@ -101,7 +101,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
 * 🔒 Local execution capabilities for sensitive data and air-gapped environments
 * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
 * 🔍 Extensive OCR support for scanned PDFs and images
-* 🥚 Support of Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕
+* 🥚 Support of several Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview))
 * 💻 Simple and convenient CLI
 ### Coming soon
@@ -214,4 +214,3 @@ The project was started by the AI for knowledge team at IBM Research Zurich.
 [supported_formats]: https://docling-project.github.io/docling/usage/supported_formats/
 [docling_document]: https://docling-project.github.io/docling/concepts/docling_document/
 [integrations]: https://docling-project.github.io/docling/integrations/

docling 2.34.0__py3-none-any.whl → 2.36.0__py3-none-any.whl

docling-2.34.0-py3-none-any.whl → docling-2.36.0-py3-none-any.whl