docling 2.27.0__py3-none-any.whl → 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/docling_parse_v4_backend.py +20 -13
- docling/backend/mspowerpoint_backend.py +18 -0
- docling/backend/msword_backend.py +5 -2
- docling/cli/main.py +81 -38
- docling/datamodel/pipeline_options.py +28 -2
- docling/models/hf_mlx_model.py +137 -0
- docling/pipeline/vlm_pipeline.py +78 -398
- {docling-2.27.0.dist-info → docling-2.28.0.dist-info}/METADATA +27 -32
- {docling-2.27.0.dist-info → docling-2.28.0.dist-info}/RECORD +12 -11
- {docling-2.27.0.dist-info → docling-2.28.0.dist-info}/LICENSE +0 -0
- {docling-2.27.0.dist-info → docling-2.28.0.dist-info}/WHEEL +0 -0
- {docling-2.27.0.dist-info → docling-2.28.0.dist-info}/entry_points.txt +0 -0
docling/backend/docling_parse_v4_backend.py
CHANGED
@@ -112,23 +112,30 @@ class DoclingParseV4PageBackend(PdfPageBackend):
         padbox.r = page_size.width - padbox.r
         padbox.t = page_size.height - padbox.t
 
-        image = (
-            self._ppage.render(
-                scale=scale * 1.5,
-                rotation=0,  # no additional rotation
-                crop=padbox.as_tuple(),
-            )
-            .to_pil()
-            .resize(size=(round(cropbox.width * scale), round(cropbox.height * scale)))
-        )  # We resize the image from 1.5x the given scale to make it sharper.
+        with pypdfium2_lock:
+            image = (
+                self._ppage.render(
+                    scale=scale * 1.5,
+                    rotation=0,  # no additional rotation
+                    crop=padbox.as_tuple(),
+                )
+                .to_pil()
+                .resize(
+                    size=(round(cropbox.width * scale), round(cropbox.height * scale))
+                )
+            )  # We resize the image from 1.5x the given scale to make it sharper.
 
         return image
 
     def get_size(self) -> Size:
-        return Size(
-            width=self._ppage.get_width(), height=self._ppage.get_height()
-        )
-
+        with pypdfium2_lock:
+            return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
+
+        # TODO: Take width and height from docling-parse.
+        # return Size(
+        #     width=self._dpage.dimension.width,
+        #     height=self._dpage.dimension.height,
+        # )
 
     def unload(self):
         self._ppage = None
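The change above serializes all pypdfium2 calls behind the module-level `pypdfium2_lock` (pypdfium2 is not thread-safe) and supersamples at 1.5x the requested scale before resizing down. A minimal standalone sketch of that render-then-downscale trick, assuming `pip install pypdfium2`; the file path and target scale are illustrative:

```python
import pypdfium2 as pdfium

pdf = pdfium.PdfDocument("sample.pdf")  # illustrative input file
page = pdf[0]
scale = 2.0  # scale 2.0 corresponds to 144 dpi

# Render at 1.5x the requested scale, then resize down to the target size;
# the extra supersampling makes the final bitmap noticeably sharper.
image = (
    page.render(scale=scale * 1.5, rotation=0)
    .to_pil()
    .resize(size=(round(page.get_width() * scale), round(page.get_height() * scale)))
)
image.save("page_0.png")
```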
docling/backend/mspowerpoint_backend.py
CHANGED
@@ -16,6 +16,7 @@ from docling_core.types.doc import (
     TableCell,
     TableData,
 )
+from docling_core.types.doc.document import ContentLayer
 from PIL import Image, UnidentifiedImageError
 from pptx import Presentation
 from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
@@ -421,4 +422,21 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
         for shape in slide.shapes:
             handle_shapes(shape, parent_slide, slide_ind, doc, slide_size)
 
+        # Handle notes slide
+        if slide.has_notes_slide:
+            notes_slide = slide.notes_slide
+            notes_text = notes_slide.notes_text_frame.text.strip()
+            if notes_text:
+                bbox = BoundingBox(l=0, t=0, r=0, b=0)
+                prov = ProvenanceItem(
+                    page_no=slide_ind + 1, charspan=[0, len(notes_text)], bbox=bbox
+                )
+                doc.add_text(
+                    label=DocItemLabel.TEXT,
+                    parent=parent_slide,
+                    text=notes_text,
+                    prov=prov,
+                    content_layer=ContentLayer.FURNITURE,
+                )
+
         return doc
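The notes handling above is a thin layer over python-pptx. A minimal sketch of the underlying calls, with `deck.pptx` as an illustrative path:

```python
from pptx import Presentation

prs = Presentation("deck.pptx")
for slide_ind, slide in enumerate(prs.slides):
    if slide.has_notes_slide:
        notes_text = slide.notes_slide.notes_text_frame.text.strip()
        if notes_text:
            # The backend stores this text on the FURNITURE content layer,
            # keeping speaker notes out of the main document body by default.
            print(f"slide {slide_ind + 1}: {notes_text}")
```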
docling/backend/msword_backend.py
CHANGED
@@ -275,8 +275,10 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
                 only_equations.append(latex_equation)
                 texts_and_equations.append(latex_equation)
 
-        if "".join(only_texts) != text:
-
+        if "".join(only_texts).strip() != text.strip():
+            # If we are not able to reconstruct the initial raw text
+            # do not try to parse equations and return the original
+            return text, []
 
         return "".join(texts_and_equations), only_equations
 
@@ -365,6 +367,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
         for eq in equations:
             if len(text_tmp) == 0:
                 break
+
             pre_eq_text = text_tmp.split(eq, maxsplit=1)[0]
             text_tmp = text_tmp.split(eq, maxsplit=1)[1]
             if len(pre_eq_text) > 0:
docling/cli/main.py
CHANGED
@@ -32,13 +32,21 @@ from docling.datamodel.pipeline_options import (
     AcceleratorOptions,
     EasyOcrOptions,
     OcrOptions,
+    PaginatedPipelineOptions,
     PdfBackend,
+    PdfPipeline,
     PdfPipelineOptions,
     TableFormerMode,
+    VlmModelType,
+    VlmPipelineOptions,
+    granite_vision_vlm_conversion_options,
+    smoldocling_vlm_conversion_options,
+    smoldocling_vlm_mlx_conversion_options,
 )
 from docling.datamodel.settings import settings
 from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
 from docling.models.factories import get_ocr_factory
+from docling.pipeline.vlm_pipeline import VlmPipeline
 
 warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
 warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
@@ -200,6 +208,14 @@ def convert(
             help="Image export mode for the document (only in case of JSON, Markdown or HTML). With `placeholder`, only the position of the image is marked in the output. In `embedded` mode, the image is embedded as base64 encoded string. In `referenced` mode, the image is exported in PNG format and referenced from the main exported document.",
         ),
     ] = ImageRefMode.EMBEDDED,
+    pipeline: Annotated[
+        PdfPipeline,
+        typer.Option(..., help="Choose the pipeline to process PDF or image files."),
+    ] = PdfPipeline.STANDARD,
+    vlm_model: Annotated[
+        VlmModelType,
+        typer.Option(..., help="Choose the VLM model to use with PDF or image files."),
+    ] = VlmModelType.SMOLDOCLING,
     ocr: Annotated[
         bool,
         typer.Option(
@@ -420,50 +436,77 @@ def convert(
         ocr_options.lang = ocr_lang_list
 
     accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
-    pipeline_options = PdfPipelineOptions(
-        allow_external_plugins=allow_external_plugins,
-        enable_remote_services=enable_remote_services,
-        accelerator_options=accelerator_options,
-        do_ocr=ocr,
-        ocr_options=ocr_options,
-        do_table_structure=True,
-        do_code_enrichment=enrich_code,
-        do_formula_enrichment=enrich_formula,
-        do_picture_description=enrich_picture_description,
-        do_picture_classification=enrich_picture_classes,
-        document_timeout=document_timeout,
-    )
-    pipeline_options.table_structure_options.do_cell_matching = (
-        True  # do_cell_matching
-    )
-    pipeline_options.table_structure_options.mode = table_mode
+    pipeline_options: PaginatedPipelineOptions
+
+    if pipeline == PdfPipeline.STANDARD:
+        pipeline_options = PdfPipelineOptions(
+            allow_external_plugins=allow_external_plugins,
+            enable_remote_services=enable_remote_services,
+            accelerator_options=accelerator_options,
+            do_ocr=ocr,
+            ocr_options=ocr_options,
+            do_table_structure=True,
+            do_code_enrichment=enrich_code,
+            do_formula_enrichment=enrich_formula,
+            do_picture_description=enrich_picture_description,
+            do_picture_classification=enrich_picture_classes,
+            document_timeout=document_timeout,
+        )
+        pipeline_options.table_structure_options.do_cell_matching = (
+            True  # do_cell_matching
+        )
+        pipeline_options.table_structure_options.mode = table_mode
+
+        if image_export_mode != ImageRefMode.PLACEHOLDER:
+            pipeline_options.generate_page_images = True
+            pipeline_options.generate_picture_images = (
+                True  # FIXME: to be deprecated in verson 3
+            )
+            pipeline_options.images_scale = 2
+
+        backend: Type[PdfDocumentBackend]
+        if pdf_backend == PdfBackend.DLPARSE_V1:
+            backend = DoclingParseDocumentBackend
+        elif pdf_backend == PdfBackend.DLPARSE_V2:
+            backend = DoclingParseV2DocumentBackend
+        elif pdf_backend == PdfBackend.DLPARSE_V4:
+            backend = DoclingParseV4DocumentBackend  # type: ignore
+        elif pdf_backend == PdfBackend.PYPDFIUM2:
+            backend = PyPdfiumDocumentBackend  # type: ignore
+        else:
+            raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}")
+
+        pdf_format_option = PdfFormatOption(
+            pipeline_options=pipeline_options,
+            backend=backend,  # pdf_backend
+        )
+    elif pipeline == PdfPipeline.VLM:
+        pipeline_options = VlmPipelineOptions()
+
+        if vlm_model == VlmModelType.GRANITE_VISION:
+            pipeline_options.vlm_options = granite_vision_vlm_conversion_options
+        elif vlm_model == VlmModelType.SMOLDOCLING:
+            pipeline_options.vlm_options = smoldocling_vlm_conversion_options
+            if sys.platform == "darwin":
+                try:
+                    import mlx_vlm
+
+                    pipeline_options.vlm_options = (
+                        smoldocling_vlm_mlx_conversion_options
+                    )
+                except ImportError:
+                    _log.warning(
+                        "To run SmolDocling faster, please install mlx-vlm:\n"
+                        "pip install mlx-vlm"
+                    )
 
-    if image_export_mode != ImageRefMode.PLACEHOLDER:
-        pipeline_options.generate_page_images = True
-        pipeline_options.generate_picture_images = (
-            True  # FIXME: to be deprecated in verson 3
+        pdf_format_option = PdfFormatOption(
+            pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
         )
-    pipeline_options.images_scale = 2
 
     if artifacts_path is not None:
         pipeline_options.artifacts_path = artifacts_path
 
-    backend: Type[PdfDocumentBackend]
-    if pdf_backend == PdfBackend.DLPARSE_V1:
-        backend = DoclingParseDocumentBackend
-    elif pdf_backend == PdfBackend.DLPARSE_V2:
-        backend = DoclingParseV2DocumentBackend
-    elif pdf_backend == PdfBackend.DLPARSE_V4:
-        backend = DoclingParseV4DocumentBackend  # type: ignore
-    elif pdf_backend == PdfBackend.PYPDFIUM2:
-        backend = PyPdfiumDocumentBackend  # type: ignore
-    else:
-        raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}")
-
-    pdf_format_option = PdfFormatOption(
-        pipeline_options=pipeline_options,
-        backend=backend,  # pdf_backend
-    )
     format_options: Dict[InputFormat, FormatOption] = {
         InputFormat.PDF: pdf_format_option,
         InputFormat.IMAGE: pdf_format_option,
docling/datamodel/pipeline_options.py
CHANGED
@@ -263,6 +263,11 @@ class ResponseFormat(str, Enum):
     MARKDOWN = "markdown"
 
 
+class InferenceFramework(str, Enum):
+    MLX = "mlx"
+    TRANSFORMERS = "transformers"
+
+
 class HuggingFaceVlmOptions(BaseVlmOptions):
     kind: Literal["hf_model_options"] = "hf_model_options"
 
@@ -271,6 +276,7 @@ class HuggingFaceVlmOptions(BaseVlmOptions):
     llm_int8_threshold: float = 6.0
     quantized: bool = False
 
+    inference_framework: InferenceFramework
     response_format: ResponseFormat
 
     @property
@@ -278,10 +284,19 @@ class HuggingFaceVlmOptions(BaseVlmOptions):
         return self.repo_id.replace("/", "--")
 
 
+smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
+    repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
+    prompt="Convert this page to docling.",
+    response_format=ResponseFormat.DOCTAGS,
+    inference_framework=InferenceFramework.MLX,
+)
+
+
 smoldocling_vlm_conversion_options = HuggingFaceVlmOptions(
     repo_id="ds4sd/SmolDocling-256M-preview",
     prompt="Convert this page to docling.",
     response_format=ResponseFormat.DOCTAGS,
+    inference_framework=InferenceFramework.TRANSFORMERS,
 )
 
 granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
@@ -289,9 +304,15 @@ granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
     # prompt="OCR the full page to markdown.",
     prompt="OCR this image.",
     response_format=ResponseFormat.MARKDOWN,
+    inference_framework=InferenceFramework.TRANSFORMERS,
 )
 
 
+class VlmModelType(str, Enum):
+    SMOLDOCLING = "smoldocling"
+    GRANITE_VISION = "granite_vision"
+
+
 # Define an enum for the backend options
 class PdfBackend(str, Enum):
     """Enum of valid PDF backends."""
@@ -327,13 +348,14 @@ class PipelineOptions(BaseModel):
 
 
 class PaginatedPipelineOptions(PipelineOptions):
+    artifacts_path: Optional[Union[Path, str]] = None
+
     images_scale: float = 1.0
     generate_page_images: bool = False
     generate_picture_images: bool = False
 
 
 class VlmPipelineOptions(PaginatedPipelineOptions):
-    artifacts_path: Optional[Union[Path, str]] = None
 
     generate_page_images: bool = True
     force_backend_text: bool = (
@@ -346,7 +368,6 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
 class PdfPipelineOptions(PaginatedPipelineOptions):
     """Options for the PDF pipeline."""
 
-    artifacts_path: Optional[Union[Path, str]] = None
     do_table_structure: bool = True  # True: perform table structure extraction
     do_ocr: bool = True  # True: perform OCR, replace programmatic PDF text
     do_code_enrichment: bool = False  # True: perform code OCR
@@ -377,3 +398,8 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
     )
 
     generate_parsed_pages: bool = False
+
+
+class PdfPipeline(str, Enum):
+    STANDARD = "standard"
+    VLM = "vlm"
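Together with the new `PdfPipeline` enum, these options also let callers select the VLM pipeline programmatically. A minimal sketch mirroring what the updated CLI does; the source URL is illustrative:

```python
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    VlmPipelineOptions,
    smoldocling_vlm_mlx_conversion_options,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

pipeline_options = VlmPipelineOptions()
# On Apple Silicon with mlx-vlm installed, the MLX build of SmolDocling
# can be selected explicitly:
pipeline_options.vlm_options = smoldocling_vlm_mlx_conversion_options

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
        )
    }
)
result = converter.convert("https://arxiv.org/pdf/2206.01062")
print(result.document.export_to_markdown())
```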
docling/models/hf_mlx_model.py
ADDED
@@ -0,0 +1,137 @@
+import logging
+import time
+from pathlib import Path
+from typing import Iterable, List, Optional
+
+from docling.datamodel.base_models import Page, VlmPrediction
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    HuggingFaceVlmOptions,
+)
+from docling.datamodel.settings import settings
+from docling.models.base_model import BasePageModel
+from docling.utils.accelerator_utils import decide_device
+from docling.utils.profiling import TimeRecorder
+
+_log = logging.getLogger(__name__)
+
+
+class HuggingFaceMlxModel(BasePageModel):
+
+    def __init__(
+        self,
+        enabled: bool,
+        artifacts_path: Optional[Path],
+        accelerator_options: AcceleratorOptions,
+        vlm_options: HuggingFaceVlmOptions,
+    ):
+        self.enabled = enabled
+
+        self.vlm_options = vlm_options
+
+        if self.enabled:
+
+            try:
+                from mlx_vlm import generate, load  # type: ignore
+                from mlx_vlm.prompt_utils import apply_chat_template  # type: ignore
+                from mlx_vlm.utils import load_config, stream_generate  # type: ignore
+            except ImportError:
+                raise ImportError(
+                    "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."
+                )
+
+            repo_cache_folder = vlm_options.repo_id.replace("/", "--")
+            self.apply_chat_template = apply_chat_template
+            self.stream_generate = stream_generate
+
+            # PARAMETERS:
+            if artifacts_path is None:
+                artifacts_path = self.download_models(self.vlm_options.repo_id)
+            elif (artifacts_path / repo_cache_folder).exists():
+                artifacts_path = artifacts_path / repo_cache_folder
+
+            self.param_question = vlm_options.prompt  # "Perform Layout Analysis."
+
+            ## Load the model
+            self.vlm_model, self.processor = load(artifacts_path)
+            self.config = load_config(artifacts_path)
+
+    @staticmethod
+    def download_models(
+        repo_id: str,
+        local_dir: Optional[Path] = None,
+        force: bool = False,
+        progress: bool = False,
+    ) -> Path:
+        from huggingface_hub import snapshot_download
+        from huggingface_hub.utils import disable_progress_bars
+
+        if not progress:
+            disable_progress_bars()
+        download_path = snapshot_download(
+            repo_id=repo_id,
+            force_download=force,
+            local_dir=local_dir,
+            # revision="v0.0.1",
+        )
+
+        return Path(download_path)
+
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
+        for page in page_batch:
+            assert page._backend is not None
+            if not page._backend.is_valid():
+                yield page
+            else:
+                with TimeRecorder(conv_res, "vlm"):
+                    assert page.size is not None
+
+                    hi_res_image = page.get_image(scale=2.0)  # 144dpi
+                    # hi_res_image = page.get_image(scale=1.0)  # 72dpi
+
+                    if hi_res_image is not None:
+                        im_width, im_height = hi_res_image.size
+
+                    # populate page_tags with predicted doc tags
+                    page_tags = ""
+
+                    if hi_res_image:
+                        if hi_res_image.mode != "RGB":
+                            hi_res_image = hi_res_image.convert("RGB")
+
+                    prompt = self.apply_chat_template(
+                        self.processor, self.config, self.param_question, num_images=1
+                    )
+
+                    start_time = time.time()
+                    # Call model to generate:
+                    output = ""
+                    for token in self.stream_generate(
+                        self.vlm_model,
+                        self.processor,
+                        prompt,
+                        [hi_res_image],
+                        max_tokens=4096,
+                        verbose=False,
+                    ):
+                        output += token.text
+                        if "</doctag>" in token.text:
+                            break
+
+                    generation_time = time.time() - start_time
+                    page_tags = output
+
+                    # inference_time = time.time() - start_time
+                    # tokens_per_second = num_tokens / generation_time
+                    # print("")
+                    # print(f"Page Inference Time: {inference_time:.2f} seconds")
+                    # print(f"Total tokens on page: {num_tokens:.2f}")
+                    # print(f"Tokens/sec: {tokens_per_second:.2f}")
+                    # print("")
+                    page.predictions.vlm_response = VlmPrediction(text=page_tags)
+
+                yield page
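For context, the mlx-vlm calls this wrapper makes can be exercised standalone. A sketch assuming Apple Silicon and `pip install mlx-vlm`; the page-image path is illustrative:

```python
from mlx_vlm import load
from mlx_vlm.prompt_utils import apply_chat_template
from mlx_vlm.utils import load_config, stream_generate

model_path = "ds4sd/SmolDocling-256M-preview-mlx-bf16"
model, processor = load(model_path)
config = load_config(model_path)
prompt = apply_chat_template(
    processor, config, "Convert this page to docling.", num_images=1
)

# Stream tokens and stop as soon as the DocTags stream closes.
output = ""
for token in stream_generate(
    model, processor, prompt, ["page.png"], max_tokens=4096, verbose=False
):
    output += token.text
    if "</doctag>" in token.text:
        break
print(output)
```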
docling/pipeline/vlm_pipeline.py
CHANGED
@@ -1,30 +1,13 @@
-import itertools
 import logging
-import re
 import warnings
 from io import BytesIO
-
-# from io import BytesIO
 from pathlib import Path
-from typing import Optional
+from typing import List, Optional, Union, cast
 
-from docling_core.types import DoclingDocument
-from docling_core.types.doc import (
-    BoundingBox,
-    DocItem,
-    DocItemLabel,
-    DoclingDocument,
-    GroupLabel,
-    ImageRef,
-    ImageRefMode,
-    PictureItem,
-    ProvenanceItem,
-    Size,
-    TableCell,
-    TableData,
-    TableItem,
-)
-from docling_core.types.doc.tokens import DocumentToken, TableToken
+# from docling_core.types import DoclingDocument
+from docling_core.types.doc import BoundingBox, DocItem, ImageRef, PictureItem, TextItem
+from docling_core.types.doc.document import DocTagsDocument
+from PIL import Image as PILImage
 
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.md_backend import MarkdownDocumentBackend
@@ -32,11 +15,12 @@ from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import InputFormat, Page
 from docling.datamodel.document import ConversionResult, InputDocument
 from docling.datamodel.pipeline_options import (
-
+    InferenceFramework,
     ResponseFormat,
     VlmPipelineOptions,
 )
 from docling.datamodel.settings import settings
+from docling.models.hf_mlx_model import HuggingFaceMlxModel
 from docling.models.hf_vlm_model import HuggingFaceVlmModel
 from docling.pipeline.base_pipeline import PaginatedPipeline
 from docling.utils.profiling import ProfilingScope, TimeRecorder
@@ -50,12 +34,6 @@ class VlmPipeline(PaginatedPipeline):
         super().__init__(pipeline_options)
         self.keep_backend = True
 
-        warnings.warn(
-            "The VlmPipeline is currently experimental and may change in upcoming versions without notice.",
-            category=UserWarning,
-            stacklevel=2,
-        )
-
         self.pipeline_options: VlmPipelineOptions
 
         artifacts_path: Optional[Path] = None
@@ -79,14 +57,27 @@ class VlmPipeline(PaginatedPipeline):
 
         self.keep_images = self.pipeline_options.generate_page_images
 
-        self.build_pipe = [
-            HuggingFaceVlmModel(
-                enabled=True,  # must be always enabled for this pipeline to make sense.
-                artifacts_path=artifacts_path,
-                accelerator_options=pipeline_options.accelerator_options,
-                vlm_options=self.pipeline_options.vlm_options,
-            ),
-        ]
+        if (
+            self.pipeline_options.vlm_options.inference_framework
+            == InferenceFramework.MLX
+        ):
+            self.build_pipe = [
+                HuggingFaceMlxModel(
+                    enabled=True,  # must be always enabled for this pipeline to make sense.
+                    artifacts_path=artifacts_path,
+                    accelerator_options=pipeline_options.accelerator_options,
+                    vlm_options=self.pipeline_options.vlm_options,
+                ),
+            ]
+        else:
+            self.build_pipe = [
+                HuggingFaceVlmModel(
+                    enabled=True,  # must be always enabled for this pipeline to make sense.
+                    artifacts_path=artifacts_path,
+                    accelerator_options=pipeline_options.accelerator_options,
+                    vlm_options=self.pipeline_options.vlm_options,
+                ),
+            ]
 
         self.enrichment_pipe = [
             # Other models working on `NodeItem` elements in the DoclingDocument
@@ -100,6 +91,17 @@ class VlmPipeline(PaginatedPipeline):
 
         return page
 
+    def extract_text_from_backend(
+        self, page: Page, bbox: Union[BoundingBox, None]
+    ) -> str:
+        # Convert bounding box normalized to 0-100 into page coordinates for cropping
+        text = ""
+        if bbox:
+            if page.size:
+                if page._backend:
+                    text = page._backend.get_text_in_rect(bbox)
+        return text
+
     def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
         with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
 
@@ -107,7 +109,45 @@ class VlmPipeline(PaginatedPipeline):
                 self.pipeline_options.vlm_options.response_format
                 == ResponseFormat.DOCTAGS
             ):
-                conv_res.document = self._turn_tags_into_doc(conv_res.pages)
+                doctags_list = []
+                image_list = []
+                for page in conv_res.pages:
+                    predicted_doctags = ""
+                    img = PILImage.new("RGB", (1, 1), "rgb(255,255,255)")
+                    if page.predictions.vlm_response:
+                        predicted_doctags = page.predictions.vlm_response.text
+                    if page.image:
+                        img = page.image
+                    image_list.append(img)
+                    doctags_list.append(predicted_doctags)
+
+                doctags_list_c = cast(List[Union[Path, str]], doctags_list)
+                image_list_c = cast(List[Union[Path, PILImage.Image]], image_list)
+                doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(
+                    doctags_list_c, image_list_c
+                )
+                conv_res.document.load_from_doctags(doctags_doc)
+
+                # If forced backend text, replace model predicted text with backend one
+                if page.size:
+                    if self.force_backend_text:
+                        scale = self.pipeline_options.images_scale
+                        for element, _level in conv_res.document.iterate_items():
+                            if (
+                                not isinstance(element, TextItem)
+                                or len(element.prov) == 0
+                            ):
+                                continue
+                            crop_bbox = (
+                                element.prov[0]
+                                .bbox.scaled(scale=scale)
+                                .to_top_left_origin(
+                                    page_height=page.size.height * scale
+                                )
+                            )
+                            txt = self.extract_text_from_backend(page, crop_bbox)
+                            element.text = txt
+                            element.orig = txt
             elif (
                 self.pipeline_options.vlm_options.response_format
                 == ResponseFormat.MARKDOWN
@@ -165,366 +205,6 @@ class VlmPipeline(PaginatedPipeline):
         )
         return backend.convert()
 
-    def _turn_tags_into_doc(self, pages: list[Page]) -> DoclingDocument:
-        ###############################################
-        # Tag definitions and color mappings
-        ###############################################
-
-        # Maps the recognized tag to a Docling label.
-        # Code items will be given DocItemLabel.CODE
-        tag_to_doclabel = {
-            "title": DocItemLabel.TITLE,
-            "document_index": DocItemLabel.DOCUMENT_INDEX,
-            "otsl": DocItemLabel.TABLE,
-            "section_header_level_1": DocItemLabel.SECTION_HEADER,
-            "checkbox_selected": DocItemLabel.CHECKBOX_SELECTED,
-            "checkbox_unselected": DocItemLabel.CHECKBOX_UNSELECTED,
-            "text": DocItemLabel.TEXT,
-            "page_header": DocItemLabel.PAGE_HEADER,
-            "page_footer": DocItemLabel.PAGE_FOOTER,
-            "formula": DocItemLabel.FORMULA,
-            "caption": DocItemLabel.CAPTION,
-            "picture": DocItemLabel.PICTURE,
-            "list_item": DocItemLabel.LIST_ITEM,
-            "footnote": DocItemLabel.FOOTNOTE,
-            "code": DocItemLabel.CODE,
-        }
-
-        # Maps each tag to an associated bounding box color.
-        tag_to_color = {
-            "title": "blue",
-            "document_index": "darkblue",
-            "otsl": "green",
-            "section_header_level_1": "purple",
-            "checkbox_selected": "black",
-            "checkbox_unselected": "gray",
-            "text": "red",
-            "page_header": "orange",
-            "page_footer": "cyan",
-            "formula": "pink",
-            "caption": "magenta",
-            "picture": "yellow",
-            "list_item": "brown",
-            "footnote": "darkred",
-            "code": "lightblue",
-        }
-
-        def extract_bounding_box(text_chunk: str) -> Optional[BoundingBox]:
-            """Extracts <loc_...> bounding box coords from the chunk, normalized by / 500."""
-            coords = re.findall(r"<loc_(\d+)>", text_chunk)
-            if len(coords) == 4:
-                l, t, r, b = map(float, coords)
-                return BoundingBox(l=l / 500, t=t / 500, r=r / 500, b=b / 500)
-            return None
-
-        def extract_inner_text(text_chunk: str) -> str:
-            """Strips all <...> tags inside the chunk to get the raw text content."""
-            return re.sub(r"<.*?>", "", text_chunk, flags=re.DOTALL).strip()
-
-        def extract_text_from_backend(page: Page, bbox: BoundingBox | None) -> str:
-            # Convert bounding box normalized to 0-100 into page coordinates for cropping
-            text = ""
-            if bbox:
-                if page.size:
-                    bbox.l = bbox.l * page.size.width
-                    bbox.t = bbox.t * page.size.height
-                    bbox.r = bbox.r * page.size.width
-                    bbox.b = bbox.b * page.size.height
-                    if page._backend:
-                        text = page._backend.get_text_in_rect(bbox)
-            return text
-
-        def otsl_parse_texts(texts, tokens):
-            split_word = TableToken.OTSL_NL.value
-            split_row_tokens = [
-                list(y)
-                for x, y in itertools.groupby(tokens, lambda z: z == split_word)
-                if not x
-            ]
-            table_cells = []
-            r_idx = 0
-            c_idx = 0
-
-            def count_right(tokens, c_idx, r_idx, which_tokens):
-                span = 0
-                c_idx_iter = c_idx
-                while tokens[r_idx][c_idx_iter] in which_tokens:
-                    c_idx_iter += 1
-                    span += 1
-                    if c_idx_iter >= len(tokens[r_idx]):
-                        return span
-                return span
-
-            def count_down(tokens, c_idx, r_idx, which_tokens):
-                span = 0
-                r_idx_iter = r_idx
-                while tokens[r_idx_iter][c_idx] in which_tokens:
-                    r_idx_iter += 1
-                    span += 1
-                    if r_idx_iter >= len(tokens):
-                        return span
-                return span
-
-            for i, text in enumerate(texts):
-                cell_text = ""
-                if text in [
-                    TableToken.OTSL_FCEL.value,
-                    TableToken.OTSL_ECEL.value,
-                    TableToken.OTSL_CHED.value,
-                    TableToken.OTSL_RHED.value,
-                    TableToken.OTSL_SROW.value,
-                ]:
-                    row_span = 1
-                    col_span = 1
-                    right_offset = 1
-                    if text != TableToken.OTSL_ECEL.value:
-                        cell_text = texts[i + 1]
-                        right_offset = 2
-
-                    # Check next element(s) for lcel / ucel / xcel, set properly row_span, col_span
-                    next_right_cell = ""
-                    if i + right_offset < len(texts):
-                        next_right_cell = texts[i + right_offset]
-
-                    next_bottom_cell = ""
-                    if r_idx + 1 < len(split_row_tokens):
-                        if c_idx < len(split_row_tokens[r_idx + 1]):
-                            next_bottom_cell = split_row_tokens[r_idx + 1][c_idx]
-
-                    if next_right_cell in [
-                        TableToken.OTSL_LCEL.value,
-                        TableToken.OTSL_XCEL.value,
-                    ]:
-                        # we have horisontal spanning cell or 2d spanning cell
-                        col_span += count_right(
-                            split_row_tokens,
-                            c_idx + 1,
-                            r_idx,
-                            [TableToken.OTSL_LCEL.value, TableToken.OTSL_XCEL.value],
-                        )
-                    if next_bottom_cell in [
-                        TableToken.OTSL_UCEL.value,
-                        TableToken.OTSL_XCEL.value,
-                    ]:
-                        # we have a vertical spanning cell or 2d spanning cell
-                        row_span += count_down(
-                            split_row_tokens,
-                            c_idx,
-                            r_idx + 1,
-                            [TableToken.OTSL_UCEL.value, TableToken.OTSL_XCEL.value],
-                        )
-
-                    table_cells.append(
-                        TableCell(
-                            text=cell_text.strip(),
-                            row_span=row_span,
-                            col_span=col_span,
-                            start_row_offset_idx=r_idx,
-                            end_row_offset_idx=r_idx + row_span,
-                            start_col_offset_idx=c_idx,
-                            end_col_offset_idx=c_idx + col_span,
-                        )
-                    )
-                if text in [
-                    TableToken.OTSL_FCEL.value,
-                    TableToken.OTSL_ECEL.value,
-                    TableToken.OTSL_CHED.value,
-                    TableToken.OTSL_RHED.value,
-                    TableToken.OTSL_SROW.value,
-                    TableToken.OTSL_LCEL.value,
-                    TableToken.OTSL_UCEL.value,
-                    TableToken.OTSL_XCEL.value,
-                ]:
-                    c_idx += 1
-                if text == TableToken.OTSL_NL.value:
-                    r_idx += 1
-                    c_idx = 0
-            return table_cells, split_row_tokens
-
-        def otsl_extract_tokens_and_text(s: str):
-            # Pattern to match anything enclosed by < > (including the angle brackets themselves)
-            pattern = r"(<[^>]+>)"
-            # Find all tokens (e.g. "<otsl>", "<loc_140>", etc.)
-            tokens = re.findall(pattern, s)
-            # Remove any tokens that start with "<loc_"
-            tokens = [
-                token
-                for token in tokens
-                if not (
-                    token.startswith(rf"<{DocumentToken.LOC.value}")
-                    or token
-                    in [
-                        rf"<{DocumentToken.OTSL.value}>",
-                        rf"</{DocumentToken.OTSL.value}>",
-                    ]
-                )
-            ]
-            # Split the string by those tokens to get the in-between text
-            text_parts = re.split(pattern, s)
-            text_parts = [
-                token
-                for token in text_parts
-                if not (
-                    token.startswith(rf"<{DocumentToken.LOC.value}")
-                    or token
-                    in [
-                        rf"<{DocumentToken.OTSL.value}>",
-                        rf"</{DocumentToken.OTSL.value}>",
-                    ]
-                )
-            ]
-            # Remove any empty or purely whitespace strings from text_parts
-            text_parts = [part for part in text_parts if part.strip()]
-
-            return tokens, text_parts
-
-        def parse_table_content(otsl_content: str) -> TableData:
-            tokens, mixed_texts = otsl_extract_tokens_and_text(otsl_content)
-            table_cells, split_row_tokens = otsl_parse_texts(mixed_texts, tokens)
-
-            return TableData(
-                num_rows=len(split_row_tokens),
-                num_cols=(
-                    max(len(row) for row in split_row_tokens) if split_row_tokens else 0
-                ),
-                table_cells=table_cells,
-            )
-
-        doc = DoclingDocument(name="Document")
-        for pg_idx, page in enumerate(pages):
-            xml_content = ""
-            predicted_text = ""
-            if page.predictions.vlm_response:
-                predicted_text = page.predictions.vlm_response.text
-            image = page.image
-
-            page_no = pg_idx + 1
-            bounding_boxes = []
-
-            if page.size:
-                pg_width = page.size.width
-                pg_height = page.size.height
-                size = Size(width=pg_width, height=pg_height)
-                parent_page = doc.add_page(page_no=page_no, size=size)
-
-            """
-            1. Finds all <tag>...</tag> blocks in the entire string (multi-line friendly) in the order they appear.
-            2. For each chunk, extracts bounding box (if any) and inner text.
-            3. Adds the item to a DoclingDocument structure with the right label.
-            4. Tracks bounding boxes + color in a separate list for later visualization.
-            """
-
-            # Regex for all recognized tags
-            tag_pattern = (
-                rf"<(?P<tag>{DocItemLabel.TITLE}|{DocItemLabel.DOCUMENT_INDEX}|"
-                rf"{DocItemLabel.CHECKBOX_UNSELECTED}|{DocItemLabel.CHECKBOX_SELECTED}|"
-                rf"{DocItemLabel.TEXT}|{DocItemLabel.PAGE_HEADER}|"
-                rf"{DocItemLabel.PAGE_FOOTER}|{DocItemLabel.FORMULA}|"
-                rf"{DocItemLabel.CAPTION}|{DocItemLabel.PICTURE}|"
-                rf"{DocItemLabel.LIST_ITEM}|{DocItemLabel.FOOTNOTE}|{DocItemLabel.CODE}|"
-                rf"{DocItemLabel.SECTION_HEADER}_level_1|{DocumentToken.OTSL.value})>.*?</(?P=tag)>"
-            )
-
-            # DocumentToken.OTSL
-            pattern = re.compile(tag_pattern, re.DOTALL)
-
-            # Go through each match in order
-            for match in pattern.finditer(predicted_text):
-                full_chunk = match.group(0)
-                tag_name = match.group("tag")
-
-                bbox = extract_bounding_box(full_chunk)
-                doc_label = tag_to_doclabel.get(tag_name, DocItemLabel.PARAGRAPH)
-                color = tag_to_color.get(tag_name, "white")
-
-                # Store bounding box + color
-                if bbox:
-                    bounding_boxes.append((bbox, color))
-
-                if tag_name == DocumentToken.OTSL.value:
-                    table_data = parse_table_content(full_chunk)
-                    bbox = extract_bounding_box(full_chunk)
-
-                    if bbox:
-                        prov = ProvenanceItem(
-                            bbox=bbox.resize_by_scale(pg_width, pg_height),
-                            charspan=(0, 0),
-                            page_no=page_no,
-                        )
-                        doc.add_table(data=table_data, prov=prov)
-                    else:
-                        doc.add_table(data=table_data)
-
-                elif tag_name == DocItemLabel.PICTURE:
-                    text_caption_content = extract_inner_text(full_chunk)
-                    if image:
-                        if bbox:
-                            im_width, im_height = image.size
-
-                            crop_box = (
-                                int(bbox.l * im_width),
-                                int(bbox.t * im_height),
-                                int(bbox.r * im_width),
-                                int(bbox.b * im_height),
-                            )
-                            cropped_image = image.crop(crop_box)
-                            pic = doc.add_picture(
-                                parent=None,
-                                image=ImageRef.from_pil(image=cropped_image, dpi=72),
-                                prov=(
-                                    ProvenanceItem(
-                                        bbox=bbox.resize_by_scale(pg_width, pg_height),
-                                        charspan=(0, 0),
-                                        page_no=page_no,
-                                    )
-                                ),
-                            )
-                            # If there is a caption to an image, add it as well
-                            if len(text_caption_content) > 0:
-                                caption_item = doc.add_text(
-                                    label=DocItemLabel.CAPTION,
-                                    text=text_caption_content,
-                                    parent=None,
-                                )
-                                pic.captions.append(caption_item.get_ref())
-                    else:
-                        if bbox:
-                            # In case we don't have access to an binary of an image
-                            doc.add_picture(
-                                parent=None,
-                                prov=ProvenanceItem(
-                                    bbox=bbox, charspan=(0, 0), page_no=page_no
-                                ),
-                            )
-                            # If there is a caption to an image, add it as well
-                            if len(text_caption_content) > 0:
-                                caption_item = doc.add_text(
-                                    label=DocItemLabel.CAPTION,
-                                    text=text_caption_content,
-                                    parent=None,
-                                )
-                                pic.captions.append(caption_item.get_ref())
-                else:
-                    # For everything else, treat as text
-                    if self.force_backend_text:
-                        text_content = extract_text_from_backend(page, bbox)
-                    else:
-                        text_content = extract_inner_text(full_chunk)
-                    doc.add_text(
-                        label=doc_label,
-                        text=text_content,
-                        prov=(
-                            ProvenanceItem(
-                                bbox=bbox.resize_by_scale(pg_width, pg_height),
-                                charspan=(0, len(text_content)),
-                                page_no=page_no,
-                            )
-                            if bbox
-                            else None
-                        ),
-                    )
-        return doc
-
     @classmethod
     def get_default_options(cls) -> VlmPipelineOptions:
         return VlmPipelineOptions()
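The hand-rolled DocTags parser removed above is superseded by docling-core's `DocTagsDocument`, which the reworked `_assemble_document` delegates to. A sketch of that API; the DocTags string and blank image are illustrative stand-ins for real model output:

```python
from docling_core.types.doc import DoclingDocument
from docling_core.types.doc.document import DocTagsDocument
from PIL import Image as PILImage

# Illustrative DocTags output for a single page.
doctags = "<doctag><text><loc_100><loc_100><loc_400><loc_120>Hello Docling</text></doctag>"
page_image = PILImage.new("RGB", (640, 480), "white")

doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [page_image])
doc = DoclingDocument(name="Document")
doc.load_from_doctags(doctags_doc)
print(doc.export_to_markdown())
```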
{docling-2.27.0.dist-info → docling-2.28.0.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 2.27.0
+Version: 2.28.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Home-page: https://github.com/docling-project/docling
 License: MIT
@@ -28,7 +28,7 @@ Provides-Extra: vlm
 Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
 Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
 Requires-Dist: certifi (>=2024.7.4)
-Requires-Dist: docling-core[chunking] (>=2.23.
+Requires-Dist: docling-core[chunking] (>=2.23.1,<3.0.0)
 Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
 Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
 Requires-Dist: easyocr (>=1.7,<2.0)
@@ -86,6 +86,7 @@ Description-Content-Type: text/markdown
 [](https://opensource.org/licenses/MIT)
 [](https://pepy.tech/projects/docling)
 [](https://apify.com/vancura/docling)
+[](https://lfaidata.foundation/projects/)
 
 Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem.
 
@@ -98,12 +99,12 @@ Docling simplifies document processing, parsing diverse formats — including ad
 * 🔒 Local execution capabilities for sensitive data and air-gapped environments
 * 🤖 Plug-and-play [integrations][integrations] incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
 * 🔍 Extensive OCR support for scanned PDFs and images
+* 🥚 Support of Visual Language Models ([SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview)) 🆕
 * 💻 Simple and convenient CLI
 
 ### Coming soon
 
 * 📝 Metadata extraction, including title, authors, references & language
-* 📝 Inclusion of Visual Language Models ([SmolDocling](https://huggingface.co/blog/smolervlm#smoldocling))
 * 📝 Chart understanding (Barchart, Piechart, LinePlot, etc)
 * 📝 Complex chemistry understanding (Molecular structures)
 
@@ -120,7 +121,7 @@ More [detailed installation instructions](https://docling-project.github.io/docl
 
 ## Getting started
 
-To convert individual documents, use `convert()`, for example:
+To convert individual documents with python, use `convert()`, for example:
 
 ```python
 from docling.document_converter import DocumentConverter
@@ -134,6 +135,22 @@ print(result.document.export_to_markdown())  # output: "## Docling Technical Rep
 More [advanced usage options](https://docling-project.github.io/docling/usage/) are available in
 the docs.
 
+## CLI
+
+Docling has a built-in CLI to run conversions.
+
+```bash
+docling https://arxiv.org/pdf/2206.01062
+```
+
+You can also use 🥚[SmolDocling](https://huggingface.co/ds4sd/SmolDocling-256M-preview) and other VLMs via Docling CLI:
+```bash
+docling --pipeline vlm --vlm-model smoldocling https://arxiv.org/pdf/2206.01062
+```
+This will use MLX acceleration on supported Apple Silicon hardware.
+
+Read more [here](https://docling-project.github.io/docling/usage/)
+
 ## Documentation
 
 Check out Docling's [documentation](https://docling-project.github.io/docling/), for details on
@@ -150,32 +167,6 @@ To further accelerate your AI application development, check out Docling's nativ
 [integrations](https://docling-project.github.io/docling/integrations/) with popular frameworks
 and tools.
 
-## Apify Actor
-
-<a href="https://apify.com/vancura/docling?fpr=docling"><img src="https://apify.com/ext/run-on-apify.png" alt="Run Docling Actor on Apify" width="176" height="39" /></a>
-
-You can run Docling in the cloud without installation using the [Docling Actor](https://apify.com/vancura/docling?fpr=docling) on Apify platform. Simply provide a document URL and get the processed result:
-
-```bash
-apify call vancura/docling -i '{
-  "options": {
-    "to_formats": ["md", "json", "html", "text", "doctags"]
-  },
-  "http_sources": [
-    {"url": "https://vancura.dev/assets/actor-test/facial-hairstyles-and-filtering-facepiece-respirators.pdf"},
-    {"url": "https://arxiv.org/pdf/2408.09869"}
-  ]
-}'
-```
-
-The Actor stores results in:
-
-* Processed document in key-value store (`OUTPUT_RESULT`)
-* Processing logs (`DOCLING_LOG`)
-* Dataset record with result URL and status
-
-Read more about the [Docling Actor](.actor/README.md), including how to use it via the Apify API and CLI.
-
 ## Get help and support
 
 Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling/discussions).
@@ -210,9 +201,13 @@ If you use Docling in your projects, please consider citing the following:
 The Docling codebase is under MIT license.
 For individual model usage, please refer to the model licenses found in the original packages.
 
-##
+## LF AI & Data
+
+Docling is hosted as a project in the [LF AI & Data Foundation](https://lfaidata.foundation/projects/).
+
+### IBM ❤️ Open Source AI
 
-
+The project was started by the AI for knowledge team at IBM Research Zurich.
 
 [supported_formats]: https://docling-project.github.io/docling/usage/supported_formats/
 [docling_document]: https://docling-project.github.io/docling/concepts/docling_document/
{docling-2.27.0.dist-info → docling-2.28.0.dist-info}/RECORD
RENAMED
@@ -5,7 +5,7 @@ docling/backend/asciidoc_backend.py,sha256=xBtmYkRkPICIfMbB8AFIw_or4IZGB17mP_LhX
 docling/backend/csv_backend.py,sha256=lCNSkgB55IbAig7w4IyXRkX23aM3Nojj6GdXNoaNjY4,4536
 docling/backend/docling_parse_backend.py,sha256=tcy4cPD_dtGD37CjivbFvwzwXVcrb3HVmofyasxLum8,7991
 docling/backend/docling_parse_v2_backend.py,sha256=70kXqYhht-A8zb9z5emMe_1i0l9dyQGrM8lg1cmAvqc,9369
-docling/backend/docling_parse_v4_backend.py,sha256=
+docling/backend/docling_parse_v4_backend.py,sha256=IECMJQWEvYqQv043_1Ho6dLkCbuaK8cMUsqcxwqruXo,6287
 docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/docx/latex/latex_dict.py,sha256=a0UC3VLmG1BLN-hGmEaQamzKbDB10fCz0U8qRU--aBw,6613
@@ -15,8 +15,8 @@ docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
 docling/backend/md_backend.py,sha256=v230PXShYJo2QaabwUHiBpE-EGScHIerjL78zPaJpZM,16837
 docling/backend/msexcel_backend.py,sha256=_ZVZFKRRijpg-Xz10xNxu2m-NpDaYvoiBqEZP6GbrgE,11095
-docling/backend/mspowerpoint_backend.py,sha256=
-docling/backend/msword_backend.py,sha256=
+docling/backend/mspowerpoint_backend.py,sha256=zXdXr8nGJJbPGTgR5_dqq5WmNL1wDCaK0RqFqtuHPqs,17213
+docling/backend/msword_backend.py,sha256=VjTvJe249FjHJDBpK0RC4iyosMzmpJLTuFIAPNEdReU,23259
 docling/backend/pdf_backend.py,sha256=odWb1rxk3WCUIEJMhq-dYFNUQ1pSDuNHbU9wlTZIRAs,2211
 docling/backend/pypdfium2_backend.py,sha256=wRwhA5XHRqL7vyNhCAHM6P-ONkwtyjKG9LgC4NJ-4i8,10784
 docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -24,13 +24,13 @@ docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqaf
 docling/backend/xml/uspto_backend.py,sha256=H0jwIt2skOke_yEUk0wfXCtodrB-hrj2ygLtB3jMWaI,71056
 docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
 docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/cli/main.py,sha256=
+docling/cli/main.py,sha256=zr36i-itYkX013g_DK6aNiNe8UPaD27_A7UtG5qwLUo,20174
 docling/cli/models.py,sha256=tM_qbMM3YOPxFU7JlME96MLbtd1CX_bOAK7FS-NhJvY,3979
 docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
 docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/datamodel/base_models.py,sha256=MAHr8LlffZ2uIXZ3AXOsikh_-oQIEYTiwwjsz-dQW9U,7287
 docling/datamodel/document.py,sha256=DbJifyMgBEkAk80BMYXTuSgqH2vijDENDkU7Fmr6j_g,14567
-docling/datamodel/pipeline_options.py,sha256=
+docling/datamodel/pipeline_options.py,sha256=TpRf_-7UuCjjaytFWA0nL2m-KP4no9jeAjaXRjBLMLE,12593
 docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
 docling/document_converter.py,sha256=LwbnfGzma937EmSrNWMzM-dldI9Cbu4DUgY8gL1OVHo,13184
 docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
@@ -44,6 +44,7 @@ docling/models/factories/__init__.py,sha256=e4lFmRfmW5hWqvJjY5xaVFbvCQhDBCrVeSq8
 docling/models/factories/base_factory.py,sha256=pNR9-B_BKs2sYNyHnp2ON2l3r6Dy9lcof4qmwHlAryI,4032
 docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0ekwUX2ILts,316
 docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
+docling/models/hf_mlx_model.py,sha256=2eSHphJm5LAfiSA24blVMc2znJlKMYrtmmzq8ffc-rU,4924
 docling/models/hf_vlm_model.py,sha256=NUtLEuG-kNGJeDHWmQKAAOZG4WF0a5hn-KXUUM1mHBQ,6820
 docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3w,7804
 docling/models/ocr_mac_model.py,sha256=2pZaUWg19go_u88mKWr5y_52PAYEN__GsbyUYLdY4zo,5353
@@ -63,7 +64,7 @@ docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
 docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
 docling/pipeline/standard_pdf_pipeline.py,sha256=tHOHFyJajX6IAhm4y3I27uqn5jfMTuCaSaFOKT5JM2M,10593
-docling/pipeline/vlm_pipeline.py,sha256=
+docling/pipeline/vlm_pipeline.py,sha256=1eKt3gqWf6PxGvYZuqhKi2BFljJGJWIyHemzOAwa39Y,9065
 docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
@@ -76,8 +77,8 @@ docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,26
 docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
 docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
 docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
-docling-2.
-docling-2.
-docling-2.
-docling-2.
-docling-2.
+docling-2.28.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling-2.28.0.dist-info/METADATA,sha256=miIkWRX5hgrOeGbyYDAiQaymAR6PxK6Qdlss5DR1YhM,9982
+docling-2.28.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+docling-2.28.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
+docling-2.28.0.dist-info/RECORD,,
{docling-2.27.0.dist-info → docling-2.28.0.dist-info}/LICENSE
File without changes
{docling-2.27.0.dist-info → docling-2.28.0.dist-info}/WHEEL
File without changes
{docling-2.27.0.dist-info → docling-2.28.0.dist-info}/entry_points.txt
File without changes