PyPI - docling - Versions diffs - 2.6.0__py3-none-any.whl → 2.7.1__py3-none-any.whl - Mend

docling 2.6.0__py3-none-any.whl → 2.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

docling/backend/msword_backend.py +22 -9
docling/cli/main.py +20 -18
docling/datamodel/pipeline_options.py +14 -3
docling/document_converter.py +4 -4
docling/models/ocr_mac_model.py +118 -0
docling/pipeline/standard_pdf_pipeline.py +12 -0
{docling-2.6.0.dist-info → docling-2.7.1.dist-info}/METADATA +13 -8
{docling-2.6.0.dist-info → docling-2.7.1.dist-info}/RECORD +11 -10
{docling-2.6.0.dist-info → docling-2.7.1.dist-info}/LICENSE +0 -0
{docling-2.6.0.dist-info → docling-2.7.1.dist-info}/WHEEL +0 -0
{docling-2.6.0.dist-info → docling-2.7.1.dist-info}/entry_points.txt +0 -0

docling/backend/msword_backend.py CHANGED Viewed

@@ -14,7 +14,8 @@ from docling_core.types.doc import (
     TableData,
 )
 from lxml import etree
-from PIL import Image
+from lxml.etree import XPath
+from PIL import Image, UnidentifiedImageError
 from docling.backend.abstract_backend import DeclarativeDocumentBackend
 from docling.datamodel.base_models import InputFormat
@@ -132,8 +133,14 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
     def walk_linear(self, body, docx_obj, doc) -> DoclingDocument:
         for element in body:
             tag_name = etree.QName(element).localname
             # Check for Inline Images (blip elements)
-            drawing_blip = element.xpath(".//a:blip")
+            namespaces = {
+                "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
+                "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
+            }
+            xpath_expr = XPath(".//a:blip", namespaces=namespaces)
+            drawing_blip = xpath_expr(element)
             # Check for Tables
             if element.tag.endswith("tbl"):
@@ -210,7 +217,6 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
         paragraph = docx.text.paragraph.Paragraph(element, docx_obj)
         if paragraph.text is None:
-            # _log.warn(f"paragraph has text==None")
             return
         text = paragraph.text.strip()
         # if len(text)==0 # keep empty paragraphs, they seperate adjacent lists!
@@ -502,10 +508,17 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
         image_data = get_docx_image(element, drawing_blip)
         image_bytes = BytesIO(image_data)
         # Open the BytesIO object with PIL to create an Image
-        pil_image = Image.open(image_bytes)
-        doc.add_picture(
-            parent=self.parents[self.level],
-            image=ImageRef.from_pil(image=pil_image, dpi=72),
-            caption=None,
-        )
+        try:
+            pil_image = Image.open(image_bytes)
+            doc.add_picture(
+                parent=self.parents[self.level],
+                image=ImageRef.from_pil(image=pil_image, dpi=72),
+                caption=None,
+            )
+        except (UnidentifiedImageError, OSError) as e:
+            _log.warning("Warning: image cannot be loaded by Pillow")
+            doc.add_picture(
+                parent=self.parents[self.level],
+                caption=None,
+            )
         return

docling/cli/main.py CHANGED Viewed

@@ -24,6 +24,7 @@ from docling.datamodel.base_models import (
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
     EasyOcrOptions,
+    OcrMacOptions,
     OcrOptions,
     PdfPipelineOptions,
     TableFormerMode,
@@ -74,6 +75,7 @@ class OcrEngine(str, Enum):
     EASYOCR = "easyocr"
     TESSERACT_CLI = "tesseract_cli"
     TESSERACT = "tesseract"
+    OCRMAC = "ocrmac"
 def export_documents(
@@ -252,15 +254,16 @@ def convert(
     export_txt = OutputFormat.TEXT in to_formats
     export_doctags = OutputFormat.DOCTAGS in to_formats
-    match ocr_engine:
-        case OcrEngine.EASYOCR:
-            ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=force_ocr)
-        case OcrEngine.TESSERACT_CLI:
-            ocr_options = TesseractCliOcrOptions(force_full_page_ocr=force_ocr)
-        case OcrEngine.TESSERACT:
-            ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr)
-        case _:
-            raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
+    if ocr_engine == OcrEngine.EASYOCR:
+        ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=force_ocr)
+    elif ocr_engine == OcrEngine.TESSERACT_CLI:
+        ocr_options = TesseractCliOcrOptions(force_full_page_ocr=force_ocr)
+    elif ocr_engine == OcrEngine.TESSERACT:
+        ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr)
+    elif ocr_engine == OcrEngine.OCRMAC:
+        ocr_options = OcrMacOptions(force_full_page_ocr=force_ocr)
+    else:
+        raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
     ocr_lang_list = _split_list(ocr_lang)
     if ocr_lang_list is not None:
@@ -277,15 +280,14 @@ def convert(
     if artifacts_path is not None:
         pipeline_options.artifacts_path = artifacts_path
-    match pdf_backend:
-        case PdfBackend.DLPARSE_V1:
-            backend: Type[PdfDocumentBackend] = DoclingParseDocumentBackend
-        case PdfBackend.DLPARSE_V2:
-            backend = DoclingParseV2DocumentBackend
-        case PdfBackend.PYPDFIUM2:
-            backend = PyPdfiumDocumentBackend
-        case _:
-            raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}")
+    if pdf_backend == PdfBackend.DLPARSE_V1:
+        backend: Type[PdfDocumentBackend] = DoclingParseDocumentBackend
+    elif pdf_backend == PdfBackend.DLPARSE_V2:
+        backend = DoclingParseV2DocumentBackend
+    elif pdf_backend == PdfBackend.PYPDFIUM2:
+        backend = PyPdfiumDocumentBackend
+    else:
+        raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}")
     format_options: Dict[InputFormat, FormatOption] = {
         InputFormat.PDF: PdfFormatOption(

docling/datamodel/pipeline_options.py CHANGED Viewed

@@ -63,6 +63,17 @@ class TesseractOcrOptions(OcrOptions):
     )
+class OcrMacOptions(OcrOptions):
+    kind: Literal["ocrmac"] = "ocrmac"
+    lang: List[str] = ["fr-FR", "de-DE", "es-ES", "en-US"]
+    recognition: str = "accurate"
+    framework: str = "vision"
+    model_config = ConfigDict(
+        extra="forbid",
+    )
 class PipelineOptions(BaseModel):
     create_legacy_output: bool = (
         True  # This defautl will be set to False on a future version of docling
@@ -75,9 +86,9 @@ class PdfPipelineOptions(PipelineOptions):
     do_ocr: bool = True  # True: perform OCR, replace programmatic PDF text
     table_structure_options: TableStructureOptions = TableStructureOptions()
-    ocr_options: Union[EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions] = (
-        Field(EasyOcrOptions(), discriminator="kind")
-    )
+    ocr_options: Union[
+        EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions
+    ] = Field(EasyOcrOptions(), discriminator="kind")
     images_scale: float = 1.0
     generate_page_images: bool = False

docling/document_converter.py CHANGED Viewed

@@ -3,7 +3,7 @@ import sys
 import time
 from functools import partial
 from pathlib import Path
-from typing import Dict, Iterable, Iterator, List, Optional, Type
+from typing import Dict, Iterable, Iterator, List, Optional, Type, Union
 from pydantic import BaseModel, ConfigDict, model_validator, validate_call
@@ -155,7 +155,7 @@ class DocumentConverter:
     @validate_call(config=ConfigDict(strict=True))
     def convert(
         self,
-        source: Path | str | DocumentStream,  # TODO review naming
+        source: Union[Path, str, DocumentStream],  # TODO review naming
         raises_on_error: bool = True,
         max_num_pages: int = sys.maxsize,
         max_file_size: int = sys.maxsize,
@@ -172,7 +172,7 @@ class DocumentConverter:
     @validate_call(config=ConfigDict(strict=True))
     def convert_all(
         self,
-        source: Iterable[Path | str | DocumentStream],  # TODO review naming
+        source: Iterable[Union[Path, str, DocumentStream]],  # TODO review naming
         raises_on_error: bool = True,  # True: raises on first conversion error; False: does not raise on conv error
         max_num_pages: int = sys.maxsize,
         max_file_size: int = sys.maxsize,
@@ -183,7 +183,7 @@ class DocumentConverter:
         )
         conv_input = _DocumentConversionInput(
             path_or_stream_iterator=source,
-            limit=limits,
+            limits=limits,
         )
         conv_res_iter = self._convert(conv_input, raises_on_error=raises_on_error)
         for conv_res in conv_res_iter:

docling/models/ocr_mac_model.py ADDED Viewed

@@ -0,0 +1,118 @@
+import logging
+import tempfile
+from typing import Iterable, Optional, Tuple
+from docling_core.types.doc import BoundingBox, CoordOrigin
+from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import OcrMacOptions
+from docling.datamodel.settings import settings
+from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.profiling import TimeRecorder
+_log = logging.getLogger(__name__)
+class OcrMacModel(BaseOcrModel):
+    def __init__(self, enabled: bool, options: OcrMacOptions):
+        super().__init__(enabled=enabled, options=options)
+        self.options: OcrMacOptions
+        self.scale = 3  # multiplier for 72 dpi == 216 dpi.
+        if self.enabled:
+            install_errmsg = (
+                "ocrmac is not correctly installed. "
+                "Please install it via `pip install ocrmac` to use this OCR engine. "
+                "Alternatively, Docling has support for other OCR engines. See the documentation: "
+                "https://ds4sd.github.io/docling/installation/"
+            )
+            try:
+                from ocrmac import ocrmac
+            except ImportError:
+                raise ImportError(install_errmsg)
+            self.reader_RIL = ocrmac.OCR
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
+        if not self.enabled:
+            yield from page_batch
+            return
+        for page in page_batch:
+            assert page._backend is not None
+            if not page._backend.is_valid():
+                yield page
+            else:
+                with TimeRecorder(conv_res, "ocr"):
+                    ocr_rects = self.get_ocr_rects(page)
+                    all_ocr_cells = []
+                    for ocr_rect in ocr_rects:
+                        # Skip zero area boxes
+                        if ocr_rect.area() == 0:
+                            continue
+                        high_res_image = page._backend.get_page_image(
+                            scale=self.scale, cropbox=ocr_rect
+                        )
+                        with tempfile.NamedTemporaryFile(
+                            suffix=".png", mode="w"
+                        ) as image_file:
+                            fname = image_file.name
+                            high_res_image.save(fname)
+                            boxes = self.reader_RIL(
+                                fname,
+                                recognition_level=self.options.recognition,
+                                framework=self.options.framework,
+                                language_preference=self.options.lang,
+                            ).recognize()
+                        im_width, im_height = high_res_image.size
+                        cells = []
+                        for ix, (text, confidence, box) in enumerate(boxes):
+                            x = float(box[0])
+                            y = float(box[1])
+                            w = float(box[2])
+                            h = float(box[3])
+                            x1 = x * im_width
+                            y2 = (1 - y) * im_height
+                            x2 = x1 + w * im_width
+                            y1 = y2 - h * im_height
+                            left = x1 / self.scale
+                            top = y1 / self.scale
+                            right = x2 / self.scale
+                            bottom = y2 / self.scale
+                            cells.append(
+                                OcrCell(
+                                    id=ix,
+                                    text=text,
+                                    confidence=confidence,
+                                    bbox=BoundingBox.from_tuple(
+                                        coord=(left, top, right, bottom),
+                                        origin=CoordOrigin.TOPLEFT,
+                                    ),
+                                )
+                            )
+                        # del high_res_image
+                        all_ocr_cells.extend(cells)
+                    # Post-process the cells
+                    page.cells = self.post_process_cells(all_ocr_cells, page.cells)
+                # DEBUG code:
+                if settings.debug.visualize_ocr:
+                    self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
+                yield page

docling/pipeline/standard_pdf_pipeline.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import logging
+import sys
 from pathlib import Path
 from typing import Optional
@@ -10,6 +11,7 @@ from docling.datamodel.base_models import AssembledUnit, Page
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
     EasyOcrOptions,
+    OcrMacOptions,
     PdfPipelineOptions,
     TesseractCliOcrOptions,
     TesseractOcrOptions,
@@ -18,6 +20,7 @@ from docling.models.base_ocr_model import BaseOcrModel
 from docling.models.ds_glm_model import GlmModel, GlmOptions
 from docling.models.easyocr_model import EasyOcrModel
 from docling.models.layout_model import LayoutModel
+from docling.models.ocr_mac_model import OcrMacModel
 from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
 from docling.models.page_preprocessing_model import (
     PagePreprocessingModel,
@@ -118,6 +121,15 @@ class StandardPdfPipeline(PaginatedPipeline):
                 enabled=self.pipeline_options.do_ocr,
                 options=self.pipeline_options.ocr_options,
             )
+        elif isinstance(self.pipeline_options.ocr_options, OcrMacOptions):
+            if "darwin" != sys.platform:
+                raise RuntimeError(
+                    f"The specified OCR type is only supported on Mac: {self.pipeline_options.ocr_options.kind}."
+                )
+            return OcrMacModel(
+                enabled=self.pipeline_options.do_ocr,
+                options=self.pipeline_options.ocr_options,
+            )
         return None
     def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:

{docling-2.6.0.dist-info → docling-2.7.1.dist-info}/METADATA RENAMED Viewed

@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 2.6.0
+Version: 2.7.1
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Home-page: https://github.com/DS4SD/docling
 License: MIT
 Keywords: docling,convert,document,pdf,docx,html,markdown,layout model,segmentation,table structure,table former
 Author: Christoph Auer
 Author-email: cau@zurich.ibm.com
-Requires-Python: >=3.10,<4.0
+Requires-Python: >=3.9,<4.0
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -15,32 +15,36 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: POSIX :: Linux
 Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Provides-Extra: ocrmac
 Provides-Extra: tesserocr
 Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
 Requires-Dist: certifi (>=2024.7.4)
 Requires-Dist: deepsearch-glm (>=0.26.1,<0.27.0)
 Requires-Dist: docling-core (>=2.4.0,<3.0.0)
-Requires-Dist: docling-ibm-models (>=2.0.3,<3.0.0)
-Requires-Dist: docling-parse (>=2.0.2,<3.0.0)
+Requires-Dist: docling-ibm-models (>=2.0.6,<3.0.0)
+Requires-Dist: docling-parse (>=2.0.5,<3.0.0)
 Requires-Dist: easyocr (>=1.7,<2.0)
 Requires-Dist: filetype (>=1.2.0,<2.0.0)
 Requires-Dist: huggingface_hub (>=0.23,<1)
+Requires-Dist: lxml (>=4.0.0,<6.0.0)
 Requires-Dist: marko (>=2.1.2,<3.0.0)
+Requires-Dist: ocrmac (>=1.0.0,<2.0.0) ; (sys_platform == "darwin") and (extra == "ocrmac")
 Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
 Requires-Dist: pandas (>=2.1.4,<3.0.0)
 Requires-Dist: pyarrow (>=16.1.0,<17.0.0)
-Requires-Dist: pydantic (>=2.0.0,<3.0.0)
+Requires-Dist: pydantic (>=2.0.0,<2.10)
 Requires-Dist: pydantic-settings (>=2.3.0,<3.0.0)
 Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
 Requires-Dist: python-docx (>=1.1.2,<2.0.0)
 Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
 Requires-Dist: requests (>=2.32.3,<3.0.0)
 Requires-Dist: rtree (>=1.3.0,<2.0.0)
-Requires-Dist: scipy (>=1.14.1,<2.0.0)
+Requires-Dist: scipy (>=1.6.0,<2.0.0)
 Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
 Requires-Dist: typer (>=0.12.5,<0.13.0)
 Project-URL: Repository, https://github.com/DS4SD/docling
@@ -61,19 +65,20 @@ Description-Content-Type: text/markdown
 [![arXiv](https://img.shields.io/badge/arXiv-2408.09869-b31b1b.svg)](https://arxiv.org/abs/2408.09869)
 [![Docs](https://img.shields.io/badge/docs-live-brightgreen)](https://ds4sd.github.io/docling/)
 [![PyPI version](https://img.shields.io/pypi/v/docling)](https://pypi.org/project/docling/)
-![Python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue)
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docling)](https://pypi.org/project/docling/)
 [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)](https://python-poetry.org/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
 [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
 [![License MIT](https://img.shields.io/github/license/DS4SD/docling)](https://opensource.org/licenses/MIT)
+[![PyPI Downloads](https://static.pepy.tech/badge/docling/month)](https://pepy.tech/projects/docling)
 Docling parses documents and exports them to the desired format with ease and speed.
 ## Features
-* 🗂️ Reads popular document formats (PDF, DOCX, PPTX, Images, HTML, AsciiDoc, Markdown) and exports to Markdown and JSON
+* 🗂️ Reads popular document formats (PDF, DOCX, PPTX, XLSX, Images, HTML, AsciiDoc & Markdown) and exports to Markdown and JSON
 * 📑 Advanced PDF document understanding including page layout, reading order & table structures
 * 🧩 Unified, expressive [DoclingDocument](https://ds4sd.github.io/docling/concepts/docling_document/) representation format
 * 🤖 Easy integration with LlamaIndex 🦙 & LangChain 🦜🔗 for powerful RAG / QA applications

{docling-2.6.0.dist-info → docling-2.7.1.dist-info}/RECORD RENAMED Viewed

@@ -8,23 +8,24 @@ docling/backend/html_backend.py,sha256=qbu1W8xoTGnXMuZPRPLq68hDbCEj6ygnpxP5gYaod
 docling/backend/md_backend.py,sha256=tmuSCghjor9PqKIiVieCuZ4_t5JEjZMy3cq7u3yTgyU,14032
 docling/backend/msexcel_backend.py,sha256=23qUEScqr5GhY06xiqg-eBQ_JlAqO0FkPEmX6554sVA,12040
 docling/backend/mspowerpoint_backend.py,sha256=QD0NaatTO8U9CIFoiipkq3X5HxLZaaahH8nlrQ6ecDA,15710
-docling/backend/msword_backend.py,sha256=-cCEh4EhdGknHrxiVGFE4GDo_iYpAqP2QxRaeqrJHUE,17939
+docling/backend/msword_backend.py,sha256=sMumfB9Xa2Md1a8WO-fGPPAKf1s3mCvErMyZ-xnBC2E,18495
 docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
 docling/backend/pypdfium2_backend.py,sha256=B4bfv-dfzlWiKTfF8LN5fto_99YBu8A2c1_XIVwRUWI,8996
 docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/cli/main.py,sha256=nxwx4Ro4zKyK4fjiTNROtsGIKGU1opVSqBfNhno6lGQ,10603
+docling/cli/main.py,sha256=MpjbAXhOlbGnAnl5_OaKCdub61YPQBy1NOqroXQtNYE,10722
 docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/datamodel/base_models.py,sha256=6qlwPamDZ3XUsE2kTAyGKG6O2IJClVjCqaE7DZ74KHU,5533
 docling/datamodel/document.py,sha256=9dQf_J18X_MEWs-Mg3Ed6BykFPJ79ETmkkxcssY-vYo,20698
-docling/datamodel/pipeline_options.py,sha256=joR6_aBsZvLmK_K4RaKBLCnc1WZwobLGQRRY64XfPsM,2845
+docling/datamodel/pipeline_options.py,sha256=aC_CmtEhNLIbn9n3JuYhL_aA8UA0vFgw7HcGMUuOI4o,3117
 docling/datamodel/settings.py,sha256=JK8lZPBjUx2kD2q-Qpg-o3vOElADMcyQbRUL0EHZ7us,1263
-docling/document_converter.py,sha256=OaIiJLckHvXC_7DAZ6ZSYg3dJ7QWC5TtO2m4dSS80xI,10922
+docling/document_converter.py,sha256=L0A3g7IQBaKIK7dWpUFC72ZqKywIPYkyh71Qd6DiNPE,10940
 docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/models/base_model.py,sha256=Yq_-FmUhqhE20vXYG3WiQXDRTIPjik1CyuEZ8iYTGAY,701
 docling/models/base_ocr_model.py,sha256=rGSpBF4dByITcsBaRIgvFKpiu0CrhmZS_PHIo686Dw0,6428
 docling/models/ds_glm_model.py,sha256=hBRCx6oFGhxBbKEJlRSWVndDwFtB5IpeLOowFAVqFM0,12033
 docling/models/easyocr_model.py,sha256=c2m4x9dZpSc-cMgeEdFBRVBlB78uMGlYD8Q_2gzRuMU,3734
 docling/models/layout_model.py,sha256=ZvbTSyxvXB5yLHNEti0Wv3trz0vwGuHySI5TCdApb0U,14011
+docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
 docling/models/page_assemble_model.py,sha256=kSGNiRKhmzkpFH7xCiT3rulMsgJmUXFa6Th_eB-cLEk,7103
 docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
 docling/models/table_structure_model.py,sha256=-ANSQpiN2avt3B9sbi7dHcoULUJbMBalAR5xxlrM7To,8421
@@ -33,14 +34,14 @@ docling/models/tesseract_ocr_model.py,sha256=RDf6iV1q-oXaGfZXv0bW6SqjHNKQvBUDlUs
 docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/pipeline/base_pipeline.py,sha256=IF1XWYgUGbdB4-teLkmM4Hvg_UNEfPrGuhExMRTUsk8,7168
 docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
-docling/pipeline/standard_pdf_pipeline.py,sha256=h59eA0CLMYuuJoH-0SyCRkYEregNs6i0pa46Ioqf8kU,7947
+docling/pipeline/standard_pdf_pipeline.py,sha256=btm_y1ZsjUrtWvMbF6RA8BVM0ENrK4z_rqF0jjdeZmU,8473
 docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/utils/export.py,sha256=KyGF1BVDHPFfHVXZc8vegsWlFfOgGPP2YckWpTadyI8,4694
 docling/utils/layout_utils.py,sha256=vlN0rc8i0ayRGn3WnaG-pdmqEL00KKGl2zez3Gj-hrk,32074
 docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
 docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
-docling-2.6.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
-docling-2.6.0.dist-info/METADATA,sha256=l4Fj3nRKRzZJ2GXWU7c4yLNhIO4NbwdeUofY5MvBPH8,6571
-docling-2.6.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-docling-2.6.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
-docling-2.6.0.dist-info/RECORD,,
+docling-2.7.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling-2.7.1.dist-info/METADATA,sha256=TvD3BGlbO1ci54NzwmLxqSITXIdMefyj71YjdZkD7Vs,6906
+docling-2.7.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+docling-2.7.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
+docling-2.7.1.dist-info/RECORD,,

{docling-2.6.0.dist-info → docling-2.7.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{docling-2.6.0.dist-info → docling-2.7.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{docling-2.6.0.dist-info → docling-2.7.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

docling 2.6.0__py3-none-any.whl → 2.7.1__py3-none-any.whl

docling 2.6.0__py3-none-any.whl → 2.7.1__py3-none-any.whl