PyPI - docling - Versions diffs - 2.14.0__py3-none-any.whl → 2.15.1__py3-none-any.whl - Mend

docling 2.14.0__py3-none-any.whl → 2.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

docling/backend/docling_parse_backend.py +1 -1
docling/backend/docling_parse_v2_backend.py +1 -1
docling/backend/html_backend.py +2 -2
docling/backend/mspowerpoint_backend.py +15 -11
docling/backend/pypdfium2_backend.py +1 -1
docling/cli/main.py +14 -2
docling/datamodel/document.py +6 -1
docling/datamodel/pipeline_options.py +1 -1
docling/document_converter.py +4 -2
docling/models/base_ocr_model.py +29 -13
docling/models/layout_model.py +18 -25
docling/models/table_structure_model.py +20 -0
{docling-2.14.0.dist-info → docling-2.15.1.dist-info}/METADATA +4 -5
{docling-2.14.0.dist-info → docling-2.15.1.dist-info}/RECORD +17 -17
{docling-2.14.0.dist-info → docling-2.15.1.dist-info}/LICENSE +0 -0
{docling-2.14.0.dist-info → docling-2.15.1.dist-info}/WHEEL +0 -0
{docling-2.14.0.dist-info → docling-2.15.1.dist-info}/entry_points.txt +0 -0

docling/backend/docling_parse_backend.py CHANGED Viewed

@@ -132,7 +132,7 @@ class DoclingParsePageBackend(PdfPageBackend):
         return cells
     def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
-        AREA_THRESHOLD = 32 * 32
+        AREA_THRESHOLD = 0  # 32 * 32
         for i in range(len(self._dpage["images"])):
             bitmap = self._dpage["images"][i]

docling/backend/docling_parse_v2_backend.py CHANGED Viewed

@@ -140,7 +140,7 @@ class DoclingParseV2PageBackend(PdfPageBackend):
         return cells
     def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
-        AREA_THRESHOLD = 32 * 32
+        AREA_THRESHOLD = 0  # 32 * 32
         images = self._dpage["sanitized"]["images"]["data"]
         images_header = self._dpage["sanitized"]["images"]["header"]

docling/backend/html_backend.py CHANGED Viewed

@@ -37,10 +37,10 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         try:
             if isinstance(self.path_or_stream, BytesIO):
-                text_stream = self.path_or_stream.getvalue().decode("utf-8")
+                text_stream = self.path_or_stream.getvalue()
                 self.soup = BeautifulSoup(text_stream, "html.parser")
             if isinstance(self.path_or_stream, Path):
-                with open(self.path_or_stream, "r", encoding="utf-8") as f:
+                with open(self.path_or_stream, "rb") as f:
                     html_content = f.read()
                     self.soup = BeautifulSoup(html_content, "html.parser")
         except Exception as e:

docling/backend/mspowerpoint_backend.py CHANGED Viewed

@@ -16,7 +16,7 @@ from docling_core.types.doc import (
     TableCell,
     TableData,
 )
-from PIL import Image
+from PIL import Image, UnidentifiedImageError
 from pptx import Presentation
 from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
@@ -120,6 +120,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
         bullet_type = "None"
         list_text = ""
         list_label = GroupLabel.LIST
+        doc_label = DocItemLabel.LIST_ITEM
         prov = self.generate_prov(shape, slide_ind, shape.text.strip())
         # Identify if shape contains lists
@@ -276,16 +277,19 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
         im_dpi, _ = image.dpi
         # Open it with PIL
-        pil_image = Image.open(BytesIO(image_bytes))
-        # shape has picture
-        prov = self.generate_prov(shape, slide_ind, "")
-        doc.add_picture(
-            parent=parent_slide,
-            image=ImageRef.from_pil(image=pil_image, dpi=im_dpi),
-            caption=None,
-            prov=prov,
-        )
+        try:
+            pil_image = Image.open(BytesIO(image_bytes))
+            # shape has picture
+            prov = self.generate_prov(shape, slide_ind, "")
+            doc.add_picture(
+                parent=parent_slide,
+                image=ImageRef.from_pil(image=pil_image, dpi=im_dpi),
+                caption=None,
+                prov=prov,
+            )
+        except (UnidentifiedImageError, OSError) as e:
+            _log.warning(f"Warning: image cannot be loaded by Pillow: {e}")
         return
     def handle_tables(self, shape, parent_slide, slide_ind, doc):

docling/backend/pypdfium2_backend.py CHANGED Viewed

@@ -39,7 +39,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
         return self.valid
     def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
-        AREA_THRESHOLD = 32 * 32
+        AREA_THRESHOLD = 0  # 32 * 32
         for obj in self._ppage.get_objects(filter=[pdfium_c.FPDF_PAGEOBJ_IMAGE]):
             pos = obj.get_pos()
             cropbox = BoundingBox.from_tuple(

docling/cli/main.py CHANGED Viewed

@@ -164,6 +164,11 @@ def convert(
     to_formats: List[OutputFormat] = typer.Option(
         None, "--to", help="Specify output formats. Defaults to Markdown."
     ),
+    headers: str = typer.Option(
+        None,
+        "--headers",
+        help="Specify http request headers used when fetching url input sources in the form of a JSON string",
+    ),
     image_export_mode: Annotated[
         ImageRefMode,
         typer.Option(
@@ -279,12 +284,19 @@ def convert(
     if from_formats is None:
         from_formats = [e for e in InputFormat]
+    parsed_headers: Optional[Dict[str, str]] = None
+    if headers is not None:
+        headers_t = TypeAdapter(Dict[str, str])
+        parsed_headers = headers_t.validate_json(headers)
     with tempfile.TemporaryDirectory() as tempdir:
         input_doc_paths: List[Path] = []
         for src in input_sources:
             try:
                 # check if we can fetch some remote url
-                source = resolve_source_to_path(source=src, workdir=Path(tempdir))
+                source = resolve_source_to_path(
+                    source=src, headers=parsed_headers, workdir=Path(tempdir)
+                )
                 input_doc_paths.append(source)
             except FileNotFoundError:
                 err_console.print(
@@ -390,7 +402,7 @@ def convert(
         start_time = time.time()
         conv_results = doc_converter.convert_all(
-            input_doc_paths, raises_on_error=abort_on_error
+            input_doc_paths, headers=parsed_headers, raises_on_error=abort_on_error
         )
         output.mkdir(parents=True, exist_ok=True)

docling/datamodel/document.py CHANGED Viewed

@@ -227,13 +227,18 @@ class _DummyBackend(AbstractDocumentBackend):
 class _DocumentConversionInput(BaseModel):
     path_or_stream_iterator: Iterable[Union[Path, str, DocumentStream]]
+    headers: Optional[Dict[str, str]] = None
     limits: Optional[DocumentLimits] = DocumentLimits()
     def docs(
         self, format_options: Dict[InputFormat, "FormatOption"]
     ) -> Iterable[InputDocument]:
         for item in self.path_or_stream_iterator:
-            obj = resolve_source_to_stream(item) if isinstance(item, str) else item
+            obj = (
+                resolve_source_to_stream(item, self.headers)
+                if isinstance(item, str)
+                else item
+            )
             format = self._guess_format(obj)
             backend: Type[AbstractDocumentBackend]
             if format not in format_options.keys():

docling/datamodel/pipeline_options.py CHANGED Viewed

@@ -139,7 +139,7 @@ class EasyOcrOptions(OcrOptions):
     use_gpu: Optional[bool] = None
-    confidence_threshold: float = 0.65
+    confidence_threshold: float = 0.5
     model_storage_directory: Optional[str] = None
     recog_network: Optional[str] = "standard"

docling/document_converter.py CHANGED Viewed

@@ -176,6 +176,7 @@ class DocumentConverter:
     def convert(
         self,
         source: Union[Path, str, DocumentStream],  # TODO review naming
+        headers: Optional[Dict[str, str]] = None,
         raises_on_error: bool = True,
         max_num_pages: int = sys.maxsize,
         max_file_size: int = sys.maxsize,
@@ -185,6 +186,7 @@ class DocumentConverter:
             raises_on_error=raises_on_error,
             max_num_pages=max_num_pages,
             max_file_size=max_file_size,
+            headers=headers,
         )
         return next(all_res)
@@ -192,6 +194,7 @@ class DocumentConverter:
     def convert_all(
         self,
         source: Iterable[Union[Path, str, DocumentStream]],  # TODO review naming
+        headers: Optional[Dict[str, str]] = None,
         raises_on_error: bool = True,  # True: raises on first conversion error; False: does not raise on conv error
         max_num_pages: int = sys.maxsize,
         max_file_size: int = sys.maxsize,
@@ -201,8 +204,7 @@ class DocumentConverter:
             max_file_size=max_file_size,
         )
         conv_input = _DocumentConversionInput(
-            path_or_stream_iterator=source,
-            limits=limits,
+            path_or_stream_iterator=source, limits=limits, headers=headers
         )
         conv_res_iter = self._convert(conv_input, raises_on_error=raises_on_error)

docling/models/base_ocr_model.py CHANGED Viewed

@@ -8,7 +8,7 @@ import numpy as np
 from docling_core.types.doc import BoundingBox, CoordOrigin
 from PIL import Image, ImageDraw
 from rtree import index
-from scipy.ndimage import find_objects, label
+from scipy.ndimage import binary_dilation, find_objects, label
 from docling.datamodel.base_models import Cell, OcrCell, Page
 from docling.datamodel.document import ConversionResult
@@ -43,6 +43,12 @@ class BaseOcrModel(BasePageModel):
             np_image = np.array(image)
+            # Dilate the image by 10 pixels to merge nearby bitmap rectangles
+            structure = np.ones(
+                (20, 20)
+            )  # Create a 20x20 structure element (10 pixels in all directions)
+            np_image = binary_dilation(np_image > 0, structure=structure)
             # Find the connected components
             labeled_image, num_features = label(
                 np_image > 0
@@ -72,7 +78,7 @@ class BaseOcrModel(BasePageModel):
             bitmap_rects = []
         coverage, ocr_rects = find_ocr_rects(page.size, bitmap_rects)
-        # return full-page rectangle if sufficiently covered with bitmaps
+        # return full-page rectangle if page is dominantly covered with bitmaps
         if self.options.force_full_page_ocr or coverage > max(
             BITMAP_COVERAGE_TRESHOLD, self.options.bitmap_area_threshold
         ):
@@ -85,17 +91,11 @@ class BaseOcrModel(BasePageModel):
                     coord_origin=CoordOrigin.TOPLEFT,
                 )
             ]
-        # return individual rectangles if the bitmap coverage is smaller
-        else:  # coverage <= BITMAP_COVERAGE_TRESHOLD:
-            # skip OCR if the bitmap area on the page is smaller than the options threshold
-            ocr_rects = [
-                rect
-                for rect in ocr_rects
-                if rect.area() / (page.size.width * page.size.height)
-                > self.options.bitmap_area_threshold
-            ]
+        # return individual rectangles if the bitmap coverage is above the threshold
+        elif coverage > self.options.bitmap_area_threshold:
             return ocr_rects
+        else:  # overall coverage of bitmaps is too low, drop all bitmap rectangles.
+            return []
     # Filters OCR cells by dropping any OCR cell that intersects with an existing programmatic cell.
     def _filter_ocr_cells(self, ocr_cells, programmatic_cells):
@@ -138,18 +138,34 @@ class BaseOcrModel(BasePageModel):
     def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
         image = copy.deepcopy(page.image)
+        scale_x = image.width / page.size.width
+        scale_y = image.height / page.size.height
         draw = ImageDraw.Draw(image, "RGBA")
         # Draw OCR rectangles as yellow filled rect
         for rect in ocr_rects:
             x0, y0, x1, y1 = rect.as_tuple()
+            y0 *= scale_x
+            y1 *= scale_y
+            x0 *= scale_x
+            x1 *= scale_x
             shade_color = (255, 255, 0, 40)  # transparent yellow
             draw.rectangle([(x0, y0), (x1, y1)], fill=shade_color, outline=None)
         # Draw OCR and programmatic cells
         for tc in page.cells:
             x0, y0, x1, y1 = tc.bbox.as_tuple()
-            color = "red"
+            y0 *= scale_x
+            y1 *= scale_y
+            x0 *= scale_x
+            x1 *= scale_x
+            if y1 <= y0:
+                y1, y0 = y0, y1
+            color = "gray"
             if isinstance(tc, OcrCell):
                 color = "magenta"
             draw.rectangle([(x0, y0), (x1, y1)], outline=color)

docling/models/layout_model.py CHANGED Viewed

@@ -67,29 +67,9 @@ class LayoutModel(BasePageModel):
         - Right: Clusters including FORM, KEY_VALUE_REGION, and PICTURE.
         Includes label names and confidence scores for each cluster.
         """
-        label_to_color = {
-            DocItemLabel.TEXT: (255, 255, 153),  # Light Yellow
-            DocItemLabel.CAPTION: (255, 204, 153),  # Light Orange
-            DocItemLabel.LIST_ITEM: (153, 153, 255),  # Light Purple
-            DocItemLabel.FORMULA: (192, 192, 192),  # Gray
-            DocItemLabel.TABLE: (255, 204, 204),  # Light Pink
-            DocItemLabel.PICTURE: (255, 204, 164),  # Light Beige
-            DocItemLabel.SECTION_HEADER: (255, 153, 153),  # Light Red
-            DocItemLabel.PAGE_HEADER: (204, 255, 204),  # Light Green
-            DocItemLabel.PAGE_FOOTER: (
-                204,
-                255,
-                204,
-            ),  # Light Green (same as Page-Header)
-            DocItemLabel.TITLE: (255, 153, 153),  # Light Red (same as Section-Header)
-            DocItemLabel.FOOTNOTE: (200, 200, 255),  # Light Blue
-            DocItemLabel.DOCUMENT_INDEX: (220, 220, 220),  # Light Gray
-            DocItemLabel.CODE: (125, 125, 125),  # Gray
-            DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193),  # Pale Green
-            DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193),  # Light Pink
-            DocItemLabel.FORM: (200, 255, 255),  # Light Cyan
-            DocItemLabel.KEY_VALUE_REGION: (183, 65, 14),  # Rusty orange
-        }
+        scale_x = page.image.width / page.size.width
+        scale_y = page.image.height / page.size.height
         # Filter clusters for left and right images
         exclude_labels = {
             DocItemLabel.FORM,
@@ -118,6 +98,11 @@ class LayoutModel(BasePageModel):
                     cell_color = (0, 0, 0, 40)  # Transparent black for cells
                     for tc in c.cells:
                         cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
+                        cx0 *= scale_x
+                        cx1 *= scale_x
+                        cy0 *= scale_x
+                        cy1 *= scale_y
                         draw.rectangle(
                             [(cx0, cy0), (cx1, cy1)],
                             outline=None,
@@ -125,8 +110,16 @@ class LayoutModel(BasePageModel):
                         )
                     # Draw cluster rectangle
                     x0, y0, x1, y1 = c.bbox.as_tuple()
-                    cluster_fill_color = (*list(label_to_color.get(c.label)), 70)
-                    cluster_outline_color = (*list(label_to_color.get(c.label)), 255)
+                    x0 *= scale_x
+                    x1 *= scale_x
+                    y0 *= scale_x
+                    y1 *= scale_y
+                    cluster_fill_color = (*list(DocItemLabel.get_color(c.label)), 70)
+                    cluster_outline_color = (
+                        *list(DocItemLabel.get_color(c.label)),
+                        255,
+                    )
                     draw.rectangle(
                         [(x0, y0), (x1, y1)],
                         outline=cluster_outline_color,

docling/models/table_structure_model.py CHANGED Viewed

@@ -66,23 +66,43 @@ class TableStructureModel(BasePageModel):
         show: bool = False,
     ):
         assert page._backend is not None
+        assert page.size is not None
         image = (
             page._backend.get_page_image()
         )  # make new image to avoid drawing on the saved ones
+        scale_x = image.width / page.size.width
+        scale_y = image.height / page.size.height
         draw = ImageDraw.Draw(image)
         for table_element in tbl_list:
             x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
+            y0 *= scale_x
+            y1 *= scale_y
+            x0 *= scale_x
+            x1 *= scale_x
             draw.rectangle([(x0, y0), (x1, y1)], outline="red")
             for cell in table_element.cluster.cells:
                 x0, y0, x1, y1 = cell.bbox.as_tuple()
+                x0 *= scale_x
+                x1 *= scale_x
+                y0 *= scale_x
+                y1 *= scale_y
                 draw.rectangle([(x0, y0), (x1, y1)], outline="green")
             for tc in table_element.table_cells:
                 if tc.bbox is not None:
                     x0, y0, x1, y1 = tc.bbox.as_tuple()
+                    x0 *= scale_x
+                    x1 *= scale_x
+                    y0 *= scale_x
+                    y1 *= scale_y
                     if tc.column_header:
                         width = 3
                     else:

{docling-2.14.0.dist-info → docling-2.15.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 2.14.0
+Version: 2.15.1
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Home-page: https://github.com/DS4SD/docling
 License: MIT
@@ -26,7 +26,7 @@ Provides-Extra: tesserocr
 Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
 Requires-Dist: certifi (>=2024.7.4)
 Requires-Dist: deepsearch-glm (>=1.0.0,<2.0.0)
-Requires-Dist: docling-core[chunking] (>=2.12.1,<3.0.0)
+Requires-Dist: docling-core[chunking] (>=2.13.1,<3.0.0)
 Requires-Dist: docling-ibm-models (>=3.1.0,<4.0.0)
 Requires-Dist: docling-parse (>=3.0.0,<4.0.0)
 Requires-Dist: easyocr (>=1.7,<2.0)
@@ -45,7 +45,7 @@ Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
 Requires-Dist: python-docx (>=1.1.2,<2.0.0)
 Requires-Dist: python-pptx (>=1.0.2,<2.0.0)
 Requires-Dist: rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; (python_version < "3.13") and (extra == "rapidocr")
-Requires-Dist: requests (>=2.32.3,<3.0.0)
+Requires-Dist: requests (>=2.32.2,<3.0.0)
 Requires-Dist: rtree (>=1.3.0,<2.0.0)
 Requires-Dist: scipy (>=1.6.0,<2.0.0)
 Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
@@ -84,7 +84,7 @@ Docling parses documents and exports them to the desired format with ease and sp
 * 🗂️ Reads popular document formats (PDF, DOCX, PPTX, XLSX, Images, HTML, AsciiDoc & Markdown) and exports to HTML, Markdown and JSON (with embedded and referenced images)
 * 📑 Advanced PDF document understanding including page layout, reading order & table structures
 * 🧩 Unified, expressive [DoclingDocument](https://ds4sd.github.io/docling/concepts/docling_document/) representation format
-* 🤖 Easy integration with 🦙 LlamaIndex & 🦜🔗 LangChain for powerful RAG / QA applications
+* 🤖 Plug-and-play [integrations](https://ds4sd.github.io/docling/integrations/) incl. LangChain, LlamaIndex, Crew AI & Haystack for agentic AI
 * 🔍 OCR support for scanned PDFs
 * 💻 Simple and convenient CLI
@@ -94,7 +94,6 @@ Explore the [documentation](https://ds4sd.github.io/docling/) to discover plenty
 * ♾️ Equation & code extraction
 * 📝 Metadata extraction, including title, authors, references & language
-* 🦜🔗 Native LangChain extension
 ## Installation

{docling-2.14.0.dist-info → docling-2.15.1.dist-info}/RECORD RENAMED Viewed

@@ -2,39 +2,39 @@ docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/abstract_backend.py,sha256=-or6kWVV7egQeyIuN-vI0Tr7Q1htalBZSlhgq_G2RdU,1678
 docling/backend/asciidoc_backend.py,sha256=kXZxOLk_LvLFVZwnJVVwjmvc3QWZ0iiG7VnwjgtC3hI,14051
-docling/backend/docling_parse_backend.py,sha256=_jY5f5-KGI3hi5pcZAY6e7tPLocSi5JUWrxraDVszqI,7631
-docling/backend/docling_parse_v2_backend.py,sha256=1TDUdMIp3fEjCWBNjusUHiCUmH1g6yZQ-b13scofP0Y,8637
-docling/backend/html_backend.py,sha256=qbu1W8xoTGnXMuZPRPLq68hDbCEj6ygnpxP5gYaodAQ,15593
+docling/backend/docling_parse_backend.py,sha256=cJLkuOmfCtshRrwsv7WWayRNeMQASZv76v3nUHucqgM,7636
+docling/backend/docling_parse_v2_backend.py,sha256=-lLsorxhK_Awrql_zXPen2LX0Gt9UvcDLMcmXf7_LKc,8642
+docling/backend/html_backend.py,sha256=O8qXaw7MzOIdaxbBcjHieM9Ce4GEdtBj9YW0vpJspuA,15560
 docling/backend/md_backend.py,sha256=tmuSCghjor9PqKIiVieCuZ4_t5JEjZMy3cq7u3yTgyU,14032
 docling/backend/msexcel_backend.py,sha256=23qUEScqr5GhY06xiqg-eBQ_JlAqO0FkPEmX6554sVA,12040
-docling/backend/mspowerpoint_backend.py,sha256=QD0NaatTO8U9CIFoiipkq3X5HxLZaaahH8nlrQ6ecDA,15710
+docling/backend/mspowerpoint_backend.py,sha256=kOGawhcn0BFq4M_C6kW0mY8vMIB24_6R6q6GaszbSt0,15957
 docling/backend/msword_backend.py,sha256=K1D_h0ulLA6KQsPe62327cDVkQqV1f7EetCHo66wCKw,19233
 docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
-docling/backend/pypdfium2_backend.py,sha256=B4bfv-dfzlWiKTfF8LN5fto_99YBu8A2c1_XIVwRUWI,8996
+docling/backend/pypdfium2_backend.py,sha256=Exb3NBp3x2YSLoNfmXq4NefShgooJXsxTXrJ4JbTzcc,9001
 docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/xml/pubmed_backend.py,sha256=LMnpowjnxa5SydfNC00Ll840BYraL8dCJu-FfC9iSKk,20447
 docling/backend/xml/uspto_backend.py,sha256=2YsnB-WRARIAaHPL6gxHePP24GQGi-Up2_K8ZapD3k4,70974
 docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
 docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/cli/main.py,sha256=SdavhL0VTApK9JrKz0Pc1IYdnQhK-0OOaGT8zlTiN5c,15022
+docling/cli/main.py,sha256=NR7NEt8Sf3FE9D7sHpEmABM9mFMTMO5w0VPwYIIvVsk,15481
 docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/datamodel/base_models.py,sha256=50Jf5zk9c4-zmnOzZLoPBnHQhTX0_OFQzIkKgnKK1o4,6229
-docling/datamodel/document.py,sha256=rnNw2tGuCZ1BDoBptlNpPllQ2osJMQHMvcbyrQZuSL4,12948
-docling/datamodel/pipeline_options.py,sha256=u37Q12FVfu1UTEhgBiZ2KslyBtG3z3Eobqvaqd_MYaA,7735
+docling/datamodel/document.py,sha256=OHM6bm0a-62xnAZ8DFlMHzATmbgNcfMxQoQO2udaW5Q,13071
+docling/datamodel/pipeline_options.py,sha256=wKFzw8sAim6emQGsjuS12n7FfpMo8HVNoMOPhkXTkVo,7734
 docling/datamodel/settings.py,sha256=Sw0rN_f8rdLV1eNvVeKiyET2Oe6oz9jtW3lJzniW9Do,1302
-docling/document_converter.py,sha256=PoRcL2IzGoT7ZppGk6laPmKiHOwrXl1-dLMNWumNogg,12298
+docling/document_converter.py,sha256=_pk0sHuPXJ14NEutatf5bK2VyNiU5cvYsVbh1HIgrIw,12431
 docling/exceptions.py,sha256=-FoP46rFJgz_jn5uDv2V052udEEg8gckk6uhoItchXc,85
 docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/models/base_model.py,sha256=Yq_-FmUhqhE20vXYG3WiQXDRTIPjik1CyuEZ8iYTGAY,701
-docling/models/base_ocr_model.py,sha256=rGSpBF4dByITcsBaRIgvFKpiu0CrhmZS_PHIo686Dw0,6428
+docling/models/base_ocr_model.py,sha256=YiUMvdjnHw9SHjnfJKT5INrPMoIGEf_Z2OApfl_VRTE,6919
 docling/models/ds_glm_model.py,sha256=CkhsP0cEWwm4wb1g3cLFriVGpVtELiUK3REDMkPwAMw,13028
 docling/models/easyocr_model.py,sha256=Kakb20ioBxDmNsIqoGvSSs_vbqAWN3QQNHYtEi-eErg,4990
-docling/models/layout_model.py,sha256=skfFdWh_NgijR4bIqyUH8zlda5mMOIIdN3yMttdmsN8,9871
+docling/models/layout_model.py,sha256=Xo8sclRTOO_V8Cr4RwuxB67vSWKF0LZ5nJRYU1WI--k,9063
 docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
 docling/models/page_assemble_model.py,sha256=qdEX0AIb76ZOqJV6O9j-7r67WmuIkUlwbb2PsL7eFK4,7608
 docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
 docling/models/rapid_ocr_model.py,sha256=LOIvczJs3_db2o8mtrKk-pIXgC-xqWqRLu2cjA3wvy4,4980
-docling/models/table_structure_model.py,sha256=3bUBeP26WwDNCb5_aAlRwVZe4xUYgnwsSHgWQYZxk9E,8892
+docling/models/table_structure_model.py,sha256=fUpCHthO4Uk3BhA99a85BHBm51fmdE9kfqhAk3WjuBw,9392
 docling/models/tesseract_ocr_cli_model.py,sha256=aKQBaty4cYu6zG_C5uy6Zm3eeRQo5fxIierbKixa2kc,6622
 docling/models/tesseract_ocr_model.py,sha256=RDf6iV1q-oXaGfZXv0bW6SqjHNKQvBUDlUsOkuz0neY,6095
 docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -49,8 +49,8 @@ docling/utils/glm_utils.py,sha256=IB19wToGath97gD3jAA3G_rQSptnZKhQCWLvPUCnkww,11
 docling/utils/layout_postprocessor.py,sha256=urRzeF9PrKiMBvA6DdHHwyLxG06CMhelgJeV5B1l6l0,24258
 docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
 docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
-docling-2.14.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
-docling-2.14.0.dist-info/METADATA,sha256=FmM_aRgxeqVSKDOYc-8MEKH1ec_Z7x8cgMQoMVeaKDw,7732
-docling-2.14.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-docling-2.14.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
-docling-2.14.0.dist-info/RECORD,,
+docling-2.15.1.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling-2.15.1.dist-info/METADATA,sha256=6WRzA633us43nw7RHwhX_jwizh2JSpGWxNh0pJq2ZYs,7739
+docling-2.15.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+docling-2.15.1.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
+docling-2.15.1.dist-info/RECORD,,

{docling-2.14.0.dist-info → docling-2.15.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{docling-2.14.0.dist-info → docling-2.15.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{docling-2.14.0.dist-info → docling-2.15.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

docling 2.14.0__py3-none-any.whl → 2.15.1__py3-none-any.whl

docling 2.14.0__py3-none-any.whl → 2.15.1__py3-none-any.whl