PyPI - docling - Versions diffs - 2.31.0__py3-none-any.whl → 2.31.2__py3-none-any.whl - Mend

docling 2.31.0__py3-none-any.whl → 2.31.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

docling/backend/asciidoc_backend.py +1 -1
docling/backend/html_backend.py +41 -17
docling/backend/md_backend.py +1 -1
docling/backend/msword_backend.py +1 -1
docling/backend/xml/jats_backend.py +1 -1
docling/backend/xml/uspto_backend.py +4 -4
docling/cli/main.py +1 -1
docling/cli/models.py +4 -0
docling/datamodel/document.py +8 -0
docling/document_converter.py +3 -1
docling/models/picture_description_vlm_model.py +4 -1
docling/models/readingorder_model.py +1 -1
docling/models/table_structure_model.py +2 -2
docling/models/tesseract_ocr_model.py +5 -3
docling/utils/model_downloader.py +24 -0
docling/utils/utils.py +2 -2
{docling-2.31.0.dist-info → docling-2.31.2.dist-info}/METADATA +2 -1
{docling-2.31.0.dist-info → docling-2.31.2.dist-info}/RECORD +21 -21
{docling-2.31.0.dist-info → docling-2.31.2.dist-info}/LICENSE +0 -0
{docling-2.31.0.dist-info → docling-2.31.2.dist-info}/WHEEL +0 -0
{docling-2.31.0.dist-info → docling-2.31.2.dist-info}/entry_points.txt +0 -0

docling/backend/asciidoc_backend.py CHANGED Viewed

@@ -287,7 +287,7 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
     #   =========   Section headers
     def _is_section_header(self, line):
-        return re.match(r"^==+", line)
+        return re.match(r"^==+\s+", line)
     def _parse_section_header(self, line):
         match = re.match(r"^(=+)\s+(.*)", line)

docling/backend/html_backend.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import logging
+import traceback
 from io import BytesIO
 from pathlib import Path
 from typing import Final, Optional, Union, cast
@@ -137,7 +138,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                     self.analyze_tag(cast(Tag, element), doc)
                 except Exception as exc_child:
                     _log.error(
-                        f"Error processing child from tag {tag.name}: {exc_child!r}"
+                        f"Error processing child from tag {tag.name}:\n{traceback.format_exc()}"
                     )
                     raise exc_child
             elif isinstance(element, NavigableString) and not isinstance(
@@ -390,46 +391,64 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
             _log.debug(f"list-item has no text: {element}")
     @staticmethod
-    def parse_table_data(element: Tag) -> Optional[TableData]:
+    def parse_table_data(element: Tag) -> Optional[TableData]:  # noqa: C901
         nested_tables = element.find("table")
         if nested_tables is not None:
             _log.debug("Skipping nested table.")
             return None
-        # Count the number of rows (number of <tr> elements)
-        num_rows = len(element("tr"))
-        # Find the number of columns (taking into account colspan)
+        # Find the number of rows and columns (taking into account spans)
+        num_rows = 0
         num_cols = 0
         for row in element("tr"):
             col_count = 0
+            is_row_header = True
             if not isinstance(row, Tag):
                 continue
             for cell in row(["td", "th"]):
                 if not isinstance(row, Tag):
                     continue
-                val = cast(Tag, cell).get("colspan", "1")
+                cell_tag = cast(Tag, cell)
+                val = cell_tag.get("colspan", "1")
                 colspan = int(val) if (isinstance(val, str) and val.isnumeric()) else 1
                 col_count += colspan
+                if cell_tag.name == "td" or cell_tag.get("rowspan") is None:
+                    is_row_header = False
             num_cols = max(num_cols, col_count)
+            if not is_row_header:
+                num_rows += 1
+        _log.debug(f"The table has {num_rows} rows and {num_cols} cols.")
         grid: list = [[None for _ in range(num_cols)] for _ in range(num_rows)]
         data = TableData(num_rows=num_rows, num_cols=num_cols, table_cells=[])
         # Iterate over the rows in the table
-        for row_idx, row in enumerate(element("tr")):
+        start_row_span = 0
+        row_idx = -1
+        for row in element("tr"):
             if not isinstance(row, Tag):
                 continue
             # For each row, find all the column cells (both <td> and <th>)
             cells = row(["td", "th"])
-            # Check if each cell in the row is a header -> means it is a column header
+            # Check if cell is in a column header or row header
             col_header = True
+            row_header = True
             for html_cell in cells:
-                if isinstance(html_cell, Tag) and html_cell.name == "td":
-                    col_header = False
+                if isinstance(html_cell, Tag):
+                    if html_cell.name == "td":
+                        col_header = False
+                        row_header = False
+                    elif html_cell.get("rowspan") is None:
+                        row_header = False
+            if not row_header:
+                row_idx += 1
+                start_row_span = 0
+            else:
+                start_row_span += 1
             # Extract the text content of each cell
             col_idx = 0
@@ -460,19 +479,24 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                     if isinstance(row_val, str) and row_val.isnumeric()
                     else 1
                 )
-                while grid[row_idx][col_idx] is not None:
+                if row_header:
+                    row_span -= 1
+                while (
+                    col_idx < num_cols
+                    and grid[row_idx + start_row_span][col_idx] is not None
+                ):
                     col_idx += 1
-                for r in range(row_span):
+                for r in range(start_row_span, start_row_span + row_span):
                     for c in range(col_span):
-                        grid[row_idx + r][col_idx + c] = text
+                        if row_idx + r < num_rows and col_idx + c < num_cols:
+                            grid[row_idx + r][col_idx + c] = text
                 table_cell = TableCell(
                     text=text,
                     row_span=row_span,
                     col_span=col_span,
-                    start_row_offset_idx=row_idx,
-                    end_row_offset_idx=row_idx + row_span,
+                    start_row_offset_idx=start_row_span + row_idx,
+                    end_row_offset_idx=start_row_span + row_idx + row_span,
                     start_col_offset_idx=col_idx,
                     end_col_offset_idx=col_idx + col_span,
                     column_header=col_header,

docling/backend/md_backend.py CHANGED Viewed

@@ -409,7 +409,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
                         )
                     return _txt
-                # restore original HTML by removing previouly added markers
+                # restore original HTML by removing previously added markers
                 for regex in [
                     rf"<pre>\s*<code>\s*{_START_MARKER}",
                     rf"{_STOP_MARKER}\s*</code>\s*</pre>",

docling/backend/msword_backend.py CHANGED Viewed

@@ -436,7 +436,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
         # Common styles for bullet and numbered lists.
         # "List Bullet", "List Number", "List Paragraph"
-        # Identify wether list is a numbered list or not
+        # Identify whether list is a numbered list or not
         # is_numbered = "List Bullet" not in paragraph.style.name
         is_numbered = False
         p_style_id, p_level = self._get_label_and_level(paragraph)

docling/backend/xml/jats_backend.py CHANGED Viewed

@@ -91,7 +91,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
         super().__init__(in_doc, path_or_stream)
         self.path_or_stream = path_or_stream
-        # Initialize the root of the document hiearchy
+        # Initialize the root of the document hierarchy
         self.root: Optional[NodeItem] = None
         self.valid = False

docling/backend/xml/uspto_backend.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Backend to parse patents from the United States Patent Office (USPTO).
-The parsers included in this module can handle patent grants pubished since 1976 and
+The parsers included in this module can handle patent grants published since 1976 and
 patent applications since 2001.
 The original files can be found in https://bulkdata.uspto.gov.
 """
@@ -440,7 +440,7 @@ class PatentUsptoIce(PatentUspto):
                     )
             elif name == self.Element.PARAGRAPH.value and text:
-                # remmove blank spaces added in paragraphs
+                # remove blank spaces added in paragraphs
                 text = re.sub("\\s+", " ", text)
                 if self.Element.ABSTRACT.value in self.property:
                     self.abstract = (
@@ -1697,7 +1697,7 @@ class XmlTable:
 class HtmlEntity:
     """Provide utility functions to get the HTML entities of styled characters.
-    This class has been developped from:
+    This class has been developed from:
     https://unicode-table.com/en/html-entities/
     https://www.w3.org/TR/WD-math-970515/table03.html
     """
@@ -1896,7 +1896,7 @@ class HtmlEntity:
         """Get an HTML entity of a greek letter in ISO 8879.
         Args:
-            The text to transform, as an ISO 8879 entitiy.
+            The text to transform, as an ISO 8879 entity.
         Returns:
             The HTML entity representing a greek letter. If the input text is not

docling/cli/main.py CHANGED Viewed

@@ -521,7 +521,7 @@ def convert(  # noqa: C901
             if image_export_mode != ImageRefMode.PLACEHOLDER:
                 pipeline_options.generate_page_images = True
                 pipeline_options.generate_picture_images = (
-                    True  # FIXME: to be deprecated in verson 3
+                    True  # FIXME: to be deprecated in version 3
                 )
                 pipeline_options.images_scale = 2

docling/cli/models.py CHANGED Viewed

@@ -32,6 +32,8 @@ class _AvailableModels(str, Enum):
     CODE_FORMULA = "code_formula"
     PICTURE_CLASSIFIER = "picture_classifier"
     SMOLVLM = "smolvlm"
+    SMOLDOCLING = "smoldocling"
+    SMOLDOCLING_MLX = "smoldocling_mlx"
     GRANITE_VISION = "granite_vision"
     EASYOCR = "easyocr"
@@ -105,6 +107,8 @@ def download(
         with_code_formula=_AvailableModels.CODE_FORMULA in to_download,
         with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download,
         with_smolvlm=_AvailableModels.SMOLVLM in to_download,
+        with_smoldocling=_AvailableModels.SMOLDOCLING in to_download,
+        with_smoldocling_mlx=_AvailableModels.SMOLDOCLING_MLX in to_download,
         with_granite_vision=_AvailableModels.GRANITE_VISION in to_download,
         with_easyocr=_AvailableModels.EASYOCR in to_download,
     )

docling/datamodel/document.py CHANGED Viewed

@@ -303,6 +303,14 @@ class _DocumentConversionInput(BaseModel):
                     else ""
                 )
                 mime = _DocumentConversionInput._mime_from_extension(ext)
+            if mime is not None and mime.lower() == "application/zip":
+                objname = obj.name.lower()
+                if objname.endswith(".xlsx"):
+                    mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+                elif objname.endswith(".docx"):
+                    mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+                elif objname.endswith(".pptx"):
+                    mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
         mime = mime or _DocumentConversionInput._detect_html_xhtml(content)
         mime = mime or _DocumentConversionInput._detect_csv(content)

docling/document_converter.py CHANGED Viewed

@@ -189,7 +189,9 @@ class DocumentConverter:
     def _get_pipeline_options_hash(self, pipeline_options: PipelineOptions) -> str:
         """Generate a hash of pipeline options to use as part of the cache key."""
         options_str = str(pipeline_options.model_dump())
-        return hashlib.md5(options_str.encode("utf-8")).hexdigest()
+        return hashlib.md5(
+            options_str.encode("utf-8"), usedforsecurity=False
+        ).hexdigest()
     def initialize_pipeline(self, format: InputFormat):
         """Initialize the conversion pipeline for the selected format."""

docling/models/picture_description_vlm_model.py CHANGED Viewed

@@ -57,7 +57,10 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
                 artifacts_path,
                 torch_dtype=torch.bfloat16,
                 _attn_implementation=(
-                    "flash_attention_2" if self.device.startswith("cuda") else "eager"
+                    "flash_attention_2"
+                    if self.device.startswith("cuda")
+                    and accelerator_options.cuda_use_flash_attention2
+                    else "eager"
                 ),
             ).to(self.device)

docling/models/readingorder_model.py CHANGED Viewed

@@ -346,7 +346,7 @@ class ReadingOrderModel:
         new_item.prov.append(prov)
     def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
-        with TimeRecorder(conv_res, "glm", scope=ProfilingScope.DOCUMENT):
+        with TimeRecorder(conv_res, "reading_order", scope=ProfilingScope.DOCUMENT):
             page_elements = self._assembled_to_readingorder_elements(conv_res)
             # Apply reading order

docling/models/table_structure_model.py CHANGED Viewed

@@ -234,7 +234,7 @@ class TableStructureModel(BasePageModel):
                                 tcells = table_cluster.cells
                             tokens = []
                             for c in tcells:
-                                # Only allow non empty stings (spaces) into the cells of a table
+                                # Only allow non empty strings (spaces) into the cells of a table
                                 if len(c.text.strip()) > 0:
                                     new_cell = copy.deepcopy(c)
                                     new_cell.rect = BoundingRectangle.from_bounding_box(
@@ -267,7 +267,7 @@ class TableStructureModel(BasePageModel):
                                     element["bbox"]["token"] = text_piece
                                 tc = TableCell.model_validate(element)
-                                if self.do_cell_matching and tc.bbox is not None:
+                                if tc.bbox is not None:
                                     tc.bbox = tc.bbox.scaled(1 / self.scale)
                                 table_cells.append(tc)

docling/models/tesseract_ocr_model.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import logging
 from collections.abc import Iterable
 from pathlib import Path
@@ -38,6 +40,8 @@ class TesseractOcrModel(BaseOcrModel):
         self.options: TesseractOcrOptions
         self.scale = 3  # multiplier for 72 dpi == 216 dpi.
+        self.reader = None
+        self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
         if self.enabled:
             install_errmsg = (
@@ -84,9 +88,7 @@ class TesseractOcrModel(BaseOcrModel):
                 "oem": tesserocr.OEM.DEFAULT,
             }
-            self.reader = None
             self.osd_reader = None
-            self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
             if self.options.path is not None:
                 tesserocr_kwargs["path"] = self.options.path
@@ -151,7 +153,7 @@ class TesseractOcrModel(BaseOcrModel):
                             script = map_tesseract_script(script)
                             lang = f"{self.script_prefix}{script}"
-                            # Check if the detected languge is present in the system
+                            # Check if the detected language is present in the system
                             if lang not in self._tesserocr_languages:
                                 msg = f"Tesseract detected the script '{script}' and language '{lang}'."
                                 msg += " However this language is not installed in your system and will be ignored."

docling/utils/model_downloader.py CHANGED Viewed

@@ -4,12 +4,15 @@ from typing import Optional
 from docling.datamodel.pipeline_options import (
     granite_picture_description,
+    smoldocling_vlm_conversion_options,
+    smoldocling_vlm_mlx_conversion_options,
     smolvlm_picture_description,
 )
 from docling.datamodel.settings import settings
 from docling.models.code_formula_model import CodeFormulaModel
 from docling.models.document_picture_classifier import DocumentPictureClassifier
 from docling.models.easyocr_model import EasyOcrModel
+from docling.models.hf_vlm_model import HuggingFaceVlmModel
 from docling.models.layout_model import LayoutModel
 from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
 from docling.models.table_structure_model import TableStructureModel
@@ -27,6 +30,8 @@ def download_models(
     with_code_formula: bool = True,
     with_picture_classifier: bool = True,
     with_smolvlm: bool = False,
+    with_smoldocling: bool = False,
+    with_smoldocling_mlx: bool = False,
     with_granite_vision: bool = False,
     with_easyocr: bool = True,
 ):
@@ -77,6 +82,25 @@ def download_models(
             progress=progress,
         )
+    if with_smoldocling:
+        _log.info("Downloading SmolDocling model...")
+        HuggingFaceVlmModel.download_models(
+            repo_id=smoldocling_vlm_conversion_options.repo_id,
+            local_dir=output_dir / smoldocling_vlm_conversion_options.repo_cache_folder,
+            force=force,
+            progress=progress,
+        )
+    if with_smoldocling_mlx:
+        _log.info("Downloading SmolDocling MLX model...")
+        HuggingFaceVlmModel.download_models(
+            repo_id=smoldocling_vlm_mlx_conversion_options.repo_id,
+            local_dir=output_dir
+            / smoldocling_vlm_mlx_conversion_options.repo_cache_folder,
+            force=force,
+            progress=progress,
+        )
     if with_granite_vision:
         _log.info("Downloading Granite Vision model...")
         PictureDescriptionVlmModel.download_models(

docling/utils/utils.py CHANGED Viewed

@@ -20,7 +20,7 @@ def create_file_hash(path_or_stream: Union[BytesIO, Path]) -> str:
     """Create a stable page_hash of the path_or_stream of a file"""
     block_size = 65536
-    hasher = hashlib.sha256()
+    hasher = hashlib.sha256(usedforsecurity=False)
     def _hash_buf(binary_stream):
         buf = binary_stream.read(block_size)  # read and page_hash in chunks
@@ -38,7 +38,7 @@ def create_file_hash(path_or_stream: Union[BytesIO, Path]) -> str:
 def create_hash(string: str):
-    hasher = hashlib.sha256()
+    hasher = hashlib.sha256(usedforsecurity=False)
     hasher.update(string.encode("utf-8"))
     return hasher.hexdigest()

{docling-2.31.0.dist-info → docling-2.31.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 2.31.0
+Version: 2.31.2
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Home-page: https://github.com/docling-project/docling
 License: MIT
@@ -28,6 +28,7 @@ Provides-Extra: vlm
 Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
 Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
 Requires-Dist: certifi (>=2024.7.4)
+Requires-Dist: click (<8.2.0)
 Requires-Dist: docling-core[chunking] (>=2.26.0,<3.0.0)
 Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
 Requires-Dist: docling-parse (>=4.0.0,<5.0.0)

{docling-2.31.0.dist-info → docling-2.31.2.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 docling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
-docling/backend/asciidoc_backend.py,sha256=VZ8Xk1VHGHRqBo_TdtMzRAu1NFaFaJ8dk4CaEcBaEm0,14038
+docling/backend/asciidoc_backend.py,sha256=W-4MRcID6AU9Ax23q8FwDwGG-OOCrBoqcNf2Ch_WPUc,14041
 docling/backend/csv_backend.py,sha256=2g9famYG2W-ID9jEdZPxc6O8QGv1vWQfjN8pL-QMBE0,4536
 docling/backend/docling_parse_backend.py,sha256=V_CsUdN5RkGQBBq7A_ReAiUW4CQVh0-1Ur157Ozurdg,8017
 docling/backend/docling_parse_v2_backend.py,sha256=6fokgqb1hMbZua33gL46EFamrwPTC7ms6ZuEHw-Dv28,9395
@@ -10,29 +10,29 @@ docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/docx/latex/latex_dict.py,sha256=tFJp4ScT_AkY2ON7nLEa560p601Jq2glcZvMKxxjn7w,6593
 docling/backend/docx/latex/omml.py,sha256=nEpcfyyrOucJyj6cD7wfThrIa-q0CQCoqMb3dkrhCRg,12094
-docling/backend/html_backend.py,sha256=TBiMAp3s_QbQTRymFA7wScXECyHn_w-Kb8MbqRibTmE,20099
+docling/backend/html_backend.py,sha256=3K-l5SUAAyqISNEb7nPst_I51xzYOVOkgmwXh3lv9sw,21063
 docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
-docling/backend/md_backend.py,sha256=EdGBXe0n8zniO1LSF3VIjviKs1VRUujpF8aFUpJ5D1k,17209
+docling/backend/md_backend.py,sha256=JkY1qTvQFXjKSZGfD-83d-fZelorUG_l6mpJdYGqvX8,17210
 docling/backend/msexcel_backend.py,sha256=3j0WQfqDpgPXdPMCguefdv7arcNVDedPD6gl54cmLn8,18110
 docling/backend/mspowerpoint_backend.py,sha256=RwqfvvzrtM56L9uf7PR9lvlHJ-LyYGpkS1iVxkTl72Q,17203
-docling/backend/msword_backend.py,sha256=Xdrs_k160-tDUmhcFGZ7MBbpiYkwPLT3wl3FUO2Ui1A,32476
+docling/backend/msword_backend.py,sha256=lVVMNwt0WIl4RD5wAf8pc8bJsb60x1BA8hTTkVmEVa8,32477
 docling/backend/pdf_backend.py,sha256=KE9TMuFO5WX-o5A_DAd4tEaLi4HMZ4XjKdpllItVkWM,2238
 docling/backend/pypdfium2_backend.py,sha256=pX8f0WbUb0KTDTKyQuLzP_lgHHubyGXWD33vmpefPy8,10805
 docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/backend/xml/jats_backend.py,sha256=g9YNSS8kqhVL7ceZF2jR7Aaqbh1F1Zn6jmte0HyEH20,24926
-docling/backend/xml/uspto_backend.py,sha256=iE1PSAgXqtHkqcsC1RUJiwuyKNlf2elucCV1V2sk0kQ,70926
+docling/backend/xml/jats_backend.py,sha256=ghGi9bHjx3BvaOtmzLw86-wZy4UxpQPOPQL4e73-BI8,24927
+docling/backend/xml/uspto_backend.py,sha256=nyAMr5ht7dclxkVDwsKNeiOhLQrUtRLS8JdscB2AVJg,70924
 docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
 docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/cli/main.py,sha256=Z6EdrwESOKUBHF6yJlzuwnznScBtdrlbU_xB0AT9cA4,26137
-docling/cli/models.py,sha256=Cyv7d_c8J62luGWsYvbcC9_3UpPp_TVsFo5vJAyr4kI,3940
+docling/cli/main.py,sha256=D7WEY4x6pQCVFRy3peK9KUDOb0Y5IVc-vTDqPnHPK00,26138
+docling/cli/models.py,sha256=9yLGp6QRJGpR86U3SjmWAXDt3MvBaJLLY4xDVdsu3O8,4160
 docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
 docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/datamodel/base_models.py,sha256=DRE_XoldtCreWF4ucO0iK0l8uOnfvnhQaYjV0z1Qe0M,7921
-docling/datamodel/document.py,sha256=02QybqtnQ0genFU7UF9pVL3fIwguu9br0JbdtcUvu4o,14998
+docling/datamodel/document.py,sha256=_0Z4zUgCB5677ZW8Y7C1fv75enLZJOJUjcUkGTSiTBA,15553
 docling/datamodel/pipeline_options.py,sha256=-1QG8dY0RZkTJb66lXErEAnPq4F_1vgnk_5AcIr3cgU,13350
 docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
-docling/document_converter.py,sha256=zSaGp2zx73kiE1KHmEHwnG-wxJvcMiyyn2fCAM2vdYk,13804
+docling/document_converter.py,sha256=PRRr65nigQ3LZDl4G2fBMkOtJyswT7xyGt7fpUeDO3w,13849
 docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
 docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/models/api_vlm_model.py,sha256=w1SzdG3Ypz_0iZGiX-skMwV1E1JnOHH2BJiNkcEEIAA,2478
@@ -53,14 +53,14 @@ docling/models/page_assemble_model.py,sha256=GO7JI1D6T6EkSW94cLQobPGNQUahkxQqTPR
 docling/models/page_preprocessing_model.py,sha256=6pOGXiFQ-oz06UmJdcaYMdVyfZ0YVLWS6efGcx7Mxws,3105
 docling/models/picture_description_api_model.py,sha256=qs3n0smC9DXhzwJeK_iQG08Y6ZFHInKtdGPVhzgvxgU,2091
 docling/models/picture_description_base_model.py,sha256=FbBVXzAOB87xpJN28tuGCxoAdcf6mZNUOqJR7ljUg5g,2946
-docling/models/picture_description_vlm_model.py,sha256=nS68qbJQCP94-gbgFfAzMLaEC-wquSLEwuDix287c9c,4067
+docling/models/picture_description_vlm_model.py,sha256=DiTjnehVy1n0N04xPUvZl8rx4TiNHzHn9Cnzy_ePGts,4177
 docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurHCZjp4,858
 docling/models/rapid_ocr_model.py,sha256=Tq_1Egu5Hjx7Y69Vox17QTtRXztSyflB1fhN08CWQwY,5894
-docling/models/readingorder_model.py,sha256=BxACJ-aIl2aUlyLcyl-uDtuSZH_mCLJgbkDG4Sx_www,14564
-docling/models/table_structure_model.py,sha256=dR3JkiPkdbScaNy6dia4_ZXPYESSiMDZztD-lLHE1uY,12591
+docling/models/readingorder_model.py,sha256=S9ru2ApY9sE-Uue3hptWHmbmElwo36bUbAikxCFpHYs,14574
+docling/models/table_structure_model.py,sha256=1gxLaooK0IKMrnmS8nT1BItKqt1GAKghfpmLKb3i53g,12566
 docling/models/tesseract_ocr_cli_model.py,sha256=iFdOud5ymoW9WV8bWLCDpd3LJBo9M5bTT5vc635zEDY,10229
-docling/models/tesseract_ocr_model.py,sha256=oPKOoTTcpYUTDNRteBG-MFcxB9SDC6dk4HuKjIODwMk,9310
+docling/models/tesseract_ocr_model.py,sha256=72009TJL_7tXTEnhlsGRiw_KibrQ0LjZlCBtW8NtwUc,9339
 docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/pipeline/base_pipeline.py,sha256=DnuxAf7EQusdSRae0QUVth-0f2mSff8JZjX-2vazk00,8751
 docling/pipeline/simple_pipeline.py,sha256=TXZOwR7hZRji462ZTIpte0VJjzbxvNVE8dbLFANDhSU,2253
@@ -74,13 +74,13 @@ docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
 docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
 docling/utils/layout_postprocessor.py,sha256=x7exVG3HYzV9M_O78FfyoG43Y2L7PPMMydvSNwjqh8s,24528
 docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
-docling/utils/model_downloader.py,sha256=AMqfHTmZzzsPrlcHFdX7hhW-a3Ki6ndjnTjQQYrDSxU,3206
+docling/utils/model_downloader.py,sha256=ocvud3G3qlBQhzMo69Q3RJMnvq5HPZ2DwNbMuEp8RCs,4142
 docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
 docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
-docling/utils/utils.py,sha256=xxmVMhQWr7oVBy93IGGpr2x8FBVRHNDwD31kwAF5xK4,1866
+docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
 docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
-docling-2.31.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
-docling-2.31.0.dist-info/METADATA,sha256=tScsMgyfrwtIaCKPl-ygViccYxnRADeUNuHKDGjw7ww,10108
-docling-2.31.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-docling-2.31.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
-docling-2.31.0.dist-info/RECORD,,
+docling-2.31.2.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling-2.31.2.dist-info/METADATA,sha256=V11tJajepssRJ-ltuRsNThmo9_6U6Gc28wqZlgDzdz0,10138
+docling-2.31.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+docling-2.31.2.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
+docling-2.31.2.dist-info/RECORD,,

{docling-2.31.0.dist-info → docling-2.31.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{docling-2.31.0.dist-info → docling-2.31.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{docling-2.31.0.dist-info → docling-2.31.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

docling 2.31.0__py3-none-any.whl → 2.31.2__py3-none-any.whl

docling 2.31.0__py3-none-any.whl → 2.31.2__py3-none-any.whl