PyPI - docling - Versions diffs - 2.53.0__py3-none-any.whl → 2.55.0__py3-none-any.whl - Mend

docling 2.53.0__py3-none-any.whl → 2.55.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

docling/backend/asciidoc_backend.py +1 -1
docling/backend/html_backend.py +254 -136
docling/backend/md_backend.py +4 -1
docling/backend/msword_backend.py +177 -76
docling/backend/webvtt_backend.py +572 -0
docling/backend/xml/jats_backend.py +111 -7
docling/backend/xml/uspto_backend.py +1 -1
docling/cli/main.py +5 -0
docling/datamodel/base_models.py +23 -23
docling/datamodel/document.py +2 -0
docling/datamodel/pipeline_options_vlm_model.py +13 -2
docling/datamodel/vlm_model_specs.py +9 -0
docling/document_converter.py +4 -0
docling/models/api_vlm_model.py +45 -16
docling/models/base_model.py +2 -1
docling/models/readingorder_model.py +1 -1
docling/models/table_structure_model.py +3 -3
docling/models/utils/generation_utils.py +157 -0
docling/models/utils/hf_model_download.py +6 -1
docling/models/vlm_models_inline/hf_transformers_model.py +75 -14
docling/models/vlm_models_inline/mlx_model.py +58 -1
docling/models/vlm_models_inline/vllm_model.py +189 -124
docling/utils/api_image_request.py +107 -1
{docling-2.53.0.dist-info → docling-2.55.0.dist-info}/METADATA +5 -5
{docling-2.53.0.dist-info → docling-2.55.0.dist-info}/RECORD +29 -27
{docling-2.53.0.dist-info → docling-2.55.0.dist-info}/WHEEL +0 -0
{docling-2.53.0.dist-info → docling-2.55.0.dist-info}/entry_points.txt +0 -0
{docling-2.53.0.dist-info → docling-2.55.0.dist-info}/licenses/LICENSE +0 -0
{docling-2.53.0.dist-info → docling-2.55.0.dist-info}/top_level.txt +0 -0

docling/backend/xml/jats_backend.py CHANGED Viewed

@@ -2,9 +2,9 @@ import logging
 import traceback
 from io import BytesIO
 from pathlib import Path
-from typing import Final, Optional, Union
+from typing import Final, Optional, Union, cast
-from bs4 import BeautifulSoup, Tag
+from bs4 import BeautifulSoup, NavigableString, Tag
 from docling_core.types.doc import (
     DocItemLabel,
     DoclingDocument,
@@ -12,6 +12,8 @@ from docling_core.types.doc import (
     GroupItem,
     GroupLabel,
     NodeItem,
+    TableCell,
+    TableData,
     TextItem,
 )
 from lxml import etree
@@ -350,7 +352,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
         return
-    def _parse_element_citation(self, node: etree._Element) -> str:  # noqa: C901
+    def _parse_element_citation(self, node: etree._Element) -> str:
         citation: Citation = {
             "author_names": "",
             "title": "",
@@ -535,6 +537,110 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
         return
+    @staticmethod
+    def parse_table_data(element: Tag) -> Optional[TableData]:
+        # TODO, see how to implement proper support for rich tables from HTML backend
+        nested_tables = element.find("table")
+        if nested_tables is not None:
+            _log.debug("Skipping nested table.")
+            return None
+        # Find the number of rows and columns (taking into account spans)
+        num_rows = 0
+        num_cols = 0
+        for row in element("tr"):
+            col_count = 0
+            is_row_header = True
+            if not isinstance(row, Tag):
+                continue
+            for cell in row(["td", "th"]):
+                if not isinstance(row, Tag):
+                    continue
+                cell_tag = cast(Tag, cell)
+                col_span, row_span = HTMLDocumentBackend._get_cell_spans(cell_tag)
+                col_count += col_span
+                if cell_tag.name == "td" or row_span == 1:
+                    is_row_header = False
+            num_cols = max(num_cols, col_count)
+            if not is_row_header:
+                num_rows += 1
+        _log.debug(f"The table has {num_rows} rows and {num_cols} cols.")
+        grid: list = [[None for _ in range(num_cols)] for _ in range(num_rows)]
+        data = TableData(num_rows=num_rows, num_cols=num_cols, table_cells=[])
+        # Iterate over the rows in the table
+        start_row_span = 0
+        row_idx = -1
+        for row in element("tr"):
+            if not isinstance(row, Tag):
+                continue
+            # For each row, find all the column cells (both <td> and <th>)
+            cells = row(["td", "th"])
+            # Check if cell is in a column header or row header
+            col_header = True
+            row_header = True
+            for html_cell in cells:
+                if isinstance(html_cell, Tag):
+                    _, row_span = HTMLDocumentBackend._get_cell_spans(html_cell)
+                    if html_cell.name == "td":
+                        col_header = False
+                        row_header = False
+                    elif row_span == 1:
+                        row_header = False
+            if not row_header:
+                row_idx += 1
+                start_row_span = 0
+            else:
+                start_row_span += 1
+            # Extract the text content of each cell
+            col_idx = 0
+            for html_cell in cells:
+                if not isinstance(html_cell, Tag):
+                    continue
+                # extract inline formulas
+                for formula in html_cell("inline-formula"):
+                    math_parts = formula.text.split("$$")
+                    if len(math_parts) == 3:
+                        math_formula = f"$${math_parts[1]}$$"
+                        formula.replace_with(NavigableString(math_formula))
+                # TODO: extract content correctly from table-cells with lists
+                text = HTMLDocumentBackend.get_text(html_cell).strip()
+                col_span, row_span = HTMLDocumentBackend._get_cell_spans(html_cell)
+                if row_header:
+                    row_span -= 1
+                while (
+                    col_idx < num_cols
+                    and grid[row_idx + start_row_span][col_idx] is not None
+                ):
+                    col_idx += 1
+                for r in range(start_row_span, start_row_span + row_span):
+                    for c in range(col_span):
+                        if row_idx + r < num_rows and col_idx + c < num_cols:
+                            grid[row_idx + r][col_idx + c] = text
+                table_cell = TableCell(
+                    text=text,
+                    row_span=row_span,
+                    col_span=col_span,
+                    start_row_offset_idx=start_row_span + row_idx,
+                    end_row_offset_idx=start_row_span + row_idx + row_span,
+                    start_col_offset_idx=col_idx,
+                    end_col_offset_idx=col_idx + col_span,
+                    column_header=col_header,
+                    row_header=((not col_header) and html_cell.name == "th"),
+                )
+                data.table_cells.append(table_cell)
+        return data
     def _add_table(
         self, doc: DoclingDocument, parent: NodeItem, table_xml_component: Table
     ) -> None:
@@ -543,8 +649,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
         if not isinstance(table_tag, Tag):
             return
-        data = HTMLDocumentBackend.parse_table_data(table_tag)
+        data = JatsDocumentBackend.parse_table_data(table_tag)
         # TODO: format label vs caption once styling is supported
         label = table_xml_component["label"]
         caption = table_xml_component["caption"]
@@ -554,7 +659,6 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
             if table_text
             else None
         )
         if data is not None:
             doc.add_table(data=data, parent=parent, caption=table_caption)
@@ -609,7 +713,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
         )
         return
-    def _walk_linear(  # noqa: C901
+    def _walk_linear(
         self, doc: DoclingDocument, parent: NodeItem, node: etree._Element
     ) -> str:
         skip_tags = ["term"]

docling/backend/xml/uspto_backend.py CHANGED Viewed

@@ -1523,7 +1523,7 @@ class XmlTable:
         return ncols_max
-    def _parse_table(self, table: Tag) -> TableData:  # noqa: C901
+    def _parse_table(self, table: Tag) -> TableData:
         """Parse the content of a table tag.
         Args:

docling/cli/main.py CHANGED Viewed

@@ -66,6 +66,7 @@ from docling.datamodel.vlm_model_specs import (
     GRANITE_VISION_TRANSFORMERS,
     GRANITEDOCLING_MLX,
     GRANITEDOCLING_TRANSFORMERS,
+    GRANITEDOCLING_VLLM,
     SMOLDOCLING_MLX,
     SMOLDOCLING_TRANSFORMERS,
     SMOLDOCLING_VLLM,
@@ -686,6 +687,7 @@ def convert(  # noqa: C901
                             "To run SmolDocling faster, please install mlx-vlm:\n"
                             "pip install mlx-vlm"
                         )
             elif vlm_model == VlmModelType.GRANITEDOCLING:
                 pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS
                 if sys.platform == "darwin":
@@ -701,6 +703,9 @@ def convert(  # noqa: C901
             elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
                 pipeline_options.vlm_options = SMOLDOCLING_VLLM
+            elif vlm_model == VlmModelType.GRANITEDOCLING_VLLM:
+                pipeline_options.vlm_options = GRANITEDOCLING_VLLM
             pdf_format_option = PdfFormatOption(
                 pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
             )

docling/datamodel/base_models.py CHANGED Viewed

@@ -1,7 +1,6 @@
-import math
 from collections import defaultdict
 from enum import Enum
-from typing import TYPE_CHECKING, Dict, List, Optional, Type, Union
+from typing import TYPE_CHECKING, Optional, Type, Union
 import numpy as np
 from docling_core.types.doc import (
@@ -14,9 +13,7 @@ from docling_core.types.doc import (
 )
 from docling_core.types.doc.base import PydanticSerCtxKey, round_pydantic_float
 from docling_core.types.doc.page import SegmentedPdfPage, TextCell
-from docling_core.types.io import (
-    DocumentStream,
-)
+from docling_core.types.io import DocumentStream
 # DO NOT REMOVE; explicitly exposed from this location
 from PIL.Image import Image
@@ -71,6 +68,7 @@ class InputFormat(str, Enum):
     METS_GBS = "mets_gbs"
     JSON_DOCLING = "json_docling"
     AUDIO = "audio"
+    VTT = "vtt"
 class OutputFormat(str, Enum):
@@ -82,7 +80,7 @@ class OutputFormat(str, Enum):
     DOCTAGS = "doctags"
-FormatToExtensions: Dict[InputFormat, List[str]] = {
+FormatToExtensions: dict[InputFormat, list[str]] = {
     InputFormat.DOCX: ["docx", "dotx", "docm", "dotm"],
     InputFormat.PPTX: ["pptx", "potx", "ppsx", "pptm", "potm", "ppsm"],
     InputFormat.PDF: ["pdf"],
@@ -97,9 +95,10 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
     InputFormat.METS_GBS: ["tar.gz"],
     InputFormat.JSON_DOCLING: ["json"],
     InputFormat.AUDIO: ["wav", "mp3"],
+    InputFormat.VTT: ["vtt"],
 }
-FormatToMimeType: Dict[InputFormat, List[str]] = {
+FormatToMimeType: dict[InputFormat, list[str]] = {
     InputFormat.DOCX: [
         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
         "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
@@ -130,6 +129,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
     InputFormat.METS_GBS: ["application/mets+xml"],
     InputFormat.JSON_DOCLING: ["application/json"],
     InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
+    InputFormat.VTT: ["text/vtt"],
 }
 MimeTypeToFormat: dict[str, list[InputFormat]] = {
@@ -162,8 +162,8 @@ class Cluster(BaseModel):
     label: DocItemLabel
     bbox: BoundingBox
     confidence: float = 1.0
-    cells: List[TextCell] = []
-    children: List["Cluster"] = []  # Add child cluster support
+    cells: list[TextCell] = []
+    children: list["Cluster"] = []  # Add child cluster support
     @field_serializer("confidence")
     def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
@@ -179,7 +179,7 @@ class BasePageElement(BaseModel):
 class LayoutPrediction(BaseModel):
-    clusters: List[Cluster] = []
+    clusters: list[Cluster] = []
 class VlmPredictionToken(BaseModel):
@@ -201,14 +201,14 @@ class ContainerElement(
 class Table(BasePageElement):
-    otsl_seq: List[str]
+    otsl_seq: list[str]
     num_rows: int = 0
     num_cols: int = 0
-    table_cells: List[TableCell]
+    table_cells: list[TableCell]
 class TableStructurePrediction(BaseModel):
-    table_map: Dict[int, Table] = {}
+    table_map: dict[int, Table] = {}
 class TextElement(BasePageElement):
@@ -216,7 +216,7 @@ class TextElement(BasePageElement):
 class FigureElement(BasePageElement):
-    annotations: List[PictureDataType] = []
+    annotations: list[PictureDataType] = []
     provenance: Optional[str] = None
     predicted_class: Optional[str] = None
     confidence: Optional[float] = None
@@ -234,12 +234,12 @@ class FigureElement(BasePageElement):
 class FigureClassificationPrediction(BaseModel):
     figure_count: int = 0
-    figure_map: Dict[int, FigureElement] = {}
+    figure_map: dict[int, FigureElement] = {}
 class EquationPrediction(BaseModel):
     equation_count: int = 0
-    equation_map: Dict[int, TextElement] = {}
+    equation_map: dict[int, TextElement] = {}
 class PagePredictions(BaseModel):
@@ -254,9 +254,9 @@ PageElement = Union[TextElement, Table, FigureElement, ContainerElement]
 class AssembledUnit(BaseModel):
-    elements: List[PageElement] = []
-    body: List[PageElement] = []
-    headers: List[PageElement] = []
+    elements: list[PageElement] = []
+    body: list[PageElement] = []
+    headers: list[PageElement] = []
 class ItemAndImageEnrichmentElement(BaseModel):
@@ -280,12 +280,12 @@ class Page(BaseModel):
         None  # Internal PDF backend. By default it is cleared during assembling.
     )
     _default_image_scale: float = 1.0  # Default image scale for external usage.
-    _image_cache: Dict[
+    _image_cache: dict[
         float, Image
     ] = {}  # Cache of images in different scales. By default it is cleared during assembling.
     @property
-    def cells(self) -> List[TextCell]:
+    def cells(self) -> list[TextCell]:
         """Return text cells as a read-only view of parsed_page.textline_cells."""
         if self.parsed_page is not None:
             return self.parsed_page.textline_cells
@@ -354,7 +354,7 @@ class OpenAiApiResponse(BaseModel):
     id: str
     model: Optional[str] = None  # returned by openai
-    choices: List[OpenAiResponseChoice]
+    choices: list[OpenAiResponseChoice]
     created: int
     usage: OpenAiResponseUsage
@@ -430,7 +430,7 @@ class PageConfidenceScores(BaseModel):
 class ConfidenceReport(PageConfidenceScores):
-    pages: Dict[int, PageConfidenceScores] = Field(
+    pages: dict[int, PageConfidenceScores] = Field(
         default_factory=lambda: defaultdict(PageConfidenceScores)
     )

docling/datamodel/document.py CHANGED Viewed

@@ -394,6 +394,8 @@ class _DocumentConversionInput(BaseModel):
             mime = FormatToMimeType[InputFormat.PPTX][0]
         elif ext in FormatToExtensions[InputFormat.XLSX]:
             mime = FormatToMimeType[InputFormat.XLSX][0]
+        elif ext in FormatToExtensions[InputFormat.VTT]:
+            mime = FormatToMimeType[InputFormat.VTT][0]
         return mime

docling/datamodel/pipeline_options_vlm_model.py CHANGED Viewed

@@ -1,11 +1,13 @@
 from enum import Enum
-from typing import Any, Dict, List, Literal, Optional
+from typing import Any, Dict, List, Literal, Optional, Union
 from docling_core.types.doc.page import SegmentedPage
-from pydantic import AnyUrl, BaseModel
+from pydantic import AnyUrl, BaseModel, ConfigDict
+from transformers import StoppingCriteria
 from typing_extensions import deprecated
 from docling.datamodel.accelerator_options import AcceleratorDevice
+from docling.models.utils.generation_utils import GenerationStopper
 class BaseVlmOptions(BaseModel):
@@ -50,9 +52,12 @@ class TransformersPromptStyle(str, Enum):
 class InlineVlmOptions(BaseVlmOptions):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
     kind: Literal["inline_model_options"] = "inline_model_options"
     repo_id: str
+    revision: str = "main"
     trust_remote_code: bool = False
     load_in_8bit: bool = True
     llm_int8_threshold: float = 6.0
@@ -71,6 +76,7 @@ class InlineVlmOptions(BaseVlmOptions):
     ]
     stop_strings: List[str] = []
+    custom_stopping_criteria: List[Union[StoppingCriteria, GenerationStopper]] = []
     extra_generation_config: Dict[str, Any] = {}
     extra_processor_kwargs: Dict[str, Any] = {}
@@ -88,6 +94,8 @@ class HuggingFaceVlmOptions(InlineVlmOptions):
 class ApiVlmOptions(BaseVlmOptions):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
     kind: Literal["api_model_options"] = "api_model_options"
     url: AnyUrl = AnyUrl(
@@ -98,3 +106,6 @@ class ApiVlmOptions(BaseVlmOptions):
     timeout: float = 60
     concurrency: int = 1
     response_format: ResponseFormat
+    stop_strings: List[str] = []
+    custom_stopping_criteria: List[Union[GenerationStopper]] = []

docling/datamodel/vlm_model_specs.py CHANGED Viewed

@@ -29,12 +29,20 @@ GRANITEDOCLING_TRANSFORMERS = InlineVlmOptions(
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
     ],
+    extra_generation_config=dict(skip_special_tokens=False),
     scale=2.0,
     temperature=0.0,
     max_new_tokens=8192,
     stop_strings=["</doctag>", "<|end_of_text|>"],
 )
+GRANITEDOCLING_VLLM = GRANITEDOCLING_TRANSFORMERS.model_copy()
+GRANITEDOCLING_VLLM.inference_framework = InferenceFramework.VLLM
+GRANITEDOCLING_VLLM.revision = (
+    "untied"  # change back to "main" with next vllm release after 0.10.2
+)
 GRANITEDOCLING_MLX = InlineVlmOptions(
     repo_id="ibm-granite/granite-docling-258M-mlx",
     prompt="Convert this page to docling.",
@@ -302,3 +310,4 @@ class VlmModelType(str, Enum):
     GRANITE_VISION_OLLAMA = "granite_vision_ollama"
     GOT_OCR_2 = "got_ocr_2"
     GRANITEDOCLING = "granite_docling"
+    GRANITEDOCLING_VLLM = "granite_docling_vllm"

docling/document_converter.py CHANGED Viewed

@@ -25,6 +25,7 @@ from docling.backend.msexcel_backend import MsExcelDocumentBackend
 from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
 from docling.backend.msword_backend import MsWordDocumentBackend
 from docling.backend.noop_backend import NoOpBackend
+from docling.backend.webvtt_backend import WebVTTDocumentBackend
 from docling.backend.xml.jats_backend import JatsDocumentBackend
 from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
 from docling.datamodel.base_models import (
@@ -170,6 +171,9 @@ def _get_default_option(format: InputFormat) -> FormatOption:
             pipeline_cls=SimplePipeline, backend=DoclingJSONBackend
         ),
         InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=NoOpBackend),
+        InputFormat.VTT: FormatOption(
+            pipeline_cls=SimplePipeline, backend=WebVTTDocumentBackend
+        ),
     }
     if (options := format_to_default_options.get(format)) is not None:
         return options

docling/models/api_vlm_model.py CHANGED Viewed

@@ -1,12 +1,18 @@
 from collections.abc import Iterable
 from concurrent.futures import ThreadPoolExecutor
+from transformers import StoppingCriteria
 from docling.datamodel.base_models import Page, VlmPrediction
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options_vlm_model import ApiVlmOptions
 from docling.exceptions import OperationNotAllowed
 from docling.models.base_model import BasePageModel
-from docling.utils.api_image_request import api_image_request
+from docling.models.utils.generation_utils import GenerationStopper
+from docling.utils.api_image_request import (
+    api_image_request,
+    api_image_request_streaming,
+)
 from docling.utils.profiling import TimeRecorder
@@ -41,19 +47,43 @@ class ApiVlmModel(BasePageModel):
             assert page._backend is not None
             if not page._backend.is_valid():
                 return page
-            else:
-                with TimeRecorder(conv_res, "vlm"):
-                    assert page.size is not None
-                    hi_res_image = page.get_image(
-                        scale=self.vlm_options.scale, max_size=self.vlm_options.max_size
-                    )
-                    assert hi_res_image is not None
-                    if hi_res_image:
-                        if hi_res_image.mode != "RGB":
-                            hi_res_image = hi_res_image.convert("RGB")
+            with TimeRecorder(conv_res, "vlm"):
+                assert page.size is not None
+                hi_res_image = page.get_image(
+                    scale=self.vlm_options.scale, max_size=self.vlm_options.max_size
+                )
+                assert hi_res_image is not None
+                if hi_res_image and hi_res_image.mode != "RGB":
+                    hi_res_image = hi_res_image.convert("RGB")
-                    prompt = self.vlm_options.build_prompt(page.parsed_page)
+                prompt = self.vlm_options.build_prompt(page.parsed_page)
+                if self.vlm_options.custom_stopping_criteria:
+                    # Instantiate any GenerationStopper classes before passing to streaming
+                    instantiated_stoppers = []
+                    for criteria in self.vlm_options.custom_stopping_criteria:
+                        if isinstance(criteria, GenerationStopper):
+                            instantiated_stoppers.append(criteria)
+                        elif isinstance(criteria, type) and issubclass(
+                            criteria, GenerationStopper
+                        ):
+                            instantiated_stoppers.append(criteria())
+                        # Skip non-GenerationStopper criteria (should have been caught in validation)
+                    # Streaming path with early abort support
+                    page_tags = api_image_request_streaming(
+                        image=hi_res_image,
+                        prompt=prompt,
+                        url=self.vlm_options.url,
+                        timeout=self.timeout,
+                        headers=self.vlm_options.headers,
+                        generation_stoppers=instantiated_stoppers,
+                        **self.params,
+                    )
+                else:
+                    # Non-streaming fallback (existing behavior)
                     page_tags = api_image_request(
                         image=hi_res_image,
                         prompt=prompt,
@@ -63,10 +93,9 @@ class ApiVlmModel(BasePageModel):
                         **self.params,
                     )
-                    page_tags = self.vlm_options.decode_response(page_tags)
-                    page.predictions.vlm_response = VlmPrediction(text=page_tags)
-                return page
+                page_tags = self.vlm_options.decode_response(page_tags)
+                page.predictions.vlm_response = VlmPrediction(text=page_tags)
+            return page
         with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
             yield from executor.map(_vlm_request, page_batch)

docling/models/base_model.py CHANGED Viewed

@@ -88,7 +88,8 @@ class BaseVlmPageModel(BasePageModel, BaseVlmModel):
         if self.vlm_options.transformers_prompt_style == TransformersPromptStyle.RAW:
             return user_prompt
+        elif self.vlm_options.transformers_prompt_style == TransformersPromptStyle.NONE:
+            return ""
         elif self.vlm_options.repo_id == "microsoft/Phi-4-multimodal-instruct":
             _log.debug("Using specialized prompt for Phi-4")
             # Note: This might need adjustment for VLLM vs transformers

docling/models/readingorder_model.py CHANGED Viewed

@@ -103,7 +103,7 @@ class ReadingOrderModel:
             else:
                 doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov)
-    def _readingorder_elements_to_docling_doc(  # noqa: C901
+    def _readingorder_elements_to_docling_doc(
         self,
         conv_res: ConversionResult,
         ro_elements: List[ReadingOrderPageElement],

docling/models/table_structure_model.py CHANGED Viewed

@@ -121,7 +121,7 @@ class TableStructureModel(BasePageModel):
         for table_element in tbl_list:
             x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
-            y0 *= scale_x
+            y0 *= scale_y
             y1 *= scale_y
             x0 *= scale_x
             x1 *= scale_x
@@ -132,7 +132,7 @@ class TableStructureModel(BasePageModel):
                 x0, y0, x1, y1 = cell.rect.to_bounding_box().as_tuple()
                 x0 *= scale_x
                 x1 *= scale_x
-                y0 *= scale_x
+                y0 *= scale_y
                 y1 *= scale_y
                 draw.rectangle([(x0, y0), (x1, y1)], outline="green")
@@ -142,7 +142,7 @@ class TableStructureModel(BasePageModel):
                     x0, y0, x1, y1 = tc.bbox.as_tuple()
                     x0 *= scale_x
                     x1 *= scale_x
-                    y0 *= scale_x
+                    y0 *= scale_y
                     y1 *= scale_y
                     if tc.column_header:

docling 2.53.0__py3-none-any.whl → 2.55.0__py3-none-any.whl

docling 2.53.0__py3-none-any.whl → 2.55.0__py3-none-any.whl