docling 2.29.0__py3-none-any.whl → 2.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/msexcel_backend.py +272 -90
- docling/backend/msword_backend.py +20 -12
- docling/cli/main.py +20 -2
- docling/datamodel/base_models.py +33 -0
- docling/datamodel/document.py +7 -0
- docling/datamodel/pipeline_options.py +29 -3
- docling/models/api_vlm_model.py +67 -0
- docling/models/picture_description_api_model.py +8 -75
- docling/models/picture_description_base_model.py +14 -2
- docling/pipeline/standard_pdf_pipeline.py +6 -2
- docling/pipeline/vlm_pipeline.py +27 -17
- docling/utils/api_image_request.py +61 -0
- {docling-2.29.0.dist-info → docling-2.30.0.dist-info}/METADATA +3 -3
- {docling-2.29.0.dist-info → docling-2.30.0.dist-info}/RECORD +17 -15
- {docling-2.29.0.dist-info → docling-2.30.0.dist-info}/LICENSE +0 -0
- {docling-2.29.0.dist-info → docling-2.30.0.dist-info}/WHEEL +0 -0
- {docling-2.29.0.dist-info → docling-2.30.0.dist-info}/entry_points.txt +0 -0
docling/backend/msexcel_backend.py
CHANGED
@@ -1,36 +1,50 @@
 import logging
 from io import BytesIO
 from pathlib import Path
-from typing import
+from typing import Any, Union, cast
 
 from docling_core.types.doc import (
+    BoundingBox,
+    CoordOrigin,
+    DocItem,
     DoclingDocument,
     DocumentOrigin,
     GroupLabel,
     ImageRef,
+    ProvenanceItem,
+    Size,
     TableCell,
     TableData,
 )
-
-# from lxml import etree
-from openpyxl import Workbook, load_workbook
-from openpyxl.cell.cell import Cell
+from openpyxl import load_workbook
 from openpyxl.drawing.image import Image
+from openpyxl.drawing.spreadsheet_drawing import TwoCellAnchor
 from openpyxl.worksheet.worksheet import Worksheet
+from PIL import Image as PILImage
+from pydantic import BaseModel, NonNegativeInt, PositiveInt
+from typing_extensions import override
 
-from docling.backend.abstract_backend import
+from docling.backend.abstract_backend import (
+    DeclarativeDocumentBackend,
+    PaginatedDocumentBackend,
+)
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import InputDocument
 
 _log = logging.getLogger(__name__)
 
-from typing import Any, List
 
-
-
+class ExcelCell(BaseModel):
+    """Represents an Excel cell.
 
+    Attributes:
+        row: The row number of the cell.
+        col: The column number of the cell.
+        text: The text content of the cell.
+        row_span: The number of rows the cell spans.
+        col_span: The number of columns the cell spans.
+    """
 
-class ExcelCell(BaseModel):
     row: int
     col: int
     text: str
@@ -39,19 +53,57 @@ class ExcelCell(BaseModel):
 
 
 class ExcelTable(BaseModel):
+    """Represents an Excel table on a worksheet.
+
+    Attributes:
+        anchor: The column and row indices of the upper-left cell of the table
+            (0-based index).
+        num_rows: The number of rows in the table.
+        num_cols: The number of columns in the table.
+        data: The data in the table, represented as a list of ExcelCell objects.
+    """
+
+    anchor: tuple[NonNegativeInt, NonNegativeInt]
     num_rows: int
     num_cols: int
-    data:
+    data: list[ExcelCell]
 
 
-class MsExcelDocumentBackend(DeclarativeDocumentBackend):
-
+class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBackend):
+    """Backend for parsing Excel workbooks.
+
+    The backend converts an Excel workbook into a DoclingDocument object.
+    Each worksheet is converted into a separate page.
+    The following elements are parsed:
+    - Cell contents, parsed as tables. If two groups of cells are disconnected
+      between each other, they will be parsed as two different tables.
+    - Images, parsed as PictureItem objects.
+
+    The DoclingDocument tables and pictures have their provenance information, including
+    the position in their original Excel worksheet. The position is represented by a
+    bounding box object with the cell indices as units (0-based index). The size of this
+    bounding box is the number of columns and rows that the table or picture spans.
+    """
+
+    @override
+    def __init__(
+        self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]
+    ) -> None:
+        """Initialize the MsExcelDocumentBackend object.
+
+        Parameters:
+            in_doc: The input document object.
+            path_or_stream: The path or stream to the Excel file.
+
+        Raises:
+            RuntimeError: An error occurred parsing the file.
+        """
         super().__init__(in_doc, path_or_stream)
 
         # Initialise the parents for the hierarchy
         self.max_levels = 10
 
-        self.parents:
+        self.parents: dict[int, Any] = {}
        for i in range(-1, self.max_levels):
            self.parents[i] = None
 
@@ -63,35 +115,47 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
            elif isinstance(self.path_or_stream, Path):
                self.workbook = load_workbook(filename=str(self.path_or_stream))
 
-            self.valid =
+            self.valid = self.workbook is not None
        except Exception as e:
            self.valid = False
 
            raise RuntimeError(
-                f"
+                f"MsExcelDocumentBackend could not load document with hash {self.document_hash}"
            ) from e
 
+    @override
    def is_valid(self) -> bool:
-        _log.
+        _log.debug(f"valid: {self.valid}")
        return self.valid
 
    @classmethod
+    @override
    def supports_pagination(cls) -> bool:
        return True
 
-
-
-
-
-
+    @override
+    def page_count(self) -> int:
+        if self.is_valid() and self.workbook:
+            return len(self.workbook.sheetnames)
+        else:
+            return 0
 
    @classmethod
-
+    @override
+    def supported_formats(cls) -> set[InputFormat]:
        return {InputFormat.XLSX}
 
+    @override
    def convert(self) -> DoclingDocument:
-
+        """Parse the Excel workbook into a DoclingDocument object.
 
+        Raises:
+            RuntimeError: Unable to run the conversion since the backend object failed to
+                initialize.
+
+        Returns:
+            The DoclingDocument object representing the Excel workbook.
+        """
        origin = DocumentOrigin(
            filename=self.file.name or "file.xlsx",
            mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
@@ -110,6 +174,14 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
        return doc
 
    def _convert_workbook(self, doc: DoclingDocument) -> DoclingDocument:
+        """Parse the Excel workbook and attach its structure to a DoclingDocument.
+
+        Args:
+            doc: A DoclingDocument object.
+
+        Returns:
+            A DoclingDocument object with the parsed items.
+        """
 
        if self.workbook is not None:
 
@@ -117,22 +189,34 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
            for sheet_name in self.workbook.sheetnames:
                _log.info(f"Processing sheet: {sheet_name}")
 
-                # Access the sheet by name
                sheet = self.workbook[sheet_name]
+                page_no = self.workbook.index(sheet) + 1
+                # do not rely on sheet.max_column, sheet.max_row if there are images
+                page = doc.add_page(page_no=page_no, size=Size(width=0, height=0))
 
                self.parents[0] = doc.add_group(
                    parent=None,
                    label=GroupLabel.SECTION,
                    name=f"sheet: {sheet_name}",
                )
-
                doc = self._convert_sheet(doc, sheet)
+                width, height = self._find_page_size(doc, page_no)
+                page.size = Size(width=width, height=height)
        else:
            _log.error("Workbook is not initialized.")
 
        return doc
 
-    def _convert_sheet(self, doc: DoclingDocument, sheet: Worksheet):
+    def _convert_sheet(self, doc: DoclingDocument, sheet: Worksheet) -> DoclingDocument:
+        """Parse an Excel worksheet and attach its structure to a DoclingDocument
+
+        Args:
+            doc: The DoclingDocument to be updated.
+            sheet: The Excel worksheet to be parsed.
+
+        Returns:
+            The updated DoclingDocument.
+        """
 
        doc = self._find_tables_in_sheet(doc, sheet)
 
@@ -140,47 +224,81 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
 
        return doc
 
-    def _find_tables_in_sheet(
-
-
+    def _find_tables_in_sheet(
+        self, doc: DoclingDocument, sheet: Worksheet
+    ) -> DoclingDocument:
+        """Find all tables in an Excel sheet and attach them to a DoclingDocument.
 
-
-
-
+        Args:
+            doc: The DoclingDocument to be updated.
+            sheet: The Excel worksheet to be parsed.
 
-
-
-
-                table_cells=[],
-            )
+        Returns:
+            The updated DoclingDocument.
+        """
 
-
-
-
-
-
-
-
-
-
-
-
-
+        if self.workbook is not None:
+            tables = self._find_data_tables(sheet)
+
+            for excel_table in tables:
+                origin_col = excel_table.anchor[0]
+                origin_row = excel_table.anchor[1]
+                num_rows = excel_table.num_rows
+                num_cols = excel_table.num_cols
+
+                table_data = TableData(
+                    num_rows=num_rows,
+                    num_cols=num_cols,
+                    table_cells=[],
                )
-                table_data.table_cells.append(cell)
 
-
+                for excel_cell in excel_table.data:
+
+                    cell = TableCell(
+                        text=excel_cell.text,
+                        row_span=excel_cell.row_span,
+                        col_span=excel_cell.col_span,
+                        start_row_offset_idx=excel_cell.row,
+                        end_row_offset_idx=excel_cell.row + excel_cell.row_span,
+                        start_col_offset_idx=excel_cell.col,
+                        end_col_offset_idx=excel_cell.col + excel_cell.col_span,
+                        column_header=excel_cell.row == 0,
+                        row_header=False,
+                    )
+                    table_data.table_cells.append(cell)
+
+                page_no = self.workbook.index(sheet) + 1
+                doc.add_table(
+                    data=table_data,
+                    parent=self.parents[0],
+                    prov=ProvenanceItem(
+                        page_no=page_no,
+                        charspan=(0, 0),
+                        bbox=BoundingBox.from_tuple(
+                            (
+                                origin_col,
+                                origin_row,
+                                origin_col + num_cols,
+                                origin_row + num_rows,
+                            ),
+                            origin=CoordOrigin.TOPLEFT,
+                        ),
+                    ),
+                )
 
        return doc
 
-    def _find_data_tables(self, sheet: Worksheet) ->
-        """
-
-
-
+    def _find_data_tables(self, sheet: Worksheet) -> list[ExcelTable]:
+        """Find all compact rectangular data tables in an Excel worksheet.
+
+        Args:
+            sheet: The Excel worksheet to be parsed.
 
-
-
+        Returns:
+            A list of ExcelTable objects representing the data tables.
+        """
+        tables: list[ExcelTable] = []  # List to store found tables
+        visited: set[tuple[int, int]] = set()  # Track already visited cells
 
        # Iterate over all cells in the sheet
        for ri, row in enumerate(sheet.iter_rows(values_only=False)):
@@ -191,9 +309,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
                    continue
 
                # If the cell starts a new table, find its bounds
-                table_bounds, visited_cells = self._find_table_bounds(
-                    sheet, ri, rj, visited
-                )
+                table_bounds, visited_cells = self._find_table_bounds(sheet, ri, rj)
 
                visited.update(visited_cells)  # Mark these cells as visited
                tables.append(table_bounds)
@@ -205,22 +321,25 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
        sheet: Worksheet,
        start_row: int,
        start_col: int,
-
-
-
-
+    ) -> tuple[ExcelTable, set[tuple[int, int]]]:
+        """Determine the bounds of a compact rectangular table.
+
+        Args:
+            sheet: The Excel worksheet to be parsed.
+            start_row: The row number of the starting cell.
+            start_col: The column number of the starting cell.
+
        Returns:
-
-        - A set of visited cell coordinates.
+            A tuple with an Excel table and a set of cell coordinates.
        """
-        _log.
+        _log.debug("find_table_bounds")
 
        max_row = self._find_table_bottom(sheet, start_row, start_col)
        max_col = self._find_table_right(sheet, start_row, start_col)
 
        # Collect the data within the bounds
        data = []
-        visited_cells = set()
+        visited_cells: set[tuple[int, int]] = set()
        for ri in range(start_row, max_row + 1):
            for rj in range(start_col, max_col + 1):
 
@@ -230,7 +349,6 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
                row_span = 1
                col_span = 1
 
-                # _log.info(sheet.merged_cells.ranges)
                for merged_range in sheet.merged_cells.ranges:
 
                    if (
@@ -254,7 +372,6 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
                        col_span=col_span,
                    )
                )
-                # _log.info(f"cell: {ri}, {rj} -> {ri - start_row}, {rj - start_col}, {row_span}, {col_span}: {str(cell.value)}")
 
                # Mark all cells in the span as visited
                for span_row in range(ri, ri + row_span):
@@ -263,6 +380,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
 
        return (
            ExcelTable(
+                anchor=(start_col, start_row),
                num_rows=max_row + 1 - start_row,
                num_cols=max_col + 1 - start_col,
                data=data,
@@ -270,10 +388,20 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
            visited_cells,
        )
 
-    def _find_table_bottom(
-
+    def _find_table_bottom(
+        self, sheet: Worksheet, start_row: int, start_col: int
+    ) -> int:
+        """Find the bottom boundary of a table.
 
-
+        Args:
+            sheet: The Excel worksheet to be parsed.
+            start_row: The starting row of the table.
+            start_col: The starting column of the table.
+
+        Returns:
+            The row index representing the bottom boundary of the table.
+        """
+        max_row: int = start_row
 
        while max_row < sheet.max_row - 1:
            # Get the cell value or check if it is part of a merged cell
@@ -296,10 +424,20 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
 
        return max_row
 
-    def _find_table_right(
-
+    def _find_table_right(
+        self, sheet: Worksheet, start_row: int, start_col: int
+    ) -> int:
+        """Find the right boundary of a table.
+
+        Args:
+            sheet: The Excel worksheet to be parsed.
+            start_row: The starting row of the table.
+            start_col: The starting column of the table.
 
-
+        Returns:
+            The column index representing the right boundary of the table."
+        """
+        max_col: int = start_col
 
        while max_col < sheet.max_column - 1:
            # Get the cell value or check if it is part of a merged cell
@@ -325,19 +463,63 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
    def _find_images_in_sheet(
        self, doc: DoclingDocument, sheet: Worksheet
    ) -> DoclingDocument:
+        """Find images in the Excel sheet and attach them to the DoclingDocument.
 
-
-
+        Args:
+            doc: The DoclingDocument to be updated.
+            sheet: The Excel worksheet to be parsed.
 
-
-
-
-
-
-
-
-
-
-
+        Returns:
+            The updated DoclingDocument.
+        """
+        if self.workbook is not None:
+            # Iterate over byte images in the sheet
+            for item in sheet._images:  # type: ignore[attr-defined]
+                try:
+                    image: Image = cast(Image, item)
+                    pil_image = PILImage.open(image.ref)  # type: ignore[arg-type]
+                    page_no = self.workbook.index(sheet) + 1
+                    anchor = (0, 0, 0, 0)
+                    if isinstance(image.anchor, TwoCellAnchor):
+                        anchor = (
+                            image.anchor._from.col,
+                            image.anchor._from.row,
+                            image.anchor.to.col + 1,
+                            image.anchor.to.row + 1,
+                        )
+                    doc.add_picture(
+                        parent=self.parents[0],
+                        image=ImageRef.from_pil(image=pil_image, dpi=72),
+                        caption=None,
+                        prov=ProvenanceItem(
+                            page_no=page_no,
+                            charspan=(0, 0),
+                            bbox=BoundingBox.from_tuple(
+                                anchor, origin=CoordOrigin.TOPLEFT
+                            ),
+                        ),
+                    )
+                except:
+                    _log.error("could not extract the image from excel sheets")
 
        return doc
+
+    @staticmethod
+    def _find_page_size(
+        doc: DoclingDocument, page_no: PositiveInt
+    ) -> tuple[float, float]:
+        left: float = -1.0
+        top: float = -1.0
+        right: float = -1.0
+        bottom: float = -1.0
+        for item, _ in doc.iterate_items(traverse_pictures=True, page_no=page_no):
+            if not isinstance(item, DocItem):
+                continue
+            for provenance in item.prov:
+                bbox = provenance.bbox
+                left = min(left, bbox.l) if left != -1 else bbox.l
+                right = max(right, bbox.r) if right != -1 else bbox.r
+                top = min(top, bbox.t) if top != -1 else bbox.t
+                bottom = max(bottom, bbox.b) if bottom != -1 else bbox.b
+
+        return (right - left, bottom - top)
docling/backend/msword_backend.py
CHANGED
@@ -850,7 +850,8 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
    def _handle_pictures(
        self, docx_obj: DocxDocument, drawing_blip: Any, doc: DoclingDocument
    ) -> None:
-        def get_docx_image(drawing_blip):
+        def get_docx_image(drawing_blip: Any) -> Optional[bytes]:
+            image_data: Optional[bytes] = None
            rId = drawing_blip[0].get(
                "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
            )
@@ -862,19 +863,26 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
 
        level = self._get_level()
        # Open the BytesIO object with PIL to create an Image
-
-
-
-            pil_image = Image.open(image_bytes)
-            doc.add_picture(
-                parent=self.parents[level - 1],
-                image=ImageRef.from_pil(image=pil_image, dpi=72),
-                caption=None,
-            )
-        except (UnidentifiedImageError, OSError) as e:
-            _log.warning("Warning: image cannot be loaded by Pillow")
+        image_data: Optional[bytes] = get_docx_image(drawing_blip)
+        if image_data is None:
+            _log.warning("Warning: image cannot be found")
            doc.add_picture(
                parent=self.parents[level - 1],
                caption=None,
            )
+        else:
+            try:
+                image_bytes = BytesIO(image_data)
+                pil_image = Image.open(image_bytes)
+                doc.add_picture(
+                    parent=self.parents[level - 1],
+                    image=ImageRef.from_pil(image=pil_image, dpi=72),
+                    caption=None,
+                )
+            except (UnidentifiedImageError, OSError) as e:
+                _log.warning("Warning: image cannot be loaded by Pillow")
+                doc.add_picture(
+                    parent=self.parents[level - 1],
+                    caption=None,
+                )
        return
docling/cli/main.py
CHANGED
@@ -40,6 +40,7 @@ from docling.datamodel.pipeline_options import (
    VlmModelType,
    VlmPipelineOptions,
    granite_vision_vlm_conversion_options,
+    granite_vision_vlm_ollama_conversion_options,
    smoldocling_vlm_conversion_options,
    smoldocling_vlm_mlx_conversion_options,
)
@@ -153,6 +154,7 @@ def export_documents(
    output_dir: Path,
    export_json: bool,
    export_html: bool,
+    export_html_split_page: bool,
    export_md: bool,
    export_txt: bool,
    export_doctags: bool,
@@ -180,7 +182,15 @@ def export_documents(
                fname = output_dir / f"{doc_filename}.html"
                _log.info(f"writing HTML output to {fname}")
                conv_res.document.save_as_html(
-                    filename=fname, image_mode=image_export_mode
+                    filename=fname, image_mode=image_export_mode, split_page_view=False
+                )
+
+            # Export HTML format:
+            if export_html_split_page:
+                fname = output_dir / f"{doc_filename}.html"
+                _log.info(f"writing HTML output to {fname}")
+                conv_res.document.save_as_html(
+                    filename=fname, image_mode=image_export_mode, split_page_view=True
                )
 
            # Export Text format:
@@ -471,6 +481,7 @@ def convert(
 
    export_json = OutputFormat.JSON in to_formats
    export_html = OutputFormat.HTML in to_formats
+    export_html_split_page = OutputFormat.HTML_SPLIT_PAGE in to_formats
    export_md = OutputFormat.MARKDOWN in to_formats
    export_txt = OutputFormat.TEXT in to_formats
    export_doctags = OutputFormat.DOCTAGS in to_formats
@@ -531,10 +542,16 @@ def convert(
            backend=backend,  # pdf_backend
        )
    elif pipeline == PdfPipeline.VLM:
-        pipeline_options = VlmPipelineOptions(
+        pipeline_options = VlmPipelineOptions(
+            enable_remote_services=enable_remote_services,
+        )
 
        if vlm_model == VlmModelType.GRANITE_VISION:
            pipeline_options.vlm_options = granite_vision_vlm_conversion_options
+        elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
+            pipeline_options.vlm_options = (
+                granite_vision_vlm_ollama_conversion_options
+            )
        elif vlm_model == VlmModelType.SMOLDOCLING:
            pipeline_options.vlm_options = smoldocling_vlm_conversion_options
            if sys.platform == "darwin":
@@ -578,6 +595,7 @@ def convert(
        output_dir=output,
        export_json=export_json,
        export_html=export_html,
+        export_html_split_page=export_html_split_page,
        export_md=export_md,
        export_txt=export_txt,
        export_doctags=export_doctags,
docling/datamodel/base_models.py
CHANGED
@@ -50,6 +50,7 @@ class OutputFormat(str, Enum):
    MARKDOWN = "md"
    JSON = "json"
    HTML = "html"
+    HTML_SPLIT_PAGE = "html_split_page"
    TEXT = "text"
    DOCTAGS = "doctags"
 
@@ -262,3 +263,35 @@ class Page(BaseModel):
    @property
    def image(self) -> Optional[Image]:
        return self.get_image(scale=self._default_image_scale)
+
+
+## OpenAI API Request / Response Models ##
+
+
+class OpenAiChatMessage(BaseModel):
+    role: str
+    content: str
+
+
+class OpenAiResponseChoice(BaseModel):
+    index: int
+    message: OpenAiChatMessage
+    finish_reason: str
+
+
+class OpenAiResponseUsage(BaseModel):
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+class OpenAiApiResponse(BaseModel):
+    model_config = ConfigDict(
+        protected_namespaces=(),
+    )
+
+    id: str
+    model: Optional[str] = None  # returned by openai
+    choices: List[OpenAiResponseChoice]
+    created: int
+    usage: OpenAiResponseUsage
docling/datamodel/document.py
CHANGED
@@ -283,6 +283,13 @@ class _DocumentConversionInput(BaseModel):
            if mime is None:  # must guess from
                with obj.open("rb") as f:
                    content = f.read(1024)  # Read first 1KB
+            if mime is not None and mime.lower() == "application/zip":
+                if obj.suffixes[-1].lower() == ".xlsx":
+                    mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+                elif obj.suffixes[-1].lower() == ".docx":
+                    mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+                elif obj.suffixes[-1].lower() == ".pptx":
+                    mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
 
        elif isinstance(obj, DocumentStream):
            content = obj.stream.read(8192)
docling/datamodel/pipeline_options.py
CHANGED
@@ -213,8 +213,8 @@ class PictureDescriptionBaseOptions(BaseOptions):
    batch_size: int = 8
    scale: float = 2
 
-
-    0.
+    picture_area_threshold: float = (
+        0.05  # percentage of the area for a picture to processed with the models
    )
 
 
@@ -266,6 +266,7 @@ class ResponseFormat(str, Enum):
class InferenceFramework(str, Enum):
    MLX = "mlx"
    TRANSFORMERS = "transformers"
+    OPENAI = "openai"
 
 
class HuggingFaceVlmOptions(BaseVlmOptions):
@@ -284,6 +285,19 @@ class HuggingFaceVlmOptions(BaseVlmOptions):
        return self.repo_id.replace("/", "--")
 
 
+class ApiVlmOptions(BaseVlmOptions):
+    kind: Literal["api_model_options"] = "api_model_options"
+
+    url: AnyUrl = AnyUrl(
+        "http://localhost:11434/v1/chat/completions"
+    )  # Default to ollama
+    headers: Dict[str, str] = {}
+    params: Dict[str, Any] = {}
+    scale: float = 2.0
+    timeout: float = 60
+    response_format: ResponseFormat
+
+
smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
    repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
    prompt="Convert this page to docling.",
@@ -307,10 +321,20 @@ granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
    inference_framework=InferenceFramework.TRANSFORMERS,
)
 
+granite_vision_vlm_ollama_conversion_options = ApiVlmOptions(
+    url=AnyUrl("http://localhost:11434/v1/chat/completions"),
+    params={"model": "granite3.2-vision:2b"},
+    prompt="OCR the full page to markdown.",
+    scale=1.0,
+    timeout=120,
+    response_format=ResponseFormat.MARKDOWN,
+)
+
 
class VlmModelType(str, Enum):
    SMOLDOCLING = "smoldocling"
    GRANITE_VISION = "granite_vision"
+    GRANITE_VISION_OLLAMA = "granite_vision_ollama"
 
 
# Define an enum for the backend options
@@ -362,7 +386,9 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
        False  # (To be used with vlms, or other generative models)
    )
    # If True, text from backend will be used instead of generated text
-    vlm_options: Union[HuggingFaceVlmOptions] =
+    vlm_options: Union[HuggingFaceVlmOptions, ApiVlmOptions] = (
+        smoldocling_vlm_conversion_options
+    )
 
 
class PdfPipelineOptions(PaginatedPipelineOptions):
docling/models/api_vlm_model.py
ADDED
@@ -0,0 +1,67 @@
+from typing import Iterable
+
+from docling.datamodel.base_models import Page, VlmPrediction
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import ApiVlmOptions
+from docling.exceptions import OperationNotAllowed
+from docling.models.base_model import BasePageModel
+from docling.utils.api_image_request import api_image_request
+from docling.utils.profiling import TimeRecorder
+
+
+class ApiVlmModel(BasePageModel):
+
+    def __init__(
+        self,
+        enabled: bool,
+        enable_remote_services: bool,
+        vlm_options: ApiVlmOptions,
+    ):
+        self.enabled = enabled
+        self.vlm_options = vlm_options
+        if self.enabled:
+            if not enable_remote_services:
+                raise OperationNotAllowed(
+                    "Connections to remote services is only allowed when set explicitly. "
+                    "pipeline_options.enable_remote_services=True, or using the CLI "
+                    "--enable-remote-services."
+                )
+
+            self.timeout = self.vlm_options.timeout
+            self.prompt_content = (
+                f"This is a page from a document.\n{self.vlm_options.prompt}"
+            )
+            self.params = {
+                **self.vlm_options.params,
+                "temperature": 0,
+            }
+
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
+        for page in page_batch:
+            assert page._backend is not None
+            if not page._backend.is_valid():
+                yield page
+            else:
+                with TimeRecorder(conv_res, "vlm"):
+                    assert page.size is not None
+
+                    hi_res_image = page.get_image(scale=self.vlm_options.scale)
+                    assert hi_res_image is not None
+                    if hi_res_image:
+                        if hi_res_image.mode != "RGB":
+                            hi_res_image = hi_res_image.convert("RGB")
+
+                    page_tags = api_image_request(
+                        image=hi_res_image,
+                        prompt=self.prompt_content,
+                        url=self.vlm_options.url,
+                        timeout=self.timeout,
+                        headers=self.vlm_options.headers,
+                        **self.params,
+                    )
+
+                    page.predictions.vlm_response = VlmPrediction(text=page_tags)
+
+                yield page
docling/models/picture_description_api_model.py
CHANGED
@@ -1,12 +1,7 @@
-import base64
-import io
-import logging
 from pathlib import Path
-from typing import Iterable,
+from typing import Iterable, Optional, Type, Union
 
-import requests
 from PIL import Image
-from pydantic import BaseModel, ConfigDict
 
 from docling.datamodel.pipeline_options import (
    AcceleratorOptions,
@@ -15,37 +10,7 @@ from docling.datamodel.pipeline_options import (
)
from docling.exceptions import OperationNotAllowed
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
-
-_log = logging.getLogger(__name__)
-
-
-class ChatMessage(BaseModel):
-    role: str
-    content: str
-
-
-class ResponseChoice(BaseModel):
-    index: int
-    message: ChatMessage
-    finish_reason: str
-
-
-class ResponseUsage(BaseModel):
-    prompt_tokens: int
-    completion_tokens: int
-    total_tokens: int
-
-
-class ApiResponse(BaseModel):
-    model_config = ConfigDict(
-        protected_namespaces=(),
-    )
-
-    id: str
-    model: Optional[str] = None  # returned by openai
-    choices: List[ResponseChoice]
-    created: int
-    usage: ResponseUsage
+from docling.utils.api_image_request import api_image_request
 
 
class PictureDescriptionApiModel(PictureDescriptionBaseModel):
@@ -83,43 +48,11 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel):
        # Note: technically we could make a batch request here,
        # but not all APIs will allow for it. For example, vllm won't allow more than 1.
        for image in images:
-
-
-
-
-            messages = [
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": self.options.prompt,
-                        },
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"data:image/png;base64,{image_base64}"
-                            },
-                        },
-                    ],
-                }
-            ]
-
-            payload = {
-                "messages": messages,
-                **self.options.params,
-            }
-
-            r = requests.post(
-                str(self.options.url),
-                headers=self.options.headers,
-                json=payload,
+            yield api_image_request(
+                image=image,
+                prompt=self.options.prompt,
+                url=self.options.url,
                timeout=self.options.timeout,
+                headers=self.options.headers,
+                **self.options.params,
            )
-            if not r.ok:
-                _log.error(f"Error calling the API. Reponse was {r.text}")
-            r.raise_for_status()
-
-            api_resp = ApiResponse.model_validate_json(r.text)
-            generated_text = api_resp.choices[0].message.content.strip()
-            yield generated_text
docling/models/picture_description_base_model.py
CHANGED
@@ -63,8 +63,20 @@ class PictureDescriptionBaseModel(
        elements: List[PictureItem] = []
        for el in element_batch:
            assert isinstance(el.item, PictureItem)
-
-
+            describe_image = True
+            # Don't describe the image if it's smaller than the threshold
+            if len(el.item.prov) > 0:
+                prov = el.item.prov[0]  # PictureItems have at most a single provenance
+                page = doc.pages.get(prov.page_no)
+                if page is not None:
+                    page_area = page.size.width * page.size.height
+                    if page_area > 0:
+                        area_fraction = prov.bbox.area() / page_area
+                        if area_fraction < self.options.picture_area_threshold:
+                            describe_image = False
+            if describe_image:
+                elements.append(el.item)
+                images.append(el.image)
 
        outputs = self._annotate_images(images)
 
docling/pipeline/standard_pdf_pipeline.py
CHANGED
@@ -2,7 +2,7 @@ import logging
 import sys
 import warnings
 from pathlib import Path
-from typing import Optional
+from typing import Optional, cast
 
 from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
 
@@ -226,7 +226,11 @@ class StandardPdfPipeline(PaginatedPipeline):
                    and self.pipeline_options.generate_table_images
                ):
                    page_ix = element.prov[0].page_no - 1
-                    page =
+                    page = next(
+                        (p for p in conv_res.pages if p.page_no == page_ix),
+                        cast("Page", None),
+                    )
+                    assert page is not None
                    assert page.size is not None
                    assert page.image is not None
 
docling/pipeline/vlm_pipeline.py
CHANGED
@@ -15,11 +15,14 @@ from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import InputFormat, Page
 from docling.datamodel.document import ConversionResult, InputDocument
 from docling.datamodel.pipeline_options import (
+    ApiVlmOptions,
+    HuggingFaceVlmOptions,
    InferenceFramework,
    ResponseFormat,
    VlmPipelineOptions,
)
from docling.datamodel.settings import settings
+from docling.models.api_vlm_model import ApiVlmModel
from docling.models.hf_mlx_model import HuggingFaceMlxModel
from docling.models.hf_vlm_model import HuggingFaceVlmModel
from docling.pipeline.base_pipeline import PaginatedPipeline
@@ -57,27 +60,34 @@ class VlmPipeline(PaginatedPipeline):
 
        self.keep_images = self.pipeline_options.generate_page_images
 
-        if (
-            self.pipeline_options.vlm_options.inference_framework
-            == InferenceFramework.MLX
-        ):
+        if isinstance(pipeline_options.vlm_options, ApiVlmOptions):
            self.build_pipe = [
-
+                ApiVlmModel(
                    enabled=True,  # must be always enabled for this pipeline to make sense.
-
-
-                    vlm_options=self.pipeline_options.vlm_options,
-                ),
-            ]
-        else:
-            self.build_pipe = [
-                HuggingFaceVlmModel(
-                    enabled=True,  # must be always enabled for this pipeline to make sense.
-                    artifacts_path=artifacts_path,
-                    accelerator_options=pipeline_options.accelerator_options,
-                    vlm_options=self.pipeline_options.vlm_options,
+                    enable_remote_services=self.pipeline_options.enable_remote_services,
+                    vlm_options=cast(ApiVlmOptions, self.pipeline_options.vlm_options),
                ),
            ]
+        elif isinstance(self.pipeline_options.vlm_options, HuggingFaceVlmOptions):
+            vlm_options = cast(HuggingFaceVlmOptions, self.pipeline_options.vlm_options)
+            if vlm_options.inference_framework == InferenceFramework.MLX:
+                self.build_pipe = [
+                    HuggingFaceMlxModel(
+                        enabled=True,  # must be always enabled for this pipeline to make sense.
+                        artifacts_path=artifacts_path,
+                        accelerator_options=pipeline_options.accelerator_options,
+                        vlm_options=vlm_options,
+                    ),
+                ]
+            else:
+                self.build_pipe = [
+                    HuggingFaceVlmModel(
+                        enabled=True,  # must be always enabled for this pipeline to make sense.
+                        artifacts_path=artifacts_path,
+                        accelerator_options=pipeline_options.accelerator_options,
+                        vlm_options=vlm_options,
+                    ),
+                ]
 
        self.enrichment_pipe = [
            # Other models working on `NodeItem` elements in the DoclingDocument
docling/utils/api_image_request.py
ADDED
@@ -0,0 +1,61 @@
+import base64
+import logging
+from io import BytesIO
+from typing import Dict, Optional
+
+import requests
+from PIL import Image
+from pydantic import AnyUrl
+
+from docling.datamodel.base_models import OpenAiApiResponse
+
+_log = logging.getLogger(__name__)
+
+
+def api_image_request(
+    image: Image.Image,
+    prompt: str,
+    url: AnyUrl,
+    timeout: float = 20,
+    headers: Optional[Dict[str, str]] = None,
+    **params,
+) -> str:
+    img_io = BytesIO()
+    image.save(img_io, "PNG")
+    image_base64 = base64.b64encode(img_io.getvalue()).decode("utf-8")
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{image_base64}"},
+                },
+                {
+                    "type": "text",
+                    "text": prompt,
+                },
+            ],
+        }
+    ]
+
+    payload = {
+        "messages": messages,
+        **params,
+    }
+
+    headers = headers or {}
+
+    r = requests.post(
+        str(url),
+        headers=headers,
+        json=payload,
+        timeout=timeout,
+    )
+    if not r.ok:
+        _log.error(f"Error calling the API. Response was {r.text}")
+    r.raise_for_status()
+
+    api_resp = OpenAiApiResponse.model_validate_json(r.text)
+    generated_text = api_resp.choices[0].message.content.strip()
+    return generated_text
{docling-2.29.0.dist-info → docling-2.30.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling
-Version: 2.
+Version: 2.30.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Home-page: https://github.com/docling-project/docling
 License: MIT
@@ -28,7 +28,7 @@ Provides-Extra: vlm
 Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
 Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
 Requires-Dist: certifi (>=2024.7.4)
-Requires-Dist: docling-core[chunking] (>=2.
+Requires-Dist: docling-core[chunking] (>=2.26.0,<3.0.0)
 Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
 Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
 Requires-Dist: easyocr (>=1.7,<2.0)
@@ -58,7 +58,7 @@ Requires-Dist: tesserocr (>=2.7.1,<3.0.0) ; extra == "tesserocr"
 Requires-Dist: tqdm (>=4.65.0,<5.0.0)
 Requires-Dist: transformers (>=4.42.0,<4.43.0) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "vlm")
 Requires-Dist: transformers (>=4.46.0,<5.0.0) ; (sys_platform != "darwin" or platform_machine != "x86_64") and (extra == "vlm")
-Requires-Dist: typer (>=0.12.5,<0.
+Requires-Dist: typer (>=0.12.5,<0.16.0)
 Project-URL: Repository, https://github.com/docling-project/docling
 Description-Content-Type: text/markdown
 
{docling-2.29.0.dist-info → docling-2.30.0.dist-info}/RECORD
CHANGED
@@ -14,9 +14,9 @@ docling/backend/html_backend.py,sha256=ghPLZfdBEPBzLIO9IWzzx0t1Os9B9r4VyGyEZtMsZ
 docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
 docling/backend/md_backend.py,sha256=lqDiKIBHGsA0u-H1n9oVpPlrcpVT4gYRuNXXcyGlftM,17219
-docling/backend/msexcel_backend.py,sha256=
+docling/backend/msexcel_backend.py,sha256=KRPoHRDv-mqko9RUHGQCzdRrvDo7g7zSU2Z5zoL_Hzo,18106
 docling/backend/mspowerpoint_backend.py,sha256=X55-1anXm562wxAuYn5uwQkqKjirmgrn1KfbeaKUbXw,17273
-docling/backend/msword_backend.py,sha256=
+docling/backend/msword_backend.py,sha256=CgNPjU8SQ7rkAYH_BGiUyv568MGhoH3R0M39WBT8gkc,32468
 docling/backend/pdf_backend.py,sha256=odWb1rxk3WCUIEJMhq-dYFNUQ1pSDuNHbU9wlTZIRAs,2211
 docling/backend/pypdfium2_backend.py,sha256=wRwhA5XHRqL7vyNhCAHM6P-ONkwtyjKG9LgC4NJ-4i8,10784
 docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -24,17 +24,18 @@ docling/backend/xml/jats_backend.py,sha256=HXailrDjiwu4swwFnXy3lNfRtLZmkBBp4yqaf
 docling/backend/xml/uspto_backend.py,sha256=H0jwIt2skOke_yEUk0wfXCtodrB-hrj2ygLtB3jMWaI,71056
 docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
 docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/cli/main.py,sha256=
+docling/cli/main.py,sha256=TD-cEf4giuk1O5NPoB-heXHHteUqKoLsj4Rg4xsBUrs,26119
 docling/cli/models.py,sha256=tM_qbMM3YOPxFU7JlME96MLbtd1CX_bOAK7FS-NhJvY,3979
 docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
 docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docling/datamodel/base_models.py,sha256=
-docling/datamodel/document.py,sha256=
-docling/datamodel/pipeline_options.py,sha256=
+docling/datamodel/base_models.py,sha256=fJfFMaHXc-CUrAVfhPF8lKrdb-gaXr2tohx6dHldvRU,7926
+docling/datamodel/document.py,sha256=V0iK1MYOkPIzd4eQa-G8unp-t01fktlG9wwQ1IwE6Zg,15109
+docling/datamodel/pipeline_options.py,sha256=iGLijZR-YOtmg0RQs59pqoG_1uGsDYbg5wMDD0FWYx4,13351
 docling/datamodel/settings.py,sha256=bNMdowIKv7RUchabQTo4rFNEsxfB6pGg2LoZSY634zo,1869
 docling/document_converter.py,sha256=LCX92FzgmXNJLFVSQfjqH9SGe3zA7FGwARedSigFIpY,13798
 docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
 docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling/models/api_vlm_model.py,sha256=6SxMsFPf0SbT365P67KspdpF3TXZSeu5kmPE3lXAhW4,2470
 docling/models/base_model.py,sha256=9xJ0VIlpR2BzqoEWMC8LYp5Y96QAEKip4b_HCwCDltY,2931
 docling/models/base_ocr_model.py,sha256=xvKMhE4ZOGkL2GAhpDvrAHLLFps3ZUfxXZ5ctL1lXUw,7226
 docling/models/code_formula_model.py,sha256=mOu5luYMzyrCCr8MRGOciNcSvULpQysDd_FXn96WPc8,11477
@@ -50,8 +51,8 @@ docling/models/layout_model.py,sha256=7fQWipGV1HDrvbP4uOKa9QAicQl89jp7lailQmbFL3
 docling/models/ocr_mac_model.py,sha256=2pZaUWg19go_u88mKWr5y_52PAYEN__GsbyUYLdY4zo,5353
 docling/models/page_assemble_model.py,sha256=ivkCdbZJpFcGl7CazLegcP1tLK8ZixDfVhQXqsdW_UA,6359
 docling/models/page_preprocessing_model.py,sha256=Ja7RE1K-2fWxWrxOzNm6QDSGqFf-MY6_uY5OAZ7AQSo,3078
-docling/models/picture_description_api_model.py,sha256=
-docling/models/picture_description_base_model.py,sha256=
+docling/models/picture_description_api_model.py,sha256=DowWOU93MXAjj3N1A9ex88Sa3Nic2c3dfoOYir5jZEA,2064
+docling/models/picture_description_base_model.py,sha256=khuhQZDAZemZMe4BsrBMpjEwkY3nhMFXuczjQpSQrVY,2971
 docling/models/picture_description_vlm_model.py,sha256=I2Un3vfhQVeWEyZ3Sd3Kygw9la2QSZCwDfl_7XVlMm4,4042
 docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurHCZjp4,858
@@ -63,11 +64,12 @@ docling/models/tesseract_ocr_model.py,sha256=UpLAgKgJtBgbKtJELmKBNMcejJJKBCyFK0q
 docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/pipeline/base_pipeline.py,sha256=9ABK-Cr235bxE5vweoIA5rgBZV_EF8qFxAqLI27H_Pg,8749
 docling/pipeline/simple_pipeline.py,sha256=mZqANqUtAOFAyqQEShErQnAUz6tJFOl6zVoazEDJ_wE,2254
-docling/pipeline/standard_pdf_pipeline.py,sha256=
-docling/pipeline/vlm_pipeline.py,sha256=
+docling/pipeline/standard_pdf_pipeline.py,sha256=gPNqUparhIONG4AyMekW9OfZ7t8YMs0odhtbE6Z-Hxw,10784
+docling/pipeline/vlm_pipeline.py,sha256=dqQYAd3viW577TVSZltnB4P-f-ZUWQh0J8SSFDuQN6Q,9738
 docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
 docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
+docling/utils/api_image_request.py,sha256=_CgdzmPqdsyXmyYUFGLZcXcoH586qC6A1p5vsNbj1Q0,1416
 docling/utils/export.py,sha256=4W-ptI1fLdVrtoqHdHY1RF9Xn2Yescs-hunITqxJ7Is,4697
 docling/utils/glm_utils.py,sha256=W4JRoP0xQ6SJmhhIoAfcKxm5dr1CFvLHp8pqI1kdhxs,12250
 docling/utils/layout_postprocessor.py,sha256=Q36DfcIYMuMfC6LzCBIrYtHK7pBE-Xyvjepz660s9UM,24508
@@ -77,8 +79,8 @@ docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,26
 docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
 docling/utils/utils.py,sha256=0ozCk7zUkYzxRVmYoIB2zA1lqjQOuaQzxfGuf1wmKW4,1866
 docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
-docling-2.
-docling-2.
-docling-2.
-docling-2.
-docling-2.
+docling-2.30.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling-2.30.0.dist-info/METADATA,sha256=HSI154YUnSDJE8BMMjOuu-U3EXQg0ksFuyuyzv7-UdU,9982
+docling-2.30.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+docling-2.30.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
+docling-2.30.0.dist-info/RECORD,,
{docling-2.29.0.dist-info → docling-2.30.0.dist-info}/LICENSE
File without changes
{docling-2.29.0.dist-info → docling-2.30.0.dist-info}/WHEEL
File without changes
{docling-2.29.0.dist-info → docling-2.30.0.dist-info}/entry_points.txt
File without changes