docling 2.29.0__py3-none-any.whl → 2.31.0__py3-none-any.whl
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- docling/backend/asciidoc_backend.py +7 -15
- docling/backend/csv_backend.py +1 -1
- docling/backend/docling_parse_backend.py +2 -2
- docling/backend/docling_parse_v2_backend.py +2 -2
- docling/backend/docling_parse_v4_backend.py +3 -4
- docling/backend/docx/latex/latex_dict.py +0 -5
- docling/backend/docx/latex/omml.py +4 -7
- docling/backend/html_backend.py +26 -9
- docling/backend/md_backend.py +5 -7
- docling/backend/msexcel_backend.py +271 -95
- docling/backend/mspowerpoint_backend.py +4 -7
- docling/backend/msword_backend.py +23 -15
- docling/backend/pdf_backend.py +2 -1
- docling/backend/pypdfium2_backend.py +3 -3
- docling/backend/xml/jats_backend.py +10 -13
- docling/backend/xml/uspto_backend.py +15 -19
- docling/cli/main.py +27 -9
- docling/cli/models.py +2 -3
- docling/datamodel/base_models.py +40 -5
- docling/datamodel/document.py +18 -10
- docling/datamodel/pipeline_options.py +29 -4
- docling/document_converter.py +5 -5
- docling/models/api_vlm_model.py +66 -0
- docling/models/base_model.py +2 -4
- docling/models/base_ocr_model.py +2 -2
- docling/models/code_formula_model.py +2 -1
- docling/models/document_picture_classifier.py +2 -1
- docling/models/easyocr_model.py +10 -11
- docling/models/factories/__init__.py +2 -2
- docling/models/factories/base_factory.py +1 -1
- docling/models/hf_mlx_model.py +4 -6
- docling/models/hf_vlm_model.py +7 -5
- docling/models/layout_model.py +2 -2
- docling/models/ocr_mac_model.py +3 -4
- docling/models/page_assemble_model.py +7 -12
- docling/models/page_preprocessing_model.py +2 -1
- docling/models/picture_description_api_model.py +9 -75
- docling/models/picture_description_base_model.py +16 -5
- docling/models/picture_description_vlm_model.py +2 -3
- docling/models/rapid_ocr_model.py +2 -3
- docling/models/readingorder_model.py +8 -23
- docling/models/table_structure_model.py +2 -6
- docling/models/tesseract_ocr_cli_model.py +17 -16
- docling/models/tesseract_ocr_model.py +8 -6
- docling/pipeline/base_pipeline.py +4 -8
- docling/pipeline/simple_pipeline.py +0 -1
- docling/pipeline/standard_pdf_pipeline.py +6 -3
- docling/pipeline/vlm_pipeline.py +27 -20
- docling/utils/api_image_request.py +61 -0
- docling/utils/export.py +2 -4
- docling/utils/glm_utils.py +2 -2
- docling/utils/layout_postprocessor.py +4 -2
- docling/utils/model_downloader.py +7 -7
- docling/utils/utils.py +1 -1
- {docling-2.29.0.dist-info → docling-2.31.0.dist-info}/METADATA +4 -3
- docling-2.31.0.dist-info/RECORD +86 -0
- docling-2.29.0.dist-info/RECORD +0 -84
- {docling-2.29.0.dist-info → docling-2.31.0.dist-info}/LICENSE +0 -0
- {docling-2.29.0.dist-info → docling-2.31.0.dist-info}/WHEEL +0 -0
- {docling-2.29.0.dist-info → docling-2.31.0.dist-info}/entry_points.txt +0 -0
docling/backend/pypdfium2_backend.py
CHANGED
@@ -1,8 +1,9 @@
 import logging
 import random
+from collections.abc import Iterable
 from io import BytesIO
 from pathlib import Path
-from typing import TYPE_CHECKING,
+from typing import TYPE_CHECKING, List, Optional, Union

 import pypdfium2 as pdfium
 import pypdfium2.raw as pdfium_c
@@ -29,7 +30,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
         self.valid = True  # No better way to tell from pypdfium.
         try:
             self._ppage: pdfium.PdfPage = pdfium_doc[page_no]
-        except PdfiumError
+        except PdfiumError:
             _log.info(
                 f"An exception occurred when loading page {page_no} of document {document_hash}.",
                 exc_info=True,
@@ -225,7 +226,6 @@ class PyPdfiumPageBackend(PdfPageBackend):
     def get_page_image(
         self, scale: float = 1, cropbox: Optional[BoundingBox] = None
     ) -> Image.Image:
-
         page_size = self.get_size()

         if not cropbox:
docling/backend/xml/jats_backend.py
CHANGED
@@ -102,13 +102,13 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):

         doc_info: etree.DocInfo = self.tree.docinfo
         if doc_info.system_url and any(
-
+            kwd in doc_info.system_url for kwd in JATS_DTD_URL
         ):
             self.valid = True
             return
         for ent in doc_info.internalDTD.iterentities():
             if ent.system_url and any(
-
+                kwd in ent.system_url for kwd in JATS_DTD_URL
             ):
                 self.valid = True
                 return
@@ -232,10 +232,9 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
             # TODO: once superscript is supported, add label with formatting
             aff = aff.removeprefix(f"{label[0].text}, ")
             affiliation_names.append(aff)
-        affiliation_ids_names =
-            id
-
-        }
+        affiliation_ids_names = dict(
+            zip(meta.xpath(".//aff[@id]/@id"), affiliation_names)
+        )

         # Get author names and affiliation names
         for author_node in meta.xpath(
@@ -300,7 +299,6 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
     def _add_abstract(
         self, doc: DoclingDocument, xml_components: XMLComponents
     ) -> None:
-
         for abstract in xml_components["abstract"]:
             text: str = abstract["content"]
             title: str = abstract["label"] or DEFAULT_HEADER_ABSTRACT
@@ -349,7 +347,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):

         return

-    def _parse_element_citation(self, node: etree._Element) -> str:
+    def _parse_element_citation(self, node: etree._Element) -> str:  # noqa: C901
         citation: Citation = {
             "author_names": "",
             "title": "",
@@ -440,7 +438,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
         citation["page"] = node.xpath("fpage")[0].text.replace("\n", " ").strip()
         if len(node.xpath("lpage")) > 0:
             citation["page"] += (
-                "–" + node.xpath("lpage")[0].text.replace("\n", " ").strip()
+                "–" + node.xpath("lpage")[0].text.replace("\n", " ").strip()  # noqa: RUF001
             )

         # Flatten the citation to string
@@ -595,9 +593,8 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):

             try:
                 self._add_table(doc, parent, table)
-            except Exception
-                _log.warning(f"Skipping unsupported table in {
-                pass
+            except Exception:
+                _log.warning(f"Skipping unsupported table in {self.file!s}")

         return

@@ -609,7 +606,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
         )
         return

-    def _walk_linear(
+    def _walk_linear(  # noqa: C901
         self, doc: DoclingDocument, parent: NodeItem, node: etree._Element
     ) -> str:
         skip_tags = ["term"]
docling/backend/xml/uspto_backend.py
CHANGED
@@ -122,7 +122,6 @@ class PatentUsptoDocumentBackend(DeclarativeDocumentBackend):

     @override
     def convert(self) -> DoclingDocument:
-
         if self.parser is not None:
             doc = self.parser.parse(self.patent_content)
             if doc is None:
@@ -163,7 +162,6 @@ class PatentUspto(ABC):
         Returns:
             The patent parsed as a docling document.
         """
-        pass


 class PatentUsptoIce(PatentUspto):
@@ -265,7 +263,7 @@ class PatentUsptoIce(PatentUspto):
         self.style_html = HtmlEntity()

     @override
-    def startElement(self, tag, attributes):
+    def startElement(self, tag, attributes):
         """Signal the start of an element.

         Args:
@@ -281,7 +279,7 @@ class PatentUsptoIce(PatentUspto):
         self._start_registered_elements(tag, attributes)

     @override
-    def skippedEntity(self, name):
+    def skippedEntity(self, name):
         """Receive notification of a skipped entity.

         HTML entities will be skipped by the parser. This method will unescape them
@@ -315,7 +313,7 @@ class PatentUsptoIce(PatentUspto):
         self.text += unescaped

     @override
-    def endElement(self, tag):
+    def endElement(self, tag):
         """Signal the end of an element.

         Args:
@@ -603,7 +601,7 @@ class PatentUsptoGrantV2(PatentUspto):
         self.style_html = HtmlEntity()

     @override
-    def startElement(self, tag, attributes):
+    def startElement(self, tag, attributes):
         """Signal the start of an element.

         Args:
@@ -616,7 +614,7 @@ class PatentUsptoGrantV2(PatentUspto):
         self._start_registered_elements(tag, attributes)

     @override
-    def skippedEntity(self, name):
+    def skippedEntity(self, name):
         """Receive notification of a skipped entity.

         HTML entities will be skipped by the parser. This method will unescape them
@@ -650,7 +648,7 @@ class PatentUsptoGrantV2(PatentUspto):
         self.text += unescaped

     @override
-    def endElement(self, tag):
+    def endElement(self, tag):
         """Signal the end of an element.

         Args:
@@ -691,7 +689,7 @@ class PatentUsptoGrantV2(PatentUspto):
         if tag in [member.value for member in self.Element]:
             if (
                 tag == self.Element.HEADING.value
-                and
+                and self.Element.SDOCL.value not in self.property
             ):
                 level_attr: str = attributes.get("LVL", "")
                 new_level: int = int(level_attr) if level_attr.isnumeric() else 1
@@ -743,7 +741,7 @@ class PatentUsptoGrantV2(PatentUspto):
             # headers except claims statement
             elif (
                 self.Element.HEADING.value in self.property
-                and
+                and self.Element.SDOCL.value not in self.property
                 and text.strip()
             ):
                 self.parents[self.level + 1] = self.doc.add_heading(
@@ -1164,7 +1162,7 @@ class PatentUsptoAppV1(PatentUspto):
         self.style_html = HtmlEntity()

     @override
-    def startElement(self, tag, attributes):
+    def startElement(self, tag, attributes):
         """Signal the start of an element.

         Args:
@@ -1177,7 +1175,7 @@ class PatentUsptoAppV1(PatentUspto):
         self._start_registered_elements(tag, attributes)

     @override
-    def skippedEntity(self, name):
+    def skippedEntity(self, name):
         """Receive notification of a skipped entity.

         HTML entities will be skipped by the parser. This method will unescape them
@@ -1211,7 +1209,7 @@ class PatentUsptoAppV1(PatentUspto):
         self.text += unescaped

     @override
-    def endElement(self, tag):
+    def endElement(self, tag):
         """Signal the end of an element.

         Args:
@@ -1474,9 +1472,7 @@ class XmlTable:
             if cw == 0:
                 offset_w0.append(col["offset"][ic])

-        min_colinfo["offset"] = sorted(
-            list(set(col["offset"] + min_colinfo["offset"]))
-        )
+        min_colinfo["offset"] = sorted(set(col["offset"] + min_colinfo["offset"]))

         # add back the 0 width cols to offset list
         offset_w0 = list(set(offset_w0))
@@ -1527,7 +1523,7 @@ class XmlTable:

         return ncols_max

-    def _parse_table(self, table: Tag) -> TableData:
+    def _parse_table(self, table: Tag) -> TableData:  # noqa: C901
         """Parse the content of a table tag.

         Args:
@@ -1722,7 +1718,7 @@ class HtmlEntity:
         "0": "⁰",
         "+": "⁺",
         "-": "⁻",
-        "−": "⁻",
+        "−": "⁻",  # noqa: RUF001
         "=": "⁼",
         "(": "⁽",
         ")": "⁾",
@@ -1746,7 +1742,7 @@ class HtmlEntity:
         "0": "₀",
         "+": "₊",
         "-": "₋",
-        "−": "₋",
+        "−": "₋",  # noqa: RUF001
         "=": "₌",
         "(": "₍",
         ")": "₎",
docling/cli/main.py
CHANGED
@@ -6,14 +6,16 @@ import sys
 import tempfile
 import time
 import warnings
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Annotated, Dict,
+from typing import Annotated, Dict, List, Optional, Type

 import rich.table
 import typer
 from docling_core.types.doc import ImageRefMode
 from docling_core.utils.file import resolve_source_to_path
 from pydantic import TypeAdapter
+from rich.console import Console

 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
@@ -40,6 +42,7 @@ from docling.datamodel.pipeline_options import (
     VlmModelType,
     VlmPipelineOptions,
     granite_vision_vlm_conversion_options,
+    granite_vision_vlm_ollama_conversion_options,
     smoldocling_vlm_conversion_options,
     smoldocling_vlm_mlx_conversion_options,
 )
@@ -52,7 +55,6 @@ warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|
 warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")

 _log = logging.getLogger(__name__)
-from rich.console import Console

 console = Console()
 err_console = Console(stderr=True)
@@ -153,12 +155,12 @@ def export_documents(
     output_dir: Path,
     export_json: bool,
     export_html: bool,
+    export_html_split_page: bool,
     export_md: bool,
     export_txt: bool,
     export_doctags: bool,
     image_export_mode: ImageRefMode,
 ):
-
     success_count = 0
     failure_count = 0

@@ -180,7 +182,15 @@ def export_documents(
                 fname = output_dir / f"{doc_filename}.html"
                 _log.info(f"writing HTML output to {fname}")
                 conv_res.document.save_as_html(
-                    filename=fname, image_mode=image_export_mode
+                    filename=fname, image_mode=image_export_mode, split_page_view=False
+                )
+
+            # Export HTML format:
+            if export_html_split_page:
+                fname = output_dir / f"{doc_filename}.html"
+                _log.info(f"writing HTML output to {fname}")
+                conv_res.document.save_as_html(
+                    filename=fname, image_mode=image_export_mode, split_page_view=True
                 )

             # Export Text format:
@@ -223,7 +233,7 @@ def _split_list(raw: Optional[str]) -> Optional[List[str]]:


 @app.command(no_args_is_help=True)
-def convert(
+def convert(  # noqa: C901
     input_sources: Annotated[
         List[str],
         typer.Argument(
@@ -279,7 +289,7 @@ def convert(
         ...,
         help=(
             f"The OCR engine to use. When --allow-external-plugins is *not* set, the available values are: "
-            f"{', '.join(
+            f"{', '.join(o.value for o in ocr_engines_enum_internal)}. "
             f"Use the option --show-external-plugins to see the options allowed with external plugins."
         ),
     ),
@@ -411,7 +421,7 @@ def convert(
         logging.basicConfig(level=logging.WARNING)
     elif verbose == 1:
         logging.basicConfig(level=logging.INFO)
-
+    else:
         logging.basicConfig(level=logging.DEBUG)

     settings.debug.visualize_cells = debug_visualize_cells
@@ -420,7 +430,7 @@ def convert(
     settings.debug.visualize_ocr = debug_visualize_ocr

     if from_formats is None:
-        from_formats =
+        from_formats = list(InputFormat)

     parsed_headers: Optional[Dict[str, str]] = None
     if headers is not None:
@@ -471,6 +481,7 @@ def convert(

     export_json = OutputFormat.JSON in to_formats
     export_html = OutputFormat.HTML in to_formats
+    export_html_split_page = OutputFormat.HTML_SPLIT_PAGE in to_formats
     export_md = OutputFormat.MARKDOWN in to_formats
     export_txt = OutputFormat.TEXT in to_formats
     export_doctags = OutputFormat.DOCTAGS in to_formats
@@ -531,10 +542,16 @@ def convert(
                 backend=backend,  # pdf_backend
             )
         elif pipeline == PdfPipeline.VLM:
-            pipeline_options = VlmPipelineOptions(
+            pipeline_options = VlmPipelineOptions(
+                enable_remote_services=enable_remote_services,
+            )

             if vlm_model == VlmModelType.GRANITE_VISION:
                 pipeline_options.vlm_options = granite_vision_vlm_conversion_options
+            elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
+                pipeline_options.vlm_options = (
+                    granite_vision_vlm_ollama_conversion_options
+                )
             elif vlm_model == VlmModelType.SMOLDOCLING:
                 pipeline_options.vlm_options = smoldocling_vlm_conversion_options
                 if sys.platform == "darwin":
@@ -578,6 +595,7 @@ def convert(
         output_dir=output,
         export_json=export_json,
         export_html=export_html,
+        export_html_split_page=export_html_split_page,
         export_md=export_md,
         export_txt=export_txt,
         export_doctags=export_doctags,
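The CLI changes above wire the new `html_split_page` output format to `save_as_html(..., split_page_view=True)` on the converted document. Below is a minimal sketch of the same export done directly from Python, under the assumption that the docling-core version pinned by 2.31.0 accepts the `split_page_view` keyword; the input and output paths are placeholders.

```python
from pathlib import Path

from docling.document_converter import DocumentConverter
from docling_core.types.doc import ImageRefMode

# Convert a document and write the split page/HTML side-by-side view,
# mirroring what the CLI does when "html_split_page" is requested.
conv_res = DocumentConverter().convert("report.pdf")  # placeholder input
conv_res.document.save_as_html(
    filename=Path("report.html"),
    image_mode=ImageRefMode.EMBEDDED,  # assumed; the CLI takes this from its image-export-mode option
    split_page_view=True,
)
```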
docling/cli/models.py
CHANGED
@@ -62,7 +62,7 @@ def download(
     models: Annotated[
         Optional[list[_AvailableModels]],
         typer.Argument(
-            help=
+            help="Models to download (default behavior: a predefined set of models will be downloaded).",
         ),
     ] = None,
     all: Annotated[
@@ -89,14 +89,13 @@ def download(
             "Cannot simultaneously set 'all' parameter and specify models to download."
         )
     if not quiet:
-        FORMAT = "%(message)s"
         logging.basicConfig(
             level=logging.INFO,
             format="[blue]%(message)s[/blue]",
             datefmt="[%X]",
             handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
         )
-    to_download = models or (
+    to_download = models or (list(_AvailableModels) if all else _default_models)
     output_dir = download_models(
         output_dir=output_dir,
         force=force,
docling/datamodel/base_models.py
CHANGED
@@ -10,7 +10,9 @@ from docling_core.types.doc import (
     TableCell,
 )
 from docling_core.types.doc.page import SegmentedPdfPage, TextCell
-
+
+# DO NOT REMOVE; explicitly exposed from this location
+from docling_core.types.io import (
     DocumentStream,
 )
 from PIL.Image import Image
@@ -50,6 +52,7 @@ class OutputFormat(str, Enum):
     MARKDOWN = "md"
     JSON = "json"
     HTML = "html"
+    HTML_SPLIT_PAGE = "html_split_page"
     TEXT = "text"
     DOCTAGS = "doctags"

@@ -232,9 +235,9 @@ class Page(BaseModel):
         None  # Internal PDF backend. By default it is cleared during assembling.
     )
     _default_image_scale: float = 1.0  # Default image scale for external usage.
-    _image_cache: Dict[
-
-
+    _image_cache: Dict[
+        float, Image
+    ] = {}  # Cache of images in different scales. By default it is cleared during assembling.

     def get_image(
         self, scale: float = 1.0, cropbox: Optional[BoundingBox] = None
@@ -242,7 +245,7 @@ class Page(BaseModel):
         if self._backend is None:
             return self._image_cache.get(scale, None)

-        if not
+        if scale not in self._image_cache:
             if cropbox is None:
                 self._image_cache[scale] = self._backend.get_page_image(scale=scale)
             else:
@@ -262,3 +265,35 @@ class Page(BaseModel):
     @property
     def image(self) -> Optional[Image]:
         return self.get_image(scale=self._default_image_scale)
+
+
+## OpenAI API Request / Response Models ##
+
+
+class OpenAiChatMessage(BaseModel):
+    role: str
+    content: str
+
+
+class OpenAiResponseChoice(BaseModel):
+    index: int
+    message: OpenAiChatMessage
+    finish_reason: str
+
+
+class OpenAiResponseUsage(BaseModel):
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+class OpenAiApiResponse(BaseModel):
+    model_config = ConfigDict(
+        protected_namespaces=(),
+    )
+
+    id: str
+    model: Optional[str] = None  # returned by openai
+    choices: List[OpenAiResponseChoice]
+    created: int
+    usage: OpenAiResponseUsage
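The OpenAI-style response models added at the end of base_models.py give the API-backed VLM path a typed view of a chat-completions payload. A small sketch of validating such a payload with them; the payload values are invented for illustration, and `model_validate` is the standard pydantic v2 entry point.

```python
from docling.datamodel.base_models import OpenAiApiResponse

# Shape of an OpenAI-compatible /v1/chat/completions response
# (values are made up; only the structure matters here).
payload = {
    "id": "chatcmpl-123",
    "model": "granite3.2-vision:2b",
    "created": 1714000000,
    "choices": [
        {
            "index": 0,
            "message": {"role": "assistant", "content": "# Page 1\n..."},
            "finish_reason": "stop",
        }
    ],
    "usage": {"prompt_tokens": 512, "completion_tokens": 128, "total_tokens": 640},
}

resp = OpenAiApiResponse.model_validate(payload)
print(resp.choices[0].message.content)
```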
docling/datamodel/document.py
CHANGED
@@ -1,13 +1,13 @@
 import csv
 import logging
 import re
+from collections.abc import Iterable
 from enum import Enum
 from io import BytesIO
 from pathlib import Path, PurePath
 from typing import (
     TYPE_CHECKING,
     Dict,
-    Iterable,
     List,
     Literal,
     Optional,
@@ -17,6 +17,8 @@ from typing import (
 )

 import filetype
+
+# DO NOT REMOVE; explicitly exposed from this location
 from docling_core.types.doc import (
     DocItem,
     DocItemLabel,
@@ -35,14 +37,14 @@ from docling_core.types.legacy_doc.base import (
     PageReference,
     Prov,
     Ref,
+    Table as DsSchemaTable,
+    TableCell,
 )
-from docling_core.types.legacy_doc.base import Table as DsSchemaTable
-from docling_core.types.legacy_doc.base import TableCell
 from docling_core.types.legacy_doc.document import (
     CCSDocumentDescription as DsDocumentDescription,
+    CCSFileInfoObject as DsFileInfoObject,
+    ExportedCCSDocument as DsDocument,
 )
-from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject
-from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
 from docling_core.utils.file import resolve_source_to_stream
 from docling_core.utils.legacy import docling_document_to_legacy
 from pydantic import BaseModel
@@ -65,7 +67,7 @@ from docling.datamodel.base_models import (
 )
 from docling.datamodel.settings import DocumentLimits
 from docling.utils.profiling import ProfilingItem
-from docling.utils.utils import create_file_hash
+from docling.utils.utils import create_file_hash

 if TYPE_CHECKING:
     from docling.document_converter import FormatOption
@@ -134,9 +136,9 @@ class InputDocument(BaseModel):
             self._init_doc(backend, path_or_stream)

         elif isinstance(path_or_stream, BytesIO):
-            assert (
-
-            )
+            assert filename is not None, (
+                "Can't construct InputDocument from stream without providing filename arg."
+            )
             self.file = PurePath(filename)
             self.filesize = path_or_stream.getbuffer().nbytes

@@ -228,7 +230,6 @@ class _DummyBackend(AbstractDocumentBackend):


 class _DocumentConversionInput(BaseModel):
-
     path_or_stream_iterator: Iterable[Union[Path, str, DocumentStream]]
     headers: Optional[Dict[str, str]] = None
     limits: Optional[DocumentLimits] = DocumentLimits()
@@ -283,6 +284,13 @@ class _DocumentConversionInput(BaseModel):
             if mime is None:  # must guess from
                 with obj.open("rb") as f:
                     content = f.read(1024)  # Read first 1KB
+            if mime is not None and mime.lower() == "application/zip":
+                if obj.suffixes[-1].lower() == ".xlsx":
+                    mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+                elif obj.suffixes[-1].lower() == ".docx":
+                    mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+                elif obj.suffixes[-1].lower() == ".pptx":
+                    mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation"

         elif isinstance(obj, DocumentStream):
             content = obj.stream.read(8192)
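The new branch in `_DocumentConversionInput` covers OOXML files whose content sniffing only reports the generic `application/zip` container; the file suffix is then used to pick the specific Office MIME type. A standalone sketch of the same idea, using the `filetype` package that document.py already imports; the helper name and mapping table are illustrative, not part of docling's API.

```python
from pathlib import Path
from typing import Optional

import filetype

# Suffix-based refinement of the generic zip MIME type reported for
# Office Open XML containers, mirroring the new branch shown above.
_OOXML_MIMES = {
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}


def refine_mime(path: Path) -> Optional[str]:
    """Guess a MIME type and upgrade a bare application/zip using the suffix."""
    mime = filetype.guess_mime(str(path))
    if mime is not None and mime.lower() == "application/zip":
        mime = _OOXML_MIMES.get(path.suffix.lower(), mime)
    return mime
```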
docling/datamodel/pipeline_options.py
CHANGED
@@ -213,8 +213,8 @@ class PictureDescriptionBaseOptions(BaseOptions):
     batch_size: int = 8
     scale: float = 2

-
-        0.
+    picture_area_threshold: float = (
+        0.05  # percentage of the area for a picture to processed with the models
     )


@@ -266,6 +266,7 @@ class ResponseFormat(str, Enum):
 class InferenceFramework(str, Enum):
     MLX = "mlx"
     TRANSFORMERS = "transformers"
+    OPENAI = "openai"


 class HuggingFaceVlmOptions(BaseVlmOptions):
@@ -284,6 +285,19 @@ class HuggingFaceVlmOptions(BaseVlmOptions):
         return self.repo_id.replace("/", "--")


+class ApiVlmOptions(BaseVlmOptions):
+    kind: Literal["api_model_options"] = "api_model_options"
+
+    url: AnyUrl = AnyUrl(
+        "http://localhost:11434/v1/chat/completions"
+    )  # Default to ollama
+    headers: Dict[str, str] = {}
+    params: Dict[str, Any] = {}
+    scale: float = 2.0
+    timeout: float = 60
+    response_format: ResponseFormat
+
+
 smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
     repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
     prompt="Convert this page to docling.",
@@ -307,10 +321,20 @@ granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
     inference_framework=InferenceFramework.TRANSFORMERS,
 )

+granite_vision_vlm_ollama_conversion_options = ApiVlmOptions(
+    url=AnyUrl("http://localhost:11434/v1/chat/completions"),
+    params={"model": "granite3.2-vision:2b"},
+    prompt="OCR the full page to markdown.",
+    scale=1.0,
+    timeout=120,
+    response_format=ResponseFormat.MARKDOWN,
+)
+

 class VlmModelType(str, Enum):
     SMOLDOCLING = "smoldocling"
     GRANITE_VISION = "granite_vision"
+    GRANITE_VISION_OLLAMA = "granite_vision_ollama"


 # Define an enum for the backend options
@@ -356,13 +380,14 @@ class PaginatedPipelineOptions(PipelineOptions):


 class VlmPipelineOptions(PaginatedPipelineOptions):
-
     generate_page_images: bool = True
     force_backend_text: bool = (
         False  # (To be used with vlms, or other generative models)
     )
     # If True, text from backend will be used instead of generated text
-    vlm_options: Union[HuggingFaceVlmOptions] =
+    vlm_options: Union[HuggingFaceVlmOptions, ApiVlmOptions] = (
+        smoldocling_vlm_conversion_options
+    )


 class PdfPipelineOptions(PaginatedPipelineOptions):
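The new `ApiVlmOptions` preset above targets a local Ollama server through its OpenAI-compatible chat-completions endpoint. Below is a sketch of plugging it into the VLM pipeline, assuming the usual wiring via `PdfFormatOption(pipeline_cls=VlmPipeline, ...)`; the model name, prompt, and endpoint follow the `granite_vision_vlm_ollama_conversion_options` preset, and the input path is a placeholder.

```python
from pydantic import AnyUrl

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    ApiVlmOptions,
    ResponseFormat,
    VlmPipelineOptions,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Point the VLM pipeline at an OpenAI-compatible endpoint served by Ollama.
pipeline_options = VlmPipelineOptions(
    enable_remote_services=True,  # required for API-backed models
    vlm_options=ApiVlmOptions(
        url=AnyUrl("http://localhost:11434/v1/chat/completions"),
        params={"model": "granite3.2-vision:2b"},
        prompt="OCR the full page to markdown.",
        timeout=120,
        response_format=ResponseFormat.MARKDOWN,
    ),
)

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
        )
    }
)
result = converter.convert("scanned.pdf")  # placeholder input path
```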
docling/document_converter.py
CHANGED
@@ -1,11 +1,11 @@
 import hashlib
 import logging
-import math
 import sys
 import time
+from collections.abc import Iterable, Iterator
 from functools import partial
 from pathlib import Path
-from typing import Dict,
+from typing import Dict, List, Optional, Tuple, Type, Union

 from pydantic import BaseModel, ConfigDict, model_validator, validate_call

@@ -172,7 +172,7 @@ class DocumentConverter:
         format_options: Optional[Dict[InputFormat, FormatOption]] = None,
     ):
         self.allowed_formats = (
-            allowed_formats if allowed_formats is not None else
+            allowed_formats if allowed_formats is not None else list(InputFormat)
         )
         self.format_to_options = {
             format: (
@@ -254,7 +254,7 @@ class DocumentConverter:

         if not had_result and raises_on_error:
             raise ConversionError(
-
+                "Conversion failed because the provided file has no recognizable format or it wasn't in the list of allowed formats."
             )

     def _convert(
@@ -266,7 +266,7 @@ class DocumentConverter:
             conv_input.docs(self.format_to_options),
             settings.perf.doc_batch_size,  # pass format_options
         ):
-            _log.info(
+            _log.info("Going to convert document batch...")

             # parallel processing only within input_batch
             # with ThreadPoolExecutor(
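With the change above, `DocumentConverter` falls back to `list(InputFormat)` when no `allowed_formats` argument is given. A short sketch of restricting the accepted formats explicitly; the input path is a placeholder. Converting a file whose format is not in the list raises the `ConversionError` shown in the diff when `raises_on_error` is left at its default.

```python
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter

# Only PDF and DOCX inputs are accepted; other formats trigger a ConversionError.
converter = DocumentConverter(allowed_formats=[InputFormat.PDF, InputFormat.DOCX])
result = converter.convert("contract.docx")  # placeholder input path
print(result.document.export_to_markdown())
```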
|