PyPI - docling - Versions diffs - 2.26.0__py3-none-any.whl → 2.27.0__py3-none-any.whl - Mend

docling 2.26.0__py3-none-any.whl → 2.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

docling/backend/asciidoc_backend.py +1 -1
docling/backend/csv_backend.py +1 -1
docling/backend/docling_parse_backend.py +21 -13
docling/backend/docling_parse_v2_backend.py +20 -12
docling/backend/docling_parse_v4_backend.py +185 -0
docling/backend/docx/__init__.py +0 -0
docling/backend/docx/latex/__init__.py +0 -0
docling/backend/docx/latex/latex_dict.py +271 -0
docling/backend/docx/latex/omml.py +453 -0
docling/backend/html_backend.py +7 -7
docling/backend/md_backend.py +1 -1
docling/backend/msexcel_backend.py +2 -45
docling/backend/mspowerpoint_backend.py +1 -1
docling/backend/msword_backend.py +65 -3
docling/backend/pdf_backend.py +7 -2
docling/backend/pypdfium2_backend.py +52 -30
docling/backend/xml/uspto_backend.py +1 -1
docling/cli/main.py +60 -21
docling/cli/models.py +1 -1
docling/datamodel/base_models.py +8 -10
docling/datamodel/pipeline_options.py +26 -30
docling/document_converter.py +5 -5
docling/models/base_model.py +9 -1
docling/models/base_ocr_model.py +27 -16
docling/models/easyocr_model.py +28 -13
docling/models/factories/__init__.py +27 -0
docling/models/factories/base_factory.py +122 -0
docling/models/factories/ocr_factory.py +11 -0
docling/models/factories/picture_description_factory.py +11 -0
docling/models/ocr_mac_model.py +39 -11
docling/models/page_preprocessing_model.py +4 -0
docling/models/picture_description_api_model.py +20 -3
docling/models/picture_description_base_model.py +19 -3
docling/models/picture_description_vlm_model.py +14 -2
docling/models/plugins/__init__.py +0 -0
docling/models/plugins/defaults.py +28 -0
docling/models/rapid_ocr_model.py +34 -13
docling/models/table_structure_model.py +13 -4
docling/models/tesseract_ocr_cli_model.py +40 -15
docling/models/tesseract_ocr_model.py +37 -12
docling/pipeline/standard_pdf_pipeline.py +25 -78
docling/utils/export.py +8 -6
docling/utils/layout_postprocessor.py +26 -23
docling/utils/visualization.py +1 -1
{docling-2.26.0.dist-info → docling-2.27.0.dist-info}/METADATA +48 -19
docling-2.27.0.dist-info/RECORD +83 -0
{docling-2.26.0.dist-info → docling-2.27.0.dist-info}/entry_points.txt +3 -0
docling-2.26.0.dist-info/RECORD +0 -72
{docling-2.26.0.dist-info → docling-2.27.0.dist-info}/LICENSE +0 -0
{docling-2.26.0.dist-info → docling-2.27.0.dist-info}/WHEEL +0 -0

docling/backend/pypdfium2_backend.py CHANGED Viewed

@@ -7,12 +7,12 @@ from typing import TYPE_CHECKING, Iterable, List, Optional, Union
 import pypdfium2 as pdfium
 import pypdfium2.raw as pdfium_c
 from docling_core.types.doc import BoundingBox, CoordOrigin, Size
+from docling_core.types.doc.page import BoundingRectangle, SegmentedPdfPage, TextCell
 from PIL import Image, ImageDraw
 from pypdfium2 import PdfTextPage
 from pypdfium2._helpers.misc import PdfiumError
 from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
-from docling.datamodel.base_models import Cell
 from docling.utils.locks import pypdfium2_lock
 if TYPE_CHECKING:
@@ -68,7 +68,10 @@ class PyPdfiumPageBackend(PdfPageBackend):
         return text_piece
-    def get_text_cells(self) -> Iterable[Cell]:
+    def get_segmented_page(self) -> Optional[SegmentedPdfPage]:
+        return None
+    def get_text_cells(self) -> Iterable[TextCell]:
         with pypdfium2_lock:
             if not self.text_page:
                 self.text_page = self._ppage.get_textpage()
@@ -84,11 +87,19 @@ class PyPdfiumPageBackend(PdfPageBackend):
                 text_piece = self.text_page.get_text_bounded(*rect)
                 x0, y0, x1, y1 = rect
                 cells.append(
-                    Cell(
-                        id=cell_counter,
+                    TextCell(
+                        index=cell_counter,
                         text=text_piece,
-                        bbox=BoundingBox(
-                            l=x0, b=y0, r=x1, t=y1, coord_origin=CoordOrigin.BOTTOMLEFT
+                        orig=text_piece,
+                        from_ocr=False,
+                        rect=BoundingRectangle.from_bounding_box(
+                            BoundingBox(
+                                l=x0,
+                                b=y0,
+                                r=x1,
+                                t=y1,
+                                coord_origin=CoordOrigin.BOTTOMLEFT,
+                            )
                         ).to_top_left_origin(page_size.height),
                     )
                 )
@@ -97,51 +108,56 @@ class PyPdfiumPageBackend(PdfPageBackend):
         # PyPdfium2 produces very fragmented cells, with sub-word level boundaries, in many PDFs.
         # The cell merging code below is to clean this up.
         def merge_horizontal_cells(
-            cells: List[Cell],
+            cells: List[TextCell],
             horizontal_threshold_factor: float = 1.0,
             vertical_threshold_factor: float = 0.5,
-        ) -> List[Cell]:
+        ) -> List[TextCell]:
             if not cells:
                 return []
-            def group_rows(cells: List[Cell]) -> List[List[Cell]]:
+            def group_rows(cells: List[TextCell]) -> List[List[TextCell]]:
                 rows = []
                 current_row = [cells[0]]
-                row_top = cells[0].bbox.t
-                row_bottom = cells[0].bbox.b
-                row_height = cells[0].bbox.height
+                row_top = cells[0].rect.to_bounding_box().t
+                row_bottom = cells[0].rect.to_bounding_box().b
+                row_height = cells[0].rect.to_bounding_box().height
                 for cell in cells[1:]:
                     vertical_threshold = row_height * vertical_threshold_factor
                     if (
-                        abs(cell.bbox.t - row_top) <= vertical_threshold
-                        and abs(cell.bbox.b - row_bottom) <= vertical_threshold
+                        abs(cell.rect.to_bounding_box().t - row_top)
+                        <= vertical_threshold
+                        and abs(cell.rect.to_bounding_box().b - row_bottom)
+                        <= vertical_threshold
                     ):
                         current_row.append(cell)
-                        row_top = min(row_top, cell.bbox.t)
-                        row_bottom = max(row_bottom, cell.bbox.b)
+                        row_top = min(row_top, cell.rect.to_bounding_box().t)
+                        row_bottom = max(row_bottom, cell.rect.to_bounding_box().b)
                         row_height = row_bottom - row_top
                     else:
                         rows.append(current_row)
                         current_row = [cell]
-                        row_top = cell.bbox.t
-                        row_bottom = cell.bbox.b
-                        row_height = cell.bbox.height
+                        row_top = cell.rect.to_bounding_box().t
+                        row_bottom = cell.rect.to_bounding_box().b
+                        row_height = cell.rect.to_bounding_box().height
                 if current_row:
                     rows.append(current_row)
                 return rows
-            def merge_row(row: List[Cell]) -> List[Cell]:
+            def merge_row(row: List[TextCell]) -> List[TextCell]:
                 merged = []
                 current_group = [row[0]]
                 for cell in row[1:]:
                     prev_cell = current_group[-1]
-                    avg_height = (prev_cell.bbox.height + cell.bbox.height) / 2
+                    avg_height = (
+                        prev_cell.rect.height + cell.rect.to_bounding_box().height
+                    ) / 2
                     if (
-                        cell.bbox.l - prev_cell.bbox.r
+                        cell.rect.to_bounding_box().l
+                        - prev_cell.rect.to_bounding_box().r
                         <= avg_height * horizontal_threshold_factor
                     ):
                         current_group.append(cell)
@@ -154,24 +170,30 @@ class PyPdfiumPageBackend(PdfPageBackend):
                 return merged
-            def merge_group(group: List[Cell]) -> Cell:
+            def merge_group(group: List[TextCell]) -> TextCell:
                 if len(group) == 1:
                     return group[0]
                 merged_text = "".join(cell.text for cell in group)
                 merged_bbox = BoundingBox(
-                    l=min(cell.bbox.l for cell in group),
-                    t=min(cell.bbox.t for cell in group),
-                    r=max(cell.bbox.r for cell in group),
-                    b=max(cell.bbox.b for cell in group),
+                    l=min(cell.rect.to_bounding_box().l for cell in group),
+                    t=min(cell.rect.to_bounding_box().t for cell in group),
+                    r=max(cell.rect.to_bounding_box().r for cell in group),
+                    b=max(cell.rect.to_bounding_box().b for cell in group),
+                )
+                return TextCell(
+                    index=group[0].index,
+                    text=merged_text,
+                    orig=merged_text,
+                    rect=BoundingRectangle.from_bounding_box(merged_bbox),
+                    from_ocr=False,
                 )
-                return Cell(id=group[0].id, text=merged_text, bbox=merged_bbox)
             rows = group_rows(cells)
             merged_cells = [cell for row in rows for cell in merge_row(row)]
             for i, cell in enumerate(merged_cells, 1):
-                cell.id = i
+                cell.index = i
             return merged_cells
@@ -181,7 +203,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
             )  # make new image to avoid drawing on the saved ones
             draw = ImageDraw.Draw(image)
             for c in cells:
-                x0, y0, x1, y1 = c.bbox.as_tuple()
+                x0, y0, x1, y1 = c.rect.to_bounding_box().as_tuple()
                 cell_color = (
                     random.randint(30, 140),
                     random.randint(30, 140),

docling/backend/xml/uspto_backend.py CHANGED Viewed

@@ -999,7 +999,7 @@ class PatentUsptoGrantAps(PatentUspto):
                     parent=self.parents[self.level],
                 )
-            last_claim.text += f" {value}" if last_claim.text else value
+            last_claim.text += f" {value.strip()}" if last_claim.text else value.strip()
         elif field == self.Field.CAPTION.value and section in (
             self.Section.SUMMARY.value,

docling/cli/main.py CHANGED Viewed

@@ -9,6 +9,7 @@ import warnings
 from pathlib import Path
 from typing import Annotated, Dict, Iterable, List, Optional, Type
+import rich.table
 import typer
 from docling_core.types.doc import ImageRefMode
 from docling_core.utils.file import resolve_source_to_path
@@ -16,6 +17,7 @@ from pydantic import TypeAdapter
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
+from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.base_models import (
@@ -29,18 +31,14 @@ from docling.datamodel.pipeline_options import (
     AcceleratorDevice,
     AcceleratorOptions,
     EasyOcrOptions,
-    OcrEngine,
-    OcrMacOptions,
     OcrOptions,
     PdfBackend,
     PdfPipelineOptions,
-    RapidOcrOptions,
     TableFormerMode,
-    TesseractCliOcrOptions,
-    TesseractOcrOptions,
 )
 from docling.datamodel.settings import settings
 from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
+from docling.models.factories import get_ocr_factory
 warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
 warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
@@ -48,8 +46,11 @@ warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr
 _log = logging.getLogger(__name__)
 from rich.console import Console
+console = Console()
 err_console = Console(stderr=True)
+ocr_factory_internal = get_ocr_factory(allow_external_plugins=False)
+ocr_engines_enum_internal = ocr_factory_internal.get_enum()
 app = typer.Typer(
     name="Docling",
@@ -77,6 +78,24 @@ def version_callback(value: bool):
         raise typer.Exit()
+def show_external_plugins_callback(value: bool):
+    if value:
+        ocr_factory_all = get_ocr_factory(allow_external_plugins=True)
+        table = rich.table.Table(title="Available OCR engines")
+        table.add_column("Name", justify="right")
+        table.add_column("Plugin")
+        table.add_column("Package")
+        for meta in ocr_factory_all.registered_meta.values():
+            if not meta.module.startswith("docling."):
+                table.add_row(
+                    f"[bold]{meta.kind}[/bold]",
+                    meta.plugin_name,
+                    meta.module.split(".")[0],
+                )
+        rich.print(table)
+        raise typer.Exit()
 def export_documents(
     conv_results: Iterable[ConversionResult],
     output_dir: Path,
@@ -195,8 +214,16 @@ def convert(
         ),
     ] = False,
     ocr_engine: Annotated[
-        OcrEngine, typer.Option(..., help="The OCR engine to use.")
-    ] = OcrEngine.EASYOCR,
+        str,
+        typer.Option(
+            ...,
+            help=(
+                f"The OCR engine to use. When --allow-external-plugins is *not* set, the available values are: "
+                f"{', '.join((o.value for o in ocr_engines_enum_internal))}. "
+                f"Use the option --show-external-plugins to see the options allowed with external plugins."
+            ),
+        ),
+    ] = EasyOcrOptions.kind,
     ocr_lang: Annotated[
         Optional[str],
         typer.Option(
@@ -240,6 +267,21 @@ def convert(
             ..., help="Must be enabled when using models connecting to remote services."
         ),
     ] = False,
+    allow_external_plugins: Annotated[
+        bool,
+        typer.Option(
+            ..., help="Must be enabled for loading modules from third-party plugins."
+        ),
+    ] = False,
+    show_external_plugins: Annotated[
+        bool,
+        typer.Option(
+            ...,
+            help="List the third-party plugins which are available when the option --allow-external-plugins is set.",
+            callback=show_external_plugins_callback,
+            is_eager=True,
+        ),
+    ] = False,
     abort_on_error: Annotated[
         bool,
         typer.Option(
@@ -367,18 +409,11 @@ def convert(
         export_txt = OutputFormat.TEXT in to_formats
         export_doctags = OutputFormat.DOCTAGS in to_formats
-        if ocr_engine == OcrEngine.EASYOCR:
-            ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=force_ocr)
-        elif ocr_engine == OcrEngine.TESSERACT_CLI:
-            ocr_options = TesseractCliOcrOptions(force_full_page_ocr=force_ocr)
-        elif ocr_engine == OcrEngine.TESSERACT:
-            ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr)
-        elif ocr_engine == OcrEngine.OCRMAC:
-            ocr_options = OcrMacOptions(force_full_page_ocr=force_ocr)
-        elif ocr_engine == OcrEngine.RAPIDOCR:
-            ocr_options = RapidOcrOptions(force_full_page_ocr=force_ocr)
-        else:
-            raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
+        ocr_factory = get_ocr_factory(allow_external_plugins=allow_external_plugins)
+        ocr_options: OcrOptions = ocr_factory.create_options(  # type: ignore
+            kind=ocr_engine,
+            force_full_page_ocr=force_ocr,
+        )
         ocr_lang_list = _split_list(ocr_lang)
         if ocr_lang_list is not None:
@@ -386,6 +421,7 @@ def convert(
         accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
         pipeline_options = PdfPipelineOptions(
+            allow_external_plugins=allow_external_plugins,
             enable_remote_services=enable_remote_services,
             accelerator_options=accelerator_options,
             do_ocr=ocr,
@@ -412,12 +448,15 @@ def convert(
         if artifacts_path is not None:
             pipeline_options.artifacts_path = artifacts_path
+        backend: Type[PdfDocumentBackend]
         if pdf_backend == PdfBackend.DLPARSE_V1:
-            backend: Type[PdfDocumentBackend] = DoclingParseDocumentBackend
+            backend = DoclingParseDocumentBackend
         elif pdf_backend == PdfBackend.DLPARSE_V2:
             backend = DoclingParseV2DocumentBackend
+        elif pdf_backend == PdfBackend.DLPARSE_V4:
+            backend = DoclingParseV4DocumentBackend  # type: ignore
         elif pdf_backend == PdfBackend.PYPDFIUM2:
-            backend = PyPdfiumDocumentBackend
+            backend = PyPdfiumDocumentBackend  # type: ignore
         else:
             raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}")

docling/cli/models.py CHANGED Viewed

@@ -121,7 +121,7 @@ def download(
             "Using the CLI:",
             f"`docling --artifacts-path={output_dir} FILE`",
             "\n",
-            "Using Python: see the documentation at <https://ds4sd.github.io/docling/usage>.",
+            "Using Python: see the documentation at <https://docling-project.github.io/docling/usage>.",
         )

docling/datamodel/base_models.py CHANGED Viewed

@@ -9,6 +9,7 @@ from docling_core.types.doc import (
     Size,
     TableCell,
 )
+from docling_core.types.doc.page import SegmentedPdfPage, TextCell
 from docling_core.types.io import (  # DO ΝΟΤ REMOVE; explicitly exposed from this location
     DocumentStream,
 )
@@ -123,14 +124,10 @@ class ErrorItem(BaseModel):
     error_message: str
-class Cell(BaseModel):
-    id: int
-    text: str
-    bbox: BoundingBox
-class OcrCell(Cell):
-    confidence: float
+# class Cell(BaseModel):
+#    id: int
+#    text: str
+#    bbox: BoundingBox
 class Cluster(BaseModel):
@@ -138,7 +135,7 @@ class Cluster(BaseModel):
     label: DocItemLabel
     bbox: BoundingBox
     confidence: float = 1.0
-    cells: List[Cell] = []
+    cells: List[TextCell] = []
     children: List["Cluster"] = []  # Add child cluster support
@@ -226,7 +223,8 @@ class Page(BaseModel):
     page_no: int
     # page_hash: Optional[str] = None
     size: Optional[Size] = None
-    cells: List[Cell] = []
+    cells: List[TextCell] = []
+    parsed_page: Optional[SegmentedPdfPage] = None
     predictions: PagePredictions = PagePredictions()
     assembled: Optional[AssembledUnit] = None

docling/datamodel/pipeline_options.py CHANGED Viewed

@@ -1,10 +1,9 @@
 import logging
 import os
 import re
-import warnings
 from enum import Enum
 from pathlib import Path
-from typing import Annotated, Any, Dict, List, Literal, Optional, Union
+from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
 from pydantic import (
     AnyUrl,
@@ -13,13 +12,8 @@ from pydantic import (
     Field,
     field_validator,
     model_validator,
-    validator,
-)
-from pydantic_settings import (
-    BaseSettings,
-    PydanticBaseSettingsSource,
-    SettingsConfigDict,
 )
+from pydantic_settings import BaseSettings, SettingsConfigDict
 from typing_extensions import deprecated
 _log = logging.getLogger(__name__)
@@ -83,6 +77,12 @@ class AcceleratorOptions(BaseSettings):
         return data
+class BaseOptions(BaseModel):
+    """Base class for options."""
+    kind: ClassVar[str]
 class TableFormerMode(str, Enum):
     """Modes for the TableFormer model."""
@@ -102,10 +102,9 @@ class TableStructureOptions(BaseModel):
     mode: TableFormerMode = TableFormerMode.ACCURATE
-class OcrOptions(BaseModel):
+class OcrOptions(BaseOptions):
     """OCR options."""
-    kind: str
     lang: List[str]
     force_full_page_ocr: bool = False  # If enabled a full page OCR is always applied
     bitmap_area_threshold: float = (
@@ -116,7 +115,7 @@ class OcrOptions(BaseModel):
 class RapidOcrOptions(OcrOptions):
     """Options for the RapidOCR engine."""
-    kind: Literal["rapidocr"] = "rapidocr"
+    kind: ClassVar[Literal["rapidocr"]] = "rapidocr"
     # English and chinese are the most commly used models and have been tested with RapidOCR.
     lang: List[str] = [
@@ -155,7 +154,7 @@ class RapidOcrOptions(OcrOptions):
 class EasyOcrOptions(OcrOptions):
     """Options for the EasyOCR engine."""
-    kind: Literal["easyocr"] = "easyocr"
+    kind: ClassVar[Literal["easyocr"]] = "easyocr"
     lang: List[str] = ["fr", "de", "es", "en"]
     use_gpu: Optional[bool] = None
@@ -175,7 +174,7 @@ class EasyOcrOptions(OcrOptions):
 class TesseractCliOcrOptions(OcrOptions):
     """Options for the TesseractCli engine."""
-    kind: Literal["tesseract"] = "tesseract"
+    kind: ClassVar[Literal["tesseract"]] = "tesseract"
     lang: List[str] = ["fra", "deu", "spa", "eng"]
     tesseract_cmd: str = "tesseract"
     path: Optional[str] = None
@@ -188,7 +187,7 @@ class TesseractCliOcrOptions(OcrOptions):
 class TesseractOcrOptions(OcrOptions):
     """Options for the Tesseract engine."""
-    kind: Literal["tesserocr"] = "tesserocr"
+    kind: ClassVar[Literal["tesserocr"]] = "tesserocr"
     lang: List[str] = ["fra", "deu", "spa", "eng"]
     path: Optional[str] = None
@@ -200,7 +199,7 @@ class TesseractOcrOptions(OcrOptions):
 class OcrMacOptions(OcrOptions):
     """Options for the Mac OCR engine."""
-    kind: Literal["ocrmac"] = "ocrmac"
+    kind: ClassVar[Literal["ocrmac"]] = "ocrmac"
     lang: List[str] = ["fr-FR", "de-DE", "es-ES", "en-US"]
     recognition: str = "accurate"
     framework: str = "vision"
@@ -210,8 +209,7 @@ class OcrMacOptions(OcrOptions):
     )
-class PictureDescriptionBaseOptions(BaseModel):
-    kind: str
+class PictureDescriptionBaseOptions(BaseOptions):
     batch_size: int = 8
     scale: float = 2
@@ -221,7 +219,7 @@ class PictureDescriptionBaseOptions(BaseModel):
 class PictureDescriptionApiOptions(PictureDescriptionBaseOptions):
-    kind: Literal["api"] = "api"
+    kind: ClassVar[Literal["api"]] = "api"
     url: AnyUrl = AnyUrl("http://localhost:8000/v1/chat/completions")
     headers: Dict[str, str] = {}
@@ -233,7 +231,7 @@ class PictureDescriptionApiOptions(PictureDescriptionBaseOptions):
 class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions):
-    kind: Literal["vlm"] = "vlm"
+    kind: ClassVar[Literal["vlm"]] = "vlm"
     repo_id: str
     prompt: str = "Describe this image in a few sentences."
@@ -301,9 +299,11 @@ class PdfBackend(str, Enum):
     PYPDFIUM2 = "pypdfium2"
     DLPARSE_V1 = "dlparse_v1"
     DLPARSE_V2 = "dlparse_v2"
+    DLPARSE_V4 = "dlparse_v4"
 # Define an enum for the ocr engines
+@deprecated("Use ocr_factory.registered_enum")
 class OcrEngine(str, Enum):
     """Enum of valid OCR engines."""
@@ -323,6 +323,7 @@ class PipelineOptions(BaseModel):
     document_timeout: Optional[float] = None
     accelerator_options: AcceleratorOptions = AcceleratorOptions()
     enable_remote_services: bool = False
+    allow_external_plugins: bool = False
 class PaginatedPipelineOptions(PipelineOptions):
@@ -358,17 +359,10 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
     # If True, text from backend will be used instead of generated text
     table_structure_options: TableStructureOptions = TableStructureOptions()
-    ocr_options: Union[
-        EasyOcrOptions,
-        TesseractCliOcrOptions,
-        TesseractOcrOptions,
-        OcrMacOptions,
-        RapidOcrOptions,
-    ] = Field(EasyOcrOptions(), discriminator="kind")
-    picture_description_options: Annotated[
-        Union[PictureDescriptionApiOptions, PictureDescriptionVlmOptions],
-        Field(discriminator="kind"),
-    ] = smolvlm_picture_description
+    ocr_options: OcrOptions = EasyOcrOptions()
+    picture_description_options: PictureDescriptionBaseOptions = (
+        smolvlm_picture_description
+    )
     images_scale: float = 1.0
     generate_page_images: bool = False
@@ -381,3 +375,5 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
             "before conversion and then use the `TableItem.get_image` function."
         ),
     )
+    generate_parsed_pages: bool = False

docling/document_converter.py CHANGED Viewed

@@ -11,7 +11,7 @@ from pydantic import BaseModel, ConfigDict, model_validator, validate_call
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.asciidoc_backend import AsciiDocBackend
 from docling.backend.csv_backend import CsvDocumentBackend
-from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
+from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.backend.html_backend import HTMLDocumentBackend
 from docling.backend.json.docling_json_backend import DoclingJSONBackend
 from docling.backend.md_backend import MarkdownDocumentBackend
@@ -109,12 +109,12 @@ class XMLJatsFormatOption(FormatOption):
 class ImageFormatOption(FormatOption):
     pipeline_cls: Type = StandardPdfPipeline
-    backend: Type[AbstractDocumentBackend] = DoclingParseV2DocumentBackend
+    backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
 class PdfFormatOption(FormatOption):
     pipeline_cls: Type = StandardPdfPipeline
-    backend: Type[AbstractDocumentBackend] = DoclingParseV2DocumentBackend
+    backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
 def _get_default_option(format: InputFormat) -> FormatOption:
@@ -147,10 +147,10 @@ def _get_default_option(format: InputFormat) -> FormatOption:
             pipeline_cls=SimplePipeline, backend=JatsDocumentBackend
         ),
         InputFormat.IMAGE: FormatOption(
-            pipeline_cls=StandardPdfPipeline, backend=DoclingParseV2DocumentBackend
+            pipeline_cls=StandardPdfPipeline, backend=DoclingParseV4DocumentBackend
         ),
         InputFormat.PDF: FormatOption(
-            pipeline_cls=StandardPdfPipeline, backend=DoclingParseV2DocumentBackend
+            pipeline_cls=StandardPdfPipeline, backend=DoclingParseV4DocumentBackend
         ),
         InputFormat.JSON_DOCLING: FormatOption(
             pipeline_cls=SimplePipeline, backend=DoclingJSONBackend

docling/models/base_model.py CHANGED Viewed

@@ -1,14 +1,22 @@
 from abc import ABC, abstractmethod
-from typing import Any, Generic, Iterable, Optional
+from typing import Any, Generic, Iterable, Optional, Protocol, Type
 from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeItem
 from typing_extensions import TypeVar
 from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page
 from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import BaseOptions
 from docling.datamodel.settings import settings
+class BaseModelWithOptions(Protocol):
+    @classmethod
+    def get_options_type(cls) -> Type[BaseOptions]: ...
+    def __init__(self, *, options: BaseOptions, **kwargs): ...
 class BasePageModel(ABC):
     @abstractmethod
     def __call__(

docling 2.26.0__py3-none-any.whl → 2.27.0__py3-none-any.whl

docling 2.26.0__py3-none-any.whl → 2.27.0__py3-none-any.whl