PyPI - docling - Versions diffs - 2.25.2__py3-none-any.whl → 2.27.0__py3-none-any.whl - Mend

docling 2.25.2__py3-none-any.whl → 2.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

docling/backend/asciidoc_backend.py +1 -1
docling/backend/csv_backend.py +1 -1
docling/backend/docling_parse_backend.py +21 -13
docling/backend/docling_parse_v2_backend.py +20 -12
docling/backend/docling_parse_v4_backend.py +185 -0
docling/backend/docx/__init__.py +0 -0
docling/backend/docx/latex/__init__.py +0 -0
docling/backend/docx/latex/latex_dict.py +271 -0
docling/backend/docx/latex/omml.py +453 -0
docling/backend/html_backend.py +7 -7
docling/backend/md_backend.py +1 -1
docling/backend/msexcel_backend.py +2 -45
docling/backend/mspowerpoint_backend.py +1 -1
docling/backend/msword_backend.py +65 -3
docling/backend/pdf_backend.py +7 -2
docling/backend/pypdfium2_backend.py +52 -30
docling/backend/xml/uspto_backend.py +1 -1
docling/cli/main.py +62 -23
docling/cli/models.py +1 -1
docling/datamodel/base_models.py +8 -10
docling/datamodel/pipeline_options.py +27 -31
docling/document_converter.py +5 -5
docling/models/base_model.py +9 -1
docling/models/base_ocr_model.py +27 -16
docling/models/code_formula_model.py +84 -5
docling/models/document_picture_classifier.py +1 -1
docling/models/easyocr_model.py +28 -13
docling/models/factories/__init__.py +27 -0
docling/models/factories/base_factory.py +122 -0
docling/models/factories/ocr_factory.py +11 -0
docling/models/factories/picture_description_factory.py +11 -0
docling/models/ocr_mac_model.py +39 -11
docling/models/page_preprocessing_model.py +4 -0
docling/models/picture_description_api_model.py +20 -3
docling/models/picture_description_base_model.py +19 -3
docling/models/picture_description_vlm_model.py +14 -2
docling/models/plugins/__init__.py +0 -0
docling/models/plugins/defaults.py +28 -0
docling/models/rapid_ocr_model.py +34 -13
docling/models/table_structure_model.py +14 -5
docling/models/tesseract_ocr_cli_model.py +40 -15
docling/models/tesseract_ocr_model.py +37 -12
docling/pipeline/standard_pdf_pipeline.py +25 -78
docling/utils/export.py +8 -6
docling/utils/layout_postprocessor.py +26 -23
docling/utils/visualization.py +1 -1
{docling-2.25.2.dist-info → docling-2.27.0.dist-info}/METADATA +48 -19
docling-2.27.0.dist-info/RECORD +83 -0
{docling-2.25.2.dist-info → docling-2.27.0.dist-info}/entry_points.txt +3 -0
docling-2.25.2.dist-info/RECORD +0 -72
{docling-2.25.2.dist-info → docling-2.27.0.dist-info}/LICENSE +0 -0
{docling-2.25.2.dist-info → docling-2.27.0.dist-info}/WHEEL +0 -0

docling/models/picture_description_base_model.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import logging
+from abc import abstractmethod
 from pathlib import Path
-from typing import Any, Iterable, List, Optional, Union
+from typing import Any, Iterable, List, Optional, Type, Union
 from docling_core.types.doc import (
     DoclingDocument,
@@ -13,20 +14,30 @@ from docling_core.types.doc.document import (  # TODO: move import to docling_co
 )
 from PIL import Image
-from docling.datamodel.pipeline_options import PictureDescriptionBaseOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorOptions,
+    PictureDescriptionBaseOptions,
+)
 from docling.models.base_model import (
     BaseItemAndImageEnrichmentModel,
+    BaseModelWithOptions,
     ItemAndImageEnrichmentElement,
 )
-class PictureDescriptionBaseModel(BaseItemAndImageEnrichmentModel):
+class PictureDescriptionBaseModel(
+    BaseItemAndImageEnrichmentModel, BaseModelWithOptions
+):
     images_scale: float = 2.0
     def __init__(
         self,
+        *,
         enabled: bool,
+        enable_remote_services: bool,
+        artifacts_path: Optional[Union[Path, str]],
         options: PictureDescriptionBaseOptions,
+        accelerator_options: AcceleratorOptions,
     ):
         self.enabled = enabled
         self.options = options
@@ -62,3 +73,8 @@ class PictureDescriptionBaseModel(BaseItemAndImageEnrichmentModel):
                 PictureDescriptionData(text=output, provenance=self.provenance)
             )
             yield item
+    @classmethod
+    @abstractmethod
+    def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
+        pass

docling/models/picture_description_vlm_model.py CHANGED Viewed

@@ -1,10 +1,11 @@
 from pathlib import Path
-from typing import Iterable, Optional, Union
+from typing import Iterable, Optional, Type, Union
 from PIL import Image
 from docling.datamodel.pipeline_options import (
     AcceleratorOptions,
+    PictureDescriptionBaseOptions,
     PictureDescriptionVlmOptions,
 )
 from docling.models.picture_description_base_model import PictureDescriptionBaseModel
@@ -13,14 +14,25 @@ from docling.utils.accelerator_utils import decide_device
 class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
+    @classmethod
+    def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
+        return PictureDescriptionVlmOptions
     def __init__(
         self,
         enabled: bool,
+        enable_remote_services: bool,
         artifacts_path: Optional[Union[Path, str]],
         options: PictureDescriptionVlmOptions,
         accelerator_options: AcceleratorOptions,
     ):
-        super().__init__(enabled=enabled, options=options)
+        super().__init__(
+            enabled=enabled,
+            enable_remote_services=enable_remote_services,
+            artifacts_path=artifacts_path,
+            options=options,
+            accelerator_options=accelerator_options,
+        )
         self.options: PictureDescriptionVlmOptions
         if self.enabled:

docling/models/plugins/__init__.py ADDED Viewed

File without changes

docling/models/plugins/defaults.py ADDED Viewed

@@ -0,0 +1,28 @@
+from docling.models.easyocr_model import EasyOcrModel
+from docling.models.ocr_mac_model import OcrMacModel
+from docling.models.picture_description_api_model import PictureDescriptionApiModel
+from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
+from docling.models.rapid_ocr_model import RapidOcrModel
+from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
+from docling.models.tesseract_ocr_model import TesseractOcrModel
+def ocr_engines():
+    return {
+        "ocr_engines": [
+            EasyOcrModel,
+            OcrMacModel,
+            RapidOcrModel,
+            TesseractOcrModel,
+            TesseractOcrCliModel,
+        ]
+    }
+def picture_description():
+    return {
+        "picture_description": [
+            PictureDescriptionVlmModel,
+            PictureDescriptionApiModel,
+        ]
+    }

docling/models/rapid_ocr_model.py CHANGED Viewed

@@ -1,14 +1,17 @@
 import logging
-from typing import Iterable
+from pathlib import Path
+from typing import Iterable, Optional, Type
 import numpy
 from docling_core.types.doc import BoundingBox, CoordOrigin
+from docling_core.types.doc.page import BoundingRectangle, TextCell
-from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.base_models import Page
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
     AcceleratorDevice,
     AcceleratorOptions,
+    OcrOptions,
     RapidOcrOptions,
 )
 from docling.datamodel.settings import settings
@@ -23,10 +26,16 @@ class RapidOcrModel(BaseOcrModel):
     def __init__(
         self,
         enabled: bool,
+        artifacts_path: Optional[Path],
         options: RapidOcrOptions,
         accelerator_options: AcceleratorOptions,
     ):
-        super().__init__(enabled=enabled, options=options)
+        super().__init__(
+            enabled=enabled,
+            artifacts_path=artifacts_path,
+            options=options,
+            accelerator_options=accelerator_options,
+        )
         self.options: RapidOcrOptions
         self.scale = 3  # multiplier for 72 dpi == 216 dpi.
@@ -100,18 +109,26 @@ class RapidOcrModel(BaseOcrModel):
                         if result is not None:
                             cells = [
-                                OcrCell(
-                                    id=ix,
+                                TextCell(
+                                    index=ix,
                                     text=line[1],
+                                    orig=line[1],
                                     confidence=line[2],
-                                    bbox=BoundingBox.from_tuple(
-                                        coord=(
-                                            (line[0][0][0] / self.scale) + ocr_rect.l,
-                                            (line[0][0][1] / self.scale) + ocr_rect.t,
-                                            (line[0][2][0] / self.scale) + ocr_rect.l,
-                                            (line[0][2][1] / self.scale) + ocr_rect.t,
-                                        ),
-                                        origin=CoordOrigin.TOPLEFT,
+                                    from_ocr=True,
+                                    rect=BoundingRectangle.from_bounding_box(
+                                        BoundingBox.from_tuple(
+                                            coord=(
+                                                (line[0][0][0] / self.scale)
+                                                + ocr_rect.l,
+                                                (line[0][0][1] / self.scale)
+                                                + ocr_rect.t,
+                                                (line[0][2][0] / self.scale)
+                                                + ocr_rect.l,
+                                                (line[0][2][1] / self.scale)
+                                                + ocr_rect.t,
+                                            ),
+                                            origin=CoordOrigin.TOPLEFT,
+                                        )
                                     ),
                                 )
                                 for ix, line in enumerate(result)
@@ -126,3 +143,7 @@ class RapidOcrModel(BaseOcrModel):
                     self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
                 yield page
+    @classmethod
+    def get_options_type(cls) -> Type[OcrOptions]:
+        return RapidOcrOptions

docling/models/table_structure_model.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import Iterable, Optional, Union
 import numpy
 from docling_core.types.doc import BoundingBox, DocItemLabel, TableCell
+from docling_core.types.doc.page import BoundingRectangle
 from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
 from PIL import ImageDraw
@@ -95,7 +96,7 @@ class TableStructureModel(BasePageModel):
             repo_id="ds4sd/docling-models",
             force_download=force,
             local_dir=local_dir,
-            revision="v2.1.0",
+            revision="v2.2.0",
         )
         return Path(download_path)
@@ -129,7 +130,7 @@ class TableStructureModel(BasePageModel):
             draw.rectangle([(x0, y0), (x1, y1)], outline="red")
             for cell in table_element.cluster.cells:
-                x0, y0, x1, y1 = cell.bbox.as_tuple()
+                x0, y0, x1, y1 = cell.rect.to_bounding_box().as_tuple()
                 x0 *= scale_x
                 x1 *= scale_x
                 y0 *= scale_x
@@ -223,11 +224,19 @@ class TableStructureModel(BasePageModel):
                                 # Only allow non empty stings (spaces) into the cells of a table
                                 if len(c.text.strip()) > 0:
                                     new_cell = copy.deepcopy(c)
-                                    new_cell.bbox = new_cell.bbox.scaled(
-                                        scale=self.scale
+                                    new_cell.rect = BoundingRectangle.from_bounding_box(
+                                        new_cell.rect.to_bounding_box().scaled(
+                                            scale=self.scale
+                                        )
                                     )
-                                    tokens.append(new_cell.model_dump())
+                                    tokens.append(
+                                        {
+                                            "id": new_cell.index,
+                                            "text": new_cell.text,
+                                            "bbox": new_cell.rect.to_bounding_box().model_dump(),
+                                        }
+                                    )
                             page_input["tokens"] = tokens
                             tf_output = self.tf_predictor.multi_table_predict(

docling/models/tesseract_ocr_cli_model.py CHANGED Viewed

@@ -3,15 +3,21 @@ import io
 import logging
 import os
 import tempfile
+from pathlib import Path
 from subprocess import DEVNULL, PIPE, Popen
-from typing import Iterable, List, Optional, Tuple
+from typing import Iterable, List, Optional, Tuple, Type
 import pandas as pd
 from docling_core.types.doc import BoundingBox, CoordOrigin
+from docling_core.types.doc.page import BoundingRectangle, TextCell
-from docling.datamodel.base_models import Cell, OcrCell, Page
+from docling.datamodel.base_models import Page
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import TesseractCliOcrOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorOptions,
+    OcrOptions,
+    TesseractCliOcrOptions,
+)
 from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
 from docling.utils.ocr_utils import map_tesseract_script
@@ -21,8 +27,19 @@ _log = logging.getLogger(__name__)
 class TesseractOcrCliModel(BaseOcrModel):
-    def __init__(self, enabled: bool, options: TesseractCliOcrOptions):
-        super().__init__(enabled=enabled, options=options)
+    def __init__(
+        self,
+        enabled: bool,
+        artifacts_path: Optional[Path],
+        options: TesseractCliOcrOptions,
+        accelerator_options: AcceleratorOptions,
+    ):
+        super().__init__(
+            enabled=enabled,
+            artifacts_path=artifacts_path,
+            options=options,
+            accelerator_options=accelerator_options,
+        )
         self.options: TesseractCliOcrOptions
         self.scale = 3  # multiplier for 72 dpi == 216 dpi.
@@ -228,18 +245,22 @@ class TesseractOcrCliModel(BaseOcrModel):
                             t = b + h
                             r = l + w
-                            cell = OcrCell(
-                                id=ix,
+                            cell = TextCell(
+                                index=ix,
                                 text=text,
+                                orig=text,
+                                from_ocr=True,
                                 confidence=conf / 100.0,
-                                bbox=BoundingBox.from_tuple(
-                                    coord=(
-                                        (l / self.scale) + ocr_rect.l,
-                                        (b / self.scale) + ocr_rect.t,
-                                        (r / self.scale) + ocr_rect.l,
-                                        (t / self.scale) + ocr_rect.t,
-                                    ),
-                                    origin=CoordOrigin.TOPLEFT,
+                                rect=BoundingRectangle.from_bounding_box(
+                                    BoundingBox.from_tuple(
+                                        coord=(
+                                            (l / self.scale) + ocr_rect.l,
+                                            (b / self.scale) + ocr_rect.t,
+                                            (r / self.scale) + ocr_rect.l,
+                                            (t / self.scale) + ocr_rect.t,
+                                        ),
+                                        origin=CoordOrigin.TOPLEFT,
+                                    )
                                 ),
                             )
                             all_ocr_cells.append(cell)
@@ -252,3 +273,7 @@ class TesseractOcrCliModel(BaseOcrModel):
                     self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
                 yield page
+    @classmethod
+    def get_options_type(cls) -> Type[OcrOptions]:
+        return TesseractCliOcrOptions

docling/models/tesseract_ocr_model.py CHANGED Viewed

@@ -1,11 +1,17 @@
 import logging
-from typing import Iterable
+from pathlib import Path
+from typing import Iterable, Optional, Type
 from docling_core.types.doc import BoundingBox, CoordOrigin
+from docling_core.types.doc.page import BoundingRectangle, TextCell
-from docling.datamodel.base_models import Cell, OcrCell, Page
+from docling.datamodel.base_models import Page
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import TesseractOcrOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorOptions,
+    OcrOptions,
+    TesseractOcrOptions,
+)
 from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
 from docling.utils.ocr_utils import map_tesseract_script
@@ -15,8 +21,19 @@ _log = logging.getLogger(__name__)
 class TesseractOcrModel(BaseOcrModel):
-    def __init__(self, enabled: bool, options: TesseractOcrOptions):
-        super().__init__(enabled=enabled, options=options)
+    def __init__(
+        self,
+        enabled: bool,
+        artifacts_path: Optional[Path],
+        options: TesseractOcrOptions,
+        accelerator_options: AcceleratorOptions,
+    ):
+        super().__init__(
+            enabled=enabled,
+            artifacts_path=artifacts_path,
+            options=options,
+            accelerator_options=accelerator_options,
+        )
         self.options: TesseractOcrOptions
         self.scale = 3  # multiplier for 72 dpi == 216 dpi.
@@ -31,14 +48,14 @@ class TesseractOcrModel(BaseOcrModel):
                 "Note that tesserocr might have to be manually compiled for working with "
                 "your Tesseract installation. The Docling documentation provides examples for it. "
                 "Alternatively, Docling has support for other OCR engines. See the documentation: "
-                "https://ds4sd.github.io/docling/installation/"
+                "https://docling-project.github.io/docling/installation/"
             )
             missing_langs_errmsg = (
                 "tesserocr is not correctly configured. No language models have been detected. "
                 "Please ensure that the TESSDATA_PREFIX envvar points to tesseract languages dir. "
                 "You can find more information how to setup other OCR engines in Docling "
                 "documentation: "
-                "https://ds4sd.github.io/docling/installation/"
+                "https://docling-project.github.io/docling/installation/"
             )
             try:
@@ -173,13 +190,17 @@ class TesseractOcrModel(BaseOcrModel):
                             top = (box["y"] + box["h"]) / self.scale
                             cells.append(
-                                OcrCell(
-                                    id=ix,
+                                TextCell(
+                                    index=ix,
                                     text=text,
+                                    orig=text,
+                                    from_ocr=True,
                                     confidence=confidence,
-                                    bbox=BoundingBox.from_tuple(
-                                        coord=(left, top, right, bottom),
-                                        origin=CoordOrigin.TOPLEFT,
+                                    rect=BoundingRectangle.from_bounding_box(
+                                        BoundingBox.from_tuple(
+                                            coord=(left, top, right, bottom),
+                                            origin=CoordOrigin.TOPLEFT,
+                                        ),
                                     ),
                                 )
                             )
@@ -195,3 +216,7 @@ class TesseractOcrModel(BaseOcrModel):
                     self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
                 yield page
+    @classmethod
+    def get_options_type(cls) -> Type[OcrOptions]:
+        return TesseractOcrOptions

docling/pipeline/standard_pdf_pipeline.py CHANGED Viewed

@@ -10,16 +10,7 @@ from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import AssembledUnit, Page
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import (
-    EasyOcrOptions,
-    OcrMacOptions,
-    PdfPipelineOptions,
-    PictureDescriptionApiOptions,
-    PictureDescriptionVlmOptions,
-    RapidOcrOptions,
-    TesseractCliOcrOptions,
-    TesseractOcrOptions,
-)
+from docling.datamodel.pipeline_options import PdfPipelineOptions
 from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
 from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
@@ -27,22 +18,16 @@ from docling.models.document_picture_classifier import (
     DocumentPictureClassifier,
     DocumentPictureClassifierOptions,
 )
-from docling.models.easyocr_model import EasyOcrModel
+from docling.models.factories import get_ocr_factory, get_picture_description_factory
 from docling.models.layout_model import LayoutModel
-from docling.models.ocr_mac_model import OcrMacModel
 from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
 from docling.models.page_preprocessing_model import (
     PagePreprocessingModel,
     PagePreprocessingOptions,
 )
-from docling.models.picture_description_api_model import PictureDescriptionApiModel
 from docling.models.picture_description_base_model import PictureDescriptionBaseModel
-from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
-from docling.models.rapid_ocr_model import RapidOcrModel
 from docling.models.readingorder_model import ReadingOrderModel, ReadingOrderOptions
 from docling.models.table_structure_model import TableStructureModel
-from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
-from docling.models.tesseract_ocr_model import TesseractOcrModel
 from docling.pipeline.base_pipeline import PaginatedPipeline
 from docling.utils.model_downloader import download_models
 from docling.utils.profiling import ProfilingScope, TimeRecorder
@@ -78,16 +63,14 @@ class StandardPdfPipeline(PaginatedPipeline):
         self.glm_model = ReadingOrderModel(options=ReadingOrderOptions())
-        if (ocr_model := self.get_ocr_model(artifacts_path=artifacts_path)) is None:
-            raise RuntimeError(
-                f"The specified OCR kind is not supported: {pipeline_options.ocr_options.kind}."
-            )
+        ocr_model = self.get_ocr_model(artifacts_path=artifacts_path)
         self.build_pipe = [
             # Pre-processing
             PagePreprocessingModel(
                 options=PagePreprocessingOptions(
-                    images_scale=pipeline_options.images_scale
+                    images_scale=pipeline_options.images_scale,
+                    create_parsed_page=pipeline_options.generate_parsed_pages,
                 )
             ),
             # OCR
@@ -163,66 +146,30 @@ class StandardPdfPipeline(PaginatedPipeline):
         output_dir = download_models(output_dir=local_dir, force=force, progress=False)
         return output_dir
-    def get_ocr_model(
-        self, artifacts_path: Optional[Path] = None
-    ) -> Optional[BaseOcrModel]:
-        if isinstance(self.pipeline_options.ocr_options, EasyOcrOptions):
-            return EasyOcrModel(
-                enabled=self.pipeline_options.do_ocr,
-                artifacts_path=artifacts_path,
-                options=self.pipeline_options.ocr_options,
-                accelerator_options=self.pipeline_options.accelerator_options,
-            )
-        elif isinstance(self.pipeline_options.ocr_options, TesseractCliOcrOptions):
-            return TesseractOcrCliModel(
-                enabled=self.pipeline_options.do_ocr,
-                options=self.pipeline_options.ocr_options,
-            )
-        elif isinstance(self.pipeline_options.ocr_options, TesseractOcrOptions):
-            return TesseractOcrModel(
-                enabled=self.pipeline_options.do_ocr,
-                options=self.pipeline_options.ocr_options,
-            )
-        elif isinstance(self.pipeline_options.ocr_options, RapidOcrOptions):
-            return RapidOcrModel(
-                enabled=self.pipeline_options.do_ocr,
-                options=self.pipeline_options.ocr_options,
-                accelerator_options=self.pipeline_options.accelerator_options,
-            )
-        elif isinstance(self.pipeline_options.ocr_options, OcrMacOptions):
-            if "darwin" != sys.platform:
-                raise RuntimeError(
-                    f"The specified OCR type is only supported on Mac: {self.pipeline_options.ocr_options.kind}."
-                )
-            return OcrMacModel(
-                enabled=self.pipeline_options.do_ocr,
-                options=self.pipeline_options.ocr_options,
-            )
-        return None
+    def get_ocr_model(self, artifacts_path: Optional[Path] = None) -> BaseOcrModel:
+        factory = get_ocr_factory(
+            allow_external_plugins=self.pipeline_options.allow_external_plugins
+        )
+        return factory.create_instance(
+            options=self.pipeline_options.ocr_options,
+            enabled=self.pipeline_options.do_ocr,
+            artifacts_path=artifacts_path,
+            accelerator_options=self.pipeline_options.accelerator_options,
+        )
     def get_picture_description_model(
         self, artifacts_path: Optional[Path] = None
     ) -> Optional[PictureDescriptionBaseModel]:
-        if isinstance(
-            self.pipeline_options.picture_description_options,
-            PictureDescriptionApiOptions,
-        ):
-            return PictureDescriptionApiModel(
-                enabled=self.pipeline_options.do_picture_description,
-                enable_remote_services=self.pipeline_options.enable_remote_services,
-                options=self.pipeline_options.picture_description_options,
-            )
-        elif isinstance(
-            self.pipeline_options.picture_description_options,
-            PictureDescriptionVlmOptions,
-        ):
-            return PictureDescriptionVlmModel(
-                enabled=self.pipeline_options.do_picture_description,
-                artifacts_path=artifacts_path,
-                options=self.pipeline_options.picture_description_options,
-                accelerator_options=self.pipeline_options.accelerator_options,
-            )
-        return None
+        factory = get_picture_description_factory(
+            allow_external_plugins=self.pipeline_options.allow_external_plugins
+        )
+        return factory.create_instance(
+            options=self.pipeline_options.picture_description_options,
+            enabled=self.pipeline_options.do_picture_description,
+            enable_remote_services=self.pipeline_options.enable_remote_services,
+            artifacts_path=artifacts_path,
+            accelerator_options=self.pipeline_options.accelerator_options,
+        )
     def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
         with TimeRecorder(conv_res, "page_init"):

docling/utils/export.py CHANGED Viewed

@@ -2,9 +2,9 @@ import logging
 from typing import Any, Dict, Iterable, List, Tuple, Union
 from docling_core.types.doc import BoundingBox, CoordOrigin
+from docling_core.types.doc.page import TextCell
 from docling_core.types.legacy_doc.base import BaseCell, BaseText, Ref, Table
-from docling.datamodel.base_models import OcrCell
 from docling.datamodel.document import ConversionResult, Page
 _log = logging.getLogger(__name__)
@@ -86,11 +86,13 @@ def generate_multimodal_pages(
         if page.size is None:
             return cells
         for cell in page.cells:
-            new_bbox = cell.bbox.to_top_left_origin(
-                page_height=page.size.height
-            ).normalized(page_size=page.size)
-            is_ocr = isinstance(cell, OcrCell)
-            ocr_confidence = cell.confidence if isinstance(cell, OcrCell) else 1.0
+            new_bbox = (
+                cell.rect.to_bounding_box()
+                .to_top_left_origin(page_height=page.size.height)
+                .normalized(page_size=page.size)
+            )
+            is_ocr = cell.from_ocr
+            ocr_confidence = cell.confidence
             cells.append(
                 {
                     "text": cell.text,

docling 2.25.2__py3-none-any.whl → 2.27.0__py3-none-any.whl

docling 2.25.2__py3-none-any.whl → 2.27.0__py3-none-any.whl