PyPI - docling - Versions diffs - 2.26.0__py3-none-any.whl → 2.28.0__py3-none-any.whl - Mend

docling 2.26.0__py3-none-any.whl → 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

docling/backend/asciidoc_backend.py +1 -1
docling/backend/csv_backend.py +1 -1
docling/backend/docling_parse_backend.py +21 -13
docling/backend/docling_parse_v2_backend.py +20 -12
docling/backend/docling_parse_v4_backend.py +192 -0
docling/backend/docx/__init__.py +0 -0
docling/backend/docx/latex/__init__.py +0 -0
docling/backend/docx/latex/latex_dict.py +271 -0
docling/backend/docx/latex/omml.py +453 -0
docling/backend/html_backend.py +7 -7
docling/backend/md_backend.py +1 -1
docling/backend/msexcel_backend.py +2 -45
docling/backend/mspowerpoint_backend.py +19 -1
docling/backend/msword_backend.py +68 -3
docling/backend/pdf_backend.py +7 -2
docling/backend/pypdfium2_backend.py +52 -30
docling/backend/xml/uspto_backend.py +1 -1
docling/cli/main.py +135 -53
docling/cli/models.py +1 -1
docling/datamodel/base_models.py +8 -10
docling/datamodel/pipeline_options.py +54 -32
docling/document_converter.py +5 -5
docling/models/base_model.py +9 -1
docling/models/base_ocr_model.py +27 -16
docling/models/easyocr_model.py +28 -13
docling/models/factories/__init__.py +27 -0
docling/models/factories/base_factory.py +122 -0
docling/models/factories/ocr_factory.py +11 -0
docling/models/factories/picture_description_factory.py +11 -0
docling/models/hf_mlx_model.py +137 -0
docling/models/ocr_mac_model.py +39 -11
docling/models/page_preprocessing_model.py +4 -0
docling/models/picture_description_api_model.py +20 -3
docling/models/picture_description_base_model.py +19 -3
docling/models/picture_description_vlm_model.py +14 -2
docling/models/plugins/__init__.py +0 -0
docling/models/plugins/defaults.py +28 -0
docling/models/rapid_ocr_model.py +34 -13
docling/models/table_structure_model.py +13 -4
docling/models/tesseract_ocr_cli_model.py +40 -15
docling/models/tesseract_ocr_model.py +37 -12
docling/pipeline/standard_pdf_pipeline.py +25 -78
docling/pipeline/vlm_pipeline.py +78 -398
docling/utils/export.py +8 -6
docling/utils/layout_postprocessor.py +26 -23
docling/utils/visualization.py +1 -1
{docling-2.26.0.dist-info → docling-2.28.0.dist-info}/METADATA +47 -23
docling-2.28.0.dist-info/RECORD +84 -0
{docling-2.26.0.dist-info → docling-2.28.0.dist-info}/entry_points.txt +3 -0
docling-2.26.0.dist-info/RECORD +0 -72
{docling-2.26.0.dist-info → docling-2.28.0.dist-info}/LICENSE +0 -0
{docling-2.26.0.dist-info → docling-2.28.0.dist-info}/WHEEL +0 -0

docling/datamodel/pipeline_options.py CHANGED Viewed

@@ -1,10 +1,9 @@
 import logging
 import os
 import re
-import warnings
 from enum import Enum
 from pathlib import Path
-from typing import Annotated, Any, Dict, List, Literal, Optional, Union
+from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
 from pydantic import (
     AnyUrl,
@@ -13,13 +12,8 @@ from pydantic import (
     Field,
     field_validator,
     model_validator,
-    validator,
-)
-from pydantic_settings import (
-    BaseSettings,
-    PydanticBaseSettingsSource,
-    SettingsConfigDict,
 )
+from pydantic_settings import BaseSettings, SettingsConfigDict
 from typing_extensions import deprecated
 _log = logging.getLogger(__name__)
@@ -83,6 +77,12 @@ class AcceleratorOptions(BaseSettings):
         return data
+class BaseOptions(BaseModel):
+    """Base class for options."""
+    kind: ClassVar[str]
 class TableFormerMode(str, Enum):
     """Modes for the TableFormer model."""
@@ -102,10 +102,9 @@ class TableStructureOptions(BaseModel):
     mode: TableFormerMode = TableFormerMode.ACCURATE
-class OcrOptions(BaseModel):
+class OcrOptions(BaseOptions):
     """OCR options."""
-    kind: str
     lang: List[str]
     force_full_page_ocr: bool = False  # If enabled a full page OCR is always applied
     bitmap_area_threshold: float = (
@@ -116,7 +115,7 @@ class OcrOptions(BaseModel):
 class RapidOcrOptions(OcrOptions):
     """Options for the RapidOCR engine."""
-    kind: Literal["rapidocr"] = "rapidocr"
+    kind: ClassVar[Literal["rapidocr"]] = "rapidocr"
     # English and chinese are the most commly used models and have been tested with RapidOCR.
     lang: List[str] = [
@@ -155,7 +154,7 @@ class RapidOcrOptions(OcrOptions):
 class EasyOcrOptions(OcrOptions):
     """Options for the EasyOCR engine."""
-    kind: Literal["easyocr"] = "easyocr"
+    kind: ClassVar[Literal["easyocr"]] = "easyocr"
     lang: List[str] = ["fr", "de", "es", "en"]
     use_gpu: Optional[bool] = None
@@ -175,7 +174,7 @@ class EasyOcrOptions(OcrOptions):
 class TesseractCliOcrOptions(OcrOptions):
     """Options for the TesseractCli engine."""
-    kind: Literal["tesseract"] = "tesseract"
+    kind: ClassVar[Literal["tesseract"]] = "tesseract"
     lang: List[str] = ["fra", "deu", "spa", "eng"]
     tesseract_cmd: str = "tesseract"
     path: Optional[str] = None
@@ -188,7 +187,7 @@ class TesseractCliOcrOptions(OcrOptions):
 class TesseractOcrOptions(OcrOptions):
     """Options for the Tesseract engine."""
-    kind: Literal["tesserocr"] = "tesserocr"
+    kind: ClassVar[Literal["tesserocr"]] = "tesserocr"
     lang: List[str] = ["fra", "deu", "spa", "eng"]
     path: Optional[str] = None
@@ -200,7 +199,7 @@ class TesseractOcrOptions(OcrOptions):
 class OcrMacOptions(OcrOptions):
     """Options for the Mac OCR engine."""
-    kind: Literal["ocrmac"] = "ocrmac"
+    kind: ClassVar[Literal["ocrmac"]] = "ocrmac"
     lang: List[str] = ["fr-FR", "de-DE", "es-ES", "en-US"]
     recognition: str = "accurate"
     framework: str = "vision"
@@ -210,8 +209,7 @@ class OcrMacOptions(OcrOptions):
     )
-class PictureDescriptionBaseOptions(BaseModel):
-    kind: str
+class PictureDescriptionBaseOptions(BaseOptions):
     batch_size: int = 8
     scale: float = 2
@@ -221,7 +219,7 @@ class PictureDescriptionBaseOptions(BaseModel):
 class PictureDescriptionApiOptions(PictureDescriptionBaseOptions):
-    kind: Literal["api"] = "api"
+    kind: ClassVar[Literal["api"]] = "api"
     url: AnyUrl = AnyUrl("http://localhost:8000/v1/chat/completions")
     headers: Dict[str, str] = {}
@@ -233,7 +231,7 @@ class PictureDescriptionApiOptions(PictureDescriptionBaseOptions):
 class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions):
-    kind: Literal["vlm"] = "vlm"
+    kind: ClassVar[Literal["vlm"]] = "vlm"
     repo_id: str
     prompt: str = "Describe this image in a few sentences."
@@ -265,6 +263,11 @@ class ResponseFormat(str, Enum):
     MARKDOWN = "markdown"
+class InferenceFramework(str, Enum):
+    MLX = "mlx"
+    TRANSFORMERS = "transformers"
 class HuggingFaceVlmOptions(BaseVlmOptions):
     kind: Literal["hf_model_options"] = "hf_model_options"
@@ -273,6 +276,7 @@ class HuggingFaceVlmOptions(BaseVlmOptions):
     llm_int8_threshold: float = 6.0
     quantized: bool = False
+    inference_framework: InferenceFramework
     response_format: ResponseFormat
     @property
@@ -280,10 +284,19 @@ class HuggingFaceVlmOptions(BaseVlmOptions):
         return self.repo_id.replace("/", "--")
+smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
+    repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
+    prompt="Convert this page to docling.",
+    response_format=ResponseFormat.DOCTAGS,
+    inference_framework=InferenceFramework.MLX,
+)
 smoldocling_vlm_conversion_options = HuggingFaceVlmOptions(
     repo_id="ds4sd/SmolDocling-256M-preview",
     prompt="Convert this page to docling.",
     response_format=ResponseFormat.DOCTAGS,
+    inference_framework=InferenceFramework.TRANSFORMERS,
 )
 granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
@@ -291,9 +304,15 @@ granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
     # prompt="OCR the full page to markdown.",
     prompt="OCR this image.",
     response_format=ResponseFormat.MARKDOWN,
+    inference_framework=InferenceFramework.TRANSFORMERS,
 )
+class VlmModelType(str, Enum):
+    SMOLDOCLING = "smoldocling"
+    GRANITE_VISION = "granite_vision"
 # Define an enum for the backend options
 class PdfBackend(str, Enum):
     """Enum of valid PDF backends."""
@@ -301,9 +320,11 @@ class PdfBackend(str, Enum):
     PYPDFIUM2 = "pypdfium2"
     DLPARSE_V1 = "dlparse_v1"
     DLPARSE_V2 = "dlparse_v2"
+    DLPARSE_V4 = "dlparse_v4"
 # Define an enum for the ocr engines
+@deprecated("Use ocr_factory.registered_enum")
 class OcrEngine(str, Enum):
     """Enum of valid OCR engines."""
@@ -323,16 +344,18 @@ class PipelineOptions(BaseModel):
     document_timeout: Optional[float] = None
     accelerator_options: AcceleratorOptions = AcceleratorOptions()
     enable_remote_services: bool = False
+    allow_external_plugins: bool = False
 class PaginatedPipelineOptions(PipelineOptions):
+    artifacts_path: Optional[Union[Path, str]] = None
     images_scale: float = 1.0
     generate_page_images: bool = False
     generate_picture_images: bool = False
 class VlmPipelineOptions(PaginatedPipelineOptions):
-    artifacts_path: Optional[Union[Path, str]] = None
     generate_page_images: bool = True
     force_backend_text: bool = (
@@ -345,7 +368,6 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
 class PdfPipelineOptions(PaginatedPipelineOptions):
     """Options for the PDF pipeline."""
-    artifacts_path: Optional[Union[Path, str]] = None
     do_table_structure: bool = True  # True: perform table structure extraction
     do_ocr: bool = True  # True: perform OCR, replace programmatic PDF text
     do_code_enrichment: bool = False  # True: perform code OCR
@@ -358,17 +380,10 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
     # If True, text from backend will be used instead of generated text
     table_structure_options: TableStructureOptions = TableStructureOptions()
-    ocr_options: Union[
-        EasyOcrOptions,
-        TesseractCliOcrOptions,
-        TesseractOcrOptions,
-        OcrMacOptions,
-        RapidOcrOptions,
-    ] = Field(EasyOcrOptions(), discriminator="kind")
-    picture_description_options: Annotated[
-        Union[PictureDescriptionApiOptions, PictureDescriptionVlmOptions],
-        Field(discriminator="kind"),
-    ] = smolvlm_picture_description
+    ocr_options: OcrOptions = EasyOcrOptions()
+    picture_description_options: PictureDescriptionBaseOptions = (
+        smolvlm_picture_description
+    )
     images_scale: float = 1.0
     generate_page_images: bool = False
@@ -381,3 +396,10 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
             "before conversion and then use the `TableItem.get_image` function."
         ),
     )
+    generate_parsed_pages: bool = False
+class PdfPipeline(str, Enum):
+    STANDARD = "standard"
+    VLM = "vlm"

docling/document_converter.py CHANGED Viewed

@@ -11,7 +11,7 @@ from pydantic import BaseModel, ConfigDict, model_validator, validate_call
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.asciidoc_backend import AsciiDocBackend
 from docling.backend.csv_backend import CsvDocumentBackend
-from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
+from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.backend.html_backend import HTMLDocumentBackend
 from docling.backend.json.docling_json_backend import DoclingJSONBackend
 from docling.backend.md_backend import MarkdownDocumentBackend
@@ -109,12 +109,12 @@ class XMLJatsFormatOption(FormatOption):
 class ImageFormatOption(FormatOption):
     pipeline_cls: Type = StandardPdfPipeline
-    backend: Type[AbstractDocumentBackend] = DoclingParseV2DocumentBackend
+    backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
 class PdfFormatOption(FormatOption):
     pipeline_cls: Type = StandardPdfPipeline
-    backend: Type[AbstractDocumentBackend] = DoclingParseV2DocumentBackend
+    backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend
 def _get_default_option(format: InputFormat) -> FormatOption:
@@ -147,10 +147,10 @@ def _get_default_option(format: InputFormat) -> FormatOption:
             pipeline_cls=SimplePipeline, backend=JatsDocumentBackend
         ),
         InputFormat.IMAGE: FormatOption(
-            pipeline_cls=StandardPdfPipeline, backend=DoclingParseV2DocumentBackend
+            pipeline_cls=StandardPdfPipeline, backend=DoclingParseV4DocumentBackend
         ),
         InputFormat.PDF: FormatOption(
-            pipeline_cls=StandardPdfPipeline, backend=DoclingParseV2DocumentBackend
+            pipeline_cls=StandardPdfPipeline, backend=DoclingParseV4DocumentBackend
         ),
         InputFormat.JSON_DOCLING: FormatOption(
             pipeline_cls=SimplePipeline, backend=DoclingJSONBackend

docling/models/base_model.py CHANGED Viewed

@@ -1,14 +1,22 @@
 from abc import ABC, abstractmethod
-from typing import Any, Generic, Iterable, Optional
+from typing import Any, Generic, Iterable, Optional, Protocol, Type
 from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeItem
 from typing_extensions import TypeVar
 from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page
 from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import BaseOptions
 from docling.datamodel.settings import settings
+class BaseModelWithOptions(Protocol):
+    @classmethod
+    def get_options_type(cls) -> Type[BaseOptions]: ...
+    def __init__(self, *, options: BaseOptions, **kwargs): ...
 class BasePageModel(ABC):
     @abstractmethod
     def __call__(

docling/models/base_ocr_model.py CHANGED Viewed

@@ -2,25 +2,33 @@ import copy
 import logging
 from abc import abstractmethod
 from pathlib import Path
-from typing import Iterable, List
+from typing import Iterable, List, Optional, Type
 import numpy as np
 from docling_core.types.doc import BoundingBox, CoordOrigin
+from docling_core.types.doc.page import BoundingRectangle, PdfTextCell, TextCell
 from PIL import Image, ImageDraw
 from rtree import index
 from scipy.ndimage import binary_dilation, find_objects, label
-from docling.datamodel.base_models import Cell, OcrCell, Page
+from docling.datamodel.base_models import Page
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import OcrOptions
+from docling.datamodel.pipeline_options import AcceleratorOptions, OcrOptions
 from docling.datamodel.settings import settings
-from docling.models.base_model import BasePageModel
+from docling.models.base_model import BaseModelWithOptions, BasePageModel
 _log = logging.getLogger(__name__)
-class BaseOcrModel(BasePageModel):
-    def __init__(self, enabled: bool, options: OcrOptions):
+class BaseOcrModel(BasePageModel, BaseModelWithOptions):
+    def __init__(
+        self,
+        *,
+        enabled: bool,
+        artifacts_path: Optional[Path],
+        options: OcrOptions,
+        accelerator_options: AcceleratorOptions,
+    ):
         self.enabled = enabled
         self.options = options
@@ -104,11 +112,13 @@ class BaseOcrModel(BasePageModel):
         p.dimension = 2
         idx = index.Index(properties=p)
         for i, cell in enumerate(programmatic_cells):
-            idx.insert(i, cell.bbox.as_tuple())
+            idx.insert(i, cell.rect.to_bounding_box().as_tuple())
         def is_overlapping_with_existing_cells(ocr_cell):
             # Query the R-tree to get overlapping rectangles
-            possible_matches_index = list(idx.intersection(ocr_cell.bbox.as_tuple()))
+            possible_matches_index = list(
+                idx.intersection(ocr_cell.rect.to_bounding_box().as_tuple())
+            )
             return (
                 len(possible_matches_index) > 0
@@ -125,10 +135,7 @@ class BaseOcrModel(BasePageModel):
         """
         if self.options.force_full_page_ocr:
             # If a full page OCR is forced, use only the OCR cells
-            cells = [
-                Cell(id=c_ocr.id, text=c_ocr.text, bbox=c_ocr.bbox)
-                for c_ocr in ocr_cells
-            ]
+            cells = ocr_cells
             return cells
         ## Remove OCR cells which overlap with programmatic cells.
@@ -156,7 +163,7 @@ class BaseOcrModel(BasePageModel):
         # Draw OCR and programmatic cells
         for tc in page.cells:
-            x0, y0, x1, y1 = tc.bbox.as_tuple()
+            x0, y0, x1, y1 = tc.rect.to_bounding_box().as_tuple()
             y0 *= scale_x
             y1 *= scale_y
             x0 *= scale_x
@@ -165,9 +172,8 @@ class BaseOcrModel(BasePageModel):
             if y1 <= y0:
                 y1, y0 = y0, y1
-            color = "gray"
-            if isinstance(tc, OcrCell):
-                color = "magenta"
+            color = "magenta" if tc.from_ocr else "gray"
             draw.rectangle([(x0, y0), (x1, y1)], outline=color)
         if show:
@@ -187,3 +193,8 @@ class BaseOcrModel(BasePageModel):
         self, conv_res: ConversionResult, page_batch: Iterable[Page]
     ) -> Iterable[Page]:
         pass
+    @classmethod
+    @abstractmethod
+    def get_options_type(cls) -> Type[OcrOptions]:
+        pass

docling/models/easyocr_model.py CHANGED Viewed

@@ -2,17 +2,19 @@ import logging
 import warnings
 import zipfile
 from pathlib import Path
-from typing import Iterable, List, Optional
+from typing import Iterable, List, Optional, Type
 import numpy
 from docling_core.types.doc import BoundingBox, CoordOrigin
+from docling_core.types.doc.page import BoundingRectangle, TextCell
-from docling.datamodel.base_models import Cell, OcrCell, Page
+from docling.datamodel.base_models import Page
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
     AcceleratorDevice,
     AcceleratorOptions,
     EasyOcrOptions,
+    OcrOptions,
 )
 from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
@@ -33,7 +35,12 @@ class EasyOcrModel(BaseOcrModel):
         options: EasyOcrOptions,
         accelerator_options: AcceleratorOptions,
     ):
-        super().__init__(enabled=enabled, options=options)
+        super().__init__(
+            enabled=enabled,
+            artifacts_path=artifacts_path,
+            options=options,
+            accelerator_options=accelerator_options,
+        )
         self.options: EasyOcrOptions
         self.scale = 3  # multiplier for 72 dpi == 216 dpi.
@@ -148,18 +155,22 @@ class EasyOcrModel(BaseOcrModel):
                         del im
                         cells = [
-                            OcrCell(
-                                id=ix,
+                            TextCell(
+                                index=ix,
                                 text=line[1],
+                                orig=line[1],
+                                from_ocr=True,
                                 confidence=line[2],
-                                bbox=BoundingBox.from_tuple(
-                                    coord=(
-                                        (line[0][0][0] / self.scale) + ocr_rect.l,
-                                        (line[0][0][1] / self.scale) + ocr_rect.t,
-                                        (line[0][2][0] / self.scale) + ocr_rect.l,
-                                        (line[0][2][1] / self.scale) + ocr_rect.t,
-                                    ),
-                                    origin=CoordOrigin.TOPLEFT,
+                                rect=BoundingRectangle.from_bounding_box(
+                                    BoundingBox.from_tuple(
+                                        coord=(
+                                            (line[0][0][0] / self.scale) + ocr_rect.l,
+                                            (line[0][0][1] / self.scale) + ocr_rect.t,
+                                            (line[0][2][0] / self.scale) + ocr_rect.l,
+                                            (line[0][2][1] / self.scale) + ocr_rect.t,
+                                        ),
+                                        origin=CoordOrigin.TOPLEFT,
+                                    )
                                 ),
                             )
                             for ix, line in enumerate(result)
@@ -175,3 +186,7 @@ class EasyOcrModel(BaseOcrModel):
                     self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
                 yield page
+    @classmethod
+    def get_options_type(cls) -> Type[OcrOptions]:
+        return EasyOcrOptions

docling/models/factories/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+import logging
+from functools import lru_cache
+from docling.models.factories.ocr_factory import OcrFactory
+from docling.models.factories.picture_description_factory import (
+    PictureDescriptionFactory,
+)
+logger = logging.getLogger(__name__)
+@lru_cache()
+def get_ocr_factory(allow_external_plugins: bool = False) -> OcrFactory:
+    factory = OcrFactory()
+    factory.load_from_plugins(allow_external_plugins=allow_external_plugins)
+    logger.info("Registered ocr engines: %r", factory.registered_kind)
+    return factory
+@lru_cache()
+def get_picture_description_factory(
+    allow_external_plugins: bool = False,
+) -> PictureDescriptionFactory:
+    factory = PictureDescriptionFactory()
+    factory.load_from_plugins(allow_external_plugins=allow_external_plugins)
+    logger.info("Registered picture descriptions: %r", factory.registered_kind)
+    return factory

docling/models/factories/base_factory.py ADDED Viewed

@@ -0,0 +1,122 @@
+import enum
+import logging
+from abc import ABCMeta
+from typing import Generic, Optional, Type, TypeVar
+from pluggy import PluginManager
+from pydantic import BaseModel
+from docling.datamodel.pipeline_options import BaseOptions
+from docling.models.base_model import BaseModelWithOptions
+A = TypeVar("A", bound=BaseModelWithOptions)
+logger = logging.getLogger(__name__)
+class FactoryMeta(BaseModel):
+    kind: str
+    plugin_name: str
+    module: str
+class BaseFactory(Generic[A], metaclass=ABCMeta):
+    default_plugin_name = "docling"
+    def __init__(self, plugin_attr_name: str, plugin_name=default_plugin_name):
+        self.plugin_name = plugin_name
+        self.plugin_attr_name = plugin_attr_name
+        self._classes: dict[Type[BaseOptions], Type[A]] = {}
+        self._meta: dict[Type[BaseOptions], FactoryMeta] = {}
+    @property
+    def registered_kind(self) -> list[str]:
+        return list(opt.kind for opt in self._classes.keys())
+    def get_enum(self) -> enum.Enum:
+        return enum.Enum(
+            self.plugin_attr_name + "_enum",
+            names={kind: kind for kind in self.registered_kind},
+            type=str,
+            module=__name__,
+        )
+    @property
+    def classes(self):
+        return self._classes
+    @property
+    def registered_meta(self):
+        return self._meta
+    def create_instance(self, options: BaseOptions, **kwargs) -> A:
+        try:
+            _cls = self._classes[type(options)]
+            return _cls(options=options, **kwargs)
+        except KeyError:
+            raise RuntimeError(self._err_msg_on_class_not_found(options.kind))
+    def create_options(self, kind: str, *args, **kwargs) -> BaseOptions:
+        for opt_cls, _ in self._classes.items():
+            if opt_cls.kind == kind:
+                return opt_cls(*args, **kwargs)
+        raise RuntimeError(self._err_msg_on_class_not_found(kind))
+    def _err_msg_on_class_not_found(self, kind: str):
+        msg = []
+        for opt, cls in self._classes.items():
+            msg.append(f"\t{opt.kind!r} => {cls!r}")
+        msg_str = "\n".join(msg)
+        return f"No class found with the name {kind!r}, known classes are:\n{msg_str}"
+    def register(self, cls: Type[A], plugin_name: str, plugin_module_name: str):
+        opt_type = cls.get_options_type()
+        if opt_type in self._classes:
+            raise ValueError(
+                f"{opt_type.kind!r} already registered to class {self._classes[opt_type]!r}"
+            )
+        self._classes[opt_type] = cls
+        self._meta[opt_type] = FactoryMeta(
+            kind=opt_type.kind, plugin_name=plugin_name, module=plugin_module_name
+        )
+    def load_from_plugins(
+        self, plugin_name: Optional[str] = None, allow_external_plugins: bool = False
+    ):
+        plugin_name = plugin_name or self.plugin_name
+        plugin_manager = PluginManager(plugin_name)
+        plugin_manager.load_setuptools_entrypoints(plugin_name)
+        for plugin_name, plugin_module in plugin_manager.list_name_plugin():
+            plugin_module_name = str(plugin_module.__name__)  # type: ignore
+            if not allow_external_plugins and not plugin_module_name.startswith(
+                "docling."
+            ):
+                logger.warning(
+                    f"The plugin {plugin_name} will not be loaded because Docling is being executed with allow_external_plugins=false."
+                )
+                continue
+            attr = getattr(plugin_module, self.plugin_attr_name, None)
+            if callable(attr):
+                logger.info("Loading plugin %r", plugin_name)
+                config = attr()
+                self.process_plugin(config, plugin_name, plugin_module_name)
+    def process_plugin(self, config, plugin_name: str, plugin_module_name: str):
+        for item in config[self.plugin_attr_name]:
+            try:
+                self.register(item, plugin_name, plugin_module_name)
+            except ValueError:
+                logger.warning("%r already registered", item)

docling/models/factories/ocr_factory.py ADDED Viewed

@@ -0,0 +1,11 @@
+import logging
+from docling.models.base_ocr_model import BaseOcrModel
+from docling.models.factories.base_factory import BaseFactory
+logger = logging.getLogger(__name__)
+class OcrFactory(BaseFactory[BaseOcrModel]):
+    def __init__(self, *args, **kwargs):
+        super().__init__("ocr_engines", *args, **kwargs)

docling/models/factories/picture_description_factory.py ADDED Viewed

@@ -0,0 +1,11 @@
+import logging
+from docling.models.factories.base_factory import BaseFactory
+from docling.models.picture_description_base_model import PictureDescriptionBaseModel
+logger = logging.getLogger(__name__)
+class PictureDescriptionFactory(BaseFactory[PictureDescriptionBaseModel]):
+    def __init__(self, *args, **kwargs):
+        super().__init__("picture_description", *args, **kwargs)

docling 2.26.0__py3-none-any.whl → 2.28.0__py3-none-any.whl

docling 2.26.0__py3-none-any.whl → 2.28.0__py3-none-any.whl