PyPI - docling - Versions diffs - 2.30.0__py3-none-any.whl → 2.31.0__py3-none-any.whl - Mend

docling 2.30.0__py3-none-any.whl → 2.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

docling/backend/asciidoc_backend.py +7 -15
docling/backend/csv_backend.py +1 -1
docling/backend/docling_parse_backend.py +2 -2
docling/backend/docling_parse_v2_backend.py +2 -2
docling/backend/docling_parse_v4_backend.py +3 -4
docling/backend/docx/latex/latex_dict.py +0 -5
docling/backend/docx/latex/omml.py +4 -7
docling/backend/html_backend.py +26 -9
docling/backend/md_backend.py +5 -7
docling/backend/msexcel_backend.py +1 -7
docling/backend/mspowerpoint_backend.py +4 -7
docling/backend/msword_backend.py +4 -4
docling/backend/pdf_backend.py +2 -1
docling/backend/pypdfium2_backend.py +3 -3
docling/backend/xml/jats_backend.py +10 -13
docling/backend/xml/uspto_backend.py +15 -19
docling/cli/main.py +7 -7
docling/cli/models.py +2 -3
docling/datamodel/base_models.py +7 -5
docling/datamodel/document.py +11 -10
docling/datamodel/pipeline_options.py +0 -1
docling/document_converter.py +5 -5
docling/models/api_vlm_model.py +1 -2
docling/models/base_model.py +2 -4
docling/models/base_ocr_model.py +2 -2
docling/models/code_formula_model.py +2 -1
docling/models/document_picture_classifier.py +2 -1
docling/models/easyocr_model.py +10 -11
docling/models/factories/__init__.py +2 -2
docling/models/factories/base_factory.py +1 -1
docling/models/hf_mlx_model.py +4 -6
docling/models/hf_vlm_model.py +7 -5
docling/models/layout_model.py +2 -2
docling/models/ocr_mac_model.py +3 -4
docling/models/page_assemble_model.py +7 -12
docling/models/page_preprocessing_model.py +2 -1
docling/models/picture_description_api_model.py +2 -1
docling/models/picture_description_base_model.py +2 -3
docling/models/picture_description_vlm_model.py +2 -3
docling/models/rapid_ocr_model.py +2 -3
docling/models/readingorder_model.py +8 -23
docling/models/table_structure_model.py +2 -6
docling/models/tesseract_ocr_cli_model.py +17 -16
docling/models/tesseract_ocr_model.py +8 -6
docling/pipeline/base_pipeline.py +4 -8
docling/pipeline/simple_pipeline.py +0 -1
docling/pipeline/standard_pdf_pipeline.py +0 -1
docling/pipeline/vlm_pipeline.py +0 -3
docling/utils/export.py +2 -4
docling/utils/glm_utils.py +2 -2
docling/utils/layout_postprocessor.py +4 -2
docling/utils/model_downloader.py +7 -7
docling/utils/utils.py +1 -1
{docling-2.30.0.dist-info → docling-2.31.0.dist-info}/METADATA +2 -1
docling-2.31.0.dist-info/RECORD +86 -0
docling-2.30.0.dist-info/RECORD +0 -86
{docling-2.30.0.dist-info → docling-2.31.0.dist-info}/LICENSE +0 -0
{docling-2.30.0.dist-info → docling-2.31.0.dist-info}/WHEEL +0 -0
{docling-2.30.0.dist-info → docling-2.31.0.dist-info}/entry_points.txt +0 -0

docling/backend/xml/jats_backend.py CHANGED Viewed

@@ -102,13 +102,13 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
             doc_info: etree.DocInfo = self.tree.docinfo
             if doc_info.system_url and any(
-                [kwd in doc_info.system_url for kwd in JATS_DTD_URL]
+                kwd in doc_info.system_url for kwd in JATS_DTD_URL
             ):
                 self.valid = True
                 return
             for ent in doc_info.internalDTD.iterentities():
                 if ent.system_url and any(
-                    [kwd in ent.system_url for kwd in JATS_DTD_URL]
+                    kwd in ent.system_url for kwd in JATS_DTD_URL
                 ):
                     self.valid = True
                     return
@@ -232,10 +232,9 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
                 # TODO: once superscript is supported, add label with formatting
                 aff = aff.removeprefix(f"{label[0].text}, ")
             affiliation_names.append(aff)
-        affiliation_ids_names = {
-            id: name
-            for id, name in zip(meta.xpath(".//aff[@id]/@id"), affiliation_names)
-        }
+        affiliation_ids_names = dict(
+            zip(meta.xpath(".//aff[@id]/@id"), affiliation_names)
+        )
         # Get author names and affiliation names
         for author_node in meta.xpath(
@@ -300,7 +299,6 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
     def _add_abstract(
         self, doc: DoclingDocument, xml_components: XMLComponents
     ) -> None:
         for abstract in xml_components["abstract"]:
             text: str = abstract["content"]
             title: str = abstract["label"] or DEFAULT_HEADER_ABSTRACT
@@ -349,7 +347,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
         return
-    def _parse_element_citation(self, node: etree._Element) -> str:
+    def _parse_element_citation(self, node: etree._Element) -> str:  # noqa: C901
         citation: Citation = {
             "author_names": "",
             "title": "",
@@ -440,7 +438,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
             citation["page"] = node.xpath("fpage")[0].text.replace("\n", " ").strip()
             if len(node.xpath("lpage")) > 0:
                 citation["page"] += (
-                    "–" + node.xpath("lpage")[0].text.replace("\n", " ").strip()
+                    "–" + node.xpath("lpage")[0].text.replace("\n", " ").strip()  # noqa: RUF001
                 )
         # Flatten the citation to string
@@ -595,9 +593,8 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
         try:
             self._add_table(doc, parent, table)
-        except Exception as e:
-            _log.warning(f"Skipping unsupported table in {str(self.file)}")
-            pass
+        except Exception:
+            _log.warning(f"Skipping unsupported table in {self.file!s}")
         return
@@ -609,7 +606,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
         )
         return
-    def _walk_linear(
+    def _walk_linear(  # noqa: C901
         self, doc: DoclingDocument, parent: NodeItem, node: etree._Element
     ) -> str:
         skip_tags = ["term"]

docling/backend/xml/uspto_backend.py CHANGED Viewed

@@ -122,7 +122,6 @@ class PatentUsptoDocumentBackend(DeclarativeDocumentBackend):
     @override
     def convert(self) -> DoclingDocument:
         if self.parser is not None:
             doc = self.parser.parse(self.patent_content)
             if doc is None:
@@ -163,7 +162,6 @@ class PatentUspto(ABC):
         Returns:
             The patent parsed as a docling document.
         """
-        pass
 class PatentUsptoIce(PatentUspto):
@@ -265,7 +263,7 @@ class PatentUsptoIce(PatentUspto):
             self.style_html = HtmlEntity()
         @override
-        def startElement(self, tag, attributes):  # noqa: N802
+        def startElement(self, tag, attributes):
             """Signal the start of an element.
             Args:
@@ -281,7 +279,7 @@ class PatentUsptoIce(PatentUspto):
             self._start_registered_elements(tag, attributes)
         @override
-        def skippedEntity(self, name):  # noqa: N802
+        def skippedEntity(self, name):
             """Receive notification of a skipped entity.
             HTML entities will be skipped by the parser. This method will unescape them
@@ -315,7 +313,7 @@ class PatentUsptoIce(PatentUspto):
                         self.text += unescaped
         @override
-        def endElement(self, tag):  # noqa: N802
+        def endElement(self, tag):
             """Signal the end of an element.
             Args:
@@ -603,7 +601,7 @@ class PatentUsptoGrantV2(PatentUspto):
             self.style_html = HtmlEntity()
         @override
-        def startElement(self, tag, attributes):  # noqa: N802
+        def startElement(self, tag, attributes):
             """Signal the start of an element.
             Args:
@@ -616,7 +614,7 @@ class PatentUsptoGrantV2(PatentUspto):
             self._start_registered_elements(tag, attributes)
         @override
-        def skippedEntity(self, name):  # noqa: N802
+        def skippedEntity(self, name):
             """Receive notification of a skipped entity.
             HTML entities will be skipped by the parser. This method will unescape them
@@ -650,7 +648,7 @@ class PatentUsptoGrantV2(PatentUspto):
                         self.text += unescaped
         @override
-        def endElement(self, tag):  # noqa: N802
+        def endElement(self, tag):
             """Signal the end of an element.
             Args:
@@ -691,7 +689,7 @@ class PatentUsptoGrantV2(PatentUspto):
             if tag in [member.value for member in self.Element]:
                 if (
                     tag == self.Element.HEADING.value
-                    and not self.Element.SDOCL.value in self.property
+                    and self.Element.SDOCL.value not in self.property
                 ):
                     level_attr: str = attributes.get("LVL", "")
                     new_level: int = int(level_attr) if level_attr.isnumeric() else 1
@@ -743,7 +741,7 @@ class PatentUsptoGrantV2(PatentUspto):
                 # headers except claims statement
                 elif (
                     self.Element.HEADING.value in self.property
-                    and not self.Element.SDOCL.value in self.property
+                    and self.Element.SDOCL.value not in self.property
                     and text.strip()
                 ):
                     self.parents[self.level + 1] = self.doc.add_heading(
@@ -1164,7 +1162,7 @@ class PatentUsptoAppV1(PatentUspto):
             self.style_html = HtmlEntity()
         @override
-        def startElement(self, tag, attributes):  # noqa: N802
+        def startElement(self, tag, attributes):
             """Signal the start of an element.
             Args:
@@ -1177,7 +1175,7 @@ class PatentUsptoAppV1(PatentUspto):
             self._start_registered_elements(tag, attributes)
         @override
-        def skippedEntity(self, name):  # noqa: N802
+        def skippedEntity(self, name):
             """Receive notification of a skipped entity.
             HTML entities will be skipped by the parser. This method will unescape them
@@ -1211,7 +1209,7 @@ class PatentUsptoAppV1(PatentUspto):
                         self.text += unescaped
         @override
-        def endElement(self, tag):  # noqa: N802
+        def endElement(self, tag):
             """Signal the end of an element.
             Args:
@@ -1474,9 +1472,7 @@ class XmlTable:
                 if cw == 0:
                     offset_w0.append(col["offset"][ic])
-            min_colinfo["offset"] = sorted(
-                list(set(col["offset"] + min_colinfo["offset"]))
-            )
+            min_colinfo["offset"] = sorted(set(col["offset"] + min_colinfo["offset"]))
         # add back the 0 width cols to offset list
         offset_w0 = list(set(offset_w0))
@@ -1527,7 +1523,7 @@ class XmlTable:
         return ncols_max
-    def _parse_table(self, table: Tag) -> TableData:
+    def _parse_table(self, table: Tag) -> TableData:  # noqa: C901
         """Parse the content of a table tag.
         Args:
@@ -1722,7 +1718,7 @@ class HtmlEntity:
                 "0": "&#8304;",
                 "+": "&#8314;",
                 "-": "&#8315;",
-                "−": "&#8315;",
+                "−": "&#8315;",  # noqa: RUF001
                 "=": "&#8316;",
                 "(": "&#8317;",
                 ")": "&#8318;",
@@ -1746,7 +1742,7 @@ class HtmlEntity:
                 "0": "&#8320;",
                 "+": "&#8330;",
                 "-": "&#8331;",
-                "−": "&#8331;",
+                "−": "&#8331;",  # noqa: RUF001
                 "=": "&#8332;",
                 "(": "&#8333;",
                 ")": "&#8334;",

docling/cli/main.py CHANGED Viewed

@@ -6,14 +6,16 @@ import sys
 import tempfile
 import time
 import warnings
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Annotated, Dict, Iterable, List, Optional, Type
+from typing import Annotated, Dict, List, Optional, Type
 import rich.table
 import typer
 from docling_core.types.doc import ImageRefMode
 from docling_core.utils.file import resolve_source_to_path
 from pydantic import TypeAdapter
+from rich.console import Console
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
@@ -53,7 +55,6 @@ warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|
 warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
 _log = logging.getLogger(__name__)
-from rich.console import Console
 console = Console()
 err_console = Console(stderr=True)
@@ -160,7 +161,6 @@ def export_documents(
     export_doctags: bool,
     image_export_mode: ImageRefMode,
 ):
     success_count = 0
     failure_count = 0
@@ -233,7 +233,7 @@ def _split_list(raw: Optional[str]) -> Optional[List[str]]:
 @app.command(no_args_is_help=True)
-def convert(
+def convert(  # noqa: C901
     input_sources: Annotated[
         List[str],
         typer.Argument(
@@ -289,7 +289,7 @@ def convert(
             ...,
             help=(
                 f"The OCR engine to use. When --allow-external-plugins is *not* set, the available values are: "
-                f"{', '.join((o.value for o in ocr_engines_enum_internal))}. "
+                f"{', '.join(o.value for o in ocr_engines_enum_internal)}. "
                 f"Use the option --show-external-plugins to see the options allowed with external plugins."
             ),
         ),
@@ -421,7 +421,7 @@ def convert(
         logging.basicConfig(level=logging.WARNING)
     elif verbose == 1:
         logging.basicConfig(level=logging.INFO)
-    elif verbose == 2:
+    else:
         logging.basicConfig(level=logging.DEBUG)
     settings.debug.visualize_cells = debug_visualize_cells
@@ -430,7 +430,7 @@ def convert(
     settings.debug.visualize_ocr = debug_visualize_ocr
     if from_formats is None:
-        from_formats = [e for e in InputFormat]
+        from_formats = list(InputFormat)
     parsed_headers: Optional[Dict[str, str]] = None
     if headers is not None:

docling/cli/models.py CHANGED Viewed

@@ -62,7 +62,7 @@ def download(
     models: Annotated[
         Optional[list[_AvailableModels]],
         typer.Argument(
-            help=f"Models to download (default behavior: a predefined set of models will be downloaded).",
+            help="Models to download (default behavior: a predefined set of models will be downloaded).",
         ),
     ] = None,
     all: Annotated[
@@ -89,14 +89,13 @@ def download(
             "Cannot simultaneously set 'all' parameter and specify models to download."
         )
     if not quiet:
-        FORMAT = "%(message)s"
         logging.basicConfig(
             level=logging.INFO,
             format="[blue]%(message)s[/blue]",
             datefmt="[%X]",
             handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
         )
-    to_download = models or ([m for m in _AvailableModels] if all else _default_models)
+    to_download = models or (list(_AvailableModels) if all else _default_models)
     output_dir = download_models(
         output_dir=output_dir,
         force=force,

docling/datamodel/base_models.py CHANGED Viewed

@@ -10,7 +10,9 @@ from docling_core.types.doc import (
     TableCell,
 )
 from docling_core.types.doc.page import SegmentedPdfPage, TextCell
-from docling_core.types.io import (  # DO ΝΟΤ REMOVE; explicitly exposed from this location
+# DO NOT REMOVE; explicitly exposed from this location
+from docling_core.types.io import (
     DocumentStream,
 )
 from PIL.Image import Image
@@ -233,9 +235,9 @@ class Page(BaseModel):
         None  # Internal PDF backend. By default it is cleared during assembling.
     )
     _default_image_scale: float = 1.0  # Default image scale for external usage.
-    _image_cache: Dict[float, Image] = (
-        {}
-    )  # Cache of images in different scales. By default it is cleared during assembling.
+    _image_cache: Dict[
+        float, Image
+    ] = {}  # Cache of images in different scales. By default it is cleared during assembling.
     def get_image(
         self, scale: float = 1.0, cropbox: Optional[BoundingBox] = None
@@ -243,7 +245,7 @@ class Page(BaseModel):
         if self._backend is None:
             return self._image_cache.get(scale, None)
-        if not scale in self._image_cache:
+        if scale not in self._image_cache:
             if cropbox is None:
                 self._image_cache[scale] = self._backend.get_page_image(scale=scale)
             else:

docling/datamodel/document.py CHANGED Viewed

@@ -1,13 +1,13 @@
 import csv
 import logging
 import re
+from collections.abc import Iterable
 from enum import Enum
 from io import BytesIO
 from pathlib import Path, PurePath
 from typing import (
     TYPE_CHECKING,
     Dict,
-    Iterable,
     List,
     Literal,
     Optional,
@@ -17,6 +17,8 @@ from typing import (
 )
 import filetype
+# DO NOT REMOVE; explicitly exposed from this location
 from docling_core.types.doc import (
     DocItem,
     DocItemLabel,
@@ -35,14 +37,14 @@ from docling_core.types.legacy_doc.base import (
     PageReference,
     Prov,
     Ref,
+    Table as DsSchemaTable,
+    TableCell,
 )
-from docling_core.types.legacy_doc.base import Table as DsSchemaTable
-from docling_core.types.legacy_doc.base import TableCell
 from docling_core.types.legacy_doc.document import (
     CCSDocumentDescription as DsDocumentDescription,
+    CCSFileInfoObject as DsFileInfoObject,
+    ExportedCCSDocument as DsDocument,
 )
-from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject
-from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
 from docling_core.utils.file import resolve_source_to_stream
 from docling_core.utils.legacy import docling_document_to_legacy
 from pydantic import BaseModel
@@ -65,7 +67,7 @@ from docling.datamodel.base_models import (
 )
 from docling.datamodel.settings import DocumentLimits
 from docling.utils.profiling import ProfilingItem
-from docling.utils.utils import create_file_hash, create_hash
+from docling.utils.utils import create_file_hash
 if TYPE_CHECKING:
     from docling.document_converter import FormatOption
@@ -134,9 +136,9 @@ class InputDocument(BaseModel):
                     self._init_doc(backend, path_or_stream)
             elif isinstance(path_or_stream, BytesIO):
-                assert (
-                    filename is not None
-                ), "Can't construct InputDocument from stream without providing filename arg."
+                assert filename is not None, (
+                    "Can't construct InputDocument from stream without providing filename arg."
+                )
                 self.file = PurePath(filename)
                 self.filesize = path_or_stream.getbuffer().nbytes
@@ -228,7 +230,6 @@ class _DummyBackend(AbstractDocumentBackend):
 class _DocumentConversionInput(BaseModel):
     path_or_stream_iterator: Iterable[Union[Path, str, DocumentStream]]
     headers: Optional[Dict[str, str]] = None
     limits: Optional[DocumentLimits] = DocumentLimits()

docling/datamodel/pipeline_options.py CHANGED Viewed

@@ -380,7 +380,6 @@ class PaginatedPipelineOptions(PipelineOptions):
 class VlmPipelineOptions(PaginatedPipelineOptions):
     generate_page_images: bool = True
     force_backend_text: bool = (
         False  # (To be used with vlms, or other generative models)

docling/document_converter.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import hashlib
 import logging
-import math
 import sys
 import time
+from collections.abc import Iterable, Iterator
 from functools import partial
 from pathlib import Path
-from typing import Dict, Iterable, Iterator, List, Optional, Tuple, Type, Union
+from typing import Dict, List, Optional, Tuple, Type, Union
 from pydantic import BaseModel, ConfigDict, model_validator, validate_call
@@ -172,7 +172,7 @@ class DocumentConverter:
         format_options: Optional[Dict[InputFormat, FormatOption]] = None,
     ):
         self.allowed_formats = (
-            allowed_formats if allowed_formats is not None else [e for e in InputFormat]
+            allowed_formats if allowed_formats is not None else list(InputFormat)
         )
         self.format_to_options = {
             format: (
@@ -254,7 +254,7 @@ class DocumentConverter:
         if not had_result and raises_on_error:
             raise ConversionError(
-                f"Conversion failed because the provided file has no recognizable format or it wasn't in the list of allowed formats."
+                "Conversion failed because the provided file has no recognizable format or it wasn't in the list of allowed formats."
             )
     def _convert(
@@ -266,7 +266,7 @@ class DocumentConverter:
             conv_input.docs(self.format_to_options),
             settings.perf.doc_batch_size,  # pass format_options
         ):
-            _log.info(f"Going to convert document batch...")
+            _log.info("Going to convert document batch...")
             # parallel processing only within input_batch
             # with ThreadPoolExecutor(

docling/models/api_vlm_model.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Iterable
+from collections.abc import Iterable
 from docling.datamodel.base_models import Page, VlmPrediction
 from docling.datamodel.document import ConversionResult
@@ -10,7 +10,6 @@ from docling.utils.profiling import TimeRecorder
 class ApiVlmModel(BasePageModel):
     def __init__(
         self,
         enabled: bool,

docling/models/base_model.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from abc import ABC, abstractmethod
-from typing import Any, Generic, Iterable, Optional, Protocol, Type
+from collections.abc import Iterable
+from typing import Generic, Optional, Protocol, Type
 from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeItem
 from typing_extensions import TypeVar
@@ -29,7 +30,6 @@ EnrichElementT = TypeVar("EnrichElementT", default=NodeItem)
 class GenericEnrichmentModel(ABC, Generic[EnrichElementT]):
     elements_batch_size: int = settings.perf.elements_batch_size
     @abstractmethod
@@ -50,7 +50,6 @@ class GenericEnrichmentModel(ABC, Generic[EnrichElementT]):
 class BaseEnrichmentModel(GenericEnrichmentModel[NodeItem]):
     def prepare_element(
         self, conv_res: ConversionResult, element: NodeItem
     ) -> Optional[NodeItem]:
@@ -62,7 +61,6 @@ class BaseEnrichmentModel(GenericEnrichmentModel[NodeItem]):
 class BaseItemAndImageEnrichmentModel(
     GenericEnrichmentModel[ItemAndImageEnrichmentElement]
 ):
     images_scale: float
     expansion_factor: float = 0.0

docling/models/base_ocr_model.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import copy
 import logging
 from abc import abstractmethod
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Iterable, List, Optional, Type
+from typing import List, Optional, Type
 import numpy as np
 from docling_core.types.doc import BoundingBox, CoordOrigin
-from docling_core.types.doc.page import BoundingRectangle, PdfTextCell, TextCell
 from PIL import Image, ImageDraw
 from rtree import index
 from scipy.ndimage import binary_dilation, find_objects, label

docling/models/code_formula_model.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import re
 from collections import Counter
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Iterable, List, Literal, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union
 import numpy as np
 from docling_core.types.doc import (

docling/models/document_picture_classifier.py CHANGED Viewed

@@ -1,5 +1,6 @@
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Iterable, List, Literal, Optional, Tuple, Union
+from typing import List, Literal, Optional, Union
 import numpy as np
 from docling_core.types.doc import (

docling/models/easyocr_model.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import logging
 import warnings
 import zipfile
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Iterable, List, Optional, Type
+from typing import List, Optional, Type
 import numpy
 from docling_core.types.doc import BoundingBox, CoordOrigin
@@ -58,12 +59,10 @@ class EasyOcrModel(BaseOcrModel):
                 device = decide_device(accelerator_options.device)
                 # Enable easyocr GPU if running on CUDA, MPS
                 use_gpu = any(
-                    [
-                        device.startswith(x)
-                        for x in [
-                            AcceleratorDevice.CUDA.value,
-                            AcceleratorDevice.MPS.value,
-                        ]
+                    device.startswith(x)
+                    for x in [
+                        AcceleratorDevice.CUDA.value,
+                        AcceleratorDevice.MPS.value,
                     ]
                 )
             else:
@@ -98,8 +97,10 @@ class EasyOcrModel(BaseOcrModel):
         progress: bool = False,
     ) -> Path:
         # Models are located in https://github.com/JaidedAI/EasyOCR/blob/master/easyocr/config.py
-        from easyocr.config import detection_models as det_models_dict
-        from easyocr.config import recognition_models as rec_models_dict
+        from easyocr.config import (
+            detection_models as det_models_dict,
+            recognition_models as rec_models_dict,
+        )
         if local_dir is None:
             local_dir = settings.cache_dir / "models" / EasyOcrModel._model_repo_folder
@@ -126,13 +127,11 @@ class EasyOcrModel(BaseOcrModel):
     def __call__(
         self, conv_res: ConversionResult, page_batch: Iterable[Page]
     ) -> Iterable[Page]:
         if not self.enabled:
             yield from page_batch
             return
         for page in page_batch:
             assert page._backend is not None
             if not page._backend.is_valid():
                 yield page

docling/models/factories/__init__.py CHANGED Viewed

@@ -9,7 +9,7 @@ from docling.models.factories.picture_description_factory import (
 logger = logging.getLogger(__name__)
-@lru_cache()
+@lru_cache
 def get_ocr_factory(allow_external_plugins: bool = False) -> OcrFactory:
     factory = OcrFactory()
     factory.load_from_plugins(allow_external_plugins=allow_external_plugins)
@@ -17,7 +17,7 @@ def get_ocr_factory(allow_external_plugins: bool = False) -> OcrFactory:
     return factory
-@lru_cache()
+@lru_cache
 def get_picture_description_factory(
     allow_external_plugins: bool = False,
 ) -> PictureDescriptionFactory:

docling/models/factories/base_factory.py CHANGED Viewed

@@ -33,7 +33,7 @@ class BaseFactory(Generic[A], metaclass=ABCMeta):
     @property
     def registered_kind(self) -> list[str]:
-        return list(opt.kind for opt in self._classes.keys())
+        return [opt.kind for opt in self._classes.keys()]
     def get_enum(self) -> enum.Enum:
         return enum.Enum(

docling 2.30.0__py3-none-any.whl → 2.31.0__py3-none-any.whl

docling 2.30.0__py3-none-any.whl → 2.31.0__py3-none-any.whl