PyPI - deepdoctection - Versions diffs - 0.30__py3-none-any.whl → 0.31__py3-none-any.whl - Mend

deepdoctection 0.30__py3-none-any.whl → 0.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (74)

deepdoctection/__init__.py +4 -2
deepdoctection/analyzer/dd.py +6 -5
deepdoctection/dataflow/base.py +0 -19
deepdoctection/dataflow/custom.py +4 -3
deepdoctection/dataflow/custom_serialize.py +14 -5
deepdoctection/dataflow/parallel_map.py +12 -11
deepdoctection/dataflow/serialize.py +5 -4
deepdoctection/datapoint/annotation.py +33 -12
deepdoctection/datapoint/box.py +1 -4
deepdoctection/datapoint/convert.py +3 -1
deepdoctection/datapoint/image.py +66 -29
deepdoctection/datapoint/view.py +57 -25
deepdoctection/datasets/adapter.py +1 -1
deepdoctection/datasets/base.py +83 -10
deepdoctection/datasets/dataflow_builder.py +1 -1
deepdoctection/datasets/info.py +2 -2
deepdoctection/datasets/instances/layouttest.py +2 -7
deepdoctection/eval/accmetric.py +1 -1
deepdoctection/eval/base.py +5 -4
deepdoctection/eval/eval.py +2 -2
deepdoctection/eval/tp_eval_callback.py +5 -4
deepdoctection/extern/base.py +39 -13
deepdoctection/extern/d2detect.py +164 -64
deepdoctection/extern/deskew.py +32 -7
deepdoctection/extern/doctrocr.py +227 -39
deepdoctection/extern/fastlang.py +45 -7
deepdoctection/extern/hfdetr.py +90 -33
deepdoctection/extern/hflayoutlm.py +109 -22
deepdoctection/extern/pdftext.py +2 -1
deepdoctection/extern/pt/ptutils.py +3 -2
deepdoctection/extern/tessocr.py +134 -22
deepdoctection/extern/texocr.py +2 -0
deepdoctection/extern/tp/tpcompat.py +4 -4
deepdoctection/extern/tp/tpfrcnn/preproc.py +2 -7
deepdoctection/extern/tpdetect.py +50 -23
deepdoctection/mapper/d2struct.py +1 -1
deepdoctection/mapper/hfstruct.py +1 -1
deepdoctection/mapper/laylmstruct.py +1 -1
deepdoctection/mapper/maputils.py +13 -2
deepdoctection/mapper/prodigystruct.py +1 -1
deepdoctection/mapper/pubstruct.py +10 -10
deepdoctection/mapper/tpstruct.py +1 -1
deepdoctection/pipe/anngen.py +35 -8
deepdoctection/pipe/base.py +53 -19
deepdoctection/pipe/cell.py +29 -8
deepdoctection/pipe/common.py +12 -4
deepdoctection/pipe/doctectionpipe.py +2 -2
deepdoctection/pipe/language.py +3 -2
deepdoctection/pipe/layout.py +3 -2
deepdoctection/pipe/lm.py +2 -2
deepdoctection/pipe/refine.py +18 -10
deepdoctection/pipe/segment.py +21 -16
deepdoctection/pipe/text.py +14 -8
deepdoctection/pipe/transform.py +16 -9
deepdoctection/train/d2_frcnn_train.py +15 -12
deepdoctection/train/hf_detr_train.py +8 -6
deepdoctection/train/hf_layoutlm_train.py +16 -11
deepdoctection/utils/__init__.py +3 -0
deepdoctection/utils/concurrency.py +1 -1
deepdoctection/utils/context.py +2 -2
deepdoctection/utils/env_info.py +55 -22
deepdoctection/utils/error.py +84 -0
deepdoctection/utils/file_utils.py +4 -15
deepdoctection/utils/fs.py +7 -7
deepdoctection/utils/pdf_utils.py +5 -4
deepdoctection/utils/settings.py +5 -1
deepdoctection/utils/transform.py +1 -1
deepdoctection/utils/utils.py +0 -6
deepdoctection/utils/viz.py +44 -2
{deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/METADATA +33 -58
{deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/RECORD +74 -73
{deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/WHEEL +1 -1
{deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/LICENSE +0 -0
{deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/top_level.txt +0 -0

deepdoctection/extern/doctrocr.py CHANGED Viewed

@@ -18,12 +18,15 @@
 """
 Deepdoctection wrappers for DocTr OCR text line detection and text recognition models
 """
+import os
+from abc import ABC
 from pathlib import Path
 from typing import Any, List, Literal, Mapping, Optional, Tuple
 from zipfile import ZipFile
 from ..utils.detection_types import ImageType, Requirement
+from ..utils.env_info import get_device
+from ..utils.error import DependencyError
 from ..utils.file_utils import (
     doctr_available,
     get_doctr_requirement,
@@ -35,11 +38,13 @@ from ..utils.file_utils import (
     tf_available,
 )
 from ..utils.fs import load_json
-from ..utils.settings import LayoutType, ObjectTypes, TypeOrStr
-from .base import DetectionResult, ObjectDetector, PredictorBase, TextRecognizer
+from ..utils.settings import LayoutType, ObjectTypes, PageType, TypeOrStr
+from ..utils.viz import viz_handler
+from .base import DetectionResult, ImageTransformer, ObjectDetector, PredictorBase, TextRecognizer
 from .pt.ptutils import set_torch_auto_device
 if doctr_available() and ((tf_addons_available() and tf_available()) or pytorch_available()):
+    from doctr.models._utils import estimate_orientation
     from doctr.models.detection.predictor import DetectionPredictor  # pylint: disable=W0611
     from doctr.models.detection.zoo import detection_predictor
     from doctr.models.preprocessor import PreProcessor
@@ -64,7 +69,7 @@ def _set_device_str(device: Optional[str] = None) -> str:
     return device
-def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: str) -> None:
+def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: Literal["PT", "TF"]) -> None:
     if lib == "PT" and pytorch_available():
         state_dict = torch.load(path_weights, map_location=device)
         for key in list(state_dict.keys()):
@@ -83,6 +88,16 @@ def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: str)
             doctr_predictor.model.load_weights(path_weights)
+def auto_select_lib_for_doctr() -> Literal["PT", "TF"]:
+    """Auto select the DL library from the installed and from environment variables"""
+    if tf_available() and os.environ.get("USE_TF", os.environ.get("USE_TENSORFLOW", False)):
+        os.environ["USE_TF"] = "TRUE"
+        return "TF"
+    if pytorch_available() and os.environ.get("USE_TORCH", os.environ.get("USE_PYTORCH", False)):
+        return "PT"
+    raise DependencyError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextlineDetector")
 def doctr_predict_text_lines(np_img: ImageType, predictor: "DetectionPredictor", device: str) -> List[DetectionResult]:
     """
     Generating text line DetectionResult based on Doctr DetectionPredictor.
@@ -132,7 +147,28 @@ def doctr_predict_text(
     return detection_results
-class DoctrTextlineDetector(ObjectDetector):
+class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
+    """Base class for Doctr textline detector. This class only implements the basic wrapper functions"""
+    def __init__(self, categories: Mapping[str, TypeOrStr], lib: Optional[Literal["PT", "TF"]] = None):
+        self.categories = categories  # type: ignore
+        self.lib = lib if lib is not None else self.auto_select_lib()
+    def possible_categories(self) -> List[ObjectTypes]:
+        return [LayoutType.word]
+    @staticmethod
+    def get_name(path_weights: str, architecture: str) -> str:
+        """Returns the name of the model"""
+        return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
+    @staticmethod
+    def auto_select_lib() -> Literal["PT", "TF"]:
+        """Auto select the DL library from the installed and from environment variables"""
+        return auto_select_lib_for_doctr()
+class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
     """
     A deepdoctection wrapper of DocTr text line detector. We model text line detection as ObjectDetector
     and assume to use this detector in a ImageLayoutService.
@@ -165,8 +201,6 @@ class DoctrTextlineDetector(ObjectDetector):
                  for dp in df:
                      ...
     """
     def __init__(
@@ -175,20 +209,36 @@ class DoctrTextlineDetector(ObjectDetector):
         path_weights: str,
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
-        lib: str = "TF",
+        lib: Optional[Literal["PT", "TF"]] = None,
     ) -> None:
-        self.lib = lib
-        self.name = "doctr_text_detector"
+        """
+        :param architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+        "db_mobilenet_v3_large". The full list can be found here:
+        https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20
+        :param path_weights: Path to the weights of the model
+        :param categories: A dict with the model output label and value
+        :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
+        """
+        super().__init__(categories, lib)
         self.architecture = architecture
         self.path_weights = path_weights
-        self.doctr_predictor = detection_predictor(
-            arch=self.architecture, pretrained=False, pretrained_backbone=False
-        )  # we will be loading the model
-        # later because there is no easy way in doctr to load a model by giving only a path to its weights
-        self.categories = categories  # type: ignore
+        self.name = self.get_name(self.path_weights, self.architecture)
+        self.model_id = self.get_model_id()
+        if device is None:
+            if self.lib == "TF":
+                device = "cuda" if tf.test.is_gpu_available() else "cpu"
+            elif self.lib == "PT":
+                auto_device = get_device(False)
+                device = "cpu" if auto_device == "mps" else auto_device
+            else:
+                raise DependencyError("Cannot select device automatically. Please set the device manually.")
         self.device_input = device
         self.device = _set_device_str(device)
-        self.load_model()
+        self.doctr_predictor = self.get_wrapped_model(self.architecture, self.path_weights, self.device_input, self.lib)
     def predict(self, np_img: ImageType) -> List[DetectionResult]:
         """
@@ -211,12 +261,34 @@ class DoctrTextlineDetector(ObjectDetector):
     def clone(self) -> PredictorBase:
         return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input, self.lib)
-    def possible_categories(self) -> List[ObjectTypes]:
-        return [LayoutType.word]
-    def load_model(self) -> None:
+    @staticmethod
+    def load_model(path_weights: str, doctr_predictor: Any, device: str, lib: Literal["PT", "TF"]) -> None:
         """Loading model weights"""
-        _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
+        _load_model(path_weights, doctr_predictor, device, lib)
+    @staticmethod
+    def get_wrapped_model(
+        architecture: str, path_weights: str, device: Literal["cpu", "cuda"], lib: Literal["PT", "TF"]
+    ) -> Any:
+        """
+        Get the inner (wrapped) model.
+        :param architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+        "db_mobilenet_v3_large". The full list can be found here:
+        https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20
+        :param path_weights: Path to the weights of the model
+        :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used. Make sure,
+                    these variables are set. If not, use
+                        deepdoctection.utils.env_info.auto_select_lib_and_device
+        :return: Inner model which is a "nn.Module" in PyTorch or a "tf.keras.Model" in Tensorflow
+        """
+        doctr_predictor = detection_predictor(arch=architecture, pretrained=False, pretrained_backbone=False)
+        device_str = _set_device_str(device)
+        DoctrTextlineDetector.load_model(path_weights, doctr_predictor, device_str, lib)
+        return doctr_predictor
 class DoctrTextRecognizer(TextRecognizer):
@@ -261,7 +333,7 @@ class DoctrTextRecognizer(TextRecognizer):
         architecture: str,
         path_weights: str,
         device: Optional[Literal["cpu", "cuda"]] = None,
-        lib: str = "TF",
+        lib: Optional[Literal["PT", "TF"]] = None,
         path_config_json: Optional[str] = None,
     ) -> None:
         """
@@ -270,19 +342,36 @@ class DoctrTextRecognizer(TextRecognizer):
         https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16.
         :param path_weights: Path to the weights of the model
         :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
-        :param lib: "TF" or "PT". Will default to "TF".
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
         :param path_config_json: Path to a json file containing the configuration of the model. Useful, if you have
         a model trained on custom vocab.
         """
-        self.lib = lib
-        self.name = "doctr_text_recognizer"
+        self.lib = lib if lib is not None else self.auto_select_lib()
         self.architecture = architecture
         self.path_weights = path_weights
+        self.name = self.get_name(self.path_weights, self.architecture)
+        self.model_id = self.get_model_id()
+        if device is None:
+            if self.lib == "TF":
+                device = "cuda" if tf.test.is_gpu_available() else "cpu"
+            if self.lib == "PT":
+                auto_device = get_device(False)
+                device = "cpu" if auto_device == "mps" else auto_device
+            else:
+                raise DependencyError("Cannot select device automatically. Please set the device manually.")
         self.device_input = device
         self.device = _set_device_str(device)
         self.path_config_json = path_config_json
-        self.doctr_predictor = self.build_model()
-        self.load_model()
+        self.doctr_predictor = self.build_model(self.architecture, self.path_config_json)
+        self.load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
+        self.doctr_predictor = self.get_wrapped_model(
+            self.architecture, self.path_weights, self.device_input, self.lib, self.path_config_json
+        )
     def predict(self, images: List[Tuple[str, ImageType]]) -> List[DetectionResult]:
         """
@@ -306,19 +395,21 @@ class DoctrTextRecognizer(TextRecognizer):
     def clone(self) -> PredictorBase:
         return self.__class__(self.architecture, self.path_weights, self.device_input, self.lib)
-    def load_model(self) -> None:
+    @staticmethod
+    def load_model(path_weights: str, doctr_predictor: Any, device: str, lib: Literal["PT", "TF"]) -> None:
         """Loading model weights"""
-        _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
+        _load_model(path_weights, doctr_predictor, device, lib)
-    def build_model(self) -> "RecognitionPredictor":
+    @staticmethod
+    def build_model(architecture: str, path_config_json: Optional[str] = None) -> "RecognitionPredictor":
         """Building the model"""
         # inspired and adapted from https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py
         custom_configs = {}
         batch_size = 32
         recognition_configs = {}
-        if self.path_config_json:
-            custom_configs = load_json(self.path_config_json)
+        if path_config_json:
+            custom_configs = load_json(path_config_json)
             custom_configs.pop("arch", None)
             custom_configs.pop("url", None)
             custom_configs.pop("task", None)
@@ -327,18 +418,115 @@ class DoctrTextRecognizer(TextRecognizer):
             batch_size = custom_configs.pop("batch_size")
         recognition_configs["batch_size"] = batch_size
-        if isinstance(self.architecture, str):
-            if self.architecture not in ARCHS:
-                raise ValueError(f"unknown architecture '{self.architecture}'")
+        if isinstance(architecture, str):
+            if architecture not in ARCHS:
+                raise ValueError(f"unknown architecture '{architecture}'")
-            model = recognition.__dict__[self.architecture](pretrained=True, pretrained_backbone=True, **custom_configs)
+            model = recognition.__dict__[architecture](pretrained=True, pretrained_backbone=True, **custom_configs)
         else:
             if not isinstance(
-                self.architecture,
+                architecture,
                 (recognition.CRNN, recognition.SAR, recognition.MASTER, recognition.ViTSTR, recognition.PARSeq),
             ):
-                raise ValueError(f"unknown architecture: {type(self.architecture)}")
-            model = self.architecture
+                raise ValueError(f"unknown architecture: {type(architecture)}")
+            model = architecture
         input_shape = model.cfg["input_shape"][:2] if tf_available() else model.cfg["input_shape"][-2:]
         return RecognitionPredictor(PreProcessor(input_shape, preserve_aspect_ratio=True, **recognition_configs), model)
+    @staticmethod
+    def get_wrapped_model(
+        architecture: str,
+        path_weights: str,
+        device: Literal["cpu", "cuda"],
+        lib: Literal["PT", "TF"],
+        path_config_json: Optional[str] = None,
+    ) -> Any:
+        """
+        Get the inner (wrapped) model.
+        :param architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
+        "crnn_mobilenet_v3_small". The full list can be found here:
+        https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16.
+        :param path_weights: Path to the weights of the model
+        :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
+        :param path_config_json: Path to a json file containing the configuration of the model. Useful, if you have
+        a model trained on custom vocab.
+        :return: Inner model which is a "nn.Module" in PyTorch or a "tf.keras.Model" in Tensorflow
+        """
+        doctr_predictor = DoctrTextRecognizer.build_model(architecture, path_config_json)
+        device_str = _set_device_str(device)
+        DoctrTextRecognizer.load_model(path_weights, doctr_predictor, device_str, lib)
+        return doctr_predictor
+    @staticmethod
+    def get_name(path_weights: str, architecture: str) -> str:
+        """Returns the name of the model"""
+        return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
+    @staticmethod
+    def auto_select_lib() -> Literal["PT", "TF"]:
+        """Auto select the DL library from the installed and from environment variables"""
+        return auto_select_lib_for_doctr()
+class DocTrRotationTransformer(ImageTransformer):
+    """
+    The `DocTrRotationTransformer` class is a specialized image transformer that is designed to handle image rotation
+    in the context of Optical Character Recognition (OCR) tasks. It inherits from the `ImageTransformer` base class and
+    implements methods for predicting and applying rotation transformations to images.
+    The `predict` method determines the angle of the rotated image using the `estimate_orientation` function from the
+    `doctr.models._utils` module. The `n_ct` and `ratio_threshold_for_lines` parameters for this function can be
+    configured when instantiating the class.
+    The `transform` method applies the predicted rotation to the image, effectively rotating the image backwards.
+    This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
+    This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
+    The class also provides methods for cloning itself and for getting the requirements of the OCR system.
+    **Example:**
+                    transformer = DocTrRotationTransformer()
+                    detection_result = transformer.predict(np_img)
+                    rotated_image = transformer.transform(np_img, detection_result)
+    """
+    def __init__(self, number_contours: int = 50, ratio_threshold_for_lines: float = 5):
+        """
+        :param number_contours: the number of contours used for the orientation estimation
+        :param ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
+        """
+        self.number_contours = number_contours
+        self.ratio_threshold_for_lines = ratio_threshold_for_lines
+        self.name = "doctr_rotation_transformer"
+    def transform(self, np_img: ImageType, specification: DetectionResult) -> ImageType:
+        """
+        Applies the predicted rotation to the image, effectively rotating the image backwards.
+        This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
+        :param np_img: The input image as a numpy array.
+        :param specification: A `DetectionResult` object containing the predicted rotation angle.
+        :return: The rotated image as a numpy array.
+        """
+        return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
+    def predict(self, np_img: ImageType) -> DetectionResult:
+        angle = estimate_orientation(np_img, self.number_contours, self.ratio_threshold_for_lines)
+        if angle < 0:
+            angle += 360
+        return DetectionResult(angle=round(angle, 2))
+    @classmethod
+    def get_requirements(cls) -> List[Requirement]:
+        return [get_doctr_requirement()]
+    def clone(self) -> PredictorBase:
+        return self.__class__(self.number_contours, self.ratio_threshold_for_lines)
+    @staticmethod
+    def possible_category() -> PageType:
+        return PageType.angle

deepdoctection/extern/fastlang.py CHANGED Viewed

@@ -18,18 +18,45 @@
 """
 Deepdoctection wrappers for fasttext language detection models
 """
+from abc import ABC
 from copy import copy
-from typing import List, Mapping
+from pathlib import Path
+from typing import Any, List, Mapping, Tuple, Union
 from ..utils.file_utils import Requirement, fasttext_available, get_fasttext_requirement
-from ..utils.settings import TypeOrStr
+from ..utils.settings import TypeOrStr, get_type
 from .base import DetectionResult, LanguageDetector, PredictorBase
 if fasttext_available():
     from fasttext import load_model  # type: ignore
-class FasttextLangDetector(LanguageDetector):
+class FasttextLangDetectorMixin(LanguageDetector, ABC):
+    """
+    Base class for Fasttext language detection implementation. This class only implements the basic wrapper functions.
+    """
+    def __init__(self, categories: Mapping[str, TypeOrStr]) -> None:
+        """
+        :param categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
+        """
+        self.categories = copy({idx: get_type(cat) for idx, cat in categories.items()})
+    def output_to_detection_result(self, output: Union[Tuple[Any, Any]]) -> DetectionResult:
+        """
+        Generating `DetectionResult` from model output
+        :param output: FastText model output
+        :return: `DetectionResult` filled with `text` and `score`
+        """
+        return DetectionResult(text=self.categories[output[0][0]], score=output[1][0])
+    @staticmethod
+    def get_name(path_weights: str) -> str:
+        """Returns the name of the model"""
+        return "fasttext_" + "_".join(Path(path_weights).parts[-2:])
+class FasttextLangDetector(FasttextLangDetectorMixin):
     """
     Fasttext language detector wrapper. Two models provided in the fasttext library can be used to identify languages.
     The background to the models can be found in the works:
@@ -57,15 +84,18 @@ class FasttextLangDetector(LanguageDetector):
         :param categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
                            code.
         """
+        super().__init__(categories)
-        self.name = "fasttest_lang_detector"
         self.path_weights = path_weights
-        self.model = load_model(self.path_weights)
-        self.categories = copy(categories)  # type: ignore
+        self.name = self.get_name(self.path_weights)
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(self.path_weights)
     def predict(self, text_string: str) -> DetectionResult:
         output = self.model.predict(text_string)
-        return DetectionResult(text=self.categories[output[0][0]], score=output[1][0])
+        return self.output_to_detection_result(output)
     @classmethod
     def get_requirements(cls) -> List[Requirement]:
@@ -73,3 +103,11 @@ class FasttextLangDetector(LanguageDetector):
     def clone(self) -> PredictorBase:
         return self.__class__(self.path_weights, self.categories)
+    @staticmethod
+    def get_wrapped_model(path_weights: str) -> Any:
+        """
+        Get the wrapped model
+        :param path_weights: path to model weights
+        """
+        return load_model(path_weights)

deepdoctection/extern/hfdetr.py CHANGED Viewed

@@ -19,6 +19,8 @@
 HF Detr model for object detection.
 """
+from abc import ABC
+from pathlib import Path
 from typing import List, Literal, Mapping, Optional, Sequence
 from ..utils.detection_types import ImageType, Requirement
@@ -94,7 +96,48 @@ def detr_predict_image(
     ]
-class HFDetrDerivedDetector(ObjectDetector):
+class HFDetrDerivedDetectorMixin(ObjectDetector, ABC):
+    """Base class for Detr object detector. This class only implements the basic wrapper functions"""
+    def __init__(self, categories: Mapping[str, TypeOrStr], filter_categories: Optional[Sequence[TypeOrStr]] = None):
+        """
+        :param categories: A dict with key (indices) and values (category names).
+        :param filter_categories: The model might return objects that are not supposed to be predicted and that should
+                                  be filtered. Pass a list of category names that must not be returned
+        """
+        self.categories = {idx: get_type(cat) for idx, cat in categories.items()}
+        if filter_categories:
+            filter_categories = [get_type(cat) for cat in filter_categories]
+        self.filter_categories = filter_categories
+    def _map_category_names(self, detection_results: List[DetectionResult]) -> List[DetectionResult]:
+        """
+        Populating category names to detection results. Will also filter categories
+        :param detection_results: list of detection results
+        :return: List of detection results with attribute class_name populated
+        """
+        filtered_detection_result: List[DetectionResult] = []
+        for result in detection_results:
+            result.class_name = self.categories[str(result.class_id + 1)]  # type: ignore
+            if isinstance(result.class_id, int):
+                result.class_id += 1
+            if self.filter_categories:
+                if result.class_name not in self.filter_categories:
+                    filtered_detection_result.append(result)
+            else:
+                filtered_detection_result.append(result)
+        return filtered_detection_result
+    @staticmethod
+    def get_name(path_weights: str) -> str:
+        """Returns the name of the model"""
+        return "Transformers_Tatr_" + "_".join(Path(path_weights).parts[-2:])
+class HFDetrDerivedDetector(HFDetrDerivedDetectorMixin):
     """
     Model wrapper for TableTransformerForObjectDetection that again is based on
@@ -138,26 +181,25 @@ class HFDetrDerivedDetector(ObjectDetector):
         :param filter_categories: The model might return objects that are not supposed to be predicted and that should
                                   be filtered. Pass a list of category names that must not be returned
         """
-        self.name = "Detr"
-        self.categories = {idx: get_type(cat) for idx, cat in categories.items()}
+        super().__init__(categories, filter_categories)
         self.path_config = path_config_json
         self.path_weights = path_weights
         self.path_feature_extractor_config = path_feature_extractor_config_json
-        self.config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=self.path_config)
-        self.config.use_timm_backbone = True
-        self.config.threshold = 0.1
-        self.config.nms_threshold = 0.05
-        self.hf_detr_predictor = self.set_model(path_weights)
-        self.feature_extractor = self.set_pre_processor()
+        self.name = self.get_name(self.path_weights)
+        self.model_id = self.get_model_id()
+        self.config = self.get_config(path_config_json)
+        self.hf_detr_predictor = self.get_model(self.path_weights, self.config)
+        self.feature_extractor = self.get_pre_processor(self.path_feature_extractor_config)
         if device is not None:
             self.device = device
         else:
             self.device = set_torch_auto_device()
         self.hf_detr_predictor.to(self.device)
-        if filter_categories:
-            filter_categories = [get_type(cat) for cat in filter_categories]
-        self.filter_categories = filter_categories
     def predict(self, np_img: ImageType) -> List[DetectionResult]:
         results = detr_predict_image(
@@ -170,44 +212,41 @@ class HFDetrDerivedDetector(ObjectDetector):
         )
         return self._map_category_names(results)
-    def set_model(self, path_weights: str) -> "TableTransformerForObjectDetection":
+    @staticmethod
+    def get_model(path_weights: str, config: "PretrainedConfig") -> "TableTransformerForObjectDetection":
         """
         Builds the Detr model
-        :param path_weights: weights
+        :param path_weights: The path to the model checkpoint.
+        :param config: `PretrainedConfig`
         :return: TableTransformerForObjectDetection instance
         """
         return TableTransformerForObjectDetection.from_pretrained(
-            pretrained_model_name_or_path=path_weights, config=self.config
+            pretrained_model_name_or_path=path_weights, config=config
         )
-    def set_pre_processor(self) -> "DetrFeatureExtractor":
+    @staticmethod
+    def get_pre_processor(path_feature_extractor_config: str) -> "DetrFeatureExtractor":
         """
         Builds the feature extractor
         :return: DetrFeatureExtractor
         """
-        return AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path=self.path_feature_extractor_config)
+        return AutoFeatureExtractor.from_pretrained(pretrained_model_name_or_path=path_feature_extractor_config)
-    def _map_category_names(self, detection_results: List[DetectionResult]) -> List[DetectionResult]:
+    @staticmethod
+    def get_config(path_config: str) -> "PretrainedConfig":
         """
-        Populating category names to detection results. Will also filter categories
+        Builds the config
-        :param detection_results: list of detection results
-        :return: List of detection results with attribute class_name populated
+        :param path_config: The path to the json config.
+        :return: PretrainedConfig instance
         """
-        filtered_detection_result: List[DetectionResult] = []
-        for result in detection_results:
-            result.class_name = self.categories[str(result.class_id + 1)]  # type: ignore
-            if isinstance(result.class_id, int):
-                result.class_id += 1
-            if self.filter_categories:
-                if result.class_name not in self.filter_categories:
-                    filtered_detection_result.append(result)
-            else:
-                filtered_detection_result.append(result)
-        return filtered_detection_result
+        config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=path_config)
+        config.use_timm_backbone = True
+        config.threshold = 0.1
+        config.nms_threshold = 0.05
+        return config
     @classmethod
     def get_requirements(cls) -> List[Requirement]:
@@ -217,3 +256,21 @@ class HFDetrDerivedDetector(ObjectDetector):
         return self.__class__(
             self.path_config, self.path_weights, self.path_feature_extractor_config, self.categories, self.device
         )
+    @staticmethod
+    def get_wrapped_model(
+        path_config_json: str, path_weights: str, device: Optional[Literal["cpu", "cuda"]] = None
+    ) -> "TableTransformerForObjectDetection":
+        """
+        Get the wrapped model
+        :param path_config_json: The path to the json config.
+        :param path_weights: The path to the model checkpoint.
+        :param device: "cpu" or "cuda". If not specified will auto select depending on what is available
+        :return: TableTransformerForObjectDetection instance
+        """
+        config = HFDetrDerivedDetector.get_config(path_config_json)
+        hf_detr_predictor = HFDetrDerivedDetector.get_model(path_weights, config)
+        if device is None:
+            device = set_torch_auto_device()
+        return hf_detr_predictor.to(device)

deepdoctection 0.30__py3-none-any.whl → 0.31__py3-none-any.whl

Potentially problematic release.

deepdoctection 0.30__py3-none-any.whl → 0.31__py3-none-any.whl