deepdoctection 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release.
This version of deepdoctection might be problematic.
- deepdoctection/__init__.py +4 -2
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +919 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +162 -108
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +205 -119
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +26 -17
- deepdoctection/utils/env_info.py +86 -37
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -71
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.1.dist-info/METADATA +376 -0
- deepdoctection-0.43.1.dist-info/RECORD +149 -0
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.1.dist-info/METADATA +0 -431
- deepdoctection-0.42.1.dist-info/RECORD +0 -148
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/WHEEL +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/top_level.txt +0 -0
deepdoctection/extern/doctrocr.py (+162 -108)

@@ -16,8 +16,9 @@
 # limitations under the License.
 
 """
-
+Wrappers for DocTr text line detection and text recognition models
 """
+
 from __future__ import annotations
 
 import os
@@ -106,13 +107,16 @@ def doctr_predict_text_lines(
     np_img: PixelValues, predictor: DetectionPredictor, device: Union[torch.device, tf.device], lib: Literal["TF", "PT"]
 ) -> list[DetectionResult]:
     """
-    Generating text line DetectionResult based on
+    Generating text line `DetectionResult` based on DocTr `DetectionPredictor`.
+
+    Args:
+        np_img: Image in `np.array`
+        predictor: `doctr.models.detection.predictor.DetectionPredictor`
+        device: Will only be used in Tensorflow settings. Either `/gpu:0` or `/cpu:0`
+        lib: "TF" or "PT"
 
-    :
-
-    :param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
-    :param lib: "TF" or "PT"
-    :return: A list of text line detection results (without text).
+    Returns:
+        A list of text line `DetectionResult` (without text)
     """
     if lib == "TF":
         with device:
@@ -137,15 +141,18 @@ def doctr_predict_text(
     lib: Literal["TF", "PT"],
 ) -> list[DetectionResult]:
     """
-    Calls
-    returns the recognized text as DetectionResult
-
-    :
-
-
-
-
-
+    Calls DocTr text recognition model on a batch of `np.array`s (text lines predicted from a text line detector) and
+    returns the recognized text as `DetectionResult`
+
+    Args:
+        inputs: list of tuples containing the `annotation_id` of the input image and the `np.array` of the cropped
+            text line
+        predictor: `doctr.models.detection.predictor.RecognitionPredictor`
+        device: Will only be used in Tensorflow settings. Either `/gpu:0` or `/cpu:0`
+        lib: "TF" or "PT"
+
+    Returns:
+        A list of `DetectionResult` containing recognized text
     """
 
     uuids, images = list(zip(*inputs))
@@ -163,7 +170,7 @@ def doctr_predict_text(
 
 
 class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
-    """Base class for
+    """Base class for DocTr text line detector. This class only implements the basic wrapper functions"""
 
     def __init__(self, categories: Mapping[int, TypeOrStr], lib: Optional[Literal["PT", "TF"]] = None):
         self.categories = ModelCategories(init_categories=categories)
@@ -174,12 +181,26 @@ class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
 
     @staticmethod
    def get_name(path_weights: PathLikeOrStr, architecture: str) -> str:
-        """
+        """
+        Returns the name of the model
+
+        Args:
+            path_weights: Path to the model weights
+            architecture: Architecture name
+
+        Returns:
+            The name of the model as string
+        """
         return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
 
     @staticmethod
     def auto_select_lib() -> Literal["PT", "TF"]:
-        """
+        """
+        Auto select the DL library from the installed and from environment variables
+
+        Returns:
+            Either "PT" or "TF" based on environment variables
+        """
         return auto_select_lib_for_doctr()
 
 
@@ -194,28 +215,28 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
     Some other pre-trained models exist that have not been registered in `ModelCatalog`. Please check the DocTr library
     and organize the download of the pre-trained model by yourself.
 
-
+    Example:
+        ```python
+        path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
+                                                                                  /db_resnet50-ac60cadc.pt")
+        # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
 
-
-
-
+        categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
+        det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
+        layout = ImageLayoutService(det,to_image=True, crop_image=True)
 
-
-
-
+        path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
+                                                                                     /pt/crnn_vgg16_bn-9762b0b0.pt")
+        rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
+        text = TextExtractionService(rec, extract_from_roi="word")
 
-
-        /pt/crnn_vgg16_bn-9762b0b0.pt")
-        rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
-        text = TextExtractionService(rec, extract_from_roi="word")
+        analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
 
-
+        path = "/path/to/image_dir"
+        df = analyzer.analyze(path = path)
 
-
-
-
-        for dp in df:
-            ...
+        for dp in df:
+            ...
     """
 
     def __init__(
@@ -227,13 +248,14 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
         lib: Optional[Literal["PT", "TF"]] = None,
     ) -> None:
         """
-        :
-
-
-
-
-
-
+        Args:
+            architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+                "db_mobilenet_v3_large". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20>
+            path_weights: Path to the weights of the model
+            categories: A dict with the model output label and value
+            device: "cpu" or "cuda" or any tf.device or torch.device. The device must be compatible with the dll
+            lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
         """
         super().__init__(categories, lib)
         self.architecture = architecture
@@ -247,14 +269,20 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
         if self.lib == "PT":
             self.device = get_torch_device(device)
 
-        self.doctr_predictor = self.get_wrapped_model(self.architecture,
+        self.doctr_predictor = self.get_wrapped_model(self.architecture,
+                                                      self.path_weights,
+                                                      self.device,
+                                                      self.lib)
 
     def predict(self, np_img: PixelValues) -> list[DetectionResult]:
         """
         Prediction per image.
 
-        :
-
+        Args:
+            np_img: image as `np.array`
+
+        Returns:
+            A list of `DetectionResult`
         """
         return doctr_predict_text_lines(np_img, self.doctr_predictor, self.device, self.lib)
 
@@ -284,17 +312,17 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
         """
         Get the inner (wrapped) model.
 
-        :
-
-
-
-
-
-
-
-
-
-
+        Args:
+            architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+                "db_mobilenet_v3_large". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20>
+            path_weights: Path to the weights of the model
+            device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+            lib: "TF" or "PT" or `None`. If `None`, env variables `USE_TENSORFLOW`, `USE_PYTORCH` will be used. Make
+                sure, these variables are set. If not, use `deepdoctection.utils.env_info.auto_select_lib_and_device`
+
+        Returns:
+            Inner model which is a `nn.Module` in PyTorch or a `tf.keras.Model` in Tensorflow
         """
         doctr_predictor = detection_predictor(arch=architecture, pretrained=False, pretrained_backbone=False)
         DoctrTextlineDetector.load_model(path_weights, doctr_predictor, device, lib)
@@ -306,7 +334,7 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
 
 class DoctrTextRecognizer(TextRecognizer):
     """
-    A deepdoctection wrapper of DocTr text recognition predictor. The base class is a TextRecognizer that takes
+    A deepdoctection wrapper of DocTr text recognition predictor. The base class is a `TextRecognizer` that takes
     a batch of sub images (e.g. text lines from a text detector) and returns a list with text spotted in the sub images.
     DocTr supports several text recognition models but provides only a subset of pre-trained models.
 
@@ -314,30 +342,30 @@ class DoctrTextRecognizer(TextRecognizer):
     described in “An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to
     Scene Text Recognition”. It can be used in either Tensorflow or PyTorch.
 
-    For more details please check the official DocTr documentation by Mindee: https://mindee.github.io/doctr
-
-    **Example:**
+    For more details please check the official DocTr documentation by Mindee: <https://mindee.github.io/doctr/>
 
-
-
-
+    Example:
+        ```python
+        path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
+                                                                                  /db_resnet50-ac60cadc.pt")
+        # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
 
-
-
-
+        categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
+        det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
+        layout = ImageLayoutService(det,to_image=True, crop_image=True)
 
-
-
-
-
+        path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
+                                                                                     /pt/crnn_vgg16_bn-9762b0b0.pt")
+        rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
+        text = TextExtractionService(rec, extract_from_roi="word")
 
-
+        analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
 
-
-
+        path = "/path/to/image_dir"
+        df = analyzer.analyze(path = path)
 
-
-
+        for dp in df:
+            ...
     """
 
     def __init__(
@@ -349,14 +377,15 @@ class DoctrTextRecognizer(TextRecognizer):
         path_config_json: Optional[PathLikeOrStr] = None,
     ) -> None:
         """
-        :
-
-
-
-
-
-
-
+        Args:
+            architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
+                "crnn_mobilenet_v3_small". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16>.
+            path_weights: Path to the weights of the model
+            device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+            lib: "TF" or "PT" or `None`. If `None`, env variables `USE_TENSORFLOW`, `USE_PYTORCH` will be used.
+            path_config_json: Path to a `JSON` file containing the configuration of the model. Useful, if you have
+                a model trained on custom vocab.
         """
 
         self.lib = lib if lib is not None else self.auto_select_lib()
@@ -383,8 +412,11 @@ class DoctrTextRecognizer(TextRecognizer):
         """
         Prediction on a batch of text lines
 
-        :
-
+        Args:
+            images: list of tuples with the `annotation_id` of the sub image and a `np.array`
+
+        Returns:
+            A list of `DetectionResult`
         """
         if images:
             return doctr_predict_text(images, self.doctr_predictor, self.device, self.lib)
@@ -395,7 +427,7 @@ class DoctrTextRecognizer(TextRecognizer):
         return _get_doctr_requirements()
 
     def clone(self) -> DoctrTextRecognizer:
-        return self.__class__(self.architecture, self.path_weights, self.device, self.lib)
+        return self.__class__(self.architecture, self.path_weights, self.device, self.lib, self.path_config_json)
 
     @staticmethod
     def load_model(
@@ -456,15 +488,18 @@ class DoctrTextRecognizer(TextRecognizer):
         """
         Get the inner (wrapped) model.
 
-        :
-
-
-
-
-
-
-
-
+        Args:
+            architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
+                "crnn_mobilenet_v3_small". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16>.
+            path_weights: Path to the weights of the model
+            device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+            lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
+            path_config_json: Path to a `JSON` file containing the configuration of the model. Useful, if you have
+                a model trained on custom vocab.
+
+        Returns:
+            Inner model which is a `nn.Module` in PyTorch or a `tf.keras.Model` in Tensorflow
         """
         doctr_predictor = DoctrTextRecognizer.build_model(architecture, lib, path_config_json)
         DoctrTextRecognizer.load_model(path_weights, doctr_predictor, device, lib)
@@ -472,12 +507,26 @@ class DoctrTextRecognizer(TextRecognizer):
 
     @staticmethod
     def get_name(path_weights: PathLikeOrStr, architecture: str) -> str:
-        """
+        """
+        Returns the name of the model
+
+        Args:
+            path_weights: Path to the model weights
+            architecture: Architecture name
+
+        Returns:
+            The name of the model as string
+        """
         return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
 
     @staticmethod
     def auto_select_lib() -> Literal["PT", "TF"]:
-        """
+        """
+        Auto select the DL library from the installed and from environment variables
+
+        Returns:
+            Either "PT" or "TF" based on environment variables
+        """
         return auto_select_lib_for_doctr()
 
     def clear_model(self) -> None:
@@ -500,17 +549,19 @@ class DocTrRotationTransformer(ImageTransformer):
     This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
     The class also provides methods for cloning itself and for getting the requirements of the OCR system.
 
-
-
-
-
+    Example:
+        ```python
+        transformer = DocTrRotationTransformer()
+        detection_result = transformer.predict(np_img)
+        rotated_image = transformer.transform(np_img, detection_result)
+        ```
     """
 
     def __init__(self, number_contours: int = 50, ratio_threshold_for_lines: float = 5):
         """
-
-
-
+        Args:
+            number_contours: the number of contours used for the orientation estimation
+            ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
         """
         self.number_contours = number_contours
         self.ratio_threshold_for_lines = ratio_threshold_for_lines
@@ -522,9 +573,12 @@ class DocTrRotationTransformer(ImageTransformer):
         Applies the predicted rotation to the image, effectively rotating the image backwards.
         This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
 
-        :
-
-
+        Args:
+            np_img: The input image as a `np.array`
+            specification: A `DetectionResult` object containing the predicted rotation angle
+
+        Returns:
+            The rotated image as a `np.array`
         """
         return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
 
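The doctrocr.py changes above are almost entirely a docstring migration from reST `:param:` fields to Google style, plus one behavioural fix: `clone()` now propagates `path_config_json`. For orientation, here is the pipeline that the new docstring examples describe, as a minimal runnable sketch; the `dd.`-prefixed top-level imports and the `reset_state()` call before iteration are assumptions based on deepdoctection's usual public API, not part of this diff.

```python
import deepdoctection as dd

# Text line detection (PyTorch weights; the docstring names
# "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow)
path_weights_tl = dd.ModelDownloadManager.maybe_download_weights_and_configs(
    "doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt"
)
categories = dd.ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
det = dd.DoctrTextlineDetector("db_resnet50", path_weights_tl, categories, "cpu")
layout = dd.ImageLayoutService(det, to_image=True, crop_image=True)

# Text recognition on the cropped text lines
path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs(
    "doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt"
)
rec = dd.DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
text = dd.TextExtractionService(rec, extract_from_roi="word")

# Wire both components into a pipeline and run it over a directory of images
analyzer = dd.DoctectionPipe(pipeline_component_list=[layout, text])
df = analyzer.analyze(path="/path/to/image_dir")
df.reset_state()  # assumption: dataflows must be reset before iteration
for dp in df:
    ...  # dp is a parsed page view
```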
deepdoctection/extern/fastlang.py (+25 -17)

@@ -16,8 +16,9 @@
 # limitations under the License.
 
 """
-
+Wrappers for fasttext language detection models
 """
+
 from __future__ import annotations
 
 import os
@@ -39,12 +40,13 @@ with try_import() as import_guard:
 
 class FasttextLangDetectorMixin(LanguageDetector, ABC):
     """
-    Base class for Fasttext language detection implementation. This class only implements the basic wrapper functions.
+    Base class for `Fasttext` language detection implementation. This class only implements the basic wrapper functions.
     """
 
     def __init__(self, categories: Mapping[int, TypeOrStr], categories_orig: Mapping[str, TypeOrStr]) -> None:
         """
-        :
+        Args:
+            categories: A `dict` with the model output label and value. We use as convention the `ISO 639-2` language
         """
         self.categories = ModelCategories(init_categories=categories)
         self.categories_orig = MappingProxyType({cat_orig: get_type(cat) for cat_orig, cat in categories_orig.items()})
@@ -52,8 +54,12 @@ class FasttextLangDetectorMixin(LanguageDetector, ABC):
     def output_to_detection_result(self, output: Union[tuple[Any, Any]]) -> DetectionResult:
         """
         Generating `DetectionResult` from model output
-
-        :
+
+        Args:
+            output: `FastText` model output
+
+        Returns:
+            `DetectionResult` filled with `text` and `score`
         """
         return DetectionResult(text=self.categories_orig[output[0][0]], score=output[1][0])
 
@@ -68,30 +74,30 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
     Fasttext language detector wrapper. Two models provided in the fasttext library can be used to identify languages.
     The background to the models can be found in the works:
 
-
-
-
+    Info:
+        [1] Joulin A, Grave E, Bojanowski P, Mikolov T, Bag of Tricks for Efficient Text Classification
+        [2] Joulin A, Grave E, Bojanowski P, Douze M, Jégou H, Mikolov T, FastText.zip: Compressing text classification
     models
 
-
-    (<https://creativecommons.org/licenses/by-sa/3.0/>)
-
-    When loading the models via the ModelCatalog, the original and unmodified models are used.
+    When loading the models via the `ModelCatalog`, the original and unmodified models are used.
 
+    Example:
+        ```python
     path_weights = ModelCatalog.get_full_path_weights("fasttext/lid.176.bin")
     profile = ModelCatalog.get_profile("fasttext/lid.176.bin")
     lang_detector = FasttextLangDetector(path_weights,profile.categories)
     detection_result = lang_detector.predict("some text in some language")
-
+        ```
     """
 
     def __init__(
         self, path_weights: PathLikeOrStr, categories: Mapping[int, TypeOrStr], categories_orig: Mapping[str, TypeOrStr]
     ):
         """
-        :
-
-
+        Args:
+            path_weights: path to model weights
+            categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
+                code.
         """
         super().__init__(categories, categories_orig)
 
@@ -117,6 +123,8 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
     def get_wrapped_model(path_weights: PathLikeOrStr) -> Any:
         """
         Get the wrapped model
-
+
+        Args:
+            path_weights: path to model weights
         """
         return load_model(os.fspath(path_weights))