deepdoctection-0.42.0-py3-none-any.whl → deepdoctection-0.43-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.0.dist-info/METADATA +0 -431
- deepdoctection-0.42.0.dist-info/RECORD +0 -148
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/extern/pdftext.py
CHANGED

@@ -48,18 +48,23 @@ def _to_detect_result(word: dict[str, str], class_name: ObjectTypes) -> Detectio
 
 class PdfPlumberTextDetector(PdfMiner):
     """
-    Text miner based on the pdfminer.six engine. To convert pdfminers result, especially group character to get word
-    level results we use pdfplumber
+    Text miner based on the `pdfminer.six` engine. To convert `pdfminers` result, especially group character to get word
+    level results we use `pdfplumber`.
 
+    Example:
+        ```python
         pdf_plumber = PdfPlumberTextDetector()
         df = SerializerPdfDoc.load("path/to/document.pdf")
         df.reset_state()
 
         for dp in df:
             detection_results = pdf_plumber.predict(dp["pdf_bytes"])
+        ```
 
     To use it in a more integrated way:
 
+    Example:
+        ```python
         pdf_plumber = PdfPlumberTextDetector()
         text_extract = TextExtractionService(pdf_plumber)
 

@@ -70,7 +75,7 @@ class PdfPlumberTextDetector(PdfMiner):
 
         for dp in df:
             ...
-
+        ```
     """
 
     def __init__(self, x_tolerance: int = 3, y_tolerance: int = 3) -> None:

@@ -83,10 +88,13 @@ class PdfPlumberTextDetector(PdfMiner):
 
     def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
         """
-        Call pdfminer.six and returns detected text as
+        Call `pdfminer.six` and returns detected text as `DetectionResult`
+
+        Args:
+            pdf_bytes: bytes of a single pdf page
 
-        :
-
+        Returns:
+            A list of `DetectionResult`
         """
 
         with save_tmp_file(pdf_bytes, "pdf_") as (tmp_name, _):

@@ -104,8 +112,12 @@ class PdfPlumberTextDetector(PdfMiner):
     def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
         """
         Get the width and height of the full page
-
-        :
+
+        Args:
+            pdf_bytes: `pdf_bytes` generating the pdf
+
+        Returns:
+            `(width,height)`
         """
 
         if self._pdf_bytes == pdf_bytes and self._page is not None:

@@ -126,15 +138,20 @@ class Pdfmium2TextDetector(PdfMiner):
     """
     Text miner based on the pypdfium2 engine. It will return text on text line level and not on word level
 
+    Example:
+        ```python
         pdfmium2 = Pdfmium2TextDetector()
         df = SerializerPdfDoc.load("path/to/document.pdf")
         df.reset_state()
 
         for dp in df:
             detection_results = pdfmium2.predict(dp["pdf_bytes"])
+        ```
 
     To use it in a more integrated way:
 
+    Example:
+        ```python
         pdfmium2 = Pdfmium2TextDetector()
         text_extract = TextExtractionService(pdfmium2)
 

@@ -144,6 +161,7 @@ class Pdfmium2TextDetector(PdfMiner):
         df.reset_state()
         for dp in df:
             ...
+        ```
 
     """
 

@@ -157,8 +175,11 @@ class Pdfmium2TextDetector(PdfMiner):
         """
         Call pypdfium2 and returns detected text as detection results
 
-        :
-
+        Args:
+            pdf_bytes: bytes of a single pdf page
+
+        Returns:
+            A list of `DetectionResult`
         """
 
         pdf = PdfDocument(pdf_bytes)

@@ -188,8 +209,12 @@ class Pdfmium2TextDetector(PdfMiner):
     def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
         """
         Get the width and height of the full page
-
-        :
+
+        Args:
+            pdf_bytes: `pdf_bytes` generating the pdf
+
+        Returns:
+            `(width,height)`
         """
 
         if self._pdf_bytes == pdf_bytes and self._page is not None:
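Taken together, the new docstrings document the intended call pattern. A minimal sketch of that pattern; the import locations are assumptions based on this release's file layout, not verified against the published API:

```python
# Sketch only: mirrors the docstring examples added above.
from deepdoctection.dataflow import SerializerPdfDoc  # assumed re-export location
from deepdoctection.extern.pdftext import PdfPlumberTextDetector

pdf_plumber = PdfPlumberTextDetector(x_tolerance=3, y_tolerance=3)  # defaults per the shown __init__
df = SerializerPdfDoc.load("path/to/document.pdf")  # yields one datapoint per page
df.reset_state()

for dp in df:
    # predict() takes the bytes of a single pdf page and returns
    # word-level DetectionResults; get_width_height() returns the page size.
    detection_results = pdf_plumber.predict(dp["pdf_bytes"])
    width, height = pdf_plumber.get_width_height(dp["pdf_bytes"])
```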
deepdoctection/extern/pt/nms.py
CHANGED

@@ -30,7 +30,21 @@ with try_import() as import_guard:
 # Copy & paste from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/nms.py
 def batched_nms(boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float) -> torch.Tensor:
     """
-    Same as torchvision.ops.boxes.batched_nms
+    Same as `torchvision.ops.boxes.batched_nms`, but with `float()`.
+
+    Args:
+        boxes: A `torch.Tensor` of shape (N, 4) containing bounding boxes.
+        scores: A `torch.Tensor` of shape (N,) containing scores for each box.
+        idxs: A `torch.Tensor` of shape (N,) containing the class indices for each box.
+        iou_threshold: A float representing the IoU threshold for suppression.
+
+    Returns:
+        A `torch.Tensor` containing the indices of the boxes to keep.
+
+    Note:
+        `Fp16` does not have enough range for batched NMS, so `float()` is used.
+        Torchvision already has a strategy to decide whether to use coordinate trick or for loop to implement
+        `batched_nms`.
     """
     assert boxes.shape[-1] == 4
     # Note: Torchvision already has a strategy (https://github.com/pytorch/vision/issues/1311)

deepdoctection/extern/pt/ptutils.py
CHANGED

@@ -33,19 +33,23 @@ with try_import() as import_guard:
 
 def get_torch_device(device: Optional[Union[str, torch.device]] = None) -> torch.device:
     """
-
+    Select a device on which to load a model. The selection follows a cascade of priorities:
 
-
-
-    unless something else is specified by CUDA_VISIBLE_DEVICES:
+    If a device string is provided, it is used. If the environment variable `USE_CUDA` is set, a GPU is used.
+    If more GPUs are available, it will use all of them unless something else is specified by `CUDA_VISIBLE_DEVICES`.
 
-
+    See: <https://stackoverflow.com/questions/54216920/how-to-use-multiple-gpus-in-pytorch>
 
-
-    - Otherwise, the CPU is used.
+    If an MPS device is available, it is used. Otherwise, the CPU is used.
 
-    :
-
+    Args:
+        device: Device either as string or torch.device.
+
+    Returns:
+        torch.device: The selected device.
+
+    Note:
+        The function checks the environment variables `USE_CUDA` and `USE_MPS` to determine device preference.
     """
     if device is not None:
        if isinstance(device, torch.device):
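The rewritten `get_torch_device` docstring spells out a selection cascade. A short sketch of that cascade; the module path and the `USE_CUDA` value format are assumptions drawn from this release's layout and the docstring text:

```python
# Sketch of the documented device-selection cascade. Requires torch.
import os

from deepdoctection.extern.pt.ptutils import get_torch_device  # assumed path

device = get_torch_device("cuda")  # 1. an explicit string or torch.device wins

os.environ["USE_CUDA"] = "True"    # 2. else the USE_CUDA env var selects GPU(s); value format assumed
device = get_torch_device()

del os.environ["USE_CUDA"]         # 3. else MPS if available, otherwise CPU
device = get_torch_device()
```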
deepdoctection/extern/tessocr.py
CHANGED

@@ -16,7 +16,7 @@
 # limitations under the License.
 
 """
-Tesseract OCR engine
+Tesseract OCR engine
 """
 from __future__ import annotations
 

@@ -115,7 +115,8 @@ def _run_tesseract(tesseract_args: list[str]) -> None:
 
 def get_tesseract_version() -> Version:
     """
-    Returns
+    Returns:
+        Version of the installed tesseract engine.
     """
     try:
         output = subprocess.check_output(

@@ -142,10 +143,12 @@ def get_tesseract_version() -> Version:
 
 def image_to_angle(image: PixelValues) -> Mapping[str, str]:
     """
-    Generating a tmp file and running
+    Generating a tmp file and running Tesseract to get the orientation of the image.
 
-
-
+    Args:
+        image: Image an `np.array`
+    Returns:
+        A dict with keys 'Orientation in degrees' and 'Orientation confidence'.
     """
     with save_tmp_file(image, "tess_") as (tmp_name, input_file_name):
         _run_tesseract(_input_to_cli_str("osd", "--psm 0", 0, input_file_name, tmp_name))

@@ -159,7 +162,7 @@ def image_to_angle(image: PixelValues) -> Mapping[str, str]:
 
 def image_to_dict(image: PixelValues, lang: str, config: str) -> dict[str, list[Union[str, int, float]]]:
     """
-    This is more or less pytesseract.image_to_data with a dict as returned value.
+    This is more or less `pytesseract.image_to_data` with a dict as returned value.
     What happens under the hood is:
 
     - saving an image file

@@ -167,13 +170,17 @@ def image_to_dict(image: PixelValues, lang: str, config: str) -> dict[str, list[
     - saving a temp .tsv file with predicted results
     - reading the .tsv file and returning the results as dict.
 
-
+    Note:
+        Requires Tesseract or 3.05 or higher
 
-    :
-
-
-
-
+    Args:
+        image: Image in np.array.
+        lang: String of language
+        config: string of configs
+
+    Returns:
+        Dictionary with keys `left`, `top`, `width`, `height` (bounding box coords), `conf` (confidence), `text`
+        (captured text), `block_num` (block number) and `lin_num` (line number).
     """
 
     with save_tmp_file(image, "tess_") as (tmp_name, input_file_name):

@@ -213,10 +220,14 @@ def image_to_dict(image: PixelValues, lang: str, config: str) -> dict[str, list[
 
 def tesseract_line_to_detectresult(detect_result_list: list[DetectionResult]) -> list[DetectionResult]:
     """
-    Generating text line DetectionResult based on Tesseract word grouping. It generates line bounding boxes from
+    Generating text line `DetectionResult`s based on Tesseract word grouping. It generates line bounding boxes from
     word bounding boxes.
-
-    :
+
+    Args:
+        detect_result_list: A list of `DetectionResult`s
+
+    Returns:
+        An extended list of `DetectionResult`s
     """
 
     line_detect_result: list[DetectionResult] = []

@@ -247,15 +258,18 @@ def tesseract_line_to_detectresult(detect_result_list: list[DetectionResult]) ->
 
 def predict_text(np_img: PixelValues, supported_languages: str, text_lines: bool, config: str) -> list[DetectionResult]:
     """
-    Calls
-
-    :
-
-
-
-
-
-
+    Calls Tesseract directly with some given configs. Requires Tesseract to be installed.
+
+    Args:
+        np_img: Image in `np.array`.
+        supported_languages: To improve OCR extraction quality it is helpful to pre-select the language of the
+                             detected text, if this in known in advance. Combinations are possible, e.g. `deu`,
+                             `fr+eng`.
+        text_lines: If `True`, it will return `DetectionResult`s of text lines as well.
+        config: The config parameter passing to Tesseract. Consult also <https://guides.nyu.edu/tesseract/usage>
+
+    Returns:
+        A list of Tesseract extractions wrapped in `DetectionResult`
     """
 
     results = image_to_dict(np_img, supported_languages, config)

@@ -290,31 +304,37 @@ def predict_rotation(np_img: PixelValues) -> Mapping[str, str]:
     """
     Predicts the rotation of an image using the Tesseract OCR engine.
 
-    :
-
+    Args:
+        np_img: numpy array of the image
+
+    Returns:
+        A dictionary with keys 'Orientation in degrees' and 'Orientation confidence'
     """
     return image_to_angle(np_img)
 
 
 class TesseractOcrDetector(ObjectDetector):
     """
-    Text object detector based on Tesseracts OCR engine.
+    Text object detector based on Tesseracts OCR engine.
 
-
-
+    Note:
+        Tesseract has to be installed separately. <https://tesseract-ocr.github.io/>
 
-
-
-    All configuration options that are available via pytesseract can be given via the configuration. The best overview
-    can be found at https://pypi.org/project/pytesseract/.
+    All configuration options that are available via pytesseract can be added to the configuration file:
+    <https://pypi.org/project/pytesseract/.>
 
+    Example:
+        ```python
         tesseract_config_path = ModelCatalog.get_full_path_configs("dd/conf_tesseract.yaml")
         ocr_detector = TesseractOcrDetector(tesseract_config_path)
 
         detection_result = ocr_detector.predict(bgr_image_as_np_array)
+        ```
 
     To use it within a pipeline
 
+    Example:
+        ```python
         tesseract_config_path = ModelCatalog.get_full_path_configs("dd/conf_tesseract.yaml")
         ocr_detector = TesseractOcrDetector(tesseract_config_path)
 

@@ -325,6 +345,7 @@ class TesseractOcrDetector(ObjectDetector):
 
         for dp in df:
             ...
+        ```
     """
 
     def __init__(

@@ -333,11 +354,12 @@ class TesseractOcrDetector(ObjectDetector):
         config_overwrite: Optional[list[str]] = None,
     ):
         """
-        Set up the configuration which is stored in a yaml
+        Set up the configuration which is stored in a `.yaml` file, that need to be passed through.
 
-        :
-
-
+        Args:
+            path_yaml: The path to the yaml config
+            config_overwrite: Overwrite config parameters defined by the yaml file with new values.
+                              E.g. `["oem=14"]`
         """
         self.name = self.get_name()
         self.model_id = self.get_model_id()

@@ -362,8 +384,11 @@ class TesseractOcrDetector(ObjectDetector):
         """
         Transfer of a numpy array and call of pytesseract. Return of the detection results.
 
-        :
-
+        Args:
+            np_img: image as `np.array`
+
+        Returns:
+            A list of `DetectionResult`
         """
 
         return predict_text(

@@ -386,7 +411,10 @@ class TesseractOcrDetector(ObjectDetector):
     def set_language(self, language: ObjectTypes) -> None:
         """
         Pass a language to change the model selection. For runtime language selection.
-
+
+        Args:
+            language: One of the following: `fre`,`dut`,`chi`,`cze`,`per`,`gre`,`mac`,`rum`,`arm`,
+                      `geo`,`war`,`glg`,`slv`,`alb`,`nn`.
         """
         self.config.LANGUAGES = _LANG_CODE_TO_TESS_LANG_CODE.get(language, language.value)
 

@@ -398,13 +426,11 @@ class TesseractOcrDetector(ObjectDetector):
 
 class TesseractRotationTransformer(ImageTransformer):
     """
-    The `TesseractRotationTransformer`
-
-    base class and implements methods for predicting and applying rotation transformations to images.
+    The `TesseractRotationTransformer` is designed to handle image rotations.. It inherits from the `ImageTransformer`
+    base class and implements methods for predicting and applying rotation transformations.
 
     The `predict` method determines the angle of the rotated image. It can only handle angles that are multiples of 90
-    degrees.
-    This method uses the Tesseract OCR engine to predict the rotation angle of an image.
+    degrees. This method uses the Tesseract OCR engine to predict the rotation angle of an image.
 
     The `transform` method applies the predicted rotation to the image, effectively rotating the image backwards.
     This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.

@@ -412,10 +438,12 @@ class TesseractRotationTransformer(ImageTransformer):
     This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
     The class also provides methods for cloning itself and for getting the requirements of the Tesseract OCR system.
 
-
-
-
-
+    Example:
+        ```python
+        transformer = TesseractRotationTransformer()
+        detection_result = transformer.predict(np_img)
+        rotated_image = transformer.transform(np_img, detection_result)
+        ```
     """
 
     def __init__(self) -> None:

@@ -428,9 +456,12 @@ class TesseractRotationTransformer(ImageTransformer):
         Applies the predicted rotation to the image, effectively rotating the image backwards.
         This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
 
-        :
-
-
+        Args:
+            np_img: The input image as a numpy array.
+            specification: A `DetectionResult` object containing the predicted rotation angle.
+
+        Returns:
+            The rotated image as a numpy array.
         """
         return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
 

@@ -439,8 +470,10 @@ class TesseractRotationTransformer(ImageTransformer):
         Determines the angle of the rotated image. It can only handle angles that are multiples of 90 degrees.
         This method uses the Tesseract OCR engine to predict the rotation angle of an image.
 
-        :
-
+        Args:
+            np_img: The input image as a numpy array.
+        Returns:
+            A `DetectionResult` object containing the predicted rotation angle and confidence.
         """
         output_dict = predict_rotation(np_img)
         return DetectionResult(
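A minimal sketch following the updated `TesseractOcrDetector` docstrings; it assumes a local Tesseract install, that `ModelCatalog` lives in `deepdoctection.extern.model` as the file list suggests, and that `dd/conf_tesseract.yaml` ships with this release:

```python
# Sketch only: mirrors the docstring example added above.
import cv2  # any loader producing a BGR np.array works here

from deepdoctection.extern.model import ModelCatalog  # assumed import path
from deepdoctection.extern.tessocr import TesseractOcrDetector

tesseract_config_path = ModelCatalog.get_full_path_configs("dd/conf_tesseract.yaml")
# config_overwrite uses the documented "key=value" form, e.g. ["oem=14"]
ocr_detector = TesseractOcrDetector(tesseract_config_path, config_overwrite=["oem=14"])

bgr_image_as_np_array = cv2.imread("path/to/page.png")
detection_results = ocr_detector.predict(bgr_image_as_np_array)
```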
deepdoctection/extern/texocr.py
CHANGED

@@ -60,15 +60,18 @@ def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_
     return all_results
 
 
-def predict_text(np_img: PixelValues, client, text_lines: bool) -> list[DetectionResult]:  # type: ignore
+def predict_text(np_img: PixelValues, client: boto3.client, text_lines: bool) -> list[DetectionResult]:  # type: ignore
     """
     Calls AWS Textract client (`detect_document_text`) and returns plain OCR results.
     AWS account required.
 
-    :
-
-
-
+    Args:
+        np_img: Image in `np.array`.
+        client: botocore textract client
+        text_lines: If `True`, it will return `DetectionResult`s of Text lines as well.
+
+    Returns:
+        A list of `DetectionResult`
     """
 
     width, height = np_img.shape[1], np_img.shape[0]

@@ -95,16 +98,23 @@ def predict_text(np_img: PixelValues, client, text_lines: bool) -> list[Detectio
 class TextractOcrDetector(ObjectDetector):
     """
     Text object detector based on AWS Textract OCR engine. Note that an AWS account as well as some additional
-    installations are required, i.e AWS CLI and boto3
-
+    installations are required, i.e `AWS CLI` and `boto3`.
+    Note:
+        The service is not free of charge. Additional information can be found at:
+        <https://docs.aws.amazon.com/textract/?id=docs_gateway> .
+
+    The detector only calls the base `OCR` engine and does not return additional Textract document analysis features.
 
-
+    Example:
 
+        ```python
         textract_predictor = TextractOcrDetector()
         detection_result = textract_predictor.predict(bgr_image_as_np_array)
+        ```
 
-
+    or
 
+        ```python
         textract_predictor = TextractOcrDetector()
         text_extract = TextExtractionService(textract_predictor)
 

@@ -113,13 +123,15 @@ class TextractOcrDetector(ObjectDetector):
 
         for dp in df:
             ...
+        ```
 
     """
 
     def __init__(self, text_lines: bool = False, **credentials_kwargs: str) -> None:
         """
-        :
-
+        Args:
+            text_lines: If `True`, it will return `DetectionResult`s of Text lines as well.
+            credentials_kwargs: `aws_access_key_id`, `aws_secret_access_key` or `aws_session_token`
         """
         self.name = "textract"
         self.model_id = self.get_model_id()

@@ -133,10 +145,13 @@ class TextractOcrDetector(ObjectDetector):
 
     def predict(self, np_img: PixelValues) -> list[DetectionResult]:
         """
-        Transfer of a
+        Transfer of a `np.array` and call textract `client`. Return of the `DetectionResult`s.
+
+        Args:
+            np_img: image as `np.array`
 
-        :
-
+        Returns:
+            A list of `DetectionResult`s
         """
 
         return predict_text(np_img, self.client, self.text_lines)
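A sketch per the updated `TextractOcrDetector` docstrings. An AWS account is required and calls are billed; the credential keyword names below are the ones the new `__init__` docstring documents, with placeholder values:

```python
# Sketch only: assumes boto3 and the AWS CLI are installed and the import path
# matches this release's layout (deepdoctection/extern/texocr.py).
import cv2

from deepdoctection.extern.texocr import TextractOcrDetector

textract_predictor = TextractOcrDetector(
    text_lines=True,                  # also return line-level DetectionResults
    aws_access_key_id="YOUR_KEY_ID",  # placeholder credentials
    aws_secret_access_key="YOUR_SECRET",
)
bgr_image_as_np_array = cv2.imread("path/to/page.png")
detection_results = textract_predictor.predict(bgr_image_as_np_array)
```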
deepdoctection/extern/tp/tfutils.py
CHANGED

@@ -37,7 +37,15 @@ with try_import() as tf_import_guard:
 
 def is_tfv2() -> bool:
     """
-    Returns whether
+    Returns whether TensorFlow is operating in V2 mode.
+
+    Returns:
+        Whether TensorFlow is operating in V2 mode.
+
+    Example:
+        ```python
+        is_tfv2()
+        ```
     """
     try:
         from tensorflow.python import tf2  # pylint: disable=C0415

@@ -49,7 +57,15 @@ def is_tfv2() -> bool:
 
 def disable_tfv2() -> bool:
     """
-
+    Disables TensorFlow V2 mode.
+
+    Returns:
+        Whether TensorFlow V2 mode was disabled.
+
+    Example:
+        ```python
+        disable_tfv2()
+        ```
     """
 
     tfv1 = tf.compat.v1

@@ -62,20 +78,32 @@ def disable_tfv2() -> bool:
 
 def disable_tp_layer_logging() -> None:
     """
-    Disables
+    Disables tensorpack layer logging, if not already set.
+
+    Example:
+        ```python
+        disable_tp_layer_logging()
+        ```
     """
     disable_layer_logging()
 
 
 def get_tf_device(device: Optional[Union[str, tf.device]] = None) -> tf.device:
     """
-
+    Selects a device on which to load a model. The selection follows a cascade of priorities:
+
+    - If a `device` string is provided, it is used. If the string is "cuda" or "GPU", the first GPU is used.
+    - If the environment variable `USE_CUDA` is set, a GPU is used. If more GPUs are available it will use the first
+      one.
+
+    Args:
+        device: Device string.
 
-
-
+    Returns:
+        TensorFlow device.
 
-    :
-
+    Raises:
+        EnvironmentError: If `USE_CUDA` is set but no GPU device is found, or if no CPU device is found.
     """
     if device is not None:
         if isinstance(device, ContextManager):
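A sketch tying the documented TF helpers together. The import path is assumed from this release's layout, and using the returned `tf.device` as a context manager is an assumption based on the documented return type and the `ContextManager` check visible above:

```python
# Sketch only: requires TensorFlow; paths and usage assumed, not verified.
from deepdoctection.extern.tp.tfutils import disable_tfv2, get_tf_device, is_tfv2

if is_tfv2():
    disable_tfv2()  # tensorpack-based models expect V1 graph mode

# get_tf_device follows the documented cascade: an explicit string first
# ("cuda" or "GPU" picks the first GPU), then the USE_CUDA env var, then CPU.
with get_tf_device("GPU"):
    ...  # build or load the model inside the selected device context
```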
|