PyPI - deepdoctection - Versions diffs - 0.31__py3-none-any.whl → 0.33__py3-none-any.whl - Mend

deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (131)

deepdoctection/__init__.py +16 -29
deepdoctection/analyzer/dd.py +70 -59
deepdoctection/configs/conf_dd_one.yaml +34 -31
deepdoctection/dataflow/common.py +9 -5
deepdoctection/dataflow/custom.py +5 -5
deepdoctection/dataflow/custom_serialize.py +75 -18
deepdoctection/dataflow/parallel_map.py +3 -3
deepdoctection/dataflow/serialize.py +4 -4
deepdoctection/dataflow/stats.py +3 -3
deepdoctection/datapoint/annotation.py +41 -56
deepdoctection/datapoint/box.py +9 -8
deepdoctection/datapoint/convert.py +6 -6
deepdoctection/datapoint/image.py +56 -44
deepdoctection/datapoint/view.py +245 -150
deepdoctection/datasets/__init__.py +1 -4
deepdoctection/datasets/adapter.py +35 -26
deepdoctection/datasets/base.py +14 -12
deepdoctection/datasets/dataflow_builder.py +3 -3
deepdoctection/datasets/info.py +24 -26
deepdoctection/datasets/instances/doclaynet.py +51 -51
deepdoctection/datasets/instances/fintabnet.py +46 -46
deepdoctection/datasets/instances/funsd.py +25 -24
deepdoctection/datasets/instances/iiitar13k.py +13 -10
deepdoctection/datasets/instances/layouttest.py +4 -3
deepdoctection/datasets/instances/publaynet.py +5 -5
deepdoctection/datasets/instances/pubtables1m.py +24 -21
deepdoctection/datasets/instances/pubtabnet.py +32 -30
deepdoctection/datasets/instances/rvlcdip.py +30 -30
deepdoctection/datasets/instances/xfund.py +26 -26
deepdoctection/datasets/save.py +6 -6
deepdoctection/eval/__init__.py +1 -4
deepdoctection/eval/accmetric.py +32 -33
deepdoctection/eval/base.py +8 -9
deepdoctection/eval/cocometric.py +15 -13
deepdoctection/eval/eval.py +41 -37
deepdoctection/eval/tedsmetric.py +30 -23
deepdoctection/eval/tp_eval_callback.py +16 -19
deepdoctection/extern/__init__.py +2 -7
deepdoctection/extern/base.py +339 -134
deepdoctection/extern/d2detect.py +85 -113
deepdoctection/extern/deskew.py +14 -11
deepdoctection/extern/doctrocr.py +141 -130
deepdoctection/extern/fastlang.py +27 -18
deepdoctection/extern/hfdetr.py +71 -62
deepdoctection/extern/hflayoutlm.py +504 -211
deepdoctection/extern/hflm.py +230 -0
deepdoctection/extern/model.py +488 -302
deepdoctection/extern/pdftext.py +23 -19
deepdoctection/extern/pt/__init__.py +1 -3
deepdoctection/extern/pt/nms.py +6 -2
deepdoctection/extern/pt/ptutils.py +29 -19
deepdoctection/extern/tessocr.py +39 -38
deepdoctection/extern/texocr.py +18 -18
deepdoctection/extern/tp/tfutils.py +57 -9
deepdoctection/extern/tp/tpcompat.py +21 -14
deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
deepdoctection/extern/tpdetect.py +45 -53
deepdoctection/mapper/__init__.py +3 -8
deepdoctection/mapper/cats.py +27 -29
deepdoctection/mapper/cocostruct.py +10 -10
deepdoctection/mapper/d2struct.py +27 -26
deepdoctection/mapper/hfstruct.py +13 -8
deepdoctection/mapper/laylmstruct.py +178 -37
deepdoctection/mapper/maputils.py +12 -11
deepdoctection/mapper/match.py +2 -2
deepdoctection/mapper/misc.py +11 -9
deepdoctection/mapper/pascalstruct.py +4 -4
deepdoctection/mapper/prodigystruct.py +5 -5
deepdoctection/mapper/pubstruct.py +84 -92
deepdoctection/mapper/tpstruct.py +5 -5
deepdoctection/mapper/xfundstruct.py +33 -33
deepdoctection/pipe/__init__.py +1 -1
deepdoctection/pipe/anngen.py +12 -14
deepdoctection/pipe/base.py +52 -106
deepdoctection/pipe/common.py +72 -59
deepdoctection/pipe/concurrency.py +16 -11
deepdoctection/pipe/doctectionpipe.py +24 -21
deepdoctection/pipe/language.py +20 -25
deepdoctection/pipe/layout.py +20 -16
deepdoctection/pipe/lm.py +75 -105
deepdoctection/pipe/order.py +194 -89
deepdoctection/pipe/refine.py +111 -124
deepdoctection/pipe/segment.py +156 -161
deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
deepdoctection/pipe/text.py +37 -36
deepdoctection/pipe/transform.py +19 -16
deepdoctection/train/__init__.py +6 -12
deepdoctection/train/d2_frcnn_train.py +48 -41
deepdoctection/train/hf_detr_train.py +41 -30
deepdoctection/train/hf_layoutlm_train.py +153 -135
deepdoctection/train/tp_frcnn_train.py +32 -31
deepdoctection/utils/concurrency.py +1 -1
deepdoctection/utils/context.py +13 -6
deepdoctection/utils/develop.py +4 -4
deepdoctection/utils/env_info.py +87 -125
deepdoctection/utils/file_utils.py +6 -11
deepdoctection/utils/fs.py +22 -18
deepdoctection/utils/identifier.py +2 -2
deepdoctection/utils/logger.py +16 -15
deepdoctection/utils/metacfg.py +7 -7
deepdoctection/utils/mocks.py +93 -0
deepdoctection/utils/pdf_utils.py +11 -11
deepdoctection/utils/settings.py +185 -181
deepdoctection/utils/tqdm.py +1 -1
deepdoctection/utils/transform.py +14 -9
deepdoctection/utils/types.py +104 -0
deepdoctection/utils/utils.py +7 -7
deepdoctection/utils/viz.py +74 -72
{deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
deepdoctection-0.33.dist-info/RECORD +146 -0
{deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
deepdoctection/utils/detection_types.py +0 -68
deepdoctection-0.31.dist-info/RECORD +0 -144
{deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
{deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0

deepdoctection/extern/pdftext.py CHANGED Viewed

@@ -19,24 +19,26 @@
 PDFPlumber text extraction engine
 """
-from typing import Dict, List, Tuple
+from typing import Optional
+from lazy_imports import try_import
 from ..utils.context import save_tmp_file
-from ..utils.detection_types import Requirement
-from ..utils.file_utils import get_pdfplumber_requirement, pdfplumber_available
+from ..utils.file_utils import get_pdfplumber_requirement
 from ..utils.settings import LayoutType, ObjectTypes
-from .base import DetectionResult, PdfMiner
+from ..utils.types import Requirement
+from .base import DetectionResult, ModelCategories, PdfMiner
-if pdfplumber_available():
-    from pdfplumber.pdf import PDF
+with try_import() as import_guard:
+    from pdfplumber.pdf import PDF, Page
-def _to_detect_result(word: Dict[str, str]) -> DetectionResult:
+def _to_detect_result(word: dict[str, str]) -> DetectionResult:
     return DetectionResult(
         box=[float(word["x0"]), float(word["top"]), float(word["x1"]), float(word["bottom"])],
         class_id=1,
         text=word["text"],
-        class_name=LayoutType.word,
+        class_name=LayoutType.WORD,
     )
@@ -64,12 +66,15 @@ class PdfPlumberTextDetector(PdfMiner):
     """
-    def __init__(self) -> None:
+    def __init__(self, x_tolerance: int = 3, y_tolerance: int = 3) -> None:
         self.name = "Pdfplumber"
         self.model_id = self.get_model_id()
-        self.categories = {"1": LayoutType.word}
+        self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
+        self.x_tolerance = x_tolerance
+        self.y_tolerance = y_tolerance
+        self._page: Optional[Page] = None
-    def predict(self, pdf_bytes: bytes) -> List[DetectionResult]:
+    def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
         """
         Call pdfminer.six and returns detected text as detection results
@@ -79,25 +84,24 @@ class PdfPlumberTextDetector(PdfMiner):
         with save_tmp_file(pdf_bytes, "pdf_") as (tmp_name, _):
             with open(tmp_name, "rb") as fin:
-                _pdf = PDF(fin)
-                self._page = _pdf.pages[0]
+                self._page = PDF(fin).pages[0]
                 self._pdf_bytes = pdf_bytes
-                words = self._page.extract_words()
+                words = self._page.extract_words(x_tolerance=self.x_tolerance, y_tolerance=self.y_tolerance)
         detect_results = list(map(_to_detect_result, words))
         return detect_results
     @classmethod
-    def get_requirements(cls) -> List[Requirement]:
+    def get_requirements(cls) -> list[Requirement]:
         return [get_pdfplumber_requirement()]
-    def get_width_height(self, pdf_bytes: bytes) -> Tuple[float, float]:
+    def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
         """
         Get the width and height of the full page
         :param pdf_bytes: pdf_bytes generating the pdf
         :return: width and height
         """
-        if self._pdf_bytes == pdf_bytes:
+        if self._pdf_bytes == pdf_bytes and self._page is not None:
             return self._page.bbox[2], self._page.bbox[3]
         # if the pdf bytes is not equal to the cached pdf, will recalculate values
         with save_tmp_file(pdf_bytes, "pdf_") as (tmp_name, _):
@@ -107,5 +111,5 @@ class PdfPlumberTextDetector(PdfMiner):
                 self._pdf_bytes = pdf_bytes
         return self._page.bbox[2], self._page.bbox[3]
-    def possible_categories(self) -> List[ObjectTypes]:
-        return [LayoutType.word]
+    def get_category_names(self) -> tuple[ObjectTypes, ...]:
+        return self.categories.get_categories(as_dict=False)

deepdoctection/extern/pt/__init__.py CHANGED Viewed

@@ -19,7 +19,5 @@
 Init file for pytorch compatibility package
 """
+from .nms import *
 from .ptutils import *
-if pytorch_available():
-    from .nms import *

deepdoctection/extern/pt/nms.py CHANGED Viewed

@@ -18,9 +18,13 @@
 """
 Module for custom NMS functions.
 """
+from __future__ import annotations
-import torch
-from torchvision.ops import boxes as box_ops  # type: ignore
+from lazy_imports import try_import
+with try_import() as import_guard:
+    import torch
+    from torchvision.ops import boxes as box_ops  # type: ignore
 # Copy & paste from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/nms.py

deepdoctection/extern/pt/ptutils.py CHANGED Viewed

@@ -18,32 +18,42 @@
 """
 Torch related utils
 """
+from __future__ import annotations
+import os
+from typing import Optional, Union
-from ...utils.error import DependencyError
-from ...utils.file_utils import pytorch_available
+from lazy_imports import try_import
+from ...utils.env_info import ENV_VARS_TRUE
-def set_torch_auto_device() -> "torch.device":  # type: ignore
-    """
-    Returns cuda device if available, otherwise cpu
-    """
-    if pytorch_available():
-        from torch import cuda, device  # pylint: disable=C0415
-        return device("cuda" if cuda.is_available() else "cpu")
-    raise DependencyError("Pytorch must be installed")
+with try_import() as import_guard:
+    import torch
-def get_num_gpu() -> int:
+def get_torch_device(device: Optional[Union[str, torch.device]] = None) -> torch.device:
     """
-    Returns number of CUDA devices if pytorch is available
+    Selecting a device on which to load a model. The selection follows a cascade of priorities:
-    :return:
-    """
+    - If a device string is provided, it is used.
+    - If the environment variable "USE_CUDA" is set, a GPU is used. If more GPUs are available, it will use all of them
+      unless something else is specified by CUDA_VISIBLE_DEVICES:
+          https://stackoverflow.com/questions/54216920/how-to-use-multiple-gpus-in-pytorch
-    if pytorch_available():
-        from torch import cuda  # pylint: disable=C0415
+    - If an MPS device is available, it is used.
+    - Otherwise, the CPU is used.
-        return cuda.device_count()
-    raise DependencyError("Pytorch must be installed")
+    :param device: Device either as string or torch.device
+    :return: Tensorflow device
+    """
+    if device is not None:
+        if isinstance(device, torch.device):
+            return device
+        if isinstance(device, str):
+            return torch.device(device)
+    if os.environ.get("USE_CUDA", "False") in ENV_VARS_TRUE:
+        return torch.device("cuda")
+    if os.environ.get("USE_MPS", "False") in ENV_VARS_TRUE:
+        return torch.device("mps")
+    return torch.device("cpu")

deepdoctection/extern/tessocr.py CHANGED Viewed

@@ -18,25 +18,28 @@
 """
 Tesseract OCR engine for text extraction
 """
+from __future__ import annotations
 import shlex
 import string
 import subprocess
 import sys
 from errno import ENOENT
 from itertools import groupby
-from os import environ
-from typing import Any, Dict, List, Mapping, Optional, Union
+from os import environ, fspath
+from pathlib import Path
+from typing import Any, Mapping, Optional, Union
 from packaging.version import InvalidVersion, Version, parse
 from ..utils.context import save_tmp_file, timeout_manager
-from ..utils.detection_types import ImageType, Requirement
 from ..utils.error import DependencyError, TesseractError
 from ..utils.file_utils import _TESS_PATH, get_tesseract_requirement
 from ..utils.metacfg import config_to_cli_str, set_config_by_yaml
 from ..utils.settings import LayoutType, ObjectTypes, PageType
+from ..utils.types import PathLikeOrStr, PixelValues, Requirement
 from ..utils.viz import viz_handler
-from .base import DetectionResult, ImageTransformer, ObjectDetector, PredictorBase
+from .base import DetectionResult, ImageTransformer, ModelCategories, ObjectDetector
 # copy and paste with some light modifications from https://github.com/madmaze/pytesseract/tree/master/pytesseract
@@ -60,7 +63,7 @@ _LANG_CODE_TO_TESS_LANG_CODE = {
 }
-def _subprocess_args() -> Dict[str, Any]:
+def _subprocess_args() -> dict[str, Any]:
     # See https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess
     # for reference and comments.
@@ -75,16 +78,16 @@ def _subprocess_args() -> Dict[str, Any]:
     return kwargs
-def _input_to_cli_str(lang: str, config: str, nice: int, input_file_name: str, output_file_name_base: str) -> List[str]:
+def _input_to_cli_str(lang: str, config: str, nice: int, input_file_name: str, output_file_name_base: str) -> list[str]:
     """
     Generates a tesseract cmd as list of string with given inputs
     """
-    cmd_args: List[str] = []
+    cmd_args: list[str] = []
     if not sys.platform.startswith("win32") and nice != 0:
         cmd_args += ("nice", "-n", str(nice))
-    cmd_args += (_TESS_PATH, input_file_name, output_file_name_base, "-l", lang)
+    cmd_args += (fspath(_TESS_PATH), input_file_name, output_file_name_base, "-l", lang)
     if config:
         cmd_args += shlex.split(config)
@@ -94,7 +97,7 @@ def _input_to_cli_str(lang: str, config: str, nice: int, input_file_name: str, o
     return cmd_args
-def _run_tesseract(tesseract_args: List[str]) -> None:
+def _run_tesseract(tesseract_args: list[str]) -> None:
     try:
         proc = subprocess.Popen(tesseract_args, **_subprocess_args())  # pylint: disable=R1732
     except OSError as error:
@@ -137,7 +140,7 @@ def get_tesseract_version() -> Version:
     return version
-def image_to_angle(image: ImageType) -> Mapping[str, str]:
+def image_to_angle(image: PixelValues) -> Mapping[str, str]:
     """
     Generating a tmp file and running tesseract to get the orientation of the image.
@@ -154,7 +157,7 @@ def image_to_angle(image: ImageType) -> Mapping[str, str]:
     }
-def image_to_dict(image: ImageType, lang: str, config: str) -> Dict[str, List[Union[str, int, float]]]:
+def image_to_dict(image: PixelValues, lang: str, config: str) -> dict[str, list[Union[str, int, float]]]:
     """
     This is more or less pytesseract.image_to_data with a dict as returned value.
     What happens under the hood is:
@@ -177,7 +180,7 @@ def image_to_dict(image: ImageType, lang: str, config: str) -> Dict[str, List[Un
         _run_tesseract(_input_to_cli_str(lang, config, 0, input_file_name, tmp_name))
         with open(tmp_name + ".tsv", "rb") as output_file:
             output = output_file.read().decode("utf-8")
-        result: Dict[str, List[Union[str, int, float]]] = {}
+        result: dict[str, list[Union[str, int, float]]] = {}
         rows = [row.split("\t") for row in output.strip().split("\n")]
         if len(rows) < 2:
             return result
@@ -208,7 +211,7 @@ def image_to_dict(image: ImageType, lang: str, config: str) -> Dict[str, List[Un
         return result
-def tesseract_line_to_detectresult(detect_result_list: List[DetectionResult]) -> List[DetectionResult]:
+def tesseract_line_to_detectresult(detect_result_list: list[DetectionResult]) -> list[DetectionResult]:
     """
     Generating text line DetectionResult based on Tesseract word grouping. It generates line bounding boxes from
     word bounding boxes.
@@ -216,7 +219,7 @@ def tesseract_line_to_detectresult(detect_result_list: List[DetectionResult]) ->
     :return: An extended list of detection result
     """
-    line_detect_result: List[DetectionResult] = []
+    line_detect_result: list[DetectionResult] = []
     for _, block_group_iter in groupby(detect_result_list, key=lambda x: x.block):
         block_group = []
         for _, line_group_iter in groupby(list(block_group_iter), key=lambda x: x.line):
@@ -231,7 +234,7 @@ def tesseract_line_to_detectresult(detect_result_list: List[DetectionResult]) ->
                 DetectionResult(
                     box=[ulx, uly, lrx, lry],
                     class_id=2,
-                    class_name=LayoutType.line,
+                    class_name=LayoutType.LINE,
                     text=" ".join(
                         [detect_result.text for detect_result in block_group if isinstance(detect_result.text, str)]
                     ),
@@ -242,7 +245,7 @@ def tesseract_line_to_detectresult(detect_result_list: List[DetectionResult]) ->
     return detect_result_list
-def predict_text(np_img: ImageType, supported_languages: str, text_lines: bool, config: str) -> List[DetectionResult]:
+def predict_text(np_img: PixelValues, supported_languages: str, text_lines: bool, config: str) -> list[DetectionResult]:
     """
     Calls tesseract directly with some given configs. Requires Tesseract to be installed.
@@ -275,7 +278,7 @@ def predict_text(np_img: ImageType, supported_languages: str, text_lines: bool,
                 score=score / 100,
                 text=caption[5],
                 class_id=1,
-                class_name=LayoutType.word,
+                class_name=LayoutType.WORD,
             )
             all_results.append(word)
     if text_lines:
@@ -283,7 +286,7 @@ def predict_text(np_img: ImageType, supported_languages: str, text_lines: bool,
     return all_results
-def predict_rotation(np_img: ImageType) -> Mapping[str, str]:
+def predict_rotation(np_img: PixelValues) -> Mapping[str, str]:
     """
     Predicts the rotation of an image using the Tesseract OCR engine.
@@ -326,8 +329,8 @@ class TesseractOcrDetector(ObjectDetector):
     def __init__(
         self,
-        path_yaml: str,
-        config_overwrite: Optional[List[str]] = None,
+        path_yaml: PathLikeOrStr,
+        config_overwrite: Optional[list[str]] = None,
     ):
         """
         Set up the configuration which is stored in a yaml-file, that need to be passed through.
@@ -346,16 +349,16 @@ class TesseractOcrDetector(ObjectDetector):
         if len(config_overwrite):
             hyper_param_config.update_args(config_overwrite)
-        self.path_yaml = path_yaml
+        self.path_yaml = Path(path_yaml)
         self.config_overwrite = config_overwrite
         self.config = hyper_param_config
         if self.config.LINES:
-            self.categories = {"1": LayoutType.word, "2": LayoutType.line}
+            self.categories = ModelCategories(init_categories={1: LayoutType.WORD, 2: LayoutType.LINE})
         else:
-            self.categories = {"1": LayoutType.word}
+            self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
-    def predict(self, np_img: ImageType) -> List[DetectionResult]:
+    def predict(self, np_img: PixelValues) -> list[DetectionResult]:
         """
         Transfer of a numpy array and call of pytesseract. Return of the detection results.
@@ -371,16 +374,14 @@ class TesseractOcrDetector(ObjectDetector):
         )
     @classmethod
-    def get_requirements(cls) -> List[Requirement]:
+    def get_requirements(cls) -> list[Requirement]:
         return [get_tesseract_requirement()]
-    def clone(self) -> PredictorBase:
+    def clone(self) -> TesseractOcrDetector:
         return self.__class__(self.path_yaml, self.config_overwrite)
-    def possible_categories(self) -> List[ObjectTypes]:
-        if self.config.LINES:
-            return [LayoutType.word, LayoutType.line]
-        return [LayoutType.word]
+    def get_category_names(self) -> tuple[ObjectTypes, ...]:
+        return self.categories.get_categories(as_dict=False)
     def set_language(self, language: ObjectTypes) -> None:
         """
@@ -418,9 +419,10 @@ class TesseractRotationTransformer(ImageTransformer):
     """
     def __init__(self) -> None:
-        self.name = _TESS_PATH + "-rotation"
+        self.name = fspath(_TESS_PATH) + "-rotation"
+        self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
-    def transform(self, np_img: ImageType, specification: DetectionResult) -> ImageType:
+    def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
         """
         Applies the predicted rotation to the image, effectively rotating the image backwards.
         This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
@@ -431,7 +433,7 @@ class TesseractRotationTransformer(ImageTransformer):
         """
         return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
-    def predict(self, np_img: ImageType) -> DetectionResult:
+    def predict(self, np_img: PixelValues) -> DetectionResult:
         """
         Determines the angle of the rotated image. It can only handle angles that are multiples of 90 degrees.
         This method uses the Tesseract OCR engine to predict the rotation angle of an image.
@@ -445,12 +447,11 @@ class TesseractRotationTransformer(ImageTransformer):
         )
     @classmethod
-    def get_requirements(cls) -> List[Requirement]:
+    def get_requirements(cls) -> list[Requirement]:
         return [get_tesseract_requirement()]
-    def clone(self) -> PredictorBase:
+    def clone(self) -> TesseractRotationTransformer:
         return self.__class__()
-    @staticmethod
-    def possible_category() -> PageType:
-        return PageType.angle
+    def get_category_names(self) -> tuple[ObjectTypes, ...]:
+        return self.categories.get_categories(as_dict=False)

deepdoctection/extern/texocr.py CHANGED Viewed

@@ -18,24 +18,26 @@
 """
 AWS Textract OCR engine for text extraction
 """
+from __future__ import annotations
 import sys
 import traceback
-from typing import List
+from lazy_imports import try_import
 from ..datapoint.convert import convert_np_array_to_b64_b
-from ..utils.detection_types import ImageType, JsonDict, Requirement
-from ..utils.file_utils import boto3_available, get_boto3_requirement
+from ..utils.file_utils import get_boto3_requirement
 from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import LayoutType, ObjectTypes
-from .base import DetectionResult, ObjectDetector, PredictorBase
+from ..utils.types import JsonDict, PixelValues, Requirement
+from .base import DetectionResult, ModelCategories, ObjectDetector
-if boto3_available():
+with try_import() as import_guard:
     import boto3  # type:ignore
-def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_lines: bool) -> List[DetectionResult]:
-    all_results: List[DetectionResult] = []
+def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_lines: bool) -> list[DetectionResult]:
+    all_results: list[DetectionResult] = []
     blocks = response.get("Blocks")
     if blocks:
@@ -51,14 +53,14 @@ def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_
                     score=block["Confidence"] / 100,
                     text=block["Text"],
                     class_id=1 if block["BlockType"] == "WORD" else 2,
-                    class_name=LayoutType.word if block["BlockType"] == "WORD" else LayoutType.line,
+                    class_name=LayoutType.WORD if block["BlockType"] == "WORD" else LayoutType.LINE,
                 )
                 all_results.append(word)
     return all_results
-def predict_text(np_img: ImageType, client, text_lines: bool) -> List[DetectionResult]:  # type: ignore
+def predict_text(np_img: PixelValues, client, text_lines: bool) -> list[DetectionResult]:  # type: ignore
     """
     Calls AWS Textract client (`detect_document_text`) and returns plain OCR results.
     AWS account required.
@@ -125,11 +127,11 @@ class TextractOcrDetector(ObjectDetector):
         self.text_lines = text_lines
         self.client = boto3.client("textract", **credentials_kwargs)
         if self.text_lines:
-            self.categories = {"1": LayoutType.word, "2": LayoutType.line}
+            self.categories = ModelCategories(init_categories={1: LayoutType.WORD, 2: LayoutType.LINE})
         else:
-            self.categories = {"1": LayoutType.word}
+            self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
-    def predict(self, np_img: ImageType) -> List[DetectionResult]:
+    def predict(self, np_img: PixelValues) -> list[DetectionResult]:
         """
         Transfer of a numpy array and call textract client. Return of the detection results.
@@ -140,13 +142,11 @@ class TextractOcrDetector(ObjectDetector):
         return predict_text(np_img, self.client, self.text_lines)
     @classmethod
-    def get_requirements(cls) -> List[Requirement]:
+    def get_requirements(cls) -> list[Requirement]:
         return [get_boto3_requirement()]
-    def clone(self) -> PredictorBase:
+    def clone(self) -> TextractOcrDetector:
         return self.__class__()
-    def possible_categories(self) -> List[ObjectTypes]:
-        if self.text_lines:
-            return [LayoutType.word, LayoutType.line]
-        return [LayoutType.word]
+    def get_category_names(self) -> tuple[ObjectTypes, ...]:
+        return self.categories.get_categories(as_dict=False)

deepdoctection/extern/tp/tfutils.py CHANGED Viewed

@@ -19,7 +19,20 @@
 Tensorflow related utils.
 """
-from tensorpack.models import disable_layer_logging  # pylint: disable=E0401
+from __future__ import annotations
+import os
+from typing import ContextManager, Optional, Union
+from lazy_imports import try_import
+from ...utils.env_info import ENV_VARS_TRUE
+with try_import() as import_guard:
+    from tensorpack.models import disable_layer_logging  # pylint: disable=E0401
+with try_import() as tf_import_guard:
+    import tensorflow as tf  # pylint: disable=E0401
 def is_tfv2() -> bool:
@@ -38,16 +51,13 @@ def disable_tfv2() -> bool:
     """
     Disable TF in V2 mode.
     """
-    try:
-        import tensorflow as tf  # pylint: disable=C0415
-        tfv1 = tf.compat.v1
-        if is_tfv2():
-            tfv1.disable_v2_behavior()
-            tfv1.disable_eager_execution()
+    tfv1 = tf.compat.v1
+    if is_tfv2():
+        tfv1.disable_v2_behavior()
+        tfv1.disable_eager_execution()
         return True
-    except ModuleNotFoundError:
-        return False
+    return False
 def disable_tp_layer_logging() -> None:
@@ -55,3 +65,41 @@ def disable_tp_layer_logging() -> None:
     Disables TP layer logging, if not already set
     """
     disable_layer_logging()
+def get_tf_device(device: Optional[Union[str, tf.device]] = None) -> tf.device:
+    """
+    Selecting a device on which to load a model. The selection follows a cascade of priorities:
+    - If a device string is provided, it is used. If the string is "cuda" or "GPU", the first GPU is used.
+    - If the environment variable "USE_CUDA" is set, a GPU is used. If more GPUs are available it will use the first one
+    :param device: Device string
+    :return: Tensorflow device
+    """
+    if device is not None:
+        if isinstance(device, ContextManager):
+            return device
+        if isinstance(device, str):
+            if device in ("cuda", "GPU"):
+                device_names = [device.name for device in tf.config.list_logical_devices(device_type="GPU")]
+                return tf.device(device_names[0].name)
+            # The input must be something sensible
+            return tf.device(device)
+    if os.environ.get("USE_CUDA", "False") in ENV_VARS_TRUE:
+        device_names = [device.name for device in tf.config.list_logical_devices(device_type="GPU")]
+        if not device_names:
+            raise EnvironmentError(
+                "USE_CUDA is set but tf.config.list_logical_devices cannot find anyx device. "
+                "It looks like there is an issue with your Tensorlfow installation. "
+                "You can LOG_LEVEL='DEBUG' to get more information about installation."
+            )
+        return tf.device(device_names[0])
+    device_names = [device.name for device in tf.config.list_logical_devices(device_type="CPU")]
+    if not device_names:
+        raise EnvironmentError(
+            "Cannot find any CPU device. It looks like there is an issue with your "
+            "Tensorflow installation. You can LOG_LEVEL='DEBUG' to get more information about "
+            "installation."
+        )
+    return tf.device(device_names[0])

deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl

Potentially problematic release.

deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl