deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (111) hide show
  1. deepdoctection/__init__.py +8 -25
  2. deepdoctection/analyzer/dd.py +84 -71
  3. deepdoctection/dataflow/common.py +9 -5
  4. deepdoctection/dataflow/custom.py +5 -5
  5. deepdoctection/dataflow/custom_serialize.py +75 -18
  6. deepdoctection/dataflow/parallel_map.py +3 -3
  7. deepdoctection/dataflow/serialize.py +4 -4
  8. deepdoctection/dataflow/stats.py +3 -3
  9. deepdoctection/datapoint/annotation.py +78 -56
  10. deepdoctection/datapoint/box.py +7 -7
  11. deepdoctection/datapoint/convert.py +6 -6
  12. deepdoctection/datapoint/image.py +157 -75
  13. deepdoctection/datapoint/view.py +175 -151
  14. deepdoctection/datasets/adapter.py +30 -24
  15. deepdoctection/datasets/base.py +10 -10
  16. deepdoctection/datasets/dataflow_builder.py +3 -3
  17. deepdoctection/datasets/info.py +23 -25
  18. deepdoctection/datasets/instances/doclaynet.py +48 -49
  19. deepdoctection/datasets/instances/fintabnet.py +44 -45
  20. deepdoctection/datasets/instances/funsd.py +23 -23
  21. deepdoctection/datasets/instances/iiitar13k.py +8 -8
  22. deepdoctection/datasets/instances/layouttest.py +2 -2
  23. deepdoctection/datasets/instances/publaynet.py +3 -3
  24. deepdoctection/datasets/instances/pubtables1m.py +18 -18
  25. deepdoctection/datasets/instances/pubtabnet.py +30 -29
  26. deepdoctection/datasets/instances/rvlcdip.py +28 -29
  27. deepdoctection/datasets/instances/xfund.py +51 -30
  28. deepdoctection/datasets/save.py +6 -6
  29. deepdoctection/eval/accmetric.py +32 -33
  30. deepdoctection/eval/base.py +8 -9
  31. deepdoctection/eval/cocometric.py +13 -12
  32. deepdoctection/eval/eval.py +32 -26
  33. deepdoctection/eval/tedsmetric.py +16 -12
  34. deepdoctection/eval/tp_eval_callback.py +7 -16
  35. deepdoctection/extern/base.py +339 -134
  36. deepdoctection/extern/d2detect.py +69 -89
  37. deepdoctection/extern/deskew.py +11 -10
  38. deepdoctection/extern/doctrocr.py +81 -64
  39. deepdoctection/extern/fastlang.py +23 -16
  40. deepdoctection/extern/hfdetr.py +53 -38
  41. deepdoctection/extern/hflayoutlm.py +216 -155
  42. deepdoctection/extern/hflm.py +35 -30
  43. deepdoctection/extern/model.py +433 -255
  44. deepdoctection/extern/pdftext.py +15 -15
  45. deepdoctection/extern/pt/ptutils.py +4 -2
  46. deepdoctection/extern/tessocr.py +39 -38
  47. deepdoctection/extern/texocr.py +14 -16
  48. deepdoctection/extern/tp/tfutils.py +16 -2
  49. deepdoctection/extern/tp/tpcompat.py +11 -7
  50. deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
  51. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
  52. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
  53. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
  54. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
  55. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
  56. deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
  57. deepdoctection/extern/tpdetect.py +40 -45
  58. deepdoctection/mapper/cats.py +36 -40
  59. deepdoctection/mapper/cocostruct.py +16 -12
  60. deepdoctection/mapper/d2struct.py +22 -22
  61. deepdoctection/mapper/hfstruct.py +7 -7
  62. deepdoctection/mapper/laylmstruct.py +22 -24
  63. deepdoctection/mapper/maputils.py +9 -10
  64. deepdoctection/mapper/match.py +33 -2
  65. deepdoctection/mapper/misc.py +6 -7
  66. deepdoctection/mapper/pascalstruct.py +4 -4
  67. deepdoctection/mapper/prodigystruct.py +6 -6
  68. deepdoctection/mapper/pubstruct.py +84 -92
  69. deepdoctection/mapper/tpstruct.py +3 -3
  70. deepdoctection/mapper/xfundstruct.py +33 -33
  71. deepdoctection/pipe/anngen.py +39 -14
  72. deepdoctection/pipe/base.py +68 -99
  73. deepdoctection/pipe/common.py +181 -85
  74. deepdoctection/pipe/concurrency.py +14 -10
  75. deepdoctection/pipe/doctectionpipe.py +24 -21
  76. deepdoctection/pipe/language.py +20 -25
  77. deepdoctection/pipe/layout.py +18 -16
  78. deepdoctection/pipe/lm.py +49 -47
  79. deepdoctection/pipe/order.py +63 -65
  80. deepdoctection/pipe/refine.py +102 -109
  81. deepdoctection/pipe/segment.py +157 -162
  82. deepdoctection/pipe/sub_layout.py +50 -40
  83. deepdoctection/pipe/text.py +37 -36
  84. deepdoctection/pipe/transform.py +19 -16
  85. deepdoctection/train/d2_frcnn_train.py +27 -25
  86. deepdoctection/train/hf_detr_train.py +22 -18
  87. deepdoctection/train/hf_layoutlm_train.py +49 -48
  88. deepdoctection/train/tp_frcnn_train.py +10 -11
  89. deepdoctection/utils/concurrency.py +1 -1
  90. deepdoctection/utils/context.py +13 -6
  91. deepdoctection/utils/develop.py +4 -4
  92. deepdoctection/utils/env_info.py +52 -14
  93. deepdoctection/utils/file_utils.py +6 -11
  94. deepdoctection/utils/fs.py +41 -14
  95. deepdoctection/utils/identifier.py +2 -2
  96. deepdoctection/utils/logger.py +15 -15
  97. deepdoctection/utils/metacfg.py +7 -7
  98. deepdoctection/utils/pdf_utils.py +39 -14
  99. deepdoctection/utils/settings.py +188 -182
  100. deepdoctection/utils/tqdm.py +1 -1
  101. deepdoctection/utils/transform.py +14 -9
  102. deepdoctection/utils/types.py +104 -0
  103. deepdoctection/utils/utils.py +7 -7
  104. deepdoctection/utils/viz.py +70 -69
  105. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
  106. deepdoctection-0.34.dist-info/RECORD +146 -0
  107. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
  108. deepdoctection/utils/detection_types.py +0 -68
  109. deepdoctection-0.32.dist-info/RECORD +0 -146
  110. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
  111. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
@@ -19,26 +19,26 @@
19
19
  PDFPlumber text extraction engine
20
20
  """
21
21
 
22
- from typing import Dict, List, Tuple
22
+ from typing import Optional
23
23
 
24
24
  from lazy_imports import try_import
25
25
 
26
26
  from ..utils.context import save_tmp_file
27
- from ..utils.detection_types import Requirement
28
27
  from ..utils.file_utils import get_pdfplumber_requirement
29
28
  from ..utils.settings import LayoutType, ObjectTypes
30
- from .base import DetectionResult, PdfMiner
29
+ from ..utils.types import Requirement
30
+ from .base import DetectionResult, ModelCategories, PdfMiner
31
31
 
32
32
  with try_import() as import_guard:
33
- from pdfplumber.pdf import PDF
33
+ from pdfplumber.pdf import PDF, Page
34
34
 
35
35
 
36
- def _to_detect_result(word: Dict[str, str]) -> DetectionResult:
36
+ def _to_detect_result(word: dict[str, str]) -> DetectionResult:
37
37
  return DetectionResult(
38
38
  box=[float(word["x0"]), float(word["top"]), float(word["x1"]), float(word["bottom"])],
39
39
  class_id=1,
40
40
  text=word["text"],
41
- class_name=LayoutType.word,
41
+ class_name=LayoutType.WORD,
42
42
  )
43
43
 
44
44
 
@@ -69,11 +69,12 @@ class PdfPlumberTextDetector(PdfMiner):
69
69
  def __init__(self, x_tolerance: int = 3, y_tolerance: int = 3) -> None:
70
70
  self.name = "Pdfplumber"
71
71
  self.model_id = self.get_model_id()
72
- self.categories = {"1": LayoutType.word}
72
+ self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
73
73
  self.x_tolerance = x_tolerance
74
74
  self.y_tolerance = y_tolerance
75
+ self._page: Optional[Page] = None
75
76
 
76
- def predict(self, pdf_bytes: bytes) -> List[DetectionResult]:
77
+ def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
77
78
  """
78
79
  Call pdfminer.six and returns detected text as detection results
79
80
 
@@ -83,25 +84,24 @@ class PdfPlumberTextDetector(PdfMiner):
83
84
 
84
85
  with save_tmp_file(pdf_bytes, "pdf_") as (tmp_name, _):
85
86
  with open(tmp_name, "rb") as fin:
86
- _pdf = PDF(fin)
87
- self._page = _pdf.pages[0]
87
+ self._page = PDF(fin).pages[0]
88
88
  self._pdf_bytes = pdf_bytes
89
89
  words = self._page.extract_words(x_tolerance=self.x_tolerance, y_tolerance=self.y_tolerance)
90
90
  detect_results = list(map(_to_detect_result, words))
91
91
  return detect_results
92
92
 
93
93
  @classmethod
94
- def get_requirements(cls) -> List[Requirement]:
94
+ def get_requirements(cls) -> list[Requirement]:
95
95
  return [get_pdfplumber_requirement()]
96
96
 
97
- def get_width_height(self, pdf_bytes: bytes) -> Tuple[float, float]:
97
+ def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
98
98
  """
99
99
  Get the width and height of the full page
100
100
  :param pdf_bytes: pdf_bytes generating the pdf
101
101
  :return: width and height
102
102
  """
103
103
 
104
- if self._pdf_bytes == pdf_bytes:
104
+ if self._pdf_bytes == pdf_bytes and self._page is not None:
105
105
  return self._page.bbox[2], self._page.bbox[3]
106
106
  # if the pdf bytes is not equal to the cached pdf, will recalculate values
107
107
  with save_tmp_file(pdf_bytes, "pdf_") as (tmp_name, _):
@@ -111,5 +111,5 @@ class PdfPlumberTextDetector(PdfMiner):
111
111
  self._pdf_bytes = pdf_bytes
112
112
  return self._page.bbox[2], self._page.bbox[3]
113
113
 
114
- def possible_categories(self) -> List[ObjectTypes]:
115
- return [LayoutType.word]
114
+ def get_category_names(self) -> tuple[ObjectTypes, ...]:
115
+ return self.categories.get_categories(as_dict=False)
@@ -25,6 +25,8 @@ from typing import Optional, Union
25
25
 
26
26
  from lazy_imports import try_import
27
27
 
28
+ from ...utils.env_info import ENV_VARS_TRUE
29
+
28
30
  with try_import() as import_guard:
29
31
  import torch
30
32
 
@@ -50,8 +52,8 @@ def get_torch_device(device: Optional[Union[str, torch.device]] = None) -> torch
50
52
  return device
51
53
  if isinstance(device, str):
52
54
  return torch.device(device)
53
- if os.environ.get("USE_CUDA"):
55
+ if os.environ.get("USE_CUDA", "False") in ENV_VARS_TRUE:
54
56
  return torch.device("cuda")
55
- if os.environ.get("USE_MPS"):
57
+ if os.environ.get("USE_MPS", "False") in ENV_VARS_TRUE:
56
58
  return torch.device("mps")
57
59
  return torch.device("cpu")
@@ -18,25 +18,28 @@
18
18
  """
19
19
  Tesseract OCR engine for text extraction
20
20
  """
21
+ from __future__ import annotations
22
+
21
23
  import shlex
22
24
  import string
23
25
  import subprocess
24
26
  import sys
25
27
  from errno import ENOENT
26
28
  from itertools import groupby
27
- from os import environ
28
- from typing import Any, Dict, List, Mapping, Optional, Union
29
+ from os import environ, fspath
30
+ from pathlib import Path
31
+ from typing import Any, Mapping, Optional, Union
29
32
 
30
33
  from packaging.version import InvalidVersion, Version, parse
31
34
 
32
35
  from ..utils.context import save_tmp_file, timeout_manager
33
- from ..utils.detection_types import ImageType, Requirement
34
36
  from ..utils.error import DependencyError, TesseractError
35
37
  from ..utils.file_utils import _TESS_PATH, get_tesseract_requirement
36
38
  from ..utils.metacfg import config_to_cli_str, set_config_by_yaml
37
39
  from ..utils.settings import LayoutType, ObjectTypes, PageType
40
+ from ..utils.types import PathLikeOrStr, PixelValues, Requirement
38
41
  from ..utils.viz import viz_handler
39
- from .base import DetectionResult, ImageTransformer, ObjectDetector, PredictorBase
42
+ from .base import DetectionResult, ImageTransformer, ModelCategories, ObjectDetector
40
43
 
41
44
  # copy and paste with some light modifications from https://github.com/madmaze/pytesseract/tree/master/pytesseract
42
45
 
@@ -60,7 +63,7 @@ _LANG_CODE_TO_TESS_LANG_CODE = {
60
63
  }
61
64
 
62
65
 
63
- def _subprocess_args() -> Dict[str, Any]:
66
+ def _subprocess_args() -> dict[str, Any]:
64
67
  # See https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess
65
68
  # for reference and comments.
66
69
 
@@ -75,16 +78,16 @@ def _subprocess_args() -> Dict[str, Any]:
75
78
  return kwargs
76
79
 
77
80
 
78
- def _input_to_cli_str(lang: str, config: str, nice: int, input_file_name: str, output_file_name_base: str) -> List[str]:
81
+ def _input_to_cli_str(lang: str, config: str, nice: int, input_file_name: str, output_file_name_base: str) -> list[str]:
79
82
  """
80
83
  Generates a tesseract cmd as list of string with given inputs
81
84
  """
82
- cmd_args: List[str] = []
85
+ cmd_args: list[str] = []
83
86
 
84
87
  if not sys.platform.startswith("win32") and nice != 0:
85
88
  cmd_args += ("nice", "-n", str(nice))
86
89
 
87
- cmd_args += (_TESS_PATH, input_file_name, output_file_name_base, "-l", lang)
90
+ cmd_args += (fspath(_TESS_PATH), input_file_name, output_file_name_base, "-l", lang)
88
91
 
89
92
  if config:
90
93
  cmd_args += shlex.split(config)
@@ -94,7 +97,7 @@ def _input_to_cli_str(lang: str, config: str, nice: int, input_file_name: str, o
94
97
  return cmd_args
95
98
 
96
99
 
97
- def _run_tesseract(tesseract_args: List[str]) -> None:
100
+ def _run_tesseract(tesseract_args: list[str]) -> None:
98
101
  try:
99
102
  proc = subprocess.Popen(tesseract_args, **_subprocess_args()) # pylint: disable=R1732
100
103
  except OSError as error:
@@ -137,7 +140,7 @@ def get_tesseract_version() -> Version:
137
140
  return version
138
141
 
139
142
 
140
- def image_to_angle(image: ImageType) -> Mapping[str, str]:
143
+ def image_to_angle(image: PixelValues) -> Mapping[str, str]:
141
144
  """
142
145
  Generating a tmp file and running tesseract to get the orientation of the image.
143
146
 
@@ -154,7 +157,7 @@ def image_to_angle(image: ImageType) -> Mapping[str, str]:
154
157
  }
155
158
 
156
159
 
157
- def image_to_dict(image: ImageType, lang: str, config: str) -> Dict[str, List[Union[str, int, float]]]:
160
+ def image_to_dict(image: PixelValues, lang: str, config: str) -> dict[str, list[Union[str, int, float]]]:
158
161
  """
159
162
  This is more or less pytesseract.image_to_data with a dict as returned value.
160
163
  What happens under the hood is:
@@ -177,7 +180,7 @@ def image_to_dict(image: ImageType, lang: str, config: str) -> Dict[str, List[Un
177
180
  _run_tesseract(_input_to_cli_str(lang, config, 0, input_file_name, tmp_name))
178
181
  with open(tmp_name + ".tsv", "rb") as output_file:
179
182
  output = output_file.read().decode("utf-8")
180
- result: Dict[str, List[Union[str, int, float]]] = {}
183
+ result: dict[str, list[Union[str, int, float]]] = {}
181
184
  rows = [row.split("\t") for row in output.strip().split("\n")]
182
185
  if len(rows) < 2:
183
186
  return result
@@ -208,7 +211,7 @@ def image_to_dict(image: ImageType, lang: str, config: str) -> Dict[str, List[Un
208
211
  return result
209
212
 
210
213
 
211
- def tesseract_line_to_detectresult(detect_result_list: List[DetectionResult]) -> List[DetectionResult]:
214
+ def tesseract_line_to_detectresult(detect_result_list: list[DetectionResult]) -> list[DetectionResult]:
212
215
  """
213
216
  Generating text line DetectionResult based on Tesseract word grouping. It generates line bounding boxes from
214
217
  word bounding boxes.
@@ -216,7 +219,7 @@ def tesseract_line_to_detectresult(detect_result_list: List[DetectionResult]) ->
216
219
  :return: An extended list of detection result
217
220
  """
218
221
 
219
- line_detect_result: List[DetectionResult] = []
222
+ line_detect_result: list[DetectionResult] = []
220
223
  for _, block_group_iter in groupby(detect_result_list, key=lambda x: x.block):
221
224
  block_group = []
222
225
  for _, line_group_iter in groupby(list(block_group_iter), key=lambda x: x.line):
@@ -231,7 +234,7 @@ def tesseract_line_to_detectresult(detect_result_list: List[DetectionResult]) ->
231
234
  DetectionResult(
232
235
  box=[ulx, uly, lrx, lry],
233
236
  class_id=2,
234
- class_name=LayoutType.line,
237
+ class_name=LayoutType.LINE,
235
238
  text=" ".join(
236
239
  [detect_result.text for detect_result in block_group if isinstance(detect_result.text, str)]
237
240
  ),
@@ -242,7 +245,7 @@ def tesseract_line_to_detectresult(detect_result_list: List[DetectionResult]) ->
242
245
  return detect_result_list
243
246
 
244
247
 
245
- def predict_text(np_img: ImageType, supported_languages: str, text_lines: bool, config: str) -> List[DetectionResult]:
248
+ def predict_text(np_img: PixelValues, supported_languages: str, text_lines: bool, config: str) -> list[DetectionResult]:
246
249
  """
247
250
  Calls tesseract directly with some given configs. Requires Tesseract to be installed.
248
251
 
@@ -275,7 +278,7 @@ def predict_text(np_img: ImageType, supported_languages: str, text_lines: bool,
275
278
  score=score / 100,
276
279
  text=caption[5],
277
280
  class_id=1,
278
- class_name=LayoutType.word,
281
+ class_name=LayoutType.WORD,
279
282
  )
280
283
  all_results.append(word)
281
284
  if text_lines:
@@ -283,7 +286,7 @@ def predict_text(np_img: ImageType, supported_languages: str, text_lines: bool,
283
286
  return all_results
284
287
 
285
288
 
286
- def predict_rotation(np_img: ImageType) -> Mapping[str, str]:
289
+ def predict_rotation(np_img: PixelValues) -> Mapping[str, str]:
287
290
  """
288
291
  Predicts the rotation of an image using the Tesseract OCR engine.
289
292
 
@@ -326,8 +329,8 @@ class TesseractOcrDetector(ObjectDetector):
326
329
 
327
330
  def __init__(
328
331
  self,
329
- path_yaml: str,
330
- config_overwrite: Optional[List[str]] = None,
332
+ path_yaml: PathLikeOrStr,
333
+ config_overwrite: Optional[list[str]] = None,
331
334
  ):
332
335
  """
333
336
  Set up the configuration which is stored in a yaml-file, that need to be passed through.
@@ -346,16 +349,16 @@ class TesseractOcrDetector(ObjectDetector):
346
349
  if len(config_overwrite):
347
350
  hyper_param_config.update_args(config_overwrite)
348
351
 
349
- self.path_yaml = path_yaml
352
+ self.path_yaml = Path(path_yaml)
350
353
  self.config_overwrite = config_overwrite
351
354
  self.config = hyper_param_config
352
355
 
353
356
  if self.config.LINES:
354
- self.categories = {"1": LayoutType.word, "2": LayoutType.line}
357
+ self.categories = ModelCategories(init_categories={1: LayoutType.WORD, 2: LayoutType.LINE})
355
358
  else:
356
- self.categories = {"1": LayoutType.word}
359
+ self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
357
360
 
358
- def predict(self, np_img: ImageType) -> List[DetectionResult]:
361
+ def predict(self, np_img: PixelValues) -> list[DetectionResult]:
359
362
  """
360
363
  Transfer of a numpy array and call of pytesseract. Return of the detection results.
361
364
 
@@ -371,16 +374,14 @@ class TesseractOcrDetector(ObjectDetector):
371
374
  )
372
375
 
373
376
  @classmethod
374
- def get_requirements(cls) -> List[Requirement]:
377
+ def get_requirements(cls) -> list[Requirement]:
375
378
  return [get_tesseract_requirement()]
376
379
 
377
- def clone(self) -> PredictorBase:
380
+ def clone(self) -> TesseractOcrDetector:
378
381
  return self.__class__(self.path_yaml, self.config_overwrite)
379
382
 
380
- def possible_categories(self) -> List[ObjectTypes]:
381
- if self.config.LINES:
382
- return [LayoutType.word, LayoutType.line]
383
- return [LayoutType.word]
383
+ def get_category_names(self) -> tuple[ObjectTypes, ...]:
384
+ return self.categories.get_categories(as_dict=False)
384
385
 
385
386
  def set_language(self, language: ObjectTypes) -> None:
386
387
  """
@@ -418,9 +419,10 @@ class TesseractRotationTransformer(ImageTransformer):
418
419
  """
419
420
 
420
421
  def __init__(self) -> None:
421
- self.name = _TESS_PATH + "-rotation"
422
+ self.name = fspath(_TESS_PATH) + "-rotation"
423
+ self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
422
424
 
423
- def transform(self, np_img: ImageType, specification: DetectionResult) -> ImageType:
425
+ def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
424
426
  """
425
427
  Applies the predicted rotation to the image, effectively rotating the image backwards.
426
428
  This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
@@ -431,7 +433,7 @@ class TesseractRotationTransformer(ImageTransformer):
431
433
  """
432
434
  return viz_handler.rotate_image(np_img, specification.angle) # type: ignore
433
435
 
434
- def predict(self, np_img: ImageType) -> DetectionResult:
436
+ def predict(self, np_img: PixelValues) -> DetectionResult:
435
437
  """
436
438
  Determines the angle of the rotated image. It can only handle angles that are multiples of 90 degrees.
437
439
  This method uses the Tesseract OCR engine to predict the rotation angle of an image.
@@ -445,12 +447,11 @@ class TesseractRotationTransformer(ImageTransformer):
445
447
  )
446
448
 
447
449
  @classmethod
448
- def get_requirements(cls) -> List[Requirement]:
450
+ def get_requirements(cls) -> list[Requirement]:
449
451
  return [get_tesseract_requirement()]
450
452
 
451
- def clone(self) -> PredictorBase:
453
+ def clone(self) -> TesseractRotationTransformer:
452
454
  return self.__class__()
453
455
 
454
- @staticmethod
455
- def possible_category() -> PageType:
456
- return PageType.angle
456
+ def get_category_names(self) -> tuple[ObjectTypes, ...]:
457
+ return self.categories.get_categories(as_dict=False)
@@ -18,26 +18,26 @@
18
18
  """
19
19
  AWS Textract OCR engine for text extraction
20
20
  """
21
+ from __future__ import annotations
21
22
 
22
23
  import sys
23
24
  import traceback
24
- from typing import List
25
25
 
26
26
  from lazy_imports import try_import
27
27
 
28
28
  from ..datapoint.convert import convert_np_array_to_b64_b
29
- from ..utils.detection_types import ImageType, JsonDict, Requirement
30
29
  from ..utils.file_utils import get_boto3_requirement
31
30
  from ..utils.logger import LoggingRecord, logger
32
31
  from ..utils.settings import LayoutType, ObjectTypes
33
- from .base import DetectionResult, ObjectDetector, PredictorBase
32
+ from ..utils.types import JsonDict, PixelValues, Requirement
33
+ from .base import DetectionResult, ModelCategories, ObjectDetector
34
34
 
35
35
  with try_import() as import_guard:
36
36
  import boto3 # type:ignore
37
37
 
38
38
 
39
- def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_lines: bool) -> List[DetectionResult]:
40
- all_results: List[DetectionResult] = []
39
+ def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_lines: bool) -> list[DetectionResult]:
40
+ all_results: list[DetectionResult] = []
41
41
  blocks = response.get("Blocks")
42
42
 
43
43
  if blocks:
@@ -53,14 +53,14 @@ def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_
53
53
  score=block["Confidence"] / 100,
54
54
  text=block["Text"],
55
55
  class_id=1 if block["BlockType"] == "WORD" else 2,
56
- class_name=LayoutType.word if block["BlockType"] == "WORD" else LayoutType.line,
56
+ class_name=LayoutType.WORD if block["BlockType"] == "WORD" else LayoutType.LINE,
57
57
  )
58
58
  all_results.append(word)
59
59
 
60
60
  return all_results
61
61
 
62
62
 
63
- def predict_text(np_img: ImageType, client, text_lines: bool) -> List[DetectionResult]: # type: ignore
63
+ def predict_text(np_img: PixelValues, client, text_lines: bool) -> list[DetectionResult]: # type: ignore
64
64
  """
65
65
  Calls AWS Textract client (`detect_document_text`) and returns plain OCR results.
66
66
  AWS account required.
@@ -127,11 +127,11 @@ class TextractOcrDetector(ObjectDetector):
127
127
  self.text_lines = text_lines
128
128
  self.client = boto3.client("textract", **credentials_kwargs)
129
129
  if self.text_lines:
130
- self.categories = {"1": LayoutType.word, "2": LayoutType.line}
130
+ self.categories = ModelCategories(init_categories={1: LayoutType.WORD, 2: LayoutType.LINE})
131
131
  else:
132
- self.categories = {"1": LayoutType.word}
132
+ self.categories = ModelCategories(init_categories={1: LayoutType.WORD})
133
133
 
134
- def predict(self, np_img: ImageType) -> List[DetectionResult]:
134
+ def predict(self, np_img: PixelValues) -> list[DetectionResult]:
135
135
  """
136
136
  Transfer of a numpy array and call textract client. Return of the detection results.
137
137
 
@@ -142,13 +142,11 @@ class TextractOcrDetector(ObjectDetector):
142
142
  return predict_text(np_img, self.client, self.text_lines)
143
143
 
144
144
  @classmethod
145
- def get_requirements(cls) -> List[Requirement]:
145
+ def get_requirements(cls) -> list[Requirement]:
146
146
  return [get_boto3_requirement()]
147
147
 
148
- def clone(self) -> PredictorBase:
148
+ def clone(self) -> TextractOcrDetector:
149
149
  return self.__class__()
150
150
 
151
- def possible_categories(self) -> List[ObjectTypes]:
152
- if self.text_lines:
153
- return [LayoutType.word, LayoutType.line]
154
- return [LayoutType.word]
151
+ def get_category_names(self) -> tuple[ObjectTypes, ...]:
152
+ return self.categories.get_categories(as_dict=False)
@@ -22,10 +22,12 @@ Tensorflow related utils.
22
22
  from __future__ import annotations
23
23
 
24
24
  import os
25
- from typing import Optional, Union, ContextManager
25
+ from typing import ContextManager, Optional, Union
26
26
 
27
27
  from lazy_imports import try_import
28
28
 
29
+ from ...utils.env_info import ENV_VARS_TRUE
30
+
29
31
  with try_import() as import_guard:
30
32
  from tensorpack.models import disable_layer_logging # pylint: disable=E0401
31
33
 
@@ -84,8 +86,20 @@ def get_tf_device(device: Optional[Union[str, tf.device]] = None) -> tf.device:
84
86
  return tf.device(device_names[0].name)
85
87
  # The input must be something sensible
86
88
  return tf.device(device)
87
- if os.environ.get("USE_CUDA"):
89
+ if os.environ.get("USE_CUDA", "False") in ENV_VARS_TRUE:
88
90
  device_names = [device.name for device in tf.config.list_logical_devices(device_type="GPU")]
91
+ if not device_names:
92
+ raise EnvironmentError(
93
+ "USE_CUDA is set but tf.config.list_logical_devices cannot find any device. "
94
+ "It looks like there is an issue with your Tensorflow installation. "
95
+ "You can set LOG_LEVEL='DEBUG' to get more information about installation."
96
+ )
89
97
  return tf.device(device_names[0])
90
98
  device_names = [device.name for device in tf.config.list_logical_devices(device_type="CPU")]
99
+ if not device_names:
100
+ raise EnvironmentError(
101
+ "Cannot find any CPU device. It looks like there is an issue with your "
102
+ "Tensorflow installation. You can set LOG_LEVEL='DEBUG' to get more information about "
103
+ "installation."
104
+ )
91
105
  return tf.device(device_names[0])
@@ -20,13 +20,16 @@ Compatibility classes and methods related to Tensorpack package
20
20
  """
21
21
  from __future__ import annotations
22
22
 
23
+ import os
23
24
  from abc import ABC, abstractmethod
24
- from typing import Any, List, Mapping, Tuple, Union
25
+ from pathlib import Path
26
+ from typing import Any, Mapping, Union
25
27
 
26
28
  from lazy_imports import try_import
27
29
 
28
30
  from ...utils.metacfg import AttrDict
29
31
  from ...utils.settings import ObjectTypes
32
+ from ...utils.types import PathLikeOrStr, PixelValues
30
33
 
31
34
  with try_import() as import_guard:
32
35
  from tensorpack.predict import OfflinePredictor, PredictConfig # pylint: disable=E0401
@@ -51,7 +54,7 @@ class ModelDescWithConfig(ModelDesc, ABC): # type: ignore
51
54
  super().__init__()
52
55
  self.cfg = config
53
56
 
54
- def get_inference_tensor_names(self) -> Tuple[List[str], List[str]]:
57
+ def get_inference_tensor_names(self) -> tuple[list[str], list[str]]:
55
58
  """
56
59
  Returns lists of tensor names to be used to create an inference callable. "build_graph" must create tensors
57
60
  of these names when called under inference context.
@@ -77,7 +80,7 @@ class TensorpackPredictor(ABC):
77
80
  as there is an explicit class available for this.
78
81
  """
79
82
 
80
- def __init__(self, model: ModelDescWithConfig, path_weights: str, ignore_mismatch: bool) -> None:
83
+ def __init__(self, model: ModelDescWithConfig, path_weights: PathLikeOrStr, ignore_mismatch: bool) -> None:
81
84
  """
82
85
  :param model: Model, either as ModelDescWithConfig or derived from that class.
83
86
  :param path_weights: Model weights of the prediction config.
@@ -85,7 +88,7 @@ class TensorpackPredictor(ABC):
85
88
  if a pre-trained model is to be fine-tuned on a custom dataset.
86
89
  """
87
90
  self._model = model
88
- self.path_weights = path_weights
91
+ self.path_weights = Path(path_weights)
89
92
  self.ignore_mismatch = ignore_mismatch
90
93
  self._number_gpus = get_num_gpu()
91
94
  self.predict_config = self._build_config()
@@ -98,9 +101,10 @@ class TensorpackPredictor(ABC):
98
101
  return OfflinePredictor(self.predict_config)
99
102
 
100
103
  def _build_config(self) -> PredictConfig:
104
+ path_weights = os.fspath(self.path_weights) if os.fspath(self.path_weights) != "." else ""
101
105
  predict_config = PredictConfig(
102
106
  model=self._model,
103
- session_init=SmartInit(self.path_weights, ignore_mismatch=self.ignore_mismatch),
107
+ session_init=SmartInit(path_weights, ignore_mismatch=self.ignore_mismatch),
104
108
  input_names=self._model.get_inference_tensor_names()[0],
105
109
  output_names=self._model.get_inference_tensor_names()[1],
106
110
  )
@@ -110,7 +114,7 @@ class TensorpackPredictor(ABC):
110
114
  @staticmethod
111
115
  @abstractmethod
112
116
  def get_wrapped_model(
113
- path_yaml: str, categories: Mapping[str, ObjectTypes], config_overwrite: Union[List[str], None]
117
+ path_yaml: PathLikeOrStr, categories: Mapping[int, ObjectTypes], config_overwrite: Union[list[str], None]
114
118
  ) -> ModelDescWithConfig:
115
119
  """
116
120
  Implement the config generation, its modification and instantiate a version of the model. See
@@ -119,7 +123,7 @@ class TensorpackPredictor(ABC):
119
123
  raise NotImplementedError()
120
124
 
121
125
  @abstractmethod
122
- def predict(self, np_img: Any) -> Any:
126
+ def predict(self, np_img: PixelValues) -> Any:
123
127
  """
124
128
  Implement, how `self.tp_predictor` is invoked and raw prediction results are generated. Do use only raw
125
129
  objects and nothing, which is related to the DD API.
@@ -194,7 +194,7 @@ import numpy as np
194
194
  from lazy_imports import try_import
195
195
 
196
196
  from .....utils.metacfg import AttrDict
197
- from .....utils.settings import ObjectTypes
197
+ from .....utils.settings import TypeOrStr, get_type
198
198
 
199
199
  with try_import() as import_guard:
200
200
  from tensorpack.tfutils import collect_env_info # pylint: disable=E0401
@@ -209,7 +209,7 @@ with try_import() as import_guard:
209
209
  __all__ = ["train_frcnn_config", "model_frcnn_config"]
210
210
 
211
211
 
212
- def model_frcnn_config(config: AttrDict, categories: Mapping[str, ObjectTypes], print_summary: bool = True) -> None:
212
+ def model_frcnn_config(config: AttrDict, categories: Mapping[int, TypeOrStr], print_summary: bool = True) -> None:
213
213
  """
214
214
  Sanity checks for Tensorpack Faster-RCNN config settings, where the focus lies on the model for predicting.
215
215
  It will update the config instance.
@@ -221,8 +221,8 @@ def model_frcnn_config(config: AttrDict, categories: Mapping[str, ObjectTypes],
221
221
 
222
222
  config.freeze(False)
223
223
 
224
- categories = {str(key): categories[val] for key, val in enumerate(categories, 1)}
225
- categories[0] = "BG"
224
+ categories = {key: get_type(categories[val]) for key, val in enumerate(categories, 1)}
225
+ categories[0] = get_type("background")
226
226
  config.DATA.CLASS_NAMES = list(categories.values())
227
227
  config.DATA.CLASS_DICT = categories
228
228
  config.DATA.NUM_CATEGORY = len(config.DATA.CLASS_NAMES) - 1
@@ -71,7 +71,7 @@ def freeze_affine_getter(getter, *args, **kwargs):
71
71
  if name.endswith("/gamma") or name.endswith("/beta"):
72
72
  kwargs["trainable"] = False
73
73
  ret = getter(*args, **kwargs)
74
- tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, ret)
74
+ tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, ret) # pylint: disable=E1101
75
75
  else:
76
76
  ret = getter(*args, **kwargs)
77
77
  return ret
@@ -66,7 +66,7 @@ def decode_bbox_target(box_predictions, anchors, preproc_max_size):
66
66
  xbyb = box_pred_txty * waha + xaya
67
67
  x1y1 = xbyb - wbhb * 0.5
68
68
  x2y2 = xbyb + wbhb * 0.5 # (...)x1x2
69
- out = tf.concat([x1y1, x2y2], axis=-2)
69
+ out = tf.concat([x1y1, x2y2], axis=-2) # pylint: disable=E1123
70
70
  return tf.reshape(out, orig_shape)
71
71
 
72
72
 
@@ -93,7 +93,7 @@ def encode_bbox_target(boxes, anchors):
93
93
  # Note that here not all boxes are valid. Some may be zero
94
94
  txty = (xbyb - xaya) / waha
95
95
  twth = tf.math.log(wbhb / waha) # may contain -inf for invalid boxes
96
- encoded = tf.concat([txty, twth], axis=1) # (-1x2x2)
96
+ encoded = tf.concat([txty, twth], axis=1) # (-1x2x2) # pylint: disable=E1123
97
97
  return tf.reshape(encoded, tf.shape(boxes))
98
98
 
99
99
 
@@ -153,7 +153,7 @@ def crop_and_resize(image, boxes, box_ind, crop_size, pad_border=True):
153
153
  n_w = spacing_w * tf.cast(crop_shape[1] - 1, tf.float32) / imshape[1]
154
154
  n_h = spacing_h * tf.cast(crop_shape[0] - 1, tf.float32) / imshape[0]
155
155
 
156
- return tf.concat([ny0, nx0, ny0 + n_h, nx0 + n_w], axis=1)
156
+ return tf.concat([ny0, nx0, ny0 + n_h, nx0 + n_w], axis=1) # pylint: disable=E1123
157
157
 
158
158
  image_shape = tf.shape(image)[2:]
159
159
 
@@ -213,8 +213,8 @@ class RPNAnchors(namedtuple("_RPNAnchors", ["boxes", "gt_labels", "gt_boxes"])):
213
213
  Slice anchors to the spatial size of this feature map.
214
214
  """
215
215
  shape2d = tf.shape(featuremap)[2:] # h,w
216
- slice3d = tf.concat([shape2d, [-1]], axis=0)
217
- slice4d = tf.concat([shape2d, [-1, -1]], axis=0)
216
+ slice3d = tf.concat([shape2d, [-1]], axis=0) # pylint: disable=E1123
217
+ slice4d = tf.concat([shape2d, [-1, -1]], axis=0) # pylint: disable=E1123
218
218
  boxes = tf.slice(self.boxes, [0, 0, 0, 0], slice4d)
219
219
  gt_labels = tf.slice(self.gt_labels, [0, 0, 0], slice3d)
220
220
  gt_boxes = tf.slice(self.gt_boxes, [0, 0, 0, 0], slice4d)
@@ -151,9 +151,9 @@ def multilevel_roi_align(features, rcnn_boxes, resolution, fpn_anchor_strides):
151
151
  all_rois.append(roi_align(featuremap, boxes_on_featuremap, resolution))
152
152
 
153
153
  # this can fail if using TF<=1.8 with MKL build
154
- all_rois = tf.concat(all_rois, axis=0) # NCHW
154
+ all_rois = tf.concat(all_rois, axis=0) # NCHW # pylint: disable=E1123
155
155
  # Unshuffle to the original order, to match the original samples
156
- level_id_perm = tf.concat(level_ids, axis=0) # A permutation of 1~N
156
+ level_id_perm = tf.concat(level_ids, axis=0) # A permutation of 1~N # pylint: disable=E1123
157
157
  level_id_invert_perm = tf.math.invert_permutation(level_id_perm)
158
158
  all_rois = tf.gather(all_rois, level_id_invert_perm, name="output")
159
159
  return all_rois
@@ -258,8 +258,8 @@ def generate_fpn_proposals(
258
258
  all_boxes.append(proposal_boxes)
259
259
  all_scores.append(proposal_scores)
260
260
 
261
- proposal_boxes = tf.concat(all_boxes, axis=0) # nx4
262
- proposal_scores = tf.concat(all_scores, axis=0) # n
261
+ proposal_boxes = tf.concat(all_boxes, axis=0) # nx4 # pylint: disable=E1123
262
+ proposal_scores = tf.concat(all_scores, axis=0) # n # pylint: disable=E1123
263
263
  # Here we are different from Detectron.
264
264
  # Detectron picks top-k within the batch, rather than within an image, however we do not have a batch.
265
265
  proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_top_k)
@@ -271,8 +271,8 @@ def generate_fpn_proposals(
271
271
  pred_boxes_decoded = multilevel_pred_boxes[lvl]
272
272
  all_boxes.append(tf.reshape(pred_boxes_decoded, [-1, 4]))
273
273
  all_scores.append(tf.reshape(multilevel_label_logits[lvl], [-1]))
274
- all_boxes = tf.concat(all_boxes, axis=0)
275
- all_scores = tf.concat(all_scores, axis=0)
274
+ all_boxes = tf.concat(all_boxes, axis=0) # pylint: disable=E1123
275
+ all_scores = tf.concat(all_scores, axis=0) # pylint: disable=E1123
276
276
  proposal_boxes, proposal_scores = generate_rpn_proposals(
277
277
  all_boxes,
278
278
  all_scores,