deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of deepdoctection might be problematic.
Files changed (131)
  1. deepdoctection/__init__.py +16 -29
  2. deepdoctection/analyzer/dd.py +70 -59
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/dataflow/common.py +9 -5
  5. deepdoctection/dataflow/custom.py +5 -5
  6. deepdoctection/dataflow/custom_serialize.py +75 -18
  7. deepdoctection/dataflow/parallel_map.py +3 -3
  8. deepdoctection/dataflow/serialize.py +4 -4
  9. deepdoctection/dataflow/stats.py +3 -3
  10. deepdoctection/datapoint/annotation.py +41 -56
  11. deepdoctection/datapoint/box.py +9 -8
  12. deepdoctection/datapoint/convert.py +6 -6
  13. deepdoctection/datapoint/image.py +56 -44
  14. deepdoctection/datapoint/view.py +245 -150
  15. deepdoctection/datasets/__init__.py +1 -4
  16. deepdoctection/datasets/adapter.py +35 -26
  17. deepdoctection/datasets/base.py +14 -12
  18. deepdoctection/datasets/dataflow_builder.py +3 -3
  19. deepdoctection/datasets/info.py +24 -26
  20. deepdoctection/datasets/instances/doclaynet.py +51 -51
  21. deepdoctection/datasets/instances/fintabnet.py +46 -46
  22. deepdoctection/datasets/instances/funsd.py +25 -24
  23. deepdoctection/datasets/instances/iiitar13k.py +13 -10
  24. deepdoctection/datasets/instances/layouttest.py +4 -3
  25. deepdoctection/datasets/instances/publaynet.py +5 -5
  26. deepdoctection/datasets/instances/pubtables1m.py +24 -21
  27. deepdoctection/datasets/instances/pubtabnet.py +32 -30
  28. deepdoctection/datasets/instances/rvlcdip.py +30 -30
  29. deepdoctection/datasets/instances/xfund.py +26 -26
  30. deepdoctection/datasets/save.py +6 -6
  31. deepdoctection/eval/__init__.py +1 -4
  32. deepdoctection/eval/accmetric.py +32 -33
  33. deepdoctection/eval/base.py +8 -9
  34. deepdoctection/eval/cocometric.py +15 -13
  35. deepdoctection/eval/eval.py +41 -37
  36. deepdoctection/eval/tedsmetric.py +30 -23
  37. deepdoctection/eval/tp_eval_callback.py +16 -19
  38. deepdoctection/extern/__init__.py +2 -7
  39. deepdoctection/extern/base.py +339 -134
  40. deepdoctection/extern/d2detect.py +85 -113
  41. deepdoctection/extern/deskew.py +14 -11
  42. deepdoctection/extern/doctrocr.py +141 -130
  43. deepdoctection/extern/fastlang.py +27 -18
  44. deepdoctection/extern/hfdetr.py +71 -62
  45. deepdoctection/extern/hflayoutlm.py +504 -211
  46. deepdoctection/extern/hflm.py +230 -0
  47. deepdoctection/extern/model.py +488 -302
  48. deepdoctection/extern/pdftext.py +23 -19
  49. deepdoctection/extern/pt/__init__.py +1 -3
  50. deepdoctection/extern/pt/nms.py +6 -2
  51. deepdoctection/extern/pt/ptutils.py +29 -19
  52. deepdoctection/extern/tessocr.py +39 -38
  53. deepdoctection/extern/texocr.py +18 -18
  54. deepdoctection/extern/tp/tfutils.py +57 -9
  55. deepdoctection/extern/tp/tpcompat.py +21 -14
  56. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  57. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  58. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
  60. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  61. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
  62. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  63. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
  64. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  65. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
  66. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
  67. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
  68. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  69. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  70. deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
  71. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  72. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  73. deepdoctection/extern/tpdetect.py +45 -53
  74. deepdoctection/mapper/__init__.py +3 -8
  75. deepdoctection/mapper/cats.py +27 -29
  76. deepdoctection/mapper/cocostruct.py +10 -10
  77. deepdoctection/mapper/d2struct.py +27 -26
  78. deepdoctection/mapper/hfstruct.py +13 -8
  79. deepdoctection/mapper/laylmstruct.py +178 -37
  80. deepdoctection/mapper/maputils.py +12 -11
  81. deepdoctection/mapper/match.py +2 -2
  82. deepdoctection/mapper/misc.py +11 -9
  83. deepdoctection/mapper/pascalstruct.py +4 -4
  84. deepdoctection/mapper/prodigystruct.py +5 -5
  85. deepdoctection/mapper/pubstruct.py +84 -92
  86. deepdoctection/mapper/tpstruct.py +5 -5
  87. deepdoctection/mapper/xfundstruct.py +33 -33
  88. deepdoctection/pipe/__init__.py +1 -1
  89. deepdoctection/pipe/anngen.py +12 -14
  90. deepdoctection/pipe/base.py +52 -106
  91. deepdoctection/pipe/common.py +72 -59
  92. deepdoctection/pipe/concurrency.py +16 -11
  93. deepdoctection/pipe/doctectionpipe.py +24 -21
  94. deepdoctection/pipe/language.py +20 -25
  95. deepdoctection/pipe/layout.py +20 -16
  96. deepdoctection/pipe/lm.py +75 -105
  97. deepdoctection/pipe/order.py +194 -89
  98. deepdoctection/pipe/refine.py +111 -124
  99. deepdoctection/pipe/segment.py +156 -161
  100. deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
  101. deepdoctection/pipe/text.py +37 -36
  102. deepdoctection/pipe/transform.py +19 -16
  103. deepdoctection/train/__init__.py +6 -12
  104. deepdoctection/train/d2_frcnn_train.py +48 -41
  105. deepdoctection/train/hf_detr_train.py +41 -30
  106. deepdoctection/train/hf_layoutlm_train.py +153 -135
  107. deepdoctection/train/tp_frcnn_train.py +32 -31
  108. deepdoctection/utils/concurrency.py +1 -1
  109. deepdoctection/utils/context.py +13 -6
  110. deepdoctection/utils/develop.py +4 -4
  111. deepdoctection/utils/env_info.py +87 -125
  112. deepdoctection/utils/file_utils.py +6 -11
  113. deepdoctection/utils/fs.py +22 -18
  114. deepdoctection/utils/identifier.py +2 -2
  115. deepdoctection/utils/logger.py +16 -15
  116. deepdoctection/utils/metacfg.py +7 -7
  117. deepdoctection/utils/mocks.py +93 -0
  118. deepdoctection/utils/pdf_utils.py +11 -11
  119. deepdoctection/utils/settings.py +185 -181
  120. deepdoctection/utils/tqdm.py +1 -1
  121. deepdoctection/utils/transform.py +14 -9
  122. deepdoctection/utils/types.py +104 -0
  123. deepdoctection/utils/utils.py +7 -7
  124. deepdoctection/utils/viz.py +74 -72
  125. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
  126. deepdoctection-0.33.dist-info/RECORD +146 -0
  127. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
  128. deepdoctection/utils/detection_types.py +0 -68
  129. deepdoctection-0.31.dist-info/RECORD +0 -144
  130. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
  131. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0

deepdoctection/utils/env_info.py

@@ -46,16 +46,16 @@ can store an (absolute) path to a `.jsonl` file.
 
 """
 
-import ast
 import importlib
 import os
 import re
 import subprocess
 import sys
 from collections import defaultdict
-from typing import List, Literal, Optional, Tuple
+from typing import Optional
 
 import numpy as np
+from packaging import version
 from tabulate import tabulate
 
 from .file_utils import (
@@ -68,6 +68,7 @@ from .file_utils import (
     fasttext_available,
     get_poppler_version,
     get_tesseract_version,
+    get_tf_version,
     jdeskew_available,
     lxml_available,
     opencv_available,
@@ -85,17 +86,14 @@ from .file_utils import (
     wandb_available,
 )
 from .logger import LoggingRecord, logger
+from .types import KeyValEnvInfos, PathLikeOrStr
 
-__all__ = [
-    "collect_torch_env",
-    "collect_env_info",
-    "get_device",
-    "auto_select_lib_and_device",
-    "auto_select_viz_library",
-]
+__all__ = ["collect_env_info", "auto_select_viz_library", "ENV_VARS_TRUE"]
 
 # pylint: disable=import-outside-toplevel
 
+ENV_VARS_TRUE: set[str] = {"1", "True", "TRUE", "true", "yes"}
+
 
 def collect_torch_env() -> str:
     """Wrapper for torch.utils.collect_env.get_pretty_env_info"""
@@ -110,7 +108,7 @@ def collect_torch_env() -> str:
     return get_pretty_env_info()
 
 
-def collect_installed_dependencies(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+def collect_installed_dependencies(data: KeyValEnvInfos) -> KeyValEnvInfos:
     """Collect installed dependencies for all third party libraries.
 
     :param data: A list of tuples to dump all collected package information such as the name and the version
@@ -235,7 +233,7 @@ def collect_installed_dependencies(data: List[Tuple[str, str]]) -> List[Tuple[st
     return data
 
 
-def detect_compute_compatibility(cuda_home: Optional[str], so_file: Optional[str]) -> str:
+def detect_compute_compatibility(cuda_home: Optional[PathLikeOrStr], so_file: Optional[PathLikeOrStr]) -> str:
     """
     Detect the compute compatibility of a CUDA library.
 
@@ -261,7 +259,7 @@ def detect_compute_compatibility(cuda_home: Optional[str], so_file: Optional[str
 
 
 # Copied from https://github.com/tensorpack/tensorpack/blob/master/tensorpack/tfutils/collect_env.py
-def tf_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+def tf_info(data: KeyValEnvInfos) -> KeyValEnvInfos:
     """Returns a list of (key, value) pairs containing tensorflow information.
 
     :param data: A list of tuples to dump all collected package information such as the name and the version
@@ -270,21 +268,42 @@ def tf_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     if tf_available():
         import tensorflow as tf  # type: ignore # pylint: disable=E0401
 
+        os.environ["TENSORFLOW_AVAILABLE"] = "1"
+
         data.append(("Tensorflow", tf.__version__))
+        if version.parse(get_tf_version()) > version.parse("2.4.1"):
+            os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+            try:
+                import tensorflow.python.util.deprecation as deprecation  # type: ignore # pylint: disable=E0401,R0402,E0611
+
+                deprecation._PRINT_DEPRECATION_WARNINGS = False  # pylint: disable=W0212
+            except Exception:  # pylint: disable=W0703
+                try:
+                    from tensorflow.python.util import deprecation  # type: ignore # pylint: disable=E0401,E0611
+
+                    deprecation._PRINT_DEPRECATION_WARNINGS = False  # pylint: disable=W0212
+                except Exception:  # pylint: disable=W0703
+                    pass
     else:
         data.append(("Tensorflow", "None"))
         return data
 
-    from tensorflow.python.platform import build_info  # type: ignore # pylint: disable=E0401
+    from tensorflow.python.platform import build_info  # type: ignore # pylint: disable=E0401,E0611
 
     try:
        for key, value in list(build_info.build_info.items()):
-            if key == "cuda_version":
+            if key == "is_cuda_build":
+                data.append(("TF compiled with CUDA", value))
+                if value and len(tf.config.list_physical_devices("GPU")):
+                    os.environ["USE_CUDA"] = "1"
+            elif key == "cuda_version":
                 data.append(("TF built with CUDA", value))
             elif key == "cudnn_version":
                 data.append(("TF built with CUDNN", value))
            elif key == "cuda_compute_capabilities":
                data.append(("TF compute capabilities", ",".join([k.replace("compute_", "") for k in value])))
+            elif key == "is_rocm_build":
+                data.append(("TF compiled with ROCM", value))
        return data
    except AttributeError:
        pass
@@ -297,7 +316,7 @@ def tf_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
 
 
 # Heavily inspired by https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/collect_env.py
-def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+def pt_info(data: KeyValEnvInfos) -> KeyValEnvInfos:
     """Returns a list of (key, value) pairs containing Pytorch information.
 
     :param data: A list of tuples to dump all collected package information such as the name and the version
@@ -306,6 +325,13 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
 
     if pytorch_available():
         import torch
+
+        os.environ["PYTORCH_AVAILABLE"] = "1"
+
+    else:
+        data.append(("PyTorch", "None"))
+        return []
+
     has_gpu = torch.cuda.is_available()  # true for both CUDA & ROCM
     has_mps = torch.backends.mps.is_available()
 
@@ -331,12 +357,9 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     data.append(("PyTorch", torch_version + " @" + os.path.dirname(torch.__file__)))
     data.append(("PyTorch debug build", str(torch.version.debug)))
 
-    if not has_gpu:
-        has_gpu_text = "No: torch.cuda.is_available() == False"
-    else:
-        has_gpu_text = "Yes"
-    data.append(("GPU available", has_gpu_text))
     if has_gpu:
+        os.environ["USE_CUDA"] = "1"
+        has_gpu_text = "Yes"
         devices = defaultdict(list)
         for k in range(torch.cuda.device_count()):
             cap = ".".join((str(x) for x in torch.cuda.get_device_capability(k)))
@@ -362,6 +385,10 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
         cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None)
         if cuda_arch_list:
             data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list))
+    else:
+        has_gpu_text = "No: torch.cuda.is_available() == False"
+
+    data.append(("GPU available", has_gpu_text))
 
     mps_build = "No: torch.backends.mps.is_built() == False"
     if not has_mps:
@@ -369,9 +396,11 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     else:
         has_mps_text = "Yes"
         mps_build = str(torch.backends.mps.is_built())
+        if mps_build == "True":
+            os.environ["USE_MPS"] = "1"
 
     data.append(("MPS available", has_mps_text))
-    data.append(("MPS available", mps_build))
+    data.append(("MPS built", mps_build))
 
     try:
         import torchvision  # type: ignore
@@ -395,6 +424,42 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     return data
 
 
+def set_dl_env_vars() -> None:
+    """Set the environment variables that steer the selection of the DL framework.
+    If both PyTorch and TensorFlow are available, PyTorch will be selected by default.
+    It is possible that for testing purposes, e.g. on Colab you can find yourself with a pre-installed Tensorflow
+    version. If you want to enforce PyTorch you must set:
+
+        os.environ["DD_USE_TORCH"] = "1"
+        os.environ["USE_TORCH"] = "1"  # necessary if you make use of DocTr's OCR engine
+        os.environ["DD_USE_TF"] = "0"
+        os.environ["USE_TF"] = "0"  # it's better to explcitly disable Tensorflow
+
+
+    """
+
+    if os.environ.get("PYTORCH_AVAILABLE") and os.environ.get("DD_USE_TORCH") is None:
+        os.environ["DD_USE_TORCH"] = "1"
+        os.environ["USE_TORCH"] = "1"
+    if os.environ.get("TENSORFLOW_AVAILABLE") and os.environ.get("DD_USE_TF") is None:
+        os.environ["DD_USE_TF"] = "1"
+        os.environ["USE_TF"] = "1"
+
+    if os.environ.get("DD_USE_TORCH", "0") in ENV_VARS_TRUE and os.environ.get("DD_USE_TF", "0") in ENV_VARS_TRUE:
+        logger.warning(
+            "Both DD_USE_TORCH and DD_USE_TF are set. Defaulting to PyTorch. If you want a different "
+            "behaviour, set DD_USE_TORCH to None before importing deepdoctection."
+        )
+        os.environ["DD_USE_TF"] = "0"
+        os.environ["USE_TF"] = "0"
+
+    if (
+        os.environ.get("PYTORCH_AVAILABLE") not in ENV_VARS_TRUE
+        and os.environ.get("TENSORFLOW_AVAILABLE") not in ENV_VARS_TRUE
+    ):
+        logger.warning(LoggingRecord(msg="Neither Tensorflow or Pytorch are available."))
+
+
 def collect_env_info() -> str:
     """
 
@@ -441,6 +506,7 @@ def collect_env_info() -> str:
 
     data = pt_info(data)
     data = tf_info(data)
+    set_dl_env_vars()
 
     data = collect_installed_dependencies(data)
 
@@ -452,110 +518,6 @@ def collect_env_info() -> str:
     return env_str
 
 
-def set_env(name: str, value: str) -> None:
-    """
-    Set an environment variable if it is not already set.
-
-    :param name: The name of the environment variable
-    :param value: The value of the environment variable
-    """
-
-    if os.environ.get(name):
-        return
-    os.environ[name] = value
-    return
-
-
-def auto_select_lib_and_device() -> None:
-    """
-    Select the DL library and subsequently the device.
-    This will set environment variable `USE_TENSORFLOW`, `USE_PYTORCH` and `USE_CUDA`
-
-    If TF is available, use TF unless a GPU is not available, in which case choose PT. If CUDA is not available and PT
-    is not installed raise ImportError.
-    """
-
-    # USE_TF and USE_TORCH are env variables that steer DL library selection for Doctr.
-    if tf_available() and tensorpack_available():
-        from tensorpack.utils.gpu import get_num_gpu  # pylint: disable=E0401
-
-        if get_num_gpu() >= 1:
-            set_env("USE_TENSORFLOW", "True")
-            set_env("USE_PYTORCH", "False")
-            set_env("USE_CUDA", "True")
-            set_env("USE_MPS", "False")
-            set_env("USE_TF", "TRUE")
-            set_env("USE_TORCH", "False")
-            return
-        if pytorch_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "True")
-            set_env("USE_CUDA", "False")
-            set_env("USE_TF", "False")
-            set_env("USE_TORCH", "TRUE")
-            return
-        logger.warning(
-            LoggingRecord("You have Tensorflow installed but no GPU is available. All Tensorflow models require a GPU.")
-        )
-        if tf_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "False")
-            set_env("USE_CUDA", "False")
-            set_env("USE_TF", "AUTO")
-            set_env("USE_TORCH", "AUTO")
-            return
-
-    if pytorch_available():
-        import torch
-
-        if torch.cuda.is_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "True")
-            set_env("USE_CUDA", "True")
-            set_env("USE_TF", "False")
-            set_env("USE_TORCH", "TRUE")
-            return
-        if torch.backends.mps.is_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "True")
-            set_env("USE_CUDA", "False")
-            set_env("USE_MPS", "True")
-            set_env("USE_TF", "False")
-            set_env("USE_TORCH", "TRUE")
-            return
-        set_env("USE_TENSORFLOW", "False")
-        set_env("USE_PYTORCH", "True")
-        set_env("USE_CUDA", "False")
-        set_env("USE_MPS", "False")
-        set_env("USE_TF", "AUTO")
-        set_env("USE_TORCH", "AUTO")
-        return
-    logger.warning(
-        LoggingRecord(
-            "Neither Tensorflow or Pytorch are available. You will not be able to use any Deep Learning "
-            "model from the library."
-        )
-    )
-
-
-def get_device(ignore_cpu: bool = True) -> Literal["cuda", "mps", "cpu"]:
-    """
-    Device checks for running PyTorch with CUDA, MPS or optionall CPU.
-    If nothing can be found and if `disable_cpu` is deactivated it will raise a `ValueError`
-
-    :param ignore_cpu: Will not consider `cpu` as valid return value
-    :return: Either cuda or mps
-    """
-
-    if ast.literal_eval(os.environ.get("USE_CUDA", "True")):
-        return "cuda"
-    if ast.literal_eval(os.environ.get("USE_MPS", "True")):
-        return "mps"
-    if not ignore_cpu:
-        return "cpu"
-    raise RuntimeWarning("Could not find either GPU nor MPS")
-
-
 def auto_select_viz_library() -> None:
     """Setting PIL as default image library if cv2 is not installed"""
 
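The `set_dl_env_vars` hunk above documents the new framework-selection switches. A minimal usage sketch, assuming only the variable names given in the docstring (DD_USE_TORCH, USE_TORCH, DD_USE_TF, USE_TF): force the PyTorch backend on a machine where TensorFlow is also pre-installed, e.g. on Colab. Per the docstring, the variables should be set before deepdoctection is imported.

    # Hedged sketch based on the set_dl_env_vars docstring shown above; nothing
    # beyond the variable names is assumed about the 0.33 API.
    import os

    os.environ["DD_USE_TORCH"] = "1"
    os.environ["USE_TORCH"] = "1"   # also read by DocTr's OCR engine
    os.environ["DD_USE_TF"] = "0"
    os.environ["USE_TF"] = "0"      # explicitly disable the TensorFlow backend

    import deepdoctection as dd  # import only after the variables are set
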
deepdoctection/utils/file_utils.py

@@ -16,15 +16,15 @@ import sys
 from os import environ, path
 from shutil import which
 from types import ModuleType
-from typing import Any, Tuple, Union, no_type_check
+from typing import Any, Union, no_type_check
 
 import importlib_metadata
 from packaging import version
 
-from .detection_types import Requirement
 from .error import DependencyError
 from .logger import LoggingRecord, logger
 from .metacfg import AttrDict
+from .types import PathLikeOrStr, Requirement
 
 _GENERIC_ERR_MSG = "Please check the required version either in the docs or in the setup file"
 
@@ -52,7 +52,7 @@ def get_tf_version() -> str:
     """
     tf_version = "0.0"
     if tf_available():
-        candidates: Tuple[str, ...] = (
+        candidates: tuple[str, ...] = (
            "tensorflow",
            "tensorflow-cpu",
            "tensorflow-gpu",
@@ -250,31 +250,26 @@ def get_detectron2_requirement() -> Requirement:
 # Tesseract related dependencies
 _TESS_AVAILABLE = which("tesseract") is not None
 # Tesseract installation path
-_TESS_PATH = "tesseract"
+_TESS_PATH: PathLikeOrStr = "tesseract"
 _TESS_ERR_MSG = (
     "Tesseract >=4.0 must be installed. Please follow the official installation instructions. "
     "https://tesseract-ocr.github.io/tessdoc/Installation.html"
 )
 
 
-def set_tesseract_path(tesseract_path: str) -> None:
+def set_tesseract_path(tesseract_path: PathLikeOrStr) -> None:
     """Set the Tesseract path. If you have tesseract installed in Anaconda,
     you can use this function to set tesseract path.
 
     :param tesseract_path: Tesseract installation path.
     """
-    if tesseract_path is None:
-        raise TypeError("tesseract_path cannot be None")
 
     global _TESS_AVAILABLE  # pylint: disable=W0603
     global _TESS_PATH  # pylint: disable=W0603
 
     tesseract_flag = which(tesseract_path)
 
-    if tesseract_flag is None:
-        _TESS_AVAILABLE = False
-    else:
-        _TESS_AVAILABLE = True
+    _TESS_AVAILABLE = False if tesseract_flag is not None else True  # pylint: disable=W0603,R1719
 
     _TESS_PATH = tesseract_path
 
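With the widened `set_tesseract_path(tesseract_path: PathLikeOrStr)` signature above, a `pathlib.Path` can be passed as well as a plain string. A hedged sketch; the Anaconda-style location below is illustrative only, following the hint in the function's docstring.

    # Illustrative only: point deepdoctection at a tesseract binary inside a conda env.
    from pathlib import Path

    from deepdoctection.utils.file_utils import set_tesseract_path

    set_tesseract_path(Path.home() / "anaconda3" / "envs" / "dd" / "bin" / "tesseract")
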
deepdoctection/utils/fs.py

@@ -28,12 +28,12 @@ from pathlib import Path
 from typing import Callable, Literal, Optional, Protocol, Union, overload
 from urllib.request import urlretrieve
 
-from .detection_types import ImageType, JsonDict, Pathlike
 from .develop import deprecated
 from .logger import LoggingRecord, logger
 from .pdf_utils import get_pdf_file_reader, get_pdf_file_writer
 from .settings import CONFIGS, DATASET_DIR, MODEL_DIR, PATH
 from .tqdm import get_tqdm
+from .types import B64, B64Str, JsonDict, PathLikeOrStr, PixelValues
 from .utils import is_file_extension
 from .viz import viz_handler
 
@@ -66,7 +66,7 @@ def sizeof_fmt(num: float, suffix: str = "B") -> str:
 
 # Copyright (c) Tensorpack Contributors
 # Licensed under the Apache License, Version 2.0 (the "License")
-def mkdir_p(dir_name: Pathlike) -> None:
+def mkdir_p(dir_name: PathLikeOrStr) -> None:
     """
     Like "mkdir -p", make a dir recursively, but do nothing if the dir exists
 
@@ -84,7 +84,9 @@ def mkdir_p(dir_name: Pathlike) -> None:
 
 # Copyright (c) Tensorpack Contributors
 # Licensed under the Apache License, Version 2.0 (the "License")
-def download(url: str, directory: Pathlike, file_name: Optional[str] = None, expect_size: Optional[int] = None) -> str:
+def download(
+    url: str, directory: PathLikeOrStr, file_name: Optional[str] = None, expect_size: Optional[int] = None
+) -> str:
     """
     Download URL to a directory. Will figure out the filename automatically from URL, if not given.
     """
@@ -133,16 +135,18 @@ def download(url: str, directory: Pathlike, file_name: Optional[str] = None, exp
 
 
 @overload
-def load_image_from_file(path: Pathlike, type_id: Literal["np"] = "np") -> Optional[ImageType]:
+def load_image_from_file(path: PathLikeOrStr, type_id: Literal["np"] = "np") -> Optional[PixelValues]:
     ...
 
 
 @overload
-def load_image_from_file(path: Pathlike, type_id: Literal["b64"]) -> Optional[str]:
+def load_image_from_file(path: PathLikeOrStr, type_id: Literal["b64"]) -> Optional[B64Str]:
     ...
 
 
-def load_image_from_file(path: Pathlike, type_id: Literal["np", "b64"] = "np") -> Optional[Union[str, ImageType]]:
+def load_image_from_file(
+    path: PathLikeOrStr, type_id: Literal["np", "b64"] = "np"
+) -> Optional[Union[B64Str, PixelValues]]:
     """
     Loads an image from path and passes back an encoded base64 string, a numpy array or None if file is not found
     or a conversion error occurs.
@@ -151,7 +155,7 @@ def load_image_from_file(path: Pathlike, type_id: Literal["np", "b64"] = "np") -
     :param type_id: "np" or "b64".
     :return: image of desired representation
     """
-    image: Optional[Union[str, ImageType]] = None
+    image: Optional[Union[str, PixelValues]] = None
     path = path.as_posix() if isinstance(path, Path) else path
 
     assert is_file_extension(path, [".png", ".jpeg", ".jpg", ".tif"]), f"image type not allowed: {path}"
@@ -169,7 +173,7 @@ def load_image_from_file(path: Pathlike, type_id: Literal["np", "b64"] = "np") -
     return image
 
 
-def load_bytes_from_pdf_file(path: Pathlike, page_number: int = 0) -> bytes:
+def load_bytes_from_pdf_file(path: PathLikeOrStr, page_number: int = 0) -> B64:
     """
     Loads a pdf file with one single page and passes back a bytes' representation of this file. Can be converted into
     a numpy or directly passed to the attr: image of Image.
@@ -194,13 +198,13 @@ class LoadImageFunc(Protocol):
     Protocol for typing load_image_from_file
     """
 
-    def __call__(self, path: Pathlike) -> Optional[ImageType]:
+    def __call__(self, path: PathLikeOrStr) -> Optional[PixelValues]:
         ...
 
 
 def get_load_image_func(
-    path: Pathlike,
-) -> Union[LoadImageFunc, Callable[[Pathlike], bytes]]:
+    path: PathLikeOrStr,
+) -> Union[LoadImageFunc, Callable[[PathLikeOrStr], B64]]:
     """
     Return the loading function according to its file extension.
 
@@ -219,7 +223,7 @@ def get_load_image_func(
     )
 
 
-def maybe_path_or_pdf(path: Pathlike) -> int:
+def maybe_path_or_pdf(path: PathLikeOrStr) -> int:
     """
     Checks if the path points to a directory or a pdf document. Returns 1 if the path points to a directory, 2
     if the path points to a pdf doc or 0, if none of the previous is true.
@@ -238,7 +242,7 @@ def maybe_path_or_pdf(path: Pathlike) -> int:
     return 0
 
 
-def load_json(path_ann: Pathlike) -> JsonDict:
+def load_json(path_ann: PathLikeOrStr) -> JsonDict:
     """
     Loading json file
 
@@ -250,28 +254,28 @@ def load_json(path_ann: Pathlike) -> JsonDict:
     return json_dict
 
 
-def get_package_path() -> Path:
+def get_package_path() -> PathLikeOrStr:
     """
     :return: full base path of this package
     """
     return PATH
 
 
-def get_weights_dir_path() -> Path:
+def get_weights_dir_path() -> PathLikeOrStr:
     """
     :return: full base path to the model dir
     """
     return MODEL_DIR
 
 
-def get_configs_dir_path() -> Path:
+def get_configs_dir_path() -> PathLikeOrStr:
     """
     :return: full base path to the configs dir
     """
     return CONFIGS
 
 
-def get_dataset_dir_path() -> Path:
+def get_dataset_dir_path() -> PathLikeOrStr:
     """
     :return: full base path to the dataset dir
     """
@@ -279,7 +283,7 @@ def get_dataset_dir_path() -> Path:
 
 
 @deprecated("Use pathlib operations instead", "2022-06-08")
-def sub_path(anchor_dir: str, *paths: str) -> str:
+def sub_path(anchor_dir: PathLikeOrStr, *paths: PathLikeOrStr) -> PathLikeOrStr:
     """
     Generate a path from the anchor directory and various paths args.
 
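The two overloads of `load_image_from_file` shown above distinguish the return type by `type_id`: "np" yields a numpy array (`PixelValues`), "b64" a base64-encoded string (`B64Str`). A short usage sketch; the file name is illustrative.

    from deepdoctection.utils.fs import load_image_from_file

    pixels = load_image_from_file("page_1.png")                  # type_id="np" is the default
    encoded = load_image_from_file("page_1.png", type_id="b64")  # base64 string instead
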
deepdoctection/utils/identifier.py

@@ -21,7 +21,7 @@ Methods for generating and checking uuids
 import hashlib
 import uuid
 
-from .detection_types import Pathlike
+from .types import PathLikeOrStr
 
 __all__ = ["is_uuid_like", "get_uuid_from_str", "get_uuid"]
 
@@ -65,7 +65,7 @@ def get_uuid(*inputs: str) -> str:
     return get_uuid_from_str(str_input)
 
 
-def get_md5_hash(path: Pathlike, buffer_size: int = 65536) -> str:
+def get_md5_hash(path: PathLikeOrStr, buffer_size: int = 65536) -> str:
     """
     Calculate a md5 hash for a given file
 
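For completeness, a quick sketch of the re-annotated `get_md5_hash`: per its docstring it calculates an MD5 hash for a given file, and judging by the alias name, `PathLikeOrStr` should cover `pathlib.Path` arguments as well as strings (the file name below is illustrative).

    from pathlib import Path

    from deepdoctection.utils.identifier import get_md5_hash

    print(get_md5_hash(Path("weights") / "layout_model.pt"))
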
deepdoctection/utils/logger.py

@@ -25,7 +25,6 @@ Log levels can be set via the environment variable `LOG_LEVEL` (default: INFO).
 `STD_OUT_VERBOSE` will print a verbose message to the terminal (default: False).
 """
 
-import ast
 import errno
 import functools
 import json
@@ -37,21 +36,23 @@ import sys
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, Optional, Union, no_type_check
+from typing import Any, Optional, Union, no_type_check
 
 from termcolor import colored
 
-from .detection_types import Pathlike
+from .types import PathLikeOrStr
 
 __all__ = ["logger", "set_logger_dir", "auto_set_dir", "get_logger_dir"]
 
+ENV_VARS_TRUE: set[str] = {"1", "True", "TRUE", "true", "yes"}
+
 
 @dataclass
 class LoggingRecord:
     """LoggingRecord to pass to the logger in order to distinguish from third party libraries."""
 
     msg: str
-    log_dict: Optional[Dict[Union[int, str], Any]] = field(default=None)
+    log_dict: Optional[dict[Union[int, str], Any]] = field(default=None)
 
     def __post_init__(self) -> None:
         """log_dict will be added to the log record as a dict."""
@@ -66,7 +67,7 @@ class LoggingRecord:
 class CustomFilter(logging.Filter):
     """A custom filter"""
 
-    filter_third_party_lib = ast.literal_eval(os.environ.get("FILTER_THIRD_PARTY_LIB", "False"))
+    filter_third_party_lib = os.environ.get("FILTER_THIRD_PARTY_LIB", "False") in ENV_VARS_TRUE
 
     def filter(self, record: logging.LogRecord) -> bool:
         if self.filter_third_party_lib:
@@ -79,7 +80,7 @@ class CustomFilter(logging.Filter):
 class StreamFormatter(logging.Formatter):
     """A custom formatter to produce unified LogRecords"""
 
-    std_out_verbose = ast.literal_eval(os.environ.get("STD_OUT_VERBOSE", "False"))
+    std_out_verbose = os.environ.get("STD_OUT_VERBOSE", "False") in ENV_VARS_TRUE
 
     @no_type_check
     def format(self, record: logging.LogRecord) -> str:
@@ -109,7 +110,7 @@ class StreamFormatter(logging.Formatter):
 class FileFormatter(logging.Formatter):
     """A custom formatter to produce a loggings in json format"""
 
-    filter_third_party_lib = ast.literal_eval(os.environ.get("FILTER_THIRD_PARTY_LIB", "False"))
+    filter_third_party_lib = os.environ.get("FILTER_THIRD_PARTY_LIB", "False") in ENV_VARS_TRUE
 
     @no_type_check
     def format(self, record: logging.LogRecord) -> str:
@@ -132,8 +133,9 @@ class FileFormatter(logging.Formatter):
 
 
 _LOG_DIR = None
-_CONFIG_DICT: Dict[str, Any] = {
+_CONFIG_DICT: dict[str, Any] = {
     "version": 1,
+    "disable_existing_loggers": False,
     "filters": {"customfilter": {"()": lambda: CustomFilter()}},  # pylint: disable=W0108
     "formatters": {
         "streamformatter": {"()": lambda: StreamFormatter(datefmt="%m%d %H:%M.%S")},
@@ -144,7 +146,7 @@ _CONFIG_DICT: Dict[str, Any] = {
     "root": {
         "handlers": ["streamhandler"],
         "level": os.environ.get("LOG_LEVEL", "INFO"),
-        "propagate": ast.literal_eval(os.environ.get("LOG_PROPAGATE", "False")),
+        "propagate": os.environ.get("LOG_PROPAGATE", "False") in ENV_VARS_TRUE,
     },
 }
 
@@ -170,9 +172,8 @@ def _get_time_str() -> str:
     return datetime.now().strftime("%m%d-%H%M%S")
 
 
-def _set_file(path: Pathlike) -> None:
-    if isinstance(path, Path):
-        path = path.as_posix()
+def _set_file(path: PathLikeOrStr) -> None:
+    path = os.fspath(path)
     global _FILE_HANDLER  # pylint: disable=W0603
     if os.path.isfile(path):
         backup_name = path + "." + _get_time_str()
@@ -187,7 +188,7 @@ def _set_file(path: Pathlike) -> None:
     logger.info("Argv: %s ", sys.argv)
 
 
-def set_logger_dir(dir_name: Pathlike, action: Optional[str] = None) -> None:
+def set_logger_dir(dir_name: PathLikeOrStr, action: Optional[str] = None) -> None:
     """
     Set the directory for global logging.
 
@@ -212,7 +213,7 @@ def set_logger_dir(dir_name: Pathlike, action: Optional[str] = None) -> None:
         logger.removeHandler(_FILE_HANDLER)
         del _FILE_HANDLER
 
-    def dir_nonempty(directory: str) -> int:
+    def dir_nonempty(directory: PathLikeOrStr) -> int:
         return os.path.isdir(directory) and len([x for x in os.listdir(directory) if x[0] != "."])
 
     if dir_nonempty(dir_name):
@@ -266,7 +267,7 @@ def auto_set_dir(action: Optional[str] = None, name: Optional[str] = None) -> No
     set_logger_dir(auto_dir_name, action=action)
 
 
-def get_logger_dir() -> Optional[PathLikeOrStr]:
-def get_logger_dir() -> Optional[str]:
+def get_logger_dir() -> Optional[PathLikeOrStr]:
     """
     The logger directory, or None if not set.
     The directory is used for general logging, tensorboard events, checkpoints, etc.
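
As the logger.py hunks show, boolean environment flags are now tested for membership in `ENV_VARS_TRUE` instead of being run through `ast.literal_eval`, so values such as "yes" or "true" are accepted and arbitrary literals are never evaluated. A hedged sketch of how the flags might be set; they are read when the module is imported, since they live in class attributes and `_CONFIG_DICT`.

    import os

    os.environ["LOG_LEVEL"] = "DEBUG"             # standard logging level name, unchanged
    os.environ["FILTER_THIRD_PARTY_LIB"] = "yes"  # truthy now; ast.literal_eval("yes") would have raised
    os.environ["STD_OUT_VERBOSE"] = "true"        # likewise accepted under ENV_VARS_TRUE

    from deepdoctection.utils.logger import logger  # flags are evaluated at import time

    logger.debug("verbose stream logging with third-party records filtered out")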