deepdoctection-0.30-py3-none-any.whl → deepdoctection-0.32-py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their respective public registries and is provided for informational purposes only.
Potentially problematic release: this version of deepdoctection might be problematic.
- deepdoctection/__init__.py +38 -29
- deepdoctection/analyzer/dd.py +36 -29
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/dataflow/base.py +0 -19
- deepdoctection/dataflow/custom.py +4 -3
- deepdoctection/dataflow/custom_serialize.py +14 -5
- deepdoctection/dataflow/parallel_map.py +12 -11
- deepdoctection/dataflow/serialize.py +5 -4
- deepdoctection/datapoint/annotation.py +35 -13
- deepdoctection/datapoint/box.py +3 -5
- deepdoctection/datapoint/convert.py +3 -1
- deepdoctection/datapoint/image.py +79 -36
- deepdoctection/datapoint/view.py +152 -49
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +6 -3
- deepdoctection/datasets/base.py +86 -11
- deepdoctection/datasets/dataflow_builder.py +1 -1
- deepdoctection/datasets/info.py +4 -4
- deepdoctection/datasets/instances/doclaynet.py +3 -2
- deepdoctection/datasets/instances/fintabnet.py +2 -1
- deepdoctection/datasets/instances/funsd.py +2 -1
- deepdoctection/datasets/instances/iiitar13k.py +5 -2
- deepdoctection/datasets/instances/layouttest.py +4 -8
- deepdoctection/datasets/instances/publaynet.py +2 -2
- deepdoctection/datasets/instances/pubtables1m.py +6 -3
- deepdoctection/datasets/instances/pubtabnet.py +2 -1
- deepdoctection/datasets/instances/rvlcdip.py +2 -1
- deepdoctection/datasets/instances/xfund.py +2 -1
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/accmetric.py +1 -1
- deepdoctection/eval/base.py +5 -4
- deepdoctection/eval/cocometric.py +2 -1
- deepdoctection/eval/eval.py +19 -15
- deepdoctection/eval/tedsmetric.py +14 -11
- deepdoctection/eval/tp_eval_callback.py +14 -7
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/base.py +39 -13
- deepdoctection/extern/d2detect.py +182 -90
- deepdoctection/extern/deskew.py +36 -9
- deepdoctection/extern/doctrocr.py +265 -83
- deepdoctection/extern/fastlang.py +49 -9
- deepdoctection/extern/hfdetr.py +106 -55
- deepdoctection/extern/hflayoutlm.py +441 -122
- deepdoctection/extern/hflm.py +225 -0
- deepdoctection/extern/model.py +56 -47
- deepdoctection/extern/pdftext.py +10 -5
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +27 -18
- deepdoctection/extern/tessocr.py +134 -22
- deepdoctection/extern/texocr.py +6 -2
- deepdoctection/extern/tp/tfutils.py +43 -9
- deepdoctection/extern/tp/tpcompat.py +14 -11
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +9 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +17 -7
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +16 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +17 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +14 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +8 -9
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +54 -30
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/d2struct.py +9 -7
- deepdoctection/mapper/hfstruct.py +7 -2
- deepdoctection/mapper/laylmstruct.py +164 -21
- deepdoctection/mapper/maputils.py +16 -3
- deepdoctection/mapper/misc.py +6 -3
- deepdoctection/mapper/prodigystruct.py +1 -1
- deepdoctection/mapper/pubstruct.py +10 -10
- deepdoctection/mapper/tpstruct.py +3 -3
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/anngen.py +35 -8
- deepdoctection/pipe/base.py +53 -19
- deepdoctection/pipe/common.py +23 -13
- deepdoctection/pipe/concurrency.py +2 -1
- deepdoctection/pipe/doctectionpipe.py +2 -2
- deepdoctection/pipe/language.py +3 -2
- deepdoctection/pipe/layout.py +6 -3
- deepdoctection/pipe/lm.py +34 -66
- deepdoctection/pipe/order.py +142 -35
- deepdoctection/pipe/refine.py +26 -24
- deepdoctection/pipe/segment.py +21 -16
- deepdoctection/pipe/{cell.py → sub_layout.py} +30 -9
- deepdoctection/pipe/text.py +14 -8
- deepdoctection/pipe/transform.py +16 -9
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +36 -28
- deepdoctection/train/hf_detr_train.py +26 -17
- deepdoctection/train/hf_layoutlm_train.py +133 -111
- deepdoctection/train/tp_frcnn_train.py +21 -19
- deepdoctection/utils/__init__.py +3 -0
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +2 -2
- deepdoctection/utils/env_info.py +41 -84
- deepdoctection/utils/error.py +84 -0
- deepdoctection/utils/file_utils.py +4 -15
- deepdoctection/utils/fs.py +7 -7
- deepdoctection/utils/logger.py +1 -0
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/pdf_utils.py +5 -4
- deepdoctection/utils/settings.py +6 -1
- deepdoctection/utils/transform.py +1 -1
- deepdoctection/utils/utils.py +0 -6
- deepdoctection/utils/viz.py +48 -5
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/METADATA +57 -73
- deepdoctection-0.32.dist-info/RECORD +146 -0
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/WHEEL +1 -1
- deepdoctection-0.30.dist-info/RECORD +0 -143
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/LICENSE +0 -0
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/top_level.txt +0 -0
deepdoctection/utils/env_info.py
CHANGED

```diff
@@ -46,7 +46,6 @@ can store an (absolute) path to a `.jsonl` file.
 
 """
 
-import ast
 import importlib
 import os
 import re
@@ -56,6 +55,7 @@ from collections import defaultdict
 from typing import List, Optional, Tuple
 
 import numpy as np
+from packaging import version
 from tabulate import tabulate
 
 from .file_utils import (
@@ -68,6 +68,7 @@ from .file_utils import (
     fasttext_available,
     get_poppler_version,
     get_tesseract_version,
+    get_tf_version,
     jdeskew_available,
     lxml_available,
     opencv_available,
@@ -84,13 +85,9 @@ from .file_utils import (
     transformers_available,
     wandb_available,
 )
-from .logger import LoggingRecord, logger
 
 __all__ = [
-    "collect_torch_env",
     "collect_env_info",
-    "get_device",
-    "auto_select_lib_and_device",
     "auto_select_viz_library",
 ]
 
@@ -270,7 +267,22 @@ def tf_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     if tf_available():
         import tensorflow as tf  # type: ignore # pylint: disable=E0401
 
+        os.environ["TENSORFLOW_AVAILABLE"] = "1"
+
         data.append(("Tensorflow", tf.__version__))
+        if version.parse(get_tf_version()) > version.parse("2.4.1"):
+            os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+            try:
+                import tensorflow.python.util.deprecation as deprecation  # type: ignore # pylint: disable=E0401,R0402
+
+                deprecation._PRINT_DEPRECATION_WARNINGS = False  # pylint: disable=W0212
+            except Exception:  # pylint: disable=W0703
+                try:
+                    from tensorflow.python.util import deprecation  # type: ignore # pylint: disable=E0401
+
+                    deprecation._PRINT_DEPRECATION_WARNINGS = False  # pylint: disable=W0212
+                except Exception:  # pylint: disable=W0703
+                    pass
     else:
         data.append(("Tensorflow", "None"))
     return data
@@ -279,12 +291,18 @@ def tf_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
 
         try:
             for key, value in list(build_info.build_info.items()):
-                if key == "cuda_version":
+                if key == "is_cuda_build":
+                    data.append(("TF compiled with CUDA", value))
+                    if value and len(tf.config.list_physical_devices('GPU')):
+                        os.environ["USE_CUDA"] = "1"
+                elif key == "cuda_version":
                     data.append(("TF built with CUDA", value))
                 elif key == "cudnn_version":
                     data.append(("TF built with CUDNN", value))
                 elif key == "cuda_compute_capabilities":
                     data.append(("TF compute capabilities", ",".join([k.replace("compute_", "") for k in value])))
+                elif key == "is_rocm_build":
+                    data.append(("TF compiled with ROCM", value))
             return data
         except AttributeError:
             pass
@@ -306,6 +324,13 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
 
     if pytorch_available():
         import torch
+
+        os.environ["PYTORCH_AVAILABLE"] = "1"
+
+    else:
+        data.append(("PyTorch", "None"))
+        return []
+
     has_gpu = torch.cuda.is_available()  # true for both CUDA & ROCM
     has_mps = torch.backends.mps.is_available()
 
@@ -331,12 +356,9 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     data.append(("PyTorch", torch_version + " @" + os.path.dirname(torch.__file__)))
     data.append(("PyTorch debug build", str(torch.version.debug)))
 
-    if not has_gpu:
-        has_gpu_text = "No: torch.cuda.is_available() == False"
-    else:
-        has_gpu_text = "Yes"
-    data.append(("GPU available", has_gpu_text))
     if has_gpu:
+        os.environ["USE_CUDA"] = "1"
+        has_gpu_text = "Yes"
         devices = defaultdict(list)
         for k in range(torch.cuda.device_count()):
             cap = ".".join((str(x) for x in torch.cuda.get_device_capability(k)))
@@ -362,6 +384,10 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
         cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None)
         if cuda_arch_list:
             data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list))
+    else:
+        has_gpu_text = "No: torch.cuda.is_available() == False"
+
+    data.append(("GPU available", has_gpu_text))
 
     mps_build = "No: torch.backends.mps.is_built() == False"
     if not has_mps:
@@ -369,9 +395,11 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     else:
         has_mps_text = "Yes"
         mps_build = str(torch.backends.mps.is_built())
+        if mps_build == "True":
+            os.environ["USE_MPS"] = "1"
 
     data.append(("MPS available", has_mps_text))
-    data.append(("MPS
+    data.append(("MPS built", mps_build))
 
     try:
         import torchvision  # type: ignore
@@ -420,7 +448,7 @@ def collect_env_info() -> str:
     try:
         import prctl  # type: ignore
 
-        _ = prctl.set_pdeathsig  #
+        _ = prctl.set_pdeathsig  # pylint: disable=E1101
     except ModuleNotFoundError:
         has_prctl = False
     data.append(("python-prctl", str(has_prctl)))
@@ -452,77 +480,6 @@ def collect_env_info() -> str:
     return env_str
 
 
-def auto_select_lib_and_device() -> None:
-    """
-    Select the DL library and subsequently the device.
-    This will set environment variable `USE_TENSORFLOW`, `USE_PYTORCH` and `USE_CUDA`
-
-    If TF is available, use TF unless a GPU is not available, in which case choose PT. If CUDA is not available and PT
-    is not installed raise ImportError.
-    """
-
-    if tf_available() and tensorpack_available():
-        from tensorpack.utils.gpu import get_num_gpu  # pylint: disable=E0401
-
-        if get_num_gpu() >= 1:
-            os.environ["USE_TENSORFLOW"] = "True"
-            os.environ["USE_PYTORCH"] = "False"
-            os.environ["USE_CUDA"] = "True"
-            os.environ["USE_MPS"] = "False"
-            return
-        if pytorch_available():
-            os.environ["USE_TENSORFLOW"] = "False"
-            os.environ["USE_PYTORCH"] = "True"
-            os.environ["USE_CUDA"] = "False"
-            return
-        logger.warning(
-            LoggingRecord("You have Tensorflow installed but no GPU is available. All Tensorflow models require a GPU.")
-        )
-    if pytorch_available():
-        import torch
-
-        if torch.cuda.is_available():
-            os.environ["USE_TENSORFLOW"] = "False"
-            os.environ["USE_PYTORCH"] = "True"
-            os.environ["USE_CUDA"] = "True"
-            return
-        if torch.backends.mps.is_available():
-            os.environ["USE_TENSORFLOW"] = "False"
-            os.environ["USE_PYTORCH"] = "True"
-            os.environ["USE_CUDA"] = "False"
-            os.environ["USE_MPS"] = "True"
-            return
-        os.environ["USE_TENSORFLOW"] = "False"
-        os.environ["USE_PYTORCH"] = "True"
-        os.environ["USE_CUDA"] = "False"
-        os.environ["USE_MPS"] = "False"
-        return
-    logger.warning(
-        LoggingRecord(
-            "Neither Tensorflow or Pytorch are available. You will not be able to use any Deep Learning "
-            "model from the library."
-        )
-    )
-
-
-def get_device(ignore_cpu: bool = True) -> str:
-    """
-    Device checks for running PyTorch with CUDA, MPS or optionall CPU.
-    If nothing can be found and if `disable_cpu` is deactivated it will raise a `ValueError`
-
-    :param ignore_cpu: Will not consider `cpu` as valid return value
-    :return: Either cuda or mps
-    """
-
-    if ast.literal_eval(os.environ.get("USE_CUDA", "True")):
-        return "cuda"
-    if ast.literal_eval(os.environ.get("USE_MPS", "True")):
-        return "mps"
-    if not ignore_cpu:
-        return "cpu"
-    raise ValueError("Could not find either GPU nor MPS")
-
-
 def auto_select_viz_library() -> None:
     """Setting PIL as default image library if cv2 is not installed"""
 
```
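Note on the hunks above: 0.32 drops `get_device` and `auto_select_lib_and_device`; `env_info.py` now only records what it detects in environment variables such as `TENSORFLOW_AVAILABLE`, `PYTORCH_AVAILABLE`, `USE_CUDA` and `USE_MPS` (each set to `"1"` when found). A minimal sketch of how calling code could map those flags to a PyTorch device; `pick_device` is a hypothetical helper written for illustration, not a function of the package:

```python
# Hypothetical helper (not part of deepdoctection): maps the "USE_CUDA" / "USE_MPS"
# flags written by env_info.py in 0.32 to a torch.device.
import os

import torch


def pick_device(allow_cpu: bool = True) -> torch.device:
    if os.environ.get("USE_CUDA") == "1":
        return torch.device("cuda")
    if os.environ.get("USE_MPS") == "1":
        return torch.device("mps")
    if allow_cpu:
        return torch.device("cpu")
    raise RuntimeError("Neither CUDA nor MPS is available")


print(pick_device())  # e.g. device(type='cuda') on a CUDA machine
```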
deepdoctection/utils/error.py
ADDED

```diff
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+# File: error.py
+
+# Copyright 2024 Dr. Janis Meyer. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Module for custom exceptions
+"""
+
+
+class BoundingBoxError(BaseException):
+    """Special exception only for `datapoint.box.BoundingBox`"""
+
+
+class AnnotationError(BaseException):
+    """Special exception only for `datapoint.annotation.Annotation`"""
+
+
+class ImageError(BaseException):
+    """Special exception only for `datapoint.image.Image`"""
+
+
+class UUIDError(BaseException):
+    """Special exception only for `utils.identifier`"""
+
+
+class DependencyError(BaseException):
+    """Special exception only for missing dependencies. We do not use the internals ImportError or
+    ModuleNotFoundError."""
+
+
+class DataFlowTerminatedError(BaseException):
+    """
+    An exception indicating that the DataFlow is unable to produce any more
+    data, i.e. something wrong happened so that calling `__iter__`
+    cannot give a valid iterator anymore.
+    In most DataFlow this will never be raised.
+    """
+
+
+class DataFlowResetStateNotCalledError(BaseException):
+    """
+    An exception indicating that `reset_state()` has not been called before starting
+    iteration.
+    """
+
+    def __init__(self) -> None:
+        super().__init__("Iterating a dataflow requires .reset_state() to be called first")
+
+
+class MalformedData(BaseException):
+    """
+    Exception class for malformed data. Use this class if something does not look right with the data
+    """
+
+
+class FileExtensionError(BaseException):
+    """
+    Exception class for wrong file extensions.
+    """
+
+
+class TesseractError(RuntimeError):
+    """
+    Tesseract Error
+    """
+
+    def __init__(self, status: int, message: str) -> None:
+        super().__init__()
+        self.status = status
+        self.message = message
+        self.args = (status, message)
```
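The new `error.py` centralises exceptions that previously lived next to their call sites (`FileExtensionError` in `utils.py`, `TesseractNotFound`/`PopplerNotFound` in `file_utils.py`). A short sketch of catching them, assuming deepdoctection 0.32 is installed; `run_some_ocr` is a placeholder, not a library function:

```python
# The exception classes and the TesseractError(status, message) signature come from
# the diff above; run_some_ocr() is a stand-in for a code path that calls Tesseract.
from deepdoctection.utils.error import DependencyError, FileExtensionError, TesseractError


def run_some_ocr() -> None:
    raise TesseractError(1, "tesseract returned a non-zero exit code")


try:
    run_some_ocr()
except DependencyError as err:
    print(f"missing system dependency: {err}")
except TesseractError as err:
    print(f"tesseract failed with status {err.status}: {err.message}")
except FileExtensionError as err:
    print(f"unsupported file type: {err}")
```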
deepdoctection/utils/file_utils.py
CHANGED

```diff
@@ -22,6 +22,7 @@ import importlib_metadata
 from packaging import version
 
 from .detection_types import Requirement
+from .error import DependencyError
 from .logger import LoggingRecord, logger
 from .metacfg import AttrDict
 
@@ -263,7 +264,7 @@ def set_tesseract_path(tesseract_path: str) -> None:
     :param tesseract_path: Tesseract installation path.
     """
     if tesseract_path is None:
-        raise
+        raise TypeError("tesseract_path cannot be None")
 
     global _TESS_AVAILABLE  # pylint: disable=W0603
     global _TESS_PATH  # pylint: disable=W0603
@@ -288,12 +289,6 @@ def tesseract_available() -> bool:
 # copy paste from https://github.com/madmaze/pytesseract/blob/master/pytesseract/pytesseract.py
 
 
-class TesseractNotFound(BaseException):
-    """
-    Exception class for Tesseract being not found
-    """
-
-
 def get_tesseract_version() -> Union[int, version.Version]:
     """
     Returns Version object of the Tesseract version. We need at least Tesseract 3.05
@@ -306,7 +301,7 @@ def get_tesseract_version() -> Union[int, version.Version]:
             stdin=subprocess.DEVNULL,
         )
     except OSError:
-        raise
+        raise DependencyError(_TESS_ERR_MSG) from OSError
 
     raw_version = output.decode("utf-8")
     str_version, *_ = raw_version.lstrip(string.printable[10:]).partition(" ")
@@ -348,12 +343,6 @@ def pdf_to_cairo_available() -> bool:
     return bool(_PDF_TO_CAIRO_AVAILABLE)
 
 
-class PopplerNotFound(BaseException):
-    """
-    Exception class for Poppler being not found
-    """
-
-
 def get_poppler_version() -> Union[int, version.Version]:
     """
     Returns Version object of the Poppler version. We need at least Tesseract 3.05
@@ -371,7 +360,7 @@ def get_poppler_version() -> Union[int, version.Version]:
             [command, "-v"], stderr=subprocess.STDOUT, env=environ, stdin=subprocess.DEVNULL
         )
     except OSError:
-        raise
+        raise DependencyError(_POPPLER_ERR_MSG) from OSError
 
     raw_version = output.decode("utf-8")
     list_version = raw_version.split("\n", maxsplit=1)[0].split(" ")[-1].split(".")
```
deepdoctection/utils/fs.py
CHANGED

```diff
@@ -34,7 +34,7 @@ from .logger import LoggingRecord, logger
 from .pdf_utils import get_pdf_file_reader, get_pdf_file_writer
 from .settings import CONFIGS, DATASET_DIR, MODEL_DIR, PATH
 from .tqdm import get_tqdm
-from .utils import
+from .utils import is_file_extension
 from .viz import viz_handler
 
 __all__ = [
@@ -44,9 +44,7 @@ __all__ = [
     "maybe_path_or_pdf",
     "download",
     "mkdir_p",
-    "is_file_extension",
     "load_json",
-    "FileExtensionError",
     "sub_path",
     "get_package_path",
     "get_configs_dir_path",
@@ -125,8 +123,8 @@ def download(url: str, directory: Pathlike, file_name: Optional[str] = None, exp
     assert size > 0, f"Downloaded an empty file from {url}!"
 
     if expect_size is not None and size != expect_size:
-        logger.
-        logger.
+        logger.warning(LoggingRecord(f"File downloaded from {url} does not match the expected size!"))
+        logger.warning(
             LoggingRecord("You may have downloaded a broken file, or the upstream may have modified the file.")
         )
 
@@ -210,13 +208,15 @@ def get_load_image_func(
     :return: The function loading the file (and converting to its desired format)
     """
 
-    assert is_file_extension(path, [".png", ".jpeg", ".jpg", ".pdf", ".tif"]), f"image type not allowed: {path}"
+    assert is_file_extension(path, [".png", ".jpeg", ".jpg", ".pdf", ".tif"]), f"image type not allowed: " f"{path}"
 
     if is_file_extension(path, [".png", ".jpeg", ".jpg", ".tif"]):
         return load_image_from_file
     if is_file_extension(path, [".pdf"]):
         return load_bytes_from_pdf_file
-
+    raise NotImplementedError(
+        "File extension not supported by any loader. Please specify a file type and raise an issue"
+    )
 
 
 def maybe_path_or_pdf(path: Pathlike) -> int:
```
deepdoctection/utils/logger.py
CHANGED

```diff
@@ -134,6 +134,7 @@ class FileFormatter(logging.Formatter):
 _LOG_DIR = None
 _CONFIG_DICT: Dict[str, Any] = {
     "version": 1,
+    "disable_existing_loggers": False,
     "filters": {"customfilter": {"()": lambda: CustomFilter()}},  # pylint: disable=W0108
     "formatters": {
         "streamformatter": {"()": lambda: StreamFormatter(datefmt="%m%d %H:%M.%S")},
```
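The only change here is `"disable_existing_loggers": False`. With the `dictConfig` default of `True`, any logger created before `logging.config.dictConfig` runs (typically loggers of libraries imported earlier) is silently disabled. A stdlib-only demonstration of the flag, independent of deepdoctection:

```python
import logging
import logging.config

third_party = logging.getLogger("some.library")  # created before configuration

logging.config.dictConfig(
    {
        "version": 1,
        "disable_existing_loggers": False,  # keep pre-existing loggers enabled
        "handlers": {"console": {"class": "logging.StreamHandler"}},
        "root": {"handlers": ["console"], "level": "INFO"},
    }
)

third_party.warning("still emitted because existing loggers were not disabled")
```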
deepdoctection/utils/mocks.py
ADDED

```diff
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+# File: mocks.py
+
+# Copyright 2024 Dr. Janis Meyer. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Some classes with the purpose to mock the original classes from the Tensorpack library, if Tensorpack is not installed
+"""
+
+from deepdoctection.utils.error import DependencyError
+
+
+def layer_register(log_shape):  # pylint: disable=W0613
+    """Mock layer_register function from tensorpack."""
+
+    def inner(inputs):  # pylint: disable=W0613
+        pass
+
+    return inner
+
+
+def under_name_scope():
+    """Mock under_name_scope function from tensorpack."""
+
+    def inner(inputs):  # pylint: disable=W0613
+        pass
+
+    return inner
+
+
+def memoized(func):
+    """Mock memoized function from tensorpack."""
+    return func
+
+
+def memoized_method(func):
+    """Mock memoized_method function from tensorpack."""
+    return func
+
+
+def auto_reuse_variable_scope(inputs):  # pylint: disable=W0613
+    """Mock auto_reuse_variable_scope function from tensorpack."""
+
+
+class ModelDesc:  # pylint: disable=R0903
+    """Mock ModelDesc class from tensorpack."""
+
+    def __init__(self) -> None:
+        raise DependencyError("Tensorpack not found.")
+
+
+class ImageAugmentor:  # pylint: disable=R0903
+    """Mock ImageAugmentor class from tensorpack."""
+
+    def __init__(self) -> None:
+        raise DependencyError("Tensorpack not found.")
+
+
+class Callback:  # pylint: disable=R0903
+    """Mock Callback class from tensor"""
+
+    def __init__(self) -> None:
+        raise DependencyError("Tensorpack not found.")
+
+
+class Config:  # pylint: disable=R0903
+    """Mock class for Config"""
+
+    pass  # pylint: disable=W0107
+
+
+class Tree:  # pylint: disable=R0903
+    """Mock class for Tree"""
+
+    pass  # pylint: disable=W0107
+
+
+class IterableDataset:  # pylint: disable=R0903
+    """Mock class for IterableDataset"""
+
+    pass  # pylint: disable=W0107
```
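`mocks.py` lets Tensorpack-dependent modules import the names they need even when Tensorpack is absent; instantiating one of the mocked classes raises the new `DependencyError`. A sketch of the conditional-import pattern this enables; whether deepdoctection wires it exactly this way is an assumption, only the mock module itself comes from the diff:

```python
from deepdoctection.utils.error import DependencyError

try:
    from tensorpack import ModelDesc  # real class if Tensorpack is installed (assumed import path)
except ImportError:
    from deepdoctection.utils.mocks import ModelDesc  # placeholder that raises on use

try:
    model = ModelDesc()
except DependencyError as err:
    print(f"Tensorpack is required for this code path: {err}")
```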
deepdoctection/utils/pdf_utils.py
CHANGED

```diff
@@ -32,9 +32,10 @@ from pypdf import PdfReader, PdfWriter, errors
 
 from .context import save_tmp_file, timeout_manager
 from .detection_types import ImageType, Pathlike
-from .
+from .error import DependencyError, FileExtensionError
+from .file_utils import pdf_to_cairo_available, pdf_to_ppm_available, qpdf_available
 from .logger import LoggingRecord, logger
-from .utils import
+from .utils import is_file_extension
 from .viz import viz_handler
 
 __all__ = ["decrypt_pdf_document", "get_pdf_file_reader", "get_pdf_file_writer", "PDFStreamer", "pdf_to_np_array"]
@@ -165,7 +166,7 @@ def _input_to_cli_str(
     elif pdf_to_cairo_available():
         command = "pdftocairo"
     else:
-        raise
+        raise DependencyError("Poppler not found. Please install or add to your PATH.")
 
     if platform.system() == "Windows":
         command = command + ".exe"
@@ -201,7 +202,7 @@ def _run_poppler(poppler_args: List[str]) -> None:
     except OSError as error:
         if error.errno != ENOENT:
             raise error from error
-        raise
+        raise DependencyError("Poppler not found. Please install or add to your PATH.") from error
 
     with timeout_manager(proc, 0):
         if proc.returncode:
```
deepdoctection/utils/settings.py
CHANGED

```diff
@@ -65,6 +65,7 @@ class PageType(ObjectTypes):
 
     document_type = "document_type"
     language = "language"
+    angle = "angle"
 
 
 @object_types_registry.register("SummaryType")
@@ -125,6 +126,7 @@ class LayoutType(ObjectTypes):
     column = "column"
     word = "word"
     line = "line"
+    background = "background"
 
 
 @object_types_registry.register("TableType")
@@ -291,6 +293,7 @@ class DatasetType(ObjectTypes):
     sequence_classification = "sequence_classification"
     token_classification = "token_classification"
    publaynet = "publaynet"
+    default = "default"
 
 
 _TOKEN_AND_TAG_TO_TOKEN_CLASS_WITH_TAG = {
@@ -324,7 +327,9 @@ def token_class_tag_to_token_class_with_tag(token: ObjectTypes, tag: ObjectTypes
     """
     if isinstance(token, TokenClasses) and isinstance(tag, BioTag):
         return _TOKEN_AND_TAG_TO_TOKEN_CLASS_WITH_TAG[(token, tag)]
-    raise TypeError(
+    raise TypeError(
+        f"Token must be of type TokenClasses, is of {type(token)} and tag " f"{type(tag)} must be of type BioTag"
+    )
 
 
 def token_class_with_tag_to_token_class_and_tag(
```
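Three new members are registered here: `PageType.angle` (presumably the page's rotation/deskew angle), `LayoutType.background` and `DatasetType.default`. A small sketch of referring to them, assuming the `ObjectTypes` enums expose string values like their existing members:

```python
from deepdoctection.utils.settings import DatasetType, LayoutType, PageType

print(PageType.angle.value)       # "angle"
print(DatasetType.default.value)  # "default"

# e.g. drop background regions from a list of (category, score) detections
detections = [(LayoutType.word, 0.97), (LayoutType.background, 0.55)]
kept = [(cat, score) for cat, score in detections if cat != LayoutType.background]
print(kept)
```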
deepdoctection/utils/transform.py
CHANGED

```diff
@@ -47,7 +47,7 @@ class BaseTransform(ABC):
     @abstractmethod
     def apply_image(self, img: ImageType) -> ImageType:
         """The transformation that should be applied to the image"""
-        raise NotImplementedError
+        raise NotImplementedError()
 
 
 class ResizeTransform(BaseTransform):
```
deepdoctection/utils/utils.py
CHANGED

```diff
@@ -144,12 +144,6 @@ def get_rng(obj: Any = None) -> np.random.RandomState:
     return np.random.RandomState(seed)
 
 
-class FileExtensionError(BaseException):
-    """
-    An exception indicating that a file does not seem to have an expected type
-    """
-
-
 def is_file_extension(file_name: Pathlike, extension: Union[str, Sequence[str]]) -> bool:
     """
     Check if a given file name has a given extension
```
|