deepdoctection 0.31-py3-none-any.whl → 0.32-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic.
- deepdoctection/__init__.py +35 -28
- deepdoctection/analyzer/dd.py +30 -24
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/datapoint/annotation.py +2 -1
- deepdoctection/datapoint/box.py +2 -1
- deepdoctection/datapoint/image.py +13 -7
- deepdoctection/datapoint/view.py +95 -24
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +5 -2
- deepdoctection/datasets/base.py +5 -3
- deepdoctection/datasets/info.py +2 -2
- deepdoctection/datasets/instances/doclaynet.py +3 -2
- deepdoctection/datasets/instances/fintabnet.py +2 -1
- deepdoctection/datasets/instances/funsd.py +2 -1
- deepdoctection/datasets/instances/iiitar13k.py +5 -2
- deepdoctection/datasets/instances/layouttest.py +2 -1
- deepdoctection/datasets/instances/publaynet.py +2 -2
- deepdoctection/datasets/instances/pubtables1m.py +6 -3
- deepdoctection/datasets/instances/pubtabnet.py +2 -1
- deepdoctection/datasets/instances/rvlcdip.py +2 -1
- deepdoctection/datasets/instances/xfund.py +2 -1
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/cocometric.py +2 -1
- deepdoctection/eval/eval.py +17 -13
- deepdoctection/eval/tedsmetric.py +14 -11
- deepdoctection/eval/tp_eval_callback.py +9 -3
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/d2detect.py +24 -32
- deepdoctection/extern/deskew.py +4 -2
- deepdoctection/extern/doctrocr.py +75 -81
- deepdoctection/extern/fastlang.py +4 -2
- deepdoctection/extern/hfdetr.py +22 -28
- deepdoctection/extern/hflayoutlm.py +335 -103
- deepdoctection/extern/hflm.py +225 -0
- deepdoctection/extern/model.py +56 -47
- deepdoctection/extern/pdftext.py +8 -4
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +27 -19
- deepdoctection/extern/texocr.py +4 -2
- deepdoctection/extern/tp/tfutils.py +43 -9
- deepdoctection/extern/tp/tpcompat.py +10 -7
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +9 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +17 -7
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +16 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +17 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +14 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +5 -8
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/d2struct.py +8 -6
- deepdoctection/mapper/hfstruct.py +6 -1
- deepdoctection/mapper/laylmstruct.py +163 -20
- deepdoctection/mapper/maputils.py +3 -1
- deepdoctection/mapper/misc.py +6 -3
- deepdoctection/mapper/tpstruct.py +2 -2
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/common.py +11 -9
- deepdoctection/pipe/concurrency.py +2 -1
- deepdoctection/pipe/layout.py +3 -1
- deepdoctection/pipe/lm.py +32 -64
- deepdoctection/pipe/order.py +142 -35
- deepdoctection/pipe/refine.py +8 -14
- deepdoctection/pipe/{cell.py → sub_layout.py} +1 -1
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +21 -16
- deepdoctection/train/hf_detr_train.py +18 -11
- deepdoctection/train/hf_layoutlm_train.py +118 -101
- deepdoctection/train/tp_frcnn_train.py +21 -19
- deepdoctection/utils/env_info.py +41 -117
- deepdoctection/utils/logger.py +1 -0
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/settings.py +1 -0
- deepdoctection/utils/viz.py +4 -3
- {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/METADATA +27 -18
- deepdoctection-0.32.dist-info/RECORD +146 -0
- deepdoctection-0.31.dist-info/RECORD +0 -144
- {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/LICENSE +0 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/WHEEL +0 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/top_level.txt +0 -0

deepdoctection/train/hf_layoutlm_train.py
CHANGED

@@ -18,32 +18,15 @@
 """
 Module for training Huggingface implementation of LayoutLm
 """
+from __future__ import annotations
 
 import copy
 import json
 import os
 import pprint
-from typing import Any, Dict, List,
-
-from
-from torch.utils.data import Dataset
-from transformers import (
-    IntervalStrategy,
-    LayoutLMForSequenceClassification,
-    LayoutLMForTokenClassification,
-    LayoutLMTokenizerFast,
-    LayoutLMv2Config,
-    LayoutLMv2ForSequenceClassification,
-    LayoutLMv2ForTokenClassification,
-    LayoutLMv3Config,
-    LayoutLMv3ForSequenceClassification,
-    LayoutLMv3ForTokenClassification,
-    PretrainedConfig,
-    PreTrainedModel,
-    RobertaTokenizerFast,
-    XLMRobertaTokenizerFast,
-)
-from transformers.trainer import Trainer, TrainingArguments
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union
+
+from lazy_imports import try_import
 
 from ..datasets.adapter import DatasetAdapter
 from ..datasets.base import DatasetBase
@@ -57,79 +40,108 @@ from ..extern.hflayoutlm import (
     HFLayoutLmv2TokenClassifier,
     HFLayoutLmv3SequenceClassifier,
     HFLayoutLmv3TokenClassifier,
+    HFLiltSequenceClassifier,
+    HFLiltTokenClassifier,
+    get_tokenizer_from_model_class,
 )
-from ..
+from ..extern.hflm import HFLmSequenceClassifier
+from ..extern.pt.ptutils import get_torch_device
+from ..mapper.laylmstruct import LayoutLMDataCollator, image_to_raw_layoutlm_features, image_to_raw_lm_features
 from ..pipe.base import LanguageModelPipelineComponent
-from ..pipe.lm import get_tokenizer_from_architecture
 from ..pipe.registry import pipeline_component_registry
-from ..utils.env_info import get_device
 from ..utils.error import DependencyError
 from ..utils.file_utils import wandb_available
 from ..utils.logger import LoggingRecord, logger
-from ..utils.settings import DatasetType, LayoutType,
+from ..utils.settings import DatasetType, LayoutType, WordType
 from ..utils.utils import string_to_dict
 
-
-import
-
-_ARCHITECTURES_TO_MODEL_CLASS = {
-    "LayoutLMForTokenClassification": (LayoutLMForTokenClassification, HFLayoutLmTokenClassifier, PretrainedConfig),
-    "LayoutLMForSequenceClassification": (
-        LayoutLMForSequenceClassification,
-        HFLayoutLmSequenceClassifier,
-        PretrainedConfig,
-    ),
-    "LayoutLMv2ForTokenClassification": (
-        LayoutLMv2ForTokenClassification,
-        HFLayoutLmv2TokenClassifier,
-        LayoutLMv2Config,
-    ),
-    "LayoutLMv2ForSequenceClassification": (
-        LayoutLMv2ForSequenceClassification,
-        HFLayoutLmv2SequenceClassifier,
-        LayoutLMv2Config,
-    ),
-}
-
+with try_import() as pt_import_guard:
+    from torch import nn
+    from torch.utils.data import Dataset
 
-
-
+with try_import() as tr_import_guard:
+    from transformers import (
+        IntervalStrategy,
         LayoutLMForSequenceClassification,
-        HFLayoutLmSequenceClassifier,
-        PretrainedConfig,
-    ),
-    ("layoutlm", DatasetType.token_classification): (
         LayoutLMForTokenClassification,
-        HFLayoutLmTokenClassifier,
-        PretrainedConfig,
-    ),
-    ("layoutlmv2", DatasetType.sequence_classification): (
-        LayoutLMv2ForSequenceClassification,
-        HFLayoutLmv2SequenceClassifier,
         LayoutLMv2Config,
-
-    ("layoutlmv2", DatasetType.token_classification): (
+        LayoutLMv2ForSequenceClassification,
         LayoutLMv2ForTokenClassification,
-        HFLayoutLmv2TokenClassifier,
-        LayoutLMv2Config,
-    ),
-    ("layoutlmv3", DatasetType.sequence_classification): (
-        LayoutLMv3ForSequenceClassification,
-        HFLayoutLmv3SequenceClassifier,
         LayoutLMv3Config,
-
-    ("layoutlmv3", DatasetType.token_classification): (
+        LayoutLMv3ForSequenceClassification,
         LayoutLMv3ForTokenClassification,
-
-
-
-
-
-
-
-
-
-
+        LiltForSequenceClassification,
+        LiltForTokenClassification,
+        PretrainedConfig,
+        PreTrainedModel,
+        XLMRobertaForSequenceClassification,
+    )
+    from transformers.trainer import Trainer, TrainingArguments
+
+with try_import() as wb_import_guard:
+    import wandb
+
+
+def get_model_architectures_and_configs(model_type: str, dataset_type: DatasetType) -> Tuple[Any, Any, Any]:
+    """
+    Get the model architecture, model wrapper and config class for a given model type and dataset type.
+
+    :param model_type: The model type
+    :param dataset_type: The dataset type
+    :return: Tuple of model architecture, model wrapper and config class
+    """
+    return {
+        ("layoutlm", DatasetType.sequence_classification): (
+            LayoutLMForSequenceClassification,
+            HFLayoutLmSequenceClassifier,
+            PretrainedConfig,
+        ),
+        ("layoutlm", DatasetType.token_classification): (
+            LayoutLMForTokenClassification,
+            HFLayoutLmTokenClassifier,
+            PretrainedConfig,
+        ),
+        ("layoutlmv2", DatasetType.sequence_classification): (
+            LayoutLMv2ForSequenceClassification,
+            HFLayoutLmv2SequenceClassifier,
+            LayoutLMv2Config,
+        ),
+        ("layoutlmv2", DatasetType.token_classification): (
+            LayoutLMv2ForTokenClassification,
+            HFLayoutLmv2TokenClassifier,
+            LayoutLMv2Config,
+        ),
+        ("layoutlmv3", DatasetType.sequence_classification): (
+            LayoutLMv3ForSequenceClassification,
+            HFLayoutLmv3SequenceClassifier,
+            LayoutLMv3Config,
+        ),
+        ("layoutlmv3", DatasetType.token_classification): (
+            LayoutLMv3ForTokenClassification,
+            HFLayoutLmv3TokenClassifier,
+            LayoutLMv3Config,
+        ),
+        ("lilt", DatasetType.token_classification): (
+            LiltForTokenClassification,
+            HFLiltTokenClassifier,
+            PretrainedConfig,
+        ),
+        ("lilt", DatasetType.sequence_classification): (
+            LiltForSequenceClassification,
+            HFLiltSequenceClassifier,
+            PretrainedConfig,
+        ),
+        ("xlm-roberta", DatasetType.sequence_classification): (
+            XLMRobertaForSequenceClassification,
+            HFLmSequenceClassifier,
+            PretrainedConfig,
+        ),
+    }[(model_type, dataset_type)]
+
+
+def maybe_remove_bounding_box_features(model_type: str) -> bool:
+    """Listing of models that do not need bounding box features."""
+    return {"xlm-roberta": True}.get(model_type, False)
 
 
 class LayoutLMTrainer(Trainer):
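
The hunk above replaces the architecture-keyed module constant with get_model_architectures_and_configs, which dispatches on a (model_type, dataset_type) pair and lets unsupported combinations fail as a KeyError. A minimal, self-contained sketch of that dispatch pattern follows; the class and enum names are invented for the illustration and are not deepdoctection's.

from enum import Enum
from typing import Type


class DatasetKind(str, Enum):  # stand-in for deepdoctection's DatasetType
    sequence_classification = "sequence_classification"
    token_classification = "token_classification"


class ToySequenceModel:
    ...


class ToyTokenModel:
    ...


def pick_model(model_type: str, dataset_kind: DatasetKind) -> Type:
    # A tuple-keyed dict keeps all supported combinations in one place;
    # anything else surfaces as a KeyError at call time, as in the hunk above.
    return {
        ("toy", DatasetKind.sequence_classification): ToySequenceModel,
        ("toy", DatasetKind.token_classification): ToyTokenModel,
    }[(model_type, dataset_kind)]


print(pick_model("toy", DatasetKind.token_classification).__name__)  # ToyTokenModel
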
@@ -145,7 +157,7 @@ class LayoutLMTrainer(Trainer):
 
     def __init__(
         self,
-        model: Union[PreTrainedModel, Module],
+        model: Union[PreTrainedModel, nn.Module],
         args: TrainingArguments,
         data_collator: LayoutLMDataCollator,
         train_dataset: Dataset[Any],
@@ -159,7 +171,7 @@ class LayoutLMTrainer(Trainer):
         dataset_val: DatasetBase,
         pipeline_component: LanguageModelPipelineComponent,
         metric: Union[Type[ClassificationMetric], ClassificationMetric],
-        run: Optional[
+        run: Optional[wandb.sdk.wandb_run.Run] = None,
         **build_eval_kwargs: Union[str, int],
     ) -> None:
         """
@@ -208,26 +220,27 @@ class LayoutLMTrainer(Trainer):
 
 
 def _get_model_class_and_tokenizer(
-    path_config_json: str, dataset_type:
-) -> Tuple[Any, Any, Any, Any]:
+    path_config_json: str, dataset_type: DatasetType, use_xlm_tokenizer: bool
+) -> Tuple[Any, Any, Any, Any, Any]:
     with open(path_config_json, "r", encoding="UTF-8") as file:
         config_json = json.load(file)
 
-    model_type
-
-
-        model_cls, model_wrapper_cls, config_cls = _ARCHITECTURES_TO_MODEL_CLASS[architectures[0]]
-        tokenizer_fast = get_tokenizer_from_architecture(architectures[0], use_xlm_tokenizer)
-    elif model_type:
-        model_cls, model_wrapper_cls, config_cls = _MODEL_TYPE_AND_TASK_TO_MODEL_CLASS[(model_type, dataset_type)]
-        tokenizer_fast = _MODEL_TYPE_TO_TOKENIZER[(model_type, use_xlm_tokenizer)]
+    if model_type := config_json.get("model_type"):
+        model_cls, model_wrapper_cls, config_cls = get_model_architectures_and_configs(model_type, dataset_type)
+        remove_box_features = maybe_remove_bounding_box_features(model_type)
     else:
-        raise KeyError("model_type
+        raise KeyError("model_type not available in configs. It seems that the config is not valid")
 
-
-
+    tokenizer_fast = get_tokenizer_from_model_class(model_cls.__name__, use_xlm_tokenizer)
+    return config_cls, model_cls, model_wrapper_cls, tokenizer_fast, remove_box_features
 
-
+
+def get_image_to_raw_features_mapping(input_str: str) -> Any:
+    """Replacing eval functions"""
+    return {
+        "image_to_raw_layoutlm_features": image_to_raw_layoutlm_features,
+        "image_to_raw_lm_features": image_to_raw_lm_features,
+    }[input_str]
 
 
 def train_hf_layoutlm(
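
The rewritten helper above keys everything off the model_type field of the checkpoint's config.json, binding and testing it in one step with the walrus operator. A toy, self-contained illustration of that control flow (the temporary config file stands in for a real HuggingFace checkpoint directory):

import json
import tempfile

# A toy config.json; in practice this is the config file of a HuggingFace checkpoint.
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as tmp:
    json.dump({"model_type": "layoutlm"}, tmp)
    path_config_json = tmp.name

with open(path_config_json, "r", encoding="UTF-8") as file:
    config_json = json.load(file)

# Same control flow as above: bind and test "model_type" in one step,
# and treat a missing or empty value as an invalid config.
if model_type := config_json.get("model_type"):
    print(f"dispatching on model_type={model_type!r}")
else:
    raise KeyError("model_type not available in configs. It seems that the config is not valid")
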
@@ -352,17 +365,19 @@ def train_hf_layoutlm(
     else:
         raise UserWarning("Dataset type not supported for training")
 
-    config_cls, model_cls, model_wrapper_cls, tokenizer_fast = _get_model_class_and_tokenizer(
+    config_cls, model_cls, model_wrapper_cls, tokenizer_fast, remove_box_features = _get_model_class_and_tokenizer(
         path_config_json, dataset_type, use_xlm_tokenizer
     )
-
+    image_to_raw_features_func = get_image_to_raw_features_mapping(model_wrapper_cls.image_to_raw_features_mapping())
+    image_to_raw_features_kwargs = {"dataset_type": dataset_type, "use_token_tag": use_token_tag}
     if segment_positions:
-
-
+        image_to_raw_features_kwargs["segment_positions"] = segment_positions  # type: ignore
+    image_to_raw_features_kwargs.update(model_wrapper_cls.default_kwargs_for_input_mapping())
+
     dataset = DatasetAdapter(
         dataset_train,
         True,
-
+        image_to_raw_features_func(**image_to_raw_features_kwargs),
         use_token_tag,
         **build_train_dict,
     )
@@ -453,6 +468,7 @@ def train_hf_layoutlm(
         return_tensors="pt",
         sliding_window_stride=sliding_window_stride,  # type: ignore
         max_batch_size=max_batch_size,  # type: ignore
+        remove_bounding_box_features=remove_box_features,
     )
     trainer = LayoutLMTrainer(model, arguments, data_collator, dataset)
 
@@ -475,7 +491,8 @@ def train_hf_layoutlm(
         path_config_json=path_config_json,
         path_weights=path_weights,
         categories=categories,
-        device=
+        device=get_torch_device(),
+        use_xlm_tokenizer=use_xlm_tokenizer,
     )
     pipeline_component_cls = pipeline_component_registry.get(pipeline_component_name)
     if dataset_type == DatasetType.sequence_classification:

deepdoctection/train/tp_frcnn_train.py
CHANGED

@@ -22,25 +22,7 @@ Module for training Tensorpack `GeneralizedRCNN`
 import os
 from typing import Dict, List, Optional, Sequence, Type, Union
 
-
-from tensorpack.callbacks import (
-    EstimatedTimeLeft,
-    GPUMemoryTracker,
-    GPUUtilizationTracker,
-    HostMemoryTracker,
-    ModelSaver,
-    PeriodicCallback,
-    ScheduledHyperParamSetter,
-    SessionRunTimeout,
-    ThroughputTracker,
-)
-
-# todo: check how dataflow import is directly possible without having AssertionError
-from tensorpack.dataflow import ProxyDataFlow, imgaug
-from tensorpack.input_source import QueueInput
-from tensorpack.tfutils import SmartInit
-from tensorpack.train import SyncMultiGPUTrainerReplicated, TrainConfig, launch_train_with_config
-from tensorpack.utils import logger
+from lazy_imports import try_import
 
 from ..dataflow.base import DataFlow
 from ..dataflow.common import MapData
@@ -68,6 +50,26 @@ from ..utils.metacfg import AttrDict, set_config_by_yaml
 from ..utils.tqdm import get_tqdm
 from ..utils.utils import string_to_dict
 
+with try_import() as tp_import_guard:
+    # todo: check how dataflow import is directly possible without having an AssertionError
+    # pylint: disable=import-error
+    from tensorpack.callbacks import (
+        EstimatedTimeLeft,
+        GPUMemoryTracker,
+        GPUUtilizationTracker,
+        HostMemoryTracker,
+        ModelSaver,
+        PeriodicCallback,
+        ScheduledHyperParamSetter,
+        SessionRunTimeout,
+        ThroughputTracker,
+    )
+    from tensorpack.dataflow import ProxyDataFlow, imgaug
+    from tensorpack.input_source import QueueInput
+    from tensorpack.tfutils import SmartInit
+    from tensorpack.train import SyncMultiGPUTrainerReplicated, TrainConfig, launch_train_with_config
+    from tensorpack.utils import logger
+
 __all__ = ["train_faster_rcnn"]
 
 
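
Both training modules now defer their heavy framework imports behind try_import() guards from the lazy-imports package, so importing the module no longer fails when an optional backend is missing. The sketch below re-creates the idea with the standard library only, so it runs without lazy-imports installed; ImportGuard, try_import and train_faster_rcnn_stub are illustrative names, not the package's API.

# Stdlib-only sketch of the guarded-import idea; illustrative names, not the lazy-imports API.
import contextlib
from typing import Iterator, Optional


class ImportGuard:
    """Remembers an ImportError so it can be raised later, at the point of use."""

    def __init__(self) -> None:
        self.error: Optional[BaseException] = None

    def check(self) -> None:
        if self.error is not None:
            raise ImportError("optional dependency is not installed") from self.error


@contextlib.contextmanager
def try_import() -> Iterator[ImportGuard]:
    guard = ImportGuard()
    try:
        yield guard
    except ImportError as err:
        guard.error = err  # swallow now, surface later via check()


with try_import() as tp_import_guard:
    import tensorpack  # heavy optional dependency; a failure here is recorded, not raised


def train_faster_rcnn_stub() -> None:
    tp_import_guard.check()  # only now does a missing backend become an error
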
deepdoctection/utils/env_info.py
CHANGED

@@ -46,16 +46,16 @@ can store an (absolute) path to a `.jsonl` file.
 
 """
 
-import ast
 import importlib
 import os
 import re
 import subprocess
 import sys
 from collections import defaultdict
-from typing import List,
+from typing import List, Optional, Tuple
 
 import numpy as np
+from packaging import version
 from tabulate import tabulate
 
 from .file_utils import (
@@ -68,6 +68,7 @@ from .file_utils import (
     fasttext_available,
     get_poppler_version,
     get_tesseract_version,
+    get_tf_version,
     jdeskew_available,
     lxml_available,
     opencv_available,
@@ -84,13 +85,9 @@ from .file_utils import (
     transformers_available,
     wandb_available,
 )
-from .logger import LoggingRecord, logger
 
 __all__ = [
-    "collect_torch_env",
     "collect_env_info",
-    "get_device",
-    "auto_select_lib_and_device",
     "auto_select_viz_library",
 ]
 
@@ -270,7 +267,22 @@ def tf_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     if tf_available():
         import tensorflow as tf  # type: ignore # pylint: disable=E0401
 
+        os.environ["TENSORFLOW_AVAILABLE"] = "1"
+
         data.append(("Tensorflow", tf.__version__))
+        if version.parse(get_tf_version()) > version.parse("2.4.1"):
+            os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+        try:
+            import tensorflow.python.util.deprecation as deprecation  # type: ignore # pylint: disable=E0401,R0402
+
+            deprecation._PRINT_DEPRECATION_WARNINGS = False  # pylint: disable=W0212
+        except Exception:  # pylint: disable=W0703
+            try:
+                from tensorflow.python.util import deprecation  # type: ignore # pylint: disable=E0401
+
+                deprecation._PRINT_DEPRECATION_WARNINGS = False  # pylint: disable=W0212
+            except Exception:  # pylint: disable=W0703
+                pass
     else:
         data.append(("Tensorflow", "None"))
     return data
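
The TensorFlow branch above now gates the TF_CPP_MIN_LOG_LEVEL tweak on the installed version using packaging.version rather than string comparison. A minimal example of that comparison, with an arbitrary stand-in version string:

import os

from packaging import version

installed_tf = "2.11.0"  # stand-in for what get_tf_version() would report
if version.parse(installed_tf) > version.parse("2.4.1"):
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # silence most of TensorFlow's C++-level log output
print(version.parse(installed_tf) > version.parse("2.4.1"))  # True
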
@@ -279,12 +291,18 @@ def tf_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
 
         try:
             for key, value in list(build_info.build_info.items()):
-                if key == "
+                if key == "is_cuda_build":
+                    data.append(("TF compiled with CUDA", value))
+                    if value and len(tf.config.list_physical_devices('GPU')):
+                        os.environ["USE_CUDA"] = "1"
+                elif key == "cuda_version":
                     data.append(("TF built with CUDA", value))
                 elif key == "cudnn_version":
                     data.append(("TF built with CUDNN", value))
                 elif key == "cuda_compute_capabilities":
                     data.append(("TF compute capabilities", ",".join([k.replace("compute_", "") for k in value])))
+                elif key == "is_rocm_build":
+                    data.append(("TF compiled with ROCM", value))
             return data
         except AttributeError:
             pass
@@ -306,6 +324,13 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
 
     if pytorch_available():
         import torch
+
+        os.environ["PYTORCH_AVAILABLE"] = "1"
+
+    else:
+        data.append(("PyTorch", "None"))
+        return []
+
     has_gpu = torch.cuda.is_available()  # true for both CUDA & ROCM
     has_mps = torch.backends.mps.is_available()
 
@@ -331,12 +356,9 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     data.append(("PyTorch", torch_version + " @" + os.path.dirname(torch.__file__)))
     data.append(("PyTorch debug build", str(torch.version.debug)))
 
-    if not has_gpu:
-        has_gpu_text = "No: torch.cuda.is_available() == False"
-    else:
-        has_gpu_text = "Yes"
-    data.append(("GPU available", has_gpu_text))
     if has_gpu:
+        os.environ["USE_CUDA"] = "1"
+        has_gpu_text = "Yes"
         devices = defaultdict(list)
         for k in range(torch.cuda.device_count()):
             cap = ".".join((str(x) for x in torch.cuda.get_device_capability(k)))
@@ -362,6 +384,10 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
         cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None)
         if cuda_arch_list:
             data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list))
+    else:
+        has_gpu_text = "No: torch.cuda.is_available() == False"
+
+    data.append(("GPU available", has_gpu_text))
 
     mps_build = "No: torch.backends.mps.is_built() == False"
     if not has_mps:
@@ -369,9 +395,11 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     else:
         has_mps_text = "Yes"
         mps_build = str(torch.backends.mps.is_built())
+        if mps_build == "True":
+            os.environ["USE_MPS"] = "1"
 
     data.append(("MPS available", has_mps_text))
-    data.append(("MPS
+    data.append(("MPS built", mps_build))
 
     try:
         import torchvision  # type: ignore
@@ -452,110 +480,6 @@ def collect_env_info() -> str:
     return env_str
 
 
-def set_env(name: str, value: str) -> None:
-    """
-    Set an environment variable if it is not already set.
-
-    :param name: The name of the environment variable
-    :param value: The value of the environment variable
-    """
-
-    if os.environ.get(name):
-        return
-    os.environ[name] = value
-    return
-
-
-def auto_select_lib_and_device() -> None:
-    """
-    Select the DL library and subsequently the device.
-    This will set environment variable `USE_TENSORFLOW`, `USE_PYTORCH` and `USE_CUDA`
-
-    If TF is available, use TF unless a GPU is not available, in which case choose PT. If CUDA is not available and PT
-    is not installed raise ImportError.
-    """
-
-    # USE_TF and USE_TORCH are env variables that steer DL library selection for Doctr.
-    if tf_available() and tensorpack_available():
-        from tensorpack.utils.gpu import get_num_gpu  # pylint: disable=E0401
-
-        if get_num_gpu() >= 1:
-            set_env("USE_TENSORFLOW", "True")
-            set_env("USE_PYTORCH", "False")
-            set_env("USE_CUDA", "True")
-            set_env("USE_MPS", "False")
-            set_env("USE_TF", "TRUE")
-            set_env("USE_TORCH", "False")
-            return
-        if pytorch_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "True")
-            set_env("USE_CUDA", "False")
-            set_env("USE_TF", "False")
-            set_env("USE_TORCH", "TRUE")
-            return
-        logger.warning(
-            LoggingRecord("You have Tensorflow installed but no GPU is available. All Tensorflow models require a GPU.")
-        )
-        if tf_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "False")
-            set_env("USE_CUDA", "False")
-            set_env("USE_TF", "AUTO")
-            set_env("USE_TORCH", "AUTO")
-            return
-
-    if pytorch_available():
-        import torch
-
-        if torch.cuda.is_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "True")
-            set_env("USE_CUDA", "True")
-            set_env("USE_TF", "False")
-            set_env("USE_TORCH", "TRUE")
-            return
-        if torch.backends.mps.is_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "True")
-            set_env("USE_CUDA", "False")
-            set_env("USE_MPS", "True")
-            set_env("USE_TF", "False")
-            set_env("USE_TORCH", "TRUE")
-            return
-        set_env("USE_TENSORFLOW", "False")
-        set_env("USE_PYTORCH", "True")
-        set_env("USE_CUDA", "False")
-        set_env("USE_MPS", "False")
-        set_env("USE_TF", "AUTO")
-        set_env("USE_TORCH", "AUTO")
-        return
-    logger.warning(
-        LoggingRecord(
-            "Neither Tensorflow or Pytorch are available. You will not be able to use any Deep Learning "
-            "model from the library."
-        )
-    )
-
-
-def get_device(ignore_cpu: bool = True) -> Literal["cuda", "mps", "cpu"]:
-    """
-    Device checks for running PyTorch with CUDA, MPS or optionall CPU.
-    If nothing can be found and if `disable_cpu` is deactivated it will raise a `ValueError`
-
-    :param ignore_cpu: Will not consider `cpu` as valid return value
-    :return: Either cuda or mps
-    """
-
-    if ast.literal_eval(os.environ.get("USE_CUDA", "True")):
-        return "cuda"
-    if ast.literal_eval(os.environ.get("USE_MPS", "True")):
-        return "mps"
-    if not ignore_cpu:
-        return "cpu"
-    raise RuntimeWarning("Could not find either GPU nor MPS")
-
-
 def auto_select_viz_library() -> None:
     """Setting PIL as default image library if cv2 is not installed"""
 
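
With set_env, auto_select_lib_and_device and get_device removed from env_info.py, the module now only exports capability flags (TENSORFLOW_AVAILABLE, PYTORCH_AVAILABLE, USE_CUDA, USE_MPS) from tf_info/pt_info; device selection has moved to get_torch_device in deepdoctection/extern/pt/ptutils.py. The sketch below is a hypothetical consumer of those flags, written only to show the contract; it is not the actual get_torch_device implementation, which may differ.

import os
from typing import Literal


def pick_torch_device(ignore_cpu: bool = True) -> Literal["cuda", "mps", "cpu"]:
    """Hypothetical reader of the USE_CUDA / USE_MPS flags exported by pt_info()."""
    if os.environ.get("USE_CUDA") == "1":
        return "cuda"
    if os.environ.get("USE_MPS") == "1":
        return "mps"
    if not ignore_cpu:
        return "cpu"
    raise RuntimeError("Neither CUDA nor MPS is available")
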
deepdoctection/utils/logger.py
CHANGED

@@ -134,6 +134,7 @@ class FileFormatter(logging.Formatter):
 _LOG_DIR = None
 _CONFIG_DICT: Dict[str, Any] = {
     "version": 1,
+    "disable_existing_loggers": False,
     "filters": {"customfilter": {"()": lambda: CustomFilter()}},  # pylint: disable=W0108
     "formatters": {
         "streamformatter": {"()": lambda: StreamFormatter(datefmt="%m%d %H:%M.%S")},