PyPI - deepdoctection - Versions diffs - 0.32__py3-none-any.whl → 0.34__py3-none-any.whl - Mend

deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (111) hide show

deepdoctection/__init__.py +8 -25
deepdoctection/analyzer/dd.py +84 -71
deepdoctection/dataflow/common.py +9 -5
deepdoctection/dataflow/custom.py +5 -5
deepdoctection/dataflow/custom_serialize.py +75 -18
deepdoctection/dataflow/parallel_map.py +3 -3
deepdoctection/dataflow/serialize.py +4 -4
deepdoctection/dataflow/stats.py +3 -3
deepdoctection/datapoint/annotation.py +78 -56
deepdoctection/datapoint/box.py +7 -7
deepdoctection/datapoint/convert.py +6 -6
deepdoctection/datapoint/image.py +157 -75
deepdoctection/datapoint/view.py +175 -151
deepdoctection/datasets/adapter.py +30 -24
deepdoctection/datasets/base.py +10 -10
deepdoctection/datasets/dataflow_builder.py +3 -3
deepdoctection/datasets/info.py +23 -25
deepdoctection/datasets/instances/doclaynet.py +48 -49
deepdoctection/datasets/instances/fintabnet.py +44 -45
deepdoctection/datasets/instances/funsd.py +23 -23
deepdoctection/datasets/instances/iiitar13k.py +8 -8
deepdoctection/datasets/instances/layouttest.py +2 -2
deepdoctection/datasets/instances/publaynet.py +3 -3
deepdoctection/datasets/instances/pubtables1m.py +18 -18
deepdoctection/datasets/instances/pubtabnet.py +30 -29
deepdoctection/datasets/instances/rvlcdip.py +28 -29
deepdoctection/datasets/instances/xfund.py +51 -30
deepdoctection/datasets/save.py +6 -6
deepdoctection/eval/accmetric.py +32 -33
deepdoctection/eval/base.py +8 -9
deepdoctection/eval/cocometric.py +13 -12
deepdoctection/eval/eval.py +32 -26
deepdoctection/eval/tedsmetric.py +16 -12
deepdoctection/eval/tp_eval_callback.py +7 -16
deepdoctection/extern/base.py +339 -134
deepdoctection/extern/d2detect.py +69 -89
deepdoctection/extern/deskew.py +11 -10
deepdoctection/extern/doctrocr.py +81 -64
deepdoctection/extern/fastlang.py +23 -16
deepdoctection/extern/hfdetr.py +53 -38
deepdoctection/extern/hflayoutlm.py +216 -155
deepdoctection/extern/hflm.py +35 -30
deepdoctection/extern/model.py +433 -255
deepdoctection/extern/pdftext.py +15 -15
deepdoctection/extern/pt/ptutils.py +4 -2
deepdoctection/extern/tessocr.py +39 -38
deepdoctection/extern/texocr.py +14 -16
deepdoctection/extern/tp/tfutils.py +16 -2
deepdoctection/extern/tp/tpcompat.py +11 -7
deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
deepdoctection/extern/tpdetect.py +40 -45
deepdoctection/mapper/cats.py +36 -40
deepdoctection/mapper/cocostruct.py +16 -12
deepdoctection/mapper/d2struct.py +22 -22
deepdoctection/mapper/hfstruct.py +7 -7
deepdoctection/mapper/laylmstruct.py +22 -24
deepdoctection/mapper/maputils.py +9 -10
deepdoctection/mapper/match.py +33 -2
deepdoctection/mapper/misc.py +6 -7
deepdoctection/mapper/pascalstruct.py +4 -4
deepdoctection/mapper/prodigystruct.py +6 -6
deepdoctection/mapper/pubstruct.py +84 -92
deepdoctection/mapper/tpstruct.py +3 -3
deepdoctection/mapper/xfundstruct.py +33 -33
deepdoctection/pipe/anngen.py +39 -14
deepdoctection/pipe/base.py +68 -99
deepdoctection/pipe/common.py +181 -85
deepdoctection/pipe/concurrency.py +14 -10
deepdoctection/pipe/doctectionpipe.py +24 -21
deepdoctection/pipe/language.py +20 -25
deepdoctection/pipe/layout.py +18 -16
deepdoctection/pipe/lm.py +49 -47
deepdoctection/pipe/order.py +63 -65
deepdoctection/pipe/refine.py +102 -109
deepdoctection/pipe/segment.py +157 -162
deepdoctection/pipe/sub_layout.py +50 -40
deepdoctection/pipe/text.py +37 -36
deepdoctection/pipe/transform.py +19 -16
deepdoctection/train/d2_frcnn_train.py +27 -25
deepdoctection/train/hf_detr_train.py +22 -18
deepdoctection/train/hf_layoutlm_train.py +49 -48
deepdoctection/train/tp_frcnn_train.py +10 -11
deepdoctection/utils/concurrency.py +1 -1
deepdoctection/utils/context.py +13 -6
deepdoctection/utils/develop.py +4 -4
deepdoctection/utils/env_info.py +52 -14
deepdoctection/utils/file_utils.py +6 -11
deepdoctection/utils/fs.py +41 -14
deepdoctection/utils/identifier.py +2 -2
deepdoctection/utils/logger.py +15 -15
deepdoctection/utils/metacfg.py +7 -7
deepdoctection/utils/pdf_utils.py +39 -14
deepdoctection/utils/settings.py +188 -182
deepdoctection/utils/tqdm.py +1 -1
deepdoctection/utils/transform.py +14 -9
deepdoctection/utils/types.py +104 -0
deepdoctection/utils/utils.py +7 -7
deepdoctection/utils/viz.py +70 -69
{deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
deepdoctection-0.34.dist-info/RECORD +146 -0
{deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
deepdoctection/utils/detection_types.py +0 -68
deepdoctection-0.32.dist-info/RECORD +0 -146
{deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
{deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0

deepdoctection/__init__.py CHANGED Viewed

@@ -15,7 +15,6 @@ if importlib.util.find_spec("dotenv") is not None:
 # pylint: disable=wrong-import-position
-import os
 import sys
 from typing import TYPE_CHECKING
@@ -25,11 +24,10 @@ from .utils.logger import LoggingRecord, logger
 # pylint: enable=wrong-import-position
-__version__ = 0.32
+__version__ = 0.34
 _IMPORT_STRUCTURE = {
     "analyzer": [
-        "maybe_copy_config_to_cache",
         "config_sanity_checks",
         "build_detector",
         "build_padder",
@@ -76,6 +74,7 @@ _IMPORT_STRUCTURE = {
     ],
     "datapoint": [
         "ann_from_dict",
+        "AnnotationMap",
         "Annotation",
         "CategoryAnnotation",
         "ImageAnnotation",
@@ -160,6 +159,8 @@ _IMPORT_STRUCTURE = {
         "EvalCallback",
     ],
     "extern": [
+        "ModelCategories",
+        "NerModelCategories",
         "PredictorBase",
         "DetectionResult",
         "ObjectDetector",
@@ -235,6 +236,7 @@ _IMPORT_STRUCTURE = {
         "LabelSummarizer",
         "curry",
         "match_anns_by_intersection",
+        "match_anns_by_distance",
         "to_image",
         "maybe_load_image",
         "maybe_remove_image",
@@ -263,6 +265,8 @@ _IMPORT_STRUCTURE = {
         "DetectResultGenerator",
         "SubImageLayoutService",
         "ImageCroppingService",
+        "IntersectionMatcher",
+        "NeighbourMatcher",
         "MatchingService",
         "PageParsingService",
         "AnnotationNmsService",
@@ -362,6 +366,7 @@ _IMPORT_STRUCTURE = {
         "get_configs_dir_path",
         "get_weights_dir_path",
         "get_dataset_dir_path",
+        "maybe_copy_config_to_cache",
         "is_uuid_like",
         "get_uuid_from_str",
         "get_uuid",
@@ -423,28 +428,6 @@ _IMPORT_STRUCTURE = {
 env_info = collect_env_info()
 logger.debug(LoggingRecord(msg=env_info))
-if os.environ.get("PYTORCH_AVAILABLE") and os.environ.get("DD_USE_TORCH") is None:
-    os.environ["DD_USE_TORCH"] = "1"
-    os.environ["USE_TORCH"] = "1"
-if os.environ.get("TENSORFLOW_AVAILABLE") and os.environ.get("DD_USE_TF") is None:
-    os.environ["DD_USE_TF"] = "1"
-    os.environ["USE_TF"] = "1"
-if os.environ.get("DD_USE_TORCH") and os.environ.get("DD_USE_TF"):
-    logger.warning(
-        "Both DD_USE_TORCH and DD_USE_TF are set. Defaulting to PyTorch. If you want a different "
-        "behaviour, set DD_USE_TORCH to None before importing deepdoctection."
-    )
-    os.environ.pop("DD_USE_TF")
-    os.environ.pop("USE_TF")
-if not os.environ.get("PYTORCH_AVAILABLE") and not os.environ.get("TENSORFLOW_AVAILABLE"):
-    logger.warning(
-        LoggingRecord(
-            msg="Neither Tensorflow or Pytorch are available. You will not be able to use any Deep Learning "
-            "model from the library."
-        )
-    )
 # Direct imports for type-checking
 if TYPE_CHECKING:

deepdoctection/analyzer/dd.py CHANGED Viewed

@@ -23,10 +23,11 @@ Module for **deep**doctection analyzer.
 -user factory with a reduced config setting
 """
+from __future__ import annotations
 import os
 from os import environ
-from shutil import copyfile
-from typing import List, Optional, Union
+from typing import Optional, Union
 from lazy_imports import try_import
@@ -42,7 +43,7 @@ from ..extern.texocr import TextractOcrDetector
 from ..extern.tp.tfutils import disable_tp_layer_logging, get_tf_device
 from ..extern.tpdetect import TPFrcnnDetector
 from ..pipe.base import PipelineComponent
-from ..pipe.common import AnnotationNmsService, MatchingService, PageParsingService
+from ..pipe.common import AnnotationNmsService, IntersectionMatcher, MatchingService, PageParsingService
 from ..pipe.doctectionpipe import DoctectionPipe
 from ..pipe.layout import ImageLayoutService
 from ..pipe.order import TextOrderService
@@ -50,21 +51,21 @@ from ..pipe.refine import TableSegmentationRefinementService
 from ..pipe.segment import PubtablesSegmentationService, TableSegmentationService
 from ..pipe.sub_layout import DetectResultGenerator, SubImageLayoutService
 from ..pipe.text import TextExtractionService
-from ..utils.detection_types import Pathlike
+from ..utils.env_info import ENV_VARS_TRUE
 from ..utils.error import DependencyError
 from ..utils.file_utils import detectron2_available, tensorpack_available
-from ..utils.fs import get_configs_dir_path, get_package_path, mkdir_p
+from ..utils.fs import get_configs_dir_path, get_package_path, maybe_copy_config_to_cache
 from ..utils.logger import LoggingRecord, logger
 from ..utils.metacfg import AttrDict, set_config_by_yaml
-from ..utils.settings import CellType, LayoutType
+from ..utils.settings import CellType, LayoutType, Relationships
 from ..utils.transform import PadTransform
+from ..utils.types import PathLikeOrStr
 with try_import() as image_guard:
     from botocore.config import Config  # type: ignore
 __all__ = [
-    "maybe_copy_config_to_cache",
     "config_sanity_checks",
     "build_detector",
     "build_padder",
@@ -74,31 +75,37 @@ __all__ = [
     "build_doctr_word",
     "get_dd_analyzer",
     "build_analyzer",
+    "set_config_by_yaml",
 ]
 _DD_ONE = "deepdoctection/configs/conf_dd_one.yaml"
 _TESSERACT = "deepdoctection/configs/conf_tesseract.yaml"
-def maybe_copy_config_to_cache(
-    package_path: Pathlike, configs_dir_path: Pathlike, file_name: str, force_copy: bool = True
-) -> str:
-    """
-    Initial copying of various files
-    :param package_path: base path to directory of source file `file_name`
-    :param configs_dir_path: base path to target directory
-    :param file_name: file to copy
-    :param force_copy: If file is already in target directory, will re-copy the file
-    :return: path to the copied file_name
-    """
-    absolute_path_source = os.path.join(package_path, file_name)
-    absolute_path = os.path.join(configs_dir_path, os.path.join("dd", os.path.split(file_name)[1]))
-    mkdir_p(os.path.split(absolute_path)[0])
-    if not os.path.isfile(absolute_path) or force_copy:
-        copyfile(absolute_path_source, absolute_path)
-    return absolute_path
+_MODEL_CHOICES = {
+    "layout": [
+        "layout/d2_model_0829999_layout_inf_only.pt",
+        "xrf_layout/model_final_inf_only.pt",
+        "microsoft/table-transformer-detection/pytorch_model.bin",
+    ],
+    "segmentation": [
+        "item/model-1620000_inf_only.data-00000-of-00001",
+        "xrf_item/model_final_inf_only.pt",
+        "microsoft/table-transformer-structure-recognition/pytorch_model.bin",
+        "deepdoctection/tatr_tab_struct_v2/pytorch_model.bin",
+    ],
+    "ocr": ["Tesseract", "DocTr", "Textract"],
+    "doctr_word": ["doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt"],
+    "doctr_recognition": [
+        "doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt",
+        "doctr/crnn_vgg16_bn/pt/pytorch_model.bin",
+    ],
+    "llm": ["gpt-3.5-turbo", "gpt-4"],
+    "segmentation_choices": {
+        "item/model-1620000_inf_only.data-00000-of-00001": "cell/model-1800000_inf_only.data-00000-of-00001",
+        "xrf_item/model_final_inf_only.pt": "xrf_cell/model_final_inf_only.pt",
+        "microsoft/table-transformer-structure-recognition/pytorch_model.bin": None,
+        "deepdoctection/tatr_tab_struct_v2/pytorch_model.bin": None,
+    },
+}
 def config_sanity_checks(cfg: AttrDict) -> None:
@@ -115,7 +122,7 @@ def config_sanity_checks(cfg: AttrDict) -> None:
 def build_detector(
     cfg: AttrDict, mode: str
-) -> Union["D2FrcnnDetector", "TPFrcnnDetector", "HFDetrDerivedDetector", "D2FrcnnTracingDetector"]:
+) -> Union[D2FrcnnDetector, TPFrcnnDetector, HFDetrDerivedDetector, D2FrcnnTracingDetector]:
     """Building a D2-Detector, a TP-Detector as Detr-Detector or a D2-Torch Tracing Detector according to
     the config
@@ -133,8 +140,8 @@ def build_detector(
     config_path = ModelCatalog.get_full_path_configs(weights)
     weights_path = ModelDownloadManager.maybe_download_weights_and_configs(weights)
     profile = ModelCatalog.get_profile(weights)
-    categories = profile.categories
-    assert categories is not None
+    categories = profile.categories if profile.categories is not None else {}
     if profile.model_wrapper in ("TPFrcnnDetector",):
         return TPFrcnnDetector(config_path, weights_path, categories, filter_categories=filter_categories)
     if profile.model_wrapper in ("D2FrcnnDetector",):
@@ -202,11 +209,13 @@ def build_sub_image_service(detector: ObjectDetector, cfg: AttrDict, mode: str)
     padder = None
     if mode == "ITEM":
         if detector.__class__.__name__ in ("HFDetrDerivedDetector",):
-            exclude_category_ids.extend(["1", "3", "4", "5", "6"])
+            exclude_category_ids.extend([1, 3, 4, 5, 6])
             padder = build_padder(cfg, mode)
-    detect_result_generator = DetectResultGenerator(detector.categories, exclude_category_ids=exclude_category_ids)
+    detect_result_generator = DetectResultGenerator(
+        categories=detector.categories.categories, exclude_category_ids=exclude_category_ids
+    )
     return SubImageLayoutService(
-        detector, [LayoutType.table, LayoutType.table_rotated], None, detect_result_generator, padder
+        detector, [LayoutType.TABLE, LayoutType.TABLE_ROTATED], None, detect_result_generator, padder
     )
@@ -233,9 +242,9 @@ def build_ocr(cfg: AttrDict) -> Union[TesseractOcrDetector, DoctrTextRecognizer,
         )
     if cfg.OCR.USE_TEXTRACT:
         credentials_kwargs = {
-            "aws_access_key_id": environ.get("ACCESS_KEY"),
-            "aws_secret_access_key": environ.get("SECRET_KEY"),
-            "config": Config(region_name=environ.get("REGION")),
+            "aws_access_key_id": environ.get("ACCESS_KEY", None),
+            "aws_secret_access_key": environ.get("SECRET_KEY", None),
+            "config": Config(region_name=environ.get("REGION", None)),
         }
         return TextractOcrDetector(**credentials_kwargs)
     raise ValueError("You have set USE_OCR=True but any of USE_TESSERACT, USE_DOCTR, USE_TEXTRACT is set to False")
@@ -260,7 +269,7 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
     :param cfg: A configuration
     :return: Analyzer pipeline
     """
-    pipe_component_list: List[PipelineComponent] = []
+    pipe_component_list: list[PipelineComponent] = []
     if cfg.USE_LAYOUT:
         d_layout = build_detector(cfg, "LAYOUT")
@@ -300,22 +309,22 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
                 cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS,
                 cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_COLS,
                 cfg.SEGMENTATION.CELL_CATEGORY_ID,
-                LayoutType.table,
+                LayoutType.TABLE,
                 [
-                    CellType.spanning,
-                    CellType.row_header,
-                    CellType.column_header,
-                    CellType.projected_row_header,
-                    LayoutType.cell,
+                    CellType.SPANNING,
+                    CellType.ROW_HEADER,
+                    CellType.COLUMN_HEADER,
+                    CellType.PROJECTED_ROW_HEADER,
+                    LayoutType.CELL,
                 ],
                 [
-                    CellType.spanning,
-                    CellType.row_header,
-                    CellType.column_header,
-                    CellType.projected_row_header,
+                    CellType.SPANNING,
+                    CellType.ROW_HEADER,
+                    CellType.COLUMN_HEADER,
+                    CellType.PROJECTED_ROW_HEADER,
                 ],
-                [LayoutType.row, LayoutType.column],
-                [CellType.row_number, CellType.column_number],
+                [LayoutType.ROW, LayoutType.COLUMN],
+                [CellType.ROW_NUMBER, CellType.COLUMN_NUMBER],
                 stretch_rule=cfg.SEGMENTATION.STRETCH_RULE,
             )
             pipe_component_list.append(pubtables)
@@ -327,23 +336,23 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
                 cfg.SEGMENTATION.FULL_TABLE_TILING,
                 cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS,
                 cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_COLS,
-                LayoutType.table,
-                [CellType.header, CellType.body, LayoutType.cell],
-                [LayoutType.row, LayoutType.column],
-                [CellType.row_number, CellType.column_number],
+                LayoutType.TABLE,
+                [CellType.HEADER, CellType.BODY, LayoutType.CELL],
+                [LayoutType.ROW, LayoutType.COLUMN],
+                [CellType.ROW_NUMBER, CellType.COLUMN_NUMBER],
                 cfg.SEGMENTATION.STRETCH_RULE,
             )
             pipe_component_list.append(table_segmentation)
             if cfg.USE_TABLE_REFINEMENT:
                 table_segmentation_refinement = TableSegmentationRefinementService(
-                    [LayoutType.table, LayoutType.table_rotated],
+                    [LayoutType.TABLE, LayoutType.TABLE_ROTATED],
                     [
-                        LayoutType.cell,
-                        CellType.column_header,
-                        CellType.projected_row_header,
-                        CellType.spanning,
-                        CellType.row_header,
+                        LayoutType.CELL,
+                        CellType.COLUMN_HEADER,
+                        CellType.PROJECTED_ROW_HEADER,
+                        CellType.SPANNING,
+                        CellType.ROW_HEADER,
                     ],
                 )
                 pipe_component_list.append(table_segmentation_refinement)
@@ -363,24 +372,28 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
         ocr = build_ocr(cfg)
         skip_if_text_extracted = cfg.USE_PDF_MINER
-        extract_from_roi = LayoutType.word if cfg.OCR.USE_DOCTR else None
+        extract_from_roi = LayoutType.WORD if cfg.OCR.USE_DOCTR else None
         text = TextExtractionService(
             ocr, skip_if_text_extracted=skip_if_text_extracted, extract_from_roi=extract_from_roi
         )
         pipe_component_list.append(text)
     if cfg.USE_PDF_MINER or cfg.USE_OCR:
-        match = MatchingService(
-            parent_categories=cfg.WORD_MATCHING.PARENTAL_CATEGORIES,
-            child_categories=LayoutType.word,
+        matcher = IntersectionMatcher(
             matching_rule=cfg.WORD_MATCHING.RULE,
             threshold=cfg.WORD_MATCHING.THRESHOLD,
             max_parent_only=cfg.WORD_MATCHING.MAX_PARENT_ONLY,
         )
+        match = MatchingService(
+            parent_categories=cfg.WORD_MATCHING.PARENTAL_CATEGORIES,
+            child_categories=LayoutType.WORD,
+            matcher=matcher,
+            relationship_key=Relationships.CHILD,
+        )
         pipe_component_list.append(match)
         order = TextOrderService(
-            text_container=LayoutType.word,
+            text_container=LayoutType.WORD,
             text_block_categories=cfg.TEXT_ORDERING.TEXT_BLOCK_CATEGORIES,
             floating_text_block_categories=cfg.TEXT_ORDERING.FLOATING_TEXT_BLOCK_CATEGORIES,
             include_residual_text_container=cfg.TEXT_ORDERING.INCLUDE_RESIDUAL_TEXT_CONTAINER,
@@ -392,7 +405,7 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
         pipe_component_list.append(order)
     page_parsing_service = PageParsingService(
-        text_container=LayoutType.word,
+        text_container=LayoutType.WORD,
         floating_text_block_categories=cfg.TEXT_ORDERING.FLOATING_TEXT_BLOCK_CATEGORIES,
         include_residual_text_container=cfg.TEXT_ORDERING.INCLUDE_RESIDUAL_TEXT_CONTAINER,
     )
@@ -403,8 +416,8 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
 def get_dd_analyzer(
     reset_config_file: bool = True,
-    config_overwrite: Optional[List[str]] = None,
-    path_config_file: Optional[Pathlike] = None,
+    config_overwrite: Optional[list[str]] = None,
+    path_config_file: Optional[PathLikeOrStr] = None,
 ) -> DoctectionPipe:
     """
     Factory function for creating the built-in **deep**doctection analyzer.
@@ -431,7 +444,7 @@ def get_dd_analyzer(
     :return: A DoctectionPipe instance with given configs
     """
     config_overwrite = [] if config_overwrite is None else config_overwrite
-    lib = "TF" if os.environ.get("DD_USE_TF") else "PT"
+    lib = "TF" if os.environ.get("DD_USE_TF", "0") in ENV_VARS_TRUE else "PT"
     if lib == "TF":
         device = get_tf_device()
     elif lib == "PT":
@@ -439,9 +452,9 @@ def get_dd_analyzer(
     else:
         raise DependencyError("At least one of the env variables DD_USE_TF or DD_USE_TORCH must be set.")
     dd_one_config_path = maybe_copy_config_to_cache(
-        get_package_path(), get_configs_dir_path(), _DD_ONE, reset_config_file
+        get_package_path(), get_configs_dir_path() / "dd", _DD_ONE, reset_config_file
     )
-    maybe_copy_config_to_cache(get_package_path(), get_configs_dir_path(), _TESSERACT)
+    maybe_copy_config_to_cache(get_package_path(), get_configs_dir_path() / "dd", _TESSERACT)
     # Set up of the configuration and logging
     cfg = set_config_by_yaml(dd_one_config_path if not path_config_file else path_config_file)

deepdoctection/dataflow/common.py CHANGED Viewed

@@ -12,7 +12,7 @@ Some DataFlow classes for transforming and processing datapoints. Many classes h
 """
 import itertools
 from copy import copy
-from typing import Any, Callable, Iterator, List, Union
+from typing import Any, Callable, Iterator, Union
 import tqdm
@@ -164,6 +164,10 @@ class RepeatedData(ProxyDataFlow):
                 Set to -1 to repeat ``ds`` infinite times.
         """
         self.num = num
+        if self.num != -1:
+            self.dfs = itertools.tee(df, self.num)
+        else:
+            self.dfs = ()
         super().__init__(df)
     def __len__(self) -> int:
@@ -180,8 +184,8 @@ class RepeatedData(ProxyDataFlow):
             while True:
                 yield from self.df
         else:
-            for _ in range(self.num):
-                yield from self.df
+            for df in self.dfs:
+                yield from df
 class ConcatData(DataFlow):
@@ -197,7 +201,7 @@ class ConcatData(DataFlow):
            df = ConcatData([df_1,df_2])
     """
-    def __init__(self, df_lists: List[DataFlow]) -> None:
+    def __init__(self, df_lists: list[DataFlow]) -> None:
         """
         :param df_lists: a list of DataFlow.
         """
@@ -233,7 +237,7 @@ class JoinData(DataFlow):
     `JoinData` will stop once the first Dataflow throws a StopIteration
     """
-    def __init__(self, df_lists: List[DataFlow]) -> None:
+    def __init__(self, df_lists: list[DataFlow]) -> None:
         """
         :param df_lists: a list of DataFlow. When these dataflows have different sizes, JoinData will stop when any
                         of them is exhausted.

deepdoctection/dataflow/custom.py CHANGED Viewed

@@ -21,7 +21,7 @@ from
 <https://github.com/tensorpack/dataflow/blob/master/dataflow/dataflow/common.py>
 """
-from typing import Any, Callable, Iterable, Iterator, List, Optional
+from typing import Any, Callable, Iterable, Iterator, Optional
 import numpy as np
@@ -54,7 +54,7 @@ class CacheData(ProxyDataFlow):
         :param shuffle: whether to shuffle the cache before yielding from it.
         """
         self.shuffle = shuffle
-        self.buffer: List[Any] = []
+        self.buffer: list[Any] = []
         self._guard: Optional[DataFlowReentrantGuard] = None
         self.rng = get_rng(self)
         super().__init__(df)
@@ -78,7 +78,7 @@ class CacheData(ProxyDataFlow):
                     yield dp
                     self.buffer.append(dp)
-    def get_cache(self) -> List[Any]:
+    def get_cache(self) -> list[Any]:
         """
         get the cache of the whole dataflow as a list
@@ -115,10 +115,10 @@ class CustomDataFromList(DataFromList):
     def __init__(
         self,
-        lst: List[Any],
+        lst: list[Any],
         shuffle: bool = False,
         max_datapoints: Optional[int] = None,
-        rebalance_func: Optional[Callable[[List[Any]], List[Any]]] = None,
+        rebalance_func: Optional[Callable[[list[Any]], list[Any]]] = None,
     ):
         """
         :param lst: the input list. Each element represents a datapoint.

deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl

Potentially problematic release.

deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl