deepdoctection 0.30-py3-none-any.whl → 0.32-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of deepdoctection might be problematic.
Files changed (120)
  1. deepdoctection/__init__.py +38 -29
  2. deepdoctection/analyzer/dd.py +36 -29
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/dataflow/base.py +0 -19
  5. deepdoctection/dataflow/custom.py +4 -3
  6. deepdoctection/dataflow/custom_serialize.py +14 -5
  7. deepdoctection/dataflow/parallel_map.py +12 -11
  8. deepdoctection/dataflow/serialize.py +5 -4
  9. deepdoctection/datapoint/annotation.py +35 -13
  10. deepdoctection/datapoint/box.py +3 -5
  11. deepdoctection/datapoint/convert.py +3 -1
  12. deepdoctection/datapoint/image.py +79 -36
  13. deepdoctection/datapoint/view.py +152 -49
  14. deepdoctection/datasets/__init__.py +1 -4
  15. deepdoctection/datasets/adapter.py +6 -3
  16. deepdoctection/datasets/base.py +86 -11
  17. deepdoctection/datasets/dataflow_builder.py +1 -1
  18. deepdoctection/datasets/info.py +4 -4
  19. deepdoctection/datasets/instances/doclaynet.py +3 -2
  20. deepdoctection/datasets/instances/fintabnet.py +2 -1
  21. deepdoctection/datasets/instances/funsd.py +2 -1
  22. deepdoctection/datasets/instances/iiitar13k.py +5 -2
  23. deepdoctection/datasets/instances/layouttest.py +4 -8
  24. deepdoctection/datasets/instances/publaynet.py +2 -2
  25. deepdoctection/datasets/instances/pubtables1m.py +6 -3
  26. deepdoctection/datasets/instances/pubtabnet.py +2 -1
  27. deepdoctection/datasets/instances/rvlcdip.py +2 -1
  28. deepdoctection/datasets/instances/xfund.py +2 -1
  29. deepdoctection/eval/__init__.py +1 -4
  30. deepdoctection/eval/accmetric.py +1 -1
  31. deepdoctection/eval/base.py +5 -4
  32. deepdoctection/eval/cocometric.py +2 -1
  33. deepdoctection/eval/eval.py +19 -15
  34. deepdoctection/eval/tedsmetric.py +14 -11
  35. deepdoctection/eval/tp_eval_callback.py +14 -7
  36. deepdoctection/extern/__init__.py +2 -7
  37. deepdoctection/extern/base.py +39 -13
  38. deepdoctection/extern/d2detect.py +182 -90
  39. deepdoctection/extern/deskew.py +36 -9
  40. deepdoctection/extern/doctrocr.py +265 -83
  41. deepdoctection/extern/fastlang.py +49 -9
  42. deepdoctection/extern/hfdetr.py +106 -55
  43. deepdoctection/extern/hflayoutlm.py +441 -122
  44. deepdoctection/extern/hflm.py +225 -0
  45. deepdoctection/extern/model.py +56 -47
  46. deepdoctection/extern/pdftext.py +10 -5
  47. deepdoctection/extern/pt/__init__.py +1 -3
  48. deepdoctection/extern/pt/nms.py +6 -2
  49. deepdoctection/extern/pt/ptutils.py +27 -18
  50. deepdoctection/extern/tessocr.py +134 -22
  51. deepdoctection/extern/texocr.py +6 -2
  52. deepdoctection/extern/tp/tfutils.py +43 -9
  53. deepdoctection/extern/tp/tpcompat.py +14 -11
  54. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  55. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  56. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  57. deepdoctection/extern/tp/tpfrcnn/config/config.py +9 -6
  58. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +17 -7
  60. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  61. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +9 -4
  62. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  63. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +16 -11
  64. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +17 -10
  65. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +14 -8
  66. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  67. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  68. deepdoctection/extern/tp/tpfrcnn/preproc.py +8 -9
  69. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  70. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  71. deepdoctection/extern/tpdetect.py +54 -30
  72. deepdoctection/mapper/__init__.py +3 -8
  73. deepdoctection/mapper/d2struct.py +9 -7
  74. deepdoctection/mapper/hfstruct.py +7 -2
  75. deepdoctection/mapper/laylmstruct.py +164 -21
  76. deepdoctection/mapper/maputils.py +16 -3
  77. deepdoctection/mapper/misc.py +6 -3
  78. deepdoctection/mapper/prodigystruct.py +1 -1
  79. deepdoctection/mapper/pubstruct.py +10 -10
  80. deepdoctection/mapper/tpstruct.py +3 -3
  81. deepdoctection/pipe/__init__.py +1 -1
  82. deepdoctection/pipe/anngen.py +35 -8
  83. deepdoctection/pipe/base.py +53 -19
  84. deepdoctection/pipe/common.py +23 -13
  85. deepdoctection/pipe/concurrency.py +2 -1
  86. deepdoctection/pipe/doctectionpipe.py +2 -2
  87. deepdoctection/pipe/language.py +3 -2
  88. deepdoctection/pipe/layout.py +6 -3
  89. deepdoctection/pipe/lm.py +34 -66
  90. deepdoctection/pipe/order.py +142 -35
  91. deepdoctection/pipe/refine.py +26 -24
  92. deepdoctection/pipe/segment.py +21 -16
  93. deepdoctection/pipe/{cell.py → sub_layout.py} +30 -9
  94. deepdoctection/pipe/text.py +14 -8
  95. deepdoctection/pipe/transform.py +16 -9
  96. deepdoctection/train/__init__.py +6 -12
  97. deepdoctection/train/d2_frcnn_train.py +36 -28
  98. deepdoctection/train/hf_detr_train.py +26 -17
  99. deepdoctection/train/hf_layoutlm_train.py +133 -111
  100. deepdoctection/train/tp_frcnn_train.py +21 -19
  101. deepdoctection/utils/__init__.py +3 -0
  102. deepdoctection/utils/concurrency.py +1 -1
  103. deepdoctection/utils/context.py +2 -2
  104. deepdoctection/utils/env_info.py +41 -84
  105. deepdoctection/utils/error.py +84 -0
  106. deepdoctection/utils/file_utils.py +4 -15
  107. deepdoctection/utils/fs.py +7 -7
  108. deepdoctection/utils/logger.py +1 -0
  109. deepdoctection/utils/mocks.py +93 -0
  110. deepdoctection/utils/pdf_utils.py +5 -4
  111. deepdoctection/utils/settings.py +6 -1
  112. deepdoctection/utils/transform.py +1 -1
  113. deepdoctection/utils/utils.py +0 -6
  114. deepdoctection/utils/viz.py +48 -5
  115. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/METADATA +57 -73
  116. deepdoctection-0.32.dist-info/RECORD +146 -0
  117. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/WHEEL +1 -1
  118. deepdoctection-0.30.dist-info/RECORD +0 -143
  119. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/LICENSE +0 -0
  120. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/top_level.txt +0 -0
deepdoctection/datasets/instances/iiitar13k.py

@@ -35,10 +35,13 @@ Module for IIITar13K dataset. Install the dataset following the folder structure
 │ ├── ...
 
 """
+from __future__ import annotations
 
 import os
 from typing import Mapping, Union
 
+from lazy_imports import try_import
+
 from ...dataflow import DataFlow, MapData, SerializerFiles
 from ...datasets.info import DatasetInfo
 from ...mapper.maputils import curry
@@ -53,7 +56,7 @@ from ..dataflow_builder import DataFlowBaseBuilder
 from ..info import DatasetCategories
 from ..registry import dataset_registry
 
-if lxml_available():
+with try_import() as import_guard:
     from lxml import etree
 
 _NAME = "iiitar13k"
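
A note on the pattern: this release's central refactor replaces the boolean `*_available()` helpers from `utils.file_utils` with the `lazy_imports` package. `try_import` is a context manager that swallows a failed import at module load time and records the outcome, so the failure only surfaces when the optional dependency is actually used. A minimal sketch (the module name is made up):

    from lazy_imports import try_import

    with try_import() as import_guard:
        import some_optional_dep  # hypothetical optional dependency

    if not import_guard.is_successful():
        # the module still imports; the missing dependency is reported on first use
        print("some_optional_dep is not installed")
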
@@ -99,7 +102,7 @@ class IIITar13K(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES)
 
-    def _builder(self) -> "IIITar13KBuilder":
+    def _builder(self) -> IIITar13KBuilder:
         return IIITar13KBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)
 
 
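
The second recurring change is `from __future__ import annotations` (PEP 563): annotations are no longer evaluated at definition time, so forward references such as `"IIITar13KBuilder"` can drop their quotes, as the `_builder` hunk above shows. A self-contained sketch:

    from __future__ import annotations

    class IIITar13K:
        def _builder(self) -> IIITar13KBuilder:  # defined below, no quotes needed
            return IIITar13KBuilder()

    class IIITar13KBuilder:
        pass
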
deepdoctection/datasets/instances/layouttest.py

@@ -24,6 +24,7 @@ Module for Testlayout dataset. Install the dataset following the folder structure
 ├── test
 │ ├── xrf_layout_test.jsonl
 """
+from __future__ import annotations
 
 from typing import Mapping, Union
 
@@ -49,12 +50,7 @@ _LICENSE = (
     " – Permissive – Version 1.0 License. Dr. Janis Meyer does not own the copyright of the images. \n"
     " Use of the images must abide by the PMC Open Access Subset Terms of Use."
 )
-_URL = [
-    "https://www.googleapis.com/drive/v3/files/1ZD4Ef4gd2FIfp7vR8jbnrZeXD3gSWNqE?alt"
-    "=media&key=AIzaSyDuoPG6naK-kRJikScR7cP_1sQBF1r3fWU",
-    "https://www.googleapis.com/drive/v3/files/18HD62LFLa1iAmqffo4SyjuEQ32MzyNQ0?alt"
-    "=media&key=AIzaSyDuoPG6naK-kRJikScR7cP_1sQBF1r3fWU",
-]
+
 _SPLITS: Mapping[str, str] = {"test": "test", "predict": "predict"}
 _TYPE = DatasetType.object_detection
 _LOCATION = "testlayout"
@@ -77,12 +73,12 @@ class LayoutTest(_BuiltInDataset):
 
     @classmethod
     def _info(cls) -> DatasetInfo:
-        return DatasetInfo(name=_NAME, description=_DESCRIPTION, license=_LICENSE, url=_URL, splits=_SPLITS, type=_TYPE)
+        return DatasetInfo(name=_NAME, description=_DESCRIPTION, license=_LICENSE, splits=_SPLITS, type=_TYPE)
 
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES)
 
-    def _builder(self) -> "LayoutTestBuilder":
+    def _builder(self) -> LayoutTestBuilder:
         return LayoutTestBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)
 
 
deepdoctection/datasets/instances/publaynet.py

@@ -28,7 +28,7 @@ Module for Publaynet dataset. Place the dataset as follows
 ├── train.json
 ├── val.json
 """
-
+from __future__ import annotations
 
 from typing import Mapping, Union
 
@@ -84,7 +84,7 @@ class Publaynet(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES)
 
-    def _builder(self) -> "PublaynetBuilder":
+    def _builder(self) -> PublaynetBuilder:
         return PublaynetBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)
 
 
deepdoctection/datasets/instances/pubtables1m.py

@@ -37,10 +37,13 @@ Module for PubTables1M-Detection-PASCAL-VOC dataset. Install the dataset following the folder structure
 ├── PubTables-1M-Structure_Annotations_Test
 ├── PubTables-1M-Structure_Images_Test
 """
+from __future__ import annotations
 
 import os
 from typing import Mapping, Union
 
+from lazy_imports import try_import
+
 from ...dataflow import DataFlow, MapData, SerializerFiles
 from ...datasets.info import DatasetInfo
 from ...mapper.cats import filter_cat
@@ -56,7 +59,7 @@ from ..dataflow_builder import DataFlowBaseBuilder
 from ..info import DatasetCategories
 from ..registry import dataset_registry
 
-if lxml_available():
+with try_import() as import_guard:
     from lxml import etree
 
 _NAME = "pubtables1m_det"
@@ -102,7 +105,7 @@ class Pubtables1MDet(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES_DET)
 
-    def _builder(self) -> "Pubtables1MBuilder":
+    def _builder(self) -> Pubtables1MBuilder:
         return Pubtables1MBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)
 
 
@@ -225,7 +228,7 @@ class Pubtables1MStruct(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES_STRUCT)
 
-    def _builder(self) -> "Pubtables1MBuilderStruct":
+    def _builder(self) -> Pubtables1MBuilderStruct:
         return Pubtables1MBuilderStruct(location=_LOCATION, annotation_files=_ANNOTATION_FILES_STRUCT)
 
 
deepdoctection/datasets/instances/pubtabnet.py

@@ -27,6 +27,7 @@ Module for Pubtabnet dataset. Place the dataset as follows
 │ ├── PMC3.png
 ├── PubTabNet_2.0.0.jsonl
 """
+from __future__ import annotations
 
 from typing import Dict, List, Mapping, Union
 
@@ -119,7 +120,7 @@ class Pubtabnet(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES, init_sub_categories=_SUB_CATEGORIES)
 
-    def _builder(self) -> "PubtabnetBuilder":
+    def _builder(self) -> PubtabnetBuilder:
         return PubtabnetBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)
 
 
deepdoctection/datasets/instances/rvlcdip.py

@@ -29,6 +29,7 @@ Module for Publaynet dataset. Place the dataset as follows
 │ ├── train.txt
 │ ├── val.txt
 """
+from __future__ import annotations
 
 import os
 from typing import Mapping, Union
@@ -102,7 +103,7 @@ class Rvlcdip(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES)
 
-    def _builder(self) -> "RvlcdipBuilder":
+    def _builder(self) -> RvlcdipBuilder:
         return RvlcdipBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)
 
 
deepdoctection/datasets/instances/xfund.py

@@ -27,6 +27,7 @@ Module for XFUND dataset. Install the dataset following the folder structure
 │ ├── de_val_0.jpg
 ├── es_train
 """
+from __future__ import annotations
 
 import json
 import os
@@ -108,7 +109,7 @@ class Xfund(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES, init_sub_categories=_SUB_CATEGORIES)
 
-    def _builder(self) -> "XfundBuilder":
+    def _builder(self) -> XfundBuilder:
         return XfundBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)
 
 
deepdoctection/eval/__init__.py

@@ -20,12 +20,9 @@ Init file for eval package. Contains metrics (customized for special tasks), evaluators and callbacks
 for training.
 """
 
-from ..utils.file_utils import apted_available
 from .accmetric import *
 from .base import *
 from .cocometric import *
 from .eval import *
 from .registry import *
-
-if apted_available():
-    from .tedsmetric import *
+from .tedsmetric import *
deepdoctection/eval/accmetric.py

@@ -87,7 +87,7 @@ def accuracy(label_gt: Sequence[int], label_predictions: Sequence[int], masks: O
     np_label_gt, np_label_pr = np.asarray(label_gt), np.asarray(label_predictions)
     if len(np_label_gt) != len(np_label_pr):
         raise ValueError(
-            f"length of label_gt ({len(np_label_gt)}) and label_predictions" f" ({len(np_label_pr)}) must be equal"
+            f"length label_gt: {len(np_label_gt)}, length label_predictions: ({len(np_label_pr)}) but must be equal"
         )
     if masks is not None:
         np_label_gt, np_label_pr = _mask_some_gt_and_pr_labels(np_label_gt, np_label_pr, masks)
deepdoctection/eval/base.py

@@ -25,6 +25,7 @@ from typing import Any, Callable, List, Optional, Tuple
 from ..dataflow import DataFlow
 from ..datasets.info import DatasetCategories
 from ..utils.detection_types import JsonDict
+from ..utils.error import DependencyError
 from ..utils.file_utils import Requirement
 
 
@@ -52,7 +53,7 @@ class MetricBase(ABC):
         requirements = cls.get_requirements()
         name = cls.__name__ if hasattr(cls, "__name__") else cls.__class__.__name__
         if not all(requirement[1] for requirement in requirements):
-            raise ImportError(
+            raise DependencyError(
                 "\n".join(
                     [f"{name} has the following dependencies:"]
                     + [requirement[2] for requirement in requirements if not requirement[1]]
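
`DependencyError` is imported from the new `deepdoctection/utils/error.py` (+84 lines in the file list above); its definition is not part of the shown hunks. A plausible, purely hypothetical reading is a dedicated exception type for missing optional dependencies:

    # Hypothetical sketch; the real class lives in deepdoctection/utils/error.py.
    class DependencyError(ImportError):
        """Raised when a component's optional third-party dependency is missing."""

Subclassing `ImportError` would keep existing `except ImportError` handlers working, but whether the actual class does so is not visible in this diff.
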
@@ -66,7 +67,7 @@ class MetricBase(ABC):
         """
         Get a list of requirements for running the detector
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     @classmethod
     @abstractmethod
@@ -80,7 +81,7 @@ class MetricBase(ABC):
         :param dataflow_predictions: Dataflow with predictions.
         :param categories: DatasetCategories with respect to the underlying dataset.
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     @classmethod
     @abstractmethod
@@ -95,7 +96,7 @@ class MetricBase(ABC):
         :param dataflow_predictions: Dataflow with predictions.
         :param categories: DatasetCategories with respect to the underlying dataset.
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     @classmethod
     def result_list_to_dict(cls, results: List[JsonDict]) -> JsonDict:
deepdoctection/eval/cocometric.py

@@ -23,6 +23,7 @@ from copy import copy
 from typing import Dict, List, Optional, Tuple, Union
 
 import numpy as np
+from lazy_imports import try_import
 
 from ..dataflow import DataFlow
 from ..datasets.info import DatasetCategories
@@ -33,7 +34,7 @@ from ..utils.file_utils import Requirement, cocotools_available, get_cocotools_requirement
 from .base import MetricBase
 from .registry import metric_registry
 
-if cocotools_available():
+with try_import() as cc_import_guard:
     from pycocotools.coco import COCO
     from pycocotools.cocoeval import COCOeval
 
deepdoctection/eval/eval.py

@@ -19,36 +19,35 @@
 """
 Module for `Evaluator`
 """
-
-__all__ = ["Evaluator"]
+from __future__ import annotations
 
 from copy import deepcopy
-from typing import Any, Dict, List, Literal, Mapping, Optional, Type, Union, overload
+from typing import Any, Dict, Generator, List, Literal, Mapping, Optional, Type, Union, overload
 
 import numpy as np
+from lazy_imports import try_import
 
 from ..dataflow import CacheData, DataFlow, DataFromList, MapData
 from ..datapoint.image import Image
 from ..datasets.base import DatasetBase
 from ..mapper.cats import filter_cat, remove_cats
+from ..mapper.d2struct import to_wandb_image
 from ..mapper.misc import maybe_load_image, maybe_remove_image, maybe_remove_image_from_category
 from ..pipe.base import LanguageModelPipelineComponent, PredictorPipelineComponent
 from ..pipe.common import PageParsingService
 from ..pipe.concurrency import MultiThreadPipelineComponent
 from ..pipe.doctectionpipe import DoctectionPipe
 from ..utils.detection_types import ImageType
-from ..utils.file_utils import detectron2_available, wandb_available
 from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import DatasetType, LayoutType, TypeOrStr, get_type
 from ..utils.viz import interactive_imshow
 from .base import MetricBase
 
-if wandb_available():
+with try_import() as wb_import_guard:
     import wandb  # pylint:disable=W0611
     from wandb import Artifact, Table
 
-if wandb_available() and detectron2_available():
-    from ..mapper.d2struct import to_wandb_image
+__all__ = ["Evaluator"]
 
 
 class Evaluator:
@@ -94,7 +93,7 @@ class Evaluator:
         component_or_pipeline: Union[PredictorPipelineComponent, LanguageModelPipelineComponent, DoctectionPipe],
         metric: Union[Type[MetricBase], MetricBase],
         num_threads: int = 2,
-        run: Optional["wandb.sdk.wandb_run.Run"] = None,
+        run: Optional[wandb.sdk.wandb_run.Run] = None,
     ) -> None:
         """
         Evaluating a pipeline component on a dataset with a given metric.
@@ -171,7 +170,7 @@ class Evaluator:
                     "metric has no attribute sub_cats and cannot be used for token classification datasets"
                 )
             else:
                raise NotImplementedError
-                raise NotImplementedError
+                raise NotImplementedError()
 
         else:
             self.wandb_table_agent = None
@@ -271,11 +270,11 @@ class Evaluator:
                 sub_cats_to_remove = meta_anns["sub_categories"]
                 df_pr = MapData(df_pr, remove_cats(sub_categories=sub_cats_to_remove))
             else:
-                raise NotImplementedError
+                raise NotImplementedError()
 
         return df_pr
 
-    def compare(self, interactive: bool = False, **kwargs: Union[str, int]) -> Optional[ImageType]:
+    def compare(self, interactive: bool = False, **kwargs: Union[str, int]) -> Generator[ImageType, None, None]:
         """
         Visualize ground truth and prediction datapoint. Given a dataflow config it will run predictions per sample
         and concat the prediction image (with predicted bounding boxes) with ground truth image.
@@ -292,6 +291,8 @@ class Evaluator:
         show_layouts = kwargs.pop("show_layouts", True)
         show_table_structure = kwargs.pop("show_table_structure", True)
         show_words = kwargs.pop("show_words", False)
+        show_token_class = kwargs.pop("show_token_class", True)
+        ignore_default_token_class = kwargs.pop("ignore_default_token_class", False)
 
         df_gt = self.dataset.dataflow.build(**kwargs)
         df_pr = self.dataset.dataflow.build(**kwargs)
@@ -321,18 +322,21 @@ class Evaluator:
                 show_layouts=show_layouts,
                 show_table_structure=show_table_structure,
                 show_words=show_words,
+                show_token_class=show_token_class,
+                ignore_default_token_class=ignore_default_token_class,
             ), dp_pred.viz(
                 show_tables=show_tables,
                 show_layouts=show_layouts,
                 show_table_structure=show_table_structure,
                 show_words=show_words,
+                show_token_class=show_token_class,
+                ignore_default_token_class=ignore_default_token_class,
             )
             img_concat = np.concatenate((img_gt, img_pred), axis=1)
             if interactive:
                 interactive_imshow(img_concat)
             else:
-                return img_concat
-        return None
+                yield img_concat
 
 
 class WandbTableAgent:
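
`compare` changes its contract from returning at most one image to yielding one concatenated ground-truth/prediction image per datapoint, so callers must now iterate. A usage sketch, where `max_datapoints` is assumed to be one of the dataflow build kwargs accepted via `**kwargs`:

    evaluator = Evaluator(dataset, pipeline_component, metric)
    for img in evaluator.compare(interactive=False, max_datapoints=3):
        # img is a numpy array: ground truth on the left, prediction on the right
        handle(img)  # hypothetical consumer
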
@@ -350,7 +354,7 @@ class WandbTableAgent:
 
     def __init__(
         self,
-        wandb_run: "wandb.sdk.wandb_run.Run",
+        wandb_run: wandb.sdk.wandb_run.Run,
         dataset_name: str,
         num_samples: int,
         categories: Mapping[str, TypeOrStr],
@@ -409,7 +413,7 @@ class WandbTableAgent:
         self._table_rows = []
         self._counter = 0
 
-    def _build_table(self) -> "Table":
+    def _build_table(self) -> Table:
         """
         Builds wandb.Table object for logging evaluation
 
deepdoctection/eval/tedsmetric.py

@@ -20,28 +20,31 @@ import statistics
 from collections import defaultdict, deque
 from typing import Any, List, Optional, Tuple
 
+from lazy_imports import try_import
+
 from ..dataflow import DataFlow, DataFromList, MapData, MultiThreadMapData
 from ..datapoint.view import Page
 from ..datasets.base import DatasetCategories
 from ..utils.detection_types import JsonDict
-from ..utils.file_utils import (
-    Requirement,
-    apted_available,
-    distance_available,
-    get_apted_requirement,
-    get_distance_requirement,
-    get_lxml_requirement,
-    lxml_available,
-)
+from ..utils.file_utils import Requirement, get_apted_requirement, get_distance_requirement, get_lxml_requirement
 from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import LayoutType
 from .base import MetricBase
 from .registry import metric_registry
 
-if distance_available() and lxml_available() and apted_available():
-    import distance  # type: ignore
+with try_import() as ap_import_guard:
     from apted import APTED, Config  # type: ignore
     from apted.helpers import Tree  # type: ignore
+
+
+if not ap_import_guard.is_successful():
+    from ..utils.mocks import Config, Tree
+
+
+with try_import() as ds_import_guard:
+    import distance  # type: ignore
+
+with try_import() as lx_import_guard:
     from lxml import etree
 
 
deepdoctection/eval/tp_eval_callback.py

@@ -19,13 +19,16 @@
 Module for EvalCallback in Tensorpack
 """
 
+from __future__ import annotations
+
 from itertools import count
 from typing import Mapping, Optional, Sequence, Type, Union
 
+from lazy_imports import try_import
+
 from ..datasets import DatasetBase
 from ..extern.tpdetect import TPFrcnnDetector
 from ..pipe.base import PredictorPipelineComponent
-from ..utils.file_utils import tensorpack_available
 from ..utils.logger import LoggingRecord, logger
 from ..utils.metacfg import AttrDict
 from ..utils.settings import ObjectTypes
@@ -33,12 +36,15 @@ from .base import MetricBase
 from .eval import Evaluator
 
 # pylint: disable=import-error
-if tensorpack_available():
+with try_import() as import_guard:
     from tensorpack.callbacks import Callback
     from tensorpack.predict import OnlinePredictor
     from tensorpack.utils.gpu import get_num_gpu
 # pylint: enable=import-error
 
+if not import_guard.is_successful():
+    from ..utils.mocks import Callback
+
 
 # The following class is modified from
 # https://github.com/tensorpack/tensorpack/blob/master/examples/FasterRCNN/eval.py
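
The fallback import explains why `deepdoctection/utils/mocks.py` (+93 lines in the file list above) exists: `EvalCallback` below subclasses `Callback`, so some class object must be importable even when tensorpack is absent. The file's body is not shown in this diff; a minimal stand-in might be no more than this (illustrative guess):

    # Hypothetical stand-in; the actual contents of utils/mocks.py are not in this diff.
    class Callback:
        """Placeholder base class so the module imports cleanly without tensorpack."""
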
@@ -53,7 +59,7 @@ class EvalCallback(Callback):  # pylint: disable=R0903
 
     _chief_only = False
 
-    def __init__(
+    def __init__(  # pylint: disable=W0231
         self,
         dataset: DatasetBase,
         category_names: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]],
@@ -83,10 +89,11 @@ class EvalCallback(Callback):  # pylint: disable=R0903
         self.num_gpu = get_num_gpu()
         self.category_names = category_names
         self.sub_categories = sub_categories
-        assert isinstance(pipeline_component.predictor, TPFrcnnDetector), (
-            f"pipeline_component.predictor must be of "
-            f"type TPFrcnnDetector but is type {type(pipeline_component.predictor)}"
-        )
+        if not isinstance(pipeline_component.predictor, TPFrcnnDetector):
+            raise TypeError(
+                f"pipeline_component.predictor must be of type TPFrcnnDetector but is "
+                f"type {type(pipeline_component.predictor)}"
+            )
         self.cfg = pipeline_component.predictor.model.cfg
         if _use_replicated(self.cfg):
             self.evaluator = Evaluator(dataset, pipeline_component, metric, num_threads=self.num_gpu * 2)
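
Replacing the `assert` with an explicit `TypeError` is more than style: `python -O` strips `assert` statements, so the old check silently disappeared in optimized mode. The difference in a generic, self-contained example:

    import math

    def safe_sqrt(x: float) -> float:
        # this line vanishes entirely under "python -O"
        assert x >= 0, "x must be non-negative"
        # this check always runs, whatever the interpreter flags
        if x < 0:
            raise ValueError(f"x must be non-negative, got {x}")
        return math.sqrt(x)
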
deepdoctection/extern/__init__.py

@@ -19,8 +19,8 @@
 Wrappers for models of external libraries as well as implementation of the Cascade-RCNN model of Tensorpack.
 """
 
-from ..utils.file_utils import detectron2_available, tensorpack_available
 from .base import *
+from .d2detect import *
 from .deskew import *
 from .doctrocr import *
 from .fastlang import *
@@ -30,9 +30,4 @@ from .model import *
 from .pdftext import *
 from .tessocr import *
 from .texocr import *  # type: ignore
-
-if tensorpack_available():
-    from .tpdetect import *
-
-if detectron2_available():
-    from .d2detect import *
+from .tpdetect import *
deepdoctection/extern/base.py

@@ -25,6 +25,7 @@ from dataclasses import dataclass
 from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
 
 from ..utils.detection_types import ImageType, JsonDict, Requirement
+from ..utils.identifier import get_uuid_from_str
 from ..utils.settings import DefaultType, ObjectTypes, TypeOrStr, get_type
 
 
@@ -34,6 +35,7 @@ class PredictorBase(ABC):
     """
 
     name: str
+    model_id: str
 
     def __new__(cls, *args, **kwargs):  # type: ignore  # pylint: disable=W0613
         requirements = cls.get_requirements()
@@ -53,14 +55,22 @@ class PredictorBase(ABC):
         """
         Get a list of requirements for running the detector
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     @abstractmethod
     def clone(self) -> "PredictorBase":
         """
         Clone an instance
         """
-        raise NotImplementedError
+        raise NotImplementedError()
+
+    def get_model_id(self) -> str:
+        """
+        Get the generating model
+        """
+        if self.name is not None:
+            return get_uuid_from_str(self.name)[:8]
+        raise ValueError("name must be set before calling get_model_id")
 
 
 @dataclass
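
`get_model_id` derives a short, stable identifier from the predictor's `name`. `get_uuid_from_str` comes from `..utils.identifier`, whose body is not shown here; a deterministic string-to-UUID mapping of this kind can be had from the standard library (illustrative sketch only, the real helper may hash differently):

    import uuid

    def get_uuid_from_str(input_str: str) -> str:
        # uuid3 is deterministic: the same name always yields the same UUID
        return str(uuid.uuid3(uuid.NAMESPACE_DNS, input_str))

    print(get_uuid_from_str("my_detector")[:8])  # a stable 8-character model_id
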
@@ -102,6 +112,7 @@ class DetectionResult:
     line: Optional[str] = None
     uuid: Optional[str] = None
     relationships: Optional[Dict[str, Any]] = None
+    angle: Optional[float] = None
 
 
 class ObjectDetector(PredictorBase):
@@ -133,7 +144,7 @@ class ObjectDetector(PredictorBase):
         """
         Abstract method predict
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     @property
     def accepts_batch(self) -> bool:
@@ -174,14 +185,14 @@ class PdfMiner(PredictorBase):
         """
         Abstract method predict
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     @abstractmethod
     def get_width_height(self, pdf_bytes: bytes) -> Tuple[float, float]:
         """
         Abstract method get_width_height
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def clone(self) -> PredictorBase:
         return self.__class__()
@@ -212,7 +223,7 @@ class TextRecognizer(PredictorBase):
         """
         Abstract method predict
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     @property
     def accepts_batch(self) -> bool:
@@ -294,7 +305,7 @@ class LMTokenClassifier(PredictorBase):
         """
         Abstract method predict
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def possible_tokens(self) -> List[ObjectTypes]:
         """
@@ -307,7 +318,7 @@ class LMTokenClassifier(PredictorBase):
         """
         Clone an instance
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     @staticmethod
     def default_kwargs_for_input_mapping() -> JsonDict:
@@ -341,7 +352,7 @@ class LMSequenceClassifier(PredictorBase):
         """
         Abstract method predict
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def possible_categories(self) -> List[ObjectTypes]:
         """
@@ -354,7 +365,7 @@ class LMSequenceClassifier(PredictorBase):
         """
         Clone an instance
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     @staticmethod
     def default_kwargs_for_input_mapping() -> JsonDict:
@@ -388,7 +399,7 @@ class LanguageDetector(PredictorBase):
         """
         Abstract method predict
         """
-        raise NotImplementedError
+        raise NotImplementedError()
 
     def possible_languages(self) -> List[ObjectTypes]:
         """
@@ -403,11 +414,26 @@ class ImageTransformer(PredictorBase):
     """
 
     @abstractmethod
-    def transform(self, np_img: ImageType) -> ImageType:
+    def transform(self, np_img: ImageType, specification: DetectionResult) -> ImageType:
         """
         Abstract method transform
         """
-        raise NotImplementedError
+        raise NotImplementedError()
+
+    @abstractmethod
+    def predict(self, np_img: ImageType) -> DetectionResult:
+        """
+        Abstract method predict
+        """
+        raise NotImplementedError()
 
     def clone(self) -> PredictorBase:
         return self.__class__()
+
+    @staticmethod
+    @abstractmethod
+    def possible_category() -> ObjectTypes:
+        """
+        Returns a (single) category the `ImageTransformer` can predict
+        """
+        raise NotImplementedError()
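
The widened `ImageTransformer` interface separates estimating a transformation (`predict`, returning a `DetectionResult`, whose new `angle` field fits exactly this use) from applying it (`transform`). Given the `deskew.py` changes in the file list, deskewing is the likely motivating case; an implementation might look roughly like this (a sketch, with hypothetical helpers and an assumed category key):

    from deepdoctection.extern.base import DetectionResult, ImageTransformer
    from deepdoctection.utils.detection_types import ImageType
    from deepdoctection.utils.settings import ObjectTypes, get_type

    class DeskewTransformer(ImageTransformer):
        name = "deskew_transformer"

        def predict(self, np_img: ImageType) -> DetectionResult:
            angle = estimate_skew_angle(np_img)  # hypothetical helper
            return DetectionResult(angle=angle)

        def transform(self, np_img: ImageType, specification: DetectionResult) -> ImageType:
            return rotate_image(np_img, specification.angle)  # hypothetical helper

        @staticmethod
        def possible_category() -> ObjectTypes:
            return get_type("angle")  # assumed category key

        @classmethod
        def get_requirements(cls) -> list:
            return []  # a real transformer would list its Requirements here
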