deepdoctection 0.42.1__py3-none-any.whl → 0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.1.dist-info/METADATA +0 -431
- deepdoctection-0.42.1.dist-info/RECORD +0 -148
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/WHEEL +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/extern/tp/tpcompat.py CHANGED

@@ -16,7 +16,10 @@
 # limitations under the License.
 
 """
-Compatibility classes and methods related to Tensorpack package
+Compatibility classes and methods related to Tensorpack package.
+
+Info:
+    This module provides compatibility classes and methods related to the Tensorpack package.
 """
 from __future__ import annotations
 
@@ -43,23 +46,32 @@ if not import_guard.is_successful():
 
 class ModelDescWithConfig(ModelDesc, ABC):  # type: ignore
     """
-    A wrapper for Tensorpack ModelDesc for bridging the gap between Tensorpack and DD API.
-
+    A wrapper for `Tensorpack ModelDesc` for bridging the gap between Tensorpack and DD API.
+
+    Only for storing a configuration of hyperparameters and maybe training settings.
+
+
     """
 
     def __init__(self, config: AttrDict) -> None:
         """
-        :
+        Args:
+            config: Config setting.
         """
         super().__init__()
         self.cfg = config
 
     def get_inference_tensor_names(self) -> tuple[list[str], list[str]]:
         """
-        Returns lists of tensor names to be used to create an inference callable.
-        of these names when called under inference context.
+        Returns lists of tensor names to be used to create an inference callable.
 
-
+        `build_graph` must create tensors of these names when called under inference context.
+
+        Returns:
+            Tuple of list input and list output names. The names must coincide with tensor within the model.
+
+        Raises:
+            NotImplementedError: If not implemented in subclass.
         """
         raise NotImplementedError()
 
@@ -82,10 +94,11 @@ class TensorpackPredictor(ABC):
 
     def __init__(self, model: ModelDescWithConfig, path_weights: PathLikeOrStr, ignore_mismatch: bool) -> None:
         """
-        :
-
-
-
+        Args:
+            model: Model, either as `ModelDescWithConfig` or derived from that class.
+            path_weights: Model weights of the prediction config.
+            ignore_mismatch: When True will ignore mismatches between checkpoint weights and models. This is needed
+                if a pre-trained model is to be fine-tuned on a custom dataset.
         """
         self._model = model
         self.path_weights = Path(path_weights)
@@ -96,7 +109,10 @@ class TensorpackPredictor(ABC):
 
     def get_predictor(self) -> OfflinePredictor:
         """
-
+        Returns an `OfflinePredictor`.
+
+        Returns:
+            Returns an `OfflinePredictor`.
         """
         return OfflinePredictor(self.predict_config)
 
@@ -117,16 +133,38 @@ class TensorpackPredictor(ABC):
         path_yaml: PathLikeOrStr, categories: Mapping[int, ObjectTypes], config_overwrite: Union[list[str], None]
     ) -> ModelDescWithConfig:
         """
-        Implement the config generation, its modification and instantiate a version of the model.
-
+        Implement the config generation, its modification and instantiate a version of the model.
+
+        See `pipe.tpfrcnn.TPFrcnnDetector` for an example.
+
+        Raises:
+            NotImplementedError: If not implemented in subclass.
+
+        Args:
+            path_yaml: Path to the yaml file.
+            categories: Mapping of categories.
+            config_overwrite: List of config overwrites or None.
+
+        Returns:
+            An instance of `ModelDescWithConfig`.
         """
         raise NotImplementedError()
 
     @abstractmethod
     def predict(self, np_img: PixelValues) -> Any:
         """
-        Implement
-
+        Implement how `self.tp_predictor` is invoked and raw prediction results are generated.
+
+        Do use only raw objects and nothing, which is related to the DD API.
+
+        Args:
+            np_img: The input image as pixel values.
+
+        Returns:
+            Raw prediction results.
+
+        Raises:
+            NotImplementedError: If not implemented in subclass.
         """
         raise NotImplementedError()
 
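The two abstract methods documented above (`set_model` and `predict`) define the contract a concrete Tensorpack predictor has to fulfil. A minimal sketch of such a subclass, assuming the TensorFlow/Tensorpack extra is installed; only the method names, signatures and the `tp_predictor` attribute come from the hunks above, while the class name, the static-factory form of `set_model` and the bodies are hypothetical:

```python
# Hypothetical subclass of TensorpackPredictor; a sketch, not the library's own code.
from deepdoctection.extern.tp.tpcompat import ModelDescWithConfig, TensorpackPredictor


class MyTpDetector(TensorpackPredictor):
    @staticmethod
    def set_model(path_yaml, categories, config_overwrite) -> ModelDescWithConfig:
        # build the AttrDict config from the yaml file, apply config_overwrite and
        # return a concrete ModelDescWithConfig (cf. pipe.tpfrcnn.TPFrcnnDetector)
        raise NotImplementedError

    def predict(self, np_img):
        # hand the raw numpy image to the Tensorpack predictor and return its raw
        # output, without converting anything into deepdoctection objects
        return self.tp_predictor(np_img)
```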
deepdoctection/extern/tp/tpfrcnn/config/config.py CHANGED

@@ -31,6 +31,7 @@ Backbone settings
 
 **BACKBONE**
 
+```python
 .BOTTLENECK: Resnet oder resnext_32xd4
 
 .FREEZE_AFFINE: Do not train affine parameters inside norm layers
@@ -44,23 +45,26 @@ Backbone settings
 .TF_PAD_MODE: Use a base model with TF-preferred padding mode, which may pad more pixels on right or bottom
 than top/left. See https://github.com/tensorflow/tensorflow/issues/18213. Using either one should probably give the same
 performance.
-
+```
 
 **CASCADE**
 
+```python
 .BBOX_REG_WEIGHTS: Bounding box regression weights
 
 .IOUS: Iou levels
-
+```
 
 **DATA**
 
+```python
 .TRAIN_NUM_WORKERS: Number of threads to use when parallelizing the pre-processing (e.g. augmenting, adding anchors,
 RPN gt-labelling,...)
-
+```
 
 **FPN**
 
+```
 .ANCHOR_STRIDES: Strides for each FPN level. Must be the same length as ANCHOR_SIZES
 
 .CASCADE: Use Cascade RCNN
@@ -78,9 +82,11 @@ RPN gt-labelling,...)
 .NUM_CHANNEL: Number of channels
 
 .PROPOSAL_MODE: Choices: 'Level', 'Joint'
+```
 
 **FRCNN**
 
+```python
 .BATCH_PER_IM: Number of total proposals selected. Will divide into fg and bg by given ratio
 
 .BBOX_REG_WEIGHTS: Bounding box regression weights
@@ -90,15 +96,19 @@ RPN gt-labelling,...)
 .FG_THRESH: Threshold how to divide fg and bg selection
 
 .MODE_MASK: Whether to train mask head
+```
 
 **MRCNN**
 
+```python
 .ACCURATE_PASTE: Slightly more aligned results, but very slow on numpy
 
 .HEAD_DIM: Head dimension
+```
 
 **PREPROC**
 
+```python
 .MAX_SIZE: Maximum edge size
 
 .PIXEL_MEAN: Pixel mean (on the training data set)
@@ -108,10 +118,11 @@ RPN gt-labelling,...)
 .SHORT_EDGE_SIZE: Size to resize the image to (inference), while not exceeding max size
 
 .TRAIN_SHORT_EDGE_SIZE: The size to resize the image to (training), while not exceeding max size. [min, max] to sample
-
+```
 
 **RPN**
 
+```python
 .ANCHOR_RATIOS: Anchor ratios
 
 .ANCHOR_SIZES: Anchor sizes
@@ -144,9 +155,11 @@ to a value larger than 1.0 will disable the feature. It is disabled by default b
 .TRAIN_POST_NMS_TOPK: Number of proposals after carrying out nms (training)
 
 .POST_NMS_TOPK: Number of proposals after carrying out nms (inference)
+```
 
 **OUTPUT**
 
+```python
 .FRCNN_NMS_THRESH: Nms threshold for output. nms being performed per class prediction
 
 .RESULTS_PER_IM: Number of output detection results
@@ -154,11 +167,13 @@ to a value larger than 1.0 will disable the feature. It is disabled by default b
 .RESULT_SCORE_THRESH: Threshold for detection result
 
 .NMS_THRESH_CLASS_AGNOSTIC: Nms threshold for output. nms being performed over all class predictions
+```
 
 TRAINER: options: 'horovod', 'replicated'. Note that Horovod trainer is not available when TF2 is installed
 
 **TRAIN**
 
+```python
 .LR_SCHEDULE: "1x" schedule in detectron. LR_SCHEDULE means equivalent steps when the total batch size is 8.
 It can be either a string like "3x" that refers to standard convention, or a list of int.
 LR_SCHEDULE=3x is the same as LR_SCHEDULE=[420000, 500000, 540000], which
@@ -184,6 +199,7 @@ TRAINER: options: 'horovod', 'replicated'. Note that Horovod trainer is not avai
 .STARTING_EPOCH: Starting epoch. Useful when restarting training.
 
 .LOG_DIR: Log dir
+```
 
 """
 
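The keys above are grouped into nodes of an attribute-style config object. A minimal sketch of loading such a config and reading two of the documented keys; `set_config_by_yaml` is the helper that appears in the detector hunk below, but its import path and the yaml file name here are assumptions:

```python
# Sketch only: load the Faster-RCNN config and read documented keys.
# The import path of set_config_by_yaml and the yaml file name are assumptions.
from deepdoctection.utils.metacfg import set_config_by_yaml

cfg = set_config_by_yaml("conf_frcnn_layout.yaml")   # placeholder path
print(cfg.OUTPUT.RESULT_SCORE_THRESH, cfg.PREPROC.SHORT_EDGE_SIZE)
```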
deepdoctection/extern/tpdetect.py CHANGED

@@ -16,7 +16,7 @@
 # limitations under the License.
 
 """
-TP Faster
+TP Faster-RCNN model
 """
 from __future__ import annotations
 
@@ -149,8 +149,10 @@ class TPFrcnnDetector(TensorpackPredictor, TPFrcnnDetectorMixin):
 
         hyper_param_config = set_config_by_yaml(path_yaml)
 
-
+        hyper_param_config.freeze(freezed=False)
+        if config_overwrite:
            hyper_param_config.update_args(config_overwrite)
+        hyper_param_config.freeze()
 
        model_frcnn_config(config=hyper_param_config, categories=categories, print_summary=False)
        return ResNetFPNModel(config=hyper_param_config)
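The added lines make the overwrite step explicit: the frozen config is unlocked, user-supplied overwrites are applied only if present, and the config is frozen again. A sketch of what a caller might pass, assuming overwrites are dotted `KEY=value` strings matching the key names documented above; the constructor keyword names, paths, weights and categories are placeholders or assumptions, not taken from the diff:

```python
# Hypothetical call; argument values are placeholders and the overwrite string
# format is inferred from update_args and the documented dotted key names.
from deepdoctection.extern.tpdetect import TPFrcnnDetector  # needs the TF/Tensorpack extra

detector = TPFrcnnDetector(
    path_yaml="conf_frcnn_layout.yaml",                  # placeholder
    path_weights="model-3540000.data-00000-of-00001",    # placeholder
    categories={1: "text", 2: "title"},                  # placeholder mapping
    config_overwrite=["OUTPUT.RESULT_SCORE_THRESH=0.4"],
)
```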
deepdoctection/mapper/cats.py CHANGED

@@ -16,8 +16,7 @@
 # limitations under the License.
 
 """
-
-builder method of a dataset.
+Mapping functions for handling categories
 """
 
 from collections import defaultdict
@@ -36,14 +35,36 @@ def cat_to_sub_cat(
     cat_to_sub_cat_dict: Optional[dict[TypeOrStr, TypeOrStr]] = None,
 ) -> Image:
     """
-    Replace some
-
-
-
-
-
-
-
+    Replace some categories with sub categories.
+
+    Example:
+        ```python
+        categories_dict_names_as_key = {'foo': 1}
+        cat_to_sub_cat_dict = {'foo': 'foo_1', 'bak': 'bak_1'}
+
+        dp = cat_to_sub_cat(categories_dict_names_as_key, cat_to_sub_cat_dict)(dp)
+        ```
+
+        will transform
+
+        ```python
+        ImageAnnotation(category_name='foo', category_id=1, ...)
+        ```
+
+        to
+
+        ```python
+        ImageAnnotation(category_name='foo_1', category_id=1, ...)
+        ```
+
+
+    Args:
+        dp: Image datapoint
+        categories_dict_names_as_key: A dict of all possible categories and their ids
+        cat_to_sub_cat_dict: e.g. `{'foo': 'sub_cat_1', 'bak': 'sub_cat_2'}`
+
+    Returns:
+        Image with updated `ImageAnnotation`s
     """
     if categories_dict_names_as_key is None:
         categories_dict_names_as_key = {}
@@ -67,29 +88,31 @@ def re_assign_cat_ids(
     cat_to_sub_cat_mapping: Optional[Mapping[ObjectTypes, Any]] = None,
 ) -> Image:
     """
-    Re-assigning
-
-
-
-
-
-
-
-    :
-
-
-
-
-
-
-
-
-
-
-
-    `
-
-
+    Re-assigning `category_id`s is sometimes necessary to align with categories of the `DatasetCategories`.
+
+    Example:
+        Consider the situation where some categories are filtered. In order to guarantee alignment of category ids of
+        the `DatasetCategories` the ids in the annotation have to be re-assigned.
+
+        Annotations that are not in the dictionary provided will be removed.
+
+    Args:
+        dp: Image
+        categories_dict_name_as_key: e.g. `{LayoutType.word: 1}`
+        cat_to_sub_cat_mapping: e.g. `{LayoutType.word: {WordType.token_class:
+                                        {FundsFirstPage.REPORT_DATE: 1,
+                                         FundsFirstPage.REPORT_TYPE: 2,
+                                         FundsFirstPage.UMBRELLA: 3,
+                                         FundsFirstPage.FUND_NAME: 4,
+                                         TokenClasses.OTHER: 5},
+                                        WordType.TAG:{ BioTag.INSIDE: 1,
+                                                       BioTag.OUTSIDE: 2,
+                                                       BioTag.BEGIN: 3}}}`
+            To re-assign the `category_id`s of an image summary, use the key 'default_type' for the default category,
+            e.g. `{DefaultType.DEFAULT_TYPE: {PageType.DOCUMENT_TYPE: {DocumentType.INVOICE:1,
+                                                                       DocumentType.BANK_STATEMENT:2}}}`
+    Returns:
+        Image
     """
 
     ann_ids_to_remove: list[str] = []
@@ -129,12 +152,14 @@ def filter_cat(
     Filters category annotations based on the on a list of categories to be kept and a list of all possible
     category names that might be available in dp.
 
-    :
-
-
-
-
-
+    Args:
+        dp: Image datapoint
+        categories_as_list_filtered: A list of `category_name`s with categories to keep. Using a dataset e.g.
+            `my_data.categories.get_categories(as_dict=False,filtered=True)`
+        categories_as_list_unfiltered: A list of all available `category_name`s. Using a dataset e.g.
+            `my_data.categories.get_categories(as_dict=False)`
+    Returns:
+        Image with filtered Annotations
     """
 
     cats_to_remove_list = [cat for cat in categories_as_list_unfiltered if cat not in categories_as_list_filtered]
@@ -161,13 +186,15 @@ def filter_summary(
     Filters datapoints with given summary conditions. If several conditions are given, it will filter out datapoints
     that do not satisfy all conditions.
 
-    :
-
+    Args:
+        dp: Image datapoint
+        sub_cat_to_sub_cat_names_or_ids: A dict of list. The key correspond to the sub category key to look for in
                                          the summary. The value correspond to a sequence of either category names
                                          or category ids
-
-
-    :
+        mode: With respect to the previous argument, it will look if the `category_name`, the `value` or the
+              `category_id` corresponds to any of the given values.
+    Returns:
+        Image or `None`
     """
     for key, values in sub_cat_to_sub_cat_names_or_ids.items():
         if mode == "name":
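Both filters follow the same curried pattern as `cat_to_sub_cat` above, so they can be configured once and then mapped over a dataflow. A sketch under that assumption; the dataflow `df`, the category names and the summary key are placeholders, and it relies on `MapData` dropping datapoints for which the mapper returns `None`:

```python
# Sketch: configure the curried filters once and map them over an existing dataflow df.
# All names and values below are placeholders; the call pattern mirrors the docstring examples.
from deepdoctection.dataflow import MapData
from deepdoctection.mapper.cats import filter_cat, filter_summary

df = MapData(df, filter_cat(["text", "title"], ["text", "title", "table"]))
df = MapData(df, filter_summary({"document_type": ["invoice"]}, mode="name"))
```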
@@ -194,46 +221,54 @@ def image_to_cat_id(
     Extracts all category_ids, sub category information or summary sub category information with given names into a
     defaultdict. This mapping is useful when running evaluation with e.g. an accuracy metric.
 
-
+    Example 1:
 
     dp contains image annotations
-
-
-
-
-
+    ```python
+    ImageAnnotation(category_name='foo',category_id='1',...),
+    ImageAnnotation(category_name='bak',category_id='2',...),
+    ImageAnnotation(category_name='baz',category_id='3',...),
+    ImageAnnotation(category_name='foo',category_id='1',...),
+    ```
 
     Then
 
-
-
+    ```python
+    image_to_cat_id(category_names=['foo', 'bak', 'baz'])(dp)
+    ```
     will return
 
-
+    ```python
+    ({'foo':[1,1], 'bak':[2], 'baz':[3]}, image_id)
+    ```
 
-
-    **Example 2:**
+    Example 2:
 
     dp contains image annotations as given in Example 1. Moreover, the 'foo' image annotation have sub categories:
 
-
-
+    ```python
+    foo_sub_1: CategoryAnnotation(category_name='sub_1', category_id='4')
+    foo_sub_1: CategoryAnnotation(category_name='sub_1', category_id='5')
 
-
+    image_to_cat_id(sub_categories={'foo':'foo_sub_1'})
+    ```
 
     will return
 
-
-
+    ```python
+    ({'foo_sub_1':[5,6]}, image_id)
+    ```
 
-
-
-
-
-
+    Args:
+        dp: Image
+        category_names: A list of category names
+        sub_categories: A dict `{'cat':'sub_cat'}` or a list. Will dump the results with sub_cat as key
+        id_name_or_value: Only relevant for sub categories. It will extract the sub category id, the name or, if the
                          sub category is a container, it will extract a value.
-
-
+        summary_sub_category_names: A list of summary sub categories
+
+    Returns:
+        A defaultdict of lists
     """
 
     cat_container = defaultdict(list)
@@ -309,13 +344,16 @@ def remove_cats(
     Remove categories according to given category names or sub category names. Note that these will change the container
     in which the objects are stored.
 
-    :
-
+    Args:
+        dp: A datapoint image
+        category_names: A single category name or a list of categories to remove. On default will remove
             nothing.
-
-
-
-
+        sub_categories: A dict with category names and a list of their sub categories to be removed
+        relationships: A dict with category names and a list of relationship names to be removed
+        summary_sub_categories: A single sub category or a list of sub categories from a summary to be removed
+
+    Returns:
+        A datapoint image with removed categories
     """
 
     if isinstance(category_names, str):
@@ -364,9 +402,12 @@ def add_summary(dp: Image, categories: Mapping[int, ObjectTypes]) -> Image:
     """
     Adding a summary with the number of categories in an image.
 
-    :
-
-
+    Args:
+        dp: Image
+        categories: A dict of all categories, e.g. `{"1": "text", "2":"title", ...}`
+
+    Returns:
+        Image
     """
     category_list = list(categories.values())
     anns = dp.get_annotation(category_names=category_list)
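The curried call pattern from the `image_to_cat_id` docstring examples, written out as a snippet; `dp` is a placeholder `Image` datapoint obtained elsewhere:

```python
# Mirrors Example 1 above: collect category ids per category name, e.g. for a metric.
from deepdoctection.mapper.cats import image_to_cat_id

cat_ids, image_id = image_to_cat_id(category_names=["foo", "bak", "baz"])(dp)
# cat_ids == {'foo': [1, 1], 'bak': [2], 'baz': [3]} for the annotations of Example 1
```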
deepdoctection/mapper/cocostruct.py CHANGED

@@ -42,19 +42,29 @@ def coco_to_image(
     coarse_sub_cat_name: Optional[ObjectTypes] = None,
 ) -> Optional[Image]:
     """
-
-
-
-
-    :
-
-
-
-
-
-
-
-
+    Maps a dataset in `COCO` format that has been serialized to image format.
+
+    This serialized input requirements hold when a `COCO` style sheet is loaded via `SerializerCoco.load`.
+
+    Args:
+        dp: A datapoint in serialized COCO format.
+        categories: A dict of categories, e.g. `DatasetCategories.get_categories`.
+        load_image: If `True`, it will load image to `Image.image`.
+        filter_empty_image: Will return `None` if datapoint has no annotations.
+        fake_score: If `dp` does not contain a score, a fake score with uniform random variables in `(0,1)` will be
+            added.
+        coarse_mapping: A mapping to map categories into broader categories. Note that the coarser categories must
+            already be included in the original mapping.
+        coarse_sub_cat_name: A name to be provided as sub category key for a coarse mapping.
+
+    Returns:
+        `Image` or `None`.
+
+    Raises:
+        ValueError: If `coarse_sub_cat_name` is provided but `coarse_mapping` is not.
+
+    Note:
+        A coarse mapping must be provided when `coarse_sub_cat_name` has been passed.
     """
 
     if coarse_sub_cat_name and coarse_mapping is None:
@@ -111,11 +121,19 @@ def coco_to_image(
 
 def image_to_coco(dp: Image) -> tuple[JsonDict, list[JsonDict]]:
     """
-
-
+    Converts an image back into the `COCO` format.
+
+    As images and annotations are separated, it will return a dict with the image information and one for its
+    annotations.
+
+    Args:
+        dp: An `Image`.
+
+    Returns:
+        A tuple of dicts, the first corresponding to the COCO-image object, the second to their COCO-annotations.
 
-    :
-
+    Raises:
+        TypeError: If `dp` is not of type `Image`.
     """
 
     if not isinstance(dp, Image):