deepdoctection 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of deepdoctection might be problematic.
- deepdoctection/__init__.py +4 -2
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +919 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +162 -108
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +205 -119
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +26 -17
- deepdoctection/utils/env_info.py +86 -37
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -71
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.1.dist-info/METADATA +376 -0
- deepdoctection-0.43.1.dist-info/RECORD +149 -0
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.1.dist-info/METADATA +0 -431
- deepdoctection-0.42.1.dist-info/RECORD +0 -148
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/WHEEL +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/top_level.txt +0 -0
deepdoctection/datasets/registry.py CHANGED
@@ -16,8 +16,9 @@
 # limitations under the License.
 
 """
-
+`DatasetRegistry` for registering built-in and custom datasets
 """
+
 import inspect
 
 import catalogue # type: ignore
@@ -36,17 +37,22 @@ def get_dataset(name: str) -> DatasetBase:
     """
     Returns an instance of a dataset with a given name. This instance can be used to customize the dataflow output
 
-
+    Example:
+
+        ```python
+        dataset = get_dataset("some_name")
+        dataset.dataflow.categories.filter_categories(["cat1","cat2"])
+        df = dataset.dataflow.build(split="train")
 
-
-
-
+        for dp in df:
+            # do something
+        ```
 
-
-
+    Args:
+        name: A dataset name
 
-    :
-
+    Returns:
+        An instance of a dataset
     """
     ds = dataset_registry.get(name)
     if inspect.isclass(ds):
@@ -58,8 +64,9 @@ def print_dataset_infos(add_license: bool = True, add_info: bool = True) -> None
     """
     Prints a table with all registered datasets and some basic information (name, license and optionally description)
 
-    :
-
+    Args:
+        add_license: Whether to add the license type of the dataset
+        add_info: Whether to add a description of the dataset
     """
 
     data = dataset_registry.get_all()
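The new `get_dataset` docstring above spells out the intended call pattern. A minimal sketch of that pattern, assuming `get_dataset` and `print_dataset_infos` are importable from `deepdoctection.datasets` and that the `publaynet` dataset (used in the `Evaluator` example further down) is registered in this environment:

```python
# Sketch based on the docstrings added in this release; dataset name and import path are assumptions.
from deepdoctection.datasets import get_dataset, print_dataset_infos

print_dataset_infos(add_license=True, add_info=False)  # table of registered datasets

dataset = get_dataset("publaynet")                      # assumed to be registered
dataset.dataflow.categories.filter_categories(["TEXT", "TITLE"])
df = dataset.dataflow.build(split="train")

for dp in df:
    pass  # do something with each datapoint
```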
deepdoctection/datasets/save.py CHANGED
@@ -16,7 +16,7 @@
 # limitations under the License.
 
 """
-
+Saving samples from a DataFlow
 """
 
 import json
@@ -42,17 +42,18 @@ def dataflow_to_json(
     highest_hierarchy_only: bool = False,
 ) -> None:
     """
-    Save a dataflow consisting of `datapoint.Image` to a jsonl file. Each image will be dumped into a separate
-    JSON object.
+    Save a dataflow consisting of `datapoint.Image` to a `jsonl` file. Each image will be dumped into a separate
+    `JSON` object.
 
-    :
-
-
-
-
-
-
-
+    Args:
+        df: Input dataflow
+        path: Path to save the file(s) to
+        single_files: Will save image results to a single `JSON` file. If False all images of the dataflow will be
+            dumped into a single `.jsonl` file.
+        file_name: file name, only needed for `jsonl` files
+        max_datapoints: Will stop saving after dumping max_datapoint images.
+        save_image_in_json: Will save the image to the `JSON` object
+        highest_hierarchy_only: If `True` it will remove all image attributes of `ImageAnnotation`s
     """
     path = Path(path)
     if single_files:
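The rewritten `dataflow_to_json` docstring lists its parameters; below is a hedged sketch of a call that writes everything into one `.jsonl` file, assuming the function is importable from `deepdoctection.datasets.save` and that `df` is a dataflow of `datapoint.Image` samples (the output path is hypothetical):

```python
# Hypothetical usage of the parameters documented above; import path and path value are assumptions.
from deepdoctection.datasets.save import dataflow_to_json

dataflow_to_json(
    df,                            # a dataflow of `datapoint.Image` samples
    path="/tmp/publaynet_export",  # hypothetical output directory
    single_files=False,            # False: dump all images into one .jsonl file
    file_name="samples.jsonl",     # only needed for the .jsonl case
    max_datapoints=100,            # stop after dumping 100 images
    save_image_in_json=False,      # keep pixel data out of the JSON objects
    highest_hierarchy_only=True,   # strip image attributes of ImageAnnotations
)
```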
deepdoctection/eval/__init__.py CHANGED
@@ -16,8 +16,9 @@
 # limitations under the License.
 
 """
-
-
+## Evalulation metrics and Evaluators
+
+Contains metrics (customized for special tasks), evaluators and Tensorpack callback
 """
 
 from .accmetric import *
deepdoctection/eval/accmetric.py CHANGED
@@ -16,8 +16,9 @@
 # limitations under the License.
 
 """
-
+Accuracy metrics (micro, macro, F1 and per label) for classification tasks.
 """
+
 from collections import Counter
 from typing import Counter as TypeCounter
 from typing import Mapping, Optional, Sequence, Union
@@ -73,14 +74,19 @@ def _confusion(np_label_gt: NDArray[int32], np_label_pr: NDArray[int32]) -> NDAr
 
 def accuracy(label_gt: Sequence[int], label_predictions: Sequence[int], masks: Optional[Sequence[int]] = None) -> float:
     """
-    Calculates the accuracy given predictions and labels. Ignores masked indices.
-    `sklearn.metrics.accuracy_score`
+    Calculates the accuracy given predictions and labels. Ignores masked indices.
+    Uses `sklearn.metrics.accuracy_score`
+
+    Args:
+        label_gt: List of ground truth labels
+        label_predictions: List of predictions. Must have the same length as label_gt
+        masks: An optional list with masks to ignore some samples
 
-    :
-
-    :param masks: An optional list with masks to ignore some samples.
+    Returns:
+        Accuracy score with only unmasked values considered
 
-    :
+    Raises:
+        ValueError: If lengths of label_gt and label_predictions are not equal
     """
 
     np_label_gt, np_label_pr = np.asarray(label_gt), np.asarray(label_predictions)
@@ -99,13 +105,15 @@ def confusion(
     label_gt: Sequence[int], label_predictions: Sequence[int], masks: Optional[Sequence[int]] = None
 ) -> NDArray[int32]:
     """
-    Calculates the
+    Calculates the confusion matrix given the predictions and labels.
 
-    :
-
-
+    Args:
+        label_gt: List of ground truth labels
+        label_predictions: List of predictions. Must have the same length as label_gt
+        masks: List with masks of same length as label_gt
 
-    :
+    Returns:
+        Confusion matrix as numpy array
     """
 
     np_label_gt, np_label_pr = np.asarray(label_gt), np.asarray(label_predictions)
@@ -123,14 +131,16 @@ def precision(
     micro: bool = False,
 ) -> NDArray[float32]:
     """
-    Calculates the precision for a multi
-
-
-
-
-
-
-
+    Calculates the precision for a multi-classification problem using a confusion matrix.
+
+    Args:
+        label_gt: List of ground truth labels
+        label_predictions: List of predictions. Must have the same length as label_gt
+        masks: List with masks of same length as label_gt
+        micro: If True, calculates the micro average precision
+
+    Returns:
+        Precision scores by category or micro average
     """
     np_label_gt, np_label_pr = np.asarray(label_gt), np.asarray(label_predictions)
 
@@ -153,14 +163,16 @@ def recall(
     micro: bool = False,
 ) -> NDArray[float32]:
     """
-    Calculates the recall for a multi
-
-
-
-
-
-
-
+    Calculates the recall for a multi-classification problem using a confusion matrix.
+
+    Args:
+        label_gt: List of ground truth labels
+        label_predictions: List of predictions. Must have the same length as label_gt
+        masks: List with masks of same length as label_gt
+        micro: If True, calculates the micro average recall
+
+    Returns:
+        Recall scores by category or micro average
     """
     np_label_gt, np_label_pr = np.asarray(label_gt), np.asarray(label_predictions)
 
@@ -184,15 +196,17 @@ def f1_score(
     per_label: bool = True,
 ) -> NDArray[float32]:
     """
-    Calculates the
-
-
-
-
-
-
-
-
+    Calculates the `F1` score for a multi-classification problem.
+
+    Args:
+        label_gt: List of ground truth labels
+        label_predictions: List of predictions. Must have the same length as label_gt
+        masks: List with masks of same length as label_gt
+        micro: If True, calculates the micro average f1 score
+        per_label: If True, returns the f1 score per label, otherwise returns the mean of all f1's
+
+    Returns:
+        `F1` scores by category, micro average, or mean value
     """
 
     np_precision = precision(label_gt, label_predictions, masks, micro)
@@ -205,7 +219,13 @@ f1_score(
 
 class ClassificationMetric(MetricBase):
     """
-
+    Base metric class for classification metrics.
+
+    Attributes:
+        mapper: Function to map images to `category_id`
+        _cats: Optional sequence of `ObjectTypes`
+        _sub_cats: Optional mapping of object types to object types or sequences of `ObjectTypes`
+        _summary_sub_cats: Optional sequence of `ObjectTypes` for summary
     """
 
     mapper = image_to_cat_id
@@ -279,20 +299,20 @@ class ClassificationMetric(MetricBase):
         summary_sub_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
     ) -> None:
         """
-        Set categories that are supposed to be evaluated.
-        passed explicitly.
-
-        **Example:**
-
-        You want to evaluate sub_cat1, sub_cat2 of cat1 and sub_cat3 of cat2. Set
+        Set categories that are supposed to be evaluated.
 
-
+        If `sub_categories` have to be considered, they need to be passed explicitly.
 
+        Example:
+            ```python
+            # Evaluate sub_cat1, sub_cat2 of cat1 and sub_cat3 of cat2
+            set_categories(sub_category_names={cat1: [sub_cat1, sub_cat2], cat2: sub_cat3})
+            ```
 
-        :
-
-
-
+        Args:
+            category_names: List of category names
+            sub_category_names: Dict of categories and their sub categories to be evaluated
+            summary_sub_category_names: String or list of summary sub categories
         """
 
         if category_names is not None:
@@ -352,7 +372,7 @@ class ClassificationMetric(MetricBase):
     def sub_cats(
         self,
    ) -> Optional[Union[Mapping[ObjectTypes, ObjectTypes], Mapping[ObjectTypes, Sequence[ObjectTypes]]]]:
-        """
+        """`sub_cats`"""
         return self._sub_cats
 
     @property
@@ -474,7 +494,7 @@ class RecallMetric(PrecisionMetric):
 @metric_registry.register("f1")
 class F1Metric(PrecisionMetric):
     """
-    Metric induced by `f1_score`. Will calculate the
+    Metric induced by `f1_score`. Will calculate the F1 per category
     """
 
     name = "F1"
@@ -523,7 +543,7 @@ class RecallMetricMicro(PrecisionMetricMicro):
 @metric_registry.register("f1_micro")
 class F1MetricMicro(PrecisionMetricMicro):
     """
-    Metric induced by `f1_score`. Will calculate the micro average
+    Metric induced by `f1_score`. Will calculate the micro average F1
     """
 
     name = "Micro F1"
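The `accuracy`, `confusion`, `precision`, `recall` and `f1_score` signatures documented above all take plain label sequences, so they can be tried on toy data. A small sketch, assuming the functions are importable from `deepdoctection.eval.accmetric` (the mask semantics are only hinted at in the docstrings, so treat that line as an assumption):

```python
# Toy example for the functions documented above; import path and mask semantics are assumptions.
from deepdoctection.eval.accmetric import accuracy, confusion, f1_score, precision, recall

gt = [0, 1, 1, 2, 2, 2]    # ground truth labels
pred = [0, 1, 2, 2, 2, 1]  # predictions, same length as gt
mask = [1, 1, 1, 1, 1, 0]  # optional mask; exact 0/1 convention is not shown in this diff

print(accuracy(gt, pred, mask))                   # scalar accuracy over unmasked samples
print(confusion(gt, pred, mask))                  # confusion matrix as numpy array
print(precision(gt, pred, mask))                  # per-category precision
print(recall(gt, pred, mask, micro=True))         # micro-averaged recall
print(f1_score(gt, pred, mask, per_label=False))  # mean F1 over all labels
```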
deepdoctection/eval/base.py CHANGED
@@ -16,7 +16,7 @@
 # limitations under the License.
 
 """
-
+Base classes for evaluations and metrics
 """
 
 from abc import ABC, abstractmethod
@@ -42,6 +42,11 @@ class MetricBase(ABC):
     Using `get_distance`, ground truth and prediction dataflow can be read in and evaluated.
     `dump` is a helper method that is often called via `get_distance`. Here, the dataflows should be
     executed and the results should be saved in separate lists.
+
+    Attributes:
+        name (str): Name of the metric, usually the class name.
+        metric (Callable[[Any, Any], Optional[Any]]): The metric function that computes the distance.
+        _results (list[MetricResults]): Internal storage for results of the metric computation.
     """
 
     name: str
@@ -65,6 +70,9 @@ class MetricBase(ABC):
     def get_requirements(cls) -> list[Requirement]:
         """
         Get a list of requirements for running the detector
+
+        Returns:
+            List of requirements
         """
         raise NotImplementedError()
 
@@ -74,11 +82,15 @@ class MetricBase(ABC):
         cls, dataflow_gt: DataFlow, dataflow_predictions: DataFlow, categories: DatasetCategories
     ) -> list[MetricResults]:
         """
-        Takes of the ground truth
+        Takes of the ground truth dataflow as well as the dataflow and generates the metric results.
+
+        Args:
+            dataflow_gt: Dataflow with ground truth annotations.
+            dataflow_predictions: Dataflow with predictions.
+            categories: DatasetCategories with respect to the underlying dataset.
 
-        :
-
-        :param categories: DatasetCategories with respect to the underlying dataset.
+        Returns:
+            List of metric results
         """
         raise NotImplementedError()
 
@@ -91,9 +103,13 @@ class MetricBase(ABC):
         Dump the dataflow with ground truth annotations and predictions. Use it as auxiliary method and call it from
         `get_distance`.
 
-        :
-
-
+        Args:
+            dataflow_gt: Dataflow with ground truth annotations.
+            dataflow_predictions: Dataflow with predictions.
+            categories: DatasetCategories with respect to the underlying dataset.
+
+        Returns:
+            Tuple containing ground truth and predictions
         """
         raise NotImplementedError()
 
@@ -103,8 +119,11 @@
         Converts the result from `get_distance` to a dict. It concatenates all keys of the inner dict and uses
         the metric result 'val' as value.
 
-        :
-
+        Args:
+            results: List of dict as input
+
+        Returns:
+            MetricResults: Dict with metric results.
         """
         output: MetricResults = {}
         for res in results:
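The `Attributes`, `Args` and `Returns` sections added to `MetricBase` outline the contract a metric has to fulfil. A rough sketch of a custom metric written against that contract; the decorators, the `reset_state` calls and the keys of the result dict (apart from `val`, which `result_list_to_dict` mentions) are assumptions, not a verified implementation:

```python
# Rough sketch only; signatures follow the hunks above, everything else is assumed.
from deepdoctection.eval.base import MetricBase


class DummyCountMetric(MetricBase):
    name = "dummy count"
    metric = None  # the metric callable is left unused in this sketch

    @classmethod
    def get_requirements(cls):
        return []  # no extra requirements for this toy metric

    @classmethod
    def dump(cls, dataflow_gt, dataflow_predictions, categories):
        # materialize both dataflows into lists, as the docstring suggests
        dataflow_gt.reset_state()
        dataflow_predictions.reset_state()
        return list(dataflow_gt), list(dataflow_predictions)

    @classmethod
    def get_distance(cls, dataflow_gt, dataflow_predictions, categories):
        gt, preds = cls.dump(dataflow_gt, dataflow_predictions, categories)
        return [{"key": "num_samples", "val": float(len(preds)), "num_samples": len(gt)}]
```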
deepdoctection/eval/cocometric.py CHANGED
@@ -16,7 +16,7 @@
 # limitations under the License.
 
 """
-
+Metrics that require the `COCOeval` class.
 """
 from __future__ import annotations
 
@@ -65,8 +65,7 @@ _F1_DEFAULTS = [
 _MAX_DET_INDEX = [2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 2, 2]
 
 """
-
-https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py
+Taken from <https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py>
 """
 
 
@@ -236,10 +235,11 @@ class CocoMetric(MetricBase):
     @classmethod
     def get_summary_default_parameters(cls) -> list[JsonDict]:
         """
-
+        Get default parameters of evaluation results. May differ from other `CocoMetric` classes.
 
-        :
-
+        Returns:
+            List of dict with default configuration, e.g. setting of average precision, iou threshold,
+            area range and maximum detections.
         """
         if cls._f1_score:
             for el, idx in zip(_F1_DEFAULTS, [2, 2]):
@@ -267,12 +267,14 @@ class CocoMetric(MetricBase):
         """
         Setting params for different coco metric modes.
 
-        :
-
-
-
-
-
+        Args:
+            max_detections: The maximum number of detections to consider
+            area_range: The area range to classify objects as `all`, `small`, `medium` and `large`
+            f1_score: Will use F1-score setting with default `iouThr=0.9`. To be more precise it does not calculate
+                the F1-score but the precision and recall for a given `iou` threshold. Use the harmonic mean to
+                get the ultimate F1-score.
+            f1_iou: Use with `f1_score=True` and reset the f1 iou threshold
+            per_category: Whether to calculate metrics per category
         """
         if max_detections is not None:
             assert len(max_detections) == 3, max_detections
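The parameter documentation above describes how the COCO metric can be switched into its precision/recall-at-IoU ("F1") mode. A hedged sketch of such a configuration; the method name `set_params` comes from earlier releases and the registry key `"coco"` from the `Evaluator` example in the next file, so both are assumptions here:

```python
# Hedged sketch: configure the COCO metric with the documented keyword arguments.
from deepdoctection.eval.registry import metric_registry

coco_metric = metric_registry.get("coco")          # registry key as used in the Evaluator example
coco_metric.set_params(f1_score=True, f1_iou=0.8)  # documented kwargs; 0.8 is an arbitrary threshold

# per-category breakdown with a custom detection budget (three values, per the assert shown above)
coco_metric.set_params(max_detections=[1, 10, 100], per_category=True)
```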
deepdoctection/eval/eval.py CHANGED
@@ -65,26 +65,28 @@ class Evaluator:
     erasing process and after that passing the predictor. Predicted and gt datapoints will be converted into the
     required metric input format and dumped into lists. Both lists will be passed to `MetricBase.get_distance`.
 
-
-
+    Note:
         You can evaluate the predictor on a subset of categories by filtering the ground truth dataset. When using
         the coco metric all predicted objects that are not in the set of filtered objects will be not taken into
         account.
 
-
-
-
-
-
-
+    Example:
+        ```python
+        publaynet = get_dataset("publaynet")
+        publaynet.dataflow.categories.filter_categories(categories=["TEXT","TITLE"])
+        coco_metric = metric_registry.get("coco")
+        profile = ModelCatalog.get_profile("layout/d2_model_0829999_layout_inf_only.pt")
+        path_weights = ModelCatalog.get_full_path_weights("layout/d2_model_0829999_layout_inf_only.pt")
+        path_config_yaml= ModelCatalog.get_full_path_configs("layout/d2_model_0829999_layout_inf_only.pt")
 
-
-
-
+        layout_detector = D2FrcnnDetector(path_config_yaml, path_weights, profile.categories)
+        layout_service = ImageLayoutService(layout_detector)
+        evaluator = Evaluator(publaynet, layout_service, coco_metric)
 
-
+        output = evaluator.run(max_datapoints=10)
+        ```
 
-    For another example check the script in
+    For another example check the script in `Evaluation` of table recognition`
     """
 
     def __init__(
@@ -98,9 +100,10 @@ class Evaluator:
         """
         Evaluating a pipeline component on a dataset with a given metric.
 
-        :
-
-
+        Args:
+            dataset: dataset
+            component_or_pipeline: A pipeline component with predictor and annotation factory.
+            metric: metric
         """
 
         self.dataset = dataset
@@ -191,11 +194,13 @@ class Evaluator:
         """
         Start evaluation process and return the results.
 
-        :
-
-
+        Args:
+            output_as_dict: Return result in a list or dict.
+            dataflow_build_kwargs: Pass the necessary arguments in order to build the dataflow, e.g. `split`,
+                `build_mode`, `max_datapoints` etc.
 
-        :
+        Returns:
+            dict with metric results.
         """
 
         df_gt = self.dataset.dataflow.build(**dataflow_build_kwargs)
@@ -279,12 +284,15 @@ class Evaluator:
         Visualize ground truth and prediction datapoint. Given a dataflow config it will run predictions per sample
         and concat the prediction image (with predicted bounding boxes) with ground truth image.
 
-        :
-
-
-
-
-
+        Args:
+            interactive: If set to True will open an interactive image, otherwise it will return a `np.array` that
+                can be displayed differently (e.g. `matplotlib`). Note that, if the interactive mode is being
+                used, more than one sample can be iteratively be displayed.
+            kwargs: Dataflow configs for displaying specific image splits and visualisation configs:
+                `show_tables`, `show_layouts`, `show_table_structure`, `show_words`
+
+        Returns:
+            Image as `np.array`
         """
 
         show_tables = kwargs.pop("show_tables", True)
@@ -349,13 +357,15 @@ class WandbTableAgent:
     """
     A class that creates a W&B table of sample predictions and sends them to the W&B server.
 
+    Example:
+        ```python
        df ... # some dataflow
        agent = WandbTableAgent(myrun,"MY_DATASET",50,{"1":"FOO"})
        for dp in df:
            agent.dump(dp)
 
        agent.log()
-
+        ```
     """
 
     def __init__(
@@ -368,16 +378,17 @@ class WandbTableAgent:
         cat_to_sub_cat: Optional[Mapping[TypeOrStr, TypeOrStr]] = None,
     ):
         """
-        :
-
-
-
-
-
-
-
-
-
+        Args:
+            wandb_run: An `wandb.run` instance for tracking. Use `run=wandb.init(project=project, config=config,
+                **kwargs)` to generate a `run`.
+            dataset_name: name for tracking
+            num_samples: When dumping images to a table it will stop adding samples after `num_samples` instances
+            categories: dict of all possible categories
+            sub_categories: dict of sub categories. If provided, these categories will define the classes for the
+                table
+            cat_to_sub_cat: dict of category to sub category keys. Suppose your category `foo` has a sub category
+                defined by the key `sub_foo`. The range sub category values must then be given by
+                `sub_categories` and to extract the sub category values one must pass `{"foo": "sub_foo"}`
         """
 
         self.dataset_name = dataset_name
@@ -400,8 +411,11 @@ class WandbTableAgent:
         Dump image to a table. Add this while iterating over samples. After `num_samples` it will stop appending samples
         to the table
 
-        :
-
+        Args:
+            dp: `Image` instance
+
+        Returns:
+            `Image` instance
         """
         if self.num_samples > self._counter:
             dp = maybe_load_image(dp)
@@ -421,9 +435,10 @@
 
     def _build_table(self) -> Table:
         """
-        Builds wandb.Table
+        Builds `wandb.Table` instance for logging evaluation
 
-
+        Returns:
+            Table object to log evaluation
         """
         return Table(columns=self._table_cols, data=self._table_rows)
 
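The new `run` and `compare` docstrings spell out which keyword arguments are forwarded to the dataflow build and to the visualisation. A short sketch continuing the `Evaluator` example from the class docstring above (`publaynet`, `layout_service` and `coco_metric` are taken from that example; the `split` value and the `show_*` flags are illustrative assumptions):

```python
# Continues the Evaluator example above; argument values are illustrative only.
from deepdoctection.eval import Evaluator  # import path assumed

evaluator = Evaluator(publaynet, layout_service, coco_metric)

# run() forwards dataflow build kwargs such as split and max_datapoints
output = evaluator.run(output_as_dict=True, split="val", max_datapoints=10)

# compare() renders ground truth next to predictions; with interactive=False it returns an np.array
img = evaluator.compare(interactive=False, split="val", max_datapoints=1, show_words=False)
```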
deepdoctection/eval/registry.py CHANGED
@@ -16,7 +16,7 @@
 # limitations under the License.
 
 """
-
+MetricRegistry for registering and retrieving evaluation metrics
 """
 
 import catalogue # type: ignore
@@ -30,7 +30,10 @@ def get_metric(name: str) -> MetricBase:
     """
     Returns an instance of a metric with a given name.
 
-    :
-
+    Args:
+        name: A metric name
+
+    Returns:
+        A metric instance
     """
     return metric_registry.get(name)()
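For completeness, a sketch of the two documented ways to obtain a metric: `get_metric` returns an instance (`metric_registry.get(name)()`, as shown in the hunk above), while the registry itself returns the registered class. The key `"f1"` comes from the `@metric_registry.register("f1")` decorator visible in the accmetric diff:

```python
# Both routes are taken from the diffs above; only the import style is an assumption.
from deepdoctection.eval.registry import get_metric, metric_registry

f1 = get_metric("f1")               # instance, i.e. metric_registry.get("f1")()
f1_cls = metric_registry.get("f1")  # the registered class itself
```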