PyPI - deepdoctection - Versions diffs - 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl - Mend

deepdoctection 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (124) hide show

deepdoctection/__init__.py +4 -2
deepdoctection/analyzer/__init__.py +2 -1
deepdoctection/analyzer/config.py +919 -0
deepdoctection/analyzer/dd.py +36 -62
deepdoctection/analyzer/factory.py +311 -141
deepdoctection/configs/conf_dd_one.yaml +100 -44
deepdoctection/configs/profiles.jsonl +32 -0
deepdoctection/dataflow/__init__.py +9 -6
deepdoctection/dataflow/base.py +33 -15
deepdoctection/dataflow/common.py +96 -75
deepdoctection/dataflow/custom.py +36 -29
deepdoctection/dataflow/custom_serialize.py +135 -91
deepdoctection/dataflow/parallel_map.py +33 -31
deepdoctection/dataflow/serialize.py +15 -10
deepdoctection/dataflow/stats.py +41 -28
deepdoctection/datapoint/__init__.py +4 -6
deepdoctection/datapoint/annotation.py +104 -66
deepdoctection/datapoint/box.py +190 -130
deepdoctection/datapoint/convert.py +66 -39
deepdoctection/datapoint/image.py +151 -95
deepdoctection/datapoint/view.py +383 -236
deepdoctection/datasets/__init__.py +2 -6
deepdoctection/datasets/adapter.py +11 -11
deepdoctection/datasets/base.py +118 -81
deepdoctection/datasets/dataflow_builder.py +18 -12
deepdoctection/datasets/info.py +76 -57
deepdoctection/datasets/instances/__init__.py +6 -2
deepdoctection/datasets/instances/doclaynet.py +17 -14
deepdoctection/datasets/instances/fintabnet.py +16 -22
deepdoctection/datasets/instances/funsd.py +11 -6
deepdoctection/datasets/instances/iiitar13k.py +9 -9
deepdoctection/datasets/instances/layouttest.py +9 -9
deepdoctection/datasets/instances/publaynet.py +9 -9
deepdoctection/datasets/instances/pubtables1m.py +13 -13
deepdoctection/datasets/instances/pubtabnet.py +13 -15
deepdoctection/datasets/instances/rvlcdip.py +8 -8
deepdoctection/datasets/instances/xfund.py +11 -9
deepdoctection/datasets/registry.py +18 -11
deepdoctection/datasets/save.py +12 -11
deepdoctection/eval/__init__.py +3 -2
deepdoctection/eval/accmetric.py +72 -52
deepdoctection/eval/base.py +29 -10
deepdoctection/eval/cocometric.py +14 -12
deepdoctection/eval/eval.py +56 -41
deepdoctection/eval/registry.py +6 -3
deepdoctection/eval/tedsmetric.py +24 -9
deepdoctection/eval/tp_eval_callback.py +13 -12
deepdoctection/extern/__init__.py +1 -1
deepdoctection/extern/base.py +176 -97
deepdoctection/extern/d2detect.py +127 -92
deepdoctection/extern/deskew.py +19 -10
deepdoctection/extern/doctrocr.py +162 -108
deepdoctection/extern/fastlang.py +25 -17
deepdoctection/extern/hfdetr.py +137 -60
deepdoctection/extern/hflayoutlm.py +329 -248
deepdoctection/extern/hflm.py +67 -33
deepdoctection/extern/model.py +108 -762
deepdoctection/extern/pdftext.py +37 -12
deepdoctection/extern/pt/nms.py +15 -1
deepdoctection/extern/pt/ptutils.py +13 -9
deepdoctection/extern/tessocr.py +87 -54
deepdoctection/extern/texocr.py +29 -14
deepdoctection/extern/tp/tfutils.py +36 -8
deepdoctection/extern/tp/tpcompat.py +54 -16
deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
deepdoctection/extern/tpdetect.py +4 -2
deepdoctection/mapper/__init__.py +1 -1
deepdoctection/mapper/cats.py +117 -76
deepdoctection/mapper/cocostruct.py +35 -17
deepdoctection/mapper/d2struct.py +56 -29
deepdoctection/mapper/hfstruct.py +32 -19
deepdoctection/mapper/laylmstruct.py +221 -185
deepdoctection/mapper/maputils.py +71 -35
deepdoctection/mapper/match.py +76 -62
deepdoctection/mapper/misc.py +68 -44
deepdoctection/mapper/pascalstruct.py +13 -12
deepdoctection/mapper/prodigystruct.py +33 -19
deepdoctection/mapper/pubstruct.py +42 -32
deepdoctection/mapper/tpstruct.py +39 -19
deepdoctection/mapper/xfundstruct.py +20 -13
deepdoctection/pipe/__init__.py +1 -2
deepdoctection/pipe/anngen.py +104 -62
deepdoctection/pipe/base.py +226 -107
deepdoctection/pipe/common.py +206 -123
deepdoctection/pipe/concurrency.py +74 -47
deepdoctection/pipe/doctectionpipe.py +108 -47
deepdoctection/pipe/language.py +41 -24
deepdoctection/pipe/layout.py +45 -18
deepdoctection/pipe/lm.py +146 -78
deepdoctection/pipe/order.py +205 -119
deepdoctection/pipe/refine.py +111 -63
deepdoctection/pipe/registry.py +1 -1
deepdoctection/pipe/segment.py +213 -142
deepdoctection/pipe/sub_layout.py +76 -46
deepdoctection/pipe/text.py +52 -33
deepdoctection/pipe/transform.py +8 -6
deepdoctection/train/d2_frcnn_train.py +87 -69
deepdoctection/train/hf_detr_train.py +72 -40
deepdoctection/train/hf_layoutlm_train.py +85 -46
deepdoctection/train/tp_frcnn_train.py +56 -28
deepdoctection/utils/concurrency.py +59 -16
deepdoctection/utils/context.py +40 -19
deepdoctection/utils/develop.py +26 -17
deepdoctection/utils/env_info.py +86 -37
deepdoctection/utils/error.py +16 -10
deepdoctection/utils/file_utils.py +246 -71
deepdoctection/utils/fs.py +162 -43
deepdoctection/utils/identifier.py +29 -16
deepdoctection/utils/logger.py +49 -32
deepdoctection/utils/metacfg.py +83 -21
deepdoctection/utils/pdf_utils.py +119 -62
deepdoctection/utils/settings.py +24 -10
deepdoctection/utils/tqdm.py +10 -5
deepdoctection/utils/transform.py +182 -46
deepdoctection/utils/utils.py +61 -28
deepdoctection/utils/viz.py +150 -104
deepdoctection-0.43.1.dist-info/METADATA +376 -0
deepdoctection-0.43.1.dist-info/RECORD +149 -0
deepdoctection/analyzer/_config.py +0 -146
deepdoctection-0.42.1.dist-info/METADATA +0 -431
deepdoctection-0.42.1.dist-info/RECORD +0 -148
{deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/WHEEL +0 -0
{deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/licenses/LICENSE +0 -0
{deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/top_level.txt +0 -0

deepdoctection/pipe/common.py CHANGED Viewed

@@ -46,18 +46,22 @@ elif os.environ.get("DD_USE_TF"):
 @pipeline_component_registry.register("ImageCroppingService")
 class ImageCroppingService(PipelineComponent):
     """
-    Crop sub images given by bounding boxes of some annotations. This service is not necessary for
-    `ImageLayoutService` and is more intended for saved files where sub images are
+    Crop sub images given by bounding boxes of some annotations.
+    This service is not necessary for `ImageLayoutService` and is more intended for saved files where sub images are
     generally not stored.
     """
     def __init__(
-            self,
-            category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
-            service_ids: Optional[Sequence[str]] = None,
+        self,
+        category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        service_ids: Optional[Sequence[str]] = None,
     ) -> None:
         """
-        :param category_names: A single name or a list of category names to crop
+        Args:
+            category_names: A single name or a list of category names to crop.
+            service_ids: Optional list of service IDs.
         """
         if category_names is None:
             self.category_names = None
@@ -86,42 +90,46 @@ class ImageCroppingService(PipelineComponent):
 class IntersectionMatcher:
     """
-    Objects of two object classes can be assigned to one another by determining their pairwise intersection. If this is
-    above a limit, a relation is created between them.
+    Objects of two object classes can be assigned to one another by determining their pairwise intersection.
+    If this is above a limit, a relation is created between them.
     The parent object class (based on its category) and the child object class are defined for the service.
     Either `iou` (intersection-over-union) or `ioa` (intersection-over-area) can be selected as the matching rule.
-            # the following will assign word annotations to text and title annotation, provided that their ioa-threshold
-            # is above 0.7. words below that threshold will not be assigned.
-            matcher = IntersectionMatcher(matching_rule="ioa", threshold=0.7)
-            match_service = MatchingService(parent_categories=["text","title"],
-                                    child_categories="word",
-                                    matcher=matcher,
-                                    relationship_key=Relationships.CHILD)
+    Example:
+        ```python
+        matcher = IntersectionMatcher(matching_rule="ioa", threshold=0.7)
+        match_service = MatchingService(parent_categories=["text","title"],
+                                        child_categories="word",
+                                        matcher=matcher,
+                                        relationship_key=Relationships.CHILD)
+        ```
-            # Assigning means that text and title annotation will receive a relationship called "CHILD" which is a list
-              of annotation ids of mapped words.
+    Assigning means that text and title annotation will receive a relationship called `CHILD` which is a list
+    of annotation ids of mapped words.
     """
     def __init__(
-            self,
-            matching_rule: Literal["iou", "ioa"],
-            threshold: float,
-            use_weighted_intersections: bool = False,
-            max_parent_only: bool = False,
+        self,
+        matching_rule: Literal["iou", "ioa"],
+        threshold: float,
+        use_weighted_intersections: bool = False,
+        max_parent_only: bool = False,
     ) -> None:
         """
-        :param matching_rule: "iou" or "ioa"
-        :param threshold: iou/ioa threshold. Value between [0,1]
-        :param use_weighted_intersections: This is currently only implemented for matching_rule 'ioa'. Instead of using
-                                           the ioa_matrix it will use mat weighted ioa in order to take into account
-                                           that intersections with more cells will likely decrease the ioa value. By
-                                           multiplying the ioa with the number of all intersection for each child this
-                                           value calibrate the ioa.
-        :param max_parent_only: Will assign to each child at most one parent with maximum ioa"""
+        Args:
+            matching_rule: `iou` or `ioa`.
+            threshold: iou/ioa threshold. Value between [0,1].
+            use_weighted_intersections: This is currently only implemented for matching_rule `ioa`. Instead of using
+                the ioa_matrix it will use mat weighted ioa in order to take into account that intersections with more
+                cells will likely decrease the ioa value. By multiplying the ioa with the number of all intersection for
+                each child this value calibrate the ioa.
+        max_parent_only: Will assign to each child at most one parent with maximum ioa.
+        Raises:
+            ValueError: If `matching_rule` is not `iou` or `ioa`.
+        """
         if matching_rule not in ("iou", "ioa"):
             raise ValueError("segment rule must be either iou or ioa")
@@ -131,25 +139,27 @@ class IntersectionMatcher:
         self.max_parent_only = max_parent_only
     def match(
-            self,
-            dp: Image,
-            parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
-            child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
-            parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
-            child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+        self,
+        dp: Image,
+        parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+        child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
     ) -> list[tuple[str, str]]:
         """
-        The matching algorithm
-        :param dp: datapoint image
-        :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
-        :param child_categories: list of categories to be used for a child class.
-        :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
-                                        parent candidates which are not in the list.
-        :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
-                                        children candidates which are not in the list.
-        :return: A list of tuples with parent and child annotation ids
+        The matching algorithm.
+        Args:
+            dp: `Image` datapoint.
+            parent_categories: List of categories to be used as parent class. Will generate a child-relationship.
+            child_categories: List of categories to be used for a child class.
+            parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                parent candidates which are not in the list.
+            child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                children candidates which are not in the list.
+        Returns:
+            A list of tuples with parent and child annotation ids.
         """
         child_index, parent_index, child_anns, parent_anns = match_anns_by_intersection(
             dp,
@@ -177,37 +187,38 @@ class NeighbourMatcher:
     """
     Objects of two object classes can be assigned to one another by determining their pairwise distance.
-        # the following will assign caption annotations to figure annotation
+    Example:
+        ```python
         matcher = NeighbourMatcher()
         match_service = MatchingService(parent_categories=["figure"],
                                         child_categories="caption",
                                         matcher=matcher,
                                         relationship_key=Relationships.LAYOUT_LINK)
+        ```
     """
     def match(
-            self,
-            dp: Image,
-            parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
-            child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
-            parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
-            child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+        self,
+        dp: Image,
+        parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
+        child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
     ) -> list[tuple[str, str]]:
         """
-        The matching algorithm
-        :param dp: datapoint image
-        :param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
-        :param child_categories: list of categories to be used for a child class.
-        :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
-                                        parent candidates which are not in the list.
-        :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
-                                        children candidates which are not in the list.
-        :return: A list of tuples with parent and child annotation ids
+        The matching algorithm.
+        Args:
+            dp: `Image` datapoint.
+            parent_categories: List of categories to be used as parent class. Will generate a child-relationship.
+            child_categories: List of categories to be used for a child class.
+            parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                parent candidates which are not in the list.
+            child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
+                children candidates which are not in the list.
+        Returns:
+            A list of tuples with parent and child annotation ids.
         """
         return [
@@ -225,8 +236,17 @@ class NeighbourMatcher:
 @dataclass
 class FamilyCompound:
     """
-    A family compound is a set of parent and child categories that are related by a relationship key. The parent
-    categories will receive a relationship to the child categories.
+    A family compound is a set of parent and child categories that are related by a relationship key.
+    The parent categories will receive a relationship to the child categories.
+    Attributes:
+        relationship_key: The relationship key.
+        parent_categories: Parent categories.
+        child_categories: Child categories.
+        parent_ann_service_ids: Parent annotation service IDs.
+        child_ann_service_ids: Child annotation service IDs.
+        create_synthetic_parent: Whether to create a synthetic parent.
+        synthetic_parent: The synthetic parent.
     """
     relationship_key: Relationships
@@ -255,18 +275,23 @@ class FamilyCompound:
 @pipeline_component_registry.register("MatchingService")
 class MatchingService(PipelineComponent):
     """
-    A service to match annotations of two categories by intersection or distance. The matched annotations will be
-    assigned a relationship. The parent category will receive a relationship to the child category.
+    A service to match annotations of two categories by intersection or distance.
+    The matched annotations will be assigned a relationship. The parent category will receive a
+    relationship to the child category.
     """
     def __init__(
-            self,
-            family_compounds: Sequence[FamilyCompound],
-            matcher: Union[IntersectionMatcher, NeighbourMatcher],
+        self,
+        family_compounds: Sequence[FamilyCompound],
+        matcher: Union[IntersectionMatcher, NeighbourMatcher],
     ) -> None:
         """
-        :param family_compounds: A list of FamilyCompounds
-        :param matcher: A matcher object
+        Args:
+            family_compounds: A list of `FamilyCompound`.
+            matcher: A matcher object.
         """
         self.family_compounds = family_compounds
         self.matcher = matcher
@@ -274,10 +299,10 @@ class MatchingService(PipelineComponent):
     def serve(self, dp: Image) -> None:
         """
-        - generates pairwise match-score by intersection
-        - generates child relationship at parent level
+        Generates pairwise match-score by intersection and generates child relationship at parent level.
-        :param dp: datapoint image
+        Args:
+            dp: `Image` datapoint.
         """
         for family_compound in self.family_compounds:
             matched_pairs = self.matcher.match(
@@ -300,18 +325,22 @@ class MatchingService(PipelineComponent):
                 detect_result_list = []
                 for child_ann in child_anns:
                     if child_ann.annotation_id not in child_ann_ids:
-                        detect_result_list.append(DetectionResult(
-                            class_name=family_compound.synthetic_parent,
-                            box=child_ann.get_bounding_box(dp.image_id).to_list(mode="xyxy"),
-                            absolute_coords=child_ann.get_bounding_box(dp.image_id).absolute_coords,
-                            relationships={family_compound.relationship_key: child_ann.annotation_id}))
+                        detect_result_list.append(
+                            DetectionResult(
+                                class_name=family_compound.synthetic_parent,
+                                box=child_ann.get_bounding_box(dp.image_id).to_list(mode="xyxy"),
+                                absolute_coords=child_ann.get_bounding_box(dp.image_id).absolute_coords,
+                                relationships={family_compound.relationship_key: child_ann.annotation_id},
+                            )
+                        )
                 for detect_result in detect_result_list:
                     annotation_id = self.dp_manager.set_image_annotation(detect_result)
                     if annotation_id is not None and detect_result.relationships is not None:
-                        self.dp_manager.set_relationship_annotation(family_compound.relationship_key,
-                                                                    annotation_id,
-                                                                    detect_result.relationships.get(
-                                                                        family_compound.relationship_key, None))
+                        self.dp_manager.set_relationship_annotation(
+                            family_compound.relationship_key,
+                            annotation_id,
+                            detect_result.relationships.get(family_compound.relationship_key, None),
+                        )
     def clone(self) -> PipelineComponent:
         return self.__class__(self.family_compounds, self.matcher)
@@ -336,31 +365,44 @@ class MatchingService(PipelineComponent):
 @pipeline_component_registry.register("PageParsingService")
 class PageParsingService(PipelineComponent):
     """
-    A "pseudo" pipeline component that can be added to a pipeline to convert `Image`s into `Page` formats. It allows a
-    custom parsing depending on customizing options of other pipeline components.
+    A "pseudo" pipeline component that can be added to a pipeline to convert `Image`s into `Page` formats.
+    It allows a custom parsing depending on customizing options of other pipeline components.
+    Info:
+        This component is not meant to be used in the `serve` method.
     """
     def __init__(
-            self,
-            text_container: TypeOrStr,
-            floating_text_block_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
-            include_residual_text_container: bool = True,
+        self,
+        text_container: TypeOrStr,
+        floating_text_block_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        residual_text_block_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+        include_residual_text_container: bool = True,
     ):
         """
-        :param text_container: name of an image annotation that has a CHARS sub category. These annotations will be
-                               ordered within all text blocks.
-        :param floating_text_block_categories: name of image annotation that have a relation with text containers.
+        Args:
+            text_container: Name of an image annotation that has a `CHARS` sub category. These annotations will be
+                ordered within all text blocks.
+            floating_text_block_categories: Name of image annotation that have a relation with text containers.
+            residual_text_block_categories: Name of image annotation that have a relation with text containers.
+            include_residual_text_container: Whether to include residual text container.
         """
         self.name = "page_parser"
         if isinstance(floating_text_block_categories, (str, ObjectTypes)):
             floating_text_block_categories = (get_type(floating_text_block_categories),)
         if floating_text_block_categories is None:
-            floating_text_block_categories = IMAGE_DEFAULTS["floating_text_block_categories"]
+            floating_text_block_categories = IMAGE_DEFAULTS.FLOATING_TEXT_BLOCK_CATEGORIES
+        if residual_text_block_categories is None:
+            residual_text_block_categories = IMAGE_DEFAULTS.RESIDUAL_TEXT_BLOCK_CATEGORIES
         self.text_container = get_type(text_container)
         self.floating_text_block_categories = tuple(
             (get_type(text_block) for text_block in floating_text_block_categories)
         )
+        self.residual_text_block_categories = tuple(
+            get_type(text_block) for text_block in residual_text_block_categories
+        )
         self.include_residual_text_container = include_residual_text_container
         self._init_sanity_checks()
         super().__init__(self.name)
@@ -370,14 +412,19 @@ class PageParsingService(PipelineComponent):
     def pass_datapoint(self, dp: Image) -> Page:
         """
-        converts Image to Page
-        :param dp: Image
-        :return: Page
+        Converts `Image` to `Page`.
+        Args:
+            dp: `Image`.
+        Returns:
+            `Page`.
         """
         return Page.from_image(
             dp,
             text_container=self.text_container,
             floating_text_block_categories=self.floating_text_block_categories,
+            residual_text_block_categories=self.residual_text_block_categories,
             include_residual_text_container=self.include_residual_text_container,
         )
@@ -389,15 +436,22 @@ class PageParsingService(PipelineComponent):
     def get_meta_annotation(self) -> MetaAnnotation:
         """
-        meta annotation. We do not generate any new annotations here
+        Returns:
+            `MetaAnnotation`. No new annotations are generated here.
         """
         return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
     def clone(self) -> PageParsingService:
-        """clone"""
+        """
+        Clone the `PageParsingService`.
+        Returns:
+            A cloned `PageParsingService`.
+        """
         return self.__class__(
             deepcopy(self.text_container),
             deepcopy(self.floating_text_block_categories),
+            deepcopy(self.residual_text_block_categories),
             self.include_residual_text_container,
         )
@@ -409,15 +463,18 @@ class PageParsingService(PipelineComponent):
 class AnnotationNmsService(PipelineComponent):
     """
     A service to pass `ImageAnnotation` to a non-maximum suppression (NMS) process for given pairs of categories.
     `ImageAnnotation`s are subjected to NMS process in groups:
     If `nms_pairs=[[LayoutType.text, LayoutType.table],[LayoutType.title, LayoutType.table]]` all `ImageAnnotation`
     subject to these categories are being selected and identified as one category.
     After NMS the discarded image annotation will be deactivated.
-    **Example**
+    Example:
+        ```python
         AnnotationNmsService(nms_pairs=[[LayoutType.text, LayoutType.table],[LayoutType.title, LayoutType.table]],
-                             thresholds=[0.7,0.7])   # for each pair a threshold has to be provided
+                             thresholds=[0.7,0.7])
+        ```
+        For each pair a threshold has to be provided.
     For a pair of categories, one can also select a category which has always priority even if the score is lower.
     This is useful if one expects some categories to be larger and want to keep them.
@@ -426,15 +483,20 @@ class AnnotationNmsService(PipelineComponent):
     """
     def __init__(
-            self,
-            nms_pairs: Sequence[Sequence[TypeOrStr]],
-            thresholds: Union[float, Sequence[float]],
-            priority: Optional[Sequence[Union[Optional[TypeOrStr]]]] = None,
+        self,
+        nms_pairs: Sequence[Sequence[TypeOrStr]],
+        thresholds: Union[float, Sequence[float]],
+        priority: Optional[Sequence[Union[Optional[TypeOrStr]]]] = None,
     ):
         """
-        :param nms_pairs: Groups of categories, either as string or by `ObjectType`.
-        :param thresholds: Suppression threshold. If only one value is provided, it will apply the threshold to all
-                           pairs. If a list is provided, make sure to add as many list elements as `nms_pairs`.
+        Args:
+            nms_pairs: Groups of categories, either as string or by `ObjectType`.
+            thresholds: Suppression threshold. If only one value is provided, it will apply the threshold to all
+                pairs. If a list is provided, make sure to add as many list elements as `nms_pairs`.
+            priority: Optional list of categories which have always priority.
+        Raises:
+            AssertionError: If the length of `nms_pairs` and `thresholds` or `priority` do not match.
         """
         self.nms_pairs = [[get_type(val) for val in pair] for pair in nms_pairs]
         if isinstance(thresholds, float):
@@ -456,6 +518,13 @@ class AnnotationNmsService(PipelineComponent):
         super().__init__("nms")
     def serve(self, dp: Image) -> None:
+        """
+        Args:
+            dp: `Image`.
+        Returns:
+            None.
+        """
         for pair, threshold, prio in zip(self.nms_pairs, self.threshold, self.priority):
             anns = dp.get_annotation(category_names=pair)
             ann_ids_to_keep = nms_image_annotations(anns, threshold, dp.image_id, prio)
@@ -476,27 +545,41 @@ class AnnotationNmsService(PipelineComponent):
 @pipeline_component_registry.register("ImageParsingService")
 class ImageParsingService:
     """
-    A super light service that calls `to_image` when processing datapoints. Might be useful if you build a pipeline that
-    is not derived from `DoctectionPipe`.
+    A super light service that calls `to_image` when processing datapoints.
+    Might be useful if you build a pipeline that is not derived from `DoctectionPipe`.
     """
     def __init__(self, dpi: Optional[int] = None):
         """
-        :param dpi: dpi resolution when converting PDFs into pixel values
+        Args:
+            dpi: dpi resolution when converting PDFs into pixel values.
         """
         self.name = "image"
         self.dpi = dpi
     def pass_datapoint(self, dp: Union[str, Mapping[str, Union[str, bytes]]]) -> Optional[Image]:
-        """pass a datapoint"""
+        """
+        Pass a datapoint.
+        Args:
+            dp: A datapoint, either a string or a mapping.
+        Returns:
+            `Image` or None.
+        """
         return to_image(dp, self.dpi)
     def predict_dataflow(self, df: DataFlow) -> DataFlow:
         """
-        Mapping a datapoint via `pass_datapoint` within a dataflow pipeline
+        Mapping a datapoint via `pass_datapoint` within a dataflow pipeline.
+        Args:
+            df: An input `DataFlow`.
-        :param df: An input dataflow
-        :return: A output dataflow
+        Returns:
+            An output `DataFlow`.
         """
         return MapData(df, self.pass_datapoint)

deepdoctection 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl

Potentially problematic release.

deepdoctection 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl