deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124)
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
  120. deepdoctection/analyzer/_config.py +0 -146
  121. deepdoctection-0.42.0.dist-info/METADATA +0 -431
  122. deepdoctection-0.42.0.dist-info/RECORD +0 -148
  123. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
@@ -48,9 +48,13 @@ class MappingContextManager:
  self, dp_name: Optional[str] = None, filter_level: str = "image", **kwargs: dict[str, Optional[str]]
  ) -> None:
  """
- :param dp_name: A name for the datapoint to be mapped
- :param filter_level: Indicates if the `MappingContextManager` is use on datapoint level,
- annotation level etc. Filter level will only be used for logging
+ Args:
+ dp_name: A name for the datapoint to be mapped.
+ filter_level: Indicates if the `MappingContextManager` is used on datapoint level, annotation level etc.
+ `filter_level` will only be used for logging.
+
+ Note:
+ Use this context manager to catch and log exceptions during mapping.
  """
  self.dp_name = dp_name if dp_name is not None else ""
  self.filter_level = filter_level
@@ -59,7 +63,10 @@ class MappingContextManager:

  def __enter__(self) -> MappingContextManager:
  """
- context enter
+ Context enter.
+
+ Returns:
+ The `MappingContextManager` instance.
  """
  return self

@@ -70,7 +77,15 @@ class MappingContextManager:
  exc_tb: Optional[TracebackType],
  ) -> Optional[bool]:
  """
- context exit
+ Context exit.
+
+ Args:
+ exc_type: The exception type.
+ exc_val: The exception value.
+ exc_tb: The traceback object.
+
+ Returns:
+ `True` if the exception was handled, otherwise `None`.
  """
  if (
  exc_type
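The new `Note` in this docstring describes the intended use: wrap a mapping step so that a failure is logged and swallowed instead of aborting the whole dataflow. A minimal, self-contained sketch of that pattern (class and attribute names below are illustrative, not the library's actual API):

```python
from types import TracebackType
from typing import Optional, Type


class SimpleMappingContext:
    """Stand-in for the pattern described above: log the error, do not re-raise."""

    def __init__(self, dp_name: str = "", filter_level: str = "image") -> None:
        self.dp_name = dp_name
        self.filter_level = filter_level
        self.context_error = False  # hypothetical flag, not the library attribute

    def __enter__(self) -> "SimpleMappingContext":
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> Optional[bool]:
        if exc_type is not None:
            print(f"mapping of {self.filter_level} {self.dp_name!r} failed: {exc_val}")
            self.context_error = True
            return True  # returning True suppresses the exception
        return None


with SimpleMappingContext(dp_name="doc_001.png") as ctx:
    raise ValueError("broken annotation")
if ctx.context_error:
    pass  # e.g. drop this datapoint and continue the dataflow
```

Returning `True` from `__exit__` is what keeps the surrounding dataflow running past a broken datapoint.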
@@ -121,9 +136,10 @@ class DefaultMapper:

  def __init__(self, func: Callable[[DP, S], T], *args: Any, **kwargs: Any) -> None:
  """
- :param func: A mapping function
- :param args: Default args to pass to the function
- :param kwargs: Default kwargs to pass to the function
+ Args:
+ func: A mapping function
+ args: Default `args` to pass to the function
+ kwargs: Default `kwargs` to pass to the function
  """
  self.func = func
  self.argument_args = args
@@ -131,34 +147,42 @@ class DefaultMapper:

  def __call__(self, dp: Any) -> Any:
  """
- :param dp: datapoint within a dataflow
- :return: The return value of the invoked function with default arguments.
+ Call the wrapped function with the given datapoint and default arguments.
+
+ Args:
+ dp: Datapoint within a dataflow.
+
+ Returns:
+ The return value of the invoked function with default arguments.
  """
  return self.func(dp, *self.argument_args, **self.argument_kwargs)


  def curry(func: Callable[..., T]) -> Callable[..., Callable[[DP], T]]:
  """
- Decorator for converting functions that maps
+ Decorator for converting functions that map

- dps: Union[JsonDict,Image] -> Union[JsonDict,Image]
+ ```python
+ dps: Union[JsonDict, Image] -> Union[JsonDict, Image]
+ ```

- to `DefaultMapper`s. They will be initialized with all arguments except dp and can be called later with only the
+ to `DefaultMapper`s. They will be initialized with all arguments except `dp` and can be called later with only the
  datapoint as argument. This setting is useful when incorporating the function within a dataflow.

- **Example:**
-
- @curry
- def json_to_image(dp, config_arg_1, config_arg_2,...) -> Image:
+ Example:
+ ```python
+ @curry
+ def json_to_image(dp, config_arg_1, config_arg_2, ...) -> Image:
  ...
+ df = ...
+ df = MapData(df, json_to_image(config_arg_1=val_1, config_arg_2=val_2))
+ ```

- can be applied like:
-
- df = ...
- df = MapData(df,json_to_image(config_arg_1=val_1,config_arg_2=val_2))
+ Args:
+ func: A callable [[`Image`], [Any]] -> [`Image`]

- :param func: A callable [[`Image`],[Any]] -> [`Image`]
- :return: A DefaultMapper
+ Returns:
+ A `DefaultMapper`.
  """

  @functools.wraps(func)
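The `Example` block above shows how a curried mapper is configured once and then applied per datapoint. For reviewers, a runnable, generic sketch of such a decorator, using a plain closure instead of the library's `DefaultMapper` class (`scale` and `factor` are made-up names):

```python
import functools
from typing import Any, Callable, TypeVar

T = TypeVar("T")


def curry(func: Callable[..., T]) -> Callable[..., Callable[[Any], T]]:
    """Bind every argument except the datapoint; call the result with the datapoint only."""

    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Callable[[Any], T]:
        # Freeze the configuration arguments now, accept the datapoint later.
        return lambda dp: func(dp, *args, **kwargs)

    return wrapper


@curry
def scale(dp: float, factor: float) -> float:
    return dp * factor


mapper = scale(factor=2.0)  # configured once, e.g. when building a dataflow
print(mapper(3.0))          # applied per datapoint -> 6.0
```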
@@ -170,10 +194,13 @@ def curry(func: Callable[..., T]) -> Callable[..., Callable[[DP], T]]:

  def maybe_get_fake_score(add_fake_score: bool) -> Optional[float]:
  """
- Returns a fake score, if add_fake_score = True. Will otherwise return None
+ Returns a fake score, if `add_fake_score` is `True`. Will otherwise return `None`.
+
+ Args:
+ add_fake_score: Boolean.

- :param add_fake_score: boolean
- :return: A uniform random variable in (0,1)
+ Returns:
+ A uniform random variable in `(0,1)` or `None`.
  """
  if add_fake_score:
  return np.random.uniform(0.0, 1.0, 1)[0]
@@ -182,20 +209,24 @@ def maybe_get_fake_score(add_fake_score: bool) -> Optional[float]:

  class LabelSummarizer:
  """
- A class for generating label statistics. Useful, when mapping and generating a SummaryAnnotation.
-
- summarizer = LabelSummarizer({"1": "label_1","2":"label_2"})
+ A class for generating label statistics. Useful when mapping and generating a `SummaryAnnotation`.

+ Example:
+ ```python
+ summarizer = LabelSummarizer({"1": "label_1", "2": "label_2"})
  for dp in some_dataflow:
  summarizer.dump(dp["label_id"])
-
  summarizer.print_summary_histogram()
+ ```

+ Args:
+ categories: A dict of categories as given as in `categories.get_categories()`.
  """

  def __init__(self, categories: Mapping[int, ObjectTypes]) -> None:
  """
- :param categories: A dict of categories as given as in categories.get_categories().
+ Args:
+ categories: A dict of categories as given as in `categories.get_categories()`.
  """
  self.categories = categories
  cat_numbers = len(self.categories.keys())
@@ -204,16 +235,20 @@ class LabelSummarizer:

  def dump(self, item: Union[Sequence[Union[str, int]], str, int]) -> None:
  """
- Dump a category number
+ Dump a category number.

- :param item: A category number.
+ Args:
+ item: A category number.
  """
  np_item = np.asarray(item, dtype="int8")
  self.summary += np.histogram(np_item, bins=self.hist_bins)[0]

  def get_summary(self) -> dict[int, int]:
  """
- Get a dictionary with category ids and the number dumped
+ Get a dictionary with category ids and the number dumped.
+
+ Returns:
+ A dictionary mapping category ids to counts.
  """
  return dict(list(zip(self.categories.keys(), self.summary.tolist())))

@@ -221,7 +256,8 @@ class LabelSummarizer:
  """
  Prints a summary from all dumps.

- :param dd_logic: Follow dd category convention when printing histogram (last background bucket omitted).
+ Args:
+ dd_logic: Follow dd category convention when printing histogram (last background bucket omitted).
  """
  if dd_logic:
  data = list(itertools.chain(*[[self.categories[i].value, v] for i, v in enumerate(self.summary, 1)]))
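`dump`/`get_summary` above amount to a histogram accumulator over category ids. A small NumPy sketch of that idea (the bin construction is an assumption for illustration, not the library's exact `hist_bins`):

```python
import numpy as np

categories = {1: "label_1", 2: "label_2"}          # mirrors the docstring example

# Bin edges chosen so that category id k falls into bin k (assumed construction).
hist_bins = np.arange(1, len(categories) + 2)      # -> [1, 2, 3]
summary = np.zeros(len(categories), dtype="int64")

for label_id in [1, 2, 2, 1, 2]:                   # stand-in for dp["label_id"] values
    summary += np.histogram(np.asarray(label_id), bins=hist_bins)[0]

print(dict(zip(categories.keys(), summary.tolist())))  # {1: 2, 2: 3}
```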
@@ -46,55 +46,64 @@ def match_anns_by_intersection(
  max_parent_only: bool = False,
  ) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
  """
- Generates an iou/ioa-matrix for parent_ann_categories and child_ann_categories and returns pairs of child/parent
- indices that are above some intersection threshold. It will also return a list of all pre selected parent and child
+ Generates an iou/ioa-matrix for `parent_ann_categories` and `child_ann_categories` and returns pairs of child/parent
+ indices that are above some intersection threshold. It will also return a list of all pre-selected parent and child
  annotations.

- **Example:**
-
- Let `p_i, c_j` be annotations ids of parent and children according to some category names.
-
- |**ioa**|**c_1**|**c_2**|
- |-------|-------|-------|
- |**p_1**| 0.3 | 0.8 |
- |**p_2**| 0.4 | 0.1 |
- |**p_3**| 1. | 0.4 |
-
- With `ioa_threshold = 0.5` it will return:
-
- `[[2],[0]], [[1],[],[1]], [c_1,c_2], [p_1,p_2,p_3]`.
-
- For each child the sum of all ioas with all parents sum up to 1. Hence, the ioa with one parent will in general
- decrease if one child intersects with more parents. Take two childs one matching two parents with an ioa of 0.5 each
- while the second matching four parents with an ioa of 0.25 each. In this situation it is difficult to assign
- children according to a given threshold and one also has to take into account the number of parental intersection
- for each child. Setting `use_weighted_intersections` to True will multiply each ioa with the number of intersection
- making it easier to work with an absolute threshold.
-
- In some situation you want to assign to each child at most one parent. Setting `max_parent_only` to `True` it will
- select the parent with the highest ioa. Note, there is currently no implementation for iou.
-
- :param dp: image datapoint
- :param parent_ann_category_names: single str or list of category names
- :param child_ann_category_names: single str or list of category names
- :param matching_rule: intersection measure type, either "iou" or "ioa"
- :param threshold: Threshold, for mat given matching rule. Will assign every child ann with iou/ioa above the
- threshold to the parental annotation.
- :param use_weighted_intersections: This is currently only implemented for matching_rule 'ioa'. Instead of using
- the ioa_matrix it will use mat weighted ioa in order to take into account that
- intersections with more cells will likely decrease the ioa value. By multiplying
- the ioa with the number of all intersection for each child this value calibrate
- the ioa.
- :param parent_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other parent candi-
- dates which are not in the list.
- :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
- candidates which are not in the list.
- :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
- parent candidates which are not in the list.
- :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
- children candidates which are not in the list.
- :param max_parent_only: Will assign to each child at most one parent with maximum ioa
- :return: child indices, parent indices (see Example), list of parent ids and list of children ids.
+ Example:
+ ```python
+ match_anns_by_intersection()
+ ```
+
+ Let `p_i, c_j` be annotation ids of parent and children according to some category names.
+
+ | ioa | c_1 | c_2 |
+ |-------|-----|-----|
+ | p_1 | 0.3 | 0.8 |
+ | p_2 | 0.4 | 0.1 |
+ | p_3 | 1.0 | 0.4 |
+
+ With `ioa_threshold = 0.5` it will return:
+
+ `[[2],[0]], [[1],[],[1]], [c_1,c_2], [p_1,p_2,p_3]`.
+
+ For each child, the sum of all ioas with all parents sum up to 1. Hence, the ioa with one parent will in general
+ decrease if one child intersects with more parents. Take two children, one matching two parents with an ioa of
+ 0.5 each, while the second matches four parents with an ioa of 0.25 each. In this situation, it is difficult to
+ assign children according to a given threshold and one also has to take into account the number of parental
+ intersections for each child.
+
+ Note:
+ Setting `use_weighted_intersections` to True will multiply each ioa with the number of intersections,
+ making it easier to work with an absolute threshold.
+
+ Note:
+ In some situations, you want to assign to each child at most one parent. Setting `max_parent_only` to `True`
+ will select the parent with the highest ioa. There is currently no implementation for iou.
+
+ Args:
+ dp: Image datapoint.
+ matching_rule: Intersection measure type, either `iou` or `ioa`.
+ threshold: Threshold for the given matching rule. Will assign every child annotation with iou/ioa above the
+ threshold to the parental annotation.
+ use_weighted_intersections: This is currently only implemented for matching_rule `ioa`. Instead of using
+ the ioa_matrix, it will use a weighted ioa in order to take into account that intersections with more cells
+ will likely decrease the ioa value. By multiplying the ioa with the number of all intersections for each
+ child, this value calibrates the ioa.
+ parent_ann_category_names: Single str or list of category names.
+ child_ann_category_names: Single str or list of category names.
+ parent_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other parent
+ candidates which are not in the list.
+ child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+ candidates which are not in the list.
+ parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other parent
+ candidates which are not in the list.
+ child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+ candidates which are not in the list.
+ max_parent_only: Will assign to each child at most one parent with maximum ioa.
+
+ Returns:
+ child indices, parent indices (see Example), list of parent ids and list of children ids.
  """

  assert matching_rule in ["iou", "ioa"], "matching rule must be either iou or ioa"
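To make the tabulated example concrete, here is a NumPy sketch of the plain (unweighted) ioa thresholding described above; the matrix is the docstring's own example, the variable names are not the library's:

```python
import numpy as np

# ioa matrix from the docstring: rows are parents p_1..p_3, columns are children c_1, c_2.
ioa = np.array([
    [0.3, 0.8],
    [0.4, 0.1],
    [1.0, 0.4],
])

threshold = 0.5
parent_idx, child_idx = np.where(ioa >= threshold)
print(list(zip(parent_idx.tolist(), child_idx.tolist())))
# [(0, 1), (2, 0)] -> p_1 matches c_2, p_3 matches c_1, as in the docstring's result

# `max_parent_only` idea: keep only the parent with the highest ioa for each child.
print(ioa.argmax(axis=0).tolist())  # [2, 0] -> c_1 -> p_3, c_2 -> p_1
```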
@@ -166,21 +175,26 @@ def match_anns_by_distance(
  ) -> list[tuple[ImageAnnotation, ImageAnnotation]]:
  """
  Generates pairs of parent and child annotations by calculating the euclidean distance between the centers of the
- parent and child bounding boxes. It will return the closest child for each parent. Note, that a child can be
- assigned multiple times to different parents.
-
- :param dp: image datapoint
- :param parent_ann_category_names: single str or list of category names
- :param child_ann_category_names: single str or list of category names
- :param parent_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other parent candi-
- dates which are not in the list.
- :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
- candidates which are not in the list.
- :param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
- parent candidates which are not in the list.
- :param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
- children candidates which are not in the list.
- :return:
+ parent and child bounding boxes. It will return the closest child for each parent.
+
+ Note:
+ A child can be assigned multiple times to different parents.
+
+ Args:
+ dp: Image datapoint.
+ parent_ann_category_names: Single str or list of category names.
+ child_ann_category_names: Single str or list of category names.
+ parent_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other parent
+ candidates which are not in the list.
+ child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+ candidates which are not in the list.
+ parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other parent
+ candidates which are not in the list.
+ child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+ candidates which are not in the list.
+
+ Returns:
+ List of tuples of parent and child annotations.
  """

  parent_anns = dp.get_annotation(
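A compact sketch of the center-distance matching described above (coordinates are made up; the real function operates on `ImageAnnotation` bounding boxes):

```python
import numpy as np

parent_centers = np.array([[10.0, 10.0], [50.0, 12.0]])              # (x, y) box centers
child_centers = np.array([[48.0, 11.0], [9.0, 30.0], [100.0, 5.0]])

# Pairwise euclidean distances, shape (n_parents, n_children).
dists = np.linalg.norm(parent_centers[:, None, :] - child_centers[None, :, :], axis=-1)

# Closest child per parent; note a child may be picked by several parents.
pairs = [(p, int(c)) for p, c in enumerate(dists.argmin(axis=1))]
print(pairs)  # [(0, 1), (1, 0)]
```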
@@ -16,7 +16,7 @@
  # limitations under the License.

  """
- Module for small mapping functions
+ Small mapping functions
  """

  from __future__ import annotations
@@ -45,14 +45,19 @@ def to_image(
  height: Optional[int] = None,
  ) -> Optional[Image]:
  """
- Mapping an input from `dataflow.SerializerFiles` or similar to an Image
-
- :param dp: Image
- :param dpi: dot per inch definition for pdf resolution when converting to numpy array
- :param width: target width of the image. This option does only work when using Poppler as PDF renderer
- :param height: target width of the image. This option does only work when using Poppler as PDF renderer
- :param height: target height of the image
- :return: Image
+ Maps an input from `dataflow.SerializerFiles` or similar to an `Image`.
+
+ Args:
+ dp: Image.
+ dpi: Dot per inch definition for PDF resolution when converting to `np.array`.
+ width: Target width of the image. This option only works when using Poppler as PDF renderer.
+ height: Target height of the image. This option only works when using Poppler as PDF renderer.
+
+ Returns:
+ Image
+
+ Raises:
+ TypeError: If `dp` is not of the expected type for converting to image.
  """

  file_name: Optional[str]
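The `dpi` argument controls the rasterization resolution when the input is a PDF page. A quick back-of-the-envelope check of what that means in pixels (plain arithmetic, nothing deepdoctection-specific):

```python
# Approximate pixel size of an A4 page (8.27 x 11.69 inches) rendered at a given dpi.
dpi = 300
a4_inches = (8.27, 11.69)
print(tuple(round(side * dpi) for side in a4_inches))  # (2481, 3507)
```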
@@ -101,10 +106,13 @@ def to_image(

  def maybe_load_image(dp: Image) -> Image:
  """
- If `image` is None will load the image.
+ If `image` is `None`, loads the image.

- :param dp: An Image
- :return: Image with attr: image not None
+ Args:
+ dp: An `Image`.
+
+ Returns:
+ Image with attribute `image` not `None`.
  """

  if dp.image is None:
@@ -116,10 +124,13 @@ def maybe_load_image(dp: Image) -> Image:

  def maybe_remove_image(dp: Image) -> Image:
  """
- Remove `image` if a location is provided.
+ Removes `image` if a location is provided.
+
+ Args:
+ dp: An `Image`.

- :param dp: An Image
- :return: Image with None attr: image
+ Returns:
+ Image with attribute `image` set to `None`.
  """

  if dp.location is not None:
@@ -130,11 +141,14 @@ def maybe_remove_image(dp: Image) -> Image:
  @curry
  def maybe_remove_image_from_category(dp: Image, category_names: Optional[Union[str, Sequence[str]]] = None) -> Image:
  """
- Removes image from image annotation for some category names
+ Removes `image` from image annotation for some `category_name`s.
+
+ Args:
+ dp: An `Image`.
+ category_names: Category names.

- :param dp: An Image
- :param category_names: category names
- :return: Image with image attributes from image annotations removed
+ Returns:
+ Image with `image` attributes from image annotations removed.
  """
  if category_names is None:
  category_names = []
@@ -151,12 +165,15 @@ def maybe_remove_image_from_category(dp: Image, category_names: Optional[Union[s

  def image_ann_to_image(dp: Image, category_names: Union[str, list[str]], crop_image: bool = True) -> Image:
  """
- Adds `image` to annotations with given category names
+ Adds `image` to annotations with given category names.

- :param dp: Image
- :param category_names: A single or a list of category names
- :param crop_image: Will add numpy array to `image.image`
- :return: Image
+ Args:
+ dp: `Image`.
+ category_names: A single or a list of category names.
+ crop_image: If `True`, will add `np.array` to `image.image`.
+
+ Returns:
+ Image
  """

  img_anns = dp.get_annotation(category_names=category_names)
@@ -171,15 +188,18 @@ def maybe_ann_to_sub_image(
  dp: Image, category_names_sub_image: Union[str, list[str]], category_names: Union[str, list[str]], add_summary: bool
  ) -> Image:
  """
- Assigns to sub image with given category names all annotations with given category names whose bounding box lie
+ Assigns to sub image with given category names all annotations with given category names whose bounding box lies
  within the bounding box of the sub image.

- :param dp: Image
- :param category_names_sub_image: A single or a list of category names that will form a sub image.
- :param category_names: A single or a list of category names that will may be assigned to a sub image, conditioned
- on the bounding box lying within the sub image.
- :param add_summary: will add the whole summary annotation to the sub image
- :return: Image
+ Args:
+ dp: `Image`.
+ category_names_sub_image: A single or a list of category names that will form a sub image.
+ category_names: A single or a list of category names that may be assigned to a sub image, conditioned on the
+ bounding box lying within the sub image.
+ add_summary: If `True`, will add the whole summary annotation to the sub image.
+
+ Returns:
+ Image
  """

  anns = dp.get_annotation(category_names=category_names_sub_image)
@@ -194,19 +214,23 @@ def maybe_ann_to_sub_image(
  @curry
  def xml_to_dict(dp: JsonDict, xslt_obj: etree.XSLT) -> JsonDict:
  """
- Convert a xml object into a dict using a xsl style sheet.
-
- **Example:**
-
- with open(path_xslt) as xsl_file:
- xslt_file = xsl_file.read().encode('utf-8')
- xml_obj = etree.XML(xslt_file, parser=etree.XMLParser(encoding='utf-8'))
- xslt_obj = etree.XSLT(xml_obj)
- df = MapData(df, xml_to_dict(xslt_obj))
-
- :param dp: string representing the xml
- :param xslt_obj: xslt object to parse the string
- :return: parsed xml
+ Converts an XML object into a dict using an XSL style sheet.
+
+ Example:
+ ```python
+ with open(path_xslt) as xsl_file:
+ xslt_file = xsl_file.read().encode('utf-8')
+ xml_obj = etree.XML(xslt_file, parser=etree.XMLParser(encoding='utf-8'))
+ xslt_obj = etree.XSLT(xml_obj)
+ df = MapData(df, xml_to_dict(xslt_obj))
+ ```
+
+ Args:
+ dp: String representing the XML.
+ xslt_obj: XSLT object to parse the string.
+
+ Returns:
+ Parsed XML as a dict.
  """

  output = str(xslt_obj(dp["xml"]))
@@ -16,7 +16,7 @@
  # limitations under the License.

  """
- Module for mapping annotations in iiitar13k style structure
+ Mapping for PASCAL VOC dataset structure to `Image` format.
  """

  import os
@@ -41,17 +41,18 @@ def pascal_voc_dict_to_image(
  category_name_mapping: Optional[dict[str, str]] = None,
  ) -> Optional[Image]:
  """
- Map a dataset in a structure equivalent to iiitar13k annotation style to image format
-
- :param dp: a datapoint in serialized iiitar13k format. Note that another conversion from xml to
- a dict structure is required.
- :param categories_name_as_key: A dict of categories, e.g. DatasetCategories.get_categories(name_as_key=True)
- :param load_image: If 'True' it will load image to attr: Image.image
- :param filter_empty_image: Will return None, if datapoint has no annotations
- :param fake_score: If dp does not contain a score, a fake score with uniform random variables in (0,1)
- will be added.
- :param category_name_mapping: Map incoming category names, e.g. {"source_name":"target_name"}
- :return: Image
+ Maps a dataset in a structure equivalent to the PASCAL VOC annotation style to the `Image` format.
+
+ Args:
+ dp: A datapoint in PASCAL VOC format. Note that another conversion from XML to a dict structure is required.
+ categories_name_as_key: A dict of categories, e.g. `DatasetCategories.get_categories(name_as_key=True)`.
+ load_image: If `True`, it will load the image to the attribute `Image.image`.
+ filter_empty_image: Will return `None` if the datapoint has no annotations.
+ fake_score: If `dp` does not contain a score, a fake score with uniform random variables in (0,1) will be added.
+ category_name_mapping: Map incoming category names, e.g. `{"source_name": "target_name"}`.
+
+ Returns:
+ `Image` or `None`.
  """

  anns = dp.get("objects", [])
@@ -16,7 +16,7 @@
  # limitations under the License.

  """
- Module for mapping annotations to and from prodigy data structure
+ Module for mapping annotations to and from prodigy data structure.
  """

  import os
@@ -41,19 +41,24 @@ def prodigy_to_image(
  category_name_mapping: Optional[Mapping[str, str]] = None,
  ) -> Optional[Image]:
  """
- Map a datapoint of annotation structure as given as from Prodigy database to an Image
- structure.
-
- :param dp: A datapoint in dict structure as returned from Prodigy database
- :param categories_name_as_key: A dict of categories, e.g. DatasetCategories.get_categories(name_as_key=True)
- :param load_image: If 'True' it will load image to attr:`Image.image`
- :param fake_score: If dp does not contain a score, a fake score with uniform random variables in (0,1)
- will be added.
- :param path_reference_ds: A path to a reference-dataset. It must point to the basedir where the file
- of the datapoint can be found.
- :param accept_only_answer: Filter every datapoint that has the answer 'reject' or 'ignore'.
- :param category_name_mapping: Map incoming category names, e.g. {"source_name":"target_name"}
- :return: Image
+ Maps a datapoint of annotation structure from Prodigy database to an `Image` structure.
+
+ Args:
+ dp: A datapoint in dict structure as returned from Prodigy database.
+ categories_name_as_key: A dict of categories, e.g. `DatasetCategories.get_categories(name_as_key=True)`.
+ load_image: If `True`, it will load image to `Image.image`.
+ fake_score: If `dp` does not contain a score, a fake score with uniform random variables in (0,1) will be added.
+ path_reference_ds: A path to a reference-dataset. It must point to the basedir where the file of the datapoint
+ can be found.
+ accept_only_answer: Filter every datapoint that has the answer `reject` or `ignore`.
+ category_name_mapping: Map incoming category names, e.g. `{"source_name":"target_name"}`.
+
+ Returns:
+ `Image`
+
+ Note:
+ If `accept_only_answer` is `True`, only datapoints with the answer `accept` will be processed.
+
  """

  if accept_only_answer and dp.get("answer") != "accept":
@@ -147,12 +152,21 @@ def prodigy_to_image(
  @curry
  def image_to_prodigy(dp: Image, category_names: Optional[Sequence[ObjectTypes]] = None) -> JsonDict:
  """
- The mapper to transform the normalized image representation of datasets into the format
- for visualising the annotation components in Prodigy.
+ Transforms the normalized image representation of datasets into the format for visualizing the annotation
+ components in Prodigy.
+
+ Args:
+ dp: An `Image`.
+ category_names: A list of category names to filter the annotations.
+
+ Returns:
+ A dictionary with compulsory keys: `text` and `spans`.
+
+ Example:
+ ```python
+ image_to_prodigy(image_instance)
+ ```

- :param dp: An image
- :param category_names: A list of category names to filter the annotations
- :return: A dictionary with compulsory keys: "text" and "spans"
  """

  output: JsonDict = {}
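The docstring above fixes `text` and `spans` as the compulsory keys of the returned dict. Purely for illustration, a dict of that general shape built from hypothetical box annotations; the exact span fields deepdoctection emits are not shown in this diff:

```python
annotations = [
    {"category_name": "table", "box": (10, 20, 200, 180)},   # (ulx, uly, lrx, lry), made up
    {"category_name": "title", "box": (10, 2, 200, 18)},
]

task = {
    "text": "report_page_1.png",   # identifier displayed by Prodigy
    "spans": [
        {
            "label": ann["category_name"],        # field names here are assumptions
            "x": ann["box"][0],
            "y": ann["box"][1],
            "width": ann["box"][2] - ann["box"][0],
            "height": ann["box"][3] - ann["box"][1],
        }
        for ann in annotations
    ],
}
print(task["spans"][0]["label"], task["spans"][0]["width"], task["spans"][0]["height"])  # table 190 160
```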