PyPI - deepdoctection - Versions diffs - 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl - Mend

deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (124)

deepdoctection/__init__.py +2 -1
deepdoctection/analyzer/__init__.py +2 -1
deepdoctection/analyzer/config.py +904 -0
deepdoctection/analyzer/dd.py +36 -62
deepdoctection/analyzer/factory.py +311 -141
deepdoctection/configs/conf_dd_one.yaml +100 -44
deepdoctection/configs/profiles.jsonl +32 -0
deepdoctection/dataflow/__init__.py +9 -6
deepdoctection/dataflow/base.py +33 -15
deepdoctection/dataflow/common.py +96 -75
deepdoctection/dataflow/custom.py +36 -29
deepdoctection/dataflow/custom_serialize.py +135 -91
deepdoctection/dataflow/parallel_map.py +33 -31
deepdoctection/dataflow/serialize.py +15 -10
deepdoctection/dataflow/stats.py +41 -28
deepdoctection/datapoint/__init__.py +4 -6
deepdoctection/datapoint/annotation.py +104 -66
deepdoctection/datapoint/box.py +190 -130
deepdoctection/datapoint/convert.py +66 -39
deepdoctection/datapoint/image.py +151 -95
deepdoctection/datapoint/view.py +383 -236
deepdoctection/datasets/__init__.py +2 -6
deepdoctection/datasets/adapter.py +11 -11
deepdoctection/datasets/base.py +118 -81
deepdoctection/datasets/dataflow_builder.py +18 -12
deepdoctection/datasets/info.py +76 -57
deepdoctection/datasets/instances/__init__.py +6 -2
deepdoctection/datasets/instances/doclaynet.py +17 -14
deepdoctection/datasets/instances/fintabnet.py +16 -22
deepdoctection/datasets/instances/funsd.py +11 -6
deepdoctection/datasets/instances/iiitar13k.py +9 -9
deepdoctection/datasets/instances/layouttest.py +9 -9
deepdoctection/datasets/instances/publaynet.py +9 -9
deepdoctection/datasets/instances/pubtables1m.py +13 -13
deepdoctection/datasets/instances/pubtabnet.py +13 -15
deepdoctection/datasets/instances/rvlcdip.py +8 -8
deepdoctection/datasets/instances/xfund.py +11 -9
deepdoctection/datasets/registry.py +18 -11
deepdoctection/datasets/save.py +12 -11
deepdoctection/eval/__init__.py +3 -2
deepdoctection/eval/accmetric.py +72 -52
deepdoctection/eval/base.py +29 -10
deepdoctection/eval/cocometric.py +14 -12
deepdoctection/eval/eval.py +56 -41
deepdoctection/eval/registry.py +6 -3
deepdoctection/eval/tedsmetric.py +24 -9
deepdoctection/eval/tp_eval_callback.py +13 -12
deepdoctection/extern/__init__.py +1 -1
deepdoctection/extern/base.py +176 -97
deepdoctection/extern/d2detect.py +127 -92
deepdoctection/extern/deskew.py +19 -10
deepdoctection/extern/doctrocr.py +157 -106
deepdoctection/extern/fastlang.py +25 -17
deepdoctection/extern/hfdetr.py +137 -60
deepdoctection/extern/hflayoutlm.py +329 -248
deepdoctection/extern/hflm.py +67 -33
deepdoctection/extern/model.py +108 -762
deepdoctection/extern/pdftext.py +37 -12
deepdoctection/extern/pt/nms.py +15 -1
deepdoctection/extern/pt/ptutils.py +13 -9
deepdoctection/extern/tessocr.py +87 -54
deepdoctection/extern/texocr.py +29 -14
deepdoctection/extern/tp/tfutils.py +36 -8
deepdoctection/extern/tp/tpcompat.py +54 -16
deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
deepdoctection/extern/tpdetect.py +4 -2
deepdoctection/mapper/__init__.py +1 -1
deepdoctection/mapper/cats.py +117 -76
deepdoctection/mapper/cocostruct.py +35 -17
deepdoctection/mapper/d2struct.py +56 -29
deepdoctection/mapper/hfstruct.py +32 -19
deepdoctection/mapper/laylmstruct.py +221 -185
deepdoctection/mapper/maputils.py +71 -35
deepdoctection/mapper/match.py +76 -62
deepdoctection/mapper/misc.py +68 -44
deepdoctection/mapper/pascalstruct.py +13 -12
deepdoctection/mapper/prodigystruct.py +33 -19
deepdoctection/mapper/pubstruct.py +42 -32
deepdoctection/mapper/tpstruct.py +39 -19
deepdoctection/mapper/xfundstruct.py +20 -13
deepdoctection/pipe/__init__.py +1 -2
deepdoctection/pipe/anngen.py +104 -62
deepdoctection/pipe/base.py +226 -107
deepdoctection/pipe/common.py +206 -123
deepdoctection/pipe/concurrency.py +74 -47
deepdoctection/pipe/doctectionpipe.py +108 -47
deepdoctection/pipe/language.py +41 -24
deepdoctection/pipe/layout.py +45 -18
deepdoctection/pipe/lm.py +146 -78
deepdoctection/pipe/order.py +196 -113
deepdoctection/pipe/refine.py +111 -63
deepdoctection/pipe/registry.py +1 -1
deepdoctection/pipe/segment.py +213 -142
deepdoctection/pipe/sub_layout.py +76 -46
deepdoctection/pipe/text.py +52 -33
deepdoctection/pipe/transform.py +8 -6
deepdoctection/train/d2_frcnn_train.py +87 -69
deepdoctection/train/hf_detr_train.py +72 -40
deepdoctection/train/hf_layoutlm_train.py +85 -46
deepdoctection/train/tp_frcnn_train.py +56 -28
deepdoctection/utils/concurrency.py +59 -16
deepdoctection/utils/context.py +40 -19
deepdoctection/utils/develop.py +25 -17
deepdoctection/utils/env_info.py +85 -36
deepdoctection/utils/error.py +16 -10
deepdoctection/utils/file_utils.py +246 -62
deepdoctection/utils/fs.py +162 -43
deepdoctection/utils/identifier.py +29 -16
deepdoctection/utils/logger.py +49 -32
deepdoctection/utils/metacfg.py +83 -21
deepdoctection/utils/pdf_utils.py +119 -62
deepdoctection/utils/settings.py +24 -10
deepdoctection/utils/tqdm.py +10 -5
deepdoctection/utils/transform.py +182 -46
deepdoctection/utils/utils.py +61 -28
deepdoctection/utils/viz.py +150 -104
deepdoctection-0.43.dist-info/METADATA +376 -0
deepdoctection-0.43.dist-info/RECORD +149 -0
{deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
deepdoctection/analyzer/_config.py +0 -146
deepdoctection-0.42.0.dist-info/METADATA +0 -431
deepdoctection-0.42.0.dist-info/RECORD +0 -148
{deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
{deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0

deepdoctection/eval/tedsmetric.py CHANGED Viewed

@@ -13,7 +13,9 @@
 # Apache 2.0 License for more details.
 """
-Tree distance similarity metric taken from <https://github.com/ibm-aur-nlp/PubTabNet/blob/master/src/metric.py>
+Tree distance similarity (TEDS) metric
+Taken from <https://github.com/ibm-aur-nlp/PubTabNet/blob/master/src/metric.py>
 """
 import statistics
@@ -81,7 +83,8 @@ class TableTree(Tree):
 class CustomConfig(Config):
     """
-    CustomConfig for calculating APTED tree edit distance. Check APTED docs for more information
+    `CustomConfig` for calculating `APTED` tree edit distance.
+    Check APTED docs for more information
     """
     @staticmethod
@@ -90,7 +93,7 @@ class CustomConfig(Config):
         return max(map(len, sequences))
     def normalized_distance(self, *sequences: Any) -> float:
-        """Get distance from 0 to 1"""
+        """Get distance from `0` to `1`"""
         return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
     def rename(self, node1: Any, node2: Any) -> float:
@@ -104,7 +107,7 @@ class CustomConfig(Config):
 class TEDS:
-    """Tree Edit Distance based Similarity"""
+    """Tree Edit Distance similarity"""
     def __init__(self, structure_only: bool = False):
         self.structure_only = structure_only
@@ -123,7 +126,7 @@ class TEDS:
             self.__tokens__ += list(node.tail)
     def load_html_tree(self, node: TableTree, parent: Optional[TableTree] = None) -> Optional[TableTree]:
-        """Converts HTML tree to the format required by apted"""
+        """Converts `HTML` tree to the format required by APTED"""
         global __tokens__  # pylint: disable = W0602
         if node.tag == "td":
             if self.structure_only:
@@ -151,8 +154,15 @@ class TEDS:
         return None
     def evaluate(self, inputs: tuple[str, str]) -> float:
-        """Computes TEDS score between the prediction and the ground truth of a
+        """
+        Computes TEDS score between the prediction and the ground truth of a
         given sample
+        Args:
+            inputs: A tuple of ground truth and prediction in xml format
+        Returns:
+            A float value between 0.0 and 1.0, where 1.0 means perfect match
         """
         ground_truth, pred = inputs[0], inputs[1]
@@ -192,8 +202,13 @@ class TEDS:
 def teds_metric(gt_list: list[str], predict_list: list[str], structure_only: bool) -> tuple[float, int]:
     """
     Computes tree edit distance score (TEDS) between the prediction and the ground truth of a batch of samples. The
-    approach to measure similarity of tables by means of their html representation has been adovacated in
-    <https://arxiv.org/abs/1911.10683> .
+    approach to measure similarity of tables by means of their html representation has been advocated in
+    <https://arxiv.org/abs/1911.10683>
+    Args:
+        gt_list: A list of ground truth samples in `xml` format
+        predict_list: A list of predictions in `xml` format
+        structure_only: If `True`, only the structure of the table is considered, but no text
     """
     teds = TEDS(structure_only=structure_only)
@@ -218,7 +233,7 @@ def teds_metric(gt_list: list[str], predict_list: list[str], structure_only: boo
 @metric_registry.register("teds")
 class TedsMetric(MetricBase):
     """
-    Metric induced by `teds`
+    Metric induced by `TEDS`
     """
     metric = teds_metric  # type: ignore

deepdoctection/eval/tp_eval_callback.py CHANGED Viewed

@@ -16,7 +16,7 @@
 # limitations under the License.
 """
-Module for EvalCallback in Tensorpack
+`EvalCallback` in Tensorpack
 """
 from __future__ import annotations
@@ -53,7 +53,7 @@ __all__ = ["EvalCallback"]
 class EvalCallback(Callback):  # pylint: disable=R0903
     """
-    A callback that runs evaluation once a while. It supports evaluation on any pipeline component.
+    A callback that runs evaluation once in a while. It supports evaluation on any pipeline component.
     """
     _chief_only = False
@@ -71,17 +71,18 @@ class EvalCallback(Callback):  # pylint: disable=R0903
         **build_eval_kwargs: str,
     ) -> None:
         """
-        :param dataset: dataset
-        :param category_names: String or list of category names
-        :param sub_categories: Dict of categories/sub-categories or categories/list of sub-categories. See also
+        Args:
+            dataset: dataset
+            category_names: String or list of category names
+            sub_categories: Dict of categories/sub-categories or categories/list of sub-categories. See also
                                `eval.Evaluator`
-        :param metric: metric
-        :param pipeline_component: Pipeline component with a detector.
-        :param in_names: Specify tensor input names.
-                         E.g. `extern.tp.tpfrcnn.GeneralizedRCNN.get_inference_tensor_names`
-        :param out_names: Specify tensor output names.
-        :param build_eval_kwargs: Pass the necessary arguments in order to build the dataflow, e.g. "split",
-                                  "build_mode", "max_datapoints" etc.
+            metric: metric
+            pipeline_component: Pipeline component with a detector.
+            in_names: Specify tensor input names.
+                      E.g. `extern.tp.tpfrcnn.GeneralizedRCNN.get_inference_tensor_names`
+            out_names: Specify tensor output names.
+            build_eval_kwargs: Pass the necessary arguments in order to build the dataflow, e.g. `split`,
+                               `build_mode`, `max_datapoints` etc.
         """
         self.dataset_name = dataset.dataset_info.name
         self.build_eval_kwargs = build_eval_kwargs

deepdoctection/extern/__init__.py CHANGED Viewed

@@ -16,7 +16,7 @@
 # limitations under the License.
 """
-Wrappers for models of external libraries as well as implementation of the Cascade-RCNN model of Tensorpack.
+# Wrappers for models of external libraries
 """
 from .base import *

deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl

Potentially problematic release.

deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl