deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of deepdoctection might be problematic.
- deepdoctection/__init__.py +8 -25
- deepdoctection/analyzer/dd.py +84 -71
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +78 -56
- deepdoctection/datapoint/box.py +7 -7
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +157 -75
- deepdoctection/datapoint/view.py +175 -151
- deepdoctection/datasets/adapter.py +30 -24
- deepdoctection/datasets/base.py +10 -10
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +23 -25
- deepdoctection/datasets/instances/doclaynet.py +48 -49
- deepdoctection/datasets/instances/fintabnet.py +44 -45
- deepdoctection/datasets/instances/funsd.py +23 -23
- deepdoctection/datasets/instances/iiitar13k.py +8 -8
- deepdoctection/datasets/instances/layouttest.py +2 -2
- deepdoctection/datasets/instances/publaynet.py +3 -3
- deepdoctection/datasets/instances/pubtables1m.py +18 -18
- deepdoctection/datasets/instances/pubtabnet.py +30 -29
- deepdoctection/datasets/instances/rvlcdip.py +28 -29
- deepdoctection/datasets/instances/xfund.py +51 -30
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +13 -12
- deepdoctection/eval/eval.py +32 -26
- deepdoctection/eval/tedsmetric.py +16 -12
- deepdoctection/eval/tp_eval_callback.py +7 -16
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +69 -89
- deepdoctection/extern/deskew.py +11 -10
- deepdoctection/extern/doctrocr.py +81 -64
- deepdoctection/extern/fastlang.py +23 -16
- deepdoctection/extern/hfdetr.py +53 -38
- deepdoctection/extern/hflayoutlm.py +216 -155
- deepdoctection/extern/hflm.py +35 -30
- deepdoctection/extern/model.py +433 -255
- deepdoctection/extern/pdftext.py +15 -15
- deepdoctection/extern/pt/ptutils.py +4 -2
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +14 -16
- deepdoctection/extern/tp/tfutils.py +16 -2
- deepdoctection/extern/tp/tpcompat.py +11 -7
- deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
- deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
- deepdoctection/extern/tpdetect.py +40 -45
- deepdoctection/mapper/cats.py +36 -40
- deepdoctection/mapper/cocostruct.py +16 -12
- deepdoctection/mapper/d2struct.py +22 -22
- deepdoctection/mapper/hfstruct.py +7 -7
- deepdoctection/mapper/laylmstruct.py +22 -24
- deepdoctection/mapper/maputils.py +9 -10
- deepdoctection/mapper/match.py +33 -2
- deepdoctection/mapper/misc.py +6 -7
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +6 -6
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +3 -3
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/anngen.py +39 -14
- deepdoctection/pipe/base.py +68 -99
- deepdoctection/pipe/common.py +181 -85
- deepdoctection/pipe/concurrency.py +14 -10
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +18 -16
- deepdoctection/pipe/lm.py +49 -47
- deepdoctection/pipe/order.py +63 -65
- deepdoctection/pipe/refine.py +102 -109
- deepdoctection/pipe/segment.py +157 -162
- deepdoctection/pipe/sub_layout.py +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/d2_frcnn_train.py +27 -25
- deepdoctection/train/hf_detr_train.py +22 -18
- deepdoctection/train/hf_layoutlm_train.py +49 -48
- deepdoctection/train/tp_frcnn_train.py +10 -11
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +52 -14
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +41 -14
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +15 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/pdf_utils.py +39 -14
- deepdoctection/utils/settings.py +188 -182
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +70 -69
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
- deepdoctection-0.34.dist-info/RECORD +146 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.32.dist-info/RECORD +0 -146
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
deepdoctection/eval/eval.py
CHANGED
@@ -22,7 +22,7 @@ Module for `Evaluator`
 from __future__ import annotations
 
 from copy import deepcopy
-from typing import Any,
+from typing import Any, Generator, Literal, Mapping, Optional, Type, Union, overload
 
 import numpy as np
 from lazy_imports import try_import

@@ -33,13 +33,13 @@ from ..datasets.base import DatasetBase
 from ..mapper.cats import filter_cat, remove_cats
 from ..mapper.d2struct import to_wandb_image
 from ..mapper.misc import maybe_load_image, maybe_remove_image, maybe_remove_image_from_category
-from ..pipe.base import
+from ..pipe.base import PipelineComponent
 from ..pipe.common import PageParsingService
 from ..pipe.concurrency import MultiThreadPipelineComponent
 from ..pipe.doctectionpipe import DoctectionPipe
-from ..utils.detection_types import ImageType
 from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import DatasetType, LayoutType, TypeOrStr, get_type
+from ..utils.types import PixelValues
 from ..utils.viz import interactive_imshow
 from .base import MetricBase
 
@@ -90,7 +90,7 @@ class Evaluator:
     def __init__(
         self,
         dataset: DatasetBase,
-        component_or_pipeline: Union[
+        component_or_pipeline: Union[PipelineComponent, DoctectionPipe],
         metric: Union[Type[MetricBase], MetricBase],
         num_threads: int = 2,
         run: Optional[wandb.sdk.wandb_run.Run] = None,

@@ -108,14 +108,14 @@ class Evaluator:
         self.pipe: Optional[DoctectionPipe] = None
 
         # when passing a component, we will process prediction on num_threads
-        if isinstance(component_or_pipeline,
+        if isinstance(component_or_pipeline, PipelineComponent):
             logger.info(
                 LoggingRecord(
                     f"Building multi threading pipeline component to increase prediction throughput. "
                     f"Using {num_threads} threads"
                 )
             )
-            pipeline_components:
+            pipeline_components: list[PipelineComponent] = []
 
             for _ in range(num_threads - 1):
                 copy_pipe_component = component_or_pipeline.clone()
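The narrowed annotation `component_or_pipeline: Union[PipelineComponent, DoctectionPipe]` documents the two ways the evaluator can be fed: a single pipeline component (cloned internally across `num_threads` worker threads, as the hunk above shows) or a full `DoctectionPipe`. A minimal wiring sketch, assuming `layout_component` is an already configured `PipelineComponent` (placeholder name) and that the dataset and metric used here are available in the installed version; none of this is taken from the diff itself:

import deepdoctection as dd

dataset = dd.get_dataset("publaynet")   # assumption: any registered dataset with ground truth
metric = dd.CocoMetric                  # assumption: a MetricBase subclass or instance

# a PipelineComponent is wrapped in a MultiThreadPipelineComponent,
# a DoctectionPipe is used as-is
evaluator = dd.Evaluator(dataset, layout_component, metric, num_threads=2)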
@@ -139,14 +139,14 @@ class Evaluator:
 
         self.wandb_table_agent: Optional[WandbTableAgent]
         if run is not None:
-            if self.dataset.dataset_info.type == DatasetType.
+            if self.dataset.dataset_info.type == DatasetType.OBJECT_DETECTION:
                 self.wandb_table_agent = WandbTableAgent(
                     run,
                     self.dataset.dataset_info.name,
                     50,
                     self.dataset.dataflow.categories.get_categories(filtered=True),
                 )
-            elif self.dataset.dataset_info.type == DatasetType.
+            elif self.dataset.dataset_info.type == DatasetType.TOKEN_CLASSIFICATION:
                 if hasattr(self.metric, "sub_cats"):
                     sub_cat_key, sub_cat_val_list = list(self.metric.sub_cats.items())[0]
                     sub_cat_val = sub_cat_val_list[0]

@@ -178,16 +178,16 @@ class Evaluator:
     @overload
     def run(
         self, output_as_dict: Literal[False] = False, **dataflow_build_kwargs: Union[str, int]
-    ) ->
+    ) -> list[dict[str, float]]:
         ...
 
     @overload
-    def run(self, output_as_dict: Literal[True], **dataflow_build_kwargs: Union[str, int]) ->
+    def run(self, output_as_dict: Literal[True], **dataflow_build_kwargs: Union[str, int]) -> dict[str, float]:
         ...
 
     def run(
         self, output_as_dict: bool = False, **dataflow_build_kwargs: Union[str, int]
-    ) -> Union[
+    ) -> Union[list[dict[str, float]], dict[str, float]]:
         """
         Start evaluation process and return the results.
 
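Both overloads resolve to the same runtime implementation; `output_as_dict` only changes the shape of the result. A short usage sketch, continuing the hypothetical `evaluator` from above (`max_datapoints` is just one example of a dataflow build kwarg):

# default: a list of result dicts
results_per_entry = evaluator.run(max_datapoints=100)             # list[dict[str, float]]

# flattened variant
results = evaluator.run(output_as_dict=True, max_datapoints=100)  # dict[str, float]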
@@ -246,11 +246,11 @@ class Evaluator:
         possible_cats_in_datapoint = self.dataset.dataflow.categories.get_categories(as_dict=False, filtered=True)
 
         # clean-up procedure depends on the dataset type
-        if self.dataset.dataset_info.type == DatasetType.
+        if self.dataset.dataset_info.type == DatasetType.OBJECT_DETECTION:
             # we keep all image annotations that will not be generated through processing
-            anns_to_keep = {ann for ann in possible_cats_in_datapoint if ann not in meta_anns
-            sub_cats_to_remove = meta_anns
-            relationships_to_remove = meta_anns
+            anns_to_keep = {ann for ann in possible_cats_in_datapoint if ann not in meta_anns.image_annotations}
+            sub_cats_to_remove = meta_anns.sub_categories
+            relationships_to_remove = meta_anns.relationships
             # removing annotations takes place in three steps: First we remove all image annotations. Then, with all
             # remaining image annotations we check, if the image attribute (with Image instance !) is not empty and
             # remove it as well, if necessary. In the last step we remove all sub categories and relationships, if

@@ -262,19 +262,19 @@ class Evaluator:
                 remove_cats(sub_categories=sub_cats_to_remove, relationships=relationships_to_remove),
             )
 
-        elif self.dataset.dataset_info.type == DatasetType.
-            summary_sub_cats_to_remove = meta_anns
+        elif self.dataset.dataset_info.type == DatasetType.SEQUENCE_CLASSIFICATION:
+            summary_sub_cats_to_remove = meta_anns.summaries
             df_pr = MapData(df_pr, remove_cats(summary_sub_categories=summary_sub_cats_to_remove))
 
-        elif self.dataset.dataset_info.type == DatasetType.
-            sub_cats_to_remove = meta_anns
+        elif self.dataset.dataset_info.type == DatasetType.TOKEN_CLASSIFICATION:
+            sub_cats_to_remove = meta_anns.sub_categories
             df_pr = MapData(df_pr, remove_cats(sub_categories=sub_cats_to_remove))
         else:
             raise NotImplementedError()
 
         return df_pr
 
-    def compare(self, interactive: bool = False, **kwargs: Union[str, int]) -> Generator[
+    def compare(self, interactive: bool = False, **kwargs: Union[str, int]) -> Generator[PixelValues, None, None]:
         """
         Visualize ground truth and prediction datapoint. Given a dataflow config it will run predictions per sample
         and concat the prediction image (with predicted bounding boxes) with ground truth image.

@@ -293,6 +293,8 @@ class Evaluator:
         show_words = kwargs.pop("show_words", False)
         show_token_class = kwargs.pop("show_token_class", True)
         ignore_default_token_class = kwargs.pop("ignore_default_token_class", False)
+        floating_text_block_categories = kwargs.pop("floating_text_block_categories", None)
+        include_residual_text_containers = kwargs.pop("include_residual_Text_containers", True)
 
         df_gt = self.dataset.dataflow.build(**kwargs)
         df_pr = self.dataset.dataflow.build(**kwargs)

@@ -301,7 +303,11 @@ class Evaluator:
         df_pr = MapData(df_pr, deepcopy)
         df_pr = self._clean_up_predict_dataflow_annotations(df_pr)
 
-        page_parsing_component = PageParsingService(
+        page_parsing_component = PageParsingService(
+            text_container=LayoutType.WORD,
+            floating_text_block_categories=floating_text_block_categories,  # type: ignore
+            include_residual_text_container=bool(include_residual_text_containers),
+        )
         df_gt = page_parsing_component.predict_dataflow(df_gt)
 
         if self.pipe_component:
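`compare` is now typed as a generator of `PixelValues` arrays, and the two new kwargs are popped before the remaining kwargs reach the dataflow builder. A sketch under the same assumptions as above; note that `include_residual_Text_containers` is spelled with a capital "T", exactly as in the hunk above:

for canvas in evaluator.compare(
    interactive=False,
    max_datapoints=5,
    floating_text_block_categories=None,
    include_residual_Text_containers=True,
):
    ...  # `canvas` is a numpy array with ground truth and prediction rendered side by side

With `interactive=True` the images are presumably displayed via `interactive_imshow`, which is imported at the top of the module.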
@@ -357,8 +363,8 @@ class WandbTableAgent:
         wandb_run: wandb.sdk.wandb_run.Run,
         dataset_name: str,
         num_samples: int,
-        categories: Mapping[
-        sub_categories: Optional[Mapping[
+        categories: Mapping[int, TypeOrStr],
+        sub_categories: Optional[Mapping[int, TypeOrStr]] = None,
         cat_to_sub_cat: Optional[Mapping[TypeOrStr, TypeOrStr]] = None,
     ):
         """

@@ -385,8 +391,8 @@ class WandbTableAgent:
         self._counter = 0
 
         # Table logging utils
-        self._table_cols:
-        self._table_rows:
+        self._table_cols: list[str] = ["file_name", "image"]
+        self._table_rows: list[Any] = []
         self._table_ref = None
 
     def dump(self, dp: Image) -> Image:

@@ -439,4 +445,4 @@ class WandbTableAgent:
         eval_art.add(self._build_table(), self.dataset_name)
         self._run.use_artifact(eval_art)
         eval_art.wait()
-        self._table_ref = eval_art.get(self.dataset_name).data  # type:ignore
+        self._table_ref = eval_art.get(self.dataset_name).data  # type: ignore
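`WandbTableAgent` is instantiated by the evaluator itself when a W&B run is passed, so the usual entry point is the `run` argument of `Evaluator` rather than the agent class. A hedged sketch, assuming a working wandb login and the same placeholder objects as above:

import wandb
import deepdoctection as dd

wandb_run = wandb.init(project="dd-eval")   # arbitrary project name
evaluator = dd.Evaluator(dataset, layout_component, metric, num_threads=2, run=wandb_run)

# for object-detection datasets the evaluator builds a WandbTableAgent that logs
# up to 50 sample rows ("file_name", "image") to a W&B table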
deepdoctection/eval/tedsmetric.py
CHANGED

@@ -18,17 +18,18 @@ Tree distance similarity metric taken from <https://github.com/ibm-aur-nlp/PubTa
 
 import statistics
 from collections import defaultdict, deque
-from typing import Any,
+from typing import Any, Callable, Optional
 
 from lazy_imports import try_import
 
 from ..dataflow import DataFlow, DataFromList, MapData, MultiThreadMapData
+from ..datapoint.image import Image
 from ..datapoint.view import Page
 from ..datasets.base import DatasetCategories
-from ..utils.detection_types import JsonDict
 from ..utils.file_utils import Requirement, get_apted_requirement, get_distance_requirement, get_lxml_requirement
 from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import LayoutType
+from ..utils.types import MetricResults
 from .base import MetricBase
 from .registry import metric_registry
 

@@ -59,7 +60,7 @@ class TableTree(Tree):
         tag: str,
         colspan: Optional[int] = None,
         rowspan: Optional[int] = None,
-        content: Optional[
+        content: Optional[list[str]] = None,
     ) -> None:
         self.tag = tag
         self.colspan = colspan

@@ -107,7 +108,7 @@ class TEDS:
 
     def __init__(self, structure_only: bool = False):
         self.structure_only = structure_only
-        self.__tokens__:
+        self.__tokens__: list[str] = []
 
     def tokenize(self, node: TableTree) -> None:
         """Tokenizes table cells"""

@@ -149,7 +150,7 @@ class TEDS:
             return new_node
         return None
 
-    def evaluate(self, inputs:
+    def evaluate(self, inputs: tuple[str, str]) -> float:
         """Computes TEDS score between the prediction and the ground truth of a
         given sample
         """

@@ -188,7 +189,7 @@ class TEDS:
         return 0.0
 
 
-def teds_metric(gt_list:
+def teds_metric(gt_list: list[str], predict_list: list[str], structure_only: bool) -> tuple[float, int]:
     """
     Computes tree edit distance score (TEDS) between the prediction and the ground truth of a batch of samples. The
     approach to measure similarity of tables by means of their html representation has been adovacated in
@@ -221,13 +222,16 @@ class TedsMetric(MetricBase):
     """
 
     metric = teds_metric  # type: ignore
-    mapper = Page.from_image
+    mapper: Callable[[Image, LayoutType, list[LayoutType]], Page] = Page.from_image
+    text_container: LayoutType = LayoutType.WORD
+    floating_text_block_categories = [LayoutType.TABLE]
+
     structure_only = False
 
     @classmethod
     def dump(
         cls, dataflow_gt: DataFlow, dataflow_predictions: DataFlow, categories: DatasetCategories
-    ) ->
+    ) -> tuple[list[str], list[str]]:
         dataflow_gt.reset_state()
         dataflow_predictions.reset_state()
 

@@ -235,11 +239,11 @@ class TedsMetric(MetricBase):
         gt_dict = defaultdict(list)
         pred_dict = defaultdict(list)
         for dp_gt, dp_pred in zip(dataflow_gt, dataflow_predictions):
-            page_gt = cls.mapper(dp_gt,
+            page_gt = cls.mapper(dp_gt, cls.text_container, cls.floating_text_block_categories)
             for table in page_gt.tables:
                 gt_dict[page_gt.image_id].append(table.html)
 
-            page_pred = cls.mapper(dp_pred,
+            page_pred = cls.mapper(dp_pred, cls.text_container, cls.floating_text_block_categories)
             for table in page_pred.tables:
                 pred_dict[page_pred.image_id].append(table.html)
 

@@ -254,12 +258,12 @@ class TedsMetric(MetricBase):
     @classmethod
     def get_distance(
         cls, dataflow_gt: DataFlow, dataflow_predictions: DataFlow, categories: DatasetCategories
-    ) ->
+    ) -> list[MetricResults]:
         html_gt_list, html_pr_list = cls.dump(dataflow_gt, dataflow_predictions, categories)
 
         score, num_samples = cls.metric(html_gt_list, html_pr_list, cls.structure_only)  # type: ignore
         return [{"teds_score": score, "num_samples": num_samples}]
 
     @classmethod
-    def get_requirements(cls) ->
+    def get_requirements(cls) -> list[Requirement]:
         return [get_apted_requirement(), get_distance_requirement(), get_lxml_requirement()]
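`TedsMetric` is a regular `MetricBase` subclass, so it plugs straight into the `Evaluator` from `eval.py` above. A hedged sketch, assuming `table_pipeline` is a table-recognition pipeline or component (placeholder) and a dataset whose ground truth contains tables; the metric additionally requires the apted, distance and lxml packages (see `get_requirements`):

import deepdoctection as dd
from deepdoctection.eval.tedsmetric import TedsMetric

evaluator = dd.Evaluator(dataset, table_pipeline, TedsMetric, num_threads=2)
results = evaluator.run(output_as_dict=True, max_datapoints=50)
# get_distance returns [{"teds_score": ..., "num_samples": ...}] (see the hunk above)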
deepdoctection/eval/tp_eval_callback.py
CHANGED

@@ -27,8 +27,7 @@ from typing import Mapping, Optional, Sequence, Type, Union
 from lazy_imports import try_import
 
 from ..datasets import DatasetBase
-from ..
-from ..pipe.base import PredictorPipelineComponent
+from ..pipe.base import PipelineComponent
 from ..utils.logger import LoggingRecord, logger
 from ..utils.metacfg import AttrDict
 from ..utils.settings import ObjectTypes

@@ -65,9 +64,10 @@ class EvalCallback(Callback): # pylint: disable=R0903
         category_names: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]],
         sub_categories: Optional[Union[Mapping[ObjectTypes, ObjectTypes], Mapping[ObjectTypes, Sequence[ObjectTypes]]]],
         metric: Union[Type[MetricBase], MetricBase],
-        pipeline_component:
+        pipeline_component: PipelineComponent,
         in_names: str,
         out_names: str,
+        cfg: AttrDict,
         **build_eval_kwargs: str,
     ) -> None:
         """

@@ -89,12 +89,7 @@ class EvalCallback(Callback): # pylint: disable=R0903
         self.num_gpu = get_num_gpu()
         self.category_names = category_names
         self.sub_categories = sub_categories
-
-            raise TypeError(
-                f"pipeline_component.predictor must be of type TPFrcnnDetector but is "
-                f"type {type(pipeline_component.predictor)}"
-            )
-        self.cfg = pipeline_component.predictor.model.cfg
+        self.cfg = cfg
         if _use_replicated(self.cfg):
             self.evaluator = Evaluator(dataset, pipeline_component, metric, num_threads=self.num_gpu * 2)
         else:

@@ -105,13 +100,9 @@ class EvalCallback(Callback): # pylint: disable=R0903
         if self.evaluator.pipe_component is None:
             raise TypeError("self.evaluator.pipe_component cannot be None")
         for idx, comp in enumerate(self.evaluator.pipe_component.pipe_components):
-            if
-
-
-                raise TypeError(
-                    f"comp.predictor mus be of type TPFrcnnDetector but is of type {type(comp.predictor)}"
-                )
-            comp.predictor.tp_predictor = self._build_predictor(idx % self.num_gpu)
+            if hasattr(comp, "predictor"):
+                if hasattr(comp.predictor, "tp_predictor"):
+                    comp.predictor.tp_predictor = self._build_predictor(idx % self.num_gpu)
 
     def _build_predictor(self, idx: int) -> OnlinePredictor:
         return self.trainer.get_predictor(self.in_names, self.out_names, device=idx)