PyPI - deepdoctection - Versions diffs - 0.31__py3-none-any.whl → 0.33__py3-none-any.whl - Mend

deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of deepdoctection might be problematic. Click here for more details.

Files changed (131)

deepdoctection/__init__.py +16 -29
deepdoctection/analyzer/dd.py +70 -59
deepdoctection/configs/conf_dd_one.yaml +34 -31
deepdoctection/dataflow/common.py +9 -5
deepdoctection/dataflow/custom.py +5 -5
deepdoctection/dataflow/custom_serialize.py +75 -18
deepdoctection/dataflow/parallel_map.py +3 -3
deepdoctection/dataflow/serialize.py +4 -4
deepdoctection/dataflow/stats.py +3 -3
deepdoctection/datapoint/annotation.py +41 -56
deepdoctection/datapoint/box.py +9 -8
deepdoctection/datapoint/convert.py +6 -6
deepdoctection/datapoint/image.py +56 -44
deepdoctection/datapoint/view.py +245 -150
deepdoctection/datasets/__init__.py +1 -4
deepdoctection/datasets/adapter.py +35 -26
deepdoctection/datasets/base.py +14 -12
deepdoctection/datasets/dataflow_builder.py +3 -3
deepdoctection/datasets/info.py +24 -26
deepdoctection/datasets/instances/doclaynet.py +51 -51
deepdoctection/datasets/instances/fintabnet.py +46 -46
deepdoctection/datasets/instances/funsd.py +25 -24
deepdoctection/datasets/instances/iiitar13k.py +13 -10
deepdoctection/datasets/instances/layouttest.py +4 -3
deepdoctection/datasets/instances/publaynet.py +5 -5
deepdoctection/datasets/instances/pubtables1m.py +24 -21
deepdoctection/datasets/instances/pubtabnet.py +32 -30
deepdoctection/datasets/instances/rvlcdip.py +30 -30
deepdoctection/datasets/instances/xfund.py +26 -26
deepdoctection/datasets/save.py +6 -6
deepdoctection/eval/__init__.py +1 -4
deepdoctection/eval/accmetric.py +32 -33
deepdoctection/eval/base.py +8 -9
deepdoctection/eval/cocometric.py +15 -13
deepdoctection/eval/eval.py +41 -37
deepdoctection/eval/tedsmetric.py +30 -23
deepdoctection/eval/tp_eval_callback.py +16 -19
deepdoctection/extern/__init__.py +2 -7
deepdoctection/extern/base.py +339 -134
deepdoctection/extern/d2detect.py +85 -113
deepdoctection/extern/deskew.py +14 -11
deepdoctection/extern/doctrocr.py +141 -130
deepdoctection/extern/fastlang.py +27 -18
deepdoctection/extern/hfdetr.py +71 -62
deepdoctection/extern/hflayoutlm.py +504 -211
deepdoctection/extern/hflm.py +230 -0
deepdoctection/extern/model.py +488 -302
deepdoctection/extern/pdftext.py +23 -19
deepdoctection/extern/pt/__init__.py +1 -3
deepdoctection/extern/pt/nms.py +6 -2
deepdoctection/extern/pt/ptutils.py +29 -19
deepdoctection/extern/tessocr.py +39 -38
deepdoctection/extern/texocr.py +18 -18
deepdoctection/extern/tp/tfutils.py +57 -9
deepdoctection/extern/tp/tpcompat.py +21 -14
deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
deepdoctection/extern/tpdetect.py +45 -53
deepdoctection/mapper/__init__.py +3 -8
deepdoctection/mapper/cats.py +27 -29
deepdoctection/mapper/cocostruct.py +10 -10
deepdoctection/mapper/d2struct.py +27 -26
deepdoctection/mapper/hfstruct.py +13 -8
deepdoctection/mapper/laylmstruct.py +178 -37
deepdoctection/mapper/maputils.py +12 -11
deepdoctection/mapper/match.py +2 -2
deepdoctection/mapper/misc.py +11 -9
deepdoctection/mapper/pascalstruct.py +4 -4
deepdoctection/mapper/prodigystruct.py +5 -5
deepdoctection/mapper/pubstruct.py +84 -92
deepdoctection/mapper/tpstruct.py +5 -5
deepdoctection/mapper/xfundstruct.py +33 -33
deepdoctection/pipe/__init__.py +1 -1
deepdoctection/pipe/anngen.py +12 -14
deepdoctection/pipe/base.py +52 -106
deepdoctection/pipe/common.py +72 -59
deepdoctection/pipe/concurrency.py +16 -11
deepdoctection/pipe/doctectionpipe.py +24 -21
deepdoctection/pipe/language.py +20 -25
deepdoctection/pipe/layout.py +20 -16
deepdoctection/pipe/lm.py +75 -105
deepdoctection/pipe/order.py +194 -89
deepdoctection/pipe/refine.py +111 -124
deepdoctection/pipe/segment.py +156 -161
deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
deepdoctection/pipe/text.py +37 -36
deepdoctection/pipe/transform.py +19 -16
deepdoctection/train/__init__.py +6 -12
deepdoctection/train/d2_frcnn_train.py +48 -41
deepdoctection/train/hf_detr_train.py +41 -30
deepdoctection/train/hf_layoutlm_train.py +153 -135
deepdoctection/train/tp_frcnn_train.py +32 -31
deepdoctection/utils/concurrency.py +1 -1
deepdoctection/utils/context.py +13 -6
deepdoctection/utils/develop.py +4 -4
deepdoctection/utils/env_info.py +87 -125
deepdoctection/utils/file_utils.py +6 -11
deepdoctection/utils/fs.py +22 -18
deepdoctection/utils/identifier.py +2 -2
deepdoctection/utils/logger.py +16 -15
deepdoctection/utils/metacfg.py +7 -7
deepdoctection/utils/mocks.py +93 -0
deepdoctection/utils/pdf_utils.py +11 -11
deepdoctection/utils/settings.py +185 -181
deepdoctection/utils/tqdm.py +1 -1
deepdoctection/utils/transform.py +14 -9
deepdoctection/utils/types.py +104 -0
deepdoctection/utils/utils.py +7 -7
deepdoctection/utils/viz.py +74 -72
{deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
deepdoctection-0.33.dist-info/RECORD +146 -0
{deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
deepdoctection/utils/detection_types.py +0 -68
deepdoctection-0.31.dist-info/RECORD +0 -144
{deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
{deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0

deepdoctection/pipe/refine.py CHANGED Viewed

@@ -19,11 +19,13 @@
 Module for refining methods of table segmentation. The refining methods lead ultimately to a table structure which
 enables html table representations
 """
+from __future__ import annotations
 from collections import defaultdict
 from copy import copy
 from dataclasses import asdict
 from itertools import chain, product
-from typing import DefaultDict, List, Optional, Set, Tuple, Union
+from typing import DefaultDict, Optional, Sequence, Union
 import networkx as nx  # type: ignore
@@ -32,16 +34,15 @@ from ..datapoint.box import merge_boxes
 from ..datapoint.image import Image
 from ..extern.base import DetectionResult
 from ..mapper.maputils import MappingContextManager
-from ..utils.detection_types import JsonDict
-from ..utils.error import AnnotationError, ImageError
-from ..utils.settings import CellType, LayoutType, Relationships, TableType, get_type
-from .base import PipelineComponent
+from ..utils.error import ImageError
+from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, TableType, get_type
+from .base import MetaAnnotation, PipelineComponent
 from .registry import pipeline_component_registry
 __all__ = ["TableSegmentationRefinementService", "generate_html_string"]
-def tiles_to_cells(dp: Image, table: ImageAnnotation) -> List[Tuple[Tuple[int, int], str]]:
+def tiles_to_cells(dp: Image, table: ImageAnnotation) -> list[tuple[tuple[int, int], str]]:
     """
     Creation of a table parquet: A table is divided into a tile parquet with the (number of rows) x
     (the number of columns) tiles.
@@ -53,17 +54,17 @@ def tiles_to_cells(dp: Image, table: ImageAnnotation) -> List[Tuple[Tuple[int, i
     :return: Image
     """
-    cell_ann_ids = table.get_relationship(Relationships.child)
+    cell_ann_ids = table.get_relationship(Relationships.CHILD)
     cells = dp.get_annotation(
-        category_names=[LayoutType.cell, CellType.header, CellType.body], annotation_ids=cell_ann_ids
+        category_names=[LayoutType.CELL, CellType.HEADER, CellType.BODY], annotation_ids=cell_ann_ids
     )
     tile_to_cells = []
     for cell in cells:
-        row_number = int(cell.get_sub_category(CellType.row_number).category_id)
-        col_number = int(cell.get_sub_category(CellType.column_number).category_id)
-        rs = int(cell.get_sub_category(CellType.row_span).category_id)
-        cs = int(cell.get_sub_category(CellType.column_span).category_id)
+        row_number = cell.get_sub_category(CellType.ROW_NUMBER).category_id
+        col_number = cell.get_sub_category(CellType.COLUMN_NUMBER).category_id
+        rs = cell.get_sub_category(CellType.ROW_SPAN).category_id
+        cs = cell.get_sub_category(CellType.COLUMN_SPAN).category_id
         for k in range(rs):
             for l in range(cs):
                 assert cell.annotation_id is not None, cell.annotation_id
@@ -73,15 +74,15 @@ def tiles_to_cells(dp: Image, table: ImageAnnotation) -> List[Tuple[Tuple[int, i
 def connected_component_tiles(
-    tile_to_cell_list: List[Tuple[Tuple[int, int], str]]
-) -> Tuple[List[Set[Tuple[int, int]]], DefaultDict[Tuple[int, int], List[str]]]:
+    tile_to_cell_list: list[tuple[tuple[int, int], str]]
+) -> tuple[list[set[tuple[int, int]]], DefaultDict[tuple[int, int], list[str]]]:
     """
     The assignment of bricks to their cell occupancy induces a graph, with bricks as corners and cell edges. Cells that
     lie on top of several bricks connect the underlying bricks. The graph generated according to this procedure is
     usually multiple connected. The related components and the tile/cell ids assignment are determined.
-    :param tile_to_cell_list: List of tuples with tile position and cell ids
-    :return: List of set with tiles that belong to the same connected component and a dict with tiles as keys and
+    :param tile_to_cell_list: list of tuples with tile position and cell ids
+    :return: list of set with tiles that belong to the same connected component and a dict with tiles as keys and
              assigned list of cell ids as values.
     """
     cell_to_tile_list = [(cell_position[1], cell_position[0]) for cell_position in tile_to_cell_list]
@@ -107,7 +108,7 @@ def connected_component_tiles(
     connected_components_tiles = []
     for component in connected_components_cell:
-        tiles: Set[Tuple[int, int]] = set()
+        tiles: set[tuple[int, int]] = set()
         for cell in component:
             tiles = tiles.union(set(cell_to_tile_dict[cell]))  # type: ignore
         connected_components_tiles.append(tiles)
@@ -115,7 +116,7 @@ def connected_component_tiles(
     return connected_components_tiles, tile_to_cell_dict
-def _missing_tile(inputs: Set[Tuple[int, int]]) -> Optional[Tuple[int, int]]:
+def _missing_tile(inputs: set[tuple[int, int]]) -> Optional[tuple[int, int]]:
     min_x, min_y, max_x, max_y = (
         min(a[0] for a in inputs),
         min(a[1] for a in inputs),
@@ -131,15 +132,15 @@ def _missing_tile(inputs: Set[Tuple[int, int]]) -> Optional[Tuple[int, int]]:
 def _find_component(
-    tile: Tuple[int, int], reduced_connected_tiles: List[Set[Tuple[int, int]]]
-) -> Optional[Set[Tuple[int, int]]]:
+    tile: tuple[int, int], reduced_connected_tiles: list[set[tuple[int, int]]]
+) -> Optional[set[tuple[int, int]]]:
     for comp in reduced_connected_tiles:
         if tile in comp:
             return comp
     return None
-def _merge_components(reduced_connected_tiles: List[Set[Tuple[int, int]]]) -> List[Set[Tuple[int, int]]]:
+def _merge_components(reduced_connected_tiles: list[set[tuple[int, int]]]) -> list[set[tuple[int, int]]]:
     new_reduced_connected_tiles = []
     for connected_tile in reduced_connected_tiles:
         out = _missing_tile(connected_tile)
@@ -161,17 +162,17 @@ def _merge_components(reduced_connected_tiles: List[Set[Tuple[int, int]]]) -> Li
     return new_reduced_connected_tiles
-def generate_rectangle_tiling(connected_components_tiles: List[Set[Tuple[int, int]]]) -> List[Set[Tuple[int, int]]]:
+def generate_rectangle_tiling(connected_components_tiles: list[set[tuple[int, int]]]) -> list[set[tuple[int, int]]]:
     """
     The determined connected components imply that all cells have to be combined which are above a connected component.
     In addition, however, it must also be taken into account that cells must be rectangular. This means that related
     components have to be combined whose combined cells above do not create a rectangular tiling. All tiles are combined
     in such a way that all cells above them combine to form a rectangular scheme.
-    :param connected_components_tiles: List of set with tiles that belong to the same connected component
-    :return: List of sets with tiles, the cells on top of which together form a rectangular scheme
+    :param connected_components_tiles: list of set with tiles that belong to the same connected component
+    :return: list of sets with tiles, the cells on top of which together form a rectangular scheme
     """
-    rectangle_tiling: List[Set[Tuple[int, int]]] = []
+    rectangle_tiling: list[set[tuple[int, int]]] = []
     inputs = connected_components_tiles
     while rectangle_tiling != inputs:
@@ -183,25 +184,25 @@ def generate_rectangle_tiling(connected_components_tiles: List[Set[Tuple[int, in
 def rectangle_cells(
-    rectangle_tiling: List[Set[Tuple[int, int]]], tile_to_cell_dict: DefaultDict[Tuple[int, int], List[str]]
-) -> List[Set[str]]:
+    rectangle_tiling: list[set[tuple[int, int]]], tile_to_cell_dict: DefaultDict[tuple[int, int], list[str]]
+) -> list[set[str]]:
     """
     All cells are determined that are located above combined connected components and form a rectangular scheme.
-    :param rectangle_tiling: List of sets with tiles, the cells on top of which together form a rectangular scheme
+    :param rectangle_tiling: list of sets with tiles, the cells on top of which together form a rectangular scheme
     :param tile_to_cell_dict: Dict with tiles as keys and assigned list of cell ids as values.
-    :return: List of set of cell ids that form a rectangular scheme
+    :return: list of set of cell ids that form a rectangular scheme
     """
-    rectangle_tiling_cells: List[Set[str]] = []
+    rectangle_tiling_cells: list[set[str]] = []
     for rect_tiling_component in rectangle_tiling:
-        rect_cell_component: Set[str] = set()
+        rect_cell_component: set[str] = set()
         for el in rect_tiling_component:
             rect_cell_component = rect_cell_component.union(set(tile_to_cell_dict[el]))
         rectangle_tiling_cells.append(rect_cell_component)
     return rectangle_tiling_cells
-def _tiling_to_cell_position(inputs: Set[Tuple[int, int]]) -> Tuple[int, int, int, int]:
+def _tiling_to_cell_position(inputs: set[tuple[int, int]]) -> tuple[int, int, int, int]:
     row_number = min(a[0] for a in inputs)
     col_number = min(a[1] for a in inputs)
     row_span = max(abs(a[0] - b[0]) + 1 for a in inputs for b in inputs)
@@ -210,8 +211,8 @@ def _tiling_to_cell_position(inputs: Set[Tuple[int, int]]) -> Tuple[int, int, in
 def _html_cell(
-    cell_position: Union[Tuple[int, int, int, int], Tuple[()]], position_filled_list: List[Tuple[int, int]]
-) -> List[str]:
+    cell_position: Union[tuple[int, int, int, int], tuple[()]], position_filled_list: list[tuple[int, int]]
+) -> list[str]:
     """
     Html table cell string generation
     """
@@ -238,12 +239,12 @@ def _html_cell(
 def _html_row(
-    row_list: List[Tuple[int, int, int, int]],
-    position_filled_list: List[Tuple[int, int]],
+    row_list: list[tuple[int, int, int, int]],
+    position_filled_list: list[tuple[int, int]],
     this_row: int,
     number_of_cols: int,
-    row_ann_id_list: List[str],
-) -> List[str]:
+    row_ann_id_list: list[str],
+) -> list[str]:
     """
     Html table row string generation
     """
@@ -275,16 +276,16 @@ def _html_row(
 def _html_table(
-    table_list: List[Tuple[int, List[Tuple[int, int, int, int]]]],
-    cells_ann_list: List[Tuple[int, List[str]]],
+    table_list: list[tuple[int, list[tuple[int, int, int, int]]]],
+    cells_ann_list: list[tuple[int, list[str]]],
     number_of_rows: int,
     number_of_cols: int,
-) -> List[str]:
+) -> list[str]:
     """
     Html table string generation
     """
     html = ["<table>"]
-    position_filled: List[Tuple[int, int]] = []
+    position_filled: list[tuple[int, int]] = []
     for idx in range(1, number_of_rows + 1):
         row_idx = list(filter(lambda x: x[0] == idx, table_list))[0][1]  # pylint:disable=W0640
         row_ann_ids = list(filter(lambda x: x[0] == idx, cells_ann_list))[0][1]  # pylint:disable=W0640
@@ -294,7 +295,7 @@ def _html_table(
     return html
-def generate_html_string(table: ImageAnnotation) -> List[str]:
+def generate_html_string(table: ImageAnnotation) -> list[str]:
     """
     Takes the table segmentation by using table cells row number, column numbers etc. and generates a html
     representation.
@@ -307,36 +308,36 @@ def generate_html_string(table: ImageAnnotation) -> List[str]:
     table_image = table.image
     cells = table_image.get_annotation(
         category_names=[
-            LayoutType.cell,
-            CellType.header,
-            CellType.body,
-            CellType.spanning,
-            CellType.row_header,
-            CellType.column_header,
-            CellType.projected_row_header,
+            LayoutType.CELL,
+            CellType.HEADER,
+            CellType.BODY,
+            CellType.SPANNING,
+            CellType.ROW_HEADER,
+            CellType.COLUMN_HEADER,
+            CellType.PROJECTED_ROW_HEADER,
         ]
     )
-    number_of_rows = int(table_image.summary.get_sub_category(TableType.number_of_rows).category_id)
-    number_of_cols = int(table_image.summary.get_sub_category(TableType.number_of_columns).category_id)
+    number_of_rows = table_image.summary.get_sub_category(TableType.NUMBER_OF_ROWS).category_id
+    number_of_cols = table_image.summary.get_sub_category(TableType.NUMBER_OF_COLUMNS).category_id
     table_list = []
     cells_ann_list = []
     for row_number in range(1, number_of_rows + 1):
         cells_of_row = list(
             sorted(
                 filter(
-                    lambda cell: cell.get_sub_category(CellType.row_number).category_id
-                    == str(row_number),  # pylint: disable=W0640
+                    lambda cell: cell.get_sub_category(CellType.ROW_NUMBER).category_id
+                    == row_number,  # pylint: disable=W0640
                     cells,
                 ),
-                key=lambda cell: cell.get_sub_category(CellType.column_number).category_id,
+                key=lambda cell: cell.get_sub_category(CellType.COLUMN_NUMBER).category_id,
             )
         )
         row_list = [
             (
-                int(cell.get_sub_category(CellType.row_number).category_id),
-                int(cell.get_sub_category(CellType.column_number).category_id),
-                int(cell.get_sub_category(CellType.row_span).category_id),
-                int(cell.get_sub_category(CellType.column_span).category_id),
+                cell.get_sub_category(CellType.ROW_NUMBER).category_id,
+                cell.get_sub_category(CellType.COLUMN_NUMBER).category_id,
+                cell.get_sub_category(CellType.ROW_SPAN).category_id,
+                cell.get_sub_category(CellType.COLUMN_SPAN).category_id,
             )
             for cell in cells_of_row
         ]
@@ -398,19 +399,13 @@ class TableSegmentationRefinementService(PipelineComponent):
     """
-    def __init__(self) -> None:
-        self._table_name = [LayoutType.table, LayoutType.table_rotated]
-        self._cell_names = [
-            LayoutType.cell,
-            CellType.column_header,
-            CellType.projected_row_header,
-            CellType.spanning,
-            CellType.row_header,
-        ]
+    def __init__(self, table_name: Sequence[ObjectTypes], cell_names: Sequence[ObjectTypes]) -> None:
+        self.table_name = table_name
+        self.cell_names = cell_names
         super().__init__("table_segment_refine")
     def serve(self, dp: Image) -> None:
-        tables = dp.get_annotation(category_names=self._table_name)
+        tables = dp.get_annotation(category_names=self.table_name)
         for table in tables:
             if table.image is None:
                 raise ImageError("table.image cannot be None")
@@ -427,23 +422,23 @@ class TableSegmentationRefinementService(PipelineComponent):
                     det_result = DetectionResult(
                         box=merged_box.to_list(mode="xyxy"),
                         score=-1.0,
-                        class_id=int(cells[0].category_id),
+                        class_id=cells[0].category_id,
                         class_name=get_type(cells[0].category_name),
                     )
                     new_cell_ann_id = self.dp_manager.set_image_annotation(det_result, table.annotation_id)
                     if new_cell_ann_id is not None:
                         row_number, col_number, row_span, col_span = _tiling_to_cell_position(tiling)
                         self.dp_manager.set_category_annotation(
-                            CellType.row_number, row_number, CellType.row_number, new_cell_ann_id
+                            CellType.ROW_NUMBER, row_number, CellType.ROW_NUMBER, new_cell_ann_id
                         )
                         self.dp_manager.set_category_annotation(
-                            CellType.column_number, col_number, CellType.column_number, new_cell_ann_id
+                            CellType.COLUMN_NUMBER, col_number, CellType.COLUMN_NUMBER, new_cell_ann_id
                         )
                         self.dp_manager.set_category_annotation(
-                            CellType.row_span, row_span, CellType.row_span, new_cell_ann_id
+                            CellType.ROW_SPAN, row_span, CellType.ROW_SPAN, new_cell_ann_id
                         )
                         self.dp_manager.set_category_annotation(
-                            CellType.column_span, col_span, CellType.column_span, new_cell_ann_id
+                            CellType.COLUMN_SPAN, col_span, CellType.COLUMN_SPAN, new_cell_ann_id
                         )
                     else:
                         # DetectionResult cannot be dumped, hence merged_box must already exist. Hence, it must
@@ -458,67 +453,59 @@ class TableSegmentationRefinementService(PipelineComponent):
                         for cell in cells:
                             cell.deactivate()
-            cells = table.image.get_annotation(category_names=self._cell_names)
-            number_of_rows = max(int(cell.get_sub_category(CellType.row_number).category_id) for cell in cells)
-            number_of_cols = max(int(cell.get_sub_category(CellType.column_number).category_id) for cell in cells)
-            max_row_span = max(int(cell.get_sub_category(CellType.row_span).category_id) for cell in cells)
-            max_col_span = max(int(cell.get_sub_category(CellType.column_span).category_id) for cell in cells)
+            cells = table.image.get_annotation(category_names=self.cell_names)
+            number_of_rows = max(cell.get_sub_category(CellType.ROW_NUMBER).category_id for cell in cells)
+            number_of_cols = max(cell.get_sub_category(CellType.COLUMN_NUMBER).category_id for cell in cells)
+            max_row_span = max(cell.get_sub_category(CellType.ROW_SPAN).category_id for cell in cells)
+            max_col_span = max(cell.get_sub_category(CellType.COLUMN_SPAN).category_id for cell in cells)
             # TODO: the summaries should be sub categories of the underlying ann
-            if table.image.summary is not None:
-                if (
-                    TableType.number_of_rows in table.image.summary.sub_categories
-                    and TableType.number_of_columns in table.image.summary.sub_categories
-                    and TableType.max_row_span in table.image.summary.sub_categories
-                    and TableType.max_col_span in table.image.summary.sub_categories
-                ):
-                    table.image.summary.remove_sub_category(TableType.number_of_rows)
-                    table.image.summary.remove_sub_category(TableType.number_of_columns)
-                    table.image.summary.remove_sub_category(TableType.max_row_span)
-                    table.image.summary.remove_sub_category(TableType.max_col_span)
-                else:
-                    raise AnnotationError(
-                        "Table summary does not contain sub categories TableType.number_of_rows, "
-                        "TableType.number_of_columns, TableType.max_row_span, TableType.max_col_span"
-                    )
+            if (
+                TableType.NUMBER_OF_ROWS in table.image.summary.sub_categories
+                and TableType.NUMBER_OF_COLUMNS in table.image.summary.sub_categories
+                and TableType.MAX_ROW_SPAN in table.image.summary.sub_categories
+                and TableType.MAX_COL_SPAN in table.image.summary.sub_categories
+            ):
+                table.image.summary.remove_sub_category(TableType.NUMBER_OF_ROWS)
+                table.image.summary.remove_sub_category(TableType.NUMBER_OF_COLUMNS)
+                table.image.summary.remove_sub_category(TableType.MAX_ROW_SPAN)
+                table.image.summary.remove_sub_category(TableType.MAX_COL_SPAN)
             self.dp_manager.set_summary_annotation(
-                TableType.number_of_rows, TableType.number_of_rows, number_of_rows, annotation_id=table.annotation_id
+                TableType.NUMBER_OF_ROWS, TableType.NUMBER_OF_ROWS, number_of_rows, annotation_id=table.annotation_id
             )
             self.dp_manager.set_summary_annotation(
-                TableType.number_of_columns,
-                TableType.number_of_columns,
+                TableType.NUMBER_OF_COLUMNS,
+                TableType.NUMBER_OF_COLUMNS,
                 number_of_cols,
                 annotation_id=table.annotation_id,
             )
             self.dp_manager.set_summary_annotation(
-                TableType.max_row_span, TableType.max_row_span, max_row_span, annotation_id=table.annotation_id
+                TableType.MAX_ROW_SPAN, TableType.MAX_ROW_SPAN, max_row_span, annotation_id=table.annotation_id
             )
             self.dp_manager.set_summary_annotation(
-                TableType.max_col_span, TableType.max_col_span, max_col_span, annotation_id=table.annotation_id
+                TableType.MAX_COL_SPAN, TableType.MAX_COL_SPAN, max_col_span, annotation_id=table.annotation_id
             )
             html = generate_html_string(table)
-            self.dp_manager.set_container_annotation(TableType.html, -1, TableType.html, table.annotation_id, html)
-    def clone(self) -> PipelineComponent:
-        return self.__class__()
-    def get_meta_annotation(self) -> JsonDict:
-        return dict(
-            [
-                ("image_annotations", []),
-                (
-                    "sub_categories",
-                    {
-                        LayoutType.cell: {
-                            CellType.row_number,
-                            CellType.column_number,
-                            CellType.row_span,
-                            CellType.column_span,
-                        },
-                        LayoutType.table: {TableType.html},
-                    },
-                ),
-                ("relationships", {}),
-                ("summaries", []),
-            ]
+            self.dp_manager.set_container_annotation(TableType.HTML, -1, TableType.HTML, table.annotation_id, html)
+    def clone(self) -> TableSegmentationRefinementService:
+        return self.__class__(self.table_name, self.cell_names)
+    def get_meta_annotation(self) -> MetaAnnotation:
+        return MetaAnnotation(
+            image_annotations=(),
+            sub_categories={
+                LayoutType.CELL: {
+                    CellType.ROW_NUMBER,
+                    CellType.COLUMN_NUMBER,
+                    CellType.ROW_SPAN,
+                    CellType.COLUMN_SPAN,
+                },
+                LayoutType.TABLE: {TableType.HTML},
+            },
+            relationships={},
+            summaries=(),
         )
+    def clear_predictor(self) -> None:
+        pass

deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl

Potentially problematic release.

deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl