deepdoctection 0.31-py3-none-any.whl → 0.33-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoctection/__init__.py +16 -29
- deepdoctection/analyzer/dd.py +70 -59
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +41 -56
- deepdoctection/datapoint/box.py +9 -8
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +56 -44
- deepdoctection/datapoint/view.py +245 -150
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +35 -26
- deepdoctection/datasets/base.py +14 -12
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +24 -26
- deepdoctection/datasets/instances/doclaynet.py +51 -51
- deepdoctection/datasets/instances/fintabnet.py +46 -46
- deepdoctection/datasets/instances/funsd.py +25 -24
- deepdoctection/datasets/instances/iiitar13k.py +13 -10
- deepdoctection/datasets/instances/layouttest.py +4 -3
- deepdoctection/datasets/instances/publaynet.py +5 -5
- deepdoctection/datasets/instances/pubtables1m.py +24 -21
- deepdoctection/datasets/instances/pubtabnet.py +32 -30
- deepdoctection/datasets/instances/rvlcdip.py +30 -30
- deepdoctection/datasets/instances/xfund.py +26 -26
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +15 -13
- deepdoctection/eval/eval.py +41 -37
- deepdoctection/eval/tedsmetric.py +30 -23
- deepdoctection/eval/tp_eval_callback.py +16 -19
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +85 -113
- deepdoctection/extern/deskew.py +14 -11
- deepdoctection/extern/doctrocr.py +141 -130
- deepdoctection/extern/fastlang.py +27 -18
- deepdoctection/extern/hfdetr.py +71 -62
- deepdoctection/extern/hflayoutlm.py +504 -211
- deepdoctection/extern/hflm.py +230 -0
- deepdoctection/extern/model.py +488 -302
- deepdoctection/extern/pdftext.py +23 -19
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +29 -19
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +18 -18
- deepdoctection/extern/tp/tfutils.py +57 -9
- deepdoctection/extern/tp/tpcompat.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +45 -53
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/cats.py +27 -29
- deepdoctection/mapper/cocostruct.py +10 -10
- deepdoctection/mapper/d2struct.py +27 -26
- deepdoctection/mapper/hfstruct.py +13 -8
- deepdoctection/mapper/laylmstruct.py +178 -37
- deepdoctection/mapper/maputils.py +12 -11
- deepdoctection/mapper/match.py +2 -2
- deepdoctection/mapper/misc.py +11 -9
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +5 -5
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +5 -5
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/anngen.py +12 -14
- deepdoctection/pipe/base.py +52 -106
- deepdoctection/pipe/common.py +72 -59
- deepdoctection/pipe/concurrency.py +16 -11
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +20 -16
- deepdoctection/pipe/lm.py +75 -105
- deepdoctection/pipe/order.py +194 -89
- deepdoctection/pipe/refine.py +111 -124
- deepdoctection/pipe/segment.py +156 -161
- deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +48 -41
- deepdoctection/train/hf_detr_train.py +41 -30
- deepdoctection/train/hf_layoutlm_train.py +153 -135
- deepdoctection/train/tp_frcnn_train.py +32 -31
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +87 -125
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +22 -18
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +16 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/pdf_utils.py +11 -11
- deepdoctection/utils/settings.py +185 -181
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +74 -72
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
- deepdoctection-0.33.dist-info/RECORD +146 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.31.dist-info/RECORD +0 -144
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
deepdoctection/mapper/match.py
CHANGED
@@ -19,7 +19,7 @@
 Module for matching detections according to various matching rules
 """
 
-from typing import Any, Literal, Optional, Sequence,
+from typing import Any, Literal, Optional, Sequence, Union
 
 import numpy as np
 from numpy.typing import NDArray
@@ -41,7 +41,7 @@ def match_anns_by_intersection(
     parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
     child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
     max_parent_only: bool = False,
-) ->
+) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
     """
     Generates an iou/ioa-matrix for parent_ann_categories and child_ann_categories and returns pairs of child/parent
     indices that are above some intersection threshold. It will also return a list of all pre selected parent and child
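The docstring above describes the core of match_anns_by_intersection: build a pairwise iou/ioa matrix between parent and child boxes and keep the index pairs above a threshold. A minimal NumPy sketch of that idea (helper name and threshold are illustrative, not deepdoctection's implementation):

import numpy as np
from numpy.typing import NDArray


def iou_matrix(parents: NDArray[np.float64], children: NDArray[np.float64]) -> NDArray[np.float64]:
    # boxes in (ulx, uly, lrx, lry) format; result has shape (n_parents, n_children)
    ulx = np.maximum(parents[:, None, 0], children[None, :, 0])
    uly = np.maximum(parents[:, None, 1], children[None, :, 1])
    lrx = np.minimum(parents[:, None, 2], children[None, :, 2])
    lry = np.minimum(parents[:, None, 3], children[None, :, 3])
    inter = np.clip(lrx - ulx, 0, None) * np.clip(lry - uly, 0, None)
    area_p = (parents[:, 2] - parents[:, 0]) * (parents[:, 3] - parents[:, 1])
    area_c = (children[:, 2] - children[:, 0]) * (children[:, 3] - children[:, 1])
    return inter / np.maximum(area_p[:, None] + area_c[None, :] - inter, 1e-9)


parents = np.array([[0.0, 0.0, 10.0, 10.0]])
children = np.array([[1.0, 1.0, 4.0, 4.0], [20.0, 20.0, 30.0, 30.0]])
parent_idx, child_idx = np.nonzero(iou_matrix(parents, children) > 0.05)  # index pairs above threshold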
deepdoctection/mapper/misc.py
CHANGED
@@ -19,19 +19,22 @@
 Module for small mapping functions
 """
 
+from __future__ import annotations
+
 import ast
 import os
-from typing import
+from typing import Mapping, Optional, Sequence, Union
+
+from lazy_imports import try_import
 
 from ..datapoint.convert import convert_pdf_bytes_to_np_array_v2
 from ..datapoint.image import Image
-from ..utils.detection_types import JsonDict
-from ..utils.file_utils import lxml_available
 from ..utils.fs import get_load_image_func, load_image_from_file
+from ..utils.types import JsonDict
 from ..utils.utils import is_file_extension
 from .maputils import MappingContextManager, curry
 
-if lxml_available():
+with try_import() as import_guard:
     from lxml import etree  # pylint: disable=W0611
 
 
@@ -132,7 +135,7 @@ def maybe_remove_image_from_category(dp: Image, category_names: Optional[Union[s
     return dp
 
 
-def image_ann_to_image(dp: Image, category_names: Union[str,
+def image_ann_to_image(dp: Image, category_names: Union[str, list[str]], crop_image: bool = True) -> Image:
     """
     Adds `image` to annotations with given category names
 
@@ -151,7 +154,7 @@ def image_ann_to_image(dp: Image, category_names: Union[str, List[str]], crop_im
 
 @curry
 def maybe_ann_to_sub_image(
-    dp: Image, category_names_sub_image: Union[str,
+    dp: Image, category_names_sub_image: Union[str, list[str]], category_names: Union[str, list[str]], add_summary: bool
 ) -> Image:
     """
     Assigns to sub image with given category names all annotations with given category names whose bounding box lie
@@ -175,7 +178,7 @@ def maybe_ann_to_sub_image(
 
 
 @curry
-def xml_to_dict(dp: JsonDict, xslt_obj:
+def xml_to_dict(dp: JsonDict, xslt_obj: etree.XSLT) -> JsonDict:
     """
     Convert a xml object into a dict using a xsl style sheet.
 
@@ -193,7 +196,6 @@ def xml_to_dict(dp: JsonDict, xslt_obj: "etree.XSLT") -> JsonDict:
     """
 
     output = str(xslt_obj(dp["xml"]))
-    output = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
     dp.pop("xml")
-    dp["json"] = output
+    dp["json"] = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
     return dp
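The first hunk shows a pattern applied across 0.33: the ad-hoc lxml_available() guard gives way to the lazy-imports package. Roughly, and assuming the try_import/check() API that lazy-imports documents:

from lazy_imports import try_import

with try_import() as import_guard:
    from lxml import etree  # an ImportError here is swallowed and recorded on the guard


def parse(xml_bytes: bytes):
    import_guard.check()  # only now does a missing lxml raise a descriptive ImportError
    return etree.fromstring(xml_bytes)

The benefit over the old if lxml_available(): idiom is that a missing optional dependency surfaces at the point of use with a message naming the package, instead of a bare NameError on etree.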
deepdoctection/mapper/pascalstruct.py
CHANGED

@@ -20,25 +20,25 @@ Module for mapping annotations in iiitar13k style structure
 """
 
 import os
-from typing import
+from typing import Optional
 
 from ..datapoint.annotation import ImageAnnotation
 from ..datapoint.box import BoundingBox
 from ..datapoint.image import Image
-from ..utils.detection_types import JsonDict
 from ..utils.fs import load_image_from_file
 from ..utils.settings import get_type
+from ..utils.types import JsonDict
 from .maputils import MappingContextManager, curry, maybe_get_fake_score
 
 
 @curry
 def pascal_voc_dict_to_image(
     dp: JsonDict,
-    categories_name_as_key:
+    categories_name_as_key: dict[str, int],
     load_image: bool,
     filter_empty_image: bool,
     fake_score: bool,
-    category_name_mapping: Optional[
+    category_name_mapping: Optional[dict[str, str]] = None,
 ) -> Optional[Image]:
     """
     Map a dataset in a structure equivalent to iiitar13k annotation style to image format
deepdoctection/mapper/prodigystruct.py
CHANGED

@@ -23,8 +23,8 @@ import os
 from typing import Mapping, Optional, Sequence
 
 from ..datapoint import BoundingBox, Image, ImageAnnotation
-from ..utils.
-from ..utils.
+from ..utils.settings import ObjectTypes, get_type
+from ..utils.types import JsonDict, PathLikeOrStr
 from .maputils import MappingContextManager, curry, maybe_get_fake_score
 
 _PRODIGY_IMAGE_PREFIX = "data:image/png;base64,"
@@ -33,10 +33,10 @@ _PRODIGY_IMAGE_PREFIX = "data:image/png;base64,"
 @curry
 def prodigy_to_image(
     dp: JsonDict,
-    categories_name_as_key: Mapping[
+    categories_name_as_key: Mapping[ObjectTypes, int],
     load_image: bool,
     fake_score: bool,
-    path_reference_ds: Optional[
+    path_reference_ds: Optional[PathLikeOrStr] = None,
     accept_only_answer: bool = False,
     category_name_mapping: Optional[Mapping[str, str]] = None,
 ) -> Optional[Image]:
@@ -133,7 +133,7 @@ def prodigy_to_image(
         annotation = ImageAnnotation(
             category_name=label,
             bounding_box=bbox,
-            category_id=categories_name_as_key[label],
+            category_id=categories_name_as_key[get_type(label)],
             score=score,
             external_id=external_id,
         )
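The interesting change is categories_name_as_key[get_type(label)]: in 0.33 the category dict is typed Mapping[ObjectTypes, int], i.e. keyed by members of the ObjectTypes enum rather than by plain strings, so the raw label coming out of the Prodigy payload must be resolved first. A toy illustration (the enum below is a stand-in for the example, not deepdoctection's actual ObjectTypes registry):

from enum import Enum


class ToyObjectTypes(Enum):
    TABLE = "table"


def get_type(label: str) -> ToyObjectTypes:
    # resolve a raw string label to the corresponding enum member
    return ToyObjectTypes(label)


categories_name_as_key = {ToyObjectTypes.TABLE: 1}  # 0.33: enum members as keys, int ids as values

label = "table"  # raw label as it arrives in the Prodigy record
category_id = categories_name_as_key[get_type(label)]  # plain categories_name_as_key[label] raises KeyError
assert category_id == 1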
deepdoctection/mapper/pubstruct.py
CHANGED

@@ -20,17 +20,16 @@ Module for mapping annotations in pubtabnet style structure
 """
 import itertools
 import os
-from typing import
+from typing import Iterable, Optional, Sequence
 
 import numpy as np
 
 from ..datapoint import BoundingBox, CategoryAnnotation, ContainerAnnotation, ImageAnnotation
-from ..datapoint.annotation import SummaryAnnotation
 from ..datapoint.convert import convert_pdf_bytes_to_np_array_v2
 from ..datapoint.image import Image
-from ..utils.detection_types import JsonDict
 from ..utils.fs import load_bytes_from_pdf_file, load_image_from_file
-from ..utils.settings import CellType, LayoutType, Relationships, TableType, WordType
+from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, SummaryType, TableType, WordType
+from ..utils.types import JsonDict, PubtabnetDict
 from ..utils.utils import is_file_extension
 from .maputils import MappingContextManager, curry, maybe_get_fake_score
 
@@ -52,14 +51,14 @@ def _convert_boxes(dp: JsonDict, height: int) -> JsonDict:
     return dp
 
 
-def _get_table_annotation(dp: JsonDict, category_id:
+def _get_table_annotation(dp: JsonDict, category_id: int) -> ImageAnnotation:
     ulx, uly, lrx, lry = list(map(float, dp["bbox"]))
     bbox = BoundingBox(absolute_coords=True, ulx=ulx, uly=uly, lrx=lrx, lry=lry)
-    annotation = ImageAnnotation(category_name=LayoutType.
+    annotation = ImageAnnotation(category_name=LayoutType.TABLE, bounding_box=bbox, category_id=category_id)
     return annotation
 
 
-def _cell_token(html: Sequence[str]) ->
+def _cell_token(html: Sequence[str]) -> list[list[int]]:
     index_rows = [i for i, tag in enumerate(html) if tag == "<tr>"]
     index_cells = [i for i, tag in enumerate(html) if tag in ("<td>", ">")]
     index_rows_tmp = [(index_rows[i], index_rows[i + 1]) for i in range(len(index_rows) - 1)]
@@ -72,7 +71,7 @@ def _cell_token(html: Sequence[str]) -> List[List[int]]:
     return index_cells_tmp
 
 
-def _item_spans(html: Sequence[str], index_cells: Sequence[Sequence[int]], item: str) ->
+def _item_spans(html: Sequence[str], index_cells: Sequence[Sequence[int]], item: str) -> list[list[int]]:
     item_spans = [
         [
             (
@@ -102,7 +101,7 @@ def _end_of_header(html: Sequence[str]) -> int:
     return 0
 
 
-def tile_table(row_spans: Sequence[Sequence[int]], col_spans: Sequence[Sequence[int]]) ->
+def tile_table(row_spans: Sequence[Sequence[int]], col_spans: Sequence[Sequence[int]]) -> list[list[int]]:
     """
     Tiles a table according the row and column span scheme. A table can be represented as a list of list, where each
     inner list has the same length. Each cell with a cell id can be located according to their row and column spans in
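The docstring spells out the representation tile_table returns: a full grid in which every position holds the id of the cell covering it, so spans are implicit. A small hand-worked illustration of that data structure, together with the row/column inference that row_col_cell_ids performs further down (the grid is made up for the example):

# A 2x3 table: cell 0 spans both rows of the first column,
# cell 1 spans the last two columns of the first row.
#
#   +---+-------+
#   | 0 |   1   |
#   +   +---+---+
#   |   | 2 | 3 |
#   +---+---+---+
tiling = [[0, 1, 1], [0, 2, 3]]

# The absolute (row, column) of each cell is simply its first occurrence in the grid:
first_position: dict[int, tuple[int, int]] = {}
for row_idx, row in enumerate(tiling):
    for col_idx, cell_id in enumerate(row):
        first_position.setdefault(cell_id, (row_idx + 1, col_idx + 1))

assert first_position == {0: (1, 1), 1: (1, 2), 2: (2, 2), 3: (2, 3)}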
@@ -153,28 +152,25 @@ def tile_table(row_spans: Sequence[Sequence[int]], col_spans: Sequence[Sequence[
     return tiling
 
 
-def _add_items(
-
-
+def _add_items(
+    image: Image, item_type: str, categories_name_as_key: dict[ObjectTypes, int], pubtables_like: bool
+) -> Image:
+    item_number = CellType.ROW_NUMBER if item_type == LayoutType.ROW else CellType.COLUMN_NUMBER
+    item_span = CellType.ROW_SPAN if item_type == LayoutType.ROW else CellType.COLUMN_SPAN
 
-    summary_key = TableType.
+    summary_key = TableType.NUMBER_OF_ROWS if item_type == LayoutType.ROW else TableType.NUMBER_OF_COLUMNS
 
-
+    category_item = image.summary.get_sub_category(summary_key)
+    number_of_items = category_item.category_id
 
-
-    category_item = image.summary.get_sub_category(summary_key)
-    number_of_items = int(category_item.category_id)
-
-    cells = image.get_annotation(category_names=LayoutType.cell)
+    cells = image.get_annotation(category_names=LayoutType.CELL)
     table: ImageAnnotation
 
     for item_num in range(1, number_of_items + 1):
         cell_item = list(
-            filter(
-                lambda x: x.get_sub_category(item_number).category_id == str(item_num), cells  # pylint: disable=W0640
-            )
+            filter(lambda x: x.get_sub_category(item_number).category_id == item_num, cells)  # pylint: disable=W0640
         )
-        cell_item = list(filter(lambda x: x.get_sub_category(item_span).category_id ==
+        cell_item = list(filter(lambda x: x.get_sub_category(item_span).category_id == 1, cell_item))
         if cell_item:
             ulx = min(cell.bounding_box.ulx for cell in cell_item if isinstance(cell.bounding_box, BoundingBox))
@@ -185,12 +181,12 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
             lry = max(cell.bounding_box.lry for cell in cell_item if isinstance(cell.bounding_box, BoundingBox))
 
             if pubtables_like:
-                tables = image.get_annotation(category_names=LayoutType.
+                tables = image.get_annotation(category_names=LayoutType.TABLE)
                 if not tables:
                     raise ValueError("pubtables_like = True requires table")
                 table = tables[0]
 
-                if item_type == LayoutType.
+                if item_type == LayoutType.ROW:
                     if table.bounding_box:
                         ulx = table.bounding_box.ulx + 1.0
                         lrx = table.bounding_box.lrx - 1.0
@@ -200,22 +196,22 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
                         lry = table.bounding_box.lry - 1.0
 
             item_ann = ImageAnnotation(
-                category_id=categories_name_as_key[TableType.
-                category_name=TableType.
+                category_id=categories_name_as_key[TableType.ITEM],
+                category_name=TableType.ITEM,
                 bounding_box=BoundingBox(absolute_coords=True, ulx=ulx, uly=uly, lrx=lrx, lry=lry),
             )
             item_sub_ann = CategoryAnnotation(category_name=item_type)
-            item_ann.dump_sub_category(TableType.
+            item_ann.dump_sub_category(TableType.ITEM, item_sub_ann, image.image_id)
             image.dump(item_ann)
 
     if pubtables_like:  # pubtables_like:
-        items = image.get_annotation(category_names=TableType.
-        item_type_anns = [ann for ann in items if ann.get_sub_category(TableType.
+        items = image.get_annotation(category_names=TableType.ITEM)
+        item_type_anns = [ann for ann in items if ann.get_sub_category(TableType.ITEM).category_name == item_type]
         item_type_anns.sort(
-            key=lambda x: (x.bounding_box.cx if item_type == LayoutType.
+            key=lambda x: (x.bounding_box.cx if item_type == LayoutType.COLUMN else x.bounding_box.cy)  # type: ignore
        )
        if table.bounding_box:
-            tmp_item_xy = table.bounding_box.uly + 1.0 if item_type == LayoutType.
+            tmp_item_xy = table.bounding_box.uly + 1.0 if item_type == LayoutType.ROW else table.bounding_box.ulx + 1.0
        for idx, item in enumerate(item_type_anns):
            with MappingContextManager(
                dp_name=image.file_name,
@@ -230,22 +226,22 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
                 if next_box:
                     tmp_next_item_xy = (
                         (box.lry + next_box.uly) / 2
-                        if item_type == LayoutType.
+                        if item_type == LayoutType.ROW
                         else (box.lrx + next_box.ulx) / 2
                     )
                 else:
                     if table.bounding_box:
                         tmp_next_item_xy = (
                             table.bounding_box.lry - 1.0
-                            if item_type == LayoutType.
+                            if item_type == LayoutType.ROW
                             else table.bounding_box.lrx - 1.0
                         )
 
                 new_embedding_box = BoundingBox(
-                    ulx=box.ulx if item_type == LayoutType.
-                    uly=tmp_item_xy if item_type == LayoutType.
-                    lrx=box.lrx if item_type == LayoutType.
-                    lry=tmp_next_item_xy if item_type == LayoutType.
+                    ulx=box.ulx if item_type == LayoutType.ROW else tmp_item_xy,
+                    uly=tmp_item_xy if item_type == LayoutType.ROW else box.uly,
+                    lrx=box.lrx if item_type == LayoutType.ROW else tmp_next_item_xy,
+                    lry=tmp_next_item_xy if item_type == LayoutType.ROW else box.lry,
                     absolute_coords=True,
                 )
                 item.bounding_box = new_embedding_box
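Beyond the enum renamings (LayoutType.cell → LayoutType.CELL and so on), the _add_items hunks show a semantic change: sub-category ids are now compared as plain ints (== item_num, == 1) and read without the former int(...) cast, reflecting that category_id is stored as int in 0.33 rather than str. A toy sketch of what the filter now does (the dataclass is a stand-in for CategoryAnnotation, not deepdoctection's class):

from dataclasses import dataclass


@dataclass
class ToySubCategory:
    category_id: int  # was str in 0.31, hence the old str(item_num) comparison


cells = [ToySubCategory(1), ToySubCategory(2), ToySubCategory(2)]
row_2 = [c for c in cells if c.category_id == 2]  # 0.31 needed: c.category_id == str(2)
assert len(row_2) == 2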
@@ -255,7 +251,7 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
     return image
 
 
-def row_col_cell_ids(tiling:
+def row_col_cell_ids(tiling: list[list[int]]) -> list[tuple[int, int, int]]:
     """
     Infers absolute rows and columns for every cell from the tiling of a table.
 
@@ -271,7 +267,7 @@ def row_col_cell_ids(tiling: List[List[int]]) -> List[Tuple[int, int, int]]:
     return rows_col_cell_ids
 
 
-def embedding_in_image(dp: Image, html:
+def embedding_in_image(dp: Image, html: list[str], categories_name_as_key: dict[ObjectTypes, int]) -> Image:
     """
     Generating an image, that resembles the output of an analyzer. The layout of the image is table spanning
     the full page, i.e. there is one table image annotation. Moreover, the table annotation has an image, with cells
@@ -286,8 +282,8 @@ def embedding_in_image(dp: Image, html: List[str], categories_name_as_key: Dict[
     image.image = dp.image
     image.set_width_height(dp.width, dp.height)
     table_ann = ImageAnnotation(
-        category_name=LayoutType.
-        category_id=categories_name_as_key[LayoutType.
+        category_name=LayoutType.TABLE,
+        category_id=categories_name_as_key[LayoutType.TABLE],
         bounding_box=BoundingBox(absolute_coords=True, ulx=0.0, uly=0.0, lrx=dp.width, lry=dp.height),
     )
     image.dump(table_ann)
@@ -297,20 +293,20 @@ def embedding_in_image(dp: Image, html: List[str], categories_name_as_key: Dict[
     # node.
     html.insert(0, "<table>")
     html.append("</table>")
-    if CellType.
+    if CellType.HEADER not in categories_name_as_key:
         html.remove("<thead>")
         html.remove("</thead>")
         if "<tbody>" in html and "</tbody>" in html:
             html.remove("<tbody>")
             html.remove("</tbody>")
 
-    html_ann = ContainerAnnotation(category_name=TableType.
-    table_ann.dump_sub_category(TableType.
+    html_ann = ContainerAnnotation(category_name=TableType.HTML, value=html)
+    table_ann.dump_sub_category(TableType.HTML, html_ann)
     for ann in dp.get_annotation():
         image.dump(ann)
         assert table_ann.image
         table_ann.image.dump(ann)
-        table_ann.dump_relationship(Relationships.
+        table_ann.dump_relationship(Relationships.CHILD, ann.annotation_id)
 
     return image
 
@@ -329,8 +325,8 @@ def nth_index(iterable: Iterable[str], value: str, n: int) -> Optional[int]:
 
 
 def pub_to_image_uncur(  # pylint: disable=R0914
-    dp:
-    categories_name_as_key:
+    dp: PubtabnetDict,
+    categories_name_as_key: dict[ObjectTypes, int],
     load_image: bool,
     fake_score: bool,
     rows_and_cols: bool,
@@ -342,7 +338,7 @@ def pub_to_image_uncur(  # pylint: disable=R0914
     Map a datapoint of annotation structure as given in the Pubtabnet dataset to an Image structure.
     <https://github.com/ibm-aur-nlp/PubTabNet>
 
-    :param dp: A datapoint in serialized
+    :param dp: A datapoint in serialized Pubtabnet format.
     :param categories_name_as_key: A dict of categories, e.g. DatasetCategories.get_categories(name_as_key=True)
     :param load_image: If `True` it will load image to `Image.image`
     :param fake_score: If dp does not contain a score, a fake score with uniform random variables in (0,1)
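Since pub_to_image_uncur is curried into a one-argument mapper elsewhere in the module, the typical wiring in a dataflow looks roughly like the sketch below. This is a hedged sketch only: it assumes the curried alias pub_to_image that the _uncur suffix suggests, follows the DatasetCategories.get_categories(name_as_key=True) hint from the docstring, and abridges the remaining keyword arguments:

from deepdoctection.dataflow import MapData
from deepdoctection.datasets import get_dataset
from deepdoctection.mapper.pubstruct import pub_to_image

pubtabnet = get_dataset("pubtabnet")
categories = pubtabnet.dataflow.categories.get_categories(name_as_key=True)

# binding everything except dp yields a mapper that MapData can apply per datapoint
pub_mapper = pub_to_image(
    categories_name_as_key=categories,
    load_image=True,
    fake_score=False,
    rows_and_cols=True,
)

df = pubtabnet.dataflow.build(split="train")
df = MapData(df, pub_mapper)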
@@ -407,60 +403,56 @@ def pub_to_image_uncur(  # pylint: disable=R0914
 
     table_ann: Optional[ImageAnnotation] = None
     if is_fintabnet:  # cannot use for synthetic table ann creation
-        table_ann = _get_table_annotation(dp, categories_name_as_key[LayoutType.
+        table_ann = _get_table_annotation(dp, categories_name_as_key[LayoutType.TABLE])
         image.dump(table_ann)
 
-    for idx,
+    for idx, (row_col_cell_id, cell, row_span, col_span) in enumerate(
         zip(rows_cols_cell_ids[::-1], dp["html"]["cells"][::-1], row_spans[::-1], col_spans[::-1])
     ):
-        row_col_cell_id = value[0]
         row_number, col_number, cell_id = row_col_cell_id[0], row_col_cell_id[1], row_col_cell_id[2]
-        cell = value[1]
-        row_span = value[2]
-        col_span = value[3]
 
         if "bbox" in cell:  # empty cells have no box
             ulx, uly, lrx, lry = list(map(float, cell["bbox"]))
             cell_bounding_box = BoundingBox(absolute_coords=True, ulx=ulx, uly=uly, lrx=lrx, lry=lry)
             cell_ann = ImageAnnotation(
-                category_name=LayoutType.
+                category_name=LayoutType.CELL,
                 bounding_box=cell_bounding_box,
-                category_id=categories_name_as_key[LayoutType.
+                category_id=categories_name_as_key[LayoutType.CELL],
                 score=maybe_get_fake_score(fake_score),
             )
             cell_ann.dump_sub_category(
-                CellType.
-                CategoryAnnotation(category_name=CellType.
+                CellType.ROW_NUMBER,
+                CategoryAnnotation(category_name=CellType.ROW_NUMBER, category_id=row_number),
                 image.image_id,
             )
             cell_ann.dump_sub_category(
-                CellType.
-                CategoryAnnotation(category_name=CellType.
+                CellType.COLUMN_NUMBER,
+                CategoryAnnotation(category_name=CellType.COLUMN_NUMBER, category_id=col_number),
                 image.image_id,
             )
             cell_ann.dump_sub_category(
-                CellType.
-                CategoryAnnotation(category_name=CellType.
+                CellType.ROW_SPAN,
+                CategoryAnnotation(category_name=CellType.ROW_SPAN, category_id=row_span),  # type: ignore
                 image.image_id,
             )
             cell_ann.dump_sub_category(
-                CellType.
-                CategoryAnnotation(category_name=CellType.
+                CellType.COLUMN_SPAN,
+                CategoryAnnotation(category_name=CellType.COLUMN_SPAN, category_id=col_span),  # type: ignore
                 image.image_id,
             )
             if (
-
-                or
+                cell_ann.get_sub_category(CellType.ROW_SPAN).category_id > 1
+                or cell_ann.get_sub_category(CellType.COLUMN_SPAN).category_id > 1
             ):
                 cell_ann.dump_sub_category(
-                    CellType.
-                    CategoryAnnotation(category_name=CellType.
+                    CellType.SPANNING,
+                    CategoryAnnotation(category_name=CellType.SPANNING),
                     image.image_id,
                 )
             else:
                 cell_ann.dump_sub_category(
-                    CellType.
-                    CategoryAnnotation(category_name=LayoutType.
+                    CellType.SPANNING,
+                    CategoryAnnotation(category_name=LayoutType.CELL),
                     image.image_id,
                 )
@@ -468,13 +460,13 @@ def pub_to_image_uncur(  # pylint: disable=R0914
             max_cs = max(max_cs, col_span)  # type: ignore
 
         if _has_header:
-            category_name = CellType.
+            category_name = CellType.HEADER if cell_id <= end_of_header else CellType.BODY
             cell_ann.dump_sub_category(
-                CellType.
+                CellType.HEADER, CategoryAnnotation(category_name=category_name), image.image_id
             )
         image.dump(cell_ann)
         if table_ann is not None:
-            table_ann.dump_relationship(Relationships.
+            table_ann.dump_relationship(Relationships.CHILD, cell_ann.annotation_id)
 
         if dd_pipe_like:
             tokens = cell["tokens"]
@@ -484,47 +476,47 @@ def pub_to_image_uncur(  # pylint: disable=R0914
             text = "".join(tokens)
             # we are not separating each word but view the full table content as one word
             word = ImageAnnotation(
-                category_name=LayoutType.
-                category_id=categories_name_as_key[LayoutType.
+                category_name=LayoutType.WORD,
+                category_id=categories_name_as_key[LayoutType.WORD],
                 bounding_box=cell_bounding_box,
             )
-            text_container = ContainerAnnotation(category_name=WordType.
-            word.dump_sub_category(WordType.
-            reading_order = CategoryAnnotation(category_name=Relationships.
-            word.dump_sub_category(Relationships.
+            text_container = ContainerAnnotation(category_name=WordType.CHARACTERS, value=text)
+            word.dump_sub_category(WordType.CHARACTERS, text_container)
+            reading_order = CategoryAnnotation(category_name=Relationships.READING_ORDER, category_id=1)
+            word.dump_sub_category(Relationships.READING_ORDER, reading_order)
             image.dump(word)
-            cell_ann.dump_relationship(Relationships.
+            cell_ann.dump_relationship(Relationships.CHILD, word.annotation_id)
 
             index = nth_index(html, "<td>", number_of_cells - idx)
             if index:
                 html.insert(index + 1, cell_ann.annotation_id)
 
-    summary_ann =
+    summary_ann = CategoryAnnotation(category_name=SummaryType.SUMMARY)
     summary_ann.dump_sub_category(
-        TableType.
-        CategoryAnnotation(category_name=TableType.
+        TableType.NUMBER_OF_ROWS,
+        CategoryAnnotation(category_name=TableType.NUMBER_OF_ROWS, category_id=number_of_rows),
         image.image_id,
     )
     summary_ann.dump_sub_category(
-        TableType.
-        CategoryAnnotation(category_name=TableType.
+        TableType.NUMBER_OF_COLUMNS,
+        CategoryAnnotation(category_name=TableType.NUMBER_OF_COLUMNS, category_id=number_of_cols),
         image.image_id,
     )
     summary_ann.dump_sub_category(
-        TableType.
-        CategoryAnnotation(category_name=TableType.
+        TableType.MAX_ROW_SPAN,
+        CategoryAnnotation(category_name=TableType.MAX_ROW_SPAN, category_id=max_rs),
         image.image_id,
     )
     summary_ann.dump_sub_category(
-        TableType.
-        CategoryAnnotation(category_name=TableType.
+        TableType.MAX_COL_SPAN,
+        CategoryAnnotation(category_name=TableType.MAX_COL_SPAN, category_id=max_cs),
         image.image_id,
     )
     image.summary = summary_ann
 
     if rows_and_cols or dd_pipe_like:
-        image = _add_items(image, LayoutType.
-        image = _add_items(image, LayoutType.
+        image = _add_items(image, LayoutType.ROW, categories_name_as_key, pubtables_like)
+        image = _add_items(image, LayoutType.COLUMN, categories_name_as_key, pubtables_like)
 
     if dd_pipe_like:
         image = embedding_in_image(image, html, categories_name_as_key)
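The word block in the last hunk is the general deepdoctection pattern for attaching text to a layout item: the characters live in a ContainerAnnotation sub-category and the position in the text flow in a READING_ORDER sub-category. Condensed from the hunk above into a standalone snippet (the box and text are made up; category_id is omitted where a default is assumed to exist):

from deepdoctection.datapoint import BoundingBox, CategoryAnnotation, ContainerAnnotation, ImageAnnotation
from deepdoctection.utils.settings import LayoutType, Relationships, WordType

word = ImageAnnotation(
    category_name=LayoutType.WORD,
    bounding_box=BoundingBox(absolute_coords=True, ulx=10.0, uly=10.0, lrx=50.0, lry=22.0),
)
# the raw characters live in a ContainerAnnotation sub-category ...
word.dump_sub_category(
    WordType.CHARACTERS, ContainerAnnotation(category_name=WordType.CHARACTERS, value="total")
)
# ... and the position in the text flow in a READING_ORDER sub-category
word.dump_sub_category(
    Relationships.READING_ORDER,
    CategoryAnnotation(category_name=Relationships.READING_ORDER, category_id=1),
)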
deepdoctection/mapper/tpstruct.py
CHANGED

@@ -22,15 +22,15 @@ import os.path
 from typing import Optional, Sequence, Union
 
 import numpy as np
+from lazy_imports import try_import
 
 from ..datapoint.annotation import ImageAnnotation
 from ..datapoint.image import Image
-from ..utils.
-from ..utils.
-from ..utils.settings import ObjectTypes
+from ..utils.settings import TypeOrStr
+from ..utils.types import JsonDict
 from .maputils import curry
 
-if tf_available():
+with try_import() as import_guard:
     from tensorflow import convert_to_tensor, uint8  # type: ignore # pylint: disable=E0401
     from tensorflow.image import non_max_suppression  # type: ignore # pylint: disable=E0401
 
@@ -39,7 +39,7 @@ if tf_available():
 def image_to_tp_frcnn_training(
     dp: Image,
     add_mask: bool = False,
-    category_names: Optional[Union[
+    category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
 ) -> Optional[JsonDict]:
     """
     Maps an image to a dict to be consumed by Tensorpack Faster-RCNN bounding box detection. Note, that the returned
|