deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection has been flagged as potentially problematic; consult the registry's advisory page for details.

Files changed (131):
  1. deepdoctection/__init__.py +16 -29
  2. deepdoctection/analyzer/dd.py +70 -59
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/dataflow/common.py +9 -5
  5. deepdoctection/dataflow/custom.py +5 -5
  6. deepdoctection/dataflow/custom_serialize.py +75 -18
  7. deepdoctection/dataflow/parallel_map.py +3 -3
  8. deepdoctection/dataflow/serialize.py +4 -4
  9. deepdoctection/dataflow/stats.py +3 -3
  10. deepdoctection/datapoint/annotation.py +41 -56
  11. deepdoctection/datapoint/box.py +9 -8
  12. deepdoctection/datapoint/convert.py +6 -6
  13. deepdoctection/datapoint/image.py +56 -44
  14. deepdoctection/datapoint/view.py +245 -150
  15. deepdoctection/datasets/__init__.py +1 -4
  16. deepdoctection/datasets/adapter.py +35 -26
  17. deepdoctection/datasets/base.py +14 -12
  18. deepdoctection/datasets/dataflow_builder.py +3 -3
  19. deepdoctection/datasets/info.py +24 -26
  20. deepdoctection/datasets/instances/doclaynet.py +51 -51
  21. deepdoctection/datasets/instances/fintabnet.py +46 -46
  22. deepdoctection/datasets/instances/funsd.py +25 -24
  23. deepdoctection/datasets/instances/iiitar13k.py +13 -10
  24. deepdoctection/datasets/instances/layouttest.py +4 -3
  25. deepdoctection/datasets/instances/publaynet.py +5 -5
  26. deepdoctection/datasets/instances/pubtables1m.py +24 -21
  27. deepdoctection/datasets/instances/pubtabnet.py +32 -30
  28. deepdoctection/datasets/instances/rvlcdip.py +30 -30
  29. deepdoctection/datasets/instances/xfund.py +26 -26
  30. deepdoctection/datasets/save.py +6 -6
  31. deepdoctection/eval/__init__.py +1 -4
  32. deepdoctection/eval/accmetric.py +32 -33
  33. deepdoctection/eval/base.py +8 -9
  34. deepdoctection/eval/cocometric.py +15 -13
  35. deepdoctection/eval/eval.py +41 -37
  36. deepdoctection/eval/tedsmetric.py +30 -23
  37. deepdoctection/eval/tp_eval_callback.py +16 -19
  38. deepdoctection/extern/__init__.py +2 -7
  39. deepdoctection/extern/base.py +339 -134
  40. deepdoctection/extern/d2detect.py +85 -113
  41. deepdoctection/extern/deskew.py +14 -11
  42. deepdoctection/extern/doctrocr.py +141 -130
  43. deepdoctection/extern/fastlang.py +27 -18
  44. deepdoctection/extern/hfdetr.py +71 -62
  45. deepdoctection/extern/hflayoutlm.py +504 -211
  46. deepdoctection/extern/hflm.py +230 -0
  47. deepdoctection/extern/model.py +488 -302
  48. deepdoctection/extern/pdftext.py +23 -19
  49. deepdoctection/extern/pt/__init__.py +1 -3
  50. deepdoctection/extern/pt/nms.py +6 -2
  51. deepdoctection/extern/pt/ptutils.py +29 -19
  52. deepdoctection/extern/tessocr.py +39 -38
  53. deepdoctection/extern/texocr.py +18 -18
  54. deepdoctection/extern/tp/tfutils.py +57 -9
  55. deepdoctection/extern/tp/tpcompat.py +21 -14
  56. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  57. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  58. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
  60. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  61. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
  62. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  63. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
  64. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  65. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
  66. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
  67. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
  68. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  69. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  70. deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
  71. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  72. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  73. deepdoctection/extern/tpdetect.py +45 -53
  74. deepdoctection/mapper/__init__.py +3 -8
  75. deepdoctection/mapper/cats.py +27 -29
  76. deepdoctection/mapper/cocostruct.py +10 -10
  77. deepdoctection/mapper/d2struct.py +27 -26
  78. deepdoctection/mapper/hfstruct.py +13 -8
  79. deepdoctection/mapper/laylmstruct.py +178 -37
  80. deepdoctection/mapper/maputils.py +12 -11
  81. deepdoctection/mapper/match.py +2 -2
  82. deepdoctection/mapper/misc.py +11 -9
  83. deepdoctection/mapper/pascalstruct.py +4 -4
  84. deepdoctection/mapper/prodigystruct.py +5 -5
  85. deepdoctection/mapper/pubstruct.py +84 -92
  86. deepdoctection/mapper/tpstruct.py +5 -5
  87. deepdoctection/mapper/xfundstruct.py +33 -33
  88. deepdoctection/pipe/__init__.py +1 -1
  89. deepdoctection/pipe/anngen.py +12 -14
  90. deepdoctection/pipe/base.py +52 -106
  91. deepdoctection/pipe/common.py +72 -59
  92. deepdoctection/pipe/concurrency.py +16 -11
  93. deepdoctection/pipe/doctectionpipe.py +24 -21
  94. deepdoctection/pipe/language.py +20 -25
  95. deepdoctection/pipe/layout.py +20 -16
  96. deepdoctection/pipe/lm.py +75 -105
  97. deepdoctection/pipe/order.py +194 -89
  98. deepdoctection/pipe/refine.py +111 -124
  99. deepdoctection/pipe/segment.py +156 -161
  100. deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
  101. deepdoctection/pipe/text.py +37 -36
  102. deepdoctection/pipe/transform.py +19 -16
  103. deepdoctection/train/__init__.py +6 -12
  104. deepdoctection/train/d2_frcnn_train.py +48 -41
  105. deepdoctection/train/hf_detr_train.py +41 -30
  106. deepdoctection/train/hf_layoutlm_train.py +153 -135
  107. deepdoctection/train/tp_frcnn_train.py +32 -31
  108. deepdoctection/utils/concurrency.py +1 -1
  109. deepdoctection/utils/context.py +13 -6
  110. deepdoctection/utils/develop.py +4 -4
  111. deepdoctection/utils/env_info.py +87 -125
  112. deepdoctection/utils/file_utils.py +6 -11
  113. deepdoctection/utils/fs.py +22 -18
  114. deepdoctection/utils/identifier.py +2 -2
  115. deepdoctection/utils/logger.py +16 -15
  116. deepdoctection/utils/metacfg.py +7 -7
  117. deepdoctection/utils/mocks.py +93 -0
  118. deepdoctection/utils/pdf_utils.py +11 -11
  119. deepdoctection/utils/settings.py +185 -181
  120. deepdoctection/utils/tqdm.py +1 -1
  121. deepdoctection/utils/transform.py +14 -9
  122. deepdoctection/utils/types.py +104 -0
  123. deepdoctection/utils/utils.py +7 -7
  124. deepdoctection/utils/viz.py +74 -72
  125. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
  126. deepdoctection-0.33.dist-info/RECORD +146 -0
  127. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
  128. deepdoctection/utils/detection_types.py +0 -68
  129. deepdoctection-0.31.dist-info/RECORD +0 -144
  130. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
  131. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
@@ -19,7 +19,7 @@
19
19
  Module for matching detections according to various matching rules
20
20
  """
21
21
 
22
- from typing import Any, Literal, Optional, Sequence, Tuple, Union
22
+ from typing import Any, Literal, Optional, Sequence, Union
23
23
 
24
24
  import numpy as np
25
25
  from numpy.typing import NDArray
@@ -41,7 +41,7 @@ def match_anns_by_intersection(
41
41
  parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
42
42
  child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
43
43
  max_parent_only: bool = False,
44
- ) -> Tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
44
+ ) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
45
45
  """
46
46
  Generates an iou/ioa-matrix for parent_ann_categories and child_ann_categories and returns pairs of child/parent
47
47
  indices that are above some intersection threshold. It will also return a list of all pre selected parent and child
@@ -19,19 +19,22 @@
19
19
  Module for small mapping functions
20
20
  """
21
21
 
22
+ from __future__ import annotations
23
+
22
24
  import ast
23
25
  import os
24
- from typing import List, Mapping, Optional, Sequence, Union
26
+ from typing import Mapping, Optional, Sequence, Union
27
+
28
+ from lazy_imports import try_import
25
29
 
26
30
  from ..datapoint.convert import convert_pdf_bytes_to_np_array_v2
27
31
  from ..datapoint.image import Image
28
- from ..utils.detection_types import JsonDict
29
- from ..utils.file_utils import lxml_available
30
32
  from ..utils.fs import get_load_image_func, load_image_from_file
33
+ from ..utils.types import JsonDict
31
34
  from ..utils.utils import is_file_extension
32
35
  from .maputils import MappingContextManager, curry
33
36
 
34
- if lxml_available():
37
+ with try_import() as import_guard:
35
38
  from lxml import etree # pylint: disable=W0611
36
39
 
37
40
 
@@ -132,7 +135,7 @@ def maybe_remove_image_from_category(dp: Image, category_names: Optional[Union[s
132
135
  return dp
133
136
 
134
137
 
135
- def image_ann_to_image(dp: Image, category_names: Union[str, List[str]], crop_image: bool = True) -> Image:
138
+ def image_ann_to_image(dp: Image, category_names: Union[str, list[str]], crop_image: bool = True) -> Image:
136
139
  """
137
140
  Adds `image` to annotations with given category names
138
141
 
@@ -151,7 +154,7 @@ def image_ann_to_image(dp: Image, category_names: Union[str, List[str]], crop_im
151
154
 
152
155
  @curry
153
156
  def maybe_ann_to_sub_image(
154
- dp: Image, category_names_sub_image: Union[str, List[str]], category_names: Union[str, List[str]], add_summary: bool
157
+ dp: Image, category_names_sub_image: Union[str, list[str]], category_names: Union[str, list[str]], add_summary: bool
155
158
  ) -> Image:
156
159
  """
157
160
  Assigns to sub image with given category names all annotations with given category names whose bounding box lie
@@ -175,7 +178,7 @@ def maybe_ann_to_sub_image(
175
178
 
176
179
 
177
180
  @curry
178
- def xml_to_dict(dp: JsonDict, xslt_obj: "etree.XSLT") -> JsonDict:
181
+ def xml_to_dict(dp: JsonDict, xslt_obj: etree.XSLT) -> JsonDict:
179
182
  """
180
183
  Convert a xml object into a dict using a xsl style sheet.
181
184
 
@@ -193,7 +196,6 @@ def xml_to_dict(dp: JsonDict, xslt_obj: "etree.XSLT") -> JsonDict:
193
196
  """
194
197
 
195
198
  output = str(xslt_obj(dp["xml"]))
196
- output = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
197
199
  dp.pop("xml")
198
- dp["json"] = output
200
+ dp["json"] = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
199
201
  return dp
@@ -20,25 +20,25 @@ Module for mapping annotations in iiitar13k style structure
20
20
  """
21
21
 
22
22
  import os
23
- from typing import Dict, Optional
23
+ from typing import Optional
24
24
 
25
25
  from ..datapoint.annotation import ImageAnnotation
26
26
  from ..datapoint.box import BoundingBox
27
27
  from ..datapoint.image import Image
28
- from ..utils.detection_types import JsonDict
29
28
  from ..utils.fs import load_image_from_file
30
29
  from ..utils.settings import get_type
30
+ from ..utils.types import JsonDict
31
31
  from .maputils import MappingContextManager, curry, maybe_get_fake_score
32
32
 
33
33
 
34
34
  @curry
35
35
  def pascal_voc_dict_to_image(
36
36
  dp: JsonDict,
37
- categories_name_as_key: Dict[str, str],
37
+ categories_name_as_key: dict[str, int],
38
38
  load_image: bool,
39
39
  filter_empty_image: bool,
40
40
  fake_score: bool,
41
- category_name_mapping: Optional[Dict[str, str]] = None,
41
+ category_name_mapping: Optional[dict[str, str]] = None,
42
42
  ) -> Optional[Image]:
43
43
  """
44
44
  Map a dataset in a structure equivalent to iiitar13k annotation style to image format
@@ -23,8 +23,8 @@ import os
23
23
  from typing import Mapping, Optional, Sequence
24
24
 
25
25
  from ..datapoint import BoundingBox, Image, ImageAnnotation
26
- from ..utils.detection_types import JsonDict, Pathlike
27
- from ..utils.settings import ObjectTypes
26
+ from ..utils.settings import ObjectTypes, get_type
27
+ from ..utils.types import JsonDict, PathLikeOrStr
28
28
  from .maputils import MappingContextManager, curry, maybe_get_fake_score
29
29
 
30
30
  _PRODIGY_IMAGE_PREFIX = "data:image/png;base64,"
@@ -33,10 +33,10 @@ _PRODIGY_IMAGE_PREFIX = "data:image/png;base64,"
33
33
  @curry
34
34
  def prodigy_to_image(
35
35
  dp: JsonDict,
36
- categories_name_as_key: Mapping[str, str],
36
+ categories_name_as_key: Mapping[ObjectTypes, int],
37
37
  load_image: bool,
38
38
  fake_score: bool,
39
- path_reference_ds: Optional[Pathlike] = None,
39
+ path_reference_ds: Optional[PathLikeOrStr] = None,
40
40
  accept_only_answer: bool = False,
41
41
  category_name_mapping: Optional[Mapping[str, str]] = None,
42
42
  ) -> Optional[Image]:
@@ -133,7 +133,7 @@ def prodigy_to_image(
133
133
  annotation = ImageAnnotation(
134
134
  category_name=label,
135
135
  bounding_box=bbox,
136
- category_id=categories_name_as_key[label],
136
+ category_id=categories_name_as_key[get_type(label)],
137
137
  score=score,
138
138
  external_id=external_id,
139
139
  )
@@ -20,17 +20,16 @@ Module for mapping annotations in pubtabnet style structure
20
20
  """
21
21
  import itertools
22
22
  import os
23
- from typing import Dict, Iterable, List, Optional, Sequence, Tuple
23
+ from typing import Iterable, Optional, Sequence
24
24
 
25
25
  import numpy as np
26
26
 
27
27
  from ..datapoint import BoundingBox, CategoryAnnotation, ContainerAnnotation, ImageAnnotation
28
- from ..datapoint.annotation import SummaryAnnotation
29
28
  from ..datapoint.convert import convert_pdf_bytes_to_np_array_v2
30
29
  from ..datapoint.image import Image
31
- from ..utils.detection_types import JsonDict
32
30
  from ..utils.fs import load_bytes_from_pdf_file, load_image_from_file
33
- from ..utils.settings import CellType, LayoutType, Relationships, TableType, WordType
31
+ from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, SummaryType, TableType, WordType
32
+ from ..utils.types import JsonDict, PubtabnetDict
34
33
  from ..utils.utils import is_file_extension
35
34
  from .maputils import MappingContextManager, curry, maybe_get_fake_score
36
35
 
@@ -52,14 +51,14 @@ def _convert_boxes(dp: JsonDict, height: int) -> JsonDict:
52
51
  return dp
53
52
 
54
53
 
55
- def _get_table_annotation(dp: JsonDict, category_id: str) -> ImageAnnotation:
54
+ def _get_table_annotation(dp: JsonDict, category_id: int) -> ImageAnnotation:
56
55
  ulx, uly, lrx, lry = list(map(float, dp["bbox"]))
57
56
  bbox = BoundingBox(absolute_coords=True, ulx=ulx, uly=uly, lrx=lrx, lry=lry)
58
- annotation = ImageAnnotation(category_name=LayoutType.table, bounding_box=bbox, category_id=category_id)
57
+ annotation = ImageAnnotation(category_name=LayoutType.TABLE, bounding_box=bbox, category_id=category_id)
59
58
  return annotation
60
59
 
61
60
 
62
- def _cell_token(html: Sequence[str]) -> List[List[int]]:
61
+ def _cell_token(html: Sequence[str]) -> list[list[int]]:
63
62
  index_rows = [i for i, tag in enumerate(html) if tag == "<tr>"]
64
63
  index_cells = [i for i, tag in enumerate(html) if tag in ("<td>", ">")]
65
64
  index_rows_tmp = [(index_rows[i], index_rows[i + 1]) for i in range(len(index_rows) - 1)]
@@ -72,7 +71,7 @@ def _cell_token(html: Sequence[str]) -> List[List[int]]:
72
71
  return index_cells_tmp
73
72
 
74
73
 
75
- def _item_spans(html: Sequence[str], index_cells: Sequence[Sequence[int]], item: str) -> List[List[int]]:
74
+ def _item_spans(html: Sequence[str], index_cells: Sequence[Sequence[int]], item: str) -> list[list[int]]:
76
75
  item_spans = [
77
76
  [
78
77
  (
@@ -102,7 +101,7 @@ def _end_of_header(html: Sequence[str]) -> int:
102
101
  return 0
103
102
 
104
103
 
105
- def tile_table(row_spans: Sequence[Sequence[int]], col_spans: Sequence[Sequence[int]]) -> List[List[int]]:
104
+ def tile_table(row_spans: Sequence[Sequence[int]], col_spans: Sequence[Sequence[int]]) -> list[list[int]]:
106
105
  """
107
106
  Tiles a table according the row and column span scheme. A table can be represented as a list of list, where each
108
107
  inner list has the same length. Each cell with a cell id can be located according to their row and column spans in
@@ -153,28 +152,25 @@ def tile_table(row_spans: Sequence[Sequence[int]], col_spans: Sequence[Sequence[
153
152
  return tiling
154
153
 
155
154
 
156
- def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, str], pubtables_like: bool) -> Image:
157
- item_number = CellType.row_number if item_type == LayoutType.row else CellType.column_number
158
- item_span = CellType.row_span if item_type == LayoutType.row else CellType.column_span
155
+ def _add_items(
156
+ image: Image, item_type: str, categories_name_as_key: dict[ObjectTypes, int], pubtables_like: bool
157
+ ) -> Image:
158
+ item_number = CellType.ROW_NUMBER if item_type == LayoutType.ROW else CellType.COLUMN_NUMBER
159
+ item_span = CellType.ROW_SPAN if item_type == LayoutType.ROW else CellType.COLUMN_SPAN
159
160
 
160
- summary_key = TableType.number_of_rows if item_type == LayoutType.row else TableType.number_of_columns
161
+ summary_key = TableType.NUMBER_OF_ROWS if item_type == LayoutType.ROW else TableType.NUMBER_OF_COLUMNS
161
162
 
162
- number_of_items = 0
163
+ category_item = image.summary.get_sub_category(summary_key)
164
+ number_of_items = category_item.category_id
163
165
 
164
- if image.summary is not None:
165
- category_item = image.summary.get_sub_category(summary_key)
166
- number_of_items = int(category_item.category_id)
167
-
168
- cells = image.get_annotation(category_names=LayoutType.cell)
166
+ cells = image.get_annotation(category_names=LayoutType.CELL)
169
167
  table: ImageAnnotation
170
168
 
171
169
  for item_num in range(1, number_of_items + 1):
172
170
  cell_item = list(
173
- filter(
174
- lambda x: x.get_sub_category(item_number).category_id == str(item_num), cells # pylint: disable=W0640
175
- )
171
+ filter(lambda x: x.get_sub_category(item_number).category_id == item_num, cells) # pylint: disable=W0640
176
172
  )
177
- cell_item = list(filter(lambda x: x.get_sub_category(item_span).category_id == "1", cell_item))
173
+ cell_item = list(filter(lambda x: x.get_sub_category(item_span).category_id == 1, cell_item))
178
174
  if cell_item:
179
175
  ulx = min(cell.bounding_box.ulx for cell in cell_item if isinstance(cell.bounding_box, BoundingBox))
180
176
 
@@ -185,12 +181,12 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
185
181
  lry = max(cell.bounding_box.lry for cell in cell_item if isinstance(cell.bounding_box, BoundingBox))
186
182
 
187
183
  if pubtables_like:
188
- tables = image.get_annotation(category_names=LayoutType.table)
184
+ tables = image.get_annotation(category_names=LayoutType.TABLE)
189
185
  if not tables:
190
186
  raise ValueError("pubtables_like = True requires table")
191
187
  table = tables[0]
192
188
 
193
- if item_type == LayoutType.row:
189
+ if item_type == LayoutType.ROW:
194
190
  if table.bounding_box:
195
191
  ulx = table.bounding_box.ulx + 1.0
196
192
  lrx = table.bounding_box.lrx - 1.0
@@ -200,22 +196,22 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
200
196
  lry = table.bounding_box.lry - 1.0
201
197
 
202
198
  item_ann = ImageAnnotation(
203
- category_id=categories_name_as_key[TableType.item],
204
- category_name=TableType.item,
199
+ category_id=categories_name_as_key[TableType.ITEM],
200
+ category_name=TableType.ITEM,
205
201
  bounding_box=BoundingBox(absolute_coords=True, ulx=ulx, uly=uly, lrx=lrx, lry=lry),
206
202
  )
207
203
  item_sub_ann = CategoryAnnotation(category_name=item_type)
208
- item_ann.dump_sub_category(TableType.item, item_sub_ann, image.image_id)
204
+ item_ann.dump_sub_category(TableType.ITEM, item_sub_ann, image.image_id)
209
205
  image.dump(item_ann)
210
206
 
211
207
  if pubtables_like: # pubtables_like:
212
- items = image.get_annotation(category_names=TableType.item)
213
- item_type_anns = [ann for ann in items if ann.get_sub_category(TableType.item).category_name == item_type]
208
+ items = image.get_annotation(category_names=TableType.ITEM)
209
+ item_type_anns = [ann for ann in items if ann.get_sub_category(TableType.ITEM).category_name == item_type]
214
210
  item_type_anns.sort(
215
- key=lambda x: (x.bounding_box.cx if item_type == LayoutType.column else x.bounding_box.cy) # type: ignore
211
+ key=lambda x: (x.bounding_box.cx if item_type == LayoutType.COLUMN else x.bounding_box.cy) # type: ignore
216
212
  )
217
213
  if table.bounding_box:
218
- tmp_item_xy = table.bounding_box.uly + 1.0 if item_type == LayoutType.row else table.bounding_box.ulx + 1.0
214
+ tmp_item_xy = table.bounding_box.uly + 1.0 if item_type == LayoutType.ROW else table.bounding_box.ulx + 1.0
219
215
  for idx, item in enumerate(item_type_anns):
220
216
  with MappingContextManager(
221
217
  dp_name=image.file_name,
@@ -230,22 +226,22 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
230
226
  if next_box:
231
227
  tmp_next_item_xy = (
232
228
  (box.lry + next_box.uly) / 2
233
- if item_type == LayoutType.row
229
+ if item_type == LayoutType.ROW
234
230
  else (box.lrx + next_box.ulx) / 2
235
231
  )
236
232
  else:
237
233
  if table.bounding_box:
238
234
  tmp_next_item_xy = (
239
235
  table.bounding_box.lry - 1.0
240
- if item_type == LayoutType.row
236
+ if item_type == LayoutType.ROW
241
237
  else table.bounding_box.lrx - 1.0
242
238
  )
243
239
 
244
240
  new_embedding_box = BoundingBox(
245
- ulx=box.ulx if item_type == LayoutType.row else tmp_item_xy,
246
- uly=tmp_item_xy if item_type == LayoutType.row else box.uly,
247
- lrx=box.lrx if item_type == LayoutType.row else tmp_next_item_xy,
248
- lry=tmp_next_item_xy if item_type == LayoutType.row else box.lry,
241
+ ulx=box.ulx if item_type == LayoutType.ROW else tmp_item_xy,
242
+ uly=tmp_item_xy if item_type == LayoutType.ROW else box.uly,
243
+ lrx=box.lrx if item_type == LayoutType.ROW else tmp_next_item_xy,
244
+ lry=tmp_next_item_xy if item_type == LayoutType.ROW else box.lry,
249
245
  absolute_coords=True,
250
246
  )
251
247
  item.bounding_box = new_embedding_box
@@ -255,7 +251,7 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
255
251
  return image
256
252
 
257
253
 
258
- def row_col_cell_ids(tiling: List[List[int]]) -> List[Tuple[int, int, int]]:
254
+ def row_col_cell_ids(tiling: list[list[int]]) -> list[tuple[int, int, int]]:
259
255
  """
260
256
  Infers absolute rows and columns for every cell from the tiling of a table.
261
257
 
@@ -271,7 +267,7 @@ def row_col_cell_ids(tiling: List[List[int]]) -> List[Tuple[int, int, int]]:
271
267
  return rows_col_cell_ids
272
268
 
273
269
 
274
- def embedding_in_image(dp: Image, html: List[str], categories_name_as_key: Dict[str, str]) -> Image:
270
+ def embedding_in_image(dp: Image, html: list[str], categories_name_as_key: dict[ObjectTypes, int]) -> Image:
275
271
  """
276
272
  Generating an image, that resembles the output of an analyzer. The layout of the image is table spanning
277
273
  the full page, i.e. there is one table image annotation. Moreover, the table annotation has an image, with cells
@@ -286,8 +282,8 @@ def embedding_in_image(dp: Image, html: List[str], categories_name_as_key: Dict[
286
282
  image.image = dp.image
287
283
  image.set_width_height(dp.width, dp.height)
288
284
  table_ann = ImageAnnotation(
289
- category_name=LayoutType.table,
290
- category_id=categories_name_as_key[LayoutType.table],
285
+ category_name=LayoutType.TABLE,
286
+ category_id=categories_name_as_key[LayoutType.TABLE],
291
287
  bounding_box=BoundingBox(absolute_coords=True, ulx=0.0, uly=0.0, lrx=dp.width, lry=dp.height),
292
288
  )
293
289
  image.dump(table_ann)
@@ -297,20 +293,20 @@ def embedding_in_image(dp: Image, html: List[str], categories_name_as_key: Dict[
297
293
  # node.
298
294
  html.insert(0, "<table>")
299
295
  html.append("</table>")
300
- if CellType.header not in categories_name_as_key:
296
+ if CellType.HEADER not in categories_name_as_key:
301
297
  html.remove("<thead>")
302
298
  html.remove("</thead>")
303
299
  if "<tbody>" in html and "</tbody>" in html:
304
300
  html.remove("<tbody>")
305
301
  html.remove("</tbody>")
306
302
 
307
- html_ann = ContainerAnnotation(category_name=TableType.html, value=html)
308
- table_ann.dump_sub_category(TableType.html, html_ann)
303
+ html_ann = ContainerAnnotation(category_name=TableType.HTML, value=html)
304
+ table_ann.dump_sub_category(TableType.HTML, html_ann)
309
305
  for ann in dp.get_annotation():
310
306
  image.dump(ann)
311
307
  assert table_ann.image
312
308
  table_ann.image.dump(ann)
313
- table_ann.dump_relationship(Relationships.child, ann.annotation_id)
309
+ table_ann.dump_relationship(Relationships.CHILD, ann.annotation_id)
314
310
 
315
311
  return image
316
312
 
@@ -329,8 +325,8 @@ def nth_index(iterable: Iterable[str], value: str, n: int) -> Optional[int]:
329
325
 
330
326
 
331
327
  def pub_to_image_uncur( # pylint: disable=R0914
332
- dp: JsonDict,
333
- categories_name_as_key: Dict[str, str],
328
+ dp: PubtabnetDict,
329
+ categories_name_as_key: dict[ObjectTypes, int],
334
330
  load_image: bool,
335
331
  fake_score: bool,
336
332
  rows_and_cols: bool,
@@ -342,7 +338,7 @@ def pub_to_image_uncur( # pylint: disable=R0914
342
338
  Map a datapoint of annotation structure as given in the Pubtabnet dataset to an Image structure.
343
339
  <https://github.com/ibm-aur-nlp/PubTabNet>
344
340
 
345
- :param dp: A datapoint in serialized coco format.
341
+ :param dp: A datapoint in serialized Pubtabnet format.
346
342
  :param categories_name_as_key: A dict of categories, e.g. DatasetCategories.get_categories(name_as_key=True)
347
343
  :param load_image: If `True` it will load image to `Image.image`
348
344
  :param fake_score: If dp does not contain a score, a fake score with uniform random variables in (0,1)
@@ -407,60 +403,56 @@ def pub_to_image_uncur( # pylint: disable=R0914
407
403
 
408
404
  table_ann: Optional[ImageAnnotation] = None
409
405
  if is_fintabnet: # cannot use for synthetic table ann creation
410
- table_ann = _get_table_annotation(dp, categories_name_as_key[LayoutType.table])
406
+ table_ann = _get_table_annotation(dp, categories_name_as_key[LayoutType.TABLE])
411
407
  image.dump(table_ann)
412
408
 
413
- for idx, value in enumerate(
409
+ for idx, (row_col_cell_id, cell, row_span, col_span) in enumerate(
414
410
  zip(rows_cols_cell_ids[::-1], dp["html"]["cells"][::-1], row_spans[::-1], col_spans[::-1])
415
411
  ):
416
- row_col_cell_id = value[0]
417
412
  row_number, col_number, cell_id = row_col_cell_id[0], row_col_cell_id[1], row_col_cell_id[2]
418
- cell = value[1]
419
- row_span = value[2]
420
- col_span = value[3]
421
413
 
422
414
  if "bbox" in cell: # empty cells have no box
423
415
  ulx, uly, lrx, lry = list(map(float, cell["bbox"]))
424
416
  cell_bounding_box = BoundingBox(absolute_coords=True, ulx=ulx, uly=uly, lrx=lrx, lry=lry)
425
417
  cell_ann = ImageAnnotation(
426
- category_name=LayoutType.cell,
418
+ category_name=LayoutType.CELL,
427
419
  bounding_box=cell_bounding_box,
428
- category_id=categories_name_as_key[LayoutType.cell],
420
+ category_id=categories_name_as_key[LayoutType.CELL],
429
421
  score=maybe_get_fake_score(fake_score),
430
422
  )
431
423
  cell_ann.dump_sub_category(
432
- CellType.row_number,
433
- CategoryAnnotation(category_name=CellType.row_number, category_id=str(row_number)),
424
+ CellType.ROW_NUMBER,
425
+ CategoryAnnotation(category_name=CellType.ROW_NUMBER, category_id=row_number),
434
426
  image.image_id,
435
427
  )
436
428
  cell_ann.dump_sub_category(
437
- CellType.column_number,
438
- CategoryAnnotation(category_name=CellType.column_number, category_id=str(col_number)),
429
+ CellType.COLUMN_NUMBER,
430
+ CategoryAnnotation(category_name=CellType.COLUMN_NUMBER, category_id=col_number),
439
431
  image.image_id,
440
432
  )
441
433
  cell_ann.dump_sub_category(
442
- CellType.row_span,
443
- CategoryAnnotation(category_name=CellType.row_span, category_id=str(row_span)),
434
+ CellType.ROW_SPAN,
435
+ CategoryAnnotation(category_name=CellType.ROW_SPAN, category_id=row_span), # type: ignore
444
436
  image.image_id,
445
437
  )
446
438
  cell_ann.dump_sub_category(
447
- CellType.column_span,
448
- CategoryAnnotation(category_name=CellType.column_span, category_id=str(col_span)),
439
+ CellType.COLUMN_SPAN,
440
+ CategoryAnnotation(category_name=CellType.COLUMN_SPAN, category_id=col_span), # type: ignore
449
441
  image.image_id,
450
442
  )
451
443
  if (
452
- int(cell_ann.get_sub_category(CellType.row_span).category_id) > 1
453
- or int(cell_ann.get_sub_category(CellType.column_span).category_id) > 1
444
+ cell_ann.get_sub_category(CellType.ROW_SPAN).category_id > 1
445
+ or cell_ann.get_sub_category(CellType.COLUMN_SPAN).category_id > 1
454
446
  ):
455
447
  cell_ann.dump_sub_category(
456
- CellType.spanning,
457
- CategoryAnnotation(category_name=CellType.spanning),
448
+ CellType.SPANNING,
449
+ CategoryAnnotation(category_name=CellType.SPANNING),
458
450
  image.image_id,
459
451
  )
460
452
  else:
461
453
  cell_ann.dump_sub_category(
462
- CellType.spanning,
463
- CategoryAnnotation(category_name=LayoutType.cell),
454
+ CellType.SPANNING,
455
+ CategoryAnnotation(category_name=LayoutType.CELL),
464
456
  image.image_id,
465
457
  )
466
458
 
@@ -468,13 +460,13 @@ def pub_to_image_uncur( # pylint: disable=R0914
468
460
  max_cs = max(max_cs, col_span) # type: ignore
469
461
 
470
462
  if _has_header:
471
- category_name = CellType.header if cell_id <= end_of_header else CellType.body
463
+ category_name = CellType.HEADER if cell_id <= end_of_header else CellType.BODY
472
464
  cell_ann.dump_sub_category(
473
- CellType.header, CategoryAnnotation(category_name=category_name), image.image_id
465
+ CellType.HEADER, CategoryAnnotation(category_name=category_name), image.image_id
474
466
  )
475
467
  image.dump(cell_ann)
476
468
  if table_ann is not None:
477
- table_ann.dump_relationship(Relationships.child, cell_ann.annotation_id)
469
+ table_ann.dump_relationship(Relationships.CHILD, cell_ann.annotation_id)
478
470
 
479
471
  if dd_pipe_like:
480
472
  tokens = cell["tokens"]
@@ -484,47 +476,47 @@ def pub_to_image_uncur( # pylint: disable=R0914
484
476
  text = "".join(tokens)
485
477
  # we are not separating each word but view the full table content as one word
486
478
  word = ImageAnnotation(
487
- category_name=LayoutType.word,
488
- category_id=categories_name_as_key[LayoutType.word],
479
+ category_name=LayoutType.WORD,
480
+ category_id=categories_name_as_key[LayoutType.WORD],
489
481
  bounding_box=cell_bounding_box,
490
482
  )
491
- text_container = ContainerAnnotation(category_name=WordType.characters, value=text)
492
- word.dump_sub_category(WordType.characters, text_container)
493
- reading_order = CategoryAnnotation(category_name=Relationships.reading_order, category_id="1")
494
- word.dump_sub_category(Relationships.reading_order, reading_order)
483
+ text_container = ContainerAnnotation(category_name=WordType.CHARACTERS, value=text)
484
+ word.dump_sub_category(WordType.CHARACTERS, text_container)
485
+ reading_order = CategoryAnnotation(category_name=Relationships.READING_ORDER, category_id=1)
486
+ word.dump_sub_category(Relationships.READING_ORDER, reading_order)
495
487
  image.dump(word)
496
- cell_ann.dump_relationship(Relationships.child, word.annotation_id)
488
+ cell_ann.dump_relationship(Relationships.CHILD, word.annotation_id)
497
489
 
498
490
  index = nth_index(html, "<td>", number_of_cells - idx)
499
491
  if index:
500
492
  html.insert(index + 1, cell_ann.annotation_id)
501
493
 
502
- summary_ann = SummaryAnnotation(external_id=image.image_id + "SUMMARY")
494
+ summary_ann = CategoryAnnotation(category_name=SummaryType.SUMMARY)
503
495
  summary_ann.dump_sub_category(
504
- TableType.number_of_rows,
505
- CategoryAnnotation(category_name=TableType.number_of_rows, category_id=str(number_of_rows)),
496
+ TableType.NUMBER_OF_ROWS,
497
+ CategoryAnnotation(category_name=TableType.NUMBER_OF_ROWS, category_id=number_of_rows),
506
498
  image.image_id,
507
499
  )
508
500
  summary_ann.dump_sub_category(
509
- TableType.number_of_columns,
510
- CategoryAnnotation(category_name=TableType.number_of_columns, category_id=str(number_of_cols)),
501
+ TableType.NUMBER_OF_COLUMNS,
502
+ CategoryAnnotation(category_name=TableType.NUMBER_OF_COLUMNS, category_id=number_of_cols),
511
503
  image.image_id,
512
504
  )
513
505
  summary_ann.dump_sub_category(
514
- TableType.max_row_span,
515
- CategoryAnnotation(category_name=TableType.max_row_span, category_id=str(max_rs)),
506
+ TableType.MAX_ROW_SPAN,
507
+ CategoryAnnotation(category_name=TableType.MAX_ROW_SPAN, category_id=max_rs),
516
508
  image.image_id,
517
509
  )
518
510
  summary_ann.dump_sub_category(
519
- TableType.max_col_span,
520
- CategoryAnnotation(category_name=TableType.max_col_span, category_id=str(max_cs)),
511
+ TableType.MAX_COL_SPAN,
512
+ CategoryAnnotation(category_name=TableType.MAX_COL_SPAN, category_id=max_cs),
521
513
  image.image_id,
522
514
  )
523
515
  image.summary = summary_ann
524
516
 
525
517
  if rows_and_cols or dd_pipe_like:
526
- image = _add_items(image, LayoutType.row, categories_name_as_key, pubtables_like)
527
- image = _add_items(image, LayoutType.column, categories_name_as_key, pubtables_like)
518
+ image = _add_items(image, LayoutType.ROW, categories_name_as_key, pubtables_like)
519
+ image = _add_items(image, LayoutType.COLUMN, categories_name_as_key, pubtables_like)
528
520
 
529
521
  if dd_pipe_like:
530
522
  image = embedding_in_image(image, html, categories_name_as_key)
@@ -22,15 +22,15 @@ import os.path
22
22
  from typing import Optional, Sequence, Union
23
23
 
24
24
  import numpy as np
25
+ from lazy_imports import try_import
25
26
 
26
27
  from ..datapoint.annotation import ImageAnnotation
27
28
  from ..datapoint.image import Image
28
- from ..utils.detection_types import JsonDict
29
- from ..utils.file_utils import tf_available
30
- from ..utils.settings import ObjectTypes
29
+ from ..utils.settings import TypeOrStr
30
+ from ..utils.types import JsonDict
31
31
  from .maputils import curry
32
32
 
33
- if tf_available():
33
+ with try_import() as import_guard:
34
34
  from tensorflow import convert_to_tensor, uint8 # type: ignore # pylint: disable=E0401
35
35
  from tensorflow.image import non_max_suppression # type: ignore # pylint: disable=E0401
36
36
 
@@ -39,7 +39,7 @@ if tf_available():
39
39
  def image_to_tp_frcnn_training(
40
40
  dp: Image,
41
41
  add_mask: bool = False,
42
- category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
42
+ category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
43
43
  ) -> Optional[JsonDict]:
44
44
  """
45
45
  Maps an image to a dict to be consumed by Tensorpack Faster-RCNN bounding box detection. Note, that the returned