deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (111)
  1. deepdoctection/__init__.py +8 -25
  2. deepdoctection/analyzer/dd.py +84 -71
  3. deepdoctection/dataflow/common.py +9 -5
  4. deepdoctection/dataflow/custom.py +5 -5
  5. deepdoctection/dataflow/custom_serialize.py +75 -18
  6. deepdoctection/dataflow/parallel_map.py +3 -3
  7. deepdoctection/dataflow/serialize.py +4 -4
  8. deepdoctection/dataflow/stats.py +3 -3
  9. deepdoctection/datapoint/annotation.py +78 -56
  10. deepdoctection/datapoint/box.py +7 -7
  11. deepdoctection/datapoint/convert.py +6 -6
  12. deepdoctection/datapoint/image.py +157 -75
  13. deepdoctection/datapoint/view.py +175 -151
  14. deepdoctection/datasets/adapter.py +30 -24
  15. deepdoctection/datasets/base.py +10 -10
  16. deepdoctection/datasets/dataflow_builder.py +3 -3
  17. deepdoctection/datasets/info.py +23 -25
  18. deepdoctection/datasets/instances/doclaynet.py +48 -49
  19. deepdoctection/datasets/instances/fintabnet.py +44 -45
  20. deepdoctection/datasets/instances/funsd.py +23 -23
  21. deepdoctection/datasets/instances/iiitar13k.py +8 -8
  22. deepdoctection/datasets/instances/layouttest.py +2 -2
  23. deepdoctection/datasets/instances/publaynet.py +3 -3
  24. deepdoctection/datasets/instances/pubtables1m.py +18 -18
  25. deepdoctection/datasets/instances/pubtabnet.py +30 -29
  26. deepdoctection/datasets/instances/rvlcdip.py +28 -29
  27. deepdoctection/datasets/instances/xfund.py +51 -30
  28. deepdoctection/datasets/save.py +6 -6
  29. deepdoctection/eval/accmetric.py +32 -33
  30. deepdoctection/eval/base.py +8 -9
  31. deepdoctection/eval/cocometric.py +13 -12
  32. deepdoctection/eval/eval.py +32 -26
  33. deepdoctection/eval/tedsmetric.py +16 -12
  34. deepdoctection/eval/tp_eval_callback.py +7 -16
  35. deepdoctection/extern/base.py +339 -134
  36. deepdoctection/extern/d2detect.py +69 -89
  37. deepdoctection/extern/deskew.py +11 -10
  38. deepdoctection/extern/doctrocr.py +81 -64
  39. deepdoctection/extern/fastlang.py +23 -16
  40. deepdoctection/extern/hfdetr.py +53 -38
  41. deepdoctection/extern/hflayoutlm.py +216 -155
  42. deepdoctection/extern/hflm.py +35 -30
  43. deepdoctection/extern/model.py +433 -255
  44. deepdoctection/extern/pdftext.py +15 -15
  45. deepdoctection/extern/pt/ptutils.py +4 -2
  46. deepdoctection/extern/tessocr.py +39 -38
  47. deepdoctection/extern/texocr.py +14 -16
  48. deepdoctection/extern/tp/tfutils.py +16 -2
  49. deepdoctection/extern/tp/tpcompat.py +11 -7
  50. deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
  51. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
  52. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
  53. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
  54. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
  55. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
  56. deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
  57. deepdoctection/extern/tpdetect.py +40 -45
  58. deepdoctection/mapper/cats.py +36 -40
  59. deepdoctection/mapper/cocostruct.py +16 -12
  60. deepdoctection/mapper/d2struct.py +22 -22
  61. deepdoctection/mapper/hfstruct.py +7 -7
  62. deepdoctection/mapper/laylmstruct.py +22 -24
  63. deepdoctection/mapper/maputils.py +9 -10
  64. deepdoctection/mapper/match.py +33 -2
  65. deepdoctection/mapper/misc.py +6 -7
  66. deepdoctection/mapper/pascalstruct.py +4 -4
  67. deepdoctection/mapper/prodigystruct.py +6 -6
  68. deepdoctection/mapper/pubstruct.py +84 -92
  69. deepdoctection/mapper/tpstruct.py +3 -3
  70. deepdoctection/mapper/xfundstruct.py +33 -33
  71. deepdoctection/pipe/anngen.py +39 -14
  72. deepdoctection/pipe/base.py +68 -99
  73. deepdoctection/pipe/common.py +181 -85
  74. deepdoctection/pipe/concurrency.py +14 -10
  75. deepdoctection/pipe/doctectionpipe.py +24 -21
  76. deepdoctection/pipe/language.py +20 -25
  77. deepdoctection/pipe/layout.py +18 -16
  78. deepdoctection/pipe/lm.py +49 -47
  79. deepdoctection/pipe/order.py +63 -65
  80. deepdoctection/pipe/refine.py +102 -109
  81. deepdoctection/pipe/segment.py +157 -162
  82. deepdoctection/pipe/sub_layout.py +50 -40
  83. deepdoctection/pipe/text.py +37 -36
  84. deepdoctection/pipe/transform.py +19 -16
  85. deepdoctection/train/d2_frcnn_train.py +27 -25
  86. deepdoctection/train/hf_detr_train.py +22 -18
  87. deepdoctection/train/hf_layoutlm_train.py +49 -48
  88. deepdoctection/train/tp_frcnn_train.py +10 -11
  89. deepdoctection/utils/concurrency.py +1 -1
  90. deepdoctection/utils/context.py +13 -6
  91. deepdoctection/utils/develop.py +4 -4
  92. deepdoctection/utils/env_info.py +52 -14
  93. deepdoctection/utils/file_utils.py +6 -11
  94. deepdoctection/utils/fs.py +41 -14
  95. deepdoctection/utils/identifier.py +2 -2
  96. deepdoctection/utils/logger.py +15 -15
  97. deepdoctection/utils/metacfg.py +7 -7
  98. deepdoctection/utils/pdf_utils.py +39 -14
  99. deepdoctection/utils/settings.py +188 -182
  100. deepdoctection/utils/tqdm.py +1 -1
  101. deepdoctection/utils/transform.py +14 -9
  102. deepdoctection/utils/types.py +104 -0
  103. deepdoctection/utils/utils.py +7 -7
  104. deepdoctection/utils/viz.py +70 -69
  105. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
  106. deepdoctection-0.34.dist-info/RECORD +146 -0
  107. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
  108. deepdoctection/utils/detection_types.py +0 -68
  109. deepdoctection-0.32.dist-info/RECORD +0 -146
  110. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
  111. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
@@ -20,17 +20,16 @@ Module for mapping annotations in pubtabnet style structure
20
20
  """
21
21
  import itertools
22
22
  import os
23
- from typing import Dict, Iterable, List, Optional, Sequence, Tuple
23
+ from typing import Iterable, Optional, Sequence
24
24
 
25
25
  import numpy as np
26
26
 
27
27
  from ..datapoint import BoundingBox, CategoryAnnotation, ContainerAnnotation, ImageAnnotation
28
- from ..datapoint.annotation import SummaryAnnotation
29
28
  from ..datapoint.convert import convert_pdf_bytes_to_np_array_v2
30
29
  from ..datapoint.image import Image
31
- from ..utils.detection_types import JsonDict
32
30
  from ..utils.fs import load_bytes_from_pdf_file, load_image_from_file
33
- from ..utils.settings import CellType, LayoutType, Relationships, TableType, WordType
31
+ from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, SummaryType, TableType, WordType
32
+ from ..utils.types import JsonDict, PubtabnetDict
34
33
  from ..utils.utils import is_file_extension
35
34
  from .maputils import MappingContextManager, curry, maybe_get_fake_score
36
35
 
@@ -52,14 +51,14 @@ def _convert_boxes(dp: JsonDict, height: int) -> JsonDict:
52
51
  return dp
53
52
 
54
53
 
55
- def _get_table_annotation(dp: JsonDict, category_id: str) -> ImageAnnotation:
54
+ def _get_table_annotation(dp: JsonDict, category_id: int) -> ImageAnnotation:
56
55
  ulx, uly, lrx, lry = list(map(float, dp["bbox"]))
57
56
  bbox = BoundingBox(absolute_coords=True, ulx=ulx, uly=uly, lrx=lrx, lry=lry)
58
- annotation = ImageAnnotation(category_name=LayoutType.table, bounding_box=bbox, category_id=category_id)
57
+ annotation = ImageAnnotation(category_name=LayoutType.TABLE, bounding_box=bbox, category_id=category_id)
59
58
  return annotation
60
59
 
61
60
 
62
- def _cell_token(html: Sequence[str]) -> List[List[int]]:
61
+ def _cell_token(html: Sequence[str]) -> list[list[int]]:
63
62
  index_rows = [i for i, tag in enumerate(html) if tag == "<tr>"]
64
63
  index_cells = [i for i, tag in enumerate(html) if tag in ("<td>", ">")]
65
64
  index_rows_tmp = [(index_rows[i], index_rows[i + 1]) for i in range(len(index_rows) - 1)]
@@ -72,7 +71,7 @@ def _cell_token(html: Sequence[str]) -> List[List[int]]:
72
71
  return index_cells_tmp
73
72
 
74
73
 
75
- def _item_spans(html: Sequence[str], index_cells: Sequence[Sequence[int]], item: str) -> List[List[int]]:
74
+ def _item_spans(html: Sequence[str], index_cells: Sequence[Sequence[int]], item: str) -> list[list[int]]:
76
75
  item_spans = [
77
76
  [
78
77
  (
@@ -102,7 +101,7 @@ def _end_of_header(html: Sequence[str]) -> int:
102
101
  return 0
103
102
 
104
103
 
105
- def tile_table(row_spans: Sequence[Sequence[int]], col_spans: Sequence[Sequence[int]]) -> List[List[int]]:
104
+ def tile_table(row_spans: Sequence[Sequence[int]], col_spans: Sequence[Sequence[int]]) -> list[list[int]]:
106
105
  """
107
106
  Tiles a table according the row and column span scheme. A table can be represented as a list of list, where each
108
107
  inner list has the same length. Each cell with a cell id can be located according to their row and column spans in
@@ -153,28 +152,25 @@ def tile_table(row_spans: Sequence[Sequence[int]], col_spans: Sequence[Sequence[
153
152
  return tiling
154
153
 
155
154
 
156
- def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, str], pubtables_like: bool) -> Image:
157
- item_number = CellType.row_number if item_type == LayoutType.row else CellType.column_number
158
- item_span = CellType.row_span if item_type == LayoutType.row else CellType.column_span
155
+ def _add_items(
156
+ image: Image, item_type: str, categories_name_as_key: dict[ObjectTypes, int], pubtables_like: bool
157
+ ) -> Image:
158
+ item_number = CellType.ROW_NUMBER if item_type == LayoutType.ROW else CellType.COLUMN_NUMBER
159
+ item_span = CellType.ROW_SPAN if item_type == LayoutType.ROW else CellType.COLUMN_SPAN
159
160
 
160
- summary_key = TableType.number_of_rows if item_type == LayoutType.row else TableType.number_of_columns
161
+ summary_key = TableType.NUMBER_OF_ROWS if item_type == LayoutType.ROW else TableType.NUMBER_OF_COLUMNS
161
162
 
162
- number_of_items = 0
163
+ category_item = image.summary.get_sub_category(summary_key)
164
+ number_of_items = category_item.category_id
163
165
 
164
- if image.summary is not None:
165
- category_item = image.summary.get_sub_category(summary_key)
166
- number_of_items = int(category_item.category_id)
167
-
168
- cells = image.get_annotation(category_names=LayoutType.cell)
166
+ cells = image.get_annotation(category_names=LayoutType.CELL)
169
167
  table: ImageAnnotation
170
168
 
171
169
  for item_num in range(1, number_of_items + 1):
172
170
  cell_item = list(
173
- filter(
174
- lambda x: x.get_sub_category(item_number).category_id == str(item_num), cells # pylint: disable=W0640
175
- )
171
+ filter(lambda x: x.get_sub_category(item_number).category_id == item_num, cells) # pylint: disable=W0640
176
172
  )
177
- cell_item = list(filter(lambda x: x.get_sub_category(item_span).category_id == "1", cell_item))
173
+ cell_item = list(filter(lambda x: x.get_sub_category(item_span).category_id == 1, cell_item))
178
174
  if cell_item:
179
175
  ulx = min(cell.bounding_box.ulx for cell in cell_item if isinstance(cell.bounding_box, BoundingBox))
180
176
 
@@ -185,12 +181,12 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
185
181
  lry = max(cell.bounding_box.lry for cell in cell_item if isinstance(cell.bounding_box, BoundingBox))
186
182
 
187
183
  if pubtables_like:
188
- tables = image.get_annotation(category_names=LayoutType.table)
184
+ tables = image.get_annotation(category_names=LayoutType.TABLE)
189
185
  if not tables:
190
186
  raise ValueError("pubtables_like = True requires table")
191
187
  table = tables[0]
192
188
 
193
- if item_type == LayoutType.row:
189
+ if item_type == LayoutType.ROW:
194
190
  if table.bounding_box:
195
191
  ulx = table.bounding_box.ulx + 1.0
196
192
  lrx = table.bounding_box.lrx - 1.0
@@ -200,22 +196,22 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
200
196
  lry = table.bounding_box.lry - 1.0
201
197
 
202
198
  item_ann = ImageAnnotation(
203
- category_id=categories_name_as_key[TableType.item],
204
- category_name=TableType.item,
199
+ category_id=categories_name_as_key[TableType.ITEM],
200
+ category_name=TableType.ITEM,
205
201
  bounding_box=BoundingBox(absolute_coords=True, ulx=ulx, uly=uly, lrx=lrx, lry=lry),
206
202
  )
207
203
  item_sub_ann = CategoryAnnotation(category_name=item_type)
208
- item_ann.dump_sub_category(TableType.item, item_sub_ann, image.image_id)
204
+ item_ann.dump_sub_category(TableType.ITEM, item_sub_ann, image.image_id)
209
205
  image.dump(item_ann)
210
206
 
211
207
  if pubtables_like: # pubtables_like:
212
- items = image.get_annotation(category_names=TableType.item)
213
- item_type_anns = [ann for ann in items if ann.get_sub_category(TableType.item).category_name == item_type]
208
+ items = image.get_annotation(category_names=TableType.ITEM)
209
+ item_type_anns = [ann for ann in items if ann.get_sub_category(TableType.ITEM).category_name == item_type]
214
210
  item_type_anns.sort(
215
- key=lambda x: (x.bounding_box.cx if item_type == LayoutType.column else x.bounding_box.cy) # type: ignore
211
+ key=lambda x: (x.bounding_box.cx if item_type == LayoutType.COLUMN else x.bounding_box.cy) # type: ignore
216
212
  )
217
213
  if table.bounding_box:
218
- tmp_item_xy = table.bounding_box.uly + 1.0 if item_type == LayoutType.row else table.bounding_box.ulx + 1.0
214
+ tmp_item_xy = table.bounding_box.uly + 1.0 if item_type == LayoutType.ROW else table.bounding_box.ulx + 1.0
219
215
  for idx, item in enumerate(item_type_anns):
220
216
  with MappingContextManager(
221
217
  dp_name=image.file_name,
@@ -230,22 +226,22 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
230
226
  if next_box:
231
227
  tmp_next_item_xy = (
232
228
  (box.lry + next_box.uly) / 2
233
- if item_type == LayoutType.row
229
+ if item_type == LayoutType.ROW
234
230
  else (box.lrx + next_box.ulx) / 2
235
231
  )
236
232
  else:
237
233
  if table.bounding_box:
238
234
  tmp_next_item_xy = (
239
235
  table.bounding_box.lry - 1.0
240
- if item_type == LayoutType.row
236
+ if item_type == LayoutType.ROW
241
237
  else table.bounding_box.lrx - 1.0
242
238
  )
243
239
 
244
240
  new_embedding_box = BoundingBox(
245
- ulx=box.ulx if item_type == LayoutType.row else tmp_item_xy,
246
- uly=tmp_item_xy if item_type == LayoutType.row else box.uly,
247
- lrx=box.lrx if item_type == LayoutType.row else tmp_next_item_xy,
248
- lry=tmp_next_item_xy if item_type == LayoutType.row else box.lry,
241
+ ulx=box.ulx if item_type == LayoutType.ROW else tmp_item_xy,
242
+ uly=tmp_item_xy if item_type == LayoutType.ROW else box.uly,
243
+ lrx=box.lrx if item_type == LayoutType.ROW else tmp_next_item_xy,
244
+ lry=tmp_next_item_xy if item_type == LayoutType.ROW else box.lry,
249
245
  absolute_coords=True,
250
246
  )
251
247
  item.bounding_box = new_embedding_box
@@ -255,7 +251,7 @@ def _add_items(image: Image, item_type: str, categories_name_as_key: Dict[str, s
255
251
  return image
256
252
 
257
253
 
258
- def row_col_cell_ids(tiling: List[List[int]]) -> List[Tuple[int, int, int]]:
254
+ def row_col_cell_ids(tiling: list[list[int]]) -> list[tuple[int, int, int]]:
259
255
  """
260
256
  Infers absolute rows and columns for every cell from the tiling of a table.
261
257
 
@@ -271,7 +267,7 @@ def row_col_cell_ids(tiling: List[List[int]]) -> List[Tuple[int, int, int]]:
271
267
  return rows_col_cell_ids
272
268
 
273
269
 
274
- def embedding_in_image(dp: Image, html: List[str], categories_name_as_key: Dict[str, str]) -> Image:
270
+ def embedding_in_image(dp: Image, html: list[str], categories_name_as_key: dict[ObjectTypes, int]) -> Image:
275
271
  """
276
272
  Generating an image, that resembles the output of an analyzer. The layout of the image is table spanning
277
273
  the full page, i.e. there is one table image annotation. Moreover, the table annotation has an image, with cells
@@ -286,8 +282,8 @@ def embedding_in_image(dp: Image, html: List[str], categories_name_as_key: Dict[
286
282
  image.image = dp.image
287
283
  image.set_width_height(dp.width, dp.height)
288
284
  table_ann = ImageAnnotation(
289
- category_name=LayoutType.table,
290
- category_id=categories_name_as_key[LayoutType.table],
285
+ category_name=LayoutType.TABLE,
286
+ category_id=categories_name_as_key[LayoutType.TABLE],
291
287
  bounding_box=BoundingBox(absolute_coords=True, ulx=0.0, uly=0.0, lrx=dp.width, lry=dp.height),
292
288
  )
293
289
  image.dump(table_ann)
@@ -297,20 +293,20 @@ def embedding_in_image(dp: Image, html: List[str], categories_name_as_key: Dict[
297
293
  # node.
298
294
  html.insert(0, "<table>")
299
295
  html.append("</table>")
300
- if CellType.header not in categories_name_as_key:
296
+ if CellType.HEADER not in categories_name_as_key:
301
297
  html.remove("<thead>")
302
298
  html.remove("</thead>")
303
299
  if "<tbody>" in html and "</tbody>" in html:
304
300
  html.remove("<tbody>")
305
301
  html.remove("</tbody>")
306
302
 
307
- html_ann = ContainerAnnotation(category_name=TableType.html, value=html)
308
- table_ann.dump_sub_category(TableType.html, html_ann)
303
+ html_ann = ContainerAnnotation(category_name=TableType.HTML, value=html)
304
+ table_ann.dump_sub_category(TableType.HTML, html_ann)
309
305
  for ann in dp.get_annotation():
310
306
  image.dump(ann)
311
307
  assert table_ann.image
312
308
  table_ann.image.dump(ann)
313
- table_ann.dump_relationship(Relationships.child, ann.annotation_id)
309
+ table_ann.dump_relationship(Relationships.CHILD, ann.annotation_id)
314
310
 
315
311
  return image
316
312
 
@@ -329,8 +325,8 @@ def nth_index(iterable: Iterable[str], value: str, n: int) -> Optional[int]:
329
325
 
330
326
 
331
327
  def pub_to_image_uncur( # pylint: disable=R0914
332
- dp: JsonDict,
333
- categories_name_as_key: Dict[str, str],
328
+ dp: PubtabnetDict,
329
+ categories_name_as_key: dict[ObjectTypes, int],
334
330
  load_image: bool,
335
331
  fake_score: bool,
336
332
  rows_and_cols: bool,
@@ -342,7 +338,7 @@ def pub_to_image_uncur( # pylint: disable=R0914
342
338
  Map a datapoint of annotation structure as given in the Pubtabnet dataset to an Image structure.
343
339
  <https://github.com/ibm-aur-nlp/PubTabNet>
344
340
 
345
- :param dp: A datapoint in serialized coco format.
341
+ :param dp: A datapoint in serialized Pubtabnet format.
346
342
  :param categories_name_as_key: A dict of categories, e.g. DatasetCategories.get_categories(name_as_key=True)
347
343
  :param load_image: If `True` it will load image to `Image.image`
348
344
  :param fake_score: If dp does not contain a score, a fake score with uniform random variables in (0,1)
@@ -407,60 +403,56 @@ def pub_to_image_uncur( # pylint: disable=R0914
407
403
 
408
404
  table_ann: Optional[ImageAnnotation] = None
409
405
  if is_fintabnet: # cannot use for synthetic table ann creation
410
- table_ann = _get_table_annotation(dp, categories_name_as_key[LayoutType.table])
406
+ table_ann = _get_table_annotation(dp, categories_name_as_key[LayoutType.TABLE])
411
407
  image.dump(table_ann)
412
408
 
413
- for idx, value in enumerate(
409
+ for idx, (row_col_cell_id, cell, row_span, col_span) in enumerate(
414
410
  zip(rows_cols_cell_ids[::-1], dp["html"]["cells"][::-1], row_spans[::-1], col_spans[::-1])
415
411
  ):
416
- row_col_cell_id = value[0]
417
412
  row_number, col_number, cell_id = row_col_cell_id[0], row_col_cell_id[1], row_col_cell_id[2]
418
- cell = value[1]
419
- row_span = value[2]
420
- col_span = value[3]
421
413
 
422
414
  if "bbox" in cell: # empty cells have no box
423
415
  ulx, uly, lrx, lry = list(map(float, cell["bbox"]))
424
416
  cell_bounding_box = BoundingBox(absolute_coords=True, ulx=ulx, uly=uly, lrx=lrx, lry=lry)
425
417
  cell_ann = ImageAnnotation(
426
- category_name=LayoutType.cell,
418
+ category_name=LayoutType.CELL,
427
419
  bounding_box=cell_bounding_box,
428
- category_id=categories_name_as_key[LayoutType.cell],
420
+ category_id=categories_name_as_key[LayoutType.CELL],
429
421
  score=maybe_get_fake_score(fake_score),
430
422
  )
431
423
  cell_ann.dump_sub_category(
432
- CellType.row_number,
433
- CategoryAnnotation(category_name=CellType.row_number, category_id=str(row_number)),
424
+ CellType.ROW_NUMBER,
425
+ CategoryAnnotation(category_name=CellType.ROW_NUMBER, category_id=row_number),
434
426
  image.image_id,
435
427
  )
436
428
  cell_ann.dump_sub_category(
437
- CellType.column_number,
438
- CategoryAnnotation(category_name=CellType.column_number, category_id=str(col_number)),
429
+ CellType.COLUMN_NUMBER,
430
+ CategoryAnnotation(category_name=CellType.COLUMN_NUMBER, category_id=col_number),
439
431
  image.image_id,
440
432
  )
441
433
  cell_ann.dump_sub_category(
442
- CellType.row_span,
443
- CategoryAnnotation(category_name=CellType.row_span, category_id=str(row_span)),
434
+ CellType.ROW_SPAN,
435
+ CategoryAnnotation(category_name=CellType.ROW_SPAN, category_id=row_span), # type: ignore
444
436
  image.image_id,
445
437
  )
446
438
  cell_ann.dump_sub_category(
447
- CellType.column_span,
448
- CategoryAnnotation(category_name=CellType.column_span, category_id=str(col_span)),
439
+ CellType.COLUMN_SPAN,
440
+ CategoryAnnotation(category_name=CellType.COLUMN_SPAN, category_id=col_span), # type: ignore
449
441
  image.image_id,
450
442
  )
451
443
  if (
452
- int(cell_ann.get_sub_category(CellType.row_span).category_id) > 1
453
- or int(cell_ann.get_sub_category(CellType.column_span).category_id) > 1
444
+ cell_ann.get_sub_category(CellType.ROW_SPAN).category_id > 1
445
+ or cell_ann.get_sub_category(CellType.COLUMN_SPAN).category_id > 1
454
446
  ):
455
447
  cell_ann.dump_sub_category(
456
- CellType.spanning,
457
- CategoryAnnotation(category_name=CellType.spanning),
448
+ CellType.SPANNING,
449
+ CategoryAnnotation(category_name=CellType.SPANNING),
458
450
  image.image_id,
459
451
  )
460
452
  else:
461
453
  cell_ann.dump_sub_category(
462
- CellType.spanning,
463
- CategoryAnnotation(category_name=LayoutType.cell),
454
+ CellType.SPANNING,
455
+ CategoryAnnotation(category_name=LayoutType.CELL),
464
456
  image.image_id,
465
457
  )
466
458
 
@@ -468,13 +460,13 @@ def pub_to_image_uncur( # pylint: disable=R0914
468
460
  max_cs = max(max_cs, col_span) # type: ignore
469
461
 
470
462
  if _has_header:
471
- category_name = CellType.header if cell_id <= end_of_header else CellType.body
463
+ category_name = CellType.HEADER if cell_id <= end_of_header else CellType.BODY
472
464
  cell_ann.dump_sub_category(
473
- CellType.header, CategoryAnnotation(category_name=category_name), image.image_id
465
+ CellType.HEADER, CategoryAnnotation(category_name=category_name), image.image_id
474
466
  )
475
467
  image.dump(cell_ann)
476
468
  if table_ann is not None:
477
- table_ann.dump_relationship(Relationships.child, cell_ann.annotation_id)
469
+ table_ann.dump_relationship(Relationships.CHILD, cell_ann.annotation_id)
478
470
 
479
471
  if dd_pipe_like:
480
472
  tokens = cell["tokens"]
@@ -484,47 +476,47 @@ def pub_to_image_uncur( # pylint: disable=R0914
484
476
  text = "".join(tokens)
485
477
  # we are not separating each word but view the full table content as one word
486
478
  word = ImageAnnotation(
487
- category_name=LayoutType.word,
488
- category_id=categories_name_as_key[LayoutType.word],
479
+ category_name=LayoutType.WORD,
480
+ category_id=categories_name_as_key[LayoutType.WORD],
489
481
  bounding_box=cell_bounding_box,
490
482
  )
491
- text_container = ContainerAnnotation(category_name=WordType.characters, value=text)
492
- word.dump_sub_category(WordType.characters, text_container)
493
- reading_order = CategoryAnnotation(category_name=Relationships.reading_order, category_id="1")
494
- word.dump_sub_category(Relationships.reading_order, reading_order)
483
+ text_container = ContainerAnnotation(category_name=WordType.CHARACTERS, value=text)
484
+ word.dump_sub_category(WordType.CHARACTERS, text_container)
485
+ reading_order = CategoryAnnotation(category_name=Relationships.READING_ORDER, category_id=1)
486
+ word.dump_sub_category(Relationships.READING_ORDER, reading_order)
495
487
  image.dump(word)
496
- cell_ann.dump_relationship(Relationships.child, word.annotation_id)
488
+ cell_ann.dump_relationship(Relationships.CHILD, word.annotation_id)
497
489
 
498
490
  index = nth_index(html, "<td>", number_of_cells - idx)
499
491
  if index:
500
492
  html.insert(index + 1, cell_ann.annotation_id)
501
493
 
502
- summary_ann = SummaryAnnotation(external_id=image.image_id + "SUMMARY")
494
+ summary_ann = CategoryAnnotation(category_name=SummaryType.SUMMARY)
503
495
  summary_ann.dump_sub_category(
504
- TableType.number_of_rows,
505
- CategoryAnnotation(category_name=TableType.number_of_rows, category_id=str(number_of_rows)),
496
+ TableType.NUMBER_OF_ROWS,
497
+ CategoryAnnotation(category_name=TableType.NUMBER_OF_ROWS, category_id=number_of_rows),
506
498
  image.image_id,
507
499
  )
508
500
  summary_ann.dump_sub_category(
509
- TableType.number_of_columns,
510
- CategoryAnnotation(category_name=TableType.number_of_columns, category_id=str(number_of_cols)),
501
+ TableType.NUMBER_OF_COLUMNS,
502
+ CategoryAnnotation(category_name=TableType.NUMBER_OF_COLUMNS, category_id=number_of_cols),
511
503
  image.image_id,
512
504
  )
513
505
  summary_ann.dump_sub_category(
514
- TableType.max_row_span,
515
- CategoryAnnotation(category_name=TableType.max_row_span, category_id=str(max_rs)),
506
+ TableType.MAX_ROW_SPAN,
507
+ CategoryAnnotation(category_name=TableType.MAX_ROW_SPAN, category_id=max_rs),
516
508
  image.image_id,
517
509
  )
518
510
  summary_ann.dump_sub_category(
519
- TableType.max_col_span,
520
- CategoryAnnotation(category_name=TableType.max_col_span, category_id=str(max_cs)),
511
+ TableType.MAX_COL_SPAN,
512
+ CategoryAnnotation(category_name=TableType.MAX_COL_SPAN, category_id=max_cs),
521
513
  image.image_id,
522
514
  )
523
515
  image.summary = summary_ann
524
516
 
525
517
  if rows_and_cols or dd_pipe_like:
526
- image = _add_items(image, LayoutType.row, categories_name_as_key, pubtables_like)
527
- image = _add_items(image, LayoutType.column, categories_name_as_key, pubtables_like)
518
+ image = _add_items(image, LayoutType.ROW, categories_name_as_key, pubtables_like)
519
+ image = _add_items(image, LayoutType.COLUMN, categories_name_as_key, pubtables_like)
528
520
 
529
521
  if dd_pipe_like:
530
522
  image = embedding_in_image(image, html, categories_name_as_key)
@@ -26,8 +26,8 @@ from lazy_imports import try_import
26
26
 
27
27
  from ..datapoint.annotation import ImageAnnotation
28
28
  from ..datapoint.image import Image
29
- from ..utils.detection_types import JsonDict
30
- from ..utils.settings import ObjectTypes
29
+ from ..utils.settings import TypeOrStr
30
+ from ..utils.types import JsonDict
31
31
  from .maputils import curry
32
32
 
33
33
  with try_import() as import_guard:
@@ -39,7 +39,7 @@ with try_import() as import_guard:
39
39
  def image_to_tp_frcnn_training(
40
40
  dp: Image,
41
41
  add_mask: bool = False,
42
- category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
42
+ category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
43
43
  ) -> Optional[JsonDict]:
44
44
  """
45
45
  Maps an image to a dict to be consumed by Tensorpack Faster-RCNN bounding box detection. Note, that the returned
@@ -25,7 +25,6 @@ from itertools import chain
25
25
  from typing import Mapping, Optional
26
26
 
27
27
  from ..datapoint import BoundingBox, CategoryAnnotation, ContainerAnnotation, Image, ImageAnnotation
28
- from ..utils.detection_types import JsonDict
29
28
  from ..utils.fs import load_image_from_file
30
29
  from ..utils.settings import (
31
30
  BioTag,
@@ -37,17 +36,18 @@ from ..utils.settings import (
37
36
  get_type,
38
37
  token_class_tag_to_token_class_with_tag,
39
38
  )
39
+ from ..utils.types import FunsdDict
40
40
  from .maputils import MappingContextManager, curry, maybe_get_fake_score
41
41
 
42
42
 
43
43
  @curry
44
44
  def xfund_to_image(
45
- dp: JsonDict,
45
+ dp: FunsdDict,
46
46
  load_image: bool,
47
47
  fake_score: bool,
48
- categories_dict_name_as_key: Mapping[str, str],
48
+ categories_dict_name_as_key: Mapping[ObjectTypes, int],
49
49
  token_class_names_mapping: Mapping[str, str],
50
- ner_token_to_id_mapping: Mapping[ObjectTypes, Mapping[ObjectTypes, Mapping[ObjectTypes, str]]],
50
+ ner_token_to_id_mapping: Mapping[ObjectTypes, Mapping[ObjectTypes, Mapping[ObjectTypes, int]]],
51
51
  ) -> Optional[Image]:
52
52
  """
53
53
  Map a datapoint of annotation structure as given as from xfund or funsd dataset in to an Image structure
@@ -75,9 +75,9 @@ def xfund_to_image(
75
75
 
76
76
  _, file_name = os.path.split(full_path)
77
77
  external_id = dp.get("uid")
78
- tag_to_id_mapping = ner_token_to_id_mapping[LayoutType.word][WordType.tag]
79
- token_class_to_id_mapping = ner_token_to_id_mapping[LayoutType.word][WordType.token_class]
80
- token_tag_to_id_mapping = ner_token_to_id_mapping[LayoutType.word][WordType.token_tag]
78
+ tag_to_id_mapping = ner_token_to_id_mapping[LayoutType.WORD][WordType.TAG]
79
+ token_class_to_id_mapping = ner_token_to_id_mapping[LayoutType.WORD][WordType.TOKEN_CLASS]
80
+ token_tag_to_id_mapping = ner_token_to_id_mapping[LayoutType.WORD][WordType.TOKEN_TAG]
81
81
 
82
82
  with MappingContextManager(file_name) as mapping_context:
83
83
  image = Image(file_name=file_name, location=full_path, external_id=external_id)
@@ -101,16 +101,16 @@ def xfund_to_image(
101
101
  bbox = BoundingBox(absolute_coords=True, ulx=box[0], uly=box[1], lrx=box[2], lry=box[3])
102
102
  score = maybe_get_fake_score(fake_score)
103
103
  entity_ann = ImageAnnotation(
104
- category_name=LayoutType.text,
104
+ category_name=LayoutType.TEXT,
105
105
  bounding_box=bbox,
106
- category_id=categories_dict_name_as_key[LayoutType.text],
106
+ category_id=categories_dict_name_as_key[LayoutType.TEXT],
107
107
  score=score,
108
108
  )
109
109
  category_name = token_class_names_mapping[entity["label"]]
110
110
  sub_cat_semantic = CategoryAnnotation(
111
111
  category_name=category_name, category_id=token_class_to_id_mapping[get_type(category_name)]
112
112
  )
113
- entity_ann.dump_sub_category(WordType.token_class, sub_cat_semantic)
113
+ entity_ann.dump_sub_category(WordType.TOKEN_CLASS, sub_cat_semantic)
114
114
  image.dump(entity_ann)
115
115
 
116
116
  words = entity.get("words")
@@ -122,61 +122,61 @@ def xfund_to_image(
122
122
  score = maybe_get_fake_score(fake_score)
123
123
 
124
124
  ann = ImageAnnotation(
125
- category_name=LayoutType.word,
125
+ category_name=LayoutType.WORD,
126
126
  bounding_box=bbox,
127
- category_id=categories_dict_name_as_key[LayoutType.word],
127
+ category_id=categories_dict_name_as_key[LayoutType.WORD],
128
128
  score=score,
129
129
  )
130
130
  image.dump(ann)
131
- entity_ann.dump_relationship(Relationships.child, ann.annotation_id)
131
+ entity_ann.dump_relationship(Relationships.CHILD, ann.annotation_id)
132
132
  sub_cat_semantic = CategoryAnnotation(
133
133
  category_name=category_name, category_id=token_class_to_id_mapping[get_type(category_name)]
134
134
  )
135
- ann.dump_sub_category(WordType.token_class, sub_cat_semantic)
136
- sub_cat_chars = ContainerAnnotation(category_name=WordType.characters, value=word["text"])
137
- ann.dump_sub_category(WordType.characters, sub_cat_chars)
138
- if sub_cat_semantic.category_name == TokenClasses.other:
135
+ ann.dump_sub_category(WordType.TOKEN_CLASS, sub_cat_semantic)
136
+ sub_cat_chars = ContainerAnnotation(category_name=WordType.CHARACTERS, value=word["text"])
137
+ ann.dump_sub_category(WordType.CHARACTERS, sub_cat_chars)
138
+ if sub_cat_semantic.category_name == TokenClasses.OTHER:
139
139
  sub_cat_tag = CategoryAnnotation(
140
- category_name=BioTag.outside, category_id=tag_to_id_mapping[BioTag.outside]
140
+ category_name=BioTag.OUTSIDE, category_id=tag_to_id_mapping[BioTag.OUTSIDE]
141
141
  )
142
- ann.dump_sub_category(WordType.tag, sub_cat_tag)
142
+ ann.dump_sub_category(WordType.TAG, sub_cat_tag)
143
143
  # populating ner token to be used for training and evaluation
144
144
  sub_cat_ner_tok = CategoryAnnotation(
145
- category_name=BioTag.outside, category_id=token_tag_to_id_mapping[BioTag.outside]
145
+ category_name=BioTag.OUTSIDE, category_id=token_tag_to_id_mapping[BioTag.OUTSIDE]
146
146
  )
147
- ann.dump_sub_category(WordType.token_tag, sub_cat_ner_tok)
147
+ ann.dump_sub_category(WordType.TOKEN_TAG, sub_cat_ner_tok)
148
148
  elif not idx:
149
149
  sub_cat_tag = CategoryAnnotation(
150
- category_name=BioTag.begin, category_id=tag_to_id_mapping[BioTag.begin]
150
+ category_name=BioTag.BEGIN, category_id=tag_to_id_mapping[BioTag.BEGIN]
151
151
  )
152
- ann.dump_sub_category(WordType.tag, sub_cat_tag)
152
+ ann.dump_sub_category(WordType.TAG, sub_cat_tag)
153
153
  sub_cat_ner_tok = CategoryAnnotation(
154
154
  category_name=token_class_tag_to_token_class_with_tag(
155
- get_type(sub_cat_semantic.category_name), BioTag.begin
155
+ get_type(sub_cat_semantic.category_name), BioTag.BEGIN
156
156
  ),
157
157
  category_id=token_tag_to_id_mapping[
158
158
  token_class_tag_to_token_class_with_tag(
159
- get_type(sub_cat_semantic.category_name), BioTag.begin
159
+ get_type(sub_cat_semantic.category_name), BioTag.BEGIN
160
160
  )
161
161
  ],
162
162
  )
163
- ann.dump_sub_category(WordType.token_tag, sub_cat_ner_tok)
163
+ ann.dump_sub_category(WordType.TOKEN_TAG, sub_cat_ner_tok)
164
164
  else:
165
165
  sub_cat_tag = CategoryAnnotation(
166
- category_name=BioTag.inside, category_id=tag_to_id_mapping[BioTag.inside]
166
+ category_name=BioTag.INSIDE, category_id=tag_to_id_mapping[BioTag.INSIDE]
167
167
  )
168
- ann.dump_sub_category(WordType.tag, sub_cat_tag)
168
+ ann.dump_sub_category(WordType.TAG, sub_cat_tag)
169
169
  sub_cat_ner_tok = CategoryAnnotation(
170
170
  category_name=token_class_tag_to_token_class_with_tag(
171
- get_type(sub_cat_semantic.category_name), BioTag.inside
171
+ get_type(sub_cat_semantic.category_name), BioTag.INSIDE
172
172
  ),
173
173
  category_id=token_tag_to_id_mapping[
174
174
  token_class_tag_to_token_class_with_tag(
175
- get_type(sub_cat_semantic.category_name), BioTag.inside
175
+ get_type(sub_cat_semantic.category_name), BioTag.INSIDE
176
176
  )
177
177
  ],
178
178
  )
179
- ann.dump_sub_category(WordType.token_tag, sub_cat_ner_tok)
179
+ ann.dump_sub_category(WordType.TOKEN_TAG, sub_cat_ner_tok)
180
180
 
181
181
  entity_id_to_ann_id[entity["id"]].append(ann.annotation_id)
182
182
  ann_id_to_entity_id[ann.annotation_id] = entity["id"]
@@ -184,7 +184,7 @@ def xfund_to_image(
184
184
  entity_id_to_entity_link_id[entity["id"]].extend(entity["linking"])
185
185
 
186
186
  # now populating semantic links
187
- word_anns = image.get_annotation(category_names=LayoutType.word)
187
+ word_anns = image.get_annotation(category_names=LayoutType.WORD)
188
188
  for word in word_anns:
189
189
  entity_id = ann_id_to_entity_id[word.annotation_id]
190
190
  all_linked_entities = list(chain(*entity_id_to_entity_link_id[entity_id]))
@@ -193,7 +193,7 @@ def xfund_to_image(
193
193
  ann_ids.extend(entity_id_to_ann_id[linked_entity])
194
194
  for ann_id in ann_ids:
195
195
  if ann_id != word.annotation_id:
196
- word.dump_relationship(Relationships.semantic_entity_link, ann_id)
196
+ word.dump_relationship(Relationships.SEMANTIC_ENTITY_LINK, ann_id)
197
197
 
198
198
  if mapping_context.context_error:
199
199
  return None