deepdoctection 0.32-py3-none-any.whl → 0.34-py3-none-any.whl

This diff reflects the content of publicly available package versions as released to their respective public registries, and is provided for informational purposes only.

Files changed (111)
  1. deepdoctection/__init__.py +8 -25
  2. deepdoctection/analyzer/dd.py +84 -71
  3. deepdoctection/dataflow/common.py +9 -5
  4. deepdoctection/dataflow/custom.py +5 -5
  5. deepdoctection/dataflow/custom_serialize.py +75 -18
  6. deepdoctection/dataflow/parallel_map.py +3 -3
  7. deepdoctection/dataflow/serialize.py +4 -4
  8. deepdoctection/dataflow/stats.py +3 -3
  9. deepdoctection/datapoint/annotation.py +78 -56
  10. deepdoctection/datapoint/box.py +7 -7
  11. deepdoctection/datapoint/convert.py +6 -6
  12. deepdoctection/datapoint/image.py +157 -75
  13. deepdoctection/datapoint/view.py +175 -151
  14. deepdoctection/datasets/adapter.py +30 -24
  15. deepdoctection/datasets/base.py +10 -10
  16. deepdoctection/datasets/dataflow_builder.py +3 -3
  17. deepdoctection/datasets/info.py +23 -25
  18. deepdoctection/datasets/instances/doclaynet.py +48 -49
  19. deepdoctection/datasets/instances/fintabnet.py +44 -45
  20. deepdoctection/datasets/instances/funsd.py +23 -23
  21. deepdoctection/datasets/instances/iiitar13k.py +8 -8
  22. deepdoctection/datasets/instances/layouttest.py +2 -2
  23. deepdoctection/datasets/instances/publaynet.py +3 -3
  24. deepdoctection/datasets/instances/pubtables1m.py +18 -18
  25. deepdoctection/datasets/instances/pubtabnet.py +30 -29
  26. deepdoctection/datasets/instances/rvlcdip.py +28 -29
  27. deepdoctection/datasets/instances/xfund.py +51 -30
  28. deepdoctection/datasets/save.py +6 -6
  29. deepdoctection/eval/accmetric.py +32 -33
  30. deepdoctection/eval/base.py +8 -9
  31. deepdoctection/eval/cocometric.py +13 -12
  32. deepdoctection/eval/eval.py +32 -26
  33. deepdoctection/eval/tedsmetric.py +16 -12
  34. deepdoctection/eval/tp_eval_callback.py +7 -16
  35. deepdoctection/extern/base.py +339 -134
  36. deepdoctection/extern/d2detect.py +69 -89
  37. deepdoctection/extern/deskew.py +11 -10
  38. deepdoctection/extern/doctrocr.py +81 -64
  39. deepdoctection/extern/fastlang.py +23 -16
  40. deepdoctection/extern/hfdetr.py +53 -38
  41. deepdoctection/extern/hflayoutlm.py +216 -155
  42. deepdoctection/extern/hflm.py +35 -30
  43. deepdoctection/extern/model.py +433 -255
  44. deepdoctection/extern/pdftext.py +15 -15
  45. deepdoctection/extern/pt/ptutils.py +4 -2
  46. deepdoctection/extern/tessocr.py +39 -38
  47. deepdoctection/extern/texocr.py +14 -16
  48. deepdoctection/extern/tp/tfutils.py +16 -2
  49. deepdoctection/extern/tp/tpcompat.py +11 -7
  50. deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
  51. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
  52. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
  53. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
  54. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
  55. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
  56. deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
  57. deepdoctection/extern/tpdetect.py +40 -45
  58. deepdoctection/mapper/cats.py +36 -40
  59. deepdoctection/mapper/cocostruct.py +16 -12
  60. deepdoctection/mapper/d2struct.py +22 -22
  61. deepdoctection/mapper/hfstruct.py +7 -7
  62. deepdoctection/mapper/laylmstruct.py +22 -24
  63. deepdoctection/mapper/maputils.py +9 -10
  64. deepdoctection/mapper/match.py +33 -2
  65. deepdoctection/mapper/misc.py +6 -7
  66. deepdoctection/mapper/pascalstruct.py +4 -4
  67. deepdoctection/mapper/prodigystruct.py +6 -6
  68. deepdoctection/mapper/pubstruct.py +84 -92
  69. deepdoctection/mapper/tpstruct.py +3 -3
  70. deepdoctection/mapper/xfundstruct.py +33 -33
  71. deepdoctection/pipe/anngen.py +39 -14
  72. deepdoctection/pipe/base.py +68 -99
  73. deepdoctection/pipe/common.py +181 -85
  74. deepdoctection/pipe/concurrency.py +14 -10
  75. deepdoctection/pipe/doctectionpipe.py +24 -21
  76. deepdoctection/pipe/language.py +20 -25
  77. deepdoctection/pipe/layout.py +18 -16
  78. deepdoctection/pipe/lm.py +49 -47
  79. deepdoctection/pipe/order.py +63 -65
  80. deepdoctection/pipe/refine.py +102 -109
  81. deepdoctection/pipe/segment.py +157 -162
  82. deepdoctection/pipe/sub_layout.py +50 -40
  83. deepdoctection/pipe/text.py +37 -36
  84. deepdoctection/pipe/transform.py +19 -16
  85. deepdoctection/train/d2_frcnn_train.py +27 -25
  86. deepdoctection/train/hf_detr_train.py +22 -18
  87. deepdoctection/train/hf_layoutlm_train.py +49 -48
  88. deepdoctection/train/tp_frcnn_train.py +10 -11
  89. deepdoctection/utils/concurrency.py +1 -1
  90. deepdoctection/utils/context.py +13 -6
  91. deepdoctection/utils/develop.py +4 -4
  92. deepdoctection/utils/env_info.py +52 -14
  93. deepdoctection/utils/file_utils.py +6 -11
  94. deepdoctection/utils/fs.py +41 -14
  95. deepdoctection/utils/identifier.py +2 -2
  96. deepdoctection/utils/logger.py +15 -15
  97. deepdoctection/utils/metacfg.py +7 -7
  98. deepdoctection/utils/pdf_utils.py +39 -14
  99. deepdoctection/utils/settings.py +188 -182
  100. deepdoctection/utils/tqdm.py +1 -1
  101. deepdoctection/utils/transform.py +14 -9
  102. deepdoctection/utils/types.py +104 -0
  103. deepdoctection/utils/utils.py +7 -7
  104. deepdoctection/utils/viz.py +70 -69
  105. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
  106. deepdoctection-0.34.dist-info/RECORD +146 -0
  107. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
  108. deepdoctection/utils/detection_types.py +0 -68
  109. deepdoctection-0.32.dist-info/RECORD +0 -146
  110. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
  111. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0

deepdoctection/mapper/d2struct.py
@@ -22,17 +22,17 @@ visualising
 from __future__ import annotations

 import os.path
-from typing import Dict, List, Mapping, Optional, Sequence, Tuple, Union
+from typing import Mapping, Optional, Sequence, Union

 import numpy as np
 from lazy_imports import try_import

-from ..datapoint.annotation import ImageAnnotation
+from ..datapoint.annotation import DEFAULT_CATEGORY_ID, ImageAnnotation
 from ..datapoint.image import Image
 from ..extern.pt.nms import batched_nms
 from ..mapper.maputils import curry
-from ..utils.detection_types import JsonDict
-from ..utils.settings import ObjectTypes, TypeOrStr, get_type
+from ..utils.settings import DefaultType, ObjectTypes, TypeOrStr, get_type
+from ..utils.types import Detectron2Dict

 with try_import() as pt_import_guard:
     import torch
@@ -41,7 +41,7 @@ with try_import() as d2_import_guard:
     from detectron2.structures import BoxMode

 with try_import() as wb_import_guard:
-    from wandb import Classes
+    from wandb import Classes  # type: ignore
     from wandb import Image as Wbimage


@@ -49,8 +49,8 @@ with try_import() as wb_import_guard:
 def image_to_d2_frcnn_training(
     dp: Image,
     add_mask: bool = False,
-    category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
-) -> Optional[JsonDict]:
+    category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+) -> Optional[Detectron2Dict]:
     """
     Maps an image to a standard dataset dict as described in
     <https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html>. It further checks if the image is physically
@@ -66,7 +66,7 @@ def image_to_d2_frcnn_training(
     if not os.path.isfile(dp.location) and dp.image is None:
         return None

-    output: JsonDict = {"file_name": str(dp.location)}
+    output: Detectron2Dict = {"file_name": str(dp.location)}

     if dp.image is not None:
         output["image"] = dp.image.astype("float32")
@@ -87,10 +87,10 @@ def image_to_d2_frcnn_training(
             box = box.transform(dp.width, dp.height, absolute_coords=True)

         # Detectron2 does not fully support BoxMode.XYXY_REL
-        mapped_ann: Dict[str, Union[str, int, List[float]]] = {
+        mapped_ann: dict[str, Union[str, int, list[float]]] = {
             "bbox_mode": BoxMode.XYXY_ABS,
             "bbox": box.to_list(mode="xyxy"),
-            "category_id": int(ann.category_id) - 1,
+            "category_id": ann.category_id - 1,
         }
         annotations.append(mapped_ann)

@@ -149,23 +149,23 @@ def pt_nms_image_annotations(

 def _get_category_attributes(
     ann: ImageAnnotation, cat_to_sub_cat: Optional[Mapping[ObjectTypes, ObjectTypes]] = None
-) -> Tuple[str, str, Optional[float]]:
+) -> tuple[ObjectTypes, int, Optional[float]]:
     if cat_to_sub_cat:
         sub_cat_key = cat_to_sub_cat.get(get_type(ann.category_name))
         if sub_cat_key in ann.sub_categories:
             sub_cat = ann.get_sub_category(sub_cat_key)
-            return sub_cat.category_name, sub_cat.category_id, sub_cat.score
-        return "", "", 0.0
-    return ann.category_name, ann.category_id, ann.score
+            return get_type(sub_cat.category_name), sub_cat.category_id, sub_cat.score
+        return DefaultType.DEFAULT_TYPE, DEFAULT_CATEGORY_ID, 0.0
+    return get_type(ann.category_name), ann.category_id, ann.score


 @curry
 def to_wandb_image(
     dp: Image,
-    categories: Mapping[str, TypeOrStr],
-    sub_categories: Optional[Mapping[str, TypeOrStr]] = None,
+    categories: Mapping[int, TypeOrStr],
+    sub_categories: Optional[Mapping[int, TypeOrStr]] = None,
     cat_to_sub_cat: Optional[Mapping[ObjectTypes, ObjectTypes]] = None,
-) -> Tuple[str, Wbimage]:
+) -> tuple[str, Wbimage]:
     """
     Converting a deepdoctection image into a wandb image

@@ -185,11 +185,11 @@ def to_wandb_image(
     anns = dp.get_annotation(category_names=list(categories.values()))

     if sub_categories:
-        class_labels = {int(key): val for key, val in sub_categories.items()}
-        class_set = Classes([{"name": val, "id": int(key)} for key, val in sub_categories.items()])
+        class_labels = dict(sub_categories.items())
+        class_set = Classes([{"name": val, "id": key} for key, val in sub_categories.items()])
     else:
-        class_labels = {int(key): val for key, val in categories.items()}
-        class_set = Classes([{"name": val, "id": int(key)} for key, val in categories.items()])
+        class_set = Classes([{"name": val, "id": key} for key, val in categories.items()])
+        class_labels = dict(categories.items())

     for ann in anns:
         bounding_box = ann.get_bounding_box(dp.image_id)
@@ -200,7 +200,7 @@
         box = {
             "position": {"middle": bounding_box.center, "width": bounding_box.width, "height": bounding_box.height},
             "domain": "pixel",
-            "class_id": int(category_id),
+            "class_id": category_id,
             "box_caption": category_name,
         }
         if score:
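
Note: the recurring change in this file is that `category_id` is now an `int` throughout deepdoctection, so the `int(...)` casts drop out and Detectron2's zero-based ids come from plain subtraction. A minimal standalone sketch of that mapping convention (plain dicts stand in for deepdoctection's `Image`/`ImageAnnotation` objects; all names here are illustrative):

from typing import Union

def to_d2_record(file_name: str, anns: list[dict]) -> dict:
    # deepdoctection category_ids are 1-based ints in 0.34; Detectron2 wants 0-based.
    annotations = []
    for ann in anns:
        mapped_ann: dict[str, Union[int, list[float]]] = {
            "bbox": ann["bbox"],  # xyxy, absolute coordinates
            "category_id": ann["category_id"] - 1,  # no int(...) cast needed anymore
        }
        annotations.append(mapped_ann)
    return {"file_name": file_name, "annotations": annotations}

print(to_d2_record("page_0.png", [{"bbox": [10.0, 10.0, 50.0, 80.0], "category_id": 1}]))
# {'file_name': 'page_0.png', 'annotations': [{'bbox': [10.0, 10.0, 50.0, 80.0], 'category_id': 0}]}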

deepdoctection/mapper/hfstruct.py
@@ -23,7 +23,7 @@ from __future__ import annotations

 import os
 from dataclasses import dataclass, field
-from typing import Dict, List, Literal, Optional, Sequence, Union
+from typing import Literal, Optional, Sequence, Union

 import numpy as np
 from lazy_imports import try_import
@@ -31,9 +31,9 @@ from lazy_imports import try_import
 from ..datapoint.image import Image
 from ..mapper.maputils import curry
 from ..mapper.misc import get_load_image_func
-from ..utils.detection_types import JsonDict
-from ..utils.settings import ObjectTypes
+from ..utils.settings import TypeOrStr
 from ..utils.transform import PadTransform
+from ..utils.types import JsonDict

 with try_import() as tr_import_guard:
     from transformers import BatchFeature, DetrFeatureExtractor
@@ -43,7 +43,7 @@ with try_import() as tr_import_guard:
 def image_to_hf_detr_training(
     dp: Image,
     add_mask: bool = False,
-    category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
+    category_names: Optional[Union[TypeOrStr, Sequence[Union[TypeOrStr]]]] = None,
 ) -> Optional[JsonDict]:
     """
     Maps an image to a detr input datapoint dict, that, after collating can be used for training.
@@ -76,11 +76,11 @@ def image_to_hf_detr_training(
     for ann in anns:
         box = ann.get_bounding_box(dp.image_id)

-        mapped_ann: Dict[str, Union[str, int, float, List[float]]] = {
+        mapped_ann: dict[str, Union[str, int, float, list[float]]] = {
             "id": "".join([c for c in ann.annotation_id if c.isdigit()])[:8],
             "image_id": "".join([c for c in dp.image_id if c.isdigit()])[:8],
             "bbox": box.to_list(mode="xywh"),
-            "category_id": int(ann.category_id) - 1,
+            "category_id": ann.category_id - 1,
             "area": box.area,
         }
         annotations.append(mapped_ann)
@@ -108,7 +108,7 @@ class DetrDataCollator:
     padder: Optional[PadTransform] = None
     return_tensors: Optional[Literal["pt"]] = field(default="pt")

-    def __call__(self, raw_features: List[JsonDict]) -> BatchFeature:
+    def __call__(self, raw_features: list[JsonDict]) -> BatchFeature:
         """
         Creating BatchFeature from a list of dict of raw features.

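
Note: the `id`/`image_id` fields above compress a uuid-like identifier into the small numeric id that the COCO-style DETR input expects. A self-contained check of that expression (the sample uuid is made up):

annotation_id = "5f2e8a1c-9b47-4d3a-b2f1-0c6e7d8a9b0c"  # made-up uuid-like id
numeric_id = "".join([c for c in annotation_id if c.isdigit()])[:8]
print(numeric_id)  # 52819474 -> keep digits only, truncate to 8 characters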

deepdoctection/mapper/laylmstruct.py
@@ -24,7 +24,7 @@ from __future__ import annotations

 import random
 from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, List, Literal, NewType, Optional, Sequence, Union
+from typing import Any, Callable, Literal, NewType, Optional, Sequence, Union

 import numpy as np
 import numpy.typing as npt
@@ -34,9 +34,9 @@ from ..datapoint.annotation import ContainerAnnotation
 from ..datapoint.convert import box_to_point4, point4_to_box
 from ..datapoint.image import Image
 from ..datapoint.view import Page
-from ..utils.detection_types import JsonDict
 from ..utils.settings import DatasetType, LayoutType, PageType, Relationships, WordType
 from ..utils.transform import ResizeTransform, normalize_image
+from ..utils.types import JsonDict
 from .maputils import curry

 with try_import() as import_guard:
@@ -69,7 +69,7 @@ A DataCollator is a function that takes a list of samples from a Dataset and col
 of PyTorch/TensorFlow tensors or NumPy arrays.
 """

-DataCollator = NewType("DataCollator", Callable[[List[InputDataClass]], Dict[str, Any]])  # type: ignore
+DataCollator = NewType("DataCollator", Callable[[list[InputDataClass]], dict[str, Any]])  # type: ignore

 _CLS_BOX = [0.0, 0.0, 1000.0, 1000.0]
 _SEP_BOX = [1000.0, 1000.0, 1000.0, 1000.0]
@@ -125,9 +125,9 @@ def image_to_raw_layoutlm_features(
     all_ann_ids = []
     all_words = []
     all_boxes = []
-    all_labels: List[int] = []
+    all_labels: list[int] = []

-    anns = dp.get_annotation_iter(category_names=LayoutType.word)
+    anns = dp.get_annotation(category_names=LayoutType.WORD)

     word_id_to_segment_box = {}
     if segment_positions:
@@ -139,12 +139,12 @@ def image_to_raw_layoutlm_features(
             if not bounding_box.absolute_coords:
                 bounding_box = bounding_box.transform(dp.width, dp.height, absolute_coords=True)
             word_id_to_segment_box.update(
-                {word_ann: bounding_box for word_ann in segm_ann.get_relationship(Relationships.child)}
+                {word_ann: bounding_box for word_ann in segm_ann.get_relationship(Relationships.CHILD)}
             )

     for ann in anns:
         all_ann_ids.append(ann.annotation_id)
-        char_cat = ann.get_sub_category(WordType.characters)
+        char_cat = ann.get_sub_category(WordType.CHARACTERS)
         if not isinstance(char_cat, ContainerAnnotation):
             raise TypeError(f"char_cat must be of type ContainerAnnotation but is of type {type(char_cat)}")
         word = char_cat.value
@@ -158,15 +158,15 @@ def image_to_raw_layoutlm_features(
         all_boxes.append(word_id_to_segment_box.get(ann.annotation_id, box).to_list(mode="xyxy"))

         if (
-            WordType.token_tag in ann.sub_categories or WordType.token_class in ann.sub_categories
-        ) and dataset_type == DatasetType.token_classification:
+            WordType.TOKEN_TAG in ann.sub_categories or WordType.TOKEN_CLASS in ann.sub_categories
+        ) and dataset_type == DatasetType.TOKEN_CLASSIFICATION:
             if use_token_tag:
-                all_labels.append(int(ann.get_sub_category(WordType.token_tag).category_id) - 1)
+                all_labels.append(ann.get_sub_category(WordType.TOKEN_TAG).category_id - 1)
             else:
-                all_labels.append(int(ann.get_sub_category(WordType.token_class).category_id) - 1)
+                all_labels.append(ann.get_sub_category(WordType.TOKEN_CLASS).category_id - 1)

-    if dp.summary is not None and dataset_type == DatasetType.sequence_classification:
-        all_labels.append(int(dp.summary.get_sub_category(PageType.document_type).category_id) - 1)
+    if dataset_type == DatasetType.SEQUENCE_CLASSIFICATION:
+        all_labels.append(dp.summary.get_sub_category(PageType.DOCUMENT_TYPE).category_id - 1)

     boxes = np.asarray(all_boxes, dtype="float32")
     if boxes.ndim == 1:
@@ -234,7 +234,7 @@ def layoutlm_features_to_pt_tensors(features: LayoutLMFeatures) -> LayoutLMFeatu


 def _tokenize_with_sliding_window(
-    raw_features: List[Union[RawLayoutLMFeatures, RawLMFeatures]],
+    raw_features: list[Union[RawLayoutLMFeatures, RawLMFeatures]],
     tokenizer: PreTrainedTokenizerFast,
     sliding_window_stride: int,
     max_batch_size: int,
@@ -385,7 +385,7 @@ def _tokenize_with_sliding_window(
         )
     )

-    slided_tokenized_inputs: Dict[str, Union[List[Union[str, int]], torch.Tensor]] = {}
+    slided_tokenized_inputs: dict[str, Union[list[Union[str, int]], torch.Tensor]] = {}
     if return_tensors == "pt":
         slided_tokenized_inputs["overflow_to_sample_mapping"] = torch.tensor(overflow_to_sample_mapping)
         slided_tokenized_inputs["input_ids"] = torch.tensor(all_input_ids)
@@ -402,7 +402,7 @@ def _tokenize_with_sliding_window(


 def raw_features_to_layoutlm_features(
-    raw_features: Union[RawLayoutLMFeatures, RawLMFeatures, List[Union[RawLayoutLMFeatures, RawLMFeatures]]],
+    raw_features: Union[RawLayoutLMFeatures, RawLMFeatures, list[Union[RawLayoutLMFeatures, RawLMFeatures]]],
     tokenizer: PreTrainedTokenizerFast,
     padding: Literal["max_length", "do_not_pad", "longest"] = "max_length",
     truncation: bool = True,
@@ -447,11 +447,11 @@ def raw_features_to_layoutlm_features(
         raw_features = [raw_features]

     _has_token_labels = (
-        raw_features[0]["dataset_type"] == DatasetType.token_classification
+        raw_features[0]["dataset_type"] == DatasetType.TOKEN_CLASSIFICATION
         and raw_features[0].get("labels") is not None
     )
     _has_sequence_labels = (
-        raw_features[0]["dataset_type"] == DatasetType.sequence_classification
+        raw_features[0]["dataset_type"] == DatasetType.SEQUENCE_CLASSIFICATION
         and raw_features[0].get("labels") is not None
     )
     _has_labels = bool(_has_token_labels or _has_sequence_labels)
@@ -620,7 +620,7 @@ class LayoutLMDataCollator:
         if self.return_overflowing_tokens:
             assert self.truncation, self.truncation

-    def __call__(self, raw_features: Union[RawLayoutLMFeatures, List[RawLayoutLMFeatures]]) -> LayoutLMFeatures:
+    def __call__(self, raw_features: Union[RawLayoutLMFeatures, list[RawLayoutLMFeatures]]) -> LayoutLMFeatures:
         """
         Calling the DataCollator to form model inputs for training and inference. Takes a single raw
         :param raw_features: A dictionary with the following arguments: `image_id, width, height, ann_ids, words,
@@ -741,7 +741,7 @@ def image_to_raw_lm_features(
     dp: Image,
     dataset_type: Optional[Literal["sequence_classification", "token_classification"]] = None,
     use_token_tag: bool = True,
-    text_container: Optional[LayoutType] = LayoutType.word,
+    text_container: Optional[LayoutType] = LayoutType.WORD,
     floating_text_block_categories: Optional[Sequence[LayoutType]] = None,
     include_residual_text_container: bool = False,
 ) -> Optional[RawLMFeatures]:
@@ -787,9 +787,7 @@ def image_to_raw_lm_features(
     elif text_["token_classes"]:
         raw_features["labels"] = text_["token_classes"]
     elif page.document_type is not None:
-        document_type_id = (
-            int(page.image_orig.summary.get_sub_category(PageType.document_type).category_id) - 1  # type: ignore
-        )
+        document_type_id = page.image_orig.summary.get_sub_category(PageType.DOCUMENT_TYPE).category_id - 1
         raw_features["labels"] = [document_type_id]

     raw_features["dataset_type"] = dataset_type
@@ -806,7 +804,7 @@ def image_to_lm_features(
     return_overflowing_tokens: bool = False,
     return_tensors: Optional[Literal["pt"]] = "pt",
     sliding_window_stride: int = 0,
-    text_container: Optional[LayoutType] = LayoutType.word,
+    text_container: Optional[LayoutType] = LayoutType.WORD,
     floating_text_block_categories: Optional[Sequence[LayoutType]] = None,
     include_residual_text_container: bool = False,
 ) -> Optional[LayoutLMFeatures]:
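
Note: the `_tokenize_with_sliding_window` changes above are type-annotation updates only, but the underlying idea is worth spelling out. A hypothetical, dependency-free sketch of sliding-window chunking (the real function delegates tokenization to a `PreTrainedTokenizerFast` and additionally tracks `overflow_to_sample_mapping`):

def sliding_windows(input_ids: list[int], max_length: int, stride: int) -> list[list[int]]:
    # Split an over-long token sequence into overlapping windows; consecutive
    # windows share `stride` tokens so no context is lost at the cut points.
    if len(input_ids) <= max_length:
        return [input_ids]
    step = max_length - stride
    return [input_ids[i : i + max_length] for i in range(0, len(input_ids) - stride, step)]

print(sliding_windows(list(range(10)), max_length=4, stride=2))
# [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7], [6, 7, 8, 9]]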

deepdoctection/mapper/maputils.py
@@ -24,16 +24,16 @@ import functools
 import itertools
 import traceback
 from types import TracebackType
-from typing import Any, Callable, Dict, Mapping, Optional, Sequence, Union
+from typing import Any, Callable, Mapping, Optional, Sequence, Union

 import numpy as np
 from tabulate import tabulate
 from termcolor import colored

-from ..utils.detection_types import DP, BaseExceptionType, S, T
 from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDError
 from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import ObjectTypes
+from ..utils.types import DP, BaseExceptionType, S, T

 __all__ = ["MappingContextManager", "DefaultMapper", "maybe_get_fake_score", "LabelSummarizer", "curry"]

@@ -45,7 +45,7 @@ class MappingContextManager:
     """

     def __init__(
-        self, dp_name: Optional[str] = None, filter_level: str = "image", **kwargs: Dict[str, Optional[str]]
+        self, dp_name: Optional[str] = None, filter_level: str = "image", **kwargs: dict[str, Optional[str]]
     ) -> None:
         """
         :param dp_name: A name for the datapoint to be mapped
@@ -81,6 +81,7 @@ class MappingContextManager:
             AssertionError,
             TypeError,
             FileNotFoundError,
+            AttributeError,
             BoundingBoxError,
             AnnotationError,
             ImageError,
@@ -192,7 +193,7 @@ class LabelSummarizer:

     """

-    def __init__(self, categories: Mapping[str, ObjectTypes]) -> None:
+    def __init__(self, categories: Mapping[int, ObjectTypes]) -> None:
         """
         :param categories: A dict of categories as given as in categories.get_categories().
         """
@@ -210,11 +211,11 @@ class LabelSummarizer:
         np_item = np.asarray(item, dtype="int8")
         self.summary += np.histogram(np_item, bins=self.hist_bins)[0]

-    def get_summary(self) -> Dict[str, np.int32]:
+    def get_summary(self) -> dict[int, int]:
         """
         Get a dictionary with category ids and the number dumped
         """
-        return dict(list(zip(self.categories.keys(), self.summary.astype(np.int32))))
+        return dict(list(zip(self.categories.keys(), self.summary.tolist())))

     def print_summary_histogram(self, dd_logic: bool = True) -> None:
         """
@@ -223,11 +224,9 @@ class LabelSummarizer:
         :param dd_logic: Follow dd category convention when printing histogram (last background bucket omitted).
         """
         if dd_logic:
-            data = list(itertools.chain(*[[self.categories[str(i)].value, v] for i, v in enumerate(self.summary, 1)]))
+            data = list(itertools.chain(*[[self.categories[i].value, v] for i, v in enumerate(self.summary, 1)]))
         else:
-            data = list(
-                itertools.chain(*[[self.categories[str(i + 1)].value, v] for i, v in enumerate(self.summary[:-1])])
-            )
+            data = list(itertools.chain(*[[self.categories[i + 1].value, v] for i, v in enumerate(self.summary[:-1])]))
         num_columns = min(6, len(data))
         total_img_anns = sum(data[1::2])
         data.extend([None] * ((num_columns - len(data) % num_columns) % num_columns))
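
Note: `LabelSummarizer` now takes int-keyed categories (`Mapping[int, ObjectTypes]`) and `get_summary` returns plain Python ints via `.tolist()`. A simplified, runnable sketch of the counting logic; strings stand in for `ObjectTypes` members, and the real class also handles a background bucket that this sketch omits:

import numpy as np

categories = {1: "text", 2: "title", 3: "table"}  # illustrative int-keyed mapping
hist_bins = np.arange(1, len(categories) + 2)  # one bin per 1-based category id
summary = np.zeros(len(categories), dtype=int)

for batch in ([1, 1, 3], [2, 3, 3]):  # dump category ids batch by batch
    summary += np.histogram(np.asarray(batch, dtype="int8"), bins=hist_bins)[0]

# get_summary-style result: dict[int, int] with plain ints thanks to .tolist()
print(dict(zip(categories.keys(), summary.tolist())))  # {1: 2, 2: 1, 3: 3}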

deepdoctection/mapper/match.py
@@ -19,10 +19,11 @@
 Module for matching detections according to various matching rules
 """

-from typing import Any, Literal, Optional, Sequence, Tuple, Union
+from typing import Any, Literal, Optional, Sequence, Union

 import numpy as np
 from numpy.typing import NDArray
+from scipy.spatial import distance

 from ..datapoint.annotation import ImageAnnotation
 from ..datapoint.box import iou
@@ -41,7 +42,7 @@ def match_anns_by_intersection(
     parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
     child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
     max_parent_only: bool = False,
-) -> Tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
+) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
     """
     Generates an iou/ioa-matrix for parent_ann_categories and child_ann_categories and returns pairs of child/parent
     indices that are above some intersection threshold. It will also return a list of all pre selected parent and child
@@ -164,3 +165,33 @@ def match_anns_by_intersection(
         return [], [], [], []

     return child_index, parent_index, child_anns, parent_anns
+
+
+def match_anns_by_distance(
+    dp: Image,
+    parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
+    child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
+    parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
+    child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
+) -> list[tuple[ImageAnnotation, ImageAnnotation]]:
+    """
+    Generates pairs of parent and child annotations by calculating the euclidean distance between the centers of the
+    parent and child bounding boxes. It will return the closest child for each parent. Note, that a child can be
+    assigned multiple times to different parents.
+
+    :param dp: image datapoint
+    :param parent_ann_category_names: single str or list of category names
+    :param child_ann_category_names: single str or list of category names
+    :param parent_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other parent candi-
+                           dates which are not in the list.
+    :param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+                          candidates which are not in the list.
+    :return:
+    """
+
+    parent_anns = dp.get_annotation(annotation_ids=parent_ann_ids, category_names=parent_ann_category_names)
+    child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
+    child_centers = [block.get_bounding_box(dp.image_id).center for block in child_anns]
+    parent_centers = [block.get_bounding_box(dp.image_id).center for block in parent_anns]
+    child_indices = distance.cdist(parent_centers, child_centers).argmin(axis=1)
+    return [(parent_anns[i], child_anns[j]) for i, j in enumerate(child_indices)]
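
Note: the matching rule in the new `match_anns_by_distance` reduces to one `scipy` call. A standalone illustration with made-up box centers, verifiable by eye:

import numpy as np
from scipy.spatial import distance

parent_centers = np.array([[10.0, 10.0], [90.0, 40.0]])  # made-up centers
child_centers = np.array([[12.0, 11.0], [50.0, 50.0], [88.0, 39.0]])

# For each parent row, pick the column (child) with the smallest euclidean
# distance; note a single child can win for several parents.
child_indices = distance.cdist(parent_centers, child_centers).argmin(axis=1)
print([(i, int(j)) for i, j in enumerate(child_indices)])  # [(0, 0), (1, 2)]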

deepdoctection/mapper/misc.py
@@ -23,14 +23,14 @@ from __future__ import annotations

 import ast
 import os
-from typing import List, Mapping, Optional, Sequence, Union
+from typing import Mapping, Optional, Sequence, Union

 from lazy_imports import try_import

 from ..datapoint.convert import convert_pdf_bytes_to_np_array_v2
 from ..datapoint.image import Image
-from ..utils.detection_types import JsonDict
 from ..utils.fs import get_load_image_func, load_image_from_file
+from ..utils.types import JsonDict
 from ..utils.utils import is_file_extension
 from .maputils import MappingContextManager, curry

@@ -135,7 +135,7 @@ def maybe_remove_image_from_category(dp: Image, category_names: Optional[Union[s
     return dp


-def image_ann_to_image(dp: Image, category_names: Union[str, List[str]], crop_image: bool = True) -> Image:
+def image_ann_to_image(dp: Image, category_names: Union[str, list[str]], crop_image: bool = True) -> Image:
     """
     Adds `image` to annotations with given category names

@@ -145,7 +145,7 @@ def image_ann_to_image(dp: Image, category_names: Union[str, List[str]], crop_im
     :return: Image
     """

-    img_anns = dp.get_annotation_iter(category_names=category_names)
+    img_anns = dp.get_annotation(category_names=category_names)
     for ann in img_anns:
         dp.image_ann_to_image(annotation_id=ann.annotation_id, crop_image=crop_image)

@@ -154,7 +154,7 @@ def image_ann_to_image(dp: Image, category_names: Union[str, List[str]], crop_im

 @curry
 def maybe_ann_to_sub_image(
-    dp: Image, category_names_sub_image: Union[str, List[str]], category_names: Union[str, List[str]], add_summary: bool
+    dp: Image, category_names_sub_image: Union[str, list[str]], category_names: Union[str, list[str]], add_summary: bool
 ) -> Image:
     """
     Assigns to sub image with given category names all annotations with given category names whose bounding box lie
@@ -196,7 +196,6 @@ def xml_to_dict(dp: JsonDict, xslt_obj: etree.XSLT) -> JsonDict:
     """

     output = str(xslt_obj(dp["xml"]))
-    output = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
     dp.pop("xml")
-    dp["json"] = output
+    dp["json"] = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
     return dp
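
Note: the `xml_to_dict` cleanup above folds two statements into one; the mechanism is that the XSLT stylesheet emits a Python-literal string, which `ast.literal_eval` parses safely. A minimal standalone check (the sample string is invented):

import ast

output = '<?xml version="1.0"?>{"page": 1, "words": ["foo", "bar"]}'  # invented sample
parsed = ast.literal_eval(output.replace('<?xml version="1.0"?>', ""))
print(parsed["words"])  # ['foo', 'bar']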

deepdoctection/mapper/pascalstruct.py
@@ -20,25 +20,25 @@ Module for mapping annotations in iiitar13k style structure
 """

 import os
-from typing import Dict, Optional
+from typing import Optional

 from ..datapoint.annotation import ImageAnnotation
 from ..datapoint.box import BoundingBox
 from ..datapoint.image import Image
-from ..utils.detection_types import JsonDict
 from ..utils.fs import load_image_from_file
 from ..utils.settings import get_type
+from ..utils.types import JsonDict
 from .maputils import MappingContextManager, curry, maybe_get_fake_score


 @curry
 def pascal_voc_dict_to_image(
     dp: JsonDict,
-    categories_name_as_key: Dict[str, str],
+    categories_name_as_key: dict[str, int],
     load_image: bool,
     filter_empty_image: bool,
     fake_score: bool,
-    category_name_mapping: Optional[Dict[str, str]] = None,
+    category_name_mapping: Optional[dict[str, str]] = None,
 ) -> Optional[Image]:
     """
     Map a dataset in a structure equivalent to iiitar13k annotation style to image format

deepdoctection/mapper/prodigystruct.py
@@ -23,8 +23,8 @@ import os
 from typing import Mapping, Optional, Sequence

 from ..datapoint import BoundingBox, Image, ImageAnnotation
-from ..utils.detection_types import JsonDict, Pathlike
-from ..utils.settings import ObjectTypes
+from ..utils.settings import ObjectTypes, get_type
+from ..utils.types import JsonDict, PathLikeOrStr
 from .maputils import MappingContextManager, curry, maybe_get_fake_score

 _PRODIGY_IMAGE_PREFIX = "data:image/png;base64,"
@@ -33,10 +33,10 @@ _PRODIGY_IMAGE_PREFIX = "data:image/png;base64,"
 @curry
 def prodigy_to_image(
     dp: JsonDict,
-    categories_name_as_key: Mapping[str, str],
+    categories_name_as_key: Mapping[ObjectTypes, int],
     load_image: bool,
     fake_score: bool,
-    path_reference_ds: Optional[Pathlike] = None,
+    path_reference_ds: Optional[PathLikeOrStr] = None,
     accept_only_answer: bool = False,
     category_name_mapping: Optional[Mapping[str, str]] = None,
 ) -> Optional[Image]:
@@ -133,7 +133,7 @@ def prodigy_to_image(
             annotation = ImageAnnotation(
                 category_name=label,
                 bounding_box=bbox,
-                category_id=categories_name_as_key[label],
+                category_id=categories_name_as_key[get_type(label)],
                 score=score,
                 external_id=external_id,
             )
@@ -163,7 +163,7 @@ def image_to_prodigy(dp: Image, category_names: Optional[Sequence[ObjectTypes]]
     output["image_id"] = dp.image_id

     spans = []
-    for ann in dp.get_annotation_iter(category_names=category_names):
+    for ann in dp.get_annotation(category_names=category_names):
         bounding_box = ann.get_bounding_box(dp.image_id)
         if not bounding_box.absolute_coords:
             bounding_box = bounding_box.transform(dp.width, dp.height, absolute_coords=True)
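
Note: `categories_name_as_key` is now keyed by `ObjectTypes` members rather than raw strings, which is why the lookup above goes through `get_type(label)`. A hypothetical stand-in showing the pattern; deepdoctection's own `ObjectTypes`/`get_type` are richer than this:

from enum import Enum

class LayoutType(str, Enum):  # stand-in for deepdoctection's ObjectTypes enums
    TEXT = "text"
    TITLE = "title"

def get_type(name: str) -> LayoutType:
    return LayoutType(name)  # str -> enum member

categories_name_as_key = {LayoutType.TEXT: 1, LayoutType.TITLE: 2}
label = "title"
print(categories_name_as_key[get_type(label)])  # 2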