deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (111)
  1. deepdoctection/__init__.py +8 -25
  2. deepdoctection/analyzer/dd.py +84 -71
  3. deepdoctection/dataflow/common.py +9 -5
  4. deepdoctection/dataflow/custom.py +5 -5
  5. deepdoctection/dataflow/custom_serialize.py +75 -18
  6. deepdoctection/dataflow/parallel_map.py +3 -3
  7. deepdoctection/dataflow/serialize.py +4 -4
  8. deepdoctection/dataflow/stats.py +3 -3
  9. deepdoctection/datapoint/annotation.py +78 -56
  10. deepdoctection/datapoint/box.py +7 -7
  11. deepdoctection/datapoint/convert.py +6 -6
  12. deepdoctection/datapoint/image.py +157 -75
  13. deepdoctection/datapoint/view.py +175 -151
  14. deepdoctection/datasets/adapter.py +30 -24
  15. deepdoctection/datasets/base.py +10 -10
  16. deepdoctection/datasets/dataflow_builder.py +3 -3
  17. deepdoctection/datasets/info.py +23 -25
  18. deepdoctection/datasets/instances/doclaynet.py +48 -49
  19. deepdoctection/datasets/instances/fintabnet.py +44 -45
  20. deepdoctection/datasets/instances/funsd.py +23 -23
  21. deepdoctection/datasets/instances/iiitar13k.py +8 -8
  22. deepdoctection/datasets/instances/layouttest.py +2 -2
  23. deepdoctection/datasets/instances/publaynet.py +3 -3
  24. deepdoctection/datasets/instances/pubtables1m.py +18 -18
  25. deepdoctection/datasets/instances/pubtabnet.py +30 -29
  26. deepdoctection/datasets/instances/rvlcdip.py +28 -29
  27. deepdoctection/datasets/instances/xfund.py +51 -30
  28. deepdoctection/datasets/save.py +6 -6
  29. deepdoctection/eval/accmetric.py +32 -33
  30. deepdoctection/eval/base.py +8 -9
  31. deepdoctection/eval/cocometric.py +13 -12
  32. deepdoctection/eval/eval.py +32 -26
  33. deepdoctection/eval/tedsmetric.py +16 -12
  34. deepdoctection/eval/tp_eval_callback.py +7 -16
  35. deepdoctection/extern/base.py +339 -134
  36. deepdoctection/extern/d2detect.py +69 -89
  37. deepdoctection/extern/deskew.py +11 -10
  38. deepdoctection/extern/doctrocr.py +81 -64
  39. deepdoctection/extern/fastlang.py +23 -16
  40. deepdoctection/extern/hfdetr.py +53 -38
  41. deepdoctection/extern/hflayoutlm.py +216 -155
  42. deepdoctection/extern/hflm.py +35 -30
  43. deepdoctection/extern/model.py +433 -255
  44. deepdoctection/extern/pdftext.py +15 -15
  45. deepdoctection/extern/pt/ptutils.py +4 -2
  46. deepdoctection/extern/tessocr.py +39 -38
  47. deepdoctection/extern/texocr.py +14 -16
  48. deepdoctection/extern/tp/tfutils.py +16 -2
  49. deepdoctection/extern/tp/tpcompat.py +11 -7
  50. deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
  51. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
  52. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
  53. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
  54. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
  55. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
  56. deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
  57. deepdoctection/extern/tpdetect.py +40 -45
  58. deepdoctection/mapper/cats.py +36 -40
  59. deepdoctection/mapper/cocostruct.py +16 -12
  60. deepdoctection/mapper/d2struct.py +22 -22
  61. deepdoctection/mapper/hfstruct.py +7 -7
  62. deepdoctection/mapper/laylmstruct.py +22 -24
  63. deepdoctection/mapper/maputils.py +9 -10
  64. deepdoctection/mapper/match.py +33 -2
  65. deepdoctection/mapper/misc.py +6 -7
  66. deepdoctection/mapper/pascalstruct.py +4 -4
  67. deepdoctection/mapper/prodigystruct.py +6 -6
  68. deepdoctection/mapper/pubstruct.py +84 -92
  69. deepdoctection/mapper/tpstruct.py +3 -3
  70. deepdoctection/mapper/xfundstruct.py +33 -33
  71. deepdoctection/pipe/anngen.py +39 -14
  72. deepdoctection/pipe/base.py +68 -99
  73. deepdoctection/pipe/common.py +181 -85
  74. deepdoctection/pipe/concurrency.py +14 -10
  75. deepdoctection/pipe/doctectionpipe.py +24 -21
  76. deepdoctection/pipe/language.py +20 -25
  77. deepdoctection/pipe/layout.py +18 -16
  78. deepdoctection/pipe/lm.py +49 -47
  79. deepdoctection/pipe/order.py +63 -65
  80. deepdoctection/pipe/refine.py +102 -109
  81. deepdoctection/pipe/segment.py +157 -162
  82. deepdoctection/pipe/sub_layout.py +50 -40
  83. deepdoctection/pipe/text.py +37 -36
  84. deepdoctection/pipe/transform.py +19 -16
  85. deepdoctection/train/d2_frcnn_train.py +27 -25
  86. deepdoctection/train/hf_detr_train.py +22 -18
  87. deepdoctection/train/hf_layoutlm_train.py +49 -48
  88. deepdoctection/train/tp_frcnn_train.py +10 -11
  89. deepdoctection/utils/concurrency.py +1 -1
  90. deepdoctection/utils/context.py +13 -6
  91. deepdoctection/utils/develop.py +4 -4
  92. deepdoctection/utils/env_info.py +52 -14
  93. deepdoctection/utils/file_utils.py +6 -11
  94. deepdoctection/utils/fs.py +41 -14
  95. deepdoctection/utils/identifier.py +2 -2
  96. deepdoctection/utils/logger.py +15 -15
  97. deepdoctection/utils/metacfg.py +7 -7
  98. deepdoctection/utils/pdf_utils.py +39 -14
  99. deepdoctection/utils/settings.py +188 -182
  100. deepdoctection/utils/tqdm.py +1 -1
  101. deepdoctection/utils/transform.py +14 -9
  102. deepdoctection/utils/types.py +104 -0
  103. deepdoctection/utils/utils.py +7 -7
  104. deepdoctection/utils/viz.py +70 -69
  105. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
  106. deepdoctection-0.34.dist-info/RECORD +146 -0
  107. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
  108. deepdoctection/utils/detection_types.py +0 -68
  109. deepdoctection-0.32.dist-info/RECORD +0 -146
  110. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
  111. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
@@ -19,23 +19,25 @@
19
19
  Methods that convert incoming data to dataflows.
20
20
  """
21
21
 
22
+ from __future__ import annotations
23
+
22
24
  import itertools
23
25
  import json
24
26
  import os
25
27
  from collections import defaultdict
26
28
  from pathlib import Path
27
- from typing import DefaultDict, Dict, List, Optional, Sequence, Union
29
+ from typing import Any, DefaultDict, Dict, Iterator, List, Optional, Sequence, TextIO, Union
28
30
 
29
31
  from jsonlines import Reader, Writer
30
32
  from tabulate import tabulate
31
33
  from termcolor import colored
32
34
 
33
35
  from ..utils.context import timed_operation
34
- from ..utils.detection_types import JsonDict, Pathlike
35
36
  from ..utils.error import FileExtensionError
36
37
  from ..utils.identifier import get_uuid_from_str
37
38
  from ..utils.pdf_utils import PDFStreamer
38
39
  from ..utils.tqdm import get_tqdm
40
+ from ..utils.types import JsonDict, PathLikeOrStr
39
41
  from ..utils.utils import is_file_extension
40
42
  from .base import DataFlow
41
43
  from .common import FlattenData, JoinData, MapData
@@ -53,6 +55,59 @@ def _reset_df_and_get_length(df: DataFlow) -> int:
53
55
  return length
54
56
 
55
57
 
58
+ class FileClosingIterator:
59
+ """
60
+ A custom iterator that closes the file object once the iteration is complete.
61
+
62
+ This iterator is used to ensure that the file object is properly closed after
63
+ reading the data from it. It is used in the context of reading data from a file
64
+ in a streaming manner, where the data is not loaded into memory all at once.
65
+
66
+ **Example:**
67
+
68
+ file = open(path, "r")
69
+ iterator = Reader(file)
70
+ closing_iterator = FileClosingIterator(file, iter(iterator))
71
+
72
+ df = CustomDataFromIterable(closing_iterator, max_datapoints=max_datapoints) # set up a dataflow
73
+
74
+ """
75
+
76
+ def __init__(self, file_obj: TextIO, iterator: Iterator[Any]):
77
+ """
78
+ Initializes the FileClosingIterator with a file object and its iterator.
79
+
80
+ :param file_obj (TextIO): The file object to read data from.
81
+ :param iterator (Iterator): The actual iterator of the file object.
82
+ """
83
+ self.file_obj = file_obj
84
+ self.iterator = iterator
85
+
86
+ def __iter__(self) -> FileClosingIterator:
87
+ """
88
+ Returns the iterator object itself.
89
+
90
+ :return: FileClosingIterator: The instance of the class itself.
91
+ """
92
+ return self
93
+
94
+ def __next__(self) -> Any:
95
+ """
96
+ Returns the next item from the file object's iterator.
97
+ Closes the file object if the iteration is finished.
98
+
99
+ :return: The next item from the file object's iterator.
100
+
101
+ Raises:
102
+ StopIteration: If there are no more items to return.
103
+ """
104
+ try:
105
+ return next(self.iterator)
106
+ except StopIteration as exc:
107
+ self.file_obj.close()
108
+ raise StopIteration from exc
109
+
110
+
56
111
  class SerializerJsonlines:
57
112
  """
58
113
  Serialize a dataflow from a jsonlines file. Alternatively, save a dataflow of JSON objects to a .jsonl file.
@@ -66,7 +121,7 @@ class SerializerJsonlines:
66
121
  """
67
122
 
68
123
  @staticmethod
69
- def load(path: Pathlike, max_datapoints: Optional[int] = None) -> CustomDataFromIterable:
124
+ def load(path: PathLikeOrStr, max_datapoints: Optional[int] = None) -> CustomDataFromIterable:
70
125
  """
71
126
  :param path: a path to a .jsonl file.
72
127
  :param max_datapoints: Will stop the iteration once max_datapoints have been streamed
@@ -75,10 +130,11 @@ class SerializerJsonlines:
75
130
  """
76
131
  file = open(path, "r") # pylint: disable=W1514,R1732
77
132
  iterator = Reader(file)
78
- return CustomDataFromIterable(iterator, max_datapoints=max_datapoints)
133
+ closing_iterator = FileClosingIterator(file, iter(iterator))
134
+ return CustomDataFromIterable(closing_iterator, max_datapoints=max_datapoints)
79
135
 
80
136
  @staticmethod
81
- def save(df: DataFlow, path: Pathlike, file_name: str, max_datapoints: Optional[int] = None) -> None:
137
+ def save(df: DataFlow, path: PathLikeOrStr, file_name: str, max_datapoints: Optional[int] = None) -> None:
82
138
  """
83
139
  Writes a dataflow iteratively to a .jsonl file. Every datapoint must be a dict where all items are serializable.
84
140
  As the length of the dataflow cannot be determined in every case max_datapoint prevents generating an
@@ -120,7 +176,7 @@ class SerializerTabsepFiles:
120
176
  """
121
177
 
122
178
  @staticmethod
123
- def load(path: Pathlike, max_datapoins: Optional[int] = None) -> CustomDataFromList:
179
+ def load(path: PathLikeOrStr, max_datapoins: Optional[int] = None) -> CustomDataFromList:
124
180
  """
125
181
  :param path: a path to a .txt file.
126
182
  :param max_datapoins: Will stop the iteration once max_datapoints have been streamed
@@ -133,7 +189,7 @@ class SerializerTabsepFiles:
133
189
  return CustomDataFromList(file_list, max_datapoints=max_datapoins)
134
190
 
135
191
  @staticmethod
136
- def save(df: DataFlow, path: Pathlike, file_name: str, max_datapoints: Optional[int] = None) -> None:
192
+ def save(df: DataFlow, path: PathLikeOrStr, file_name: str, max_datapoints: Optional[int] = None) -> None:
137
193
  """
138
194
  Writes a dataflow iteratively to a .txt file. Every datapoint must be a string.
139
195
  As the length of the dataflow cannot be determined in every case max_datapoint prevents generating an
@@ -168,7 +224,7 @@ class SerializerFiles:
168
224
 
169
225
  @staticmethod
170
226
  def load(
171
- path: Pathlike,
227
+ path: PathLikeOrStr,
172
228
  file_type: Union[str, Sequence[str]],
173
229
  max_datapoints: Optional[int] = None,
174
230
  shuffle: Optional[bool] = False,
@@ -190,15 +246,14 @@ class SerializerFiles:
190
246
  df2: DataFlow
191
247
  df3: DataFlow
192
248
 
193
- if isinstance(path, str):
194
- path = Path(path)
249
+ path = Path(path)
195
250
  if not path.exists():
196
251
  raise NotADirectoryError(f"The path {path} to the directory or file does not exist")
197
252
 
198
253
  if shuffle:
199
254
  sort = False
200
- it1 = os.walk(path, topdown=False)
201
- it2 = os.walk(path, topdown=False)
255
+ it1 = os.walk(os.fspath(path), topdown=False)
256
+ it2 = os.walk(os.fspath(path), topdown=False)
202
257
  df1 = CustomDataFromIterable(it1)
203
258
  df2 = CustomDataFromIterable(it2)
204
259
  df1 = MapData(df1, lambda dp: None if len(dp[2]) == 0 else dp)
@@ -237,7 +292,7 @@ class CocoParser:
237
292
  :param annotation_file: location of annotation file
238
293
  """
239
294
 
240
- def __init__(self, annotation_file: Optional[Pathlike] = None) -> None:
295
+ def __init__(self, annotation_file: Optional[PathLikeOrStr] = None) -> None:
241
296
  self.dataset: JsonDict = {}
242
297
  self.anns: Dict[int, JsonDict] = {}
243
298
  self.cats: Dict[int, JsonDict] = {}
@@ -465,7 +520,7 @@ class SerializerCoco:
465
520
  """
466
521
 
467
522
  @staticmethod
468
- def load(path: Pathlike, max_datapoints: Optional[int] = None) -> DataFlow:
523
+ def load(path: PathLikeOrStr, max_datapoints: Optional[int] = None) -> DataFlow:
469
524
  """
470
525
  Loads a .json file and generates a dataflow.
471
526
 
@@ -478,7 +533,7 @@ class SerializerCoco:
478
533
 
479
534
  {'image':{'id',...},'annotations':[{'id':…,'bbox':...}]}
480
535
 
481
- for each single image id.
536
+ for each image id. We use the type hint CocoDatapointDict to describe this dictionary
482
537
 
483
538
  :param max_datapoints: Will stop the iteration once max_datapoints have been streamed.
484
539
  :param path: a path to a .json file.
@@ -525,7 +580,7 @@ class SerializerPdfDoc:
525
580
  """
526
581
 
527
582
  @staticmethod
528
- def load(path: Pathlike, max_datapoints: Optional[int] = None) -> DataFlow:
583
+ def load(path: PathLikeOrStr, max_datapoints: Optional[int] = None) -> DataFlow:
529
584
  """
530
585
  Loads the document page wise and returns a dataflow accordingly.
531
586
 
@@ -552,14 +607,16 @@ class SerializerPdfDoc:
552
607
  return df
553
608
 
554
609
  @staticmethod
555
- def save(path: Pathlike) -> None:
610
+ def save(path: PathLikeOrStr) -> None:
556
611
  """
557
612
  Not implemented
558
613
  """
559
614
  raise NotImplementedError()
560
615
 
561
616
  @staticmethod
562
- def split(path: Pathlike, path_target: Optional[Pathlike] = None, max_datapoint: Optional[int] = None) -> None:
617
+ def split(
618
+ path: PathLikeOrStr, path_target: Optional[PathLikeOrStr] = None, max_datapoint: Optional[int] = None
619
+ ) -> None:
563
620
  """
564
621
  Split a document into single pages.
565
622
  """
@@ -23,7 +23,7 @@ import uuid
23
23
  import weakref
24
24
  from abc import ABC, abstractmethod
25
25
  from contextlib import contextmanager
26
- from typing import Any, Callable, Iterator, List, no_type_check
26
+ from typing import Any, Callable, Iterator, no_type_check
27
27
 
28
28
  import zmq
29
29
 
@@ -236,7 +236,7 @@ class MultiThreadMapData(_ParallelMapData):
236
236
  self._strict = strict
237
237
  self.num_thread = num_thread
238
238
  self.map_func = map_func
239
- self._threads: List[Any] = []
239
+ self._threads: list[Any] = []
240
240
  self._evt = None
241
241
 
242
242
  def reset_state(self) -> None:
@@ -284,7 +284,7 @@ class _MultiProcessZMQDataFlow(DataFlow, ABC):
284
284
  if os.name == "nt":
285
285
  raise EnvironmentError("ZMQ IPC doesn't support windows")
286
286
  self._reset_done = False
287
- self._procs: List[Any] = []
287
+ self._procs: list[Any] = []
288
288
  self.context = None
289
289
  self.socket = None
290
290
 
@@ -12,7 +12,7 @@ Some DataFlow classes for serialization. Many classes have been taken from
12
12
 
13
13
  import pickle
14
14
  from copy import copy
15
- from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union
15
+ from typing import Any, Iterable, Iterator, Optional, Union
16
16
 
17
17
  import numpy as np
18
18
 
@@ -23,7 +23,7 @@ from .base import DataFlow, RNGDataFlow
23
23
  class DataFromList(RNGDataFlow):
24
24
  """Wrap a list of datapoints to a DataFlow"""
25
25
 
26
- def __init__(self, lst: List[Any], shuffle: bool = True) -> None:
26
+ def __init__(self, lst: list[Any], shuffle: bool = True) -> None:
27
27
  """
28
28
  :param lst: input list. Each element is a datapoint.
29
29
  :param shuffle: shuffle data.
@@ -79,11 +79,11 @@ class FakeData(RNGDataFlow):
79
79
 
80
80
  def __init__(
81
81
  self,
82
- shapes: List[Union[List[Any], Tuple[Any]]],
82
+ shapes: list[Union[list[Any], tuple[Any]]],
83
83
  size: int = 1000,
84
84
  random: bool = True,
85
85
  dtype: str = "float32",
86
- domain: Tuple[Union[float, int], Union[float, int]] = (0, 1),
86
+ domain: tuple[Union[float, int], Union[float, int]] = (0, 1),
87
87
  ):
88
88
  """
89
89
  :param shapes: a list of lists/tuples. Shapes of each component.
@@ -18,7 +18,7 @@
18
18
  """
19
19
  Dataflows for calculating statistical values of the underlying dataset
20
20
  """
21
- from typing import Any, Optional, Tuple, Union
21
+ from typing import Any, Optional, Union
22
22
 
23
23
  import numpy as np
24
24
  import numpy.typing as npt
@@ -45,7 +45,7 @@ class MeanFromDataFlow(ProxyDataFlow):
45
45
  def __init__(
46
46
  self,
47
47
  df: DataFlow,
48
- axis: Optional[Union[int, Tuple[int], Tuple[int, int], Tuple[int, int, int]]] = None,
48
+ axis: Optional[Union[int, tuple[int], tuple[int, int], tuple[int, int, int]]] = None,
49
49
  key: Optional[str] = None,
50
50
  max_datapoints: Optional[int] = None,
51
51
  ):
@@ -165,7 +165,7 @@ class StdFromDataFlow(ProxyDataFlow):
165
165
  def __init__(
166
166
  self,
167
167
  df: DataFlow,
168
- axis: Optional[Union[int, Tuple[int], Tuple[int, int], Tuple[int, int, int]]] = None,
168
+ axis: Optional[Union[int, tuple[int], tuple[int, int], tuple[int, int, int]]] = None,
169
169
  key: Optional[str] = None,
170
170
  max_datapoints: Optional[int] = None,
171
171
  ):
@@ -18,34 +18,39 @@
18
18
  """
19
19
  Dataclass for annotations and their derived classes.
20
20
  """
21
+ from __future__ import annotations
21
22
 
22
23
  from abc import ABC, abstractmethod
24
+ from collections import defaultdict
23
25
  from dataclasses import dataclass, field
24
- from typing import Any, Dict, List, Optional, Union, no_type_check
26
+ from typing import Optional, Union, no_type_check
25
27
 
26
- from ..utils.detection_types import JsonDict
27
28
  from ..utils.error import AnnotationError, UUIDError
28
29
  from ..utils.identifier import get_uuid, is_uuid_like
29
30
  from ..utils.logger import LoggingRecord, logger
30
- from ..utils.settings import DefaultType, ObjectTypes, SummaryType, TypeOrStr, get_type
31
+ from ..utils.settings import DefaultType, ObjectTypes, TypeOrStr, get_type
32
+ from ..utils.types import AnnotationDict
31
33
  from .box import BoundingBox
32
34
  from .convert import as_dict
33
35
 
34
36
 
35
37
  @no_type_check
36
- def ann_from_dict(cls, **kwargs):
38
+ def ann_from_dict(cls, **kwargs: AnnotationDict):
37
39
  """
38
40
  A factory function to create subclasses of annotations from a given dict
39
41
  """
40
42
  _init_kwargs = {
41
43
  "external_id": kwargs.get("external_id"),
42
44
  "category_name": kwargs.get("category_name"),
43
- "category_id": kwargs.get("category_id"),
45
+ "category_id": kwargs.get("category_id", DEFAULT_CATEGORY_ID),
44
46
  "score": kwargs.get("score"),
45
47
  "service_id": kwargs.get("service_id"),
46
48
  "model_id": kwargs.get("model_id"),
47
49
  "session_id": kwargs.get("session_id"),
48
50
  }
51
+ _init_kwargs["category_id"] = (
52
+ int(_init_kwargs["category_id"]) if (_init_kwargs)["category_id"] not in ("None", "") else DEFAULT_CATEGORY_ID
53
+ )
49
54
  ann = cls(**_init_kwargs)
50
55
  ann.active = kwargs.get("active")
51
56
  ann._annotation_id = kwargs.get("_annotation_id") # pylint: disable=W0212
@@ -62,6 +67,16 @@ def ann_from_dict(cls, **kwargs):
62
67
  return ann
63
68
 
64
69
 
70
+ @dataclass(frozen=True)
71
+ class AnnotationMap:
72
+ """AnnotationMap to store all sub categories, relationship keys and summary keys of an annotation"""
73
+
74
+ image_annotation_id: str
75
+ sub_category_key: Optional[ObjectTypes] = None
76
+ relationship_key: Optional[ObjectTypes] = None
77
+ summary_key: Optional[ObjectTypes] = None
78
+
79
+
65
80
  @dataclass
66
81
  class Annotation(ABC):
67
82
  """
@@ -134,7 +149,7 @@ class Annotation(ABC):
134
149
  raise AnnotationError("Annotation_id must be uuid3 string")
135
150
 
136
151
  @abstractmethod
137
- def get_defining_attributes(self) -> List[str]:
152
+ def get_defining_attributes(self) -> list[str]:
138
153
  """
139
154
  Defining attributes of an annotation instance are attributes, of which you think that they uniquely
140
155
  describe the annotation object. If you do not provide an external id, only the defining attributes will be used
@@ -151,7 +166,7 @@ class Annotation(ABC):
151
166
  raise AnnotationError(f"Attribute {attr} must have __str__ method")
152
167
 
153
168
  @staticmethod
154
- def set_annotation_id(annotation: "CategoryAnnotation", *container_id_context: Optional[str]) -> str:
169
+ def set_annotation_id(annotation: CategoryAnnotation, *container_id_context: Optional[str]) -> str:
155
170
  """
156
171
  Defines the `annotation_id` by attributes of the annotation class as well as by external parameters given by a
157
172
  tuple or list of container id contexts.
@@ -167,7 +182,7 @@ class Annotation(ABC):
167
182
  attributes_values = [str(getattr(annotation, attribute)) for attribute in attributes]
168
183
  return get_uuid(*attributes_values, *container_id_context) # type: ignore
169
184
 
170
- def as_dict(self) -> Dict[str, Any]:
185
+ def as_dict(self) -> AnnotationDict:
171
186
  """
172
187
  Returning the full dataclass as dict. Uses the custom `convert.as_dict` to disregard attributes defined by
173
188
  `remove_keys`.
@@ -187,7 +202,7 @@ class Annotation(ABC):
187
202
 
188
203
  @classmethod
189
204
  @abstractmethod
190
- def from_dict(cls, **kwargs: JsonDict) -> "Annotation":
205
+ def from_dict(cls, **kwargs: AnnotationDict) -> Annotation:
191
206
  """
192
207
  Method to initialize a derived class from dict.
193
208
 
@@ -199,7 +214,7 @@ class Annotation(ABC):
199
214
 
200
215
  @staticmethod
201
216
  @abstractmethod
202
- def get_state_attributes() -> List[str]:
217
+ def get_state_attributes() -> list[str]:
203
218
  """
204
219
  Similar to `get_defining_attributes` but for `state_id`
205
220
 
@@ -242,6 +257,9 @@ class Annotation(ABC):
242
257
  return get_uuid(self.annotation_id, *container_ids)
243
258
 
244
259
 
260
+ DEFAULT_CATEGORY_ID = -1
261
+
262
+
245
263
  @dataclass
246
264
  class CategoryAnnotation(Annotation):
247
265
  """
@@ -268,12 +286,12 @@ class CategoryAnnotation(Annotation):
268
286
  `dump_relationship` instead.
269
287
  """
270
288
 
271
- category_name: TypeOrStr = field(default=DefaultType.default_type)
272
- _category_name: ObjectTypes = field(default=DefaultType.default_type, init=False)
273
- category_id: str = field(default="")
289
+ category_name: TypeOrStr = field(default=DefaultType.DEFAULT_TYPE)
290
+ _category_name: ObjectTypes = field(default=DefaultType.DEFAULT_TYPE, init=False)
291
+ category_id: int = field(default=DEFAULT_CATEGORY_ID)
274
292
  score: Optional[float] = field(default=None)
275
- sub_categories: Dict[ObjectTypes, "CategoryAnnotation"] = field(default_factory=dict, init=False, repr=True)
276
- relationships: Dict[ObjectTypes, List[str]] = field(default_factory=dict, init=False, repr=True)
293
+ sub_categories: dict[ObjectTypes, CategoryAnnotation] = field(default_factory=dict, init=False, repr=True)
294
+ relationships: dict[ObjectTypes, list[str]] = field(default_factory=dict, init=False, repr=True)
277
295
 
278
296
  @property # type: ignore
279
297
  def category_name(self) -> ObjectTypes:
@@ -287,13 +305,11 @@ class CategoryAnnotation(Annotation):
287
305
  self._category_name = get_type(category_name)
288
306
 
289
307
  def __post_init__(self) -> None:
290
- self.category_id = str(self.category_id)
291
- assert self.category_name
292
308
  self._assert_attributes_have_str(state_id=True)
293
309
  super().__post_init__()
294
310
 
295
311
  def dump_sub_category(
296
- self, sub_category_name: TypeOrStr, annotation: "CategoryAnnotation", *container_id_context: Optional[str]
312
+ self, sub_category_name: TypeOrStr, annotation: CategoryAnnotation, *container_id_context: Optional[str]
297
313
  ) -> None:
298
314
  """
299
315
  Storage of sub-categories. As sub-categories usually only depend on very few attributes and the parent
@@ -324,7 +340,7 @@ class CategoryAnnotation(Annotation):
324
340
  )
325
341
  self.sub_categories[get_type(sub_category_name)] = annotation
326
342
 
327
- def get_sub_category(self, sub_category_name: ObjectTypes) -> "CategoryAnnotation":
343
+ def get_sub_category(self, sub_category_name: ObjectTypes) -> CategoryAnnotation:
328
344
  """
329
345
  Return a sub category by its key.
330
346
 
@@ -362,7 +378,7 @@ class CategoryAnnotation(Annotation):
362
378
  if annotation_id not in self.relationships[key_type]:
363
379
  self.relationships[key_type].append(annotation_id)
364
380
 
365
- def get_relationship(self, key: ObjectTypes) -> List[str]:
381
+ def get_relationship(self, key: ObjectTypes) -> list[str]:
366
382
  """
367
383
  Returns a list of annotation ids stored with a given relationship key.
368
384
 
@@ -373,7 +389,7 @@ class CategoryAnnotation(Annotation):
373
389
  return self.relationships[key]
374
390
  return []
375
391
 
376
- def remove_relationship(self, key: ObjectTypes, annotation_ids: Optional[Union[List[str], str]] = None) -> None:
392
+ def remove_relationship(self, key: ObjectTypes, annotation_ids: Optional[Union[list[str], str]] = None) -> None:
377
393
  """
378
394
  Remove relationship by some given keys and ids. If no annotation ids are provided all relationship according
379
395
  to the key will be removed.
@@ -392,27 +408,28 @@ class CategoryAnnotation(Annotation):
392
408
  except ValueError:
393
409
  logger.warning(LoggingRecord(f"Relationship {key} cannot be removed because it does not exist"))
394
410
  else:
395
- self.relationships[key].clear()
411
+ if key in self.relationships:
412
+ self.relationships[key].clear()
396
413
 
397
- def get_defining_attributes(self) -> List[str]:
414
+ def get_defining_attributes(self) -> list[str]:
398
415
  return ["category_name", "category_id"]
399
416
 
400
417
  @staticmethod
401
- def remove_keys() -> List[str]:
418
+ def remove_keys() -> list[str]:
402
419
  """
403
420
  A list of attributes to suspend from as_dict creation.
404
421
 
405
- :return: List of attributes.
422
+ :return: list of attributes.
406
423
  """
407
- return []
424
+ return ["_category_name"]
408
425
 
409
426
  @classmethod
410
- def from_dict(cls, **kwargs: JsonDict) -> "CategoryAnnotation":
427
+ def from_dict(cls, **kwargs: AnnotationDict) -> CategoryAnnotation:
411
428
  category_ann = ann_from_dict(cls, **kwargs)
412
429
  return category_ann
413
430
 
414
431
  @staticmethod
415
- def get_state_attributes() -> List[str]:
432
+ def get_state_attributes() -> list[str]:
416
433
  return ["active", "sub_categories", "relationships"]
417
434
 
418
435
 
@@ -432,20 +449,20 @@ class ImageAnnotation(CategoryAnnotation):
432
449
  """
433
450
 
434
451
  bounding_box: Optional[BoundingBox] = field(default=None)
435
- image: Optional["Image"] = field(default=None, init=False, repr=False) # type: ignore
452
+ image: Optional[Image] = field(default=None, init=False, repr=False) # type: ignore # pylint: disable=E0602
436
453
 
437
- def get_defining_attributes(self) -> List[str]:
454
+ def get_defining_attributes(self) -> list[str]:
438
455
  return ["category_name", "bounding_box"]
439
456
 
440
457
  @classmethod
441
- def from_dict(cls, **kwargs: JsonDict) -> "ImageAnnotation":
458
+ def from_dict(cls, **kwargs: AnnotationDict) -> ImageAnnotation:
442
459
  image_ann = ann_from_dict(cls, **kwargs)
443
460
  if box_kwargs := kwargs.get("bounding_box"):
444
461
  image_ann.bounding_box = BoundingBox.from_dict(**box_kwargs)
445
462
  return image_ann
446
463
 
447
464
  @staticmethod
448
- def get_state_attributes() -> List[str]:
465
+ def get_state_attributes() -> list[str]:
449
466
  return ["active", "sub_categories", "relationships", "image"]
450
467
 
451
468
  def get_bounding_box(self, image_id: Optional[str] = None) -> BoundingBox:
@@ -462,29 +479,34 @@ class ImageAnnotation(CategoryAnnotation):
462
479
  def get_summary(self, key: ObjectTypes) -> CategoryAnnotation:
463
480
  """Get summary sub categories from `image`. Raises `ValueError` if `key` is not available"""
464
481
  if self.image:
465
- if self.image.summary:
466
- return self.image.summary.get_sub_category(key)
482
+ return self.image.summary.get_sub_category(key)
467
483
  raise AnnotationError(f"Summary does not exist for {self.annotation_id} and key: {key}")
468
484
 
469
-
470
- @dataclass
471
- class SummaryAnnotation(CategoryAnnotation):
472
- """
473
- A dataclass for adding summaries. The various summaries can be stored as sub categories.
474
-
475
- Summary annotations should be stored in the attribute provided: `image.Image.summary` and should not be
476
- dumped as a category.
477
- """
478
-
479
- def __post_init__(self) -> None:
480
- self._category_name = SummaryType.summary
481
- super().__post_init__()
482
-
483
- @classmethod
484
- def from_dict(cls, **kwargs: JsonDict) -> "SummaryAnnotation":
485
- summary_ann = ann_from_dict(cls, **kwargs)
486
- summary_ann.category_name = SummaryType.summary
487
- return summary_ann
485
+ def get_annotation_map(self) -> defaultdict[str, list[AnnotationMap]]:
486
+ """
487
+ Returns a defaultdict with annotation ids as keys and a list of AnnotationMap instances as values for all sub
488
+ categories, relationships and image summaries.
489
+ :return: defaultdict with annotation ids as keys and a list of AnnotationMap instances as values.
490
+ """
491
+ annotation_id_dict = defaultdict(list)
492
+ annotation_id_dict[self.annotation_id].append(AnnotationMap(image_annotation_id=self.annotation_id))
493
+ for sub_cat_key in self.sub_categories:
494
+ sub_cat = self.get_sub_category(sub_cat_key)
495
+ annotation_id_dict[sub_cat.annotation_id].append(
496
+ AnnotationMap(image_annotation_id=self.annotation_id, sub_category_key=sub_cat_key)
497
+ )
498
+ if self.image is not None:
499
+ for summary_cat_key in self.image.summary.sub_categories:
500
+ summary_cat = self.get_summary(summary_cat_key)
501
+ annotation_id_dict[summary_cat.annotation_id].append(
502
+ AnnotationMap(image_annotation_id=self.annotation_id, summary_key=summary_cat_key)
503
+ )
504
+ for rel_key in self.relationships:
505
+ for rel_ann_ids in self.get_relationship(rel_key):
506
+ annotation_id_dict[rel_ann_ids].append(
507
+ AnnotationMap(image_annotation_id=self.annotation_id, relationship_key=rel_key)
508
+ )
509
+ return annotation_id_dict
488
510
 
489
511
 
490
512
  @dataclass
@@ -496,13 +518,13 @@ class ContainerAnnotation(CategoryAnnotation):
496
518
  value: Attribute to store the value. Use strings.
497
519
  """
498
520
 
499
- value: Optional[Union[List[str], str]] = field(default=None)
521
+ value: Optional[Union[list[str], str]] = field(default=None)
500
522
 
501
- def get_defining_attributes(self) -> List[str]:
523
+ def get_defining_attributes(self) -> list[str]:
502
524
  return ["category_name", "value"]
503
525
 
504
526
  @classmethod
505
- def from_dict(cls, **kwargs: JsonDict) -> "SummaryAnnotation":
527
+ def from_dict(cls, **kwargs: AnnotationDict) -> ContainerAnnotation:
506
528
  container_ann = ann_from_dict(cls, **kwargs)
507
529
  value = kwargs.get("value", "")
508
530
  container_ann.value = value if isinstance(value, str) else list(value)
@@ -21,17 +21,17 @@ Implementation of BoundingBox class and related methods
21
21
 
22
22
  from dataclasses import dataclass
23
23
  from math import ceil, floor
24
- from typing import List, Optional, Sequence, no_type_check
24
+ from typing import Optional, Sequence, no_type_check
25
25
 
26
26
  import numpy as np
27
27
  import numpy.typing as npt
28
28
  from lazy_imports import try_import
29
29
  from numpy import float32
30
30
 
31
- from ..utils.detection_types import ImageType
32
31
  from ..utils.error import BoundingBoxError
33
32
  from ..utils.file_utils import cocotools_available
34
33
  from ..utils.logger import LoggingRecord, logger
34
+ from ..utils.types import PixelValues
35
35
 
36
36
  with try_import() as import_guard:
37
37
  import pycocotools.mask as coco_mask
@@ -221,7 +221,7 @@ class BoundingBox:
221
221
  return self.uly + 0.5 * self.height
222
222
 
223
223
  @property
224
- def center(self) -> List[float]:
224
+ def center(self) -> list[float]:
225
225
  """
226
226
  Bounding box center [x,y]
227
227
  """
@@ -264,7 +264,7 @@ class BoundingBox:
264
264
  * np_poly_scale
265
265
  )
266
266
 
267
- def to_list(self, mode: str, scale_x: float = 1.0, scale_y: float = 1.0) -> List[float]:
267
+ def to_list(self, mode: str, scale_x: float = 1.0, scale_y: float = 1.0) -> list[float]:
268
268
  """
269
269
  Returns the coordinates as list
270
270
 
@@ -345,7 +345,7 @@ class BoundingBox:
345
345
  return f"Bounding Box ulx: {self.ulx}, uly: {self.uly}, lrx: {self.lrx}, lry: {self.lry}"
346
346
 
347
347
  @staticmethod
348
- def remove_keys() -> List[str]:
348
+ def remove_keys() -> list[str]:
349
349
  """
350
350
  A list of attributes to suspend from as_dict creation.
351
351
  """
@@ -398,8 +398,8 @@ def intersection_box(
398
398
 
399
399
 
400
400
  def crop_box_from_image(
401
- np_image: ImageType, crop_box: BoundingBox, width: Optional[float] = None, height: Optional[float] = None
402
- ) -> ImageType:
401
+ np_image: PixelValues, crop_box: BoundingBox, width: Optional[float] = None, height: Optional[float] = None
402
+ ) -> PixelValues:
403
403
  """
404
404
  Crop a box (the crop_box) from a np_image. Will floor the left and ceil the right coordinate point.
405
405