deepdoctection-0.32-py3-none-any.whl → deepdoctection-0.34-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. See the original diff page for more details.

Files changed (111):
  1. deepdoctection/__init__.py +8 -25
  2. deepdoctection/analyzer/dd.py +84 -71
  3. deepdoctection/dataflow/common.py +9 -5
  4. deepdoctection/dataflow/custom.py +5 -5
  5. deepdoctection/dataflow/custom_serialize.py +75 -18
  6. deepdoctection/dataflow/parallel_map.py +3 -3
  7. deepdoctection/dataflow/serialize.py +4 -4
  8. deepdoctection/dataflow/stats.py +3 -3
  9. deepdoctection/datapoint/annotation.py +78 -56
  10. deepdoctection/datapoint/box.py +7 -7
  11. deepdoctection/datapoint/convert.py +6 -6
  12. deepdoctection/datapoint/image.py +157 -75
  13. deepdoctection/datapoint/view.py +175 -151
  14. deepdoctection/datasets/adapter.py +30 -24
  15. deepdoctection/datasets/base.py +10 -10
  16. deepdoctection/datasets/dataflow_builder.py +3 -3
  17. deepdoctection/datasets/info.py +23 -25
  18. deepdoctection/datasets/instances/doclaynet.py +48 -49
  19. deepdoctection/datasets/instances/fintabnet.py +44 -45
  20. deepdoctection/datasets/instances/funsd.py +23 -23
  21. deepdoctection/datasets/instances/iiitar13k.py +8 -8
  22. deepdoctection/datasets/instances/layouttest.py +2 -2
  23. deepdoctection/datasets/instances/publaynet.py +3 -3
  24. deepdoctection/datasets/instances/pubtables1m.py +18 -18
  25. deepdoctection/datasets/instances/pubtabnet.py +30 -29
  26. deepdoctection/datasets/instances/rvlcdip.py +28 -29
  27. deepdoctection/datasets/instances/xfund.py +51 -30
  28. deepdoctection/datasets/save.py +6 -6
  29. deepdoctection/eval/accmetric.py +32 -33
  30. deepdoctection/eval/base.py +8 -9
  31. deepdoctection/eval/cocometric.py +13 -12
  32. deepdoctection/eval/eval.py +32 -26
  33. deepdoctection/eval/tedsmetric.py +16 -12
  34. deepdoctection/eval/tp_eval_callback.py +7 -16
  35. deepdoctection/extern/base.py +339 -134
  36. deepdoctection/extern/d2detect.py +69 -89
  37. deepdoctection/extern/deskew.py +11 -10
  38. deepdoctection/extern/doctrocr.py +81 -64
  39. deepdoctection/extern/fastlang.py +23 -16
  40. deepdoctection/extern/hfdetr.py +53 -38
  41. deepdoctection/extern/hflayoutlm.py +216 -155
  42. deepdoctection/extern/hflm.py +35 -30
  43. deepdoctection/extern/model.py +433 -255
  44. deepdoctection/extern/pdftext.py +15 -15
  45. deepdoctection/extern/pt/ptutils.py +4 -2
  46. deepdoctection/extern/tessocr.py +39 -38
  47. deepdoctection/extern/texocr.py +14 -16
  48. deepdoctection/extern/tp/tfutils.py +16 -2
  49. deepdoctection/extern/tp/tpcompat.py +11 -7
  50. deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
  51. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
  52. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
  53. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
  54. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
  55. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
  56. deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
  57. deepdoctection/extern/tpdetect.py +40 -45
  58. deepdoctection/mapper/cats.py +36 -40
  59. deepdoctection/mapper/cocostruct.py +16 -12
  60. deepdoctection/mapper/d2struct.py +22 -22
  61. deepdoctection/mapper/hfstruct.py +7 -7
  62. deepdoctection/mapper/laylmstruct.py +22 -24
  63. deepdoctection/mapper/maputils.py +9 -10
  64. deepdoctection/mapper/match.py +33 -2
  65. deepdoctection/mapper/misc.py +6 -7
  66. deepdoctection/mapper/pascalstruct.py +4 -4
  67. deepdoctection/mapper/prodigystruct.py +6 -6
  68. deepdoctection/mapper/pubstruct.py +84 -92
  69. deepdoctection/mapper/tpstruct.py +3 -3
  70. deepdoctection/mapper/xfundstruct.py +33 -33
  71. deepdoctection/pipe/anngen.py +39 -14
  72. deepdoctection/pipe/base.py +68 -99
  73. deepdoctection/pipe/common.py +181 -85
  74. deepdoctection/pipe/concurrency.py +14 -10
  75. deepdoctection/pipe/doctectionpipe.py +24 -21
  76. deepdoctection/pipe/language.py +20 -25
  77. deepdoctection/pipe/layout.py +18 -16
  78. deepdoctection/pipe/lm.py +49 -47
  79. deepdoctection/pipe/order.py +63 -65
  80. deepdoctection/pipe/refine.py +102 -109
  81. deepdoctection/pipe/segment.py +157 -162
  82. deepdoctection/pipe/sub_layout.py +50 -40
  83. deepdoctection/pipe/text.py +37 -36
  84. deepdoctection/pipe/transform.py +19 -16
  85. deepdoctection/train/d2_frcnn_train.py +27 -25
  86. deepdoctection/train/hf_detr_train.py +22 -18
  87. deepdoctection/train/hf_layoutlm_train.py +49 -48
  88. deepdoctection/train/tp_frcnn_train.py +10 -11
  89. deepdoctection/utils/concurrency.py +1 -1
  90. deepdoctection/utils/context.py +13 -6
  91. deepdoctection/utils/develop.py +4 -4
  92. deepdoctection/utils/env_info.py +52 -14
  93. deepdoctection/utils/file_utils.py +6 -11
  94. deepdoctection/utils/fs.py +41 -14
  95. deepdoctection/utils/identifier.py +2 -2
  96. deepdoctection/utils/logger.py +15 -15
  97. deepdoctection/utils/metacfg.py +7 -7
  98. deepdoctection/utils/pdf_utils.py +39 -14
  99. deepdoctection/utils/settings.py +188 -182
  100. deepdoctection/utils/tqdm.py +1 -1
  101. deepdoctection/utils/transform.py +14 -9
  102. deepdoctection/utils/types.py +104 -0
  103. deepdoctection/utils/utils.py +7 -7
  104. deepdoctection/utils/viz.py +70 -69
  105. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
  106. deepdoctection-0.34.dist-info/RECORD +146 -0
  107. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
  108. deepdoctection/utils/detection_types.py +0 -68
  109. deepdoctection-0.32.dist-info/RECORD +0 -146
  110. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
  111. {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
@@ -30,10 +30,10 @@ from numpy import uint8
30
30
  from numpy.typing import NDArray
31
31
  from pypdf import PdfReader
32
32
 
33
- from ..utils.detection_types import ImageType
34
33
  from ..utils.develop import deprecated
35
34
  from ..utils.error import DependencyError
36
35
  from ..utils.pdf_utils import pdf_to_np_array
36
+ from ..utils.types import PixelValues
37
37
  from ..utils.viz import viz_handler
38
38
 
39
39
  __all__ = [
@@ -75,7 +75,7 @@ def as_dict(obj: Any, dict_factory) -> Union[Any]: # type: ignore
75
75
  return copy.deepcopy(obj)
76
76
 
77
77
 
78
- def convert_b64_to_np_array(image: str) -> ImageType:
78
+ def convert_b64_to_np_array(image: str) -> PixelValues:
79
79
  """
80
80
  Converts an image in base4 string encoding representation to a numpy array of shape (width,height,channel).
81
81
 
@@ -86,7 +86,7 @@ def convert_b64_to_np_array(image: str) -> ImageType:
86
86
  return viz_handler.convert_b64_to_np(image).astype(uint8)
87
87
 
88
88
 
89
- def convert_np_array_to_b64(np_image: ImageType) -> str:
89
+ def convert_np_array_to_b64(np_image: PixelValues) -> str:
90
90
  """
91
91
  Converts an image from numpy array into a base64 string encoding representation
92
92
 
@@ -97,7 +97,7 @@ def convert_np_array_to_b64(np_image: ImageType) -> str:
97
97
 
98
98
 
99
99
  @no_type_check
100
- def convert_np_array_to_b64_b(np_image: ImageType) -> bytes:
100
+ def convert_np_array_to_b64_b(np_image: PixelValues) -> bytes:
101
101
  """
102
102
  Converts an image from numpy array into a base64 bytes encoding representation
103
103
 
@@ -108,7 +108,7 @@ def convert_np_array_to_b64_b(np_image: ImageType) -> bytes:
108
108
 
109
109
 
110
110
  @deprecated("Use convert_pdf_bytes_to_np_array_v2", "2022-02-23")
111
- def convert_pdf_bytes_to_np_array(pdf_bytes: bytes, dpi: Optional[int] = None) -> ImageType:
111
+ def convert_pdf_bytes_to_np_array(pdf_bytes: bytes, dpi: Optional[int] = None) -> PixelValues:
112
112
  """
113
113
  Converts a pdf passed as bytes into a numpy array. Note, that this method expects poppler to be installed.
114
114
  Please check the installation guides at https://poppler.freedesktop.org/ . If no value for dpi is provided
@@ -143,7 +143,7 @@ def convert_pdf_bytes_to_np_array(pdf_bytes: bytes, dpi: Optional[int] = None) -
143
143
  return np_array.astype(uint8)
144
144
 
145
145
 
146
- def convert_pdf_bytes_to_np_array_v2(pdf_bytes: bytes, dpi: Optional[int] = None) -> ImageType:
146
+ def convert_pdf_bytes_to_np_array_v2(pdf_bytes: bytes, dpi: Optional[int] = None) -> PixelValues:
147
147
  """
148
148
  Converts a pdf passed as bytes into a numpy array. Note, that this method expects poppler to be installed. This
149
149
  function, however does not rely on the wrapper pdf2image but uses a function of this lib which calls poppler
@@ -21,19 +21,20 @@ Dataclass Image
21
21
  from __future__ import annotations
22
22
 
23
23
  import json
24
+ from collections import defaultdict
24
25
  from dataclasses import dataclass, field
25
26
  from os import environ
26
27
  from pathlib import Path
27
- from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Union, no_type_check
28
+ from typing import Any, Optional, Sequence, Union, no_type_check
28
29
 
29
30
  import numpy as np
30
31
  from numpy import uint8
31
32
 
32
- from ..utils.detection_types import ImageType, JsonDict, Pathlike
33
33
  from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDError
34
34
  from ..utils.identifier import get_uuid, is_uuid_like
35
- from ..utils.settings import ObjectTypes, get_type
36
- from .annotation import Annotation, BoundingBox, ImageAnnotation, SummaryAnnotation
35
+ from ..utils.settings import ObjectTypes, SummaryType, get_type
36
+ from ..utils.types import ImageDict, PathLikeOrStr, PixelValues
37
+ from .annotation import Annotation, AnnotationMap, BoundingBox, CategoryAnnotation, ImageAnnotation
37
38
  from .box import crop_box_from_image, global_to_local_coords, intersection_box
38
39
  from .convert import as_dict, convert_b64_to_np_array, convert_np_array_to_b64, convert_pdf_bytes_to_np_array_v2
39
40
 
@@ -86,12 +87,12 @@ class Image:
86
87
  page_number: int = field(default=0, init=False, repr=False)
87
88
  external_id: Optional[Union[str, int]] = field(default=None, repr=False)
88
89
  _image_id: Optional[str] = field(default=None, init=False, repr=True)
89
- _image: Optional[ImageType] = field(default=None, init=False, repr=False)
90
+ _image: Optional[PixelValues] = field(default=None, init=False, repr=False)
90
91
  _bbox: Optional[BoundingBox] = field(default=None, init=False, repr=False)
91
- embeddings: Dict[str, BoundingBox] = field(default_factory=dict, init=False, repr=True)
92
- annotations: List[ImageAnnotation] = field(default_factory=list, init=False, repr=True)
93
- _annotation_ids: List[str] = field(default_factory=list, init=False, repr=False)
94
- _summary: Optional[SummaryAnnotation] = field(default=None, init=False, repr=False)
92
+ embeddings: dict[str, BoundingBox] = field(default_factory=dict, init=False, repr=True)
93
+ annotations: list[ImageAnnotation] = field(default_factory=list, init=False, repr=True)
94
+ _annotation_ids: list[str] = field(default_factory=list, init=False, repr=False)
95
+ _summary: Optional[CategoryAnnotation] = field(default=None, init=False, repr=False)
95
96
 
96
97
  def __post_init__(self) -> None:
97
98
  if self.external_id is not None:
@@ -128,14 +129,14 @@ class Image:
128
129
  raise UUIDError("image_id must be uuid3 string")
129
130
 
130
131
  @property
131
- def image(self) -> Optional[ImageType]:
132
+ def image(self) -> Optional[PixelValues]:
132
133
  """
133
134
  image
134
135
  """
135
136
  return self._image
136
137
 
137
138
  @image.setter
138
- def image(self, image: Optional[Union[str, ImageType, bytes]]) -> None:
139
+ def image(self, image: Optional[Union[str, PixelValues, bytes]]) -> None:
139
140
  """
140
141
  Sets the image for internal storage. Will convert to numpy array before storing internally.
141
142
  Note: If the input is an np.array, ensure that the image is in BGR-format as this is the standard
@@ -162,13 +163,19 @@ class Image:
162
163
  self._self_embedding()
163
164
 
164
165
  @property
165
- def summary(self) -> Optional[SummaryAnnotation]:
166
+ def summary(self) -> CategoryAnnotation:
166
167
  """summary"""
168
+ if self._summary is None:
169
+ self._summary = CategoryAnnotation(category_name=SummaryType.SUMMARY)
170
+ if self._summary._annotation_id is None: # pylint: disable=W0212
171
+ self._summary.annotation_id = self.define_annotation_id(self._summary)
167
172
  return self._summary
168
173
 
169
174
  @summary.setter
170
- def summary(self, summary_annotation: SummaryAnnotation) -> None:
175
+ def summary(self, summary_annotation: CategoryAnnotation) -> None:
171
176
  """summary setter"""
177
+ if self._summary is not None:
178
+ raise ImageError("Image.summary already defined and cannot be reset")
172
179
  if summary_annotation._annotation_id is None: # pylint: disable=W0212
173
180
  summary_annotation.annotation_id = self.define_annotation_id(summary_annotation)
174
181
  self._summary = summary_annotation
@@ -222,10 +229,10 @@ class Image:
222
229
  Helper class. Do not use it in your code.
223
230
  """
224
231
 
225
- def __init__(self, img: Optional[ImageType]):
232
+ def __init__(self, img: Optional[PixelValues]):
226
233
  self.img = img
227
234
 
228
- def to_np_array(self) -> Optional[ImageType]:
235
+ def to_np_array(self) -> Optional[PixelValues]:
229
236
  """
230
237
  Returns image as numpy array
231
238
 
@@ -297,6 +304,15 @@ class Image:
297
304
 
298
305
  return self.embeddings[image_id]
299
306
 
307
+ def remove_embedding(self, image_id: str) -> None:
308
+ """
309
+ Remove an embedding from the image.
310
+
311
+ :param image_id: uuid string of the embedding image
312
+ """
313
+ if image_id in self.embeddings:
314
+ self.embeddings.pop(image_id)
315
+
300
316
  def _self_embedding(self) -> None:
301
317
  if self._bbox is not None:
302
318
  self.set_embedding(self.image_id, self._bbox)
@@ -329,7 +345,7 @@ class Image:
329
345
  model_id: Optional[Union[str, Sequence[str]]] = None,
330
346
  session_ids: Optional[Union[str, Sequence[str]]] = None,
331
347
  ignore_inactive: bool = True,
332
- ) -> List[ImageAnnotation]:
348
+ ) -> list[ImageAnnotation]:
333
349
  """
334
350
  Selection of annotations from the annotation container. Filter conditions can be defined by specifying
335
351
  the annotation_id or the category name. (Since only image annotations are currently allowed in the container,
@@ -349,9 +365,9 @@ class Image:
349
365
 
350
366
  if category_names is not None:
351
367
  category_names = (
352
- [get_type(cat_name) for cat_name in category_names]
353
- if isinstance(category_names, (list, set))
354
- else [get_type(category_names)] # type:ignore
368
+ (get_type(category_names),)
369
+ if isinstance(category_names, str)
370
+ else tuple(get_type(cat_name) for cat_name in category_names)
355
371
  )
356
372
 
357
373
  ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
@@ -360,61 +376,28 @@ class Image:
360
376
  session_id = [session_ids] if isinstance(session_ids, str) else session_ids
361
377
 
362
378
  if ignore_inactive:
363
- anns = filter(lambda x: x.active, self.annotations)
379
+ anns: Union[list[ImageAnnotation], filter[ImageAnnotation]] = filter(lambda x: x.active, self.annotations)
364
380
  else:
365
- anns = self.annotations # type:ignore
381
+ anns = self.annotations
366
382
 
367
383
  if category_names is not None:
368
- anns = filter(lambda x: x.category_name in category_names, anns) # type:ignore
384
+ anns = filter(lambda x: x.category_name in category_names, anns)
369
385
 
370
386
  if ann_ids is not None:
371
- anns = filter(lambda x: x.annotation_id in ann_ids, anns) # type:ignore
387
+ anns = filter(lambda x: x.annotation_id in ann_ids, anns)
372
388
 
373
389
  if service_id is not None:
374
- anns = filter(lambda x: x.service_id in service_id, anns) # type:ignore
390
+ anns = filter(lambda x: x.service_id in service_id, anns)
375
391
 
376
392
  if model_id is not None:
377
- anns = filter(lambda x: x.model_id in model_id, anns) # type:ignore
393
+ anns = filter(lambda x: x.model_id in model_id, anns)
378
394
 
379
395
  if session_id is not None:
380
- anns = filter(lambda x: x.session_id in session_id, anns) # type:ignore
396
+ anns = filter(lambda x: x.session_id in session_id, anns)
381
397
 
382
398
  return list(anns)
383
399
 
384
- def get_annotation_iter(
385
- self,
386
- category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
387
- annotation_ids: Optional[Union[str, Sequence[str]]] = None,
388
- service_id: Optional[Union[str, Sequence[str]]] = None,
389
- model_id: Optional[Union[str, Sequence[str]]] = None,
390
- session_ids: Optional[Union[str, Sequence[str]]] = None,
391
- ignore_inactive: bool = True,
392
- ) -> Iterable[ImageAnnotation]:
393
- """
394
- Get annotation as an iterator. Same as `get_annotation` but returns an iterator instead of a list.
395
-
396
- :param category_names: A single name or list of names
397
- :param annotation_ids: A single id or list of ids
398
- :param service_id: A single service name or list of service names
399
- :param model_id: A single model name or list of model names
400
- :param session_ids: A single session id or list of session ids
401
- :param ignore_inactive: If set to `True` only active annotations are returned.
402
-
403
- :return: A (possibly empty) list of annotations
404
- """
405
-
406
- return iter(
407
- self.get_annotation(
408
- category_names=category_names,
409
- annotation_ids=annotation_ids,
410
- service_id=service_id,
411
- model_id=model_id,
412
- session_ids=session_ids,
413
- ignore_inactive=ignore_inactive,
414
- )
415
- )
416
-
417
- def as_dict(self) -> Dict[str, Any]:
400
+ def as_dict(self) -> dict[str, Any]:
418
401
  """
419
402
  Returns the full image dataclass as dict. Uses the custom `convert.as_dict` to disregard attributes
420
403
  defined by `remove_keys`.
@@ -430,12 +413,12 @@ class Image:
430
413
  return img_dict
431
414
 
432
415
  @staticmethod
433
- def remove_keys() -> List[str]:
416
+ def remove_keys() -> list[str]:
434
417
  """
435
418
  A list of attributes to suspend from as_dict creation.
436
419
  """
437
420
 
438
- return ["_image"]
421
+ return ["_image", "_annotation_ids"]
439
422
 
440
423
  def define_annotation_id(self, annotation: Annotation) -> str:
441
424
  """
@@ -450,7 +433,11 @@ class Image:
450
433
  attributes_values = [str(getattr(annotation, attribute)) for attribute in attributes]
451
434
  return get_uuid(*attributes_values, str(self.image_id))
452
435
 
453
- def remove(self, annotation: ImageAnnotation) -> None:
436
+ def remove(
437
+ self,
438
+ annotation_ids: Optional[Union[str, list[str]]] = None,
439
+ service_ids: Optional[Union[str, list[str]]] = None,
440
+ ) -> None:
454
441
  """
455
442
  Instead of removing consider deactivating annotations.
456
443
 
@@ -458,9 +445,66 @@ class Image:
458
445
 
459
446
  :param annotation: The annotation to remove
460
447
  """
448
+ ann_id_to_annotation_maps = self.get_annotation_id_to_annotation_maps()
449
+
450
+ if annotation_ids is not None:
451
+ annotation_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
452
+
453
+ for ann_id in annotation_ids:
454
+ if ann_id not in ann_id_to_annotation_maps:
455
+ raise ImageError(f"Annotation with id {ann_id} not found")
456
+ annotation_maps = ann_id_to_annotation_maps[ann_id]
457
+
458
+ for annotation_map in annotation_maps:
459
+ self._remove_by_annotation_id(ann_id, annotation_map)
460
+
461
+ if service_ids is not None:
462
+ service_ids = [service_ids] if isinstance(service_ids, str) else service_ids
463
+ service_id_to_annotation_id = self.get_service_id_to_annotation_id()
464
+
465
+ for service_id in service_ids:
466
+ if service_id not in service_id_to_annotation_id:
467
+ raise ImageError(f"Service id {service_id} not found")
468
+ annotation_ids = service_id_to_annotation_id[service_id]
469
+
470
+ for ann_id in annotation_ids:
471
+ if ann_id not in ann_id_to_annotation_maps:
472
+ raise ImageError(f"Annotation with id {ann_id} not found")
473
+ annotation_maps = ann_id_to_annotation_maps[ann_id]
474
+
475
+ for annotation_map in annotation_maps:
476
+ self._remove_by_annotation_id(ann_id, annotation_map)
477
+
478
+ def _remove_by_annotation_id(self, annotation_id: str, location_dict: AnnotationMap) -> None:
479
+ image_annotation_id = location_dict.image_annotation_id
480
+ annotations = self.get_annotation(annotation_ids=image_annotation_id)
481
+ if not annotations:
482
+ return
483
+ # There can only be one annotation with a given id
484
+ annotation = annotations[0]
485
+
486
+ if (
487
+ location_dict.sub_category_key is None
488
+ and location_dict.relationship_key is None
489
+ and location_dict.summary_key is None
490
+ ):
491
+ self.annotations.remove(annotation)
492
+ self._annotation_ids.remove(annotation.annotation_id)
493
+
494
+ sub_category_key = location_dict.sub_category_key
495
+
496
+ if sub_category_key is not None:
497
+ annotation.remove_sub_category(sub_category_key)
498
+
499
+ relationship_key = location_dict.relationship_key
461
500
 
462
- self.annotations.remove(annotation)
463
- self._annotation_ids.remove(annotation.annotation_id)
501
+ if relationship_key is not None:
502
+ annotation.remove_relationship(relationship_key, annotation_id)
503
+
504
+ summary_key = location_dict.summary_key
505
+ if summary_key is not None:
506
+ if annotation.image is not None:
507
+ annotation.image.summary.remove_sub_category(summary_key)
464
508
 
465
509
  def image_ann_to_image(self, annotation_id: str, crop_image: bool = False) -> None:
466
510
  """
@@ -497,7 +541,7 @@ class Image:
497
541
 
498
542
  ann.image = new_image
499
543
 
500
- def maybe_ann_to_sub_image(self, annotation_id: str, category_names: Union[str, List[str]]) -> None:
544
+ def maybe_ann_to_sub_image(self, annotation_id: str, category_names: Union[str, list[str]]) -> None:
501
545
  """
502
546
  Provides a supplement to `image_ann_to_image` and mainly operates on the `ImageAnnotation.image` of
503
547
  the image annotation. The aim is to assign image annotations from this image one hierarchy level lower to the
@@ -572,7 +616,9 @@ class Image:
572
616
  image_ann.image = cls.from_dict(**image_dict)
573
617
  image.dump(image_ann)
574
618
  if summary_dict := kwargs.get("_summary", kwargs.get("summary")):
575
- image.summary = SummaryAnnotation.from_dict(**summary_dict)
619
+ image.summary = CategoryAnnotation.from_dict(**summary_dict)
620
+ image.summary.category_name = SummaryType.SUMMARY
621
+
576
622
  return image
577
623
 
578
624
  @classmethod
@@ -589,7 +635,7 @@ class Image:
589
635
  return image
590
636
 
591
637
  @staticmethod
592
- def get_state_attributes() -> List[str]:
638
+ def get_state_attributes() -> list[str]:
593
639
  """
594
640
  Returns the list of attributes that define the `state_id` of an image.
595
641
 
@@ -636,9 +682,9 @@ class Image:
636
682
  self,
637
683
  image_to_json: bool = True,
638
684
  highest_hierarchy_only: bool = False,
639
- path: Optional[Pathlike] = None,
685
+ path: Optional[PathLikeOrStr] = None,
640
686
  dry: bool = False,
641
- ) -> Optional[JsonDict]:
687
+ ) -> Optional[Union[ImageDict, str]]:
642
688
  """
643
689
  Export image as dictionary. As numpy array cannot be serialized `image` values will be converted into
644
690
  base64 encodings.
@@ -650,10 +696,9 @@ class Image:
650
696
 
651
697
  :return: optional dict
652
698
  """
653
- if isinstance(path, str):
654
- path = Path(path)
655
- elif path is None:
699
+ if path is None:
656
700
  path = Path(self.location)
701
+ path = Path(path)
657
702
  if path.is_dir():
658
703
  path = path / self.image_id
659
704
  suffix = path.suffix
@@ -671,8 +716,45 @@ class Image:
671
716
  return export_dict
672
717
  with open(path_json, "w", encoding="UTF-8") as file:
673
718
  json.dump(export_dict, file, indent=2)
674
- return None
719
+ return path_json
675
720
 
676
- def get_categories_from_current_state(self) -> Set[str]:
721
+ def get_categories_from_current_state(self) -> set[str]:
677
722
  """Returns all active dumped categories"""
678
723
  return {ann.category_name for ann in self.get_annotation()}
724
+
725
+ def get_service_id_to_annotation_id(self) -> defaultdict[str, list[str]]:
726
+ """
727
+ Returns a dictionary with service ids as keys and lists of annotation ids that have been generated by the
728
+ service
729
+ :return: default with service ids as keys and lists of annotation ids as values
730
+ """
731
+ service_id_dict = defaultdict(list)
732
+ for ann in self.get_annotation():
733
+ if ann.service_id:
734
+ service_id_dict[ann.service_id].append(ann.annotation_id)
735
+ for sub_cat_key in ann.sub_categories:
736
+ sub_cat = ann.get_sub_category(sub_cat_key)
737
+ if sub_cat.service_id:
738
+ service_id_dict[sub_cat.service_id].append(sub_cat.annotation_id)
739
+ if ann.image is not None:
740
+ for summary_cat_key in ann.image.summary:
741
+ summary_cat = ann.get_summary(summary_cat_key)
742
+ if summary_cat.service_id:
743
+ service_id_dict[summary_cat.service_id].append(summary_cat.annotation_id)
744
+
745
+ return service_id_dict
746
+
747
+ def get_annotation_id_to_annotation_maps(self) -> defaultdict[str, list[AnnotationMap]]:
748
+ """
749
+ Returns a dictionary with annotation ids as keys and lists of AnnotationMap as values. The range of ids
750
+ is the union of all ImageAnnotation, CategoryAnnotation and ContainerAnnotation of the image.
751
+
752
+ :return: default dict with annotation ids as keys and lists of AnnotationMap as values
753
+ """
754
+ all_ann_id_dict = defaultdict(list)
755
+ for ann in self.get_annotation():
756
+ ann_id_dict = ann.get_annotation_map()
757
+ for key, val in ann_id_dict.items():
758
+ all_ann_id_dict[key].extend(val)
759
+
760
+ return all_ann_id_dict