deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +8 -25
- deepdoctection/analyzer/dd.py +84 -71
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +78 -56
- deepdoctection/datapoint/box.py +7 -7
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +157 -75
- deepdoctection/datapoint/view.py +175 -151
- deepdoctection/datasets/adapter.py +30 -24
- deepdoctection/datasets/base.py +10 -10
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +23 -25
- deepdoctection/datasets/instances/doclaynet.py +48 -49
- deepdoctection/datasets/instances/fintabnet.py +44 -45
- deepdoctection/datasets/instances/funsd.py +23 -23
- deepdoctection/datasets/instances/iiitar13k.py +8 -8
- deepdoctection/datasets/instances/layouttest.py +2 -2
- deepdoctection/datasets/instances/publaynet.py +3 -3
- deepdoctection/datasets/instances/pubtables1m.py +18 -18
- deepdoctection/datasets/instances/pubtabnet.py +30 -29
- deepdoctection/datasets/instances/rvlcdip.py +28 -29
- deepdoctection/datasets/instances/xfund.py +51 -30
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +13 -12
- deepdoctection/eval/eval.py +32 -26
- deepdoctection/eval/tedsmetric.py +16 -12
- deepdoctection/eval/tp_eval_callback.py +7 -16
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +69 -89
- deepdoctection/extern/deskew.py +11 -10
- deepdoctection/extern/doctrocr.py +81 -64
- deepdoctection/extern/fastlang.py +23 -16
- deepdoctection/extern/hfdetr.py +53 -38
- deepdoctection/extern/hflayoutlm.py +216 -155
- deepdoctection/extern/hflm.py +35 -30
- deepdoctection/extern/model.py +433 -255
- deepdoctection/extern/pdftext.py +15 -15
- deepdoctection/extern/pt/ptutils.py +4 -2
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +14 -16
- deepdoctection/extern/tp/tfutils.py +16 -2
- deepdoctection/extern/tp/tpcompat.py +11 -7
- deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
- deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
- deepdoctection/extern/tpdetect.py +40 -45
- deepdoctection/mapper/cats.py +36 -40
- deepdoctection/mapper/cocostruct.py +16 -12
- deepdoctection/mapper/d2struct.py +22 -22
- deepdoctection/mapper/hfstruct.py +7 -7
- deepdoctection/mapper/laylmstruct.py +22 -24
- deepdoctection/mapper/maputils.py +9 -10
- deepdoctection/mapper/match.py +33 -2
- deepdoctection/mapper/misc.py +6 -7
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +6 -6
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +3 -3
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/anngen.py +39 -14
- deepdoctection/pipe/base.py +68 -99
- deepdoctection/pipe/common.py +181 -85
- deepdoctection/pipe/concurrency.py +14 -10
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +18 -16
- deepdoctection/pipe/lm.py +49 -47
- deepdoctection/pipe/order.py +63 -65
- deepdoctection/pipe/refine.py +102 -109
- deepdoctection/pipe/segment.py +157 -162
- deepdoctection/pipe/sub_layout.py +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/d2_frcnn_train.py +27 -25
- deepdoctection/train/hf_detr_train.py +22 -18
- deepdoctection/train/hf_layoutlm_train.py +49 -48
- deepdoctection/train/tp_frcnn_train.py +10 -11
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +52 -14
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +41 -14
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +15 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/pdf_utils.py +39 -14
- deepdoctection/utils/settings.py +188 -182
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +70 -69
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
- deepdoctection-0.34.dist-info/RECORD +146 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.32.dist-info/RECORD +0 -146
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
|
@@ -19,23 +19,25 @@
|
|
|
19
19
|
Methods that convert incoming data to dataflows.
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
22
24
|
import itertools
|
|
23
25
|
import json
|
|
24
26
|
import os
|
|
25
27
|
from collections import defaultdict
|
|
26
28
|
from pathlib import Path
|
|
27
|
-
from typing import DefaultDict, Dict, List, Optional, Sequence, Union
|
|
29
|
+
from typing import Any, DefaultDict, Dict, Iterator, List, Optional, Sequence, TextIO, Union
|
|
28
30
|
|
|
29
31
|
from jsonlines import Reader, Writer
|
|
30
32
|
from tabulate import tabulate
|
|
31
33
|
from termcolor import colored
|
|
32
34
|
|
|
33
35
|
from ..utils.context import timed_operation
|
|
34
|
-
from ..utils.detection_types import JsonDict, Pathlike
|
|
35
36
|
from ..utils.error import FileExtensionError
|
|
36
37
|
from ..utils.identifier import get_uuid_from_str
|
|
37
38
|
from ..utils.pdf_utils import PDFStreamer
|
|
38
39
|
from ..utils.tqdm import get_tqdm
|
|
40
|
+
from ..utils.types import JsonDict, PathLikeOrStr
|
|
39
41
|
from ..utils.utils import is_file_extension
|
|
40
42
|
from .base import DataFlow
|
|
41
43
|
from .common import FlattenData, JoinData, MapData
|
|
@@ -53,6 +55,59 @@ def _reset_df_and_get_length(df: DataFlow) -> int:
|
|
|
53
55
|
return length
|
|
54
56
|
|
|
55
57
|
|
|
58
|
+
class FileClosingIterator:
|
|
59
|
+
"""
|
|
60
|
+
A custom iterator that closes the file object once the iteration is complete.
|
|
61
|
+
|
|
62
|
+
This iterator is used to ensure that the file object is properly closed after
|
|
63
|
+
reading the data from it. It is used in the context of reading data from a file
|
|
64
|
+
in a streaming manner, where the data is not loaded into memory all at once.
|
|
65
|
+
|
|
66
|
+
**Example:**
|
|
67
|
+
|
|
68
|
+
file = open(path, "r")
|
|
69
|
+
iterator = Reader(file)
|
|
70
|
+
closing_iterator = FileClosingIterator(file, iter(iterator))
|
|
71
|
+
|
|
72
|
+
df = CustomDataFromIterable(closing_iterator, max_datapoints=max_datapoints) # set up a dataflow
|
|
73
|
+
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
def __init__(self, file_obj: TextIO, iterator: Iterator[Any]):
|
|
77
|
+
"""
|
|
78
|
+
Initializes the FileClosingIterator with a file object and its iterator.
|
|
79
|
+
|
|
80
|
+
:param file_obj (TextIO): The file object to read data from.
|
|
81
|
+
:param iterator (Iterator): The actual iterator of the file object.
|
|
82
|
+
"""
|
|
83
|
+
self.file_obj = file_obj
|
|
84
|
+
self.iterator = iterator
|
|
85
|
+
|
|
86
|
+
def __iter__(self) -> FileClosingIterator:
|
|
87
|
+
"""
|
|
88
|
+
Returns the iterator object itself.
|
|
89
|
+
|
|
90
|
+
:return: FileClosingIterator: The instance of the class itself.
|
|
91
|
+
"""
|
|
92
|
+
return self
|
|
93
|
+
|
|
94
|
+
def __next__(self) -> Any:
|
|
95
|
+
"""
|
|
96
|
+
Returns the next item from the file object's iterator.
|
|
97
|
+
Closes the file object if the iteration is finished.
|
|
98
|
+
|
|
99
|
+
:return: The next item from the file object's iterator.
|
|
100
|
+
|
|
101
|
+
Raises:
|
|
102
|
+
StopIteration: If there are no more items to return.
|
|
103
|
+
"""
|
|
104
|
+
try:
|
|
105
|
+
return next(self.iterator)
|
|
106
|
+
except StopIteration as exc:
|
|
107
|
+
self.file_obj.close()
|
|
108
|
+
raise StopIteration from exc
|
|
109
|
+
|
|
110
|
+
|
|
56
111
|
class SerializerJsonlines:
|
|
57
112
|
"""
|
|
58
113
|
Serialize a dataflow from a jsonlines file. Alternatively, save a dataflow of JSON objects to a .jsonl file.
|
|
@@ -66,7 +121,7 @@ class SerializerJsonlines:
|
|
|
66
121
|
"""
|
|
67
122
|
|
|
68
123
|
@staticmethod
|
|
69
|
-
def load(path:
|
|
124
|
+
def load(path: PathLikeOrStr, max_datapoints: Optional[int] = None) -> CustomDataFromIterable:
|
|
70
125
|
"""
|
|
71
126
|
:param path: a path to a .jsonl file.
|
|
72
127
|
:param max_datapoints: Will stop the iteration once max_datapoints have been streamed
|
|
@@ -75,10 +130,11 @@ class SerializerJsonlines:
|
|
|
75
130
|
"""
|
|
76
131
|
file = open(path, "r") # pylint: disable=W1514,R1732
|
|
77
132
|
iterator = Reader(file)
|
|
78
|
-
|
|
133
|
+
closing_iterator = FileClosingIterator(file, iter(iterator))
|
|
134
|
+
return CustomDataFromIterable(closing_iterator, max_datapoints=max_datapoints)
|
|
79
135
|
|
|
80
136
|
@staticmethod
|
|
81
|
-
def save(df: DataFlow, path:
|
|
137
|
+
def save(df: DataFlow, path: PathLikeOrStr, file_name: str, max_datapoints: Optional[int] = None) -> None:
|
|
82
138
|
"""
|
|
83
139
|
Writes a dataflow iteratively to a .jsonl file. Every datapoint must be a dict where all items are serializable.
|
|
84
140
|
As the length of the dataflow cannot be determined in every case max_datapoint prevents generating an
|
|
@@ -120,7 +176,7 @@ class SerializerTabsepFiles:
|
|
|
120
176
|
"""
|
|
121
177
|
|
|
122
178
|
@staticmethod
|
|
123
|
-
def load(path:
|
|
179
|
+
def load(path: PathLikeOrStr, max_datapoins: Optional[int] = None) -> CustomDataFromList:
|
|
124
180
|
"""
|
|
125
181
|
:param path: a path to a .txt file.
|
|
126
182
|
:param max_datapoins: Will stop the iteration once max_datapoints have been streamed
|
|
@@ -133,7 +189,7 @@ class SerializerTabsepFiles:
|
|
|
133
189
|
return CustomDataFromList(file_list, max_datapoints=max_datapoins)
|
|
134
190
|
|
|
135
191
|
@staticmethod
|
|
136
|
-
def save(df: DataFlow, path:
|
|
192
|
+
def save(df: DataFlow, path: PathLikeOrStr, file_name: str, max_datapoints: Optional[int] = None) -> None:
|
|
137
193
|
"""
|
|
138
194
|
Writes a dataflow iteratively to a .txt file. Every datapoint must be a string.
|
|
139
195
|
As the length of the dataflow cannot be determined in every case max_datapoint prevents generating an
|
|
@@ -168,7 +224,7 @@ class SerializerFiles:
|
|
|
168
224
|
|
|
169
225
|
@staticmethod
|
|
170
226
|
def load(
|
|
171
|
-
path:
|
|
227
|
+
path: PathLikeOrStr,
|
|
172
228
|
file_type: Union[str, Sequence[str]],
|
|
173
229
|
max_datapoints: Optional[int] = None,
|
|
174
230
|
shuffle: Optional[bool] = False,
|
|
@@ -190,15 +246,14 @@ class SerializerFiles:
|
|
|
190
246
|
df2: DataFlow
|
|
191
247
|
df3: DataFlow
|
|
192
248
|
|
|
193
|
-
|
|
194
|
-
path = Path(path)
|
|
249
|
+
path = Path(path)
|
|
195
250
|
if not path.exists():
|
|
196
251
|
raise NotADirectoryError(f"The path {path} to the directory or file does not exist")
|
|
197
252
|
|
|
198
253
|
if shuffle:
|
|
199
254
|
sort = False
|
|
200
|
-
it1 = os.walk(path, topdown=False)
|
|
201
|
-
it2 = os.walk(path, topdown=False)
|
|
255
|
+
it1 = os.walk(os.fspath(path), topdown=False)
|
|
256
|
+
it2 = os.walk(os.fspath(path), topdown=False)
|
|
202
257
|
df1 = CustomDataFromIterable(it1)
|
|
203
258
|
df2 = CustomDataFromIterable(it2)
|
|
204
259
|
df1 = MapData(df1, lambda dp: None if len(dp[2]) == 0 else dp)
|
|
@@ -237,7 +292,7 @@ class CocoParser:
|
|
|
237
292
|
:param annotation_file: location of annotation file
|
|
238
293
|
"""
|
|
239
294
|
|
|
240
|
-
def __init__(self, annotation_file: Optional[
|
|
295
|
+
def __init__(self, annotation_file: Optional[PathLikeOrStr] = None) -> None:
|
|
241
296
|
self.dataset: JsonDict = {}
|
|
242
297
|
self.anns: Dict[int, JsonDict] = {}
|
|
243
298
|
self.cats: Dict[int, JsonDict] = {}
|
|
@@ -465,7 +520,7 @@ class SerializerCoco:
|
|
|
465
520
|
"""
|
|
466
521
|
|
|
467
522
|
@staticmethod
|
|
468
|
-
def load(path:
|
|
523
|
+
def load(path: PathLikeOrStr, max_datapoints: Optional[int] = None) -> DataFlow:
|
|
469
524
|
"""
|
|
470
525
|
Loads a .json file and generates a dataflow.
|
|
471
526
|
|
|
@@ -478,7 +533,7 @@ class SerializerCoco:
|
|
|
478
533
|
|
|
479
534
|
{'image':{'id',...},'annotations':[{'id':…,'bbox':...}]}
|
|
480
535
|
|
|
481
|
-
for each
|
|
536
|
+
for each image id. We use the type hint CocoDatapointDict to describe this dictionary
|
|
482
537
|
|
|
483
538
|
:param max_datapoints: Will stop the iteration once max_datapoints have been streamed.
|
|
484
539
|
:param path: a path to a .json file.
|
|
@@ -525,7 +580,7 @@ class SerializerPdfDoc:
|
|
|
525
580
|
"""
|
|
526
581
|
|
|
527
582
|
@staticmethod
|
|
528
|
-
def load(path:
|
|
583
|
+
def load(path: PathLikeOrStr, max_datapoints: Optional[int] = None) -> DataFlow:
|
|
529
584
|
"""
|
|
530
585
|
Loads the document page wise and returns a dataflow accordingly.
|
|
531
586
|
|
|
@@ -552,14 +607,16 @@ class SerializerPdfDoc:
|
|
|
552
607
|
return df
|
|
553
608
|
|
|
554
609
|
@staticmethod
|
|
555
|
-
def save(path:
|
|
610
|
+
def save(path: PathLikeOrStr) -> None:
|
|
556
611
|
"""
|
|
557
612
|
Not implemented
|
|
558
613
|
"""
|
|
559
614
|
raise NotImplementedError()
|
|
560
615
|
|
|
561
616
|
@staticmethod
|
|
562
|
-
def split(
|
|
617
|
+
def split(
|
|
618
|
+
path: PathLikeOrStr, path_target: Optional[PathLikeOrStr] = None, max_datapoint: Optional[int] = None
|
|
619
|
+
) -> None:
|
|
563
620
|
"""
|
|
564
621
|
Split a document into single pages.
|
|
565
622
|
"""
|
|
@@ -23,7 +23,7 @@ import uuid
|
|
|
23
23
|
import weakref
|
|
24
24
|
from abc import ABC, abstractmethod
|
|
25
25
|
from contextlib import contextmanager
|
|
26
|
-
from typing import Any, Callable, Iterator,
|
|
26
|
+
from typing import Any, Callable, Iterator, no_type_check
|
|
27
27
|
|
|
28
28
|
import zmq
|
|
29
29
|
|
|
@@ -236,7 +236,7 @@ class MultiThreadMapData(_ParallelMapData):
|
|
|
236
236
|
self._strict = strict
|
|
237
237
|
self.num_thread = num_thread
|
|
238
238
|
self.map_func = map_func
|
|
239
|
-
self._threads:
|
|
239
|
+
self._threads: list[Any] = []
|
|
240
240
|
self._evt = None
|
|
241
241
|
|
|
242
242
|
def reset_state(self) -> None:
|
|
@@ -284,7 +284,7 @@ class _MultiProcessZMQDataFlow(DataFlow, ABC):
|
|
|
284
284
|
if os.name == "nt":
|
|
285
285
|
raise EnvironmentError("ZMQ IPC doesn't support windows")
|
|
286
286
|
self._reset_done = False
|
|
287
|
-
self._procs:
|
|
287
|
+
self._procs: list[Any] = []
|
|
288
288
|
self.context = None
|
|
289
289
|
self.socket = None
|
|
290
290
|
|
|
@@ -12,7 +12,7 @@ Some DataFlow classes for serialization. Many classes have been taken from
|
|
|
12
12
|
|
|
13
13
|
import pickle
|
|
14
14
|
from copy import copy
|
|
15
|
-
from typing import Any, Iterable, Iterator,
|
|
15
|
+
from typing import Any, Iterable, Iterator, Optional, Union
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
|
|
@@ -23,7 +23,7 @@ from .base import DataFlow, RNGDataFlow
|
|
|
23
23
|
class DataFromList(RNGDataFlow):
|
|
24
24
|
"""Wrap a list of datapoints to a DataFlow"""
|
|
25
25
|
|
|
26
|
-
def __init__(self, lst:
|
|
26
|
+
def __init__(self, lst: list[Any], shuffle: bool = True) -> None:
|
|
27
27
|
"""
|
|
28
28
|
:param lst: input list. Each element is a datapoint.
|
|
29
29
|
:param shuffle: shuffle data.
|
|
@@ -79,11 +79,11 @@ class FakeData(RNGDataFlow):
|
|
|
79
79
|
|
|
80
80
|
def __init__(
|
|
81
81
|
self,
|
|
82
|
-
shapes:
|
|
82
|
+
shapes: list[Union[list[Any], tuple[Any]]],
|
|
83
83
|
size: int = 1000,
|
|
84
84
|
random: bool = True,
|
|
85
85
|
dtype: str = "float32",
|
|
86
|
-
domain:
|
|
86
|
+
domain: tuple[Union[float, int], Union[float, int]] = (0, 1),
|
|
87
87
|
):
|
|
88
88
|
"""
|
|
89
89
|
:param shapes: a list of lists/tuples. Shapes of each component.
|
deepdoctection/dataflow/stats.py
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"""
|
|
19
19
|
Dataflows for calculating statistical values of the underlying dataset
|
|
20
20
|
"""
|
|
21
|
-
from typing import Any, Optional,
|
|
21
|
+
from typing import Any, Optional, Union
|
|
22
22
|
|
|
23
23
|
import numpy as np
|
|
24
24
|
import numpy.typing as npt
|
|
@@ -45,7 +45,7 @@ class MeanFromDataFlow(ProxyDataFlow):
|
|
|
45
45
|
def __init__(
|
|
46
46
|
self,
|
|
47
47
|
df: DataFlow,
|
|
48
|
-
axis: Optional[Union[int,
|
|
48
|
+
axis: Optional[Union[int, tuple[int], tuple[int, int], tuple[int, int, int]]] = None,
|
|
49
49
|
key: Optional[str] = None,
|
|
50
50
|
max_datapoints: Optional[int] = None,
|
|
51
51
|
):
|
|
@@ -165,7 +165,7 @@ class StdFromDataFlow(ProxyDataFlow):
|
|
|
165
165
|
def __init__(
|
|
166
166
|
self,
|
|
167
167
|
df: DataFlow,
|
|
168
|
-
axis: Optional[Union[int,
|
|
168
|
+
axis: Optional[Union[int, tuple[int], tuple[int, int], tuple[int, int, int]]] = None,
|
|
169
169
|
key: Optional[str] = None,
|
|
170
170
|
max_datapoints: Optional[int] = None,
|
|
171
171
|
):
|
|
@@ -18,34 +18,39 @@
|
|
|
18
18
|
"""
|
|
19
19
|
Dataclass for annotations and their derived classes.
|
|
20
20
|
"""
|
|
21
|
+
from __future__ import annotations
|
|
21
22
|
|
|
22
23
|
from abc import ABC, abstractmethod
|
|
24
|
+
from collections import defaultdict
|
|
23
25
|
from dataclasses import dataclass, field
|
|
24
|
-
from typing import
|
|
26
|
+
from typing import Optional, Union, no_type_check
|
|
25
27
|
|
|
26
|
-
from ..utils.detection_types import JsonDict
|
|
27
28
|
from ..utils.error import AnnotationError, UUIDError
|
|
28
29
|
from ..utils.identifier import get_uuid, is_uuid_like
|
|
29
30
|
from ..utils.logger import LoggingRecord, logger
|
|
30
|
-
from ..utils.settings import DefaultType, ObjectTypes,
|
|
31
|
+
from ..utils.settings import DefaultType, ObjectTypes, TypeOrStr, get_type
|
|
32
|
+
from ..utils.types import AnnotationDict
|
|
31
33
|
from .box import BoundingBox
|
|
32
34
|
from .convert import as_dict
|
|
33
35
|
|
|
34
36
|
|
|
35
37
|
@no_type_check
|
|
36
|
-
def ann_from_dict(cls, **kwargs):
|
|
38
|
+
def ann_from_dict(cls, **kwargs: AnnotationDict):
|
|
37
39
|
"""
|
|
38
40
|
A factory function to create subclasses of annotations from a given dict
|
|
39
41
|
"""
|
|
40
42
|
_init_kwargs = {
|
|
41
43
|
"external_id": kwargs.get("external_id"),
|
|
42
44
|
"category_name": kwargs.get("category_name"),
|
|
43
|
-
"category_id": kwargs.get("category_id"),
|
|
45
|
+
"category_id": kwargs.get("category_id", DEFAULT_CATEGORY_ID),
|
|
44
46
|
"score": kwargs.get("score"),
|
|
45
47
|
"service_id": kwargs.get("service_id"),
|
|
46
48
|
"model_id": kwargs.get("model_id"),
|
|
47
49
|
"session_id": kwargs.get("session_id"),
|
|
48
50
|
}
|
|
51
|
+
_init_kwargs["category_id"] = (
|
|
52
|
+
int(_init_kwargs["category_id"]) if (_init_kwargs)["category_id"] not in ("None", "") else DEFAULT_CATEGORY_ID
|
|
53
|
+
)
|
|
49
54
|
ann = cls(**_init_kwargs)
|
|
50
55
|
ann.active = kwargs.get("active")
|
|
51
56
|
ann._annotation_id = kwargs.get("_annotation_id") # pylint: disable=W0212
|
|
@@ -62,6 +67,16 @@ def ann_from_dict(cls, **kwargs):
|
|
|
62
67
|
return ann
|
|
63
68
|
|
|
64
69
|
|
|
70
|
+
@dataclass(frozen=True)
|
|
71
|
+
class AnnotationMap:
|
|
72
|
+
"""AnnotationMap to store all sub categories, relationship keys and summary keys of an annotation"""
|
|
73
|
+
|
|
74
|
+
image_annotation_id: str
|
|
75
|
+
sub_category_key: Optional[ObjectTypes] = None
|
|
76
|
+
relationship_key: Optional[ObjectTypes] = None
|
|
77
|
+
summary_key: Optional[ObjectTypes] = None
|
|
78
|
+
|
|
79
|
+
|
|
65
80
|
@dataclass
|
|
66
81
|
class Annotation(ABC):
|
|
67
82
|
"""
|
|
@@ -134,7 +149,7 @@ class Annotation(ABC):
|
|
|
134
149
|
raise AnnotationError("Annotation_id must be uuid3 string")
|
|
135
150
|
|
|
136
151
|
@abstractmethod
|
|
137
|
-
def get_defining_attributes(self) ->
|
|
152
|
+
def get_defining_attributes(self) -> list[str]:
|
|
138
153
|
"""
|
|
139
154
|
Defining attributes of an annotation instance are attributes, of which you think that they uniquely
|
|
140
155
|
describe the annotation object. If you do not provide an external id, only the defining attributes will be used
|
|
@@ -151,7 +166,7 @@ class Annotation(ABC):
|
|
|
151
166
|
raise AnnotationError(f"Attribute {attr} must have __str__ method")
|
|
152
167
|
|
|
153
168
|
@staticmethod
|
|
154
|
-
def set_annotation_id(annotation:
|
|
169
|
+
def set_annotation_id(annotation: CategoryAnnotation, *container_id_context: Optional[str]) -> str:
|
|
155
170
|
"""
|
|
156
171
|
Defines the `annotation_id` by attributes of the annotation class as well as by external parameters given by a
|
|
157
172
|
tuple or list of container id contexts.
|
|
@@ -167,7 +182,7 @@ class Annotation(ABC):
|
|
|
167
182
|
attributes_values = [str(getattr(annotation, attribute)) for attribute in attributes]
|
|
168
183
|
return get_uuid(*attributes_values, *container_id_context) # type: ignore
|
|
169
184
|
|
|
170
|
-
def as_dict(self) ->
|
|
185
|
+
def as_dict(self) -> AnnotationDict:
|
|
171
186
|
"""
|
|
172
187
|
Returning the full dataclass as dict. Uses the custom `convert.as_dict` to disregard attributes defined by
|
|
173
188
|
`remove_keys`.
|
|
@@ -187,7 +202,7 @@ class Annotation(ABC):
|
|
|
187
202
|
|
|
188
203
|
@classmethod
|
|
189
204
|
@abstractmethod
|
|
190
|
-
def from_dict(cls, **kwargs:
|
|
205
|
+
def from_dict(cls, **kwargs: AnnotationDict) -> Annotation:
|
|
191
206
|
"""
|
|
192
207
|
Method to initialize a derived class from dict.
|
|
193
208
|
|
|
@@ -199,7 +214,7 @@ class Annotation(ABC):
|
|
|
199
214
|
|
|
200
215
|
@staticmethod
|
|
201
216
|
@abstractmethod
|
|
202
|
-
def get_state_attributes() ->
|
|
217
|
+
def get_state_attributes() -> list[str]:
|
|
203
218
|
"""
|
|
204
219
|
Similar to `get_defining_attributes` but for `state_id`
|
|
205
220
|
|
|
@@ -242,6 +257,9 @@ class Annotation(ABC):
|
|
|
242
257
|
return get_uuid(self.annotation_id, *container_ids)
|
|
243
258
|
|
|
244
259
|
|
|
260
|
+
DEFAULT_CATEGORY_ID = -1
|
|
261
|
+
|
|
262
|
+
|
|
245
263
|
@dataclass
|
|
246
264
|
class CategoryAnnotation(Annotation):
|
|
247
265
|
"""
|
|
@@ -268,12 +286,12 @@ class CategoryAnnotation(Annotation):
|
|
|
268
286
|
`dump_relationship` instead.
|
|
269
287
|
"""
|
|
270
288
|
|
|
271
|
-
category_name: TypeOrStr = field(default=DefaultType.
|
|
272
|
-
_category_name: ObjectTypes = field(default=DefaultType.
|
|
273
|
-
category_id:
|
|
289
|
+
category_name: TypeOrStr = field(default=DefaultType.DEFAULT_TYPE)
|
|
290
|
+
_category_name: ObjectTypes = field(default=DefaultType.DEFAULT_TYPE, init=False)
|
|
291
|
+
category_id: int = field(default=DEFAULT_CATEGORY_ID)
|
|
274
292
|
score: Optional[float] = field(default=None)
|
|
275
|
-
sub_categories:
|
|
276
|
-
relationships:
|
|
293
|
+
sub_categories: dict[ObjectTypes, CategoryAnnotation] = field(default_factory=dict, init=False, repr=True)
|
|
294
|
+
relationships: dict[ObjectTypes, list[str]] = field(default_factory=dict, init=False, repr=True)
|
|
277
295
|
|
|
278
296
|
@property # type: ignore
|
|
279
297
|
def category_name(self) -> ObjectTypes:
|
|
@@ -287,13 +305,11 @@ class CategoryAnnotation(Annotation):
|
|
|
287
305
|
self._category_name = get_type(category_name)
|
|
288
306
|
|
|
289
307
|
def __post_init__(self) -> None:
|
|
290
|
-
self.category_id = str(self.category_id)
|
|
291
|
-
assert self.category_name
|
|
292
308
|
self._assert_attributes_have_str(state_id=True)
|
|
293
309
|
super().__post_init__()
|
|
294
310
|
|
|
295
311
|
def dump_sub_category(
|
|
296
|
-
self, sub_category_name: TypeOrStr, annotation:
|
|
312
|
+
self, sub_category_name: TypeOrStr, annotation: CategoryAnnotation, *container_id_context: Optional[str]
|
|
297
313
|
) -> None:
|
|
298
314
|
"""
|
|
299
315
|
Storage of sub-categories. As sub-categories usually only depend on very few attributes and the parent
|
|
@@ -324,7 +340,7 @@ class CategoryAnnotation(Annotation):
|
|
|
324
340
|
)
|
|
325
341
|
self.sub_categories[get_type(sub_category_name)] = annotation
|
|
326
342
|
|
|
327
|
-
def get_sub_category(self, sub_category_name: ObjectTypes) ->
|
|
343
|
+
def get_sub_category(self, sub_category_name: ObjectTypes) -> CategoryAnnotation:
|
|
328
344
|
"""
|
|
329
345
|
Return a sub category by its key.
|
|
330
346
|
|
|
@@ -362,7 +378,7 @@ class CategoryAnnotation(Annotation):
|
|
|
362
378
|
if annotation_id not in self.relationships[key_type]:
|
|
363
379
|
self.relationships[key_type].append(annotation_id)
|
|
364
380
|
|
|
365
|
-
def get_relationship(self, key: ObjectTypes) ->
|
|
381
|
+
def get_relationship(self, key: ObjectTypes) -> list[str]:
|
|
366
382
|
"""
|
|
367
383
|
Returns a list of annotation ids stored with a given relationship key.
|
|
368
384
|
|
|
@@ -373,7 +389,7 @@ class CategoryAnnotation(Annotation):
|
|
|
373
389
|
return self.relationships[key]
|
|
374
390
|
return []
|
|
375
391
|
|
|
376
|
-
def remove_relationship(self, key: ObjectTypes, annotation_ids: Optional[Union[
|
|
392
|
+
def remove_relationship(self, key: ObjectTypes, annotation_ids: Optional[Union[list[str], str]] = None) -> None:
|
|
377
393
|
"""
|
|
378
394
|
Remove relationship by some given keys and ids. If no annotation ids are provided all relationship according
|
|
379
395
|
to the key will be removed.
|
|
@@ -392,27 +408,28 @@ class CategoryAnnotation(Annotation):
|
|
|
392
408
|
except ValueError:
|
|
393
409
|
logger.warning(LoggingRecord(f"Relationship {key} cannot be removed because it does not exist"))
|
|
394
410
|
else:
|
|
395
|
-
self.relationships
|
|
411
|
+
if key in self.relationships:
|
|
412
|
+
self.relationships[key].clear()
|
|
396
413
|
|
|
397
|
-
def get_defining_attributes(self) ->
|
|
414
|
+
def get_defining_attributes(self) -> list[str]:
|
|
398
415
|
return ["category_name", "category_id"]
|
|
399
416
|
|
|
400
417
|
@staticmethod
|
|
401
|
-
def remove_keys() ->
|
|
418
|
+
def remove_keys() -> list[str]:
|
|
402
419
|
"""
|
|
403
420
|
A list of attributes to suspend from as_dict creation.
|
|
404
421
|
|
|
405
|
-
:return:
|
|
422
|
+
:return: list of attributes.
|
|
406
423
|
"""
|
|
407
|
-
return []
|
|
424
|
+
return ["_category_name"]
|
|
408
425
|
|
|
409
426
|
@classmethod
|
|
410
|
-
def from_dict(cls, **kwargs:
|
|
427
|
+
def from_dict(cls, **kwargs: AnnotationDict) -> CategoryAnnotation:
|
|
411
428
|
category_ann = ann_from_dict(cls, **kwargs)
|
|
412
429
|
return category_ann
|
|
413
430
|
|
|
414
431
|
@staticmethod
|
|
415
|
-
def get_state_attributes() ->
|
|
432
|
+
def get_state_attributes() -> list[str]:
|
|
416
433
|
return ["active", "sub_categories", "relationships"]
|
|
417
434
|
|
|
418
435
|
|
|
@@ -432,20 +449,20 @@ class ImageAnnotation(CategoryAnnotation):
|
|
|
432
449
|
"""
|
|
433
450
|
|
|
434
451
|
bounding_box: Optional[BoundingBox] = field(default=None)
|
|
435
|
-
image: Optional[
|
|
452
|
+
image: Optional[Image] = field(default=None, init=False, repr=False) # type: ignore # pylint: disable=E0602
|
|
436
453
|
|
|
437
|
-
def get_defining_attributes(self) ->
|
|
454
|
+
def get_defining_attributes(self) -> list[str]:
|
|
438
455
|
return ["category_name", "bounding_box"]
|
|
439
456
|
|
|
440
457
|
@classmethod
|
|
441
|
-
def from_dict(cls, **kwargs:
|
|
458
|
+
def from_dict(cls, **kwargs: AnnotationDict) -> ImageAnnotation:
|
|
442
459
|
image_ann = ann_from_dict(cls, **kwargs)
|
|
443
460
|
if box_kwargs := kwargs.get("bounding_box"):
|
|
444
461
|
image_ann.bounding_box = BoundingBox.from_dict(**box_kwargs)
|
|
445
462
|
return image_ann
|
|
446
463
|
|
|
447
464
|
@staticmethod
|
|
448
|
-
def get_state_attributes() ->
|
|
465
|
+
def get_state_attributes() -> list[str]:
|
|
449
466
|
return ["active", "sub_categories", "relationships", "image"]
|
|
450
467
|
|
|
451
468
|
def get_bounding_box(self, image_id: Optional[str] = None) -> BoundingBox:
|
|
@@ -462,29 +479,34 @@ class ImageAnnotation(CategoryAnnotation):
|
|
|
462
479
|
def get_summary(self, key: ObjectTypes) -> CategoryAnnotation:
|
|
463
480
|
"""Get summary sub categories from `image`. Raises `ValueError` if `key` is not available"""
|
|
464
481
|
if self.image:
|
|
465
|
-
|
|
466
|
-
return self.image.summary.get_sub_category(key)
|
|
482
|
+
return self.image.summary.get_sub_category(key)
|
|
467
483
|
raise AnnotationError(f"Summary does not exist for {self.annotation_id} and key: {key}")
|
|
468
484
|
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
485
|
+
def get_annotation_map(self) -> defaultdict[str, list[AnnotationMap]]:
|
|
486
|
+
"""
|
|
487
|
+
Returns a defaultdict with annotation ids as keys and a list of AnnotationMap instances as values for all sub
|
|
488
|
+
categories, relationships and image summaries.
|
|
489
|
+
:return: defaultdict with annotation ids as keys and a list of AnnotationMap instances as values.
|
|
490
|
+
"""
|
|
491
|
+
annotation_id_dict = defaultdict(list)
|
|
492
|
+
annotation_id_dict[self.annotation_id].append(AnnotationMap(image_annotation_id=self.annotation_id))
|
|
493
|
+
for sub_cat_key in self.sub_categories:
|
|
494
|
+
sub_cat = self.get_sub_category(sub_cat_key)
|
|
495
|
+
annotation_id_dict[sub_cat.annotation_id].append(
|
|
496
|
+
AnnotationMap(image_annotation_id=self.annotation_id, sub_category_key=sub_cat_key)
|
|
497
|
+
)
|
|
498
|
+
if self.image is not None:
|
|
499
|
+
for summary_cat_key in self.image.summary.sub_categories:
|
|
500
|
+
summary_cat = self.get_summary(summary_cat_key)
|
|
501
|
+
annotation_id_dict[summary_cat.annotation_id].append(
|
|
502
|
+
AnnotationMap(image_annotation_id=self.annotation_id, summary_key=summary_cat_key)
|
|
503
|
+
)
|
|
504
|
+
for rel_key in self.relationships:
|
|
505
|
+
for rel_ann_ids in self.get_relationship(rel_key):
|
|
506
|
+
annotation_id_dict[rel_ann_ids].append(
|
|
507
|
+
AnnotationMap(image_annotation_id=self.annotation_id, relationship_key=rel_key)
|
|
508
|
+
)
|
|
509
|
+
return annotation_id_dict
|
|
488
510
|
|
|
489
511
|
|
|
490
512
|
@dataclass
|
|
@@ -496,13 +518,13 @@ class ContainerAnnotation(CategoryAnnotation):
|
|
|
496
518
|
value: Attribute to store the value. Use strings.
|
|
497
519
|
"""
|
|
498
520
|
|
|
499
|
-
value: Optional[Union[
|
|
521
|
+
value: Optional[Union[list[str], str]] = field(default=None)
|
|
500
522
|
|
|
501
|
-
def get_defining_attributes(self) ->
|
|
523
|
+
def get_defining_attributes(self) -> list[str]:
|
|
502
524
|
return ["category_name", "value"]
|
|
503
525
|
|
|
504
526
|
@classmethod
|
|
505
|
-
def from_dict(cls, **kwargs:
|
|
527
|
+
def from_dict(cls, **kwargs: AnnotationDict) -> ContainerAnnotation:
|
|
506
528
|
container_ann = ann_from_dict(cls, **kwargs)
|
|
507
529
|
value = kwargs.get("value", "")
|
|
508
530
|
container_ann.value = value if isinstance(value, str) else list(value)
|
deepdoctection/datapoint/box.py
CHANGED
|
@@ -21,17 +21,17 @@ Implementation of BoundingBox class and related methods
|
|
|
21
21
|
|
|
22
22
|
from dataclasses import dataclass
|
|
23
23
|
from math import ceil, floor
|
|
24
|
-
from typing import
|
|
24
|
+
from typing import Optional, Sequence, no_type_check
|
|
25
25
|
|
|
26
26
|
import numpy as np
|
|
27
27
|
import numpy.typing as npt
|
|
28
28
|
from lazy_imports import try_import
|
|
29
29
|
from numpy import float32
|
|
30
30
|
|
|
31
|
-
from ..utils.detection_types import ImageType
|
|
32
31
|
from ..utils.error import BoundingBoxError
|
|
33
32
|
from ..utils.file_utils import cocotools_available
|
|
34
33
|
from ..utils.logger import LoggingRecord, logger
|
|
34
|
+
from ..utils.types import PixelValues
|
|
35
35
|
|
|
36
36
|
with try_import() as import_guard:
|
|
37
37
|
import pycocotools.mask as coco_mask
|
|
@@ -221,7 +221,7 @@ class BoundingBox:
|
|
|
221
221
|
return self.uly + 0.5 * self.height
|
|
222
222
|
|
|
223
223
|
@property
|
|
224
|
-
def center(self) ->
|
|
224
|
+
def center(self) -> list[float]:
|
|
225
225
|
"""
|
|
226
226
|
Bounding box center [x,y]
|
|
227
227
|
"""
|
|
@@ -264,7 +264,7 @@ class BoundingBox:
|
|
|
264
264
|
* np_poly_scale
|
|
265
265
|
)
|
|
266
266
|
|
|
267
|
-
def to_list(self, mode: str, scale_x: float = 1.0, scale_y: float = 1.0) ->
|
|
267
|
+
def to_list(self, mode: str, scale_x: float = 1.0, scale_y: float = 1.0) -> list[float]:
|
|
268
268
|
"""
|
|
269
269
|
Returns the coordinates as list
|
|
270
270
|
|
|
@@ -345,7 +345,7 @@ class BoundingBox:
|
|
|
345
345
|
return f"Bounding Box ulx: {self.ulx}, uly: {self.uly}, lrx: {self.lrx}, lry: {self.lry}"
|
|
346
346
|
|
|
347
347
|
@staticmethod
|
|
348
|
-
def remove_keys() ->
|
|
348
|
+
def remove_keys() -> list[str]:
|
|
349
349
|
"""
|
|
350
350
|
A list of attributes to suspend from as_dict creation.
|
|
351
351
|
"""
|
|
@@ -398,8 +398,8 @@ def intersection_box(
|
|
|
398
398
|
|
|
399
399
|
|
|
400
400
|
def crop_box_from_image(
|
|
401
|
-
np_image:
|
|
402
|
-
) ->
|
|
401
|
+
np_image: PixelValues, crop_box: BoundingBox, width: Optional[float] = None, height: Optional[float] = None
|
|
402
|
+
) -> PixelValues:
|
|
403
403
|
"""
|
|
404
404
|
Crop a box (the crop_box) from a np_image. Will floor the left and ceil the right coordinate point.
|
|
405
405
|
|