deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.0.dist-info/METADATA +0 -431
- deepdoctection-0.42.0.dist-info/RECORD +0 -148
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/datapoint/box.py
CHANGED
|
@@ -16,8 +16,9 @@
|
|
|
16
16
|
# limitations under the License.
|
|
17
17
|
|
|
18
18
|
"""
|
|
19
|
-
|
|
19
|
+
`BoundingBox` class and methods for manipulating bounding boxes.
|
|
20
20
|
"""
|
|
21
|
+
|
|
21
22
|
from __future__ import annotations
|
|
22
23
|
|
|
23
24
|
from dataclasses import dataclass
|
|
@@ -43,12 +44,14 @@ with try_import() as import_guard:
|
|
|
43
44
|
|
|
44
45
|
def coco_iou(box_a: npt.NDArray[float32], box_b: npt.NDArray[float32]) -> npt.NDArray[float32]:
|
|
45
46
|
"""
|
|
46
|
-
Calculate iou for two arrays of bounding boxes in xyxy format
|
|
47
|
+
Calculate iou for two arrays of bounding boxes in `xyxy` format
|
|
47
48
|
|
|
48
|
-
:
|
|
49
|
-
|
|
49
|
+
Args:
|
|
50
|
+
box_a: Array of shape Nx4
|
|
51
|
+
box_b: Array of shape Mx4
|
|
50
52
|
|
|
51
|
-
:
|
|
53
|
+
Returns:
|
|
54
|
+
Array of shape NxM
|
|
52
55
|
"""
|
|
53
56
|
|
|
54
57
|
def to_xywh(box: npt.NDArray[float32]) -> npt.NDArray[float32]:
|
|
@@ -69,9 +72,11 @@ def area(boxes: npt.NDArray[float32]) -> npt.NDArray[float32]:
|
|
|
69
72
|
"""
|
|
70
73
|
Computes area of boxes.
|
|
71
74
|
|
|
72
|
-
:
|
|
75
|
+
Args:
|
|
76
|
+
boxes: numpy array with shape [N, 4] holding N boxes in xyxy format
|
|
73
77
|
|
|
74
|
-
:
|
|
78
|
+
Returns:
|
|
79
|
+
A numpy array with shape `[N]` representing box areas
|
|
75
80
|
"""
|
|
76
81
|
return np.array((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]), dtype=float32)
|
|
77
82
|
|
|
@@ -83,10 +88,12 @@ def intersection(boxes1: npt.NDArray[float32], boxes2: npt.NDArray[float32]) ->
|
|
|
83
88
|
"""
|
|
84
89
|
Compute pairwise intersection areas between boxes.
|
|
85
90
|
|
|
86
|
-
:
|
|
87
|
-
|
|
91
|
+
Args:
|
|
92
|
+
boxes1: A `np.array` with shape `[N, 4]` holding `N` boxes in `xyxy` format
|
|
93
|
+
boxes2: A `np.array` with shape `[M, 4]` holding `M` boxes in `xyxy` format
|
|
88
94
|
|
|
89
|
-
:
|
|
95
|
+
Returns:
|
|
96
|
+
A `np.array` with shape `[N, M]` representing pairwise intersection area
|
|
90
97
|
"""
|
|
91
98
|
[y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1) # pylint: disable=W0632
|
|
92
99
|
[y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1) # pylint: disable=W0632
|
|
@@ -111,10 +118,12 @@ def np_iou(boxes1: npt.NDArray[float32], boxes2: npt.NDArray[float32]) -> npt.ND
|
|
|
111
118
|
"""
|
|
112
119
|
Computes pairwise intersection-over-union between box collections.
|
|
113
120
|
|
|
114
|
-
:
|
|
115
|
-
|
|
121
|
+
Args:
|
|
122
|
+
boxes1: a numpy array with shape [N, 4] holding N boxes in xyxy format.
|
|
123
|
+
boxes2: a numpy array with shape [M, 4] holding M boxes in xyxy format.
|
|
116
124
|
|
|
117
|
-
:
|
|
125
|
+
Returns:
|
|
126
|
+
A `np.array` with shape `[N, M]` representing pairwise iou scores.
|
|
118
127
|
"""
|
|
119
128
|
intersect = intersection(boxes1, boxes2)
|
|
120
129
|
area1 = area(boxes1)
|
|
@@ -126,16 +135,21 @@ def np_iou(boxes1: npt.NDArray[float32], boxes2: npt.NDArray[float32]) -> npt.ND
|
|
|
126
135
|
|
|
127
136
|
def iou(boxes1: npt.NDArray[float32], boxes2: npt.NDArray[float32]) -> npt.NDArray[float32]:
|
|
128
137
|
"""
|
|
129
|
-
Computes pairwise intersection-over-union between box collections.
|
|
130
|
-
|
|
138
|
+
Computes pairwise intersection-over-union between box collections.
|
|
139
|
+
|
|
140
|
+
Note:
|
|
141
|
+
The method will be chosen based on what is installed:
|
|
131
142
|
|
|
132
|
-
|
|
133
|
-
|
|
143
|
+
- If `pycocotools` is installed it will choose `pycocotools.mask.iou` which is a `C++` implementation
|
|
144
|
+
and much faster
|
|
145
|
+
- Otherwise it will use the numpy implementation as fallback
|
|
134
146
|
|
|
135
|
-
:
|
|
136
|
-
|
|
147
|
+
Args:
|
|
148
|
+
boxes1: A `np.array` with shape `[N, 4]` holding `N` boxes in `xyxy` format.
|
|
149
|
+
boxes2: A `np.array` with shape `[M, 4]` holding `M` boxes in `xyxy` format.
|
|
137
150
|
|
|
138
|
-
:
|
|
151
|
+
Returns:
|
|
152
|
+
A `np.array` with shape `[N, M]` representing pairwise iou scores.
|
|
139
153
|
"""
|
|
140
154
|
|
|
141
155
|
if cocotools_available():
|
|
@@ -143,7 +157,7 @@ def iou(boxes1: npt.NDArray[float32], boxes2: npt.NDArray[float32]) -> npt.NDArr
|
|
|
143
157
|
return np_iou(boxes1, boxes2)
|
|
144
158
|
|
|
145
159
|
|
|
146
|
-
|
|
160
|
+
RELATIVE_COORD_SCALE_FACTOR = 10**8
|
|
147
161
|
|
|
148
162
|
|
|
149
163
|
@dataclass
|
|
@@ -152,18 +166,31 @@ class BoundingBox:
|
|
|
152
166
|
Rectangular bounding box that stores coordinates and allows different representations.
|
|
153
167
|
|
|
154
168
|
This implementation differs from the previous version by using internal integer storage with precision scaling
|
|
155
|
-
for both absolute and relative coordinates. Coordinates are stored internally as integers (_ulx, _uly, etc.)
|
|
156
|
-
with relative coordinates multiplied by RELATIVE_COORD_CONVERTER
|
|
169
|
+
for both absolute and relative coordinates. Coordinates are stored internally as integers `(_ulx, _uly, etc.)`
|
|
170
|
+
with relative coordinates multiplied by `RELATIVE_COORD_SCALE_FACTOR` for precision. Properties `(ulx, uly, etc.)`
|
|
157
171
|
handle the conversion between internal storage and exposed values.
|
|
158
172
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
173
|
+
Note:
|
|
174
|
+
You can define an instance by passing:
|
|
175
|
+
|
|
176
|
+
- Upper left point `(ulx, uly)` + `width` and `height`, OR
|
|
177
|
+
- Upper left point `(ulx, uly)` + lower right point `(lrx, lry)`
|
|
178
|
+
|
|
179
|
+
Note:
|
|
180
|
+
- When `absolute_coords=True`, coordinates will be rounded to integers
|
|
181
|
+
- When `absolute_coords=False`, coordinates must be between 0 and 1
|
|
182
|
+
- The box is validated on initialization to ensure coordinates are valid
|
|
183
|
+
|
|
184
|
+
Attributes:
|
|
185
|
+
absolute_coords: Whether the coordinates are absolute pixel values (`True`) or normalized
|
|
186
|
+
`[0,1]` values (`False`).
|
|
187
|
+
_ulx: Upper-left x-coordinate, stored as an integer.
|
|
188
|
+
_uly: Upper-left y-coordinate, stored as an integer.
|
|
189
|
+
_lrx: Lower-right x-coordinate, stored as an integer.
|
|
190
|
+
_lry: Lower-right y-coordinate, stored as an integer.
|
|
191
|
+
_height: Height of the bounding box, stored as an integer.
|
|
192
|
+
_width: Width of the bounding box, stored as an integer.
|
|
162
193
|
|
|
163
|
-
Notes:
|
|
164
|
-
- When absolute_coords=True, coordinates will be rounded to integers
|
|
165
|
-
- When absolute_coords=False, coordinates must be between 0 and 1
|
|
166
|
-
- The box is validated on initialization to ensure coordinates are valid
|
|
167
194
|
"""
|
|
168
195
|
|
|
169
196
|
absolute_coords: bool
|
|
@@ -185,23 +212,25 @@ class BoundingBox:
|
|
|
185
212
|
height: BoxCoordinate = 0,
|
|
186
213
|
):
|
|
187
214
|
"""
|
|
188
|
-
Initialize a BoundingBox instance with
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
:
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
215
|
+
Initialize a BoundingBox instance with specified coordinates.
|
|
216
|
+
|
|
217
|
+
Note:
|
|
218
|
+
This initializer supports two ways of defining a bounding box:
|
|
219
|
+
- Using upper-left coordinates (ulx, uly) with width and height
|
|
220
|
+
- Using upper-left (ulx, uly) and lower-right (lrx, lry) coordinates
|
|
221
|
+
|
|
222
|
+
When `absolute_coords=True`, coordinates are stored as integers.
|
|
223
|
+
When `absolute_coords=False`, coordinates are stored as scaled integers
|
|
224
|
+
(original `float values * RELATIVE_COORD_SCALE_FACTOR`) for precision.
|
|
225
|
+
|
|
226
|
+
Args:
|
|
227
|
+
absolute_coords: Whether coordinates are absolute pixels (`True`) or normalized `[0,1]` values (`False`)
|
|
228
|
+
ulx: Upper-left `x`-coordinate (`float` or `int`)
|
|
229
|
+
uly: Upper-left `y`-coordinate (`float` or `int`)
|
|
230
|
+
lrx: Lower-right `x`-coordinate (`float` or `int`), default 0
|
|
231
|
+
lry: Lower-right `y`-coordinate (`float` or `int`), default 0
|
|
232
|
+
width: Width of the bounding box (`float` or `int`), default 0
|
|
233
|
+
height: Height of the bounding box (`float` or `int`), default 0
|
|
205
234
|
"""
|
|
206
235
|
self.absolute_coords = absolute_coords
|
|
207
236
|
if absolute_coords:
|
|
@@ -214,14 +243,14 @@ class BoundingBox:
|
|
|
214
243
|
self._width = round(width)
|
|
215
244
|
self._height = round(height)
|
|
216
245
|
else:
|
|
217
|
-
self._ulx = round(ulx *
|
|
218
|
-
self._uly = round(uly *
|
|
246
|
+
self._ulx = round(ulx * RELATIVE_COORD_SCALE_FACTOR)
|
|
247
|
+
self._uly = round(uly * RELATIVE_COORD_SCALE_FACTOR)
|
|
219
248
|
if lrx and lry:
|
|
220
|
-
self._lrx = round(lrx *
|
|
221
|
-
self._lry = round(lry *
|
|
249
|
+
self._lrx = round(lrx * RELATIVE_COORD_SCALE_FACTOR)
|
|
250
|
+
self._lry = round(lry * RELATIVE_COORD_SCALE_FACTOR)
|
|
222
251
|
if width and height:
|
|
223
|
-
self._width = round(width *
|
|
224
|
-
self._height = round(height *
|
|
252
|
+
self._width = round(width * RELATIVE_COORD_SCALE_FACTOR)
|
|
253
|
+
self._height = round(height * RELATIVE_COORD_SCALE_FACTOR)
|
|
225
254
|
if not self._width and not self._height:
|
|
226
255
|
self._width = self._lrx - self._ulx
|
|
227
256
|
self._height = self._lry - self._uly
|
|
@@ -265,67 +294,67 @@ class BoundingBox:
|
|
|
265
294
|
@property
|
|
266
295
|
def ulx(self) -> BoxCoordinate:
|
|
267
296
|
"""ulx property"""
|
|
268
|
-
return self._ulx /
|
|
297
|
+
return self._ulx / RELATIVE_COORD_SCALE_FACTOR if not self.absolute_coords else self._ulx
|
|
269
298
|
|
|
270
299
|
@ulx.setter
|
|
271
300
|
def ulx(self, value: BoxCoordinate) -> None:
|
|
272
301
|
"""ulx setter"""
|
|
273
|
-
self._ulx = round(value *
|
|
302
|
+
self._ulx = round(value * RELATIVE_COORD_SCALE_FACTOR) if not self.absolute_coords else round(value)
|
|
274
303
|
self._width = self._lrx - self._ulx
|
|
275
304
|
|
|
276
305
|
@property
|
|
277
306
|
def uly(self) -> BoxCoordinate:
|
|
278
307
|
"""uly property"""
|
|
279
|
-
return self._uly /
|
|
308
|
+
return self._uly / RELATIVE_COORD_SCALE_FACTOR if not self.absolute_coords else self._uly
|
|
280
309
|
|
|
281
310
|
@uly.setter
|
|
282
311
|
def uly(self, value: BoxCoordinate) -> None:
|
|
283
312
|
"""uly setter"""
|
|
284
|
-
self._uly = round(value *
|
|
313
|
+
self._uly = round(value * RELATIVE_COORD_SCALE_FACTOR) if not self.absolute_coords else round(value)
|
|
285
314
|
self._height = self._lry - self._uly
|
|
286
315
|
|
|
287
316
|
@property
|
|
288
317
|
def lrx(self) -> BoxCoordinate:
|
|
289
318
|
"""lrx property"""
|
|
290
|
-
return self._lrx /
|
|
319
|
+
return self._lrx / RELATIVE_COORD_SCALE_FACTOR if not self.absolute_coords else self._lrx
|
|
291
320
|
|
|
292
321
|
@lrx.setter
|
|
293
322
|
def lrx(self, value: BoxCoordinate) -> None:
|
|
294
323
|
"""lrx setter"""
|
|
295
|
-
self._lrx = round(value *
|
|
324
|
+
self._lrx = round(value * RELATIVE_COORD_SCALE_FACTOR) if not self.absolute_coords else round(value)
|
|
296
325
|
self._width = self._lrx - self._ulx
|
|
297
326
|
|
|
298
327
|
@property
|
|
299
328
|
def lry(self) -> BoxCoordinate:
|
|
300
329
|
"""lry property"""
|
|
301
|
-
return self._lry /
|
|
330
|
+
return self._lry / RELATIVE_COORD_SCALE_FACTOR if not self.absolute_coords else self._lry
|
|
302
331
|
|
|
303
332
|
@lry.setter
|
|
304
333
|
def lry(self, value: BoxCoordinate) -> None:
|
|
305
334
|
"""lry setter"""
|
|
306
|
-
self._lry = round(value *
|
|
335
|
+
self._lry = round(value * RELATIVE_COORD_SCALE_FACTOR) if not self.absolute_coords else round(value)
|
|
307
336
|
self._height = self._lry - self._uly
|
|
308
337
|
|
|
309
338
|
@property
|
|
310
339
|
def width(self) -> BoxCoordinate:
|
|
311
340
|
"""width property"""
|
|
312
|
-
return self._width /
|
|
341
|
+
return self._width / RELATIVE_COORD_SCALE_FACTOR if not self.absolute_coords else self._width
|
|
313
342
|
|
|
314
343
|
@width.setter
|
|
315
344
|
def width(self, value: BoxCoordinate) -> None:
|
|
316
345
|
"""width setter"""
|
|
317
|
-
self._width = round(value *
|
|
346
|
+
self._width = round(value * RELATIVE_COORD_SCALE_FACTOR) if not self.absolute_coords else round(value)
|
|
318
347
|
self._lrx = self._ulx + self._width
|
|
319
348
|
|
|
320
349
|
@property
|
|
321
350
|
def height(self) -> BoxCoordinate:
|
|
322
351
|
"""height property"""
|
|
323
|
-
return self._height /
|
|
352
|
+
return self._height / RELATIVE_COORD_SCALE_FACTOR if not self.absolute_coords else self._height
|
|
324
353
|
|
|
325
354
|
@height.setter
|
|
326
355
|
def height(self, value: BoxCoordinate) -> None:
|
|
327
356
|
"""height setter"""
|
|
328
|
-
self._height = round(value *
|
|
357
|
+
self._height = round(value * RELATIVE_COORD_SCALE_FACTOR) if not self.absolute_coords else round(value)
|
|
329
358
|
self._lry = self._uly + self._height
|
|
330
359
|
|
|
331
360
|
@property
|
|
@@ -358,15 +387,18 @@ class BoundingBox:
|
|
|
358
387
|
"""
|
|
359
388
|
Returns the coordinates as numpy array.
|
|
360
389
|
|
|
361
|
-
:
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
390
|
+
Args:
|
|
391
|
+
mode: Mode for coordinate arrangement:
|
|
392
|
+
`xyxy` for upper left/lower right point representation,
|
|
393
|
+
`xywh` for upper left and width/height representation or
|
|
394
|
+
`poly` for full eight coordinate polygon representation. `x,y` coordinates will be
|
|
365
395
|
returned in counter-clockwise order.
|
|
366
396
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
397
|
+
scale_x: rescale the `x` coordinate. Defaults to `1`
|
|
398
|
+
scale_y: rescale the `y` coordinate. Defaults to `1`
|
|
399
|
+
|
|
400
|
+
Returns:
|
|
401
|
+
box coordinates
|
|
370
402
|
"""
|
|
371
403
|
np_box_scale = np.array([scale_x, scale_y, scale_x, scale_y], dtype=np.float32)
|
|
372
404
|
np_poly_scale = np.array(
|
|
@@ -386,15 +418,18 @@ class BoundingBox:
|
|
|
386
418
|
"""
|
|
387
419
|
Returns the coordinates as list
|
|
388
420
|
|
|
389
|
-
:
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
421
|
+
Args:
|
|
422
|
+
mode: Mode for coordinate arrangement:
|
|
423
|
+
`xyxy` for upper left/lower right point representation,
|
|
424
|
+
`xywh` for upper left and width/height representation or
|
|
425
|
+
`poly` for full eight coordinate polygon representation. `x,y` coordinates will be
|
|
393
426
|
returned in counter-clockwise order.
|
|
394
427
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
428
|
+
scale_x: rescale the x coordinate. Defaults to 1
|
|
429
|
+
scale_y: rescale the y coordinate. Defaults to 1
|
|
430
|
+
|
|
431
|
+
Returns:
|
|
432
|
+
box coordinates
|
|
398
433
|
"""
|
|
399
434
|
assert mode in ("xyxy", "xywh", "poly"), "Not a valid mode"
|
|
400
435
|
if mode == "xyxy":
|
|
@@ -458,11 +493,13 @@ class BoundingBox:
|
|
|
458
493
|
Transforms bounding box coordinates into absolute or relative coords. Internally, a new bounding box will be
|
|
459
494
|
created. Changing coordinates requires width and height of the whole image.
|
|
460
495
|
|
|
461
|
-
:
|
|
462
|
-
|
|
463
|
-
|
|
496
|
+
Args:
|
|
497
|
+
image_width: The horizontal image size
|
|
498
|
+
image_height: The vertical image size
|
|
499
|
+
absolute_coords: Whether to recalculate into absolute coordinates.
|
|
464
500
|
|
|
465
|
-
:
|
|
501
|
+
Returns:
|
|
502
|
+
A `BoundingBox` in the requested coordinate representation.
|
|
466
503
|
"""
|
|
467
504
|
if absolute_coords != self.absolute_coords:
|
|
468
505
|
if self.absolute_coords:
|
|
@@ -485,7 +522,10 @@ class BoundingBox:
|
|
|
485
522
|
return self
|
|
486
523
|
|
|
487
524
|
def __str__(self) -> str:
|
|
488
|
-
return
|
|
525
|
+
return (
|
|
526
|
+
f"Bounding Box(absolute_coords: {self.absolute_coords},"
|
|
527
|
+
f"ulx: {self.ulx}, uly: {self.uly}, lrx: {self.lrx}, lry: {self.lry})"
|
|
528
|
+
)
|
|
489
529
|
|
|
490
530
|
def __repr__(self) -> str:
|
|
491
531
|
return (
|
|
@@ -493,6 +533,10 @@ class BoundingBox:
|
|
|
493
533
|
f" lry={self.lry}, width={self.width}, height={self.height})"
|
|
494
534
|
)
|
|
495
535
|
|
|
536
|
+
def get_legacy_string(self) -> str:
|
|
537
|
+
"""Legacy string representation of the bounding box. Do not use"""
|
|
538
|
+
return f"Bounding Box ulx: {self.ulx}, uly: {self.uly}, lrx: {self.lrx}, lry: {self.lry}"
|
|
539
|
+
|
|
496
540
|
@staticmethod
|
|
497
541
|
def remove_keys() -> list[str]:
|
|
498
542
|
"""Removing keys when converting the dataclass object to a dict"""
|
|
@@ -514,19 +558,24 @@ def intersection_box(
|
|
|
514
558
|
box_1: BoundingBox, box_2: BoundingBox, width: Optional[float] = None, height: Optional[float] = None
|
|
515
559
|
) -> BoundingBox:
|
|
516
560
|
"""
|
|
517
|
-
Returns the intersection bounding box of two boxes.
|
|
561
|
+
Returns the intersection bounding box of two boxes.
|
|
518
562
|
If coords are absolute, it will floor the lower and ceil the upper coord to ensure the resulting box has same
|
|
519
563
|
coordinates as the box induces from `crop_box_from_image`
|
|
520
564
|
|
|
521
|
-
:
|
|
522
|
-
|
|
523
|
-
|
|
565
|
+
Args:
|
|
566
|
+
box_1: bounding box
|
|
567
|
+
box_2: bounding box
|
|
568
|
+
width: Total width of image. This optional parameter is needed if the value of `absolute_coords` of `box_1`
|
|
524
569
|
and `box_2` are not equal.
|
|
525
|
-
|
|
570
|
+
height: Total height of image. This optional parameter is needed if the value of `absolute_coords` of `box_1`
|
|
526
571
|
and `box_2` are not equal.
|
|
527
572
|
|
|
528
|
-
:
|
|
529
|
-
|
|
573
|
+
Returns:
|
|
574
|
+
bounding box. Will have same `absolute_coords` as `box_2`, if `absolute_coords` of `box_1` and `box_2` are
|
|
575
|
+
not equal.
|
|
576
|
+
|
|
577
|
+
Raises:
|
|
578
|
+
ValueError: If the intersection is empty, i.e. if the boxes do not overlap.
|
|
530
579
|
"""
|
|
531
580
|
|
|
532
581
|
if box_1.absolute_coords != box_2.absolute_coords:
|
|
@@ -548,17 +597,18 @@ def crop_box_from_image(
|
|
|
548
597
|
np_image: PixelValues, crop_box: BoundingBox, width: Optional[float] = None, height: Optional[float] = None
|
|
549
598
|
) -> PixelValues:
|
|
550
599
|
"""
|
|
551
|
-
Crop a box (the crop_box) from a
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
:
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
600
|
+
Crop a box (the `crop_box`) from an image given as `np.array`. Will floor the left and ceil the right coordinate
|
|
601
|
+
point.
|
|
602
|
+
|
|
603
|
+
Args:
|
|
604
|
+
np_image: Image to crop from.
|
|
605
|
+
crop_box: Bounding box to crop.
|
|
606
|
+
width: Total width of image. This optional parameter is needed if the value of `absolute_coords` of
|
|
607
|
+
`crop_box` is `False`.
|
|
608
|
+
height: Total height of image. This optional parameter is needed if the value of `absolute_coords` of
|
|
609
|
+
`crop_box` is `False`.
|
|
610
|
+
|
|
611
|
+
:return: A `np.array` cropped according to the bounding box.
|
|
562
612
|
"""
|
|
563
613
|
if not crop_box.absolute_coords:
|
|
564
614
|
assert (
|
|
@@ -579,13 +629,16 @@ def crop_box_from_image(
|
|
|
579
629
|
def local_to_global_coords(local_box: BoundingBox, embedding_box: BoundingBox) -> BoundingBox:
|
|
580
630
|
"""
|
|
581
631
|
Transform coords in terms of a cropped image into global coords. The local box coords are given in terms of the
|
|
582
|
-
embedding box. The global coords will be determined by transforming the upper left point (which is (0,0) in
|
|
632
|
+
embedding box. The global coords will be determined by transforming the upper left point (which is `(0,0)` in
|
|
583
633
|
local terms) into the upper left point given by the embedding box. This will shift the ul point of the
|
|
584
|
-
local box to ul + embedding_box.ul
|
|
634
|
+
local box to `ul + embedding_box.ul`
|
|
635
|
+
|
|
636
|
+
Args:
|
|
637
|
+
local_box: bounding box with coords in terms of an embedding (e.g. local coordinates)
|
|
638
|
+
embedding_box: bounding box of the embedding.
|
|
585
639
|
|
|
586
|
-
:
|
|
587
|
-
|
|
588
|
-
:return: bounding box with local box transformed to absolute coords
|
|
640
|
+
Returns:
|
|
641
|
+
Bounding box with local box transformed to absolute coords
|
|
589
642
|
"""
|
|
590
643
|
|
|
591
644
|
assert local_box.absolute_coords and embedding_box.absolute_coords, (
|
|
@@ -614,11 +667,14 @@ def global_to_local_coords(global_box: BoundingBox, embedding_box: BoundingBox)
|
|
|
614
667
|
Transforming global bounding box coords into the coordinate system given by the embedding box. The transformation
|
|
615
668
|
requires that the global bounding box coordinates lie completely within the rectangle of the embedding box.
|
|
616
669
|
The transformation results from a shift of all coordinates given by the shift of the upper left point of the
|
|
617
|
-
embedding box into (0,0)
|
|
670
|
+
embedding box into `(0,0)`.
|
|
618
671
|
|
|
619
|
-
:
|
|
620
|
-
|
|
621
|
-
|
|
672
|
+
Args:
|
|
673
|
+
global_box: The bounding box to be embedded
|
|
674
|
+
embedding_box: The embedding box. Must cover the global box completely.
|
|
675
|
+
|
|
676
|
+
Returns:
|
|
677
|
+
Bounding box of the embedded box in local coordinates.
|
|
622
678
|
"""
|
|
623
679
|
|
|
624
680
|
assert global_box.absolute_coords and embedding_box.absolute_coords, (
|
|
@@ -639,7 +695,9 @@ def global_to_local_coords(global_box: BoundingBox, embedding_box: BoundingBox)
|
|
|
639
695
|
def merge_boxes(*boxes: BoundingBox) -> BoundingBox:
|
|
640
696
|
"""
|
|
641
697
|
Generating the smallest box containing an arbitrary tuple/list of boxes.
|
|
642
|
-
|
|
698
|
+
|
|
699
|
+
Args:
|
|
700
|
+
boxes: An arbitrary tuple/list of bounding boxes `BoundingBox`.
|
|
643
701
|
"""
|
|
644
702
|
absolute_coords = boxes[0].absolute_coords
|
|
645
703
|
assert all(box.absolute_coords == absolute_coords for box in boxes), "all boxes must have same absolute_coords"
|
|
@@ -660,22 +718,21 @@ def rescale_coords(
|
|
|
660
718
|
scaled_total_height: float,
|
|
661
719
|
) -> BoundingBox:
|
|
662
720
|
"""
|
|
663
|
-
Generating a bounding box with scaled coordinates. Will rescale x coordinate with factor
|
|
664
|
-
|
|
665
|
-
|
|
721
|
+
Generating a bounding box with scaled coordinates. Will rescale `x` coordinate with factor
|
|
722
|
+
`*(current_total_width/scaled_total_width)`, resp. `y` coordinate with factor
|
|
723
|
+
`* (current_total_height/scaled_total_height)`,
|
|
666
724
|
|
|
667
|
-
|
|
725
|
+
while not changing anything if `absolute_coords` is set to `False`.
|
|
668
726
|
|
|
669
|
-
|
|
727
|
+
Args:
|
|
728
|
+
box: `BoundingBox` to rescale
|
|
729
|
+
current_total_width: absolute coords of width of image
|
|
730
|
+
current_total_height: absolute coords of height of image
|
|
731
|
+
scaled_total_width: absolute width of rescaled image
|
|
732
|
+
scaled_total_height: absolute height of rescaled image
|
|
670
733
|
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
:param box: BoudingBox to rescale
|
|
674
|
-
:param current_total_width: absolute coords of width of image
|
|
675
|
-
:param current_total_height: absolute coords of height of image
|
|
676
|
-
:param scaled_total_width: absolute width of rescaled image
|
|
677
|
-
:param scaled_total_height: absolute height of rescaled image
|
|
678
|
-
:return: rescaled BoundingBox
|
|
734
|
+
Returns:
|
|
735
|
+
rescaled `BoundingBox`
|
|
679
736
|
"""
|
|
680
737
|
|
|
681
738
|
if not box.absolute_coords:
|
|
@@ -694,12 +751,15 @@ def rescale_coords(
|
|
|
694
751
|
|
|
695
752
|
def intersection_boxes(boxes_1: Sequence[BoundingBox], boxes_2: Sequence[BoundingBox]) -> Sequence[BoundingBox]:
|
|
696
753
|
"""
|
|
697
|
-
The multiple version of
|
|
698
|
-
pairwise intersection of both groups. There will be at most mxn intersection boxes.
|
|
754
|
+
The multiple version of `intersection_box`: Given two lists of `m` and `n` bounding boxes, it will calculate the
|
|
755
|
+
pairwise intersection of both groups. There will be at most `mxn` intersection boxes.
|
|
756
|
+
|
|
757
|
+
Args:
|
|
758
|
+
boxes_1: sequence of m BoundingBox
|
|
759
|
+
boxes_2: sequence of n BoundingBox
|
|
699
760
|
|
|
700
|
-
:
|
|
701
|
-
|
|
702
|
-
:return: list of at most mxn BoundingBox
|
|
761
|
+
Returns:
|
|
762
|
+
list of at most mxn BoundingBox
|
|
703
763
|
"""
|
|
704
764
|
if not boxes_1 and boxes_2:
|
|
705
765
|
return boxes_2
|