dgenerate-ultralytics-headless 8.3.222__py3-none-any.whl → 8.3.225__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/METADATA +2 -2
- dgenerate_ultralytics_headless-8.3.225.dist-info/RECORD +286 -0
- tests/conftest.py +5 -8
- tests/test_cli.py +1 -8
- tests/test_python.py +1 -2
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +34 -49
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +5 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/data/annotator.py +3 -4
- ultralytics/data/augment.py +244 -323
- ultralytics/data/base.py +12 -22
- ultralytics/data/build.py +47 -40
- ultralytics/data/converter.py +32 -42
- ultralytics/data/dataset.py +43 -71
- ultralytics/data/loaders.py +22 -34
- ultralytics/data/split.py +5 -6
- ultralytics/data/split_dota.py +8 -15
- ultralytics/data/utils.py +27 -36
- ultralytics/engine/exporter.py +49 -116
- ultralytics/engine/model.py +144 -180
- ultralytics/engine/predictor.py +18 -29
- ultralytics/engine/results.py +165 -231
- ultralytics/engine/trainer.py +11 -19
- ultralytics/engine/tuner.py +13 -23
- ultralytics/engine/validator.py +6 -10
- ultralytics/hub/__init__.py +7 -12
- ultralytics/hub/auth.py +6 -12
- ultralytics/hub/google/__init__.py +7 -10
- ultralytics/hub/session.py +15 -25
- ultralytics/hub/utils.py +3 -6
- ultralytics/models/fastsam/model.py +6 -8
- ultralytics/models/fastsam/predict.py +5 -10
- ultralytics/models/fastsam/utils.py +1 -2
- ultralytics/models/fastsam/val.py +2 -4
- ultralytics/models/nas/model.py +5 -8
- ultralytics/models/nas/predict.py +7 -9
- ultralytics/models/nas/val.py +1 -2
- ultralytics/models/rtdetr/model.py +5 -8
- ultralytics/models/rtdetr/predict.py +15 -18
- ultralytics/models/rtdetr/train.py +10 -13
- ultralytics/models/rtdetr/val.py +13 -20
- ultralytics/models/sam/amg.py +12 -18
- ultralytics/models/sam/build.py +6 -9
- ultralytics/models/sam/model.py +16 -23
- ultralytics/models/sam/modules/blocks.py +62 -84
- ultralytics/models/sam/modules/decoders.py +17 -24
- ultralytics/models/sam/modules/encoders.py +40 -56
- ultralytics/models/sam/modules/memory_attention.py +10 -16
- ultralytics/models/sam/modules/sam.py +41 -47
- ultralytics/models/sam/modules/tiny_encoder.py +64 -83
- ultralytics/models/sam/modules/transformer.py +17 -27
- ultralytics/models/sam/modules/utils.py +31 -42
- ultralytics/models/sam/predict.py +172 -209
- ultralytics/models/utils/loss.py +14 -26
- ultralytics/models/utils/ops.py +13 -17
- ultralytics/models/yolo/classify/predict.py +8 -11
- ultralytics/models/yolo/classify/train.py +8 -16
- ultralytics/models/yolo/classify/val.py +13 -20
- ultralytics/models/yolo/detect/predict.py +4 -8
- ultralytics/models/yolo/detect/train.py +11 -20
- ultralytics/models/yolo/detect/val.py +38 -48
- ultralytics/models/yolo/model.py +35 -47
- ultralytics/models/yolo/obb/predict.py +5 -8
- ultralytics/models/yolo/obb/train.py +11 -14
- ultralytics/models/yolo/obb/val.py +20 -28
- ultralytics/models/yolo/pose/predict.py +5 -8
- ultralytics/models/yolo/pose/train.py +4 -8
- ultralytics/models/yolo/pose/val.py +31 -39
- ultralytics/models/yolo/segment/predict.py +9 -14
- ultralytics/models/yolo/segment/train.py +3 -6
- ultralytics/models/yolo/segment/val.py +16 -26
- ultralytics/models/yolo/world/train.py +8 -14
- ultralytics/models/yolo/world/train_world.py +11 -16
- ultralytics/models/yolo/yoloe/predict.py +16 -23
- ultralytics/models/yolo/yoloe/train.py +30 -43
- ultralytics/models/yolo/yoloe/train_seg.py +5 -10
- ultralytics/models/yolo/yoloe/val.py +15 -20
- ultralytics/nn/autobackend.py +10 -18
- ultralytics/nn/modules/activation.py +4 -6
- ultralytics/nn/modules/block.py +99 -185
- ultralytics/nn/modules/conv.py +45 -90
- ultralytics/nn/modules/head.py +44 -98
- ultralytics/nn/modules/transformer.py +44 -76
- ultralytics/nn/modules/utils.py +14 -19
- ultralytics/nn/tasks.py +86 -146
- ultralytics/nn/text_model.py +25 -40
- ultralytics/solutions/ai_gym.py +10 -16
- ultralytics/solutions/analytics.py +7 -10
- ultralytics/solutions/config.py +4 -5
- ultralytics/solutions/distance_calculation.py +9 -12
- ultralytics/solutions/heatmap.py +7 -13
- ultralytics/solutions/instance_segmentation.py +5 -8
- ultralytics/solutions/object_blurrer.py +7 -10
- ultralytics/solutions/object_counter.py +8 -12
- ultralytics/solutions/object_cropper.py +5 -8
- ultralytics/solutions/parking_management.py +12 -14
- ultralytics/solutions/queue_management.py +4 -6
- ultralytics/solutions/region_counter.py +7 -10
- ultralytics/solutions/security_alarm.py +14 -19
- ultralytics/solutions/similarity_search.py +7 -12
- ultralytics/solutions/solutions.py +31 -53
- ultralytics/solutions/speed_estimation.py +6 -9
- ultralytics/solutions/streamlit_inference.py +2 -4
- ultralytics/solutions/trackzone.py +7 -10
- ultralytics/solutions/vision_eye.py +5 -8
- ultralytics/trackers/basetrack.py +2 -4
- ultralytics/trackers/bot_sort.py +6 -11
- ultralytics/trackers/byte_tracker.py +10 -15
- ultralytics/trackers/track.py +3 -6
- ultralytics/trackers/utils/gmc.py +6 -12
- ultralytics/trackers/utils/kalman_filter.py +35 -43
- ultralytics/trackers/utils/matching.py +6 -10
- ultralytics/utils/__init__.py +61 -100
- ultralytics/utils/autobatch.py +2 -4
- ultralytics/utils/autodevice.py +11 -13
- ultralytics/utils/benchmarks.py +25 -35
- ultralytics/utils/callbacks/base.py +8 -10
- ultralytics/utils/callbacks/clearml.py +2 -4
- ultralytics/utils/callbacks/comet.py +30 -44
- ultralytics/utils/callbacks/dvc.py +13 -18
- ultralytics/utils/callbacks/mlflow.py +4 -5
- ultralytics/utils/callbacks/neptune.py +4 -6
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +4 -6
- ultralytics/utils/callbacks/wb.py +10 -13
- ultralytics/utils/checks.py +29 -56
- ultralytics/utils/cpu.py +1 -2
- ultralytics/utils/dist.py +8 -12
- ultralytics/utils/downloads.py +17 -27
- ultralytics/utils/errors.py +6 -8
- ultralytics/utils/events.py +2 -4
- ultralytics/utils/export/__init__.py +4 -239
- ultralytics/utils/export/engine.py +237 -0
- ultralytics/utils/export/imx.py +11 -17
- ultralytics/utils/export/tensorflow.py +217 -0
- ultralytics/utils/files.py +10 -15
- ultralytics/utils/git.py +5 -7
- ultralytics/utils/instance.py +30 -51
- ultralytics/utils/logger.py +11 -15
- ultralytics/utils/loss.py +8 -14
- ultralytics/utils/metrics.py +98 -138
- ultralytics/utils/nms.py +13 -16
- ultralytics/utils/ops.py +47 -74
- ultralytics/utils/patches.py +11 -18
- ultralytics/utils/plotting.py +29 -42
- ultralytics/utils/tal.py +25 -39
- ultralytics/utils/torch_utils.py +45 -73
- ultralytics/utils/tqdm.py +6 -8
- ultralytics/utils/triton.py +9 -12
- ultralytics/utils/tuner.py +1 -2
- dgenerate_ultralytics_headless-8.3.222.dist-info/RECORD +0 -283
- {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/top_level.txt +0 -0
ultralytics/data/augment.py
CHANGED
|
@@ -26,11 +26,10 @@ DEFAULT_STD = (1.0, 1.0, 1.0)
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class BaseTransform:
|
|
29
|
-
"""
|
|
30
|
-
Base class for image transformations in the Ultralytics library.
|
|
29
|
+
"""Base class for image transformations in the Ultralytics library.
|
|
31
30
|
|
|
32
|
-
This class serves as a foundation for implementing various image processing operations, designed to be
|
|
33
|
-
|
|
31
|
+
This class serves as a foundation for implementing various image processing operations, designed to be compatible
|
|
32
|
+
with both classification and semantic segmentation tasks.
|
|
34
33
|
|
|
35
34
|
Methods:
|
|
36
35
|
apply_image: Apply image transformations to labels.
|
|
@@ -45,11 +44,10 @@ class BaseTransform:
|
|
|
45
44
|
"""
|
|
46
45
|
|
|
47
46
|
def __init__(self) -> None:
|
|
48
|
-
"""
|
|
49
|
-
Initialize the BaseTransform object.
|
|
47
|
+
"""Initialize the BaseTransform object.
|
|
50
48
|
|
|
51
|
-
This constructor sets up the base transformation object, which can be extended for specific image
|
|
52
|
-
|
|
49
|
+
This constructor sets up the base transformation object, which can be extended for specific image processing
|
|
50
|
+
tasks. It is designed to be compatible with both classification and semantic segmentation.
|
|
53
51
|
|
|
54
52
|
Examples:
|
|
55
53
|
>>> transform = BaseTransform()
|
|
@@ -57,15 +55,14 @@ class BaseTransform:
|
|
|
57
55
|
pass
|
|
58
56
|
|
|
59
57
|
def apply_image(self, labels):
|
|
60
|
-
"""
|
|
61
|
-
Apply image transformations to labels.
|
|
58
|
+
"""Apply image transformations to labels.
|
|
62
59
|
|
|
63
60
|
This method is intended to be overridden by subclasses to implement specific image transformation
|
|
64
61
|
logic. In its base form, it returns the input labels unchanged.
|
|
65
62
|
|
|
66
63
|
Args:
|
|
67
|
-
labels (Any): The input labels to be transformed. The exact type and structure of labels may
|
|
68
|
-
|
|
64
|
+
labels (Any): The input labels to be transformed. The exact type and structure of labels may vary depending
|
|
65
|
+
on the specific implementation.
|
|
69
66
|
|
|
70
67
|
Returns:
|
|
71
68
|
(Any): The transformed labels. In the base implementation, this is identical to the input.
|
|
@@ -80,8 +77,7 @@ class BaseTransform:
|
|
|
80
77
|
pass
|
|
81
78
|
|
|
82
79
|
def apply_instances(self, labels):
|
|
83
|
-
"""
|
|
84
|
-
Apply transformations to object instances in labels.
|
|
80
|
+
"""Apply transformations to object instances in labels.
|
|
85
81
|
|
|
86
82
|
This method is responsible for applying various transformations to object instances within the given
|
|
87
83
|
labels. It is designed to be overridden by subclasses to implement specific instance transformation
|
|
@@ -101,8 +97,7 @@ class BaseTransform:
|
|
|
101
97
|
pass
|
|
102
98
|
|
|
103
99
|
def apply_semantic(self, labels):
|
|
104
|
-
"""
|
|
105
|
-
Apply semantic segmentation transformations to an image.
|
|
100
|
+
"""Apply semantic segmentation transformations to an image.
|
|
106
101
|
|
|
107
102
|
This method is intended to be overridden by subclasses to implement specific semantic segmentation
|
|
108
103
|
transformations. In its base form, it does not perform any operations.
|
|
@@ -121,16 +116,15 @@ class BaseTransform:
|
|
|
121
116
|
pass
|
|
122
117
|
|
|
123
118
|
def __call__(self, labels):
|
|
124
|
-
"""
|
|
125
|
-
Apply all label transformations to an image, instances, and semantic masks.
|
|
119
|
+
"""Apply all label transformations to an image, instances, and semantic masks.
|
|
126
120
|
|
|
127
|
-
This method orchestrates the application of various transformations defined in the BaseTransform class
|
|
128
|
-
|
|
129
|
-
|
|
121
|
+
This method orchestrates the application of various transformations defined in the BaseTransform class to the
|
|
122
|
+
input labels. It sequentially calls the apply_image and apply_instances methods to process the image and object
|
|
123
|
+
instances, respectively.
|
|
130
124
|
|
|
131
125
|
Args:
|
|
132
|
-
labels (dict): A dictionary containing image data and annotations. Expected keys include 'img' for
|
|
133
|
-
|
|
126
|
+
labels (dict): A dictionary containing image data and annotations. Expected keys include 'img' for the image
|
|
127
|
+
data, and 'instances' for object instances.
|
|
134
128
|
|
|
135
129
|
Returns:
|
|
136
130
|
(dict): The input labels dictionary with transformed image and instances.
|
|
@@ -146,8 +140,7 @@ class BaseTransform:
|
|
|
146
140
|
|
|
147
141
|
|
|
148
142
|
class Compose:
|
|
149
|
-
"""
|
|
150
|
-
A class for composing multiple image transformations.
|
|
143
|
+
"""A class for composing multiple image transformations.
|
|
151
144
|
|
|
152
145
|
Attributes:
|
|
153
146
|
transforms (list[Callable]): A list of transformation functions to be applied sequentially.
|
|
@@ -169,8 +162,7 @@ class Compose:
|
|
|
169
162
|
"""
|
|
170
163
|
|
|
171
164
|
def __init__(self, transforms):
|
|
172
|
-
"""
|
|
173
|
-
Initialize the Compose object with a list of transforms.
|
|
165
|
+
"""Initialize the Compose object with a list of transforms.
|
|
174
166
|
|
|
175
167
|
Args:
|
|
176
168
|
transforms (list[Callable]): A list of callable transform objects to be applied sequentially.
|
|
@@ -183,14 +175,13 @@ class Compose:
|
|
|
183
175
|
self.transforms = transforms if isinstance(transforms, list) else [transforms]
|
|
184
176
|
|
|
185
177
|
def __call__(self, data):
|
|
186
|
-
"""
|
|
187
|
-
Apply a series of transformations to input data.
|
|
178
|
+
"""Apply a series of transformations to input data.
|
|
188
179
|
|
|
189
180
|
This method sequentially applies each transformation in the Compose object's transforms to the input data.
|
|
190
181
|
|
|
191
182
|
Args:
|
|
192
|
-
data (Any): The input data to be transformed. This can be of any type, depending on the
|
|
193
|
-
|
|
183
|
+
data (Any): The input data to be transformed. This can be of any type, depending on the transformations in
|
|
184
|
+
the list.
|
|
194
185
|
|
|
195
186
|
Returns:
|
|
196
187
|
(Any): The transformed data after applying all transformations in sequence.
|
|
@@ -205,8 +196,7 @@ class Compose:
|
|
|
205
196
|
return data
|
|
206
197
|
|
|
207
198
|
def append(self, transform):
|
|
208
|
-
"""
|
|
209
|
-
Append a new transform to the existing list of transforms.
|
|
199
|
+
"""Append a new transform to the existing list of transforms.
|
|
210
200
|
|
|
211
201
|
Args:
|
|
212
202
|
transform (BaseTransform): The transformation to be added to the composition.
|
|
@@ -218,8 +208,7 @@ class Compose:
|
|
|
218
208
|
self.transforms.append(transform)
|
|
219
209
|
|
|
220
210
|
def insert(self, index, transform):
|
|
221
|
-
"""
|
|
222
|
-
Insert a new transform at a specified index in the existing list of transforms.
|
|
211
|
+
"""Insert a new transform at a specified index in the existing list of transforms.
|
|
223
212
|
|
|
224
213
|
Args:
|
|
225
214
|
index (int): The index at which to insert the new transform.
|
|
@@ -234,8 +223,7 @@ class Compose:
|
|
|
234
223
|
self.transforms.insert(index, transform)
|
|
235
224
|
|
|
236
225
|
def __getitem__(self, index: list | int) -> Compose:
|
|
237
|
-
"""
|
|
238
|
-
Retrieve a specific transform or a set of transforms using indexing.
|
|
226
|
+
"""Retrieve a specific transform or a set of transforms using indexing.
|
|
239
227
|
|
|
240
228
|
Args:
|
|
241
229
|
index (int | list[int]): Index or list of indices of the transforms to retrieve.
|
|
@@ -256,8 +244,7 @@ class Compose:
|
|
|
256
244
|
return Compose([self.transforms[i] for i in index]) if isinstance(index, list) else self.transforms[index]
|
|
257
245
|
|
|
258
246
|
def __setitem__(self, index: list | int, value: list | int) -> None:
|
|
259
|
-
"""
|
|
260
|
-
Set one or more transforms in the composition using indexing.
|
|
247
|
+
"""Set one or more transforms in the composition using indexing.
|
|
261
248
|
|
|
262
249
|
Args:
|
|
263
250
|
index (int | list[int]): Index or list of indices to set transforms at.
|
|
@@ -283,8 +270,7 @@ class Compose:
|
|
|
283
270
|
self.transforms[i] = v
|
|
284
271
|
|
|
285
272
|
def tolist(self):
|
|
286
|
-
"""
|
|
287
|
-
Convert the list of transforms to a standard Python list.
|
|
273
|
+
"""Convert the list of transforms to a standard Python list.
|
|
288
274
|
|
|
289
275
|
Returns:
|
|
290
276
|
(list): A list containing all the transform objects in the Compose instance.
|
|
@@ -299,8 +285,7 @@ class Compose:
|
|
|
299
285
|
return self.transforms
|
|
300
286
|
|
|
301
287
|
def __repr__(self):
|
|
302
|
-
"""
|
|
303
|
-
Return a string representation of the Compose object.
|
|
288
|
+
"""Return a string representation of the Compose object.
|
|
304
289
|
|
|
305
290
|
Returns:
|
|
306
291
|
(str): A string representation of the Compose object, including the list of transforms.
|
|
@@ -318,11 +303,10 @@ class Compose:
|
|
|
318
303
|
|
|
319
304
|
|
|
320
305
|
class BaseMixTransform:
|
|
321
|
-
"""
|
|
322
|
-
Base class for mix transformations like Cutmix, MixUp and Mosaic.
|
|
306
|
+
"""Base class for mix transformations like Cutmix, MixUp and Mosaic.
|
|
323
307
|
|
|
324
|
-
This class provides a foundation for implementing mix transformations on datasets. It handles the
|
|
325
|
-
|
|
308
|
+
This class provides a foundation for implementing mix transformations on datasets. It handles the probability-based
|
|
309
|
+
application of transforms and manages the mixing of multiple images and labels.
|
|
326
310
|
|
|
327
311
|
Attributes:
|
|
328
312
|
dataset (Any): The dataset object containing images and labels.
|
|
@@ -349,8 +333,7 @@ class BaseMixTransform:
|
|
|
349
333
|
"""
|
|
350
334
|
|
|
351
335
|
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
|
|
352
|
-
"""
|
|
353
|
-
Initialize the BaseMixTransform object for mix transformations like CutMix, MixUp and Mosaic.
|
|
336
|
+
"""Initialize the BaseMixTransform object for mix transformations like CutMix, MixUp and Mosaic.
|
|
354
337
|
|
|
355
338
|
This class serves as a base for implementing mix transformations in image processing pipelines.
|
|
356
339
|
|
|
@@ -369,11 +352,10 @@ class BaseMixTransform:
|
|
|
369
352
|
self.p = p
|
|
370
353
|
|
|
371
354
|
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
372
|
-
"""
|
|
373
|
-
Apply pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
|
|
355
|
+
"""Apply pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
|
|
374
356
|
|
|
375
|
-
This method determines whether to apply the mix transform based on a probability factor. If applied, it
|
|
376
|
-
|
|
357
|
+
This method determines whether to apply the mix transform based on a probability factor. If applied, it selects
|
|
358
|
+
additional images, applies pre-transforms if specified, and then performs the mix transform.
|
|
377
359
|
|
|
378
360
|
Args:
|
|
379
361
|
labels (dict[str, Any]): A dictionary containing label data for an image.
|
|
@@ -409,8 +391,7 @@ class BaseMixTransform:
|
|
|
409
391
|
return labels
|
|
410
392
|
|
|
411
393
|
def _mix_transform(self, labels: dict[str, Any]):
|
|
412
|
-
"""
|
|
413
|
-
Apply CutMix, MixUp or Mosaic augmentation to the label dictionary.
|
|
394
|
+
"""Apply CutMix, MixUp or Mosaic augmentation to the label dictionary.
|
|
414
395
|
|
|
415
396
|
This method should be implemented by subclasses to perform specific mix transformations like CutMix, MixUp or
|
|
416
397
|
Mosaic. It modifies the input label dictionary in-place with the augmented data.
|
|
@@ -430,8 +411,7 @@ class BaseMixTransform:
|
|
|
430
411
|
raise NotImplementedError
|
|
431
412
|
|
|
432
413
|
def get_indexes(self):
|
|
433
|
-
"""
|
|
434
|
-
Get a list of shuffled indexes for mosaic augmentation.
|
|
414
|
+
"""Get a list of shuffled indexes for mosaic augmentation.
|
|
435
415
|
|
|
436
416
|
Returns:
|
|
437
417
|
(list[int]): A list of shuffled indexes from the dataset.
|
|
@@ -445,15 +425,14 @@ class BaseMixTransform:
|
|
|
445
425
|
|
|
446
426
|
@staticmethod
|
|
447
427
|
def _update_label_text(labels: dict[str, Any]) -> dict[str, Any]:
|
|
448
|
-
"""
|
|
449
|
-
Update label text and class IDs for mixed labels in image augmentation.
|
|
428
|
+
"""Update label text and class IDs for mixed labels in image augmentation.
|
|
450
429
|
|
|
451
|
-
This method processes the 'texts' and 'cls' fields of the input labels dictionary and any mixed labels,
|
|
452
|
-
|
|
430
|
+
This method processes the 'texts' and 'cls' fields of the input labels dictionary and any mixed labels, creating
|
|
431
|
+
a unified set of text labels and updating class IDs accordingly.
|
|
453
432
|
|
|
454
433
|
Args:
|
|
455
|
-
labels (dict[str, Any]): A dictionary containing label information, including 'texts' and 'cls' fields,
|
|
456
|
-
|
|
434
|
+
labels (dict[str, Any]): A dictionary containing label information, including 'texts' and 'cls' fields, and
|
|
435
|
+
optionally a 'mix_labels' field with additional label dictionaries.
|
|
457
436
|
|
|
458
437
|
Returns:
|
|
459
438
|
(dict[str, Any]): The updated labels dictionary with unified text labels and updated class IDs.
|
|
@@ -490,11 +469,10 @@ class BaseMixTransform:
|
|
|
490
469
|
|
|
491
470
|
|
|
492
471
|
class Mosaic(BaseMixTransform):
|
|
493
|
-
"""
|
|
494
|
-
Mosaic augmentation for image datasets.
|
|
472
|
+
"""Mosaic augmentation for image datasets.
|
|
495
473
|
|
|
496
|
-
This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image.
|
|
497
|
-
|
|
474
|
+
This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image. The
|
|
475
|
+
augmentation is applied to a dataset with a given probability.
|
|
498
476
|
|
|
499
477
|
Attributes:
|
|
500
478
|
dataset: The dataset on which the mosaic augmentation is applied.
|
|
@@ -520,11 +498,10 @@ class Mosaic(BaseMixTransform):
|
|
|
520
498
|
"""
|
|
521
499
|
|
|
522
500
|
def __init__(self, dataset, imgsz: int = 640, p: float = 1.0, n: int = 4):
|
|
523
|
-
"""
|
|
524
|
-
Initialize the Mosaic augmentation object.
|
|
501
|
+
"""Initialize the Mosaic augmentation object.
|
|
525
502
|
|
|
526
|
-
This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image.
|
|
527
|
-
|
|
503
|
+
This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image. The
|
|
504
|
+
augmentation is applied to a dataset with a given probability.
|
|
528
505
|
|
|
529
506
|
Args:
|
|
530
507
|
dataset (Any): The dataset on which the mosaic augmentation is applied.
|
|
@@ -546,15 +523,14 @@ class Mosaic(BaseMixTransform):
|
|
|
546
523
|
self.buffer_enabled = self.dataset.cache != "ram"
|
|
547
524
|
|
|
548
525
|
def get_indexes(self):
|
|
549
|
-
"""
|
|
550
|
-
Return a list of random indexes from the dataset for mosaic augmentation.
|
|
526
|
+
"""Return a list of random indexes from the dataset for mosaic augmentation.
|
|
551
527
|
|
|
552
|
-
This method selects random image indexes either from a buffer or from the entire dataset, depending on
|
|
553
|
-
|
|
528
|
+
This method selects random image indexes either from a buffer or from the entire dataset, depending on the
|
|
529
|
+
'buffer' parameter. It is used to choose images for creating mosaic augmentations.
|
|
554
530
|
|
|
555
531
|
Returns:
|
|
556
|
-
(list[int]): A list of random image indexes. The length of the list is n-1, where n is the number
|
|
557
|
-
|
|
532
|
+
(list[int]): A list of random image indexes. The length of the list is n-1, where n is the number of images
|
|
533
|
+
used in the mosaic (either 3 or 8, depending on whether n is 4 or 9).
|
|
558
534
|
|
|
559
535
|
Examples:
|
|
560
536
|
>>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)
|
|
@@ -567,12 +543,11 @@ class Mosaic(BaseMixTransform):
|
|
|
567
543
|
return [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)]
|
|
568
544
|
|
|
569
545
|
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
570
|
-
"""
|
|
571
|
-
Apply mosaic augmentation to the input image and labels.
|
|
546
|
+
"""Apply mosaic augmentation to the input image and labels.
|
|
572
547
|
|
|
573
|
-
This method combines multiple images (3, 4, or 9) into a single mosaic image based on the 'n' attribute.
|
|
574
|
-
|
|
575
|
-
|
|
548
|
+
This method combines multiple images (3, 4, or 9) into a single mosaic image based on the 'n' attribute. It
|
|
549
|
+
ensures that rectangular annotations are not present and that there are other images available for mosaic
|
|
550
|
+
augmentation.
|
|
576
551
|
|
|
577
552
|
Args:
|
|
578
553
|
labels (dict[str, Any]): A dictionary containing image data and annotations. Expected keys include:
|
|
@@ -596,16 +571,15 @@ class Mosaic(BaseMixTransform):
|
|
|
596
571
|
) # This code is modified for mosaic3 method.
|
|
597
572
|
|
|
598
573
|
def _mosaic3(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
599
|
-
"""
|
|
600
|
-
Create a 1x3 image mosaic by combining three images.
|
|
574
|
+
"""Create a 1x3 image mosaic by combining three images.
|
|
601
575
|
|
|
602
|
-
This method arranges three images in a horizontal layout, with the main image in the center and two
|
|
603
|
-
|
|
576
|
+
This method arranges three images in a horizontal layout, with the main image in the center and two additional
|
|
577
|
+
images on either side. It's part of the Mosaic augmentation technique used in object detection.
|
|
604
578
|
|
|
605
579
|
Args:
|
|
606
580
|
labels (dict[str, Any]): A dictionary containing image and label information for the main (center) image.
|
|
607
|
-
Must include 'img' key with the image array, and 'mix_labels' key with a list of two
|
|
608
|
-
|
|
581
|
+
Must include 'img' key with the image array, and 'mix_labels' key with a list of two dictionaries
|
|
582
|
+
containing information for the side images.
|
|
609
583
|
|
|
610
584
|
Returns:
|
|
611
585
|
(dict[str, Any]): A dictionary with the mosaic image and updated labels. Keys include:
|
|
@@ -655,19 +629,19 @@ class Mosaic(BaseMixTransform):
|
|
|
655
629
|
return final_labels
|
|
656
630
|
|
|
657
631
|
def _mosaic4(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
658
|
-
"""
|
|
659
|
-
Create a 2x2 image mosaic from four input images.
|
|
632
|
+
"""Create a 2x2 image mosaic from four input images.
|
|
660
633
|
|
|
661
|
-
This method combines four images into a single mosaic image by placing them in a 2x2 grid. It also
|
|
662
|
-
|
|
634
|
+
This method combines four images into a single mosaic image by placing them in a 2x2 grid. It also updates the
|
|
635
|
+
corresponding labels for each image in the mosaic.
|
|
663
636
|
|
|
664
637
|
Args:
|
|
665
|
-
labels (dict[str, Any]): A dictionary containing image data and labels for the base image (index 0) and
|
|
666
|
-
additional images (indices 1-3) in the 'mix_labels' key.
|
|
638
|
+
labels (dict[str, Any]): A dictionary containing image data and labels for the base image (index 0) and
|
|
639
|
+
three additional images (indices 1-3) in the 'mix_labels' key.
|
|
667
640
|
|
|
668
641
|
Returns:
|
|
669
|
-
(dict[str, Any]): A dictionary containing the mosaic image and updated labels. The 'img' key contains the
|
|
670
|
-
image as a numpy array, and other keys contain the combined and adjusted labels for all
|
|
642
|
+
(dict[str, Any]): A dictionary containing the mosaic image and updated labels. The 'img' key contains the
|
|
643
|
+
mosaic image as a numpy array, and other keys contain the combined and adjusted labels for all
|
|
644
|
+
four images.
|
|
671
645
|
|
|
672
646
|
Examples:
|
|
673
647
|
>>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)
|
|
@@ -713,22 +687,22 @@ class Mosaic(BaseMixTransform):
|
|
|
713
687
|
return final_labels
|
|
714
688
|
|
|
715
689
|
def _mosaic9(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
716
|
-
"""
|
|
717
|
-
Create a 3x3 image mosaic from the input image and eight additional images.
|
|
690
|
+
"""Create a 3x3 image mosaic from the input image and eight additional images.
|
|
718
691
|
|
|
719
|
-
This method combines nine images into a single mosaic image. The input image is placed at the center,
|
|
720
|
-
|
|
692
|
+
This method combines nine images into a single mosaic image. The input image is placed at the center, and eight
|
|
693
|
+
additional images from the dataset are placed around it in a 3x3 grid pattern.
|
|
721
694
|
|
|
722
695
|
Args:
|
|
723
696
|
labels (dict[str, Any]): A dictionary containing the input image and its associated labels. It should have
|
|
724
|
-
|
|
697
|
+
the following keys:
|
|
725
698
|
- 'img' (np.ndarray): The input image.
|
|
726
699
|
- 'resized_shape' (tuple[int, int]): The shape of the resized image (height, width).
|
|
727
700
|
- 'mix_labels' (list[dict]): A list of dictionaries containing information for the additional
|
|
728
|
-
|
|
701
|
+
eight images, each with the same structure as the input labels.
|
|
729
702
|
|
|
730
703
|
Returns:
|
|
731
|
-
(dict[str, Any]): A dictionary containing the mosaic image and updated labels. It includes the following
|
|
704
|
+
(dict[str, Any]): A dictionary containing the mosaic image and updated labels. It includes the following
|
|
705
|
+
keys:
|
|
732
706
|
- 'img' (np.ndarray): The final mosaic image.
|
|
733
707
|
- Other keys from the input labels, updated to reflect the new mosaic arrangement.
|
|
734
708
|
|
|
@@ -786,8 +760,7 @@ class Mosaic(BaseMixTransform):
|
|
|
786
760
|
|
|
787
761
|
@staticmethod
|
|
788
762
|
def _update_labels(labels, padw: int, padh: int) -> dict[str, Any]:
|
|
789
|
-
"""
|
|
790
|
-
Update label coordinates with padding values.
|
|
763
|
+
"""Update label coordinates with padding values.
|
|
791
764
|
|
|
792
765
|
This method adjusts the bounding box coordinates of object instances in the labels by adding padding
|
|
793
766
|
values. It also denormalizes the coordinates if they were previously normalized.
|
|
@@ -812,11 +785,10 @@ class Mosaic(BaseMixTransform):
|
|
|
812
785
|
return labels
|
|
813
786
|
|
|
814
787
|
def _cat_labels(self, mosaic_labels: list[dict[str, Any]]) -> dict[str, Any]:
|
|
815
|
-
"""
|
|
816
|
-
Concatenate and process labels for mosaic augmentation.
|
|
788
|
+
"""Concatenate and process labels for mosaic augmentation.
|
|
817
789
|
|
|
818
|
-
This method combines labels from multiple images used in mosaic augmentation, clips instances to the
|
|
819
|
-
|
|
790
|
+
This method combines labels from multiple images used in mosaic augmentation, clips instances to the mosaic
|
|
791
|
+
border, and removes zero-area boxes.
|
|
820
792
|
|
|
821
793
|
Args:
|
|
822
794
|
mosaic_labels (list[dict[str, Any]]): A list of label dictionaries for each image in the mosaic.
|
|
@@ -864,8 +836,7 @@ class Mosaic(BaseMixTransform):
|
|
|
864
836
|
|
|
865
837
|
|
|
866
838
|
class MixUp(BaseMixTransform):
|
|
867
|
-
"""
|
|
868
|
-
Apply MixUp augmentation to image datasets.
|
|
839
|
+
"""Apply MixUp augmentation to image datasets.
|
|
869
840
|
|
|
870
841
|
This class implements the MixUp augmentation technique as described in the paper [mixup: Beyond Empirical Risk
|
|
871
842
|
Minimization](https://arxiv.org/abs/1710.09412). MixUp combines two images and their labels using a random weight.
|
|
@@ -886,11 +857,10 @@ class MixUp(BaseMixTransform):
|
|
|
886
857
|
"""
|
|
887
858
|
|
|
888
859
|
def __init__(self, dataset, pre_transform=None, p: float = 0.0) -> None:
|
|
889
|
-
"""
|
|
890
|
-
Initialize the MixUp augmentation object.
|
|
860
|
+
"""Initialize the MixUp augmentation object.
|
|
891
861
|
|
|
892
|
-
MixUp is an image augmentation technique that combines two images by taking a weighted sum of their pixel
|
|
893
|
-
|
|
862
|
+
MixUp is an image augmentation technique that combines two images by taking a weighted sum of their pixel values
|
|
863
|
+
and labels. This implementation is designed for use with the Ultralytics YOLO framework.
|
|
894
864
|
|
|
895
865
|
Args:
|
|
896
866
|
dataset (Any): The dataset to which MixUp augmentation will be applied.
|
|
@@ -905,11 +875,10 @@ class MixUp(BaseMixTransform):
|
|
|
905
875
|
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
|
|
906
876
|
|
|
907
877
|
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
908
|
-
"""
|
|
909
|
-
Apply MixUp augmentation to the input labels.
|
|
878
|
+
"""Apply MixUp augmentation to the input labels.
|
|
910
879
|
|
|
911
|
-
This method implements the MixUp augmentation technique as described in the paper
|
|
912
|
-
|
|
880
|
+
This method implements the MixUp augmentation technique as described in the paper "mixup: Beyond Empirical Risk
|
|
881
|
+
Minimization" (https://arxiv.org/abs/1710.09412).
|
|
913
882
|
|
|
914
883
|
Args:
|
|
915
884
|
labels (dict[str, Any]): A dictionary containing the original image and label information.
|
|
@@ -930,11 +899,10 @@ class MixUp(BaseMixTransform):
|
|
|
930
899
|
|
|
931
900
|
|
|
932
901
|
class CutMix(BaseMixTransform):
|
|
933
|
-
"""
|
|
934
|
-
Apply CutMix augmentation to image datasets as described in the paper https://arxiv.org/abs/1905.04899.
|
|
902
|
+
"""Apply CutMix augmentation to image datasets as described in the paper https://arxiv.org/abs/1905.04899.
|
|
935
903
|
|
|
936
|
-
CutMix combines two images by replacing a random rectangular region of one image with the corresponding region from
|
|
937
|
-
and adjusts the labels proportionally to the area of the mixed region.
|
|
904
|
+
CutMix combines two images by replacing a random rectangular region of one image with the corresponding region from
|
|
905
|
+
another image, and adjusts the labels proportionally to the area of the mixed region.
|
|
938
906
|
|
|
939
907
|
Attributes:
|
|
940
908
|
dataset (Any): The dataset to which CutMix augmentation will be applied.
|
|
@@ -955,8 +923,7 @@ class CutMix(BaseMixTransform):
|
|
|
955
923
|
"""
|
|
956
924
|
|
|
957
925
|
def __init__(self, dataset, pre_transform=None, p: float = 0.0, beta: float = 1.0, num_areas: int = 3) -> None:
|
|
958
|
-
"""
|
|
959
|
-
Initialize the CutMix augmentation object.
|
|
926
|
+
"""Initialize the CutMix augmentation object.
|
|
960
927
|
|
|
961
928
|
Args:
|
|
962
929
|
dataset (Any): The dataset to which CutMix augmentation will be applied.
|
|
@@ -970,8 +937,7 @@ class CutMix(BaseMixTransform):
|
|
|
970
937
|
self.num_areas = num_areas
|
|
971
938
|
|
|
972
939
|
def _rand_bbox(self, width: int, height: int) -> tuple[int, int, int, int]:
|
|
973
|
-
"""
|
|
974
|
-
Generate random bounding box coordinates for the cut region.
|
|
940
|
+
"""Generate random bounding box coordinates for the cut region.
|
|
975
941
|
|
|
976
942
|
Args:
|
|
977
943
|
width (int): Width of the image.
|
|
@@ -1000,8 +966,7 @@ class CutMix(BaseMixTransform):
|
|
|
1000
966
|
return x1, y1, x2, y2
|
|
1001
967
|
|
|
1002
968
|
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
1003
|
-
"""
|
|
1004
|
-
Apply CutMix augmentation to the input labels.
|
|
969
|
+
"""Apply CutMix augmentation to the input labels.
|
|
1005
970
|
|
|
1006
971
|
Args:
|
|
1007
972
|
labels (dict[str, Any]): A dictionary containing the original image and label information.
|
|
@@ -1048,12 +1013,11 @@ class CutMix(BaseMixTransform):
|
|
|
1048
1013
|
|
|
1049
1014
|
|
|
1050
1015
|
class RandomPerspective:
|
|
1051
|
-
"""
|
|
1052
|
-
Implement random perspective and affine transformations on images and corresponding annotations.
|
|
1016
|
+
"""Implement random perspective and affine transformations on images and corresponding annotations.
|
|
1053
1017
|
|
|
1054
|
-
This class applies random rotations, translations, scaling, shearing, and perspective transformations
|
|
1055
|
-
|
|
1056
|
-
|
|
1018
|
+
This class applies random rotations, translations, scaling, shearing, and perspective transformations to images and
|
|
1019
|
+
their associated bounding boxes, segments, and keypoints. It can be used as part of an augmentation pipeline for
|
|
1020
|
+
object detection and instance segmentation tasks.
|
|
1057
1021
|
|
|
1058
1022
|
Attributes:
|
|
1059
1023
|
degrees (float): Maximum absolute degree range for random rotations.
|
|
@@ -1091,8 +1055,7 @@ class RandomPerspective:
|
|
|
1091
1055
|
border: tuple[int, int] = (0, 0),
|
|
1092
1056
|
pre_transform=None,
|
|
1093
1057
|
):
|
|
1094
|
-
"""
|
|
1095
|
-
Initialize RandomPerspective object with transformation parameters.
|
|
1058
|
+
"""Initialize RandomPerspective object with transformation parameters.
|
|
1096
1059
|
|
|
1097
1060
|
This class implements random perspective and affine transformations on images and corresponding bounding boxes,
|
|
1098
1061
|
segments, and keypoints. Transformations include rotation, translation, scaling, and shearing.
|
|
@@ -1120,12 +1083,11 @@ class RandomPerspective:
|
|
|
1120
1083
|
self.pre_transform = pre_transform
|
|
1121
1084
|
|
|
1122
1085
|
def affine_transform(self, img: np.ndarray, border: tuple[int, int]) -> tuple[np.ndarray, np.ndarray, float]:
|
|
1123
|
-
"""
|
|
1124
|
-
Apply a sequence of affine transformations centered around the image center.
|
|
1086
|
+
"""Apply a sequence of affine transformations centered around the image center.
|
|
1125
1087
|
|
|
1126
|
-
This function performs a series of geometric transformations on the input image, including
|
|
1127
|
-
|
|
1128
|
-
|
|
1088
|
+
This function performs a series of geometric transformations on the input image, including translation,
|
|
1089
|
+
perspective change, rotation, scaling, and shearing. The transformations are applied in a specific order to
|
|
1090
|
+
maintain consistency.
|
|
1129
1091
|
|
|
1130
1092
|
Args:
|
|
1131
1093
|
img (np.ndarray): Input image to be transformed.
|
|
@@ -1184,15 +1146,14 @@ class RandomPerspective:
|
|
|
1184
1146
|
return img, M, s
|
|
1185
1147
|
|
|
1186
1148
|
def apply_bboxes(self, bboxes: np.ndarray, M: np.ndarray) -> np.ndarray:
|
|
1187
|
-
"""
|
|
1188
|
-
Apply affine transformation to bounding boxes.
|
|
1149
|
+
"""Apply affine transformation to bounding boxes.
|
|
1189
1150
|
|
|
1190
|
-
This function applies an affine transformation to a set of bounding boxes using the provided
|
|
1191
|
-
|
|
1151
|
+
This function applies an affine transformation to a set of bounding boxes using the provided transformation
|
|
1152
|
+
matrix.
|
|
1192
1153
|
|
|
1193
1154
|
Args:
|
|
1194
|
-
bboxes (np.ndarray): Bounding boxes in xyxy format with shape (N, 4), where N is the number
|
|
1195
|
-
|
|
1155
|
+
bboxes (np.ndarray): Bounding boxes in xyxy format with shape (N, 4), where N is the number of bounding
|
|
1156
|
+
boxes.
|
|
1196
1157
|
M (np.ndarray): Affine transformation matrix with shape (3, 3).
|
|
1197
1158
|
|
|
1198
1159
|
Returns:
|
|
@@ -1218,11 +1179,10 @@ class RandomPerspective:
|
|
|
1218
1179
|
return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).T
|
|
1219
1180
|
|
|
1220
1181
|
def apply_segments(self, segments: np.ndarray, M: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
|
|
1221
|
-
"""
|
|
1222
|
-
Apply affine transformations to segments and generate new bounding boxes.
|
|
1182
|
+
"""Apply affine transformations to segments and generate new bounding boxes.
|
|
1223
1183
|
|
|
1224
|
-
This function applies affine transformations to input segments and generates new bounding boxes based on
|
|
1225
|
-
|
|
1184
|
+
This function applies affine transformations to input segments and generates new bounding boxes based on the
|
|
1185
|
+
transformed segments. It clips the transformed segments to fit within the new bounding boxes.
|
|
1226
1186
|
|
|
1227
1187
|
Args:
|
|
1228
1188
|
segments (np.ndarray): Input segments with shape (N, M, 2), where N is the number of segments and M is the
|
|
@@ -1254,16 +1214,15 @@ class RandomPerspective:
|
|
|
1254
1214
|
return bboxes, segments
|
|
1255
1215
|
|
|
1256
1216
|
def apply_keypoints(self, keypoints: np.ndarray, M: np.ndarray) -> np.ndarray:
|
|
1257
|
-
"""
|
|
1258
|
-
Apply affine transformation to keypoints.
|
|
1217
|
+
"""Apply affine transformation to keypoints.
|
|
1259
1218
|
|
|
1260
1219
|
This method transforms the input keypoints using the provided affine transformation matrix. It handles
|
|
1261
1220
|
perspective rescaling if necessary and updates the visibility of keypoints that fall outside the image
|
|
1262
1221
|
boundaries after transformation.
|
|
1263
1222
|
|
|
1264
1223
|
Args:
|
|
1265
|
-
keypoints (np.ndarray): Array of keypoints with shape (N, 17, 3), where N is the number of instances,
|
|
1266
|
-
|
|
1224
|
+
keypoints (np.ndarray): Array of keypoints with shape (N, 17, 3), where N is the number of instances, 17 is
|
|
1225
|
+
the number of keypoints per instance, and 3 represents (x, y, visibility).
|
|
1267
1226
|
M (np.ndarray): 3x3 affine transformation matrix.
|
|
1268
1227
|
|
|
1269
1228
|
Returns:
|
|
@@ -1288,21 +1247,14 @@ class RandomPerspective:
|
|
|
1288
1247
|
return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)
|
|
1289
1248
|
|
|
1290
1249
|
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
1291
|
-
"""
|
|
1292
|
-
Apply random perspective and affine transformations to an image and its associated labels.
|
|
1250
|
+
"""Apply random perspective and affine transformations to an image and its associated labels.
|
|
1293
1251
|
|
|
1294
|
-
This method performs a series of transformations including rotation, translation, scaling, shearing,
|
|
1295
|
-
|
|
1296
|
-
|
|
1252
|
+
This method performs a series of transformations including rotation, translation, scaling, shearing, and
|
|
1253
|
+
perspective distortion on the input image and adjusts the corresponding bounding boxes, segments, and keypoints
|
|
1254
|
+
accordingly.
|
|
1297
1255
|
|
|
1298
1256
|
Args:
|
|
1299
1257
|
labels (dict[str, Any]): A dictionary containing image data and annotations.
|
|
1300
|
-
Must include:
|
|
1301
|
-
'img' (np.ndarray): The input image.
|
|
1302
|
-
'cls' (np.ndarray): Class labels.
|
|
1303
|
-
'instances' (Instances): Object instances with bounding boxes, segments, and keypoints.
|
|
1304
|
-
May include:
|
|
1305
|
-
'mosaic_border' (tuple[int, int]): Border size for mosaic augmentation.
|
|
1306
1258
|
|
|
1307
1259
|
Returns:
|
|
1308
1260
|
(dict[str, Any]): Transformed labels dictionary containing:
|
|
@@ -1321,6 +1273,14 @@ class RandomPerspective:
|
|
|
1321
1273
|
... }
|
|
1322
1274
|
>>> result = transform(labels)
|
|
1323
1275
|
>>> assert result["img"].shape[:2] == result["resized_shape"]
|
|
1276
|
+
|
|
1277
|
+
Notes:
|
|
1278
|
+
'labels' arg must include:
|
|
1279
|
+
- 'img' (np.ndarray): The input image.
|
|
1280
|
+
- 'cls' (np.ndarray): Class labels.
|
|
1281
|
+
- 'instances' (Instances): Object instances with bounding boxes, segments, and keypoints.
|
|
1282
|
+
May include:
|
|
1283
|
+
- 'mosaic_border' (tuple[int, int]): Border size for mosaic augmentation.
|
|
1324
1284
|
"""
|
|
1325
1285
|
if self.pre_transform and "mosaic_border" not in labels:
|
|
1326
1286
|
labels = self.pre_transform(labels)
|
|
@@ -1374,29 +1334,27 @@ class RandomPerspective:
|
|
|
1374
1334
|
area_thr: float = 0.1,
|
|
1375
1335
|
eps: float = 1e-16,
|
|
1376
1336
|
) -> np.ndarray:
|
|
1377
|
-
"""
|
|
1378
|
-
Compute candidate boxes for further processing based on size and aspect ratio criteria.
|
|
1337
|
+
"""Compute candidate boxes for further processing based on size and aspect ratio criteria.
|
|
1379
1338
|
|
|
1380
|
-
This method compares boxes before and after augmentation to determine if they meet specified
|
|
1381
|
-
|
|
1382
|
-
|
|
1339
|
+
This method compares boxes before and after augmentation to determine if they meet specified thresholds for
|
|
1340
|
+
width, height, aspect ratio, and area. It's used to filter out boxes that have been overly distorted or reduced
|
|
1341
|
+
by the augmentation process.
|
|
1383
1342
|
|
|
1384
1343
|
Args:
|
|
1385
|
-
box1 (np.ndarray): Original boxes before augmentation, shape (4, N) where n is the
|
|
1386
|
-
|
|
1387
|
-
box2 (np.ndarray): Augmented boxes after transformation, shape (4, N). Format is
|
|
1388
|
-
|
|
1389
|
-
wh_thr (int): Width and height threshold in pixels. Boxes smaller than this in either
|
|
1390
|
-
|
|
1391
|
-
ar_thr (int): Aspect ratio threshold. Boxes with an aspect ratio greater than this
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
this value are rejected.
|
|
1344
|
+
box1 (np.ndarray): Original boxes before augmentation, shape (4, N) where n is the number of boxes. Format
|
|
1345
|
+
is [x1, y1, x2, y2] in absolute coordinates.
|
|
1346
|
+
box2 (np.ndarray): Augmented boxes after transformation, shape (4, N). Format is [x1, y1, x2, y2] in
|
|
1347
|
+
absolute coordinates.
|
|
1348
|
+
wh_thr (int): Width and height threshold in pixels. Boxes smaller than this in either dimension are
|
|
1349
|
+
rejected.
|
|
1350
|
+
ar_thr (int): Aspect ratio threshold. Boxes with an aspect ratio greater than this value are rejected.
|
|
1351
|
+
area_thr (float): Area ratio threshold. Boxes with an area ratio (new/old) less than this value are
|
|
1352
|
+
rejected.
|
|
1395
1353
|
eps (float): Small epsilon value to prevent division by zero.
|
|
1396
1354
|
|
|
1397
1355
|
Returns:
|
|
1398
|
-
(np.ndarray): Boolean array of shape (n) indicating which boxes are candidates.
|
|
1399
|
-
|
|
1356
|
+
(np.ndarray): Boolean array of shape (n) indicating which boxes are candidates. True values correspond to
|
|
1357
|
+
boxes that meet all criteria.
|
|
1400
1358
|
|
|
1401
1359
|
Examples:
|
|
1402
1360
|
>>> random_perspective = RandomPerspective()
|
|
@@ -1413,8 +1371,7 @@ class RandomPerspective:
|
|
|
1413
1371
|
|
|
1414
1372
|
|
|
1415
1373
|
class RandomHSV:
|
|
1416
|
-
"""
|
|
1417
|
-
Randomly adjust the Hue, Saturation, and Value (HSV) channels of an image.
|
|
1374
|
+
"""Randomly adjust the Hue, Saturation, and Value (HSV) channels of an image.
|
|
1418
1375
|
|
|
1419
1376
|
This class applies random HSV augmentation to images within predefined limits set by hgain, sgain, and vgain.
|
|
1420
1377
|
|
|
@@ -1437,8 +1394,7 @@ class RandomHSV:
|
|
|
1437
1394
|
"""
|
|
1438
1395
|
|
|
1439
1396
|
def __init__(self, hgain: float = 0.5, sgain: float = 0.5, vgain: float = 0.5) -> None:
|
|
1440
|
-
"""
|
|
1441
|
-
Initialize the RandomHSV object for random HSV (Hue, Saturation, Value) augmentation.
|
|
1397
|
+
"""Initialize the RandomHSV object for random HSV (Hue, Saturation, Value) augmentation.
|
|
1442
1398
|
|
|
1443
1399
|
This class applies random adjustments to the HSV channels of an image within specified limits.
|
|
1444
1400
|
|
|
@@ -1456,15 +1412,14 @@ class RandomHSV:
|
|
|
1456
1412
|
self.vgain = vgain
|
|
1457
1413
|
|
|
1458
1414
|
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
1459
|
-
"""
|
|
1460
|
-
Apply random HSV augmentation to an image within predefined limits.
|
|
1415
|
+
"""Apply random HSV augmentation to an image within predefined limits.
|
|
1461
1416
|
|
|
1462
|
-
This method modifies the input image by randomly adjusting its Hue, Saturation, and Value (HSV) channels.
|
|
1463
|
-
|
|
1417
|
+
This method modifies the input image by randomly adjusting its Hue, Saturation, and Value (HSV) channels. The
|
|
1418
|
+
adjustments are made within the limits set by hgain, sgain, and vgain during initialization.
|
|
1464
1419
|
|
|
1465
1420
|
Args:
|
|
1466
|
-
labels (dict[str, Any]): A dictionary containing image data and metadata. Must include an 'img' key with
|
|
1467
|
-
|
|
1421
|
+
labels (dict[str, Any]): A dictionary containing image data and metadata. Must include an 'img' key with the
|
|
1422
|
+
image as a numpy array.
|
|
1468
1423
|
|
|
1469
1424
|
Returns:
|
|
1470
1425
|
(dict[str, Any]): A dictionary containing the mixed image and adjusted labels.
|
|
@@ -1496,11 +1451,10 @@ class RandomHSV:
|
|
|
1496
1451
|
|
|
1497
1452
|
|
|
1498
1453
|
class RandomFlip:
|
|
1499
|
-
"""
|
|
1500
|
-
Apply a random horizontal or vertical flip to an image with a given probability.
|
|
1454
|
+
"""Apply a random horizontal or vertical flip to an image with a given probability.
|
|
1501
1455
|
|
|
1502
|
-
This class performs random image flipping and updates corresponding instance annotations such as
|
|
1503
|
-
|
|
1456
|
+
This class performs random image flipping and updates corresponding instance annotations such as bounding boxes and
|
|
1457
|
+
keypoints.
|
|
1504
1458
|
|
|
1505
1459
|
Attributes:
|
|
1506
1460
|
p (float): Probability of applying the flip. Must be between 0 and 1.
|
|
@@ -1518,11 +1472,10 @@ class RandomFlip:
|
|
|
1518
1472
|
"""
|
|
1519
1473
|
|
|
1520
1474
|
def __init__(self, p: float = 0.5, direction: str = "horizontal", flip_idx: list[int] | None = None) -> None:
|
|
1521
|
-
"""
|
|
1522
|
-
Initialize the RandomFlip class with probability and direction.
|
|
1475
|
+
"""Initialize the RandomFlip class with probability and direction.
|
|
1523
1476
|
|
|
1524
|
-
This class applies a random horizontal or vertical flip to an image with a given probability.
|
|
1525
|
-
|
|
1477
|
+
This class applies a random horizontal or vertical flip to an image with a given probability. It also updates
|
|
1478
|
+
any instances (bounding boxes, keypoints, etc.) accordingly.
|
|
1526
1479
|
|
|
1527
1480
|
Args:
|
|
1528
1481
|
p (float): The probability of applying the flip. Must be between 0 and 1.
|
|
@@ -1544,23 +1497,21 @@ class RandomFlip:
|
|
|
1544
1497
|
self.flip_idx = flip_idx
|
|
1545
1498
|
|
|
1546
1499
|
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
1547
|
-
"""
|
|
1548
|
-
Apply random flip to an image and update any instances like bounding boxes or keypoints accordingly.
|
|
1500
|
+
"""Apply random flip to an image and update any instances like bounding boxes or keypoints accordingly.
|
|
1549
1501
|
|
|
1550
1502
|
This method randomly flips the input image either horizontally or vertically based on the initialized
|
|
1551
|
-
probability and direction. It also updates the corresponding instances (bounding boxes, keypoints) to
|
|
1552
|
-
|
|
1503
|
+
probability and direction. It also updates the corresponding instances (bounding boxes, keypoints) to match the
|
|
1504
|
+
flipped image.
|
|
1553
1505
|
|
|
1554
1506
|
Args:
|
|
1555
1507
|
labels (dict[str, Any]): A dictionary containing the following keys:
|
|
1556
|
-
'img' (np.ndarray): The image to be flipped.
|
|
1557
|
-
'instances' (ultralytics.utils.instance.Instances):
|
|
1558
|
-
optionally keypoints.
|
|
1508
|
+
- 'img' (np.ndarray): The image to be flipped.
|
|
1509
|
+
- 'instances' (ultralytics.utils.instance.Instances): Object containing boxes and optionally keypoints.
|
|
1559
1510
|
|
|
1560
1511
|
Returns:
|
|
1561
1512
|
(dict[str, Any]): The same dictionary with the flipped image and updated instances:
|
|
1562
|
-
'img' (np.ndarray): The flipped image.
|
|
1563
|
-
'instances' (ultralytics.utils.instance.Instances): Updated instances matching the flipped image.
|
|
1513
|
+
- 'img' (np.ndarray): The flipped image.
|
|
1514
|
+
- 'instances' (ultralytics.utils.instance.Instances): Updated instances matching the flipped image.
|
|
1564
1515
|
|
|
1565
1516
|
Examples:
|
|
1566
1517
|
>>> labels = {"img": np.random.rand(640, 640, 3), "instances": Instances(...)}
|
|
@@ -1591,11 +1542,10 @@ class RandomFlip:
|
|
|
1591
1542
|
|
|
1592
1543
|
|
|
1593
1544
|
class LetterBox:
|
|
1594
|
-
"""
|
|
1595
|
-
Resize image and padding for detection, instance segmentation, pose.
|
|
1545
|
+
"""Resize image and padding for detection, instance segmentation, pose.
|
|
1596
1546
|
|
|
1597
|
-
This class resizes and pads images to a specified shape while preserving aspect ratio. It also updates
|
|
1598
|
-
|
|
1547
|
+
This class resizes and pads images to a specified shape while preserving aspect ratio. It also updates corresponding
|
|
1548
|
+
labels and bounding boxes.
|
|
1599
1549
|
|
|
1600
1550
|
Attributes:
|
|
1601
1551
|
new_shape (tuple): Target shape (height, width) for resizing.
|
|
@@ -1626,8 +1576,7 @@ class LetterBox:
|
|
|
1626
1576
|
padding_value: int = 114,
|
|
1627
1577
|
interpolation: int = cv2.INTER_LINEAR,
|
|
1628
1578
|
):
|
|
1629
|
-
"""
|
|
1630
|
-
Initialize LetterBox object for resizing and padding images.
|
|
1579
|
+
"""Initialize LetterBox object for resizing and padding images.
|
|
1631
1580
|
|
|
1632
1581
|
This class is designed to resize and pad images for object detection, instance segmentation, and pose estimation
|
|
1633
1582
|
tasks. It supports various resizing modes including auto-sizing, scale-fill, and letterboxing.
|
|
@@ -1665,20 +1614,20 @@ class LetterBox:
|
|
|
1665
1614
|
self.interpolation = interpolation
|
|
1666
1615
|
|
|
1667
1616
|
def __call__(self, labels: dict[str, Any] | None = None, image: np.ndarray = None) -> dict[str, Any] | np.ndarray:
|
|
1668
|
-
"""
|
|
1669
|
-
Resize and pad an image for object detection, instance segmentation, or pose estimation tasks.
|
|
1617
|
+
"""Resize and pad an image for object detection, instance segmentation, or pose estimation tasks.
|
|
1670
1618
|
|
|
1671
1619
|
This method applies letterboxing to the input image, which involves resizing the image while maintaining its
|
|
1672
1620
|
aspect ratio and adding padding to fit the new shape. It also updates any associated labels accordingly.
|
|
1673
1621
|
|
|
1674
1622
|
Args:
|
|
1675
|
-
labels (dict[str, Any] | None): A dictionary containing image data and associated labels, or empty dict if
|
|
1623
|
+
labels (dict[str, Any] | None): A dictionary containing image data and associated labels, or empty dict if
|
|
1624
|
+
None.
|
|
1676
1625
|
image (np.ndarray | None): The input image as a numpy array. If None, the image is taken from 'labels'.
|
|
1677
1626
|
|
|
1678
1627
|
Returns:
|
|
1679
|
-
(dict[str, Any] |
|
|
1680
|
-
updated labels, and additional metadata. If 'labels' is empty, returns the resized
|
|
1681
|
-
|
|
1628
|
+
(dict[str, Any] | np.ndarray): If 'labels' is provided, returns an updated dictionary with the resized and
|
|
1629
|
+
padded image, updated labels, and additional metadata. If 'labels' is empty, returns the resized and
|
|
1630
|
+
padded image only.
|
|
1682
1631
|
|
|
1683
1632
|
Examples:
|
|
1684
1633
|
>>> letterbox = LetterBox(new_shape=(640, 640))
|
|
@@ -1744,11 +1693,10 @@ class LetterBox:
|
|
|
1744
1693
|
|
|
1745
1694
|
@staticmethod
|
|
1746
1695
|
def _update_labels(labels: dict[str, Any], ratio: tuple[float, float], padw: float, padh: float) -> dict[str, Any]:
|
|
1747
|
-
"""
|
|
1748
|
-
Update labels after applying letterboxing to an image.
|
|
1696
|
+
"""Update labels after applying letterboxing to an image.
|
|
1749
1697
|
|
|
1750
|
-
This method modifies the bounding box coordinates of instances in the labels
|
|
1751
|
-
|
|
1698
|
+
This method modifies the bounding box coordinates of instances in the labels to account for resizing and padding
|
|
1699
|
+
applied during letterboxing.
|
|
1752
1700
|
|
|
1753
1701
|
Args:
|
|
1754
1702
|
labels (dict[str, Any]): A dictionary containing image labels and instances.
|
|
@@ -1774,8 +1722,7 @@ class LetterBox:
|
|
|
1774
1722
|
|
|
1775
1723
|
|
|
1776
1724
|
class CopyPaste(BaseMixTransform):
|
|
1777
|
-
"""
|
|
1778
|
-
CopyPaste class for applying Copy-Paste augmentation to image datasets.
|
|
1725
|
+
"""CopyPaste class for applying Copy-Paste augmentation to image datasets.
|
|
1779
1726
|
|
|
1780
1727
|
This class implements the Copy-Paste augmentation technique as described in the paper "Simple Copy-Paste is a Strong
|
|
1781
1728
|
Data Augmentation Method for Instance Segmentation" (https://arxiv.org/abs/2012.07177). It combines objects from
|
|
@@ -1874,8 +1821,7 @@ class CopyPaste(BaseMixTransform):
|
|
|
1874
1821
|
|
|
1875
1822
|
|
|
1876
1823
|
class Albumentations:
|
|
1877
|
-
"""
|
|
1878
|
-
Albumentations transformations for image augmentation.
|
|
1824
|
+
"""Albumentations transformations for image augmentation.
|
|
1879
1825
|
|
|
1880
1826
|
This class applies various image transformations using the Albumentations library. It includes operations such as
|
|
1881
1827
|
Blur, Median Blur, conversion to grayscale, Contrast Limited Adaptive Histogram Equalization (CLAHE), random changes
|
|
@@ -1900,8 +1846,7 @@ class Albumentations:
|
|
|
1900
1846
|
"""
|
|
1901
1847
|
|
|
1902
1848
|
def __init__(self, p: float = 1.0) -> None:
|
|
1903
|
-
"""
|
|
1904
|
-
Initialize the Albumentations transform object for YOLO bbox formatted parameters.
|
|
1849
|
+
"""Initialize the Albumentations transform object for YOLO bbox formatted parameters.
|
|
1905
1850
|
|
|
1906
1851
|
This class applies various image augmentations using the Albumentations library, including Blur, Median Blur,
|
|
1907
1852
|
conversion to grayscale, Contrast Limited Adaptive Histogram Equalization, random changes of brightness and
|
|
@@ -2014,8 +1959,7 @@ class Albumentations:
|
|
|
2014
1959
|
LOGGER.info(f"{prefix}{e}")
|
|
2015
1960
|
|
|
2016
1961
|
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
2017
|
-
"""
|
|
2018
|
-
Apply Albumentations transformations to input labels.
|
|
1962
|
+
"""Apply Albumentations transformations to input labels.
|
|
2019
1963
|
|
|
2020
1964
|
This method applies a series of image augmentations using the Albumentations library. It can perform both
|
|
2021
1965
|
spatial and non-spatial transformations on the input image and its corresponding labels.
|
|
@@ -2071,8 +2015,7 @@ class Albumentations:
|
|
|
2071
2015
|
|
|
2072
2016
|
|
|
2073
2017
|
class Format:
|
|
2074
|
-
"""
|
|
2075
|
-
A class for formatting image annotations for object detection, instance segmentation, and pose estimation tasks.
|
|
2018
|
+
"""A class for formatting image annotations for object detection, instance segmentation, and pose estimation tasks.
|
|
2076
2019
|
|
|
2077
2020
|
This class standardizes image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.
|
|
2078
2021
|
|
|
@@ -2112,8 +2055,7 @@ class Format:
|
|
|
2112
2055
|
batch_idx: bool = True,
|
|
2113
2056
|
bgr: float = 0.0,
|
|
2114
2057
|
):
|
|
2115
|
-
"""
|
|
2116
|
-
Initialize the Format class with given parameters for image and instance annotation formatting.
|
|
2058
|
+
"""Initialize the Format class with given parameters for image and instance annotation formatting.
|
|
2117
2059
|
|
|
2118
2060
|
This class standardizes image and instance annotations for object detection, instance segmentation, and pose
|
|
2119
2061
|
estimation tasks, preparing them for use in PyTorch DataLoader's `collate_fn`.
|
|
@@ -2156,8 +2098,7 @@ class Format:
|
|
|
2156
2098
|
self.bgr = bgr
|
|
2157
2099
|
|
|
2158
2100
|
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
2159
|
-
"""
|
|
2160
|
-
Format image annotations for object detection, instance segmentation, and pose estimation tasks.
|
|
2101
|
+
"""Format image annotations for object detection, instance segmentation, and pose estimation tasks.
|
|
2161
2102
|
|
|
2162
2103
|
This method standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch
|
|
2163
2104
|
DataLoader. It processes the input labels dictionary, converting annotations to the specified format and
|
|
@@ -2225,8 +2166,7 @@ class Format:
|
|
|
2225
2166
|
return labels
|
|
2226
2167
|
|
|
2227
2168
|
def _format_img(self, img: np.ndarray) -> torch.Tensor:
|
|
2228
|
-
"""
|
|
2229
|
-
Format an image for YOLO from a Numpy array to a PyTorch tensor.
|
|
2169
|
+
"""Format an image for YOLO from a Numpy array to a PyTorch tensor.
|
|
2230
2170
|
|
|
2231
2171
|
This function performs the following operations:
|
|
2232
2172
|
1. Ensures the image has 3 dimensions (adds a channel dimension if needed).
|
|
@@ -2258,8 +2198,7 @@ class Format:
|
|
|
2258
2198
|
def _format_segments(
|
|
2259
2199
|
self, instances: Instances, cls: np.ndarray, w: int, h: int
|
|
2260
2200
|
) -> tuple[np.ndarray, Instances, np.ndarray]:
|
|
2261
|
-
"""
|
|
2262
|
-
Convert polygon segments to bitmap masks.
|
|
2201
|
+
"""Convert polygon segments to bitmap masks.
|
|
2263
2202
|
|
|
2264
2203
|
Args:
|
|
2265
2204
|
instances (Instances): Object containing segment information.
|
|
@@ -2293,8 +2232,7 @@ class LoadVisualPrompt:
|
|
|
2293
2232
|
"""Create visual prompts from bounding boxes or masks for model input."""
|
|
2294
2233
|
|
|
2295
2234
|
def __init__(self, scale_factor: float = 1 / 8) -> None:
|
|
2296
|
-
"""
|
|
2297
|
-
Initialize the LoadVisualPrompt with a scale factor.
|
|
2235
|
+
"""Initialize the LoadVisualPrompt with a scale factor.
|
|
2298
2236
|
|
|
2299
2237
|
Args:
|
|
2300
2238
|
scale_factor (float): Factor to scale the input image dimensions.
|
|
@@ -2302,8 +2240,7 @@ class LoadVisualPrompt:
|
|
|
2302
2240
|
self.scale_factor = scale_factor
|
|
2303
2241
|
|
|
2304
2242
|
def make_mask(self, boxes: torch.Tensor, h: int, w: int) -> torch.Tensor:
|
|
2305
|
-
"""
|
|
2306
|
-
Create binary masks from bounding boxes.
|
|
2243
|
+
"""Create binary masks from bounding boxes.
|
|
2307
2244
|
|
|
2308
2245
|
Args:
|
|
2309
2246
|
boxes (torch.Tensor): Bounding boxes in xyxy format, shape: (N, 4).
|
|
@@ -2320,8 +2257,7 @@ class LoadVisualPrompt:
|
|
|
2320
2257
|
return (r >= x1) * (r < x2) * (c >= y1) * (c < y2)
|
|
2321
2258
|
|
|
2322
2259
|
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
2323
|
-
"""
|
|
2324
|
-
Process labels to create visual prompts.
|
|
2260
|
+
"""Process labels to create visual prompts.
|
|
2325
2261
|
|
|
2326
2262
|
Args:
|
|
2327
2263
|
labels (dict[str, Any]): Dictionary containing image data and annotations.
|
|
@@ -2347,8 +2283,7 @@ class LoadVisualPrompt:
|
|
|
2347
2283
|
bboxes: np.ndarray | torch.Tensor = None,
|
|
2348
2284
|
masks: np.ndarray | torch.Tensor = None,
|
|
2349
2285
|
) -> torch.Tensor:
|
|
2350
|
-
"""
|
|
2351
|
-
Generate visual masks based on bounding boxes or masks.
|
|
2286
|
+
"""Generate visual masks based on bounding boxes or masks.
|
|
2352
2287
|
|
|
2353
2288
|
Args:
|
|
2354
2289
|
category (int | np.ndarray | torch.Tensor): The category labels for the objects.
|
|
@@ -2389,12 +2324,11 @@ class LoadVisualPrompt:
|
|
|
2389
2324
|
|
|
2390
2325
|
|
|
2391
2326
|
class RandomLoadText:
|
|
2392
|
-
"""
|
|
2393
|
-
Randomly sample positive and negative texts and update class indices accordingly.
|
|
2327
|
+
"""Randomly sample positive and negative texts and update class indices accordingly.
|
|
2394
2328
|
|
|
2395
|
-
This class is responsible for sampling texts from a given set of class texts, including both positive
|
|
2396
|
-
|
|
2397
|
-
|
|
2329
|
+
This class is responsible for sampling texts from a given set of class texts, including both positive (present in
|
|
2330
|
+
the image) and negative (not present in the image) samples. It updates the class indices to reflect the sampled
|
|
2331
|
+
texts and can optionally pad the text list to a fixed length.
|
|
2398
2332
|
|
|
2399
2333
|
Attributes:
|
|
2400
2334
|
prompt_format (str): Format string for text prompts.
|
|
@@ -2422,21 +2356,19 @@ class RandomLoadText:
|
|
|
2422
2356
|
padding: bool = False,
|
|
2423
2357
|
padding_value: list[str] = [""],
|
|
2424
2358
|
) -> None:
|
|
2425
|
-
"""
|
|
2426
|
-
Initialize the RandomLoadText class for randomly sampling positive and negative texts.
|
|
2359
|
+
"""Initialize the RandomLoadText class for randomly sampling positive and negative texts.
|
|
2427
2360
|
|
|
2428
|
-
This class is designed to randomly sample positive texts and negative texts, and update the class
|
|
2429
|
-
|
|
2361
|
+
This class is designed to randomly sample positive texts and negative texts, and update the class indices
|
|
2362
|
+
accordingly to the number of samples. It can be used for text-based object detection tasks.
|
|
2430
2363
|
|
|
2431
2364
|
Args:
|
|
2432
|
-
prompt_format (str): Format string for the prompt. The format string should
|
|
2433
|
-
|
|
2434
|
-
neg_samples (tuple[int, int]): A range to randomly sample negative texts. The first integer
|
|
2435
|
-
|
|
2436
|
-
maximum.
|
|
2365
|
+
prompt_format (str): Format string for the prompt. The format string should contain a single pair of curly
|
|
2366
|
+
braces {} where the text will be inserted.
|
|
2367
|
+
neg_samples (tuple[int, int]): A range to randomly sample negative texts. The first integer specifies the
|
|
2368
|
+
minimum number of negative samples, and the second integer specifies the maximum.
|
|
2437
2369
|
max_samples (int): The maximum number of different text samples in one image.
|
|
2438
|
-
padding (bool): Whether to pad texts to max_samples. If True, the number of texts will always
|
|
2439
|
-
|
|
2370
|
+
padding (bool): Whether to pad texts to max_samples. If True, the number of texts will always be equal to
|
|
2371
|
+
max_samples.
|
|
2440
2372
|
padding_value (list[str]): The padding text to use when padding is True.
|
|
2441
2373
|
|
|
2442
2374
|
Attributes:
|
|
@@ -2462,15 +2394,15 @@ class RandomLoadText:
|
|
|
2462
2394
|
self.padding_value = padding_value
|
|
2463
2395
|
|
|
2464
2396
|
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
|
2465
|
-
"""
|
|
2466
|
-
Randomly sample positive and negative texts and update class indices accordingly.
|
|
2397
|
+
"""Randomly sample positive and negative texts and update class indices accordingly.
|
|
2467
2398
|
|
|
2468
|
-
This method samples positive texts based on the existing class labels in the image, and randomly
|
|
2469
|
-
|
|
2470
|
-
|
|
2399
|
+
This method samples positive texts based on the existing class labels in the image, and randomly selects
|
|
2400
|
+
negative texts from the remaining classes. It then updates the class indices to match the new sampled text
|
|
2401
|
+
order.
|
|
2471
2402
|
|
|
2472
2403
|
Args:
|
|
2473
|
-
labels (dict[str, Any]): A dictionary containing image labels and metadata. Must include 'texts' and 'cls'
|
|
2404
|
+
labels (dict[str, Any]): A dictionary containing image labels and metadata. Must include 'texts' and 'cls'
|
|
2405
|
+
keys.
|
|
2474
2406
|
|
|
2475
2407
|
Returns:
|
|
2476
2408
|
(dict[str, Any]): Updated labels dictionary with new 'cls' and 'texts' entries.
|
|
@@ -2528,16 +2460,16 @@ class RandomLoadText:
|
|
|
2528
2460
|
|
|
2529
2461
|
|
|
2530
2462
|
def v8_transforms(dataset, imgsz: int, hyp: IterableSimpleNamespace, stretch: bool = False):
|
|
2531
|
-
"""
|
|
2532
|
-
Apply a series of image transformations for training.
|
|
2463
|
+
"""Apply a series of image transformations for training.
|
|
2533
2464
|
|
|
2534
|
-
This function creates a composition of image augmentation techniques to prepare images for YOLO training.
|
|
2535
|
-
|
|
2465
|
+
This function creates a composition of image augmentation techniques to prepare images for YOLO training. It
|
|
2466
|
+
includes operations such as mosaic, copy-paste, random perspective, mixup, and various color adjustments.
|
|
2536
2467
|
|
|
2537
2468
|
Args:
|
|
2538
2469
|
dataset (Dataset): The dataset object containing image data and annotations.
|
|
2539
2470
|
imgsz (int): The target image size for resizing.
|
|
2540
|
-
hyp (IterableSimpleNamespace): A dictionary of hyperparameters controlling various aspects of the
|
|
2471
|
+
hyp (IterableSimpleNamespace): A dictionary of hyperparameters controlling various aspects of the
|
|
2472
|
+
transformations.
|
|
2541
2473
|
stretch (bool): If True, applies stretching to the image. If False, uses LetterBox resizing.
|
|
2542
2474
|
|
|
2543
2475
|
Returns:
|
|
@@ -2603,12 +2535,11 @@ def classify_transforms(
|
|
|
2603
2535
|
interpolation: str = "BILINEAR",
|
|
2604
2536
|
crop_fraction: float | None = None,
|
|
2605
2537
|
):
|
|
2606
|
-
"""
|
|
2607
|
-
Create a composition of image transforms for classification tasks.
|
|
2538
|
+
"""Create a composition of image transforms for classification tasks.
|
|
2608
2539
|
|
|
2609
|
-
This function generates a sequence of torchvision transforms suitable for preprocessing images
|
|
2610
|
-
|
|
2611
|
-
|
|
2540
|
+
This function generates a sequence of torchvision transforms suitable for preprocessing images for classification
|
|
2541
|
+
models during evaluation or inference. The transforms include resizing, center cropping, conversion to tensor, and
|
|
2542
|
+
normalization.
|
|
2612
2543
|
|
|
2613
2544
|
Args:
|
|
2614
2545
|
size (int | tuple): The target size for the transformed image. If an int, it defines the shortest edge. If a
|
|
@@ -2663,8 +2594,7 @@ def classify_augmentations(
|
|
|
2663
2594
|
erasing: float = 0.0,
|
|
2664
2595
|
interpolation: str = "BILINEAR",
|
|
2665
2596
|
):
|
|
2666
|
-
"""
|
|
2667
|
-
Create a composition of image augmentation transforms for classification tasks.
|
|
2597
|
+
"""Create a composition of image augmentation transforms for classification tasks.
|
|
2668
2598
|
|
|
2669
2599
|
This function generates a set of image transformations suitable for training classification models. It includes
|
|
2670
2600
|
options for resizing, flipping, color jittering, auto augmentation, and random erasing.
|
|
@@ -2752,11 +2682,10 @@ def classify_augmentations(
|
|
|
2752
2682
|
|
|
2753
2683
|
# NOTE: keep this class for backward compatibility
|
|
2754
2684
|
class ClassifyLetterBox:
|
|
2755
|
-
"""
|
|
2756
|
-
A class for resizing and padding images for classification tasks.
|
|
2685
|
+
"""A class for resizing and padding images for classification tasks.
|
|
2757
2686
|
|
|
2758
|
-
This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]).
|
|
2759
|
-
|
|
2687
|
+
This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]). It
|
|
2688
|
+
resizes and pads images to a specified size while maintaining the original aspect ratio.
|
|
2760
2689
|
|
|
2761
2690
|
Attributes:
|
|
2762
2691
|
h (int): Target height of the image.
|
|
@@ -2776,15 +2705,14 @@ class ClassifyLetterBox:
|
|
|
2776
2705
|
"""
|
|
2777
2706
|
|
|
2778
2707
|
def __init__(self, size: int | tuple[int, int] = (640, 640), auto: bool = False, stride: int = 32):
|
|
2779
|
-
"""
|
|
2780
|
-
Initialize the ClassifyLetterBox object for image preprocessing.
|
|
2708
|
+
"""Initialize the ClassifyLetterBox object for image preprocessing.
|
|
2781
2709
|
|
|
2782
2710
|
This class is designed to be part of a transformation pipeline for image classification tasks. It resizes and
|
|
2783
2711
|
pads images to a specified size while maintaining the original aspect ratio.
|
|
2784
2712
|
|
|
2785
2713
|
Args:
|
|
2786
|
-
size (int | tuple[int, int]): Target size for the letterboxed image. If an int, a square image of
|
|
2787
|
-
|
|
2714
|
+
size (int | tuple[int, int]): Target size for the letterboxed image. If an int, a square image of (size,
|
|
2715
|
+
size) is created. If a tuple, it should be (height, width).
|
|
2788
2716
|
auto (bool): If True, automatically calculates the short side based on stride.
|
|
2789
2717
|
stride (int): The stride value, used when 'auto' is True.
|
|
2790
2718
|
|
|
@@ -2807,8 +2735,7 @@ class ClassifyLetterBox:
|
|
|
2807
2735
|
self.stride = stride # used with auto
|
|
2808
2736
|
|
|
2809
2737
|
def __call__(self, im: np.ndarray) -> np.ndarray:
|
|
2810
|
-
"""
|
|
2811
|
-
Resize and pad an image using the letterbox method.
|
|
2738
|
+
"""Resize and pad an image using the letterbox method.
|
|
2812
2739
|
|
|
2813
2740
|
This method resizes the input image to fit within the specified dimensions while maintaining its aspect ratio,
|
|
2814
2741
|
then pads the resized image to match the target size.
|
|
@@ -2817,8 +2744,8 @@ class ClassifyLetterBox:
|
|
|
2817
2744
|
im (np.ndarray): Input image as a numpy array with shape (H, W, C).
|
|
2818
2745
|
|
|
2819
2746
|
Returns:
|
|
2820
|
-
(np.ndarray): Resized and padded image as a numpy array with shape (hs, ws, 3), where hs and ws are
|
|
2821
|
-
|
|
2747
|
+
(np.ndarray): Resized and padded image as a numpy array with shape (hs, ws, 3), where hs and ws are the
|
|
2748
|
+
target height and width respectively.
|
|
2822
2749
|
|
|
2823
2750
|
Examples:
|
|
2824
2751
|
>>> letterbox = ClassifyLetterBox(size=(640, 640))
|
|
@@ -2843,8 +2770,7 @@ class ClassifyLetterBox:
|
|
|
2843
2770
|
|
|
2844
2771
|
# NOTE: keep this class for backward compatibility
|
|
2845
2772
|
class CenterCrop:
|
|
2846
|
-
"""
|
|
2847
|
-
Apply center cropping to images for classification tasks.
|
|
2773
|
+
"""Apply center cropping to images for classification tasks.
|
|
2848
2774
|
|
|
2849
2775
|
This class performs center cropping on input images, resizing them to a specified size while maintaining the aspect
|
|
2850
2776
|
ratio. It is designed to be part of a transformation pipeline, e.g., T.Compose([CenterCrop(size), ToTensor()]).
|
|
@@ -2865,15 +2791,14 @@ class CenterCrop:
|
|
|
2865
2791
|
"""
|
|
2866
2792
|
|
|
2867
2793
|
def __init__(self, size: int | tuple[int, int] = (640, 640)):
|
|
2868
|
-
"""
|
|
2869
|
-
Initialize the CenterCrop object for image preprocessing.
|
|
2794
|
+
"""Initialize the CenterCrop object for image preprocessing.
|
|
2870
2795
|
|
|
2871
2796
|
This class is designed to be part of a transformation pipeline, e.g., T.Compose([CenterCrop(size), ToTensor()]).
|
|
2872
2797
|
It performs a center crop on input images to a specified size.
|
|
2873
2798
|
|
|
2874
2799
|
Args:
|
|
2875
|
-
size (int | tuple[int, int]): The desired output size of the crop. If size is an int, a square crop
|
|
2876
|
-
|
|
2800
|
+
size (int | tuple[int, int]): The desired output size of the crop. If size is an int, a square crop (size,
|
|
2801
|
+
size) is made. If size is a sequence like (h, w), it is used as the output size.
|
|
2877
2802
|
|
|
2878
2803
|
Returns:
|
|
2879
2804
|
(None): This method initializes the object and does not return anything.
|
|
@@ -2889,15 +2814,14 @@ class CenterCrop:
|
|
|
2889
2814
|
self.h, self.w = (size, size) if isinstance(size, int) else size
|
|
2890
2815
|
|
|
2891
2816
|
def __call__(self, im: Image.Image | np.ndarray) -> np.ndarray:
|
|
2892
|
-
"""
|
|
2893
|
-
Apply center cropping to an input image.
|
|
2817
|
+
"""Apply center cropping to an input image.
|
|
2894
2818
|
|
|
2895
|
-
This method resizes and crops the center of the image using a letterbox method. It maintains the aspect
|
|
2896
|
-
|
|
2819
|
+
This method resizes and crops the center of the image using a letterbox method. It maintains the aspect ratio of
|
|
2820
|
+
the original image while fitting it into the specified dimensions.
|
|
2897
2821
|
|
|
2898
2822
|
Args:
|
|
2899
|
-
im (np.ndarray | PIL.Image.Image): The input image as a numpy array of shape (H, W, C) or a
|
|
2900
|
-
|
|
2823
|
+
im (np.ndarray | PIL.Image.Image): The input image as a numpy array of shape (H, W, C) or a PIL Image
|
|
2824
|
+
object.
|
|
2901
2825
|
|
|
2902
2826
|
Returns:
|
|
2903
2827
|
(np.ndarray): The center-cropped and resized image as a numpy array of shape (self.h, self.w, C).
|
|
@@ -2918,8 +2842,7 @@ class CenterCrop:
|
|
|
2918
2842
|
|
|
2919
2843
|
# NOTE: keep this class for backward compatibility
|
|
2920
2844
|
class ToTensor:
|
|
2921
|
-
"""
|
|
2922
|
-
Convert an image from a numpy array to a PyTorch tensor.
|
|
2845
|
+
"""Convert an image from a numpy array to a PyTorch tensor.
|
|
2923
2846
|
|
|
2924
2847
|
This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]).
|
|
2925
2848
|
|
|
@@ -2942,12 +2865,11 @@ class ToTensor:
|
|
|
2942
2865
|
"""
|
|
2943
2866
|
|
|
2944
2867
|
def __init__(self, half: bool = False):
|
|
2945
|
-
"""
|
|
2946
|
-
Initialize the ToTensor object for converting images to PyTorch tensors.
|
|
2868
|
+
"""Initialize the ToTensor object for converting images to PyTorch tensors.
|
|
2947
2869
|
|
|
2948
2870
|
This class is designed to be used as part of a transformation pipeline for image preprocessing in the
|
|
2949
|
-
Ultralytics YOLO framework. It converts numpy arrays or PIL Images to PyTorch tensors, with an option
|
|
2950
|
-
|
|
2871
|
+
Ultralytics YOLO framework. It converts numpy arrays or PIL Images to PyTorch tensors, with an option for
|
|
2872
|
+
half-precision (float16) conversion.
|
|
2951
2873
|
|
|
2952
2874
|
Args:
|
|
2953
2875
|
half (bool): If True, converts the tensor to half precision (float16).
|
|
@@ -2963,19 +2885,18 @@ class ToTensor:
|
|
|
2963
2885
|
self.half = half
|
|
2964
2886
|
|
|
2965
2887
|
def __call__(self, im: np.ndarray) -> torch.Tensor:
|
|
2966
|
-
"""
|
|
2967
|
-
Transform an image from a numpy array to a PyTorch tensor.
|
|
2888
|
+
"""Transform an image from a numpy array to a PyTorch tensor.
|
|
2968
2889
|
|
|
2969
|
-
This method converts the input image from a numpy array to a PyTorch tensor, applying optional
|
|
2970
|
-
|
|
2971
|
-
|
|
2890
|
+
This method converts the input image from a numpy array to a PyTorch tensor, applying optional half-precision
|
|
2891
|
+
conversion and normalization. The image is transposed from HWC to CHW format and the color channels are reversed
|
|
2892
|
+
from BGR to RGB.
|
|
2972
2893
|
|
|
2973
2894
|
Args:
|
|
2974
2895
|
im (np.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.
|
|
2975
2896
|
|
|
2976
2897
|
Returns:
|
|
2977
|
-
(torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized
|
|
2978
|
-
|
|
2898
|
+
(torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1] with
|
|
2899
|
+
shape (C, H, W) in RGB order.
|
|
2979
2900
|
|
|
2980
2901
|
Examples:
|
|
2981
2902
|
>>> transform = ToTensor(half=True)
|