dgenerate-ultralytics-headless 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/METADATA +1 -1
- dgenerate_ultralytics_headless-8.3.144.dist-info/RECORD +272 -0
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +11 -11
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -13
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +39 -39
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +187 -157
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +11 -10
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +6 -3
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +15 -7
- ultralytics/solutions/object_cropper.py +3 -2
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +184 -75
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +42 -28
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +71 -27
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- dgenerate_ultralytics_headless-8.3.143.dist-info/RECORD +0 -272
- {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/top_level.txt +0 -0
ultralytics/data/augment.py
CHANGED
@@ -31,10 +31,10 @@ class BaseTransform:
|
|
31
31
|
compatible with both classification and semantic segmentation tasks.
|
32
32
|
|
33
33
|
Methods:
|
34
|
-
apply_image:
|
35
|
-
apply_instances:
|
36
|
-
apply_semantic:
|
37
|
-
__call__:
|
34
|
+
apply_image: Apply image transformations to labels.
|
35
|
+
apply_instances: Apply transformations to object instances in labels.
|
36
|
+
apply_semantic: Apply semantic segmentation transformations to an image.
|
37
|
+
__call__: Apply all label transformations to an image, instances, and semantic masks.
|
38
38
|
|
39
39
|
Examples:
|
40
40
|
>>> transform = BaseTransform()
|
@@ -44,7 +44,7 @@ class BaseTransform:
|
|
44
44
|
|
45
45
|
def __init__(self) -> None:
|
46
46
|
"""
|
47
|
-
|
47
|
+
Initialize the BaseTransform object.
|
48
48
|
|
49
49
|
This constructor sets up the base transformation object, which can be extended for specific image
|
50
50
|
processing tasks. It is designed to be compatible with both classification and semantic segmentation.
|
@@ -56,7 +56,7 @@ class BaseTransform:
|
|
56
56
|
|
57
57
|
def apply_image(self, labels):
|
58
58
|
"""
|
59
|
-
|
59
|
+
Apply image transformations to labels.
|
60
60
|
|
61
61
|
This method is intended to be overridden by subclasses to implement specific image transformation
|
62
62
|
logic. In its base form, it returns the input labels unchanged.
|
@@ -79,7 +79,7 @@ class BaseTransform:
|
|
79
79
|
|
80
80
|
def apply_instances(self, labels):
|
81
81
|
"""
|
82
|
-
|
82
|
+
Apply transformations to object instances in labels.
|
83
83
|
|
84
84
|
This method is responsible for applying various transformations to object instances within the given
|
85
85
|
labels. It is designed to be overridden by subclasses to implement specific instance transformation
|
@@ -100,7 +100,7 @@ class BaseTransform:
|
|
100
100
|
|
101
101
|
def apply_semantic(self, labels):
|
102
102
|
"""
|
103
|
-
|
103
|
+
Apply semantic segmentation transformations to an image.
|
104
104
|
|
105
105
|
This method is intended to be overridden by subclasses to implement specific semantic segmentation
|
106
106
|
transformations. In its base form, it does not perform any operations.
|
@@ -120,7 +120,7 @@ class BaseTransform:
|
|
120
120
|
|
121
121
|
def __call__(self, labels):
|
122
122
|
"""
|
123
|
-
|
123
|
+
Apply all label transformations to an image, instances, and semantic masks.
|
124
124
|
|
125
125
|
This method orchestrates the application of various transformations defined in the BaseTransform class
|
126
126
|
to the input labels. It sequentially calls the apply_image and apply_instances methods to process the
|
@@ -151,12 +151,12 @@ class Compose:
|
|
151
151
|
transforms (List[Callable]): A list of transformation functions to be applied sequentially.
|
152
152
|
|
153
153
|
Methods:
|
154
|
-
__call__:
|
155
|
-
append:
|
156
|
-
insert:
|
157
|
-
__getitem__:
|
158
|
-
__setitem__:
|
159
|
-
tolist:
|
154
|
+
__call__: Apply a series of transformations to input data.
|
155
|
+
append: Append a new transform to the existing list of transforms.
|
156
|
+
insert: Insert a new transform at a specified index in the list of transforms.
|
157
|
+
__getitem__: Retrieve a specific transform or a set of transforms using indexing.
|
158
|
+
__setitem__: Set a specific transform or a set of transforms using indexing.
|
159
|
+
tolist: Convert the list of transforms to a standard Python list.
|
160
160
|
|
161
161
|
Examples:
|
162
162
|
>>> transforms = [RandomFlip(), RandomPerspective(30)]
|
@@ -168,7 +168,7 @@ class Compose:
|
|
168
168
|
|
169
169
|
def __init__(self, transforms):
|
170
170
|
"""
|
171
|
-
|
171
|
+
Initialize the Compose object with a list of transforms.
|
172
172
|
|
173
173
|
Args:
|
174
174
|
transforms (List[Callable]): A list of callable transform objects to be applied sequentially.
|
@@ -182,8 +182,9 @@ class Compose:
|
|
182
182
|
|
183
183
|
def __call__(self, data):
|
184
184
|
"""
|
185
|
-
|
186
|
-
|
185
|
+
Apply a series of transformations to input data.
|
186
|
+
|
187
|
+
This method sequentially applies each transformation in the Compose object's transforms to the input data.
|
187
188
|
|
188
189
|
Args:
|
189
190
|
data (Any): The input data to be transformed. This can be of any type, depending on the
|
@@ -203,7 +204,7 @@ class Compose:
|
|
203
204
|
|
204
205
|
def append(self, transform):
|
205
206
|
"""
|
206
|
-
|
207
|
+
Append a new transform to the existing list of transforms.
|
207
208
|
|
208
209
|
Args:
|
209
210
|
transform (BaseTransform): The transformation to be added to the composition.
|
@@ -216,7 +217,7 @@ class Compose:
|
|
216
217
|
|
217
218
|
def insert(self, index, transform):
|
218
219
|
"""
|
219
|
-
|
220
|
+
Insert a new transform at a specified index in the existing list of transforms.
|
220
221
|
|
221
222
|
Args:
|
222
223
|
index (int): The index at which to insert the new transform.
|
@@ -232,7 +233,7 @@ class Compose:
|
|
232
233
|
|
233
234
|
def __getitem__(self, index: Union[list, int]) -> "Compose":
|
234
235
|
"""
|
235
|
-
|
236
|
+
Retrieve a specific transform or a set of transforms using indexing.
|
236
237
|
|
237
238
|
Args:
|
238
239
|
index (int | List[int]): Index or list of indices of the transforms to retrieve.
|
@@ -255,7 +256,7 @@ class Compose:
|
|
255
256
|
|
256
257
|
def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None:
|
257
258
|
"""
|
258
|
-
|
259
|
+
Set one or more transforms in the composition using indexing.
|
259
260
|
|
260
261
|
Args:
|
261
262
|
index (int | List[int]): Index or list of indices to set transforms at.
|
@@ -282,7 +283,7 @@ class Compose:
|
|
282
283
|
|
283
284
|
def tolist(self):
|
284
285
|
"""
|
285
|
-
|
286
|
+
Convert the list of transforms to a standard Python list.
|
286
287
|
|
287
288
|
Returns:
|
288
289
|
(list): A list containing all the transform objects in the Compose instance.
|
@@ -298,7 +299,7 @@ class Compose:
|
|
298
299
|
|
299
300
|
def __repr__(self):
|
300
301
|
"""
|
301
|
-
|
302
|
+
Return a string representation of the Compose object.
|
302
303
|
|
303
304
|
Returns:
|
304
305
|
(str): A string representation of the Compose object, including the list of transforms.
|
@@ -328,10 +329,10 @@ class BaseMixTransform:
|
|
328
329
|
p (float): Probability of applying the mix transformation.
|
329
330
|
|
330
331
|
Methods:
|
331
|
-
__call__:
|
332
|
+
__call__: Apply the mix transformation to the input labels.
|
332
333
|
_mix_transform: Abstract method to be implemented by subclasses for specific mix operations.
|
333
334
|
get_indexes: Abstract method to get indexes of images to be mixed.
|
334
|
-
_update_label_text:
|
335
|
+
_update_label_text: Update label text for mixed images.
|
335
336
|
|
336
337
|
Examples:
|
337
338
|
>>> class CustomMixTransform(BaseMixTransform):
|
@@ -348,7 +349,7 @@ class BaseMixTransform:
|
|
348
349
|
|
349
350
|
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
|
350
351
|
"""
|
351
|
-
|
352
|
+
Initialize the BaseMixTransform object for mix transformations like CutMix, MixUp and Mosaic.
|
352
353
|
|
353
354
|
This class serves as a base for implementing mix transformations in image processing pipelines.
|
354
355
|
|
@@ -368,7 +369,7 @@ class BaseMixTransform:
|
|
368
369
|
|
369
370
|
def __call__(self, labels):
|
370
371
|
"""
|
371
|
-
|
372
|
+
Apply pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
|
372
373
|
|
373
374
|
This method determines whether to apply the mix transform based on a probability factor. If applied, it
|
374
375
|
selects additional images, applies pre-transforms if specified, and then performs the mix transform.
|
@@ -408,7 +409,7 @@ class BaseMixTransform:
|
|
408
409
|
|
409
410
|
def _mix_transform(self, labels):
|
410
411
|
"""
|
411
|
-
|
412
|
+
Apply CutMix, MixUp or Mosaic augmentation to the label dictionary.
|
412
413
|
|
413
414
|
This method should be implemented by subclasses to perform specific mix transformations like CutMix, MixUp or
|
414
415
|
Mosaic. It modifies the input label dictionary in-place with the augmented data.
|
@@ -429,7 +430,7 @@ class BaseMixTransform:
|
|
429
430
|
|
430
431
|
def get_indexes(self):
|
431
432
|
"""
|
432
|
-
|
433
|
+
Get a list of shuffled indexes for mosaic augmentation.
|
433
434
|
|
434
435
|
Returns:
|
435
436
|
(List[int]): A list of shuffled indexes from the dataset.
|
@@ -444,7 +445,7 @@ class BaseMixTransform:
|
|
444
445
|
@staticmethod
|
445
446
|
def _update_label_text(labels):
|
446
447
|
"""
|
447
|
-
|
448
|
+
Update label text and class IDs for mixed labels in image augmentation.
|
448
449
|
|
449
450
|
This method processes the 'texts' and 'cls' fields of the input labels dictionary and any mixed labels,
|
450
451
|
creating a unified set of text labels and updating class IDs accordingly.
|
@@ -502,13 +503,13 @@ class Mosaic(BaseMixTransform):
|
|
502
503
|
border (Tuple[int, int]): Border size for width and height.
|
503
504
|
|
504
505
|
Methods:
|
505
|
-
get_indexes:
|
506
|
-
_mix_transform:
|
507
|
-
_mosaic3:
|
508
|
-
_mosaic4:
|
509
|
-
_mosaic9:
|
510
|
-
_update_labels:
|
511
|
-
_cat_labels:
|
506
|
+
get_indexes: Return a list of random indexes from the dataset.
|
507
|
+
_mix_transform: Apply mosaic transformation to the input image and labels.
|
508
|
+
_mosaic3: Create a 1x3 image mosaic.
|
509
|
+
_mosaic4: Create a 2x2 image mosaic.
|
510
|
+
_mosaic9: Create a 3x3 image mosaic.
|
511
|
+
_update_labels: Update labels with padding.
|
512
|
+
_cat_labels: Concatenate labels and clip instances to the mosaic border.
|
512
513
|
|
513
514
|
Examples:
|
514
515
|
>>> from ultralytics.data.augment import Mosaic
|
@@ -519,7 +520,7 @@ class Mosaic(BaseMixTransform):
|
|
519
520
|
|
520
521
|
def __init__(self, dataset, imgsz=640, p=1.0, n=4):
|
521
522
|
"""
|
522
|
-
|
523
|
+
Initialize the Mosaic augmentation object.
|
523
524
|
|
524
525
|
This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image.
|
525
526
|
The augmentation is applied to a dataset with a given probability.
|
@@ -545,7 +546,7 @@ class Mosaic(BaseMixTransform):
|
|
545
546
|
|
546
547
|
def get_indexes(self):
|
547
548
|
"""
|
548
|
-
|
549
|
+
Return a list of random indexes from the dataset for mosaic augmentation.
|
549
550
|
|
550
551
|
This method selects random image indexes either from a buffer or from the entire dataset, depending on
|
551
552
|
the 'buffer' parameter. It is used to choose images for creating mosaic augmentations.
|
@@ -566,7 +567,7 @@ class Mosaic(BaseMixTransform):
|
|
566
567
|
|
567
568
|
def _mix_transform(self, labels):
|
568
569
|
"""
|
569
|
-
|
570
|
+
Apply mosaic augmentation to the input image and labels.
|
570
571
|
|
571
572
|
This method combines multiple images (3, 4, or 9) into a single mosaic image based on the 'n' attribute.
|
572
573
|
It ensures that rectangular annotations are not present and that there are other images available for
|
@@ -595,7 +596,7 @@ class Mosaic(BaseMixTransform):
|
|
595
596
|
|
596
597
|
def _mosaic3(self, labels):
|
597
598
|
"""
|
598
|
-
|
599
|
+
Create a 1x3 image mosaic by combining three images.
|
599
600
|
|
600
601
|
This method arranges three images in a horizontal layout, with the main image in the center and two
|
601
602
|
additional images on either side. It's part of the Mosaic augmentation technique used in object detection.
|
@@ -654,7 +655,7 @@ class Mosaic(BaseMixTransform):
|
|
654
655
|
|
655
656
|
def _mosaic4(self, labels):
|
656
657
|
"""
|
657
|
-
|
658
|
+
Create a 2x2 image mosaic from four input images.
|
658
659
|
|
659
660
|
This method combines four images into a single mosaic image by placing them in a 2x2 grid. It also
|
660
661
|
updates the corresponding labels for each image in the mosaic.
|
@@ -712,7 +713,7 @@ class Mosaic(BaseMixTransform):
|
|
712
713
|
|
713
714
|
def _mosaic9(self, labels):
|
714
715
|
"""
|
715
|
-
|
716
|
+
Create a 3x3 image mosaic from the input image and eight additional images.
|
716
717
|
|
717
718
|
This method combines nine images into a single mosaic image. The input image is placed at the center,
|
718
719
|
and eight additional images from the dataset are placed around it in a 3x3 grid pattern.
|
@@ -785,7 +786,7 @@ class Mosaic(BaseMixTransform):
|
|
785
786
|
@staticmethod
|
786
787
|
def _update_labels(labels, padw, padh):
|
787
788
|
"""
|
788
|
-
|
789
|
+
Update label coordinates with padding values.
|
789
790
|
|
790
791
|
This method adjusts the bounding box coordinates of object instances in the labels by adding padding
|
791
792
|
values. It also denormalizes the coordinates if they were previously normalized.
|
@@ -811,7 +812,7 @@ class Mosaic(BaseMixTransform):
|
|
811
812
|
|
812
813
|
def _cat_labels(self, mosaic_labels):
|
813
814
|
"""
|
814
|
-
|
815
|
+
Concatenate and process labels for mosaic augmentation.
|
815
816
|
|
816
817
|
This method combines labels from multiple images used in mosaic augmentation, clips instances to the
|
817
818
|
mosaic border, and removes zero-area boxes.
|
@@ -863,7 +864,7 @@ class Mosaic(BaseMixTransform):
|
|
863
864
|
|
864
865
|
class MixUp(BaseMixTransform):
|
865
866
|
"""
|
866
|
-
|
867
|
+
Apply MixUp augmentation to image datasets.
|
867
868
|
|
868
869
|
This class implements the MixUp augmentation technique as described in the paper [mixup: Beyond Empirical Risk
|
869
870
|
Minimization](https://arxiv.org/abs/1710.09412). MixUp combines two images and their labels using a random weight.
|
@@ -874,7 +875,7 @@ class MixUp(BaseMixTransform):
|
|
874
875
|
p (float): Probability of applying MixUp augmentation.
|
875
876
|
|
876
877
|
Methods:
|
877
|
-
_mix_transform:
|
878
|
+
_mix_transform: Apply MixUp augmentation to the input labels.
|
878
879
|
|
879
880
|
Examples:
|
880
881
|
>>> from ultralytics.data.augment import MixUp
|
@@ -885,7 +886,7 @@ class MixUp(BaseMixTransform):
|
|
885
886
|
|
886
887
|
def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
|
887
888
|
"""
|
888
|
-
|
889
|
+
Initialize the MixUp augmentation object.
|
889
890
|
|
890
891
|
MixUp is an image augmentation technique that combines two images by taking a weighted sum of their pixel
|
891
892
|
values and labels. This implementation is designed for use with the Ultralytics YOLO framework.
|
@@ -904,7 +905,7 @@ class MixUp(BaseMixTransform):
|
|
904
905
|
|
905
906
|
def _mix_transform(self, labels):
|
906
907
|
"""
|
907
|
-
|
908
|
+
Apply MixUp augmentation to the input labels.
|
908
909
|
|
909
910
|
This method implements the MixUp augmentation technique as described in the paper
|
910
911
|
"mixup: Beyond Empirical Risk Minimization" (https://arxiv.org/abs/1710.09412).
|
@@ -929,7 +930,7 @@ class MixUp(BaseMixTransform):
|
|
929
930
|
|
930
931
|
class CutMix(BaseMixTransform):
|
931
932
|
"""
|
932
|
-
|
933
|
+
Apply CutMix augmentation to image datasets as described in the paper https://arxiv.org/abs/1905.04899.
|
933
934
|
|
934
935
|
CutMix combines two images by replacing a random rectangular region of one image with the corresponding region from another image,
|
935
936
|
and adjusts the labels proportionally to the area of the mixed region.
|
@@ -938,12 +939,12 @@ class CutMix(BaseMixTransform):
|
|
938
939
|
dataset (Any): The dataset to which CutMix augmentation will be applied.
|
939
940
|
pre_transform (Callable | None): Optional transform to apply before CutMix.
|
940
941
|
p (float): Probability of applying CutMix augmentation.
|
941
|
-
beta (float): Beta distribution parameter for sampling the mixing ratio
|
942
|
-
num_areas (int): Number of areas to try to cut and mix
|
942
|
+
beta (float): Beta distribution parameter for sampling the mixing ratio.
|
943
|
+
num_areas (int): Number of areas to try to cut and mix.
|
943
944
|
|
944
945
|
Methods:
|
945
|
-
_mix_transform:
|
946
|
-
_rand_bbox:
|
946
|
+
_mix_transform: Apply CutMix augmentation to the input labels.
|
947
|
+
_rand_bbox: Generate random bounding box coordinates for the cut region.
|
947
948
|
|
948
949
|
Examples:
|
949
950
|
>>> from ultralytics.data.augment import CutMix
|
@@ -954,14 +955,14 @@ class CutMix(BaseMixTransform):
|
|
954
955
|
|
955
956
|
def __init__(self, dataset, pre_transform=None, p=0.0, beta=1.0, num_areas=3) -> None:
|
956
957
|
"""
|
957
|
-
|
958
|
+
Initialize the CutMix augmentation object.
|
958
959
|
|
959
960
|
Args:
|
960
961
|
dataset (Any): The dataset to which CutMix augmentation will be applied.
|
961
962
|
pre_transform (Callable | None): Optional transform to apply before CutMix.
|
962
963
|
p (float): Probability of applying CutMix augmentation.
|
963
|
-
beta (float): Beta distribution parameter for sampling the mixing ratio
|
964
|
-
num_areas (int): Number of areas to try to cut and mix
|
964
|
+
beta (float): Beta distribution parameter for sampling the mixing ratio.
|
965
|
+
num_areas (int): Number of areas to try to cut and mix.
|
965
966
|
"""
|
966
967
|
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
|
967
968
|
self.beta = beta
|
@@ -969,7 +970,7 @@ class CutMix(BaseMixTransform):
|
|
969
970
|
|
970
971
|
def _rand_bbox(self, width, height):
|
971
972
|
"""
|
972
|
-
|
973
|
+
Generate random bounding box coordinates for the cut region.
|
973
974
|
|
974
975
|
Args:
|
975
976
|
width (int): Width of the image.
|
@@ -999,7 +1000,7 @@ class CutMix(BaseMixTransform):
|
|
999
1000
|
|
1000
1001
|
def _mix_transform(self, labels):
|
1001
1002
|
"""
|
1002
|
-
|
1003
|
+
Apply CutMix augmentation to the input labels.
|
1003
1004
|
|
1004
1005
|
Args:
|
1005
1006
|
labels (dict): A dictionary containing the original image and label information.
|
@@ -1021,7 +1022,7 @@ class CutMix(BaseMixTransform):
|
|
1021
1022
|
return labels
|
1022
1023
|
|
1023
1024
|
labels2 = labels.pop("mix_labels")[0]
|
1024
|
-
area = cut_areas[np.random.choice(idx)] #
|
1025
|
+
area = cut_areas[np.random.choice(idx)] # randomly select one
|
1025
1026
|
ioa2 = bbox_ioa(area[None], labels2["instances"].bboxes).squeeze(0)
|
1026
1027
|
indexes2 = np.nonzero(ioa2 >= (0.01 if len(labels["instances"].segments) else 0.1))[0]
|
1027
1028
|
if len(indexes2) == 0:
|
@@ -1047,7 +1048,7 @@ class CutMix(BaseMixTransform):
|
|
1047
1048
|
|
1048
1049
|
class RandomPerspective:
|
1049
1050
|
"""
|
1050
|
-
|
1051
|
+
Implement random perspective and affine transformations on images and corresponding annotations.
|
1051
1052
|
|
1052
1053
|
This class applies random rotations, translations, scaling, shearing, and perspective transformations
|
1053
1054
|
to images and their associated bounding boxes, segments, and keypoints. It can be used as part of an
|
@@ -1063,12 +1064,12 @@ class RandomPerspective:
|
|
1063
1064
|
pre_transform (Callable | None): Optional transform to apply before the random perspective.
|
1064
1065
|
|
1065
1066
|
Methods:
|
1066
|
-
affine_transform:
|
1067
|
-
apply_bboxes:
|
1068
|
-
apply_segments:
|
1069
|
-
apply_keypoints:
|
1070
|
-
__call__:
|
1071
|
-
box_candidates:
|
1067
|
+
affine_transform: Apply affine transformations to the input image.
|
1068
|
+
apply_bboxes: Transform bounding boxes using the affine matrix.
|
1069
|
+
apply_segments: Transform segments and generate new bounding boxes.
|
1070
|
+
apply_keypoints: Transform keypoints using the affine matrix.
|
1071
|
+
__call__: Apply the random perspective transformation to images and annotations.
|
1072
|
+
box_candidates: Filter transformed bounding boxes based on size and aspect ratio.
|
1072
1073
|
|
1073
1074
|
Examples:
|
1074
1075
|
>>> transform = RandomPerspective(degrees=10, translate=0.1, scale=0.1, shear=10)
|
@@ -1083,7 +1084,7 @@ class RandomPerspective:
|
|
1083
1084
|
self, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=(0, 0), pre_transform=None
|
1084
1085
|
):
|
1085
1086
|
"""
|
1086
|
-
|
1087
|
+
Initialize RandomPerspective object with transformation parameters.
|
1087
1088
|
|
1088
1089
|
This class implements random perspective and affine transformations on images and corresponding bounding boxes,
|
1089
1090
|
segments, and keypoints. Transformations include rotation, translation, scaling, and shearing.
|
@@ -1112,7 +1113,7 @@ class RandomPerspective:
|
|
1112
1113
|
|
1113
1114
|
def affine_transform(self, img, border):
|
1114
1115
|
"""
|
1115
|
-
|
1116
|
+
Apply a sequence of affine transformations centered around the image center.
|
1116
1117
|
|
1117
1118
|
This function performs a series of geometric transformations on the input image, including
|
1118
1119
|
translation, perspective change, rotation, scaling, and shearing. The transformations are
|
@@ -1246,7 +1247,7 @@ class RandomPerspective:
|
|
1246
1247
|
|
1247
1248
|
def apply_keypoints(self, keypoints, M):
|
1248
1249
|
"""
|
1249
|
-
|
1250
|
+
Apply affine transformation to keypoints.
|
1250
1251
|
|
1251
1252
|
This method transforms the input keypoints using the provided affine transformation matrix. It handles
|
1252
1253
|
perspective rescaling if necessary and updates the visibility of keypoints that fall outside the image
|
@@ -1280,7 +1281,7 @@ class RandomPerspective:
|
|
1280
1281
|
|
1281
1282
|
def __call__(self, labels):
|
1282
1283
|
"""
|
1283
|
-
|
1284
|
+
Apply random perspective and affine transformations to an image and its associated labels.
|
1284
1285
|
|
1285
1286
|
This method performs a series of transformations including rotation, translation, scaling, shearing,
|
1286
1287
|
and perspective distortion on the input image and adjusts the corresponding bounding boxes, segments,
|
@@ -1398,7 +1399,7 @@ class RandomPerspective:
|
|
1398
1399
|
|
1399
1400
|
class RandomHSV:
|
1400
1401
|
"""
|
1401
|
-
Randomly
|
1402
|
+
Randomly adjust the Hue, Saturation, and Value (HSV) channels of an image.
|
1402
1403
|
|
1403
1404
|
This class applies random HSV augmentation to images within predefined limits set by hgain, sgain, and vgain.
|
1404
1405
|
|
@@ -1408,7 +1409,7 @@ class RandomHSV:
|
|
1408
1409
|
vgain (float): Maximum variation for value. Range is typically [0, 1].
|
1409
1410
|
|
1410
1411
|
Methods:
|
1411
|
-
__call__:
|
1412
|
+
__call__: Apply random HSV augmentation to an image.
|
1412
1413
|
|
1413
1414
|
Examples:
|
1414
1415
|
>>> import numpy as np
|
@@ -1422,7 +1423,7 @@ class RandomHSV:
|
|
1422
1423
|
|
1423
1424
|
def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
|
1424
1425
|
"""
|
1425
|
-
|
1426
|
+
Initialize the RandomHSV object for random HSV (Hue, Saturation, Value) augmentation.
|
1426
1427
|
|
1427
1428
|
This class applies random adjustments to the HSV channels of an image within specified limits.
|
1428
1429
|
|
@@ -1441,7 +1442,7 @@ class RandomHSV:
|
|
1441
1442
|
|
1442
1443
|
def __call__(self, labels):
|
1443
1444
|
"""
|
1444
|
-
|
1445
|
+
Apply random HSV augmentation to an image within predefined limits.
|
1445
1446
|
|
1446
1447
|
This method modifies the input image by randomly adjusting its Hue, Saturation, and Value (HSV) channels.
|
1447
1448
|
The adjustments are made within the limits set by hgain, sgain, and vgain during initialization.
|
@@ -1482,7 +1483,7 @@ class RandomHSV:
|
|
1482
1483
|
|
1483
1484
|
class RandomFlip:
|
1484
1485
|
"""
|
1485
|
-
|
1486
|
+
Apply a random horizontal or vertical flip to an image with a given probability.
|
1486
1487
|
|
1487
1488
|
This class performs random image flipping and updates corresponding instance annotations such as
|
1488
1489
|
bounding boxes and keypoints.
|
@@ -1493,7 +1494,7 @@ class RandomFlip:
|
|
1493
1494
|
flip_idx (array-like): Index mapping for flipping keypoints, if applicable.
|
1494
1495
|
|
1495
1496
|
Methods:
|
1496
|
-
__call__:
|
1497
|
+
__call__: Apply the random flip transformation to an image and its annotations.
|
1497
1498
|
|
1498
1499
|
Examples:
|
1499
1500
|
>>> transform = RandomFlip(p=0.5, direction="horizontal")
|
@@ -1504,7 +1505,7 @@ class RandomFlip:
|
|
1504
1505
|
|
1505
1506
|
def __init__(self, p=0.5, direction="horizontal", flip_idx=None) -> None:
|
1506
1507
|
"""
|
1507
|
-
|
1508
|
+
Initialize the RandomFlip class with probability and direction.
|
1508
1509
|
|
1509
1510
|
This class applies a random horizontal or vertical flip to an image with a given probability.
|
1510
1511
|
It also updates any instances (bounding boxes, keypoints, etc.) accordingly.
|
@@ -1530,7 +1531,7 @@ class RandomFlip:
|
|
1530
1531
|
|
1531
1532
|
def __call__(self, labels):
|
1532
1533
|
"""
|
1533
|
-
|
1534
|
+
Apply random flip to an image and update any instances like bounding boxes or keypoints accordingly.
|
1534
1535
|
|
1535
1536
|
This method randomly flips the input image either horizontally or vertically based on the initialized
|
1536
1537
|
probability and direction. It also updates the corresponding instances (bounding boxes, keypoints) to
|
@@ -1634,7 +1635,7 @@ class LetterBox:
|
|
1634
1635
|
|
1635
1636
|
def __call__(self, labels=None, image=None):
|
1636
1637
|
"""
|
1637
|
-
|
1638
|
+
Resize and pad an image for object detection, instance segmentation, or pose estimation tasks.
|
1638
1639
|
|
1639
1640
|
This method applies letterboxing to the input image, which involves resizing the image while maintaining its
|
1640
1641
|
aspect ratio and adding padding to fit the new shape. It also updates any associated labels accordingly.
|
@@ -1711,7 +1712,7 @@ class LetterBox:
|
|
1711
1712
|
@staticmethod
|
1712
1713
|
def _update_labels(labels, ratio, padw, padh):
|
1713
1714
|
"""
|
1714
|
-
|
1715
|
+
Update labels after applying letterboxing to an image.
|
1715
1716
|
|
1716
1717
|
This method modifies the bounding box coordinates of instances in the labels
|
1717
1718
|
to account for resizing and padding applied during letterboxing.
|
@@ -1753,8 +1754,8 @@ class CopyPaste(BaseMixTransform):
|
|
1753
1754
|
p (float): Probability of applying Copy-Paste augmentation.
|
1754
1755
|
|
1755
1756
|
Methods:
|
1756
|
-
_mix_transform:
|
1757
|
-
__call__:
|
1757
|
+
_mix_transform: Apply Copy-Paste augmentation to the input labels.
|
1758
|
+
__call__: Apply the Copy-Paste transformation to images and annotations.
|
1758
1759
|
|
1759
1760
|
Examples:
|
1760
1761
|
>>> from ultralytics.data.augment import CopyPaste
|
@@ -1764,18 +1765,18 @@ class CopyPaste(BaseMixTransform):
|
|
1764
1765
|
"""
|
1765
1766
|
|
1766
1767
|
def __init__(self, dataset=None, pre_transform=None, p=0.5, mode="flip") -> None:
|
1767
|
-
"""
|
1768
|
+
"""Initialize CopyPaste object with dataset, pre_transform, probability of applying Copy-Paste, and mode."""
|
1768
1769
|
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
|
1769
1770
|
assert mode in {"flip", "mixup"}, f"Expected `mode` to be `flip` or `mixup`, but got {mode}."
|
1770
1771
|
self.mode = mode
|
1771
1772
|
|
1772
1773
|
def _mix_transform(self, labels):
|
1773
|
-
"""
|
1774
|
+
"""Apply Copy-Paste augmentation to combine objects from another image into the current image."""
|
1774
1775
|
labels2 = labels["mix_labels"][0]
|
1775
1776
|
return self._transform(labels, labels2)
|
1776
1777
|
|
1777
1778
|
def __call__(self, labels):
|
1778
|
-
"""
|
1779
|
+
"""Apply Copy-Paste augmentation to an image and its labels."""
|
1779
1780
|
if len(labels["instances"].segments) == 0 or self.p == 0:
|
1780
1781
|
return labels
|
1781
1782
|
if self.mode == "flip":
|
@@ -1802,7 +1803,7 @@ class CopyPaste(BaseMixTransform):
|
|
1802
1803
|
return labels
|
1803
1804
|
|
1804
1805
|
def _transform(self, labels1, labels2={}):
|
1805
|
-
"""
|
1806
|
+
"""Apply Copy-Paste augmentation to combine objects from another image into the current image."""
|
1806
1807
|
im = labels1["img"]
|
1807
1808
|
cls = labels1["cls"]
|
1808
1809
|
h, w = im.shape[:2]
|
@@ -1851,7 +1852,7 @@ class Albumentations:
|
|
1851
1852
|
contains_spatial (bool): Indicates if the transforms include spatial operations.
|
1852
1853
|
|
1853
1854
|
Methods:
|
1854
|
-
__call__:
|
1855
|
+
__call__: Apply the Albumentations transformations to the input labels.
|
1855
1856
|
|
1856
1857
|
Examples:
|
1857
1858
|
>>> transform = Albumentations(p=0.5)
|
@@ -1979,7 +1980,7 @@ class Albumentations:
|
|
1979
1980
|
|
1980
1981
|
def __call__(self, labels):
|
1981
1982
|
"""
|
1982
|
-
|
1983
|
+
Apply Albumentations transformations to input labels.
|
1983
1984
|
|
1984
1985
|
This method applies a series of image augmentations using the Albumentations library. It can perform both
|
1985
1986
|
spatial and non-spatial transformations on the input image and its corresponding labels.
|
@@ -2052,9 +2053,9 @@ class Format:
|
|
2052
2053
|
bgr (float): The probability to return BGR images.
|
2053
2054
|
|
2054
2055
|
Methods:
|
2055
|
-
__call__:
|
2056
|
-
_format_img:
|
2057
|
-
_format_segments:
|
2056
|
+
__call__: Format labels dictionary with image, classes, bounding boxes, and optionally masks and keypoints.
|
2057
|
+
_format_img: Convert image from Numpy array to PyTorch tensor.
|
2058
|
+
_format_segments: Convert polygon points to bitmap masks.
|
2058
2059
|
|
2059
2060
|
Examples:
|
2060
2061
|
>>> formatter = Format(bbox_format="xywh", normalize=True, return_mask=True)
|
@@ -2077,7 +2078,7 @@ class Format:
|
|
2077
2078
|
bgr=0.0,
|
2078
2079
|
):
|
2079
2080
|
"""
|
2080
|
-
|
2081
|
+
Initialize the Format class with given parameters for image and instance annotation formatting.
|
2081
2082
|
|
2082
2083
|
This class standardizes image and instance annotations for object detection, instance segmentation, and pose
|
2083
2084
|
estimation tasks, preparing them for use in PyTorch DataLoader's `collate_fn`.
|
@@ -2121,7 +2122,7 @@ class Format:
|
|
2121
2122
|
|
2122
2123
|
def __call__(self, labels):
|
2123
2124
|
"""
|
2124
|
-
|
2125
|
+
Format image annotations for object detection, instance segmentation, and pose estimation tasks.
|
2125
2126
|
|
2126
2127
|
This method standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch
|
2127
2128
|
DataLoader. It processes the input labels dictionary, converting annotations to the specified format and
|
@@ -2188,7 +2189,7 @@ class Format:
|
|
2188
2189
|
|
2189
2190
|
def _format_img(self, img):
|
2190
2191
|
"""
|
2191
|
-
|
2192
|
+
Format an image for YOLO from a Numpy array to a PyTorch tensor.
|
2192
2193
|
|
2193
2194
|
This function performs the following operations:
|
2194
2195
|
1. Ensures the image has 3 dimensions (adds a channel dimension if needed).
|
@@ -2219,7 +2220,7 @@ class Format:
|
|
2219
2220
|
|
2220
2221
|
def _format_segments(self, instances, cls, w, h):
|
2221
2222
|
"""
|
2222
|
-
|
2223
|
+
Convert polygon segments to bitmap masks.
|
2223
2224
|
|
2224
2225
|
Args:
|
2225
2226
|
instances (Instances): Object containing segment information.
|
@@ -2250,7 +2251,7 @@ class Format:
|
|
2250
2251
|
|
2251
2252
|
|
2252
2253
|
class LoadVisualPrompt:
|
2253
|
-
"""
|
2254
|
+
"""Create visual prompts from bounding boxes or masks for model input."""
|
2254
2255
|
|
2255
2256
|
def __init__(self, scale_factor=1 / 8):
|
2256
2257
|
"""
|
@@ -2307,8 +2308,8 @@ class LoadVisualPrompt:
|
|
2307
2308
|
Args:
|
2308
2309
|
category (int | np.ndarray | torch.Tensor): The category labels for the objects.
|
2309
2310
|
shape (tuple): The shape of the image (height, width).
|
2310
|
-
bboxes (np.ndarray | torch.Tensor, optional): Bounding boxes for the objects, xyxy format.
|
2311
|
-
masks (np.ndarray | torch.Tensor, optional): Masks for the objects.
|
2311
|
+
bboxes (np.ndarray | torch.Tensor, optional): Bounding boxes for the objects, xyxy format.
|
2312
|
+
masks (np.ndarray | torch.Tensor, optional): Masks for the objects.
|
2312
2313
|
|
2313
2314
|
Returns:
|
2314
2315
|
(torch.Tensor): A tensor containing the visual masks for each category.
|
@@ -2344,7 +2345,7 @@ class LoadVisualPrompt:
|
|
2344
2345
|
|
2345
2346
|
class RandomLoadText:
|
2346
2347
|
"""
|
2347
|
-
Randomly
|
2348
|
+
Randomly sample positive and negative texts and update class indices accordingly.
|
2348
2349
|
|
2349
2350
|
This class is responsible for sampling texts from a given set of class texts, including both positive
|
2350
2351
|
(present in the image) and negative (not present in the image) samples. It updates the class indices
|
@@ -2358,7 +2359,7 @@ class RandomLoadText:
|
|
2358
2359
|
padding_value (List[str]): The text used for padding when padding is True.
|
2359
2360
|
|
2360
2361
|
Methods:
|
2361
|
-
__call__:
|
2362
|
+
__call__: Process the input labels and return updated classes and texts.
|
2362
2363
|
|
2363
2364
|
Examples:
|
2364
2365
|
>>> loader = RandomLoadText(prompt_format="Object: {}", neg_samples=(5, 10), max_samples=20)
|
@@ -2377,21 +2378,21 @@ class RandomLoadText:
|
|
2377
2378
|
padding_value: List[str] = [""],
|
2378
2379
|
) -> None:
|
2379
2380
|
"""
|
2380
|
-
|
2381
|
+
Initialize the RandomLoadText class for randomly sampling positive and negative texts.
|
2381
2382
|
|
2382
2383
|
This class is designed to randomly sample positive texts and negative texts, and update the class
|
2383
2384
|
indices accordingly to the number of samples. It can be used for text-based object detection tasks.
|
2384
2385
|
|
2385
2386
|
Args:
|
2386
|
-
prompt_format (str): Format string for the prompt.
|
2387
|
+
prompt_format (str): Format string for the prompt. The format string should
|
2387
2388
|
contain a single pair of curly braces {} where the text will be inserted.
|
2388
2389
|
neg_samples (Tuple[int, int]): A range to randomly sample negative texts. The first integer
|
2389
2390
|
specifies the minimum number of negative samples, and the second integer specifies the
|
2390
|
-
maximum.
|
2391
|
-
max_samples (int): The maximum number of different text samples in one image.
|
2391
|
+
maximum.
|
2392
|
+
max_samples (int): The maximum number of different text samples in one image.
|
2392
2393
|
padding (bool): Whether to pad texts to max_samples. If True, the number of texts will always
|
2393
|
-
be equal to max_samples.
|
2394
|
-
padding_value (str): The padding text to use when padding is True.
|
2394
|
+
be equal to max_samples.
|
2395
|
+
padding_value (List[str]): The padding text to use when padding is True.
|
2395
2396
|
|
2396
2397
|
Attributes:
|
2397
2398
|
prompt_format (str): The format string for the prompt.
|
@@ -2417,7 +2418,7 @@ class RandomLoadText:
|
|
2417
2418
|
|
2418
2419
|
def __call__(self, labels: dict) -> dict:
|
2419
2420
|
"""
|
2420
|
-
Randomly
|
2421
|
+
Randomly sample positive and negative texts and update class indices accordingly.
|
2421
2422
|
|
2422
2423
|
This method samples positive texts based on the existing class labels in the image, and randomly
|
2423
2424
|
selects negative texts from the remaining classes. It then updates the class indices to match the
|
@@ -2483,7 +2484,7 @@ class RandomLoadText:
|
|
2483
2484
|
|
2484
2485
|
def v8_transforms(dataset, imgsz, hyp, stretch=False):
|
2485
2486
|
"""
|
2486
|
-
|
2487
|
+
Apply a series of image transformations for training.
|
2487
2488
|
|
2488
2489
|
This function creates a composition of image augmentation techniques to prepare images for YOLO training.
|
2489
2490
|
It includes operations such as mosaic, copy-paste, random perspective, mixup, and various color adjustments.
|
@@ -2558,7 +2559,7 @@ def classify_transforms(
|
|
2558
2559
|
crop_fraction=None,
|
2559
2560
|
):
|
2560
2561
|
"""
|
2561
|
-
|
2562
|
+
Create a composition of image transforms for classification tasks.
|
2562
2563
|
|
2563
2564
|
This function generates a sequence of torchvision transforms suitable for preprocessing images
|
2564
2565
|
for classification models during evaluation or inference. The transforms include resizing,
|
@@ -2618,7 +2619,7 @@ def classify_augmentations(
|
|
2618
2619
|
interpolation="BILINEAR",
|
2619
2620
|
):
|
2620
2621
|
"""
|
2621
|
-
|
2622
|
+
Create a composition of image augmentation transforms for classification tasks.
|
2622
2623
|
|
2623
2624
|
This function generates a set of image transformations suitable for training classification models. It includes
|
2624
2625
|
options for resizing, flipping, color jittering, auto augmentation, and random erasing.
|
@@ -2719,7 +2720,7 @@ class ClassifyLetterBox:
|
|
2719
2720
|
stride (int): The stride value, used when 'auto' is True.
|
2720
2721
|
|
2721
2722
|
Methods:
|
2722
|
-
__call__:
|
2723
|
+
__call__: Apply the letterbox transformation to an input image.
|
2723
2724
|
|
2724
2725
|
Examples:
|
2725
2726
|
>>> transform = ClassifyLetterBox(size=(640, 640), auto=False, stride=32)
|
@@ -2731,7 +2732,7 @@ class ClassifyLetterBox:
|
|
2731
2732
|
|
2732
2733
|
def __init__(self, size=(640, 640), auto=False, stride=32):
|
2733
2734
|
"""
|
2734
|
-
|
2735
|
+
Initialize the ClassifyLetterBox object for image preprocessing.
|
2735
2736
|
|
2736
2737
|
This class is designed to be part of a transformation pipeline for image classification tasks. It resizes and
|
2737
2738
|
pads images to a specified size while maintaining the original aspect ratio.
|
@@ -2739,8 +2740,8 @@ class ClassifyLetterBox:
|
|
2739
2740
|
Args:
|
2740
2741
|
size (int | Tuple[int, int]): Target size for the letterboxed image. If an int, a square image of
|
2741
2742
|
(size, size) is created. If a tuple, it should be (height, width).
|
2742
|
-
auto (bool): If True, automatically calculates the short side based on stride.
|
2743
|
-
stride (int): The stride value, used when 'auto' is True.
|
2743
|
+
auto (bool): If True, automatically calculates the short side based on stride.
|
2744
|
+
stride (int): The stride value, used when 'auto' is True.
|
2744
2745
|
|
2745
2746
|
Attributes:
|
2746
2747
|
h (int): Target height of the letterboxed image.
|
@@ -2762,7 +2763,7 @@ class ClassifyLetterBox:
|
|
2762
2763
|
|
2763
2764
|
def __call__(self, im):
|
2764
2765
|
"""
|
2765
|
-
|
2766
|
+
Resize and pad an image using the letterbox method.
|
2766
2767
|
|
2767
2768
|
This method resizes the input image to fit within the specified dimensions while maintaining its aspect ratio,
|
2768
2769
|
then pads the resized image to match the target size.
|
@@ -2798,7 +2799,7 @@ class ClassifyLetterBox:
|
|
2798
2799
|
# NOTE: keep this class for backward compatibility
|
2799
2800
|
class CenterCrop:
|
2800
2801
|
"""
|
2801
|
-
|
2802
|
+
Apply center cropping to images for classification tasks.
|
2802
2803
|
|
2803
2804
|
This class performs center cropping on input images, resizing them to a specified size while maintaining the aspect
|
2804
2805
|
ratio. It is designed to be part of a transformation pipeline, e.g., T.Compose([CenterCrop(size), ToTensor()]).
|
@@ -2808,7 +2809,7 @@ class CenterCrop:
|
|
2808
2809
|
w (int): Target width of the cropped image.
|
2809
2810
|
|
2810
2811
|
Methods:
|
2811
|
-
__call__:
|
2812
|
+
__call__: Apply the center crop transformation to an input image.
|
2812
2813
|
|
2813
2814
|
Examples:
|
2814
2815
|
>>> transform = CenterCrop(640)
|
@@ -2820,7 +2821,7 @@ class CenterCrop:
|
|
2820
2821
|
|
2821
2822
|
def __init__(self, size=640):
|
2822
2823
|
"""
|
2823
|
-
|
2824
|
+
Initialize the CenterCrop object for image preprocessing.
|
2824
2825
|
|
2825
2826
|
This class is designed to be part of a transformation pipeline, e.g., T.Compose([CenterCrop(size), ToTensor()]).
|
2826
2827
|
It performs a center crop on input images to a specified size.
|
@@ -2844,7 +2845,7 @@ class CenterCrop:
|
|
2844
2845
|
|
2845
2846
|
def __call__(self, im):
|
2846
2847
|
"""
|
2847
|
-
|
2848
|
+
Apply center cropping to an input image.
|
2848
2849
|
|
2849
2850
|
This method resizes and crops the center of the image using a letterbox method. It maintains the aspect
|
2850
2851
|
ratio of the original image while fitting it into the specified dimensions.
|
@@ -2873,7 +2874,7 @@ class CenterCrop:
|
|
2873
2874
|
# NOTE: keep this class for backward compatibility
|
2874
2875
|
class ToTensor:
|
2875
2876
|
"""
|
2876
|
-
|
2877
|
+
Convert an image from a numpy array to a PyTorch tensor.
|
2877
2878
|
|
2878
2879
|
This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]).
|
2879
2880
|
|
@@ -2881,7 +2882,7 @@ class ToTensor:
|
|
2881
2882
|
half (bool): If True, converts the image to half precision (float16).
|
2882
2883
|
|
2883
2884
|
Methods:
|
2884
|
-
__call__:
|
2885
|
+
__call__: Apply the tensor conversion to an input image.
|
2885
2886
|
|
2886
2887
|
Examples:
|
2887
2888
|
>>> transform = ToTensor(half=True)
|
@@ -2897,14 +2898,14 @@ class ToTensor:
|
|
2897
2898
|
|
2898
2899
|
def __init__(self, half=False):
|
2899
2900
|
"""
|
2900
|
-
|
2901
|
+
Initialize the ToTensor object for converting images to PyTorch tensors.
|
2901
2902
|
|
2902
2903
|
This class is designed to be used as part of a transformation pipeline for image preprocessing in the
|
2903
2904
|
Ultralytics YOLO framework. It converts numpy arrays or PIL Images to PyTorch tensors, with an option
|
2904
2905
|
for half-precision (float16) conversion.
|
2905
2906
|
|
2906
2907
|
Args:
|
2907
|
-
half (bool): If True, converts the tensor to half precision (float16).
|
2908
|
+
half (bool): If True, converts the tensor to half precision (float16).
|
2908
2909
|
|
2909
2910
|
Examples:
|
2910
2911
|
>>> transform = ToTensor(half=True)
|
@@ -2918,7 +2919,7 @@ class ToTensor:
|
|
2918
2919
|
|
2919
2920
|
def __call__(self, im):
|
2920
2921
|
"""
|
2921
|
-
|
2922
|
+
Transform an image from a numpy array to a PyTorch tensor.
|
2922
2923
|
|
2923
2924
|
This method converts the input image from a numpy array to a PyTorch tensor, applying optional
|
2924
2925
|
half-precision conversion and normalization. The image is transposed from HWC to CHW format and
|