dgenerate-ultralytics-headless 8.3.222__py3-none-any.whl → 8.3.225__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/METADATA +2 -2
  2. dgenerate_ultralytics_headless-8.3.225.dist-info/RECORD +286 -0
  3. tests/conftest.py +5 -8
  4. tests/test_cli.py +1 -8
  5. tests/test_python.py +1 -2
  6. ultralytics/__init__.py +1 -1
  7. ultralytics/cfg/__init__.py +34 -49
  8. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  9. ultralytics/cfg/datasets/kitti.yaml +27 -0
  10. ultralytics/cfg/datasets/lvis.yaml +5 -5
  11. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  12. ultralytics/data/annotator.py +3 -4
  13. ultralytics/data/augment.py +244 -323
  14. ultralytics/data/base.py +12 -22
  15. ultralytics/data/build.py +47 -40
  16. ultralytics/data/converter.py +32 -42
  17. ultralytics/data/dataset.py +43 -71
  18. ultralytics/data/loaders.py +22 -34
  19. ultralytics/data/split.py +5 -6
  20. ultralytics/data/split_dota.py +8 -15
  21. ultralytics/data/utils.py +27 -36
  22. ultralytics/engine/exporter.py +49 -116
  23. ultralytics/engine/model.py +144 -180
  24. ultralytics/engine/predictor.py +18 -29
  25. ultralytics/engine/results.py +165 -231
  26. ultralytics/engine/trainer.py +11 -19
  27. ultralytics/engine/tuner.py +13 -23
  28. ultralytics/engine/validator.py +6 -10
  29. ultralytics/hub/__init__.py +7 -12
  30. ultralytics/hub/auth.py +6 -12
  31. ultralytics/hub/google/__init__.py +7 -10
  32. ultralytics/hub/session.py +15 -25
  33. ultralytics/hub/utils.py +3 -6
  34. ultralytics/models/fastsam/model.py +6 -8
  35. ultralytics/models/fastsam/predict.py +5 -10
  36. ultralytics/models/fastsam/utils.py +1 -2
  37. ultralytics/models/fastsam/val.py +2 -4
  38. ultralytics/models/nas/model.py +5 -8
  39. ultralytics/models/nas/predict.py +7 -9
  40. ultralytics/models/nas/val.py +1 -2
  41. ultralytics/models/rtdetr/model.py +5 -8
  42. ultralytics/models/rtdetr/predict.py +15 -18
  43. ultralytics/models/rtdetr/train.py +10 -13
  44. ultralytics/models/rtdetr/val.py +13 -20
  45. ultralytics/models/sam/amg.py +12 -18
  46. ultralytics/models/sam/build.py +6 -9
  47. ultralytics/models/sam/model.py +16 -23
  48. ultralytics/models/sam/modules/blocks.py +62 -84
  49. ultralytics/models/sam/modules/decoders.py +17 -24
  50. ultralytics/models/sam/modules/encoders.py +40 -56
  51. ultralytics/models/sam/modules/memory_attention.py +10 -16
  52. ultralytics/models/sam/modules/sam.py +41 -47
  53. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  54. ultralytics/models/sam/modules/transformer.py +17 -27
  55. ultralytics/models/sam/modules/utils.py +31 -42
  56. ultralytics/models/sam/predict.py +172 -209
  57. ultralytics/models/utils/loss.py +14 -26
  58. ultralytics/models/utils/ops.py +13 -17
  59. ultralytics/models/yolo/classify/predict.py +8 -11
  60. ultralytics/models/yolo/classify/train.py +8 -16
  61. ultralytics/models/yolo/classify/val.py +13 -20
  62. ultralytics/models/yolo/detect/predict.py +4 -8
  63. ultralytics/models/yolo/detect/train.py +11 -20
  64. ultralytics/models/yolo/detect/val.py +38 -48
  65. ultralytics/models/yolo/model.py +35 -47
  66. ultralytics/models/yolo/obb/predict.py +5 -8
  67. ultralytics/models/yolo/obb/train.py +11 -14
  68. ultralytics/models/yolo/obb/val.py +20 -28
  69. ultralytics/models/yolo/pose/predict.py +5 -8
  70. ultralytics/models/yolo/pose/train.py +4 -8
  71. ultralytics/models/yolo/pose/val.py +31 -39
  72. ultralytics/models/yolo/segment/predict.py +9 -14
  73. ultralytics/models/yolo/segment/train.py +3 -6
  74. ultralytics/models/yolo/segment/val.py +16 -26
  75. ultralytics/models/yolo/world/train.py +8 -14
  76. ultralytics/models/yolo/world/train_world.py +11 -16
  77. ultralytics/models/yolo/yoloe/predict.py +16 -23
  78. ultralytics/models/yolo/yoloe/train.py +30 -43
  79. ultralytics/models/yolo/yoloe/train_seg.py +5 -10
  80. ultralytics/models/yolo/yoloe/val.py +15 -20
  81. ultralytics/nn/autobackend.py +10 -18
  82. ultralytics/nn/modules/activation.py +4 -6
  83. ultralytics/nn/modules/block.py +99 -185
  84. ultralytics/nn/modules/conv.py +45 -90
  85. ultralytics/nn/modules/head.py +44 -98
  86. ultralytics/nn/modules/transformer.py +44 -76
  87. ultralytics/nn/modules/utils.py +14 -19
  88. ultralytics/nn/tasks.py +86 -146
  89. ultralytics/nn/text_model.py +25 -40
  90. ultralytics/solutions/ai_gym.py +10 -16
  91. ultralytics/solutions/analytics.py +7 -10
  92. ultralytics/solutions/config.py +4 -5
  93. ultralytics/solutions/distance_calculation.py +9 -12
  94. ultralytics/solutions/heatmap.py +7 -13
  95. ultralytics/solutions/instance_segmentation.py +5 -8
  96. ultralytics/solutions/object_blurrer.py +7 -10
  97. ultralytics/solutions/object_counter.py +8 -12
  98. ultralytics/solutions/object_cropper.py +5 -8
  99. ultralytics/solutions/parking_management.py +12 -14
  100. ultralytics/solutions/queue_management.py +4 -6
  101. ultralytics/solutions/region_counter.py +7 -10
  102. ultralytics/solutions/security_alarm.py +14 -19
  103. ultralytics/solutions/similarity_search.py +7 -12
  104. ultralytics/solutions/solutions.py +31 -53
  105. ultralytics/solutions/speed_estimation.py +6 -9
  106. ultralytics/solutions/streamlit_inference.py +2 -4
  107. ultralytics/solutions/trackzone.py +7 -10
  108. ultralytics/solutions/vision_eye.py +5 -8
  109. ultralytics/trackers/basetrack.py +2 -4
  110. ultralytics/trackers/bot_sort.py +6 -11
  111. ultralytics/trackers/byte_tracker.py +10 -15
  112. ultralytics/trackers/track.py +3 -6
  113. ultralytics/trackers/utils/gmc.py +6 -12
  114. ultralytics/trackers/utils/kalman_filter.py +35 -43
  115. ultralytics/trackers/utils/matching.py +6 -10
  116. ultralytics/utils/__init__.py +61 -100
  117. ultralytics/utils/autobatch.py +2 -4
  118. ultralytics/utils/autodevice.py +11 -13
  119. ultralytics/utils/benchmarks.py +25 -35
  120. ultralytics/utils/callbacks/base.py +8 -10
  121. ultralytics/utils/callbacks/clearml.py +2 -4
  122. ultralytics/utils/callbacks/comet.py +30 -44
  123. ultralytics/utils/callbacks/dvc.py +13 -18
  124. ultralytics/utils/callbacks/mlflow.py +4 -5
  125. ultralytics/utils/callbacks/neptune.py +4 -6
  126. ultralytics/utils/callbacks/raytune.py +3 -4
  127. ultralytics/utils/callbacks/tensorboard.py +4 -6
  128. ultralytics/utils/callbacks/wb.py +10 -13
  129. ultralytics/utils/checks.py +29 -56
  130. ultralytics/utils/cpu.py +1 -2
  131. ultralytics/utils/dist.py +8 -12
  132. ultralytics/utils/downloads.py +17 -27
  133. ultralytics/utils/errors.py +6 -8
  134. ultralytics/utils/events.py +2 -4
  135. ultralytics/utils/export/__init__.py +4 -239
  136. ultralytics/utils/export/engine.py +237 -0
  137. ultralytics/utils/export/imx.py +11 -17
  138. ultralytics/utils/export/tensorflow.py +217 -0
  139. ultralytics/utils/files.py +10 -15
  140. ultralytics/utils/git.py +5 -7
  141. ultralytics/utils/instance.py +30 -51
  142. ultralytics/utils/logger.py +11 -15
  143. ultralytics/utils/loss.py +8 -14
  144. ultralytics/utils/metrics.py +98 -138
  145. ultralytics/utils/nms.py +13 -16
  146. ultralytics/utils/ops.py +47 -74
  147. ultralytics/utils/patches.py +11 -18
  148. ultralytics/utils/plotting.py +29 -42
  149. ultralytics/utils/tal.py +25 -39
  150. ultralytics/utils/torch_utils.py +45 -73
  151. ultralytics/utils/tqdm.py +6 -8
  152. ultralytics/utils/triton.py +9 -12
  153. ultralytics/utils/tuner.py +1 -2
  154. dgenerate_ultralytics_headless-8.3.222.dist-info/RECORD +0 -283
  155. {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/WHEEL +0 -0
  156. {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/entry_points.txt +0 -0
  157. {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/licenses/LICENSE +0 -0
  158. {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/top_level.txt +0 -0
@@ -26,11 +26,10 @@ DEFAULT_STD = (1.0, 1.0, 1.0)
26
26
 
27
27
 
28
28
  class BaseTransform:
29
- """
30
- Base class for image transformations in the Ultralytics library.
29
+ """Base class for image transformations in the Ultralytics library.
31
30
 
32
- This class serves as a foundation for implementing various image processing operations, designed to be
33
- compatible with both classification and semantic segmentation tasks.
31
+ This class serves as a foundation for implementing various image processing operations, designed to be compatible
32
+ with both classification and semantic segmentation tasks.
34
33
 
35
34
  Methods:
36
35
  apply_image: Apply image transformations to labels.
@@ -45,11 +44,10 @@ class BaseTransform:
45
44
  """
46
45
 
47
46
  def __init__(self) -> None:
48
- """
49
- Initialize the BaseTransform object.
47
+ """Initialize the BaseTransform object.
50
48
 
51
- This constructor sets up the base transformation object, which can be extended for specific image
52
- processing tasks. It is designed to be compatible with both classification and semantic segmentation.
49
+ This constructor sets up the base transformation object, which can be extended for specific image processing
50
+ tasks. It is designed to be compatible with both classification and semantic segmentation.
53
51
 
54
52
  Examples:
55
53
  >>> transform = BaseTransform()
@@ -57,15 +55,14 @@ class BaseTransform:
57
55
  pass
58
56
 
59
57
  def apply_image(self, labels):
60
- """
61
- Apply image transformations to labels.
58
+ """Apply image transformations to labels.
62
59
 
63
60
  This method is intended to be overridden by subclasses to implement specific image transformation
64
61
  logic. In its base form, it returns the input labels unchanged.
65
62
 
66
63
  Args:
67
- labels (Any): The input labels to be transformed. The exact type and structure of labels may
68
- vary depending on the specific implementation.
64
+ labels (Any): The input labels to be transformed. The exact type and structure of labels may vary depending
65
+ on the specific implementation.
69
66
 
70
67
  Returns:
71
68
  (Any): The transformed labels. In the base implementation, this is identical to the input.
@@ -80,8 +77,7 @@ class BaseTransform:
80
77
  pass
81
78
 
82
79
  def apply_instances(self, labels):
83
- """
84
- Apply transformations to object instances in labels.
80
+ """Apply transformations to object instances in labels.
85
81
 
86
82
  This method is responsible for applying various transformations to object instances within the given
87
83
  labels. It is designed to be overridden by subclasses to implement specific instance transformation
@@ -101,8 +97,7 @@ class BaseTransform:
101
97
  pass
102
98
 
103
99
  def apply_semantic(self, labels):
104
- """
105
- Apply semantic segmentation transformations to an image.
100
+ """Apply semantic segmentation transformations to an image.
106
101
 
107
102
  This method is intended to be overridden by subclasses to implement specific semantic segmentation
108
103
  transformations. In its base form, it does not perform any operations.
@@ -121,16 +116,15 @@ class BaseTransform:
121
116
  pass
122
117
 
123
118
  def __call__(self, labels):
124
- """
125
- Apply all label transformations to an image, instances, and semantic masks.
119
+ """Apply all label transformations to an image, instances, and semantic masks.
126
120
 
127
- This method orchestrates the application of various transformations defined in the BaseTransform class
128
- to the input labels. It sequentially calls the apply_image and apply_instances methods to process the
129
- image and object instances, respectively.
121
+ This method orchestrates the application of various transformations defined in the BaseTransform class to the
122
+ input labels. It sequentially calls the apply_image and apply_instances methods to process the image and object
123
+ instances, respectively.
130
124
 
131
125
  Args:
132
- labels (dict): A dictionary containing image data and annotations. Expected keys include 'img' for
133
- the image data, and 'instances' for object instances.
126
+ labels (dict): A dictionary containing image data and annotations. Expected keys include 'img' for the image
127
+ data, and 'instances' for object instances.
134
128
 
135
129
  Returns:
136
130
  (dict): The input labels dictionary with transformed image and instances.
@@ -146,8 +140,7 @@ class BaseTransform:
146
140
 
147
141
 
148
142
  class Compose:
149
- """
150
- A class for composing multiple image transformations.
143
+ """A class for composing multiple image transformations.
151
144
 
152
145
  Attributes:
153
146
  transforms (list[Callable]): A list of transformation functions to be applied sequentially.
@@ -169,8 +162,7 @@ class Compose:
169
162
  """
170
163
 
171
164
  def __init__(self, transforms):
172
- """
173
- Initialize the Compose object with a list of transforms.
165
+ """Initialize the Compose object with a list of transforms.
174
166
 
175
167
  Args:
176
168
  transforms (list[Callable]): A list of callable transform objects to be applied sequentially.
@@ -183,14 +175,13 @@ class Compose:
183
175
  self.transforms = transforms if isinstance(transforms, list) else [transforms]
184
176
 
185
177
  def __call__(self, data):
186
- """
187
- Apply a series of transformations to input data.
178
+ """Apply a series of transformations to input data.
188
179
 
189
180
  This method sequentially applies each transformation in the Compose object's transforms to the input data.
190
181
 
191
182
  Args:
192
- data (Any): The input data to be transformed. This can be of any type, depending on the
193
- transformations in the list.
183
+ data (Any): The input data to be transformed. This can be of any type, depending on the transformations in
184
+ the list.
194
185
 
195
186
  Returns:
196
187
  (Any): The transformed data after applying all transformations in sequence.
@@ -205,8 +196,7 @@ class Compose:
205
196
  return data
206
197
 
207
198
  def append(self, transform):
208
- """
209
- Append a new transform to the existing list of transforms.
199
+ """Append a new transform to the existing list of transforms.
210
200
 
211
201
  Args:
212
202
  transform (BaseTransform): The transformation to be added to the composition.
@@ -218,8 +208,7 @@ class Compose:
218
208
  self.transforms.append(transform)
219
209
 
220
210
  def insert(self, index, transform):
221
- """
222
- Insert a new transform at a specified index in the existing list of transforms.
211
+ """Insert a new transform at a specified index in the existing list of transforms.
223
212
 
224
213
  Args:
225
214
  index (int): The index at which to insert the new transform.
@@ -234,8 +223,7 @@ class Compose:
234
223
  self.transforms.insert(index, transform)
235
224
 
236
225
  def __getitem__(self, index: list | int) -> Compose:
237
- """
238
- Retrieve a specific transform or a set of transforms using indexing.
226
+ """Retrieve a specific transform or a set of transforms using indexing.
239
227
 
240
228
  Args:
241
229
  index (int | list[int]): Index or list of indices of the transforms to retrieve.
@@ -256,8 +244,7 @@ class Compose:
256
244
  return Compose([self.transforms[i] for i in index]) if isinstance(index, list) else self.transforms[index]
257
245
 
258
246
  def __setitem__(self, index: list | int, value: list | int) -> None:
259
- """
260
- Set one or more transforms in the composition using indexing.
247
+ """Set one or more transforms in the composition using indexing.
261
248
 
262
249
  Args:
263
250
  index (int | list[int]): Index or list of indices to set transforms at.
@@ -283,8 +270,7 @@ class Compose:
283
270
  self.transforms[i] = v
284
271
 
285
272
  def tolist(self):
286
- """
287
- Convert the list of transforms to a standard Python list.
273
+ """Convert the list of transforms to a standard Python list.
288
274
 
289
275
  Returns:
290
276
  (list): A list containing all the transform objects in the Compose instance.
@@ -299,8 +285,7 @@ class Compose:
299
285
  return self.transforms
300
286
 
301
287
  def __repr__(self):
302
- """
303
- Return a string representation of the Compose object.
288
+ """Return a string representation of the Compose object.
304
289
 
305
290
  Returns:
306
291
  (str): A string representation of the Compose object, including the list of transforms.
@@ -318,11 +303,10 @@ class Compose:
318
303
 
319
304
 
320
305
  class BaseMixTransform:
321
- """
322
- Base class for mix transformations like Cutmix, MixUp and Mosaic.
306
+ """Base class for mix transformations like Cutmix, MixUp and Mosaic.
323
307
 
324
- This class provides a foundation for implementing mix transformations on datasets. It handles the
325
- probability-based application of transforms and manages the mixing of multiple images and labels.
308
+ This class provides a foundation for implementing mix transformations on datasets. It handles the probability-based
309
+ application of transforms and manages the mixing of multiple images and labels.
326
310
 
327
311
  Attributes:
328
312
  dataset (Any): The dataset object containing images and labels.
@@ -349,8 +333,7 @@ class BaseMixTransform:
349
333
  """
350
334
 
351
335
  def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
352
- """
353
- Initialize the BaseMixTransform object for mix transformations like CutMix, MixUp and Mosaic.
336
+ """Initialize the BaseMixTransform object for mix transformations like CutMix, MixUp and Mosaic.
354
337
 
355
338
  This class serves as a base for implementing mix transformations in image processing pipelines.
356
339
 
@@ -369,11 +352,10 @@ class BaseMixTransform:
369
352
  self.p = p
370
353
 
371
354
  def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
372
- """
373
- Apply pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
355
+ """Apply pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
374
356
 
375
- This method determines whether to apply the mix transform based on a probability factor. If applied, it
376
- selects additional images, applies pre-transforms if specified, and then performs the mix transform.
357
+ This method determines whether to apply the mix transform based on a probability factor. If applied, it selects
358
+ additional images, applies pre-transforms if specified, and then performs the mix transform.
377
359
 
378
360
  Args:
379
361
  labels (dict[str, Any]): A dictionary containing label data for an image.
@@ -409,8 +391,7 @@ class BaseMixTransform:
409
391
  return labels
410
392
 
411
393
  def _mix_transform(self, labels: dict[str, Any]):
412
- """
413
- Apply CutMix, MixUp or Mosaic augmentation to the label dictionary.
394
+ """Apply CutMix, MixUp or Mosaic augmentation to the label dictionary.
414
395
 
415
396
  This method should be implemented by subclasses to perform specific mix transformations like CutMix, MixUp or
416
397
  Mosaic. It modifies the input label dictionary in-place with the augmented data.
@@ -430,8 +411,7 @@ class BaseMixTransform:
430
411
  raise NotImplementedError
431
412
 
432
413
  def get_indexes(self):
433
- """
434
- Get a list of shuffled indexes for mosaic augmentation.
414
+ """Get a list of shuffled indexes for mosaic augmentation.
435
415
 
436
416
  Returns:
437
417
  (list[int]): A list of shuffled indexes from the dataset.
@@ -445,15 +425,14 @@ class BaseMixTransform:
445
425
 
446
426
  @staticmethod
447
427
  def _update_label_text(labels: dict[str, Any]) -> dict[str, Any]:
448
- """
449
- Update label text and class IDs for mixed labels in image augmentation.
428
+ """Update label text and class IDs for mixed labels in image augmentation.
450
429
 
451
- This method processes the 'texts' and 'cls' fields of the input labels dictionary and any mixed labels,
452
- creating a unified set of text labels and updating class IDs accordingly.
430
+ This method processes the 'texts' and 'cls' fields of the input labels dictionary and any mixed labels, creating
431
+ a unified set of text labels and updating class IDs accordingly.
453
432
 
454
433
  Args:
455
- labels (dict[str, Any]): A dictionary containing label information, including 'texts' and 'cls' fields,
456
- and optionally a 'mix_labels' field with additional label dictionaries.
434
+ labels (dict[str, Any]): A dictionary containing label information, including 'texts' and 'cls' fields, and
435
+ optionally a 'mix_labels' field with additional label dictionaries.
457
436
 
458
437
  Returns:
459
438
  (dict[str, Any]): The updated labels dictionary with unified text labels and updated class IDs.
@@ -490,11 +469,10 @@ class BaseMixTransform:
490
469
 
491
470
 
492
471
  class Mosaic(BaseMixTransform):
493
- """
494
- Mosaic augmentation for image datasets.
472
+ """Mosaic augmentation for image datasets.
495
473
 
496
- This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image.
497
- The augmentation is applied to a dataset with a given probability.
474
+ This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image. The
475
+ augmentation is applied to a dataset with a given probability.
498
476
 
499
477
  Attributes:
500
478
  dataset: The dataset on which the mosaic augmentation is applied.
@@ -520,11 +498,10 @@ class Mosaic(BaseMixTransform):
520
498
  """
521
499
 
522
500
  def __init__(self, dataset, imgsz: int = 640, p: float = 1.0, n: int = 4):
523
- """
524
- Initialize the Mosaic augmentation object.
501
+ """Initialize the Mosaic augmentation object.
525
502
 
526
- This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image.
527
- The augmentation is applied to a dataset with a given probability.
503
+ This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image. The
504
+ augmentation is applied to a dataset with a given probability.
528
505
 
529
506
  Args:
530
507
  dataset (Any): The dataset on which the mosaic augmentation is applied.
@@ -546,15 +523,14 @@ class Mosaic(BaseMixTransform):
546
523
  self.buffer_enabled = self.dataset.cache != "ram"
547
524
 
548
525
  def get_indexes(self):
549
- """
550
- Return a list of random indexes from the dataset for mosaic augmentation.
526
+ """Return a list of random indexes from the dataset for mosaic augmentation.
551
527
 
552
- This method selects random image indexes either from a buffer or from the entire dataset, depending on
553
- the 'buffer' parameter. It is used to choose images for creating mosaic augmentations.
528
+ This method selects random image indexes either from a buffer or from the entire dataset, depending on the
529
+ 'buffer' parameter. It is used to choose images for creating mosaic augmentations.
554
530
 
555
531
  Returns:
556
- (list[int]): A list of random image indexes. The length of the list is n-1, where n is the number
557
- of images used in the mosaic (either 3 or 8, depending on whether n is 4 or 9).
532
+ (list[int]): A list of random image indexes. The length of the list is n-1, where n is the number of images
533
+ used in the mosaic (either 3 or 8, depending on whether n is 4 or 9).
558
534
 
559
535
  Examples:
560
536
  >>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)
@@ -567,12 +543,11 @@ class Mosaic(BaseMixTransform):
567
543
  return [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)]
568
544
 
569
545
  def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
570
- """
571
- Apply mosaic augmentation to the input image and labels.
546
+ """Apply mosaic augmentation to the input image and labels.
572
547
 
573
- This method combines multiple images (3, 4, or 9) into a single mosaic image based on the 'n' attribute.
574
- It ensures that rectangular annotations are not present and that there are other images available for
575
- mosaic augmentation.
548
+ This method combines multiple images (3, 4, or 9) into a single mosaic image based on the 'n' attribute. It
549
+ ensures that rectangular annotations are not present and that there are other images available for mosaic
550
+ augmentation.
576
551
 
577
552
  Args:
578
553
  labels (dict[str, Any]): A dictionary containing image data and annotations. Expected keys include:
@@ -596,16 +571,15 @@ class Mosaic(BaseMixTransform):
596
571
  ) # This code is modified for mosaic3 method.
597
572
 
598
573
  def _mosaic3(self, labels: dict[str, Any]) -> dict[str, Any]:
599
- """
600
- Create a 1x3 image mosaic by combining three images.
574
+ """Create a 1x3 image mosaic by combining three images.
601
575
 
602
- This method arranges three images in a horizontal layout, with the main image in the center and two
603
- additional images on either side. It's part of the Mosaic augmentation technique used in object detection.
576
+ This method arranges three images in a horizontal layout, with the main image in the center and two additional
577
+ images on either side. It's part of the Mosaic augmentation technique used in object detection.
604
578
 
605
579
  Args:
606
580
  labels (dict[str, Any]): A dictionary containing image and label information for the main (center) image.
607
- Must include 'img' key with the image array, and 'mix_labels' key with a list of two
608
- dictionaries containing information for the side images.
581
+ Must include 'img' key with the image array, and 'mix_labels' key with a list of two dictionaries
582
+ containing information for the side images.
609
583
 
610
584
  Returns:
611
585
  (dict[str, Any]): A dictionary with the mosaic image and updated labels. Keys include:
@@ -655,19 +629,19 @@ class Mosaic(BaseMixTransform):
655
629
  return final_labels
656
630
 
657
631
  def _mosaic4(self, labels: dict[str, Any]) -> dict[str, Any]:
658
- """
659
- Create a 2x2 image mosaic from four input images.
632
+ """Create a 2x2 image mosaic from four input images.
660
633
 
661
- This method combines four images into a single mosaic image by placing them in a 2x2 grid. It also
662
- updates the corresponding labels for each image in the mosaic.
634
+ This method combines four images into a single mosaic image by placing them in a 2x2 grid. It also updates the
635
+ corresponding labels for each image in the mosaic.
663
636
 
664
637
  Args:
665
- labels (dict[str, Any]): A dictionary containing image data and labels for the base image (index 0) and three
666
- additional images (indices 1-3) in the 'mix_labels' key.
638
+ labels (dict[str, Any]): A dictionary containing image data and labels for the base image (index 0) and
639
+ three additional images (indices 1-3) in the 'mix_labels' key.
667
640
 
668
641
  Returns:
669
- (dict[str, Any]): A dictionary containing the mosaic image and updated labels. The 'img' key contains the mosaic
670
- image as a numpy array, and other keys contain the combined and adjusted labels for all four images.
642
+ (dict[str, Any]): A dictionary containing the mosaic image and updated labels. The 'img' key contains the
643
+ mosaic image as a numpy array, and other keys contain the combined and adjusted labels for all
644
+ four images.
671
645
 
672
646
  Examples:
673
647
  >>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)
@@ -713,22 +687,22 @@ class Mosaic(BaseMixTransform):
713
687
  return final_labels
714
688
 
715
689
  def _mosaic9(self, labels: dict[str, Any]) -> dict[str, Any]:
716
- """
717
- Create a 3x3 image mosaic from the input image and eight additional images.
690
+ """Create a 3x3 image mosaic from the input image and eight additional images.
718
691
 
719
- This method combines nine images into a single mosaic image. The input image is placed at the center,
720
- and eight additional images from the dataset are placed around it in a 3x3 grid pattern.
692
+ This method combines nine images into a single mosaic image. The input image is placed at the center, and eight
693
+ additional images from the dataset are placed around it in a 3x3 grid pattern.
721
694
 
722
695
  Args:
723
696
  labels (dict[str, Any]): A dictionary containing the input image and its associated labels. It should have
724
- the following keys:
697
+ the following keys:
725
698
  - 'img' (np.ndarray): The input image.
726
699
  - 'resized_shape' (tuple[int, int]): The shape of the resized image (height, width).
727
700
  - 'mix_labels' (list[dict]): A list of dictionaries containing information for the additional
728
- eight images, each with the same structure as the input labels.
701
+ eight images, each with the same structure as the input labels.
729
702
 
730
703
  Returns:
731
- (dict[str, Any]): A dictionary containing the mosaic image and updated labels. It includes the following keys:
704
+ (dict[str, Any]): A dictionary containing the mosaic image and updated labels. It includes the following
705
+ keys:
732
706
  - 'img' (np.ndarray): The final mosaic image.
733
707
  - Other keys from the input labels, updated to reflect the new mosaic arrangement.
734
708
 
@@ -786,8 +760,7 @@ class Mosaic(BaseMixTransform):
786
760
 
787
761
  @staticmethod
788
762
  def _update_labels(labels, padw: int, padh: int) -> dict[str, Any]:
789
- """
790
- Update label coordinates with padding values.
763
+ """Update label coordinates with padding values.
791
764
 
792
765
  This method adjusts the bounding box coordinates of object instances in the labels by adding padding
793
766
  values. It also denormalizes the coordinates if they were previously normalized.
@@ -812,11 +785,10 @@ class Mosaic(BaseMixTransform):
812
785
  return labels
813
786
 
814
787
  def _cat_labels(self, mosaic_labels: list[dict[str, Any]]) -> dict[str, Any]:
815
- """
816
- Concatenate and process labels for mosaic augmentation.
788
+ """Concatenate and process labels for mosaic augmentation.
817
789
 
818
- This method combines labels from multiple images used in mosaic augmentation, clips instances to the
819
- mosaic border, and removes zero-area boxes.
790
+ This method combines labels from multiple images used in mosaic augmentation, clips instances to the mosaic
791
+ border, and removes zero-area boxes.
820
792
 
821
793
  Args:
822
794
  mosaic_labels (list[dict[str, Any]]): A list of label dictionaries for each image in the mosaic.
@@ -864,8 +836,7 @@ class Mosaic(BaseMixTransform):
864
836
 
865
837
 
866
838
  class MixUp(BaseMixTransform):
867
- """
868
- Apply MixUp augmentation to image datasets.
839
+ """Apply MixUp augmentation to image datasets.
869
840
 
870
841
  This class implements the MixUp augmentation technique as described in the paper [mixup: Beyond Empirical Risk
871
842
  Minimization](https://arxiv.org/abs/1710.09412). MixUp combines two images and their labels using a random weight.
@@ -886,11 +857,10 @@ class MixUp(BaseMixTransform):
886
857
  """
887
858
 
888
859
  def __init__(self, dataset, pre_transform=None, p: float = 0.0) -> None:
889
- """
890
- Initialize the MixUp augmentation object.
860
+ """Initialize the MixUp augmentation object.
891
861
 
892
- MixUp is an image augmentation technique that combines two images by taking a weighted sum of their pixel
893
- values and labels. This implementation is designed for use with the Ultralytics YOLO framework.
862
+ MixUp is an image augmentation technique that combines two images by taking a weighted sum of their pixel values
863
+ and labels. This implementation is designed for use with the Ultralytics YOLO framework.
894
864
 
895
865
  Args:
896
866
  dataset (Any): The dataset to which MixUp augmentation will be applied.
@@ -905,11 +875,10 @@ class MixUp(BaseMixTransform):
905
875
  super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
906
876
 
907
877
  def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
908
- """
909
- Apply MixUp augmentation to the input labels.
878
+ """Apply MixUp augmentation to the input labels.
910
879
 
911
- This method implements the MixUp augmentation technique as described in the paper
912
- "mixup: Beyond Empirical Risk Minimization" (https://arxiv.org/abs/1710.09412).
880
+ This method implements the MixUp augmentation technique as described in the paper "mixup: Beyond Empirical Risk
881
+ Minimization" (https://arxiv.org/abs/1710.09412).
913
882
 
914
883
  Args:
915
884
  labels (dict[str, Any]): A dictionary containing the original image and label information.
@@ -930,11 +899,10 @@ class MixUp(BaseMixTransform):
930
899
 
931
900
 
932
901
  class CutMix(BaseMixTransform):
933
- """
934
- Apply CutMix augmentation to image datasets as described in the paper https://arxiv.org/abs/1905.04899.
902
+ """Apply CutMix augmentation to image datasets as described in the paper https://arxiv.org/abs/1905.04899.
935
903
 
936
- CutMix combines two images by replacing a random rectangular region of one image with the corresponding region from another image,
937
- and adjusts the labels proportionally to the area of the mixed region.
904
+ CutMix combines two images by replacing a random rectangular region of one image with the corresponding region from
905
+ another image, and adjusts the labels proportionally to the area of the mixed region.
938
906
 
939
907
  Attributes:
940
908
  dataset (Any): The dataset to which CutMix augmentation will be applied.
@@ -955,8 +923,7 @@ class CutMix(BaseMixTransform):
955
923
  """
956
924
 
957
925
  def __init__(self, dataset, pre_transform=None, p: float = 0.0, beta: float = 1.0, num_areas: int = 3) -> None:
958
- """
959
- Initialize the CutMix augmentation object.
926
+ """Initialize the CutMix augmentation object.
960
927
 
961
928
  Args:
962
929
  dataset (Any): The dataset to which CutMix augmentation will be applied.
@@ -970,8 +937,7 @@ class CutMix(BaseMixTransform):
970
937
  self.num_areas = num_areas
971
938
 
972
939
  def _rand_bbox(self, width: int, height: int) -> tuple[int, int, int, int]:
973
- """
974
- Generate random bounding box coordinates for the cut region.
940
+ """Generate random bounding box coordinates for the cut region.
975
941
 
976
942
  Args:
977
943
  width (int): Width of the image.
@@ -1000,8 +966,7 @@ class CutMix(BaseMixTransform):
1000
966
  return x1, y1, x2, y2
1001
967
 
1002
968
  def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
1003
- """
1004
- Apply CutMix augmentation to the input labels.
969
+ """Apply CutMix augmentation to the input labels.
1005
970
 
1006
971
  Args:
1007
972
  labels (dict[str, Any]): A dictionary containing the original image and label information.
@@ -1048,12 +1013,11 @@ class CutMix(BaseMixTransform):
1048
1013
 
1049
1014
 
1050
1015
  class RandomPerspective:
1051
- """
1052
- Implement random perspective and affine transformations on images and corresponding annotations.
1016
+ """Implement random perspective and affine transformations on images and corresponding annotations.
1053
1017
 
1054
- This class applies random rotations, translations, scaling, shearing, and perspective transformations
1055
- to images and their associated bounding boxes, segments, and keypoints. It can be used as part of an
1056
- augmentation pipeline for object detection and instance segmentation tasks.
1018
+ This class applies random rotations, translations, scaling, shearing, and perspective transformations to images and
1019
+ their associated bounding boxes, segments, and keypoints. It can be used as part of an augmentation pipeline for
1020
+ object detection and instance segmentation tasks.
1057
1021
 
1058
1022
  Attributes:
1059
1023
  degrees (float): Maximum absolute degree range for random rotations.
@@ -1091,8 +1055,7 @@ class RandomPerspective:
1091
1055
  border: tuple[int, int] = (0, 0),
1092
1056
  pre_transform=None,
1093
1057
  ):
1094
- """
1095
- Initialize RandomPerspective object with transformation parameters.
1058
+ """Initialize RandomPerspective object with transformation parameters.
1096
1059
 
1097
1060
  This class implements random perspective and affine transformations on images and corresponding bounding boxes,
1098
1061
  segments, and keypoints. Transformations include rotation, translation, scaling, and shearing.
@@ -1120,12 +1083,11 @@ class RandomPerspective:
1120
1083
  self.pre_transform = pre_transform
1121
1084
 
1122
1085
  def affine_transform(self, img: np.ndarray, border: tuple[int, int]) -> tuple[np.ndarray, np.ndarray, float]:
1123
- """
1124
- Apply a sequence of affine transformations centered around the image center.
1086
+ """Apply a sequence of affine transformations centered around the image center.
1125
1087
 
1126
- This function performs a series of geometric transformations on the input image, including
1127
- translation, perspective change, rotation, scaling, and shearing. The transformations are
1128
- applied in a specific order to maintain consistency.
1088
+ This function performs a series of geometric transformations on the input image, including translation,
1089
+ perspective change, rotation, scaling, and shearing. The transformations are applied in a specific order to
1090
+ maintain consistency.
1129
1091
 
1130
1092
  Args:
1131
1093
  img (np.ndarray): Input image to be transformed.
@@ -1184,15 +1146,14 @@ class RandomPerspective:
1184
1146
  return img, M, s
1185
1147
 
1186
1148
  def apply_bboxes(self, bboxes: np.ndarray, M: np.ndarray) -> np.ndarray:
1187
- """
1188
- Apply affine transformation to bounding boxes.
1149
+ """Apply affine transformation to bounding boxes.
1189
1150
 
1190
- This function applies an affine transformation to a set of bounding boxes using the provided
1191
- transformation matrix.
1151
+ This function applies an affine transformation to a set of bounding boxes using the provided transformation
1152
+ matrix.
1192
1153
 
1193
1154
  Args:
1194
- bboxes (np.ndarray): Bounding boxes in xyxy format with shape (N, 4), where N is the number
1195
- of bounding boxes.
1155
+ bboxes (np.ndarray): Bounding boxes in xyxy format with shape (N, 4), where N is the number of bounding
1156
+ boxes.
1196
1157
  M (np.ndarray): Affine transformation matrix with shape (3, 3).
1197
1158
 
1198
1159
  Returns:
@@ -1218,11 +1179,10 @@ class RandomPerspective:
1218
1179
  return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).T
1219
1180
 
1220
1181
  def apply_segments(self, segments: np.ndarray, M: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
1221
- """
1222
- Apply affine transformations to segments and generate new bounding boxes.
1182
+ """Apply affine transformations to segments and generate new bounding boxes.
1223
1183
 
1224
- This function applies affine transformations to input segments and generates new bounding boxes based on
1225
- the transformed segments. It clips the transformed segments to fit within the new bounding boxes.
1184
+ This function applies affine transformations to input segments and generates new bounding boxes based on the
1185
+ transformed segments. It clips the transformed segments to fit within the new bounding boxes.
1226
1186
 
1227
1187
  Args:
1228
1188
  segments (np.ndarray): Input segments with shape (N, M, 2), where N is the number of segments and M is the
@@ -1254,16 +1214,15 @@ class RandomPerspective:
1254
1214
  return bboxes, segments
1255
1215
 
1256
1216
  def apply_keypoints(self, keypoints: np.ndarray, M: np.ndarray) -> np.ndarray:
1257
- """
1258
- Apply affine transformation to keypoints.
1217
+ """Apply affine transformation to keypoints.
1259
1218
 
1260
1219
  This method transforms the input keypoints using the provided affine transformation matrix. It handles
1261
1220
  perspective rescaling if necessary and updates the visibility of keypoints that fall outside the image
1262
1221
  boundaries after transformation.
1263
1222
 
1264
1223
  Args:
1265
- keypoints (np.ndarray): Array of keypoints with shape (N, 17, 3), where N is the number of instances,
1266
- 17 is the number of keypoints per instance, and 3 represents (x, y, visibility).
1224
+ keypoints (np.ndarray): Array of keypoints with shape (N, 17, 3), where N is the number of instances, 17 is
1225
+ the number of keypoints per instance, and 3 represents (x, y, visibility).
1267
1226
  M (np.ndarray): 3x3 affine transformation matrix.
1268
1227
 
1269
1228
  Returns:
@@ -1288,21 +1247,14 @@ class RandomPerspective:
1288
1247
  return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)
1289
1248
 
1290
1249
  def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
1291
- """
1292
- Apply random perspective and affine transformations to an image and its associated labels.
1250
+ """Apply random perspective and affine transformations to an image and its associated labels.
1293
1251
 
1294
- This method performs a series of transformations including rotation, translation, scaling, shearing,
1295
- and perspective distortion on the input image and adjusts the corresponding bounding boxes, segments,
1296
- and keypoints accordingly.
1252
+ This method performs a series of transformations including rotation, translation, scaling, shearing, and
1253
+ perspective distortion on the input image and adjusts the corresponding bounding boxes, segments, and keypoints
1254
+ accordingly.
1297
1255
 
1298
1256
  Args:
1299
1257
  labels (dict[str, Any]): A dictionary containing image data and annotations.
1300
- Must include:
1301
- 'img' (np.ndarray): The input image.
1302
- 'cls' (np.ndarray): Class labels.
1303
- 'instances' (Instances): Object instances with bounding boxes, segments, and keypoints.
1304
- May include:
1305
- 'mosaic_border' (tuple[int, int]): Border size for mosaic augmentation.
1306
1258
 
1307
1259
  Returns:
1308
1260
  (dict[str, Any]): Transformed labels dictionary containing:
@@ -1321,6 +1273,14 @@ class RandomPerspective:
1321
1273
  ... }
1322
1274
  >>> result = transform(labels)
1323
1275
  >>> assert result["img"].shape[:2] == result["resized_shape"]
1276
+
1277
+ Notes:
1278
+ 'labels' arg must include:
1279
+ - 'img' (np.ndarray): The input image.
1280
+ - 'cls' (np.ndarray): Class labels.
1281
+ - 'instances' (Instances): Object instances with bounding boxes, segments, and keypoints.
1282
+ May include:
1283
+ - 'mosaic_border' (tuple[int, int]): Border size for mosaic augmentation.
1324
1284
  """
1325
1285
  if self.pre_transform and "mosaic_border" not in labels:
1326
1286
  labels = self.pre_transform(labels)
@@ -1374,29 +1334,27 @@ class RandomPerspective:
1374
1334
  area_thr: float = 0.1,
1375
1335
  eps: float = 1e-16,
1376
1336
  ) -> np.ndarray:
1377
- """
1378
- Compute candidate boxes for further processing based on size and aspect ratio criteria.
1337
+ """Compute candidate boxes for further processing based on size and aspect ratio criteria.
1379
1338
 
1380
- This method compares boxes before and after augmentation to determine if they meet specified
1381
- thresholds for width, height, aspect ratio, and area. It's used to filter out boxes that have
1382
- been overly distorted or reduced by the augmentation process.
1339
+ This method compares boxes before and after augmentation to determine if they meet specified thresholds for
1340
+ width, height, aspect ratio, and area. It's used to filter out boxes that have been overly distorted or reduced
1341
+ by the augmentation process.
1383
1342
 
1384
1343
  Args:
1385
- box1 (np.ndarray): Original boxes before augmentation, shape (4, N) where n is the
1386
- number of boxes. Format is [x1, y1, x2, y2] in absolute coordinates.
1387
- box2 (np.ndarray): Augmented boxes after transformation, shape (4, N). Format is
1388
- [x1, y1, x2, y2] in absolute coordinates.
1389
- wh_thr (int): Width and height threshold in pixels. Boxes smaller than this in either
1390
- dimension are rejected.
1391
- ar_thr (int): Aspect ratio threshold. Boxes with an aspect ratio greater than this
1392
- value are rejected.
1393
- area_thr (float): Area ratio threshold. Boxes with an area ratio (new/old) less than
1394
- this value are rejected.
1344
+ box1 (np.ndarray): Original boxes before augmentation, shape (4, N) where N is the number of boxes. Format
1345
+ is [x1, y1, x2, y2] in absolute coordinates.
1346
+ box2 (np.ndarray): Augmented boxes after transformation, shape (4, N). Format is [x1, y1, x2, y2] in
1347
+ absolute coordinates.
1348
+ wh_thr (int): Width and height threshold in pixels. Boxes smaller than this in either dimension are
1349
+ rejected.
1350
+ ar_thr (int): Aspect ratio threshold. Boxes with an aspect ratio greater than this value are rejected.
1351
+ area_thr (float): Area ratio threshold. Boxes with an area ratio (new/old) less than this value are
1352
+ rejected.
1395
1353
  eps (float): Small epsilon value to prevent division by zero.
1396
1354
 
1397
1355
  Returns:
1398
- (np.ndarray): Boolean array of shape (n) indicating which boxes are candidates.
1399
- True values correspond to boxes that meet all criteria.
1356
+ (np.ndarray): Boolean array of shape (N,) indicating which boxes are candidates. True values correspond to
1357
+ boxes that meet all criteria.
1400
1358
 
1401
1359
  Examples:
1402
1360
  >>> random_perspective = RandomPerspective()
@@ -1413,8 +1371,7 @@ class RandomPerspective:
1413
1371
 
1414
1372
 
1415
1373
  class RandomHSV:
1416
- """
1417
- Randomly adjust the Hue, Saturation, and Value (HSV) channels of an image.
1374
+ """Randomly adjust the Hue, Saturation, and Value (HSV) channels of an image.
1418
1375
 
1419
1376
  This class applies random HSV augmentation to images within predefined limits set by hgain, sgain, and vgain.
1420
1377
 
@@ -1437,8 +1394,7 @@ class RandomHSV:
1437
1394
  """
1438
1395
 
1439
1396
  def __init__(self, hgain: float = 0.5, sgain: float = 0.5, vgain: float = 0.5) -> None:
1440
- """
1441
- Initialize the RandomHSV object for random HSV (Hue, Saturation, Value) augmentation.
1397
+ """Initialize the RandomHSV object for random HSV (Hue, Saturation, Value) augmentation.
1442
1398
 
1443
1399
  This class applies random adjustments to the HSV channels of an image within specified limits.
1444
1400
 
@@ -1456,15 +1412,14 @@ class RandomHSV:
1456
1412
  self.vgain = vgain
1457
1413
 
1458
1414
  def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
1459
- """
1460
- Apply random HSV augmentation to an image within predefined limits.
1415
+ """Apply random HSV augmentation to an image within predefined limits.
1461
1416
 
1462
- This method modifies the input image by randomly adjusting its Hue, Saturation, and Value (HSV) channels.
1463
- The adjustments are made within the limits set by hgain, sgain, and vgain during initialization.
1417
+ This method modifies the input image by randomly adjusting its Hue, Saturation, and Value (HSV) channels. The
1418
+ adjustments are made within the limits set by hgain, sgain, and vgain during initialization.
1464
1419
 
1465
1420
  Args:
1466
- labels (dict[str, Any]): A dictionary containing image data and metadata. Must include an 'img' key with
1467
- the image as a numpy array.
1421
+ labels (dict[str, Any]): A dictionary containing image data and metadata. Must include an 'img' key with the
1422
+ image as a numpy array.
1468
1423
 
1469
1424
  Returns:
1470
1425
  (dict[str, Any]): The labels dictionary with the HSV-adjusted image under the 'img' key.
@@ -1496,11 +1451,10 @@ class RandomHSV:
1496
1451
 
1497
1452
 
1498
1453
  class RandomFlip:
1499
- """
1500
- Apply a random horizontal or vertical flip to an image with a given probability.
1454
+ """Apply a random horizontal or vertical flip to an image with a given probability.
1501
1455
 
1502
- This class performs random image flipping and updates corresponding instance annotations such as
1503
- bounding boxes and keypoints.
1456
+ This class performs random image flipping and updates corresponding instance annotations such as bounding boxes and
1457
+ keypoints.
1504
1458
 
1505
1459
  Attributes:
1506
1460
  p (float): Probability of applying the flip. Must be between 0 and 1.
@@ -1518,11 +1472,10 @@ class RandomFlip:
1518
1472
  """
1519
1473
 
1520
1474
  def __init__(self, p: float = 0.5, direction: str = "horizontal", flip_idx: list[int] | None = None) -> None:
1521
- """
1522
- Initialize the RandomFlip class with probability and direction.
1475
+ """Initialize the RandomFlip class with probability and direction.
1523
1476
 
1524
- This class applies a random horizontal or vertical flip to an image with a given probability.
1525
- It also updates any instances (bounding boxes, keypoints, etc.) accordingly.
1477
+ This class applies a random horizontal or vertical flip to an image with a given probability. It also updates
1478
+ any instances (bounding boxes, keypoints, etc.) accordingly.
1526
1479
 
1527
1480
  Args:
1528
1481
  p (float): The probability of applying the flip. Must be between 0 and 1.
@@ -1544,23 +1497,21 @@ class RandomFlip:
1544
1497
  self.flip_idx = flip_idx
1545
1498
 
1546
1499
  def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
1547
- """
1548
- Apply random flip to an image and update any instances like bounding boxes or keypoints accordingly.
1500
+ """Apply random flip to an image and update any instances like bounding boxes or keypoints accordingly.
1549
1501
 
1550
1502
  This method randomly flips the input image either horizontally or vertically based on the initialized
1551
- probability and direction. It also updates the corresponding instances (bounding boxes, keypoints) to
1552
- match the flipped image.
1503
+ probability and direction. It also updates the corresponding instances (bounding boxes, keypoints) to match the
1504
+ flipped image.
1553
1505
 
1554
1506
  Args:
1555
1507
  labels (dict[str, Any]): A dictionary containing the following keys:
1556
- 'img' (np.ndarray): The image to be flipped.
1557
- 'instances' (ultralytics.utils.instance.Instances): An object containing bounding boxes and
1558
- optionally keypoints.
1508
+ - 'img' (np.ndarray): The image to be flipped.
1509
+ - 'instances' (ultralytics.utils.instance.Instances): Object containing boxes and optionally keypoints.
1559
1510
 
1560
1511
  Returns:
1561
1512
  (dict[str, Any]): The same dictionary with the flipped image and updated instances:
1562
- 'img' (np.ndarray): The flipped image.
1563
- 'instances' (ultralytics.utils.instance.Instances): Updated instances matching the flipped image.
1513
+ - 'img' (np.ndarray): The flipped image.
1514
+ - 'instances' (ultralytics.utils.instance.Instances): Updated instances matching the flipped image.
1564
1515
 
1565
1516
  Examples:
1566
1517
  >>> labels = {"img": np.random.rand(640, 640, 3), "instances": Instances(...)}
@@ -1591,11 +1542,10 @@ class RandomFlip:
1591
1542
 
1592
1543
 
1593
1544
  class LetterBox:
1594
- """
1595
- Resize image and padding for detection, instance segmentation, pose.
1545
+ """Resize image and padding for detection, instance segmentation, pose.
1596
1546
 
1597
- This class resizes and pads images to a specified shape while preserving aspect ratio. It also updates
1598
- corresponding labels and bounding boxes.
1547
+ This class resizes and pads images to a specified shape while preserving aspect ratio. It also updates corresponding
1548
+ labels and bounding boxes.
1599
1549
 
1600
1550
  Attributes:
1601
1551
  new_shape (tuple): Target shape (height, width) for resizing.
@@ -1626,8 +1576,7 @@ class LetterBox:
1626
1576
  padding_value: int = 114,
1627
1577
  interpolation: int = cv2.INTER_LINEAR,
1628
1578
  ):
1629
- """
1630
- Initialize LetterBox object for resizing and padding images.
1579
+ """Initialize LetterBox object for resizing and padding images.
1631
1580
 
1632
1581
  This class is designed to resize and pad images for object detection, instance segmentation, and pose estimation
1633
1582
  tasks. It supports various resizing modes including auto-sizing, scale-fill, and letterboxing.
@@ -1665,20 +1614,20 @@ class LetterBox:
1665
1614
  self.interpolation = interpolation
1666
1615
 
1667
1616
  def __call__(self, labels: dict[str, Any] | None = None, image: np.ndarray = None) -> dict[str, Any] | np.ndarray:
1668
- """
1669
- Resize and pad an image for object detection, instance segmentation, or pose estimation tasks.
1617
+ """Resize and pad an image for object detection, instance segmentation, or pose estimation tasks.
1670
1618
 
1671
1619
  This method applies letterboxing to the input image, which involves resizing the image while maintaining its
1672
1620
  aspect ratio and adding padding to fit the new shape. It also updates any associated labels accordingly.
1673
1621
 
1674
1622
  Args:
1675
- labels (dict[str, Any] | None): A dictionary containing image data and associated labels, or empty dict if None.
1623
+ labels (dict[str, Any] | None): A dictionary containing image data and associated labels, or empty dict if
1624
+ None.
1676
1625
  image (np.ndarray | None): The input image as a numpy array. If None, the image is taken from 'labels'.
1677
1626
 
1678
1627
  Returns:
1679
- (dict[str, Any] | nd.ndarray): If 'labels' is provided, returns an updated dictionary with the resized and padded image,
1680
- updated labels, and additional metadata. If 'labels' is empty, returns the resized
1681
- and padded image.
1628
+ (dict[str, Any] | np.ndarray): If 'labels' is provided, returns an updated dictionary with the resized and
1629
+ padded image, updated labels, and additional metadata. If 'labels' is empty, returns the resized and
1630
+ padded image only.
1682
1631
 
1683
1632
  Examples:
1684
1633
  >>> letterbox = LetterBox(new_shape=(640, 640))
@@ -1744,11 +1693,10 @@ class LetterBox:
1744
1693
 
1745
1694
  @staticmethod
1746
1695
  def _update_labels(labels: dict[str, Any], ratio: tuple[float, float], padw: float, padh: float) -> dict[str, Any]:
1747
- """
1748
- Update labels after applying letterboxing to an image.
1696
+ """Update labels after applying letterboxing to an image.
1749
1697
 
1750
- This method modifies the bounding box coordinates of instances in the labels
1751
- to account for resizing and padding applied during letterboxing.
1698
+ This method modifies the bounding box coordinates of instances in the labels to account for resizing and padding
1699
+ applied during letterboxing.
1752
1700
 
1753
1701
  Args:
1754
1702
  labels (dict[str, Any]): A dictionary containing image labels and instances.
@@ -1774,8 +1722,7 @@ class LetterBox:
1774
1722
 
1775
1723
 
1776
1724
  class CopyPaste(BaseMixTransform):
1777
- """
1778
- CopyPaste class for applying Copy-Paste augmentation to image datasets.
1725
+ """CopyPaste class for applying Copy-Paste augmentation to image datasets.
1779
1726
 
1780
1727
  This class implements the Copy-Paste augmentation technique as described in the paper "Simple Copy-Paste is a Strong
1781
1728
  Data Augmentation Method for Instance Segmentation" (https://arxiv.org/abs/2012.07177). It combines objects from
@@ -1874,8 +1821,7 @@ class CopyPaste(BaseMixTransform):
1874
1821
 
1875
1822
 
1876
1823
  class Albumentations:
1877
- """
1878
- Albumentations transformations for image augmentation.
1824
+ """Albumentations transformations for image augmentation.
1879
1825
 
1880
1826
  This class applies various image transformations using the Albumentations library. It includes operations such as
1881
1827
  Blur, Median Blur, conversion to grayscale, Contrast Limited Adaptive Histogram Equalization (CLAHE), random changes
@@ -1900,8 +1846,7 @@ class Albumentations:
1900
1846
  """
1901
1847
 
1902
1848
  def __init__(self, p: float = 1.0) -> None:
1903
- """
1904
- Initialize the Albumentations transform object for YOLO bbox formatted parameters.
1849
+ """Initialize the Albumentations transform object for YOLO bbox formatted parameters.
1905
1850
 
1906
1851
  This class applies various image augmentations using the Albumentations library, including Blur, Median Blur,
1907
1852
  conversion to grayscale, Contrast Limited Adaptive Histogram Equalization, random changes of brightness and
@@ -2014,8 +1959,7 @@ class Albumentations:
2014
1959
  LOGGER.info(f"{prefix}{e}")
2015
1960
 
2016
1961
  def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
2017
- """
2018
- Apply Albumentations transformations to input labels.
1962
+ """Apply Albumentations transformations to input labels.
2019
1963
 
2020
1964
  This method applies a series of image augmentations using the Albumentations library. It can perform both
2021
1965
  spatial and non-spatial transformations on the input image and its corresponding labels.
@@ -2071,8 +2015,7 @@ class Albumentations:
2071
2015
 
2072
2016
 
2073
2017
  class Format:
2074
- """
2075
- A class for formatting image annotations for object detection, instance segmentation, and pose estimation tasks.
2018
+ """A class for formatting image annotations for object detection, instance segmentation, and pose estimation tasks.
2076
2019
 
2077
2020
  This class standardizes image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.
2078
2021
 
@@ -2112,8 +2055,7 @@ class Format:
2112
2055
  batch_idx: bool = True,
2113
2056
  bgr: float = 0.0,
2114
2057
  ):
2115
- """
2116
- Initialize the Format class with given parameters for image and instance annotation formatting.
2058
+ """Initialize the Format class with given parameters for image and instance annotation formatting.
2117
2059
 
2118
2060
  This class standardizes image and instance annotations for object detection, instance segmentation, and pose
2119
2061
  estimation tasks, preparing them for use in PyTorch DataLoader's `collate_fn`.
@@ -2156,8 +2098,7 @@ class Format:
2156
2098
  self.bgr = bgr
2157
2099
 
2158
2100
  def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
2159
- """
2160
- Format image annotations for object detection, instance segmentation, and pose estimation tasks.
2101
+ """Format image annotations for object detection, instance segmentation, and pose estimation tasks.
2161
2102
 
2162
2103
  This method standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch
2163
2104
  DataLoader. It processes the input labels dictionary, converting annotations to the specified format and
@@ -2225,8 +2166,7 @@ class Format:
2225
2166
  return labels
2226
2167
 
2227
2168
  def _format_img(self, img: np.ndarray) -> torch.Tensor:
2228
- """
2229
- Format an image for YOLO from a Numpy array to a PyTorch tensor.
2169
+ """Format an image for YOLO from a Numpy array to a PyTorch tensor.
2230
2170
 
2231
2171
  This function performs the following operations:
2232
2172
  1. Ensures the image has 3 dimensions (adds a channel dimension if needed).
@@ -2258,8 +2198,7 @@ class Format:
2258
2198
  def _format_segments(
2259
2199
  self, instances: Instances, cls: np.ndarray, w: int, h: int
2260
2200
  ) -> tuple[np.ndarray, Instances, np.ndarray]:
2261
- """
2262
- Convert polygon segments to bitmap masks.
2201
+ """Convert polygon segments to bitmap masks.
2263
2202
 
2264
2203
  Args:
2265
2204
  instances (Instances): Object containing segment information.
@@ -2293,8 +2232,7 @@ class LoadVisualPrompt:
2293
2232
  """Create visual prompts from bounding boxes or masks for model input."""
2294
2233
 
2295
2234
  def __init__(self, scale_factor: float = 1 / 8) -> None:
2296
- """
2297
- Initialize the LoadVisualPrompt with a scale factor.
2235
+ """Initialize the LoadVisualPrompt with a scale factor.
2298
2236
 
2299
2237
  Args:
2300
2238
  scale_factor (float): Factor to scale the input image dimensions.
@@ -2302,8 +2240,7 @@ class LoadVisualPrompt:
2302
2240
  self.scale_factor = scale_factor
2303
2241
 
2304
2242
  def make_mask(self, boxes: torch.Tensor, h: int, w: int) -> torch.Tensor:
2305
- """
2306
- Create binary masks from bounding boxes.
2243
+ """Create binary masks from bounding boxes.
2307
2244
 
2308
2245
  Args:
2309
2246
  boxes (torch.Tensor): Bounding boxes in xyxy format, shape: (N, 4).
@@ -2320,8 +2257,7 @@ class LoadVisualPrompt:
2320
2257
  return (r >= x1) * (r < x2) * (c >= y1) * (c < y2)
2321
2258
 
2322
2259
  def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
2323
- """
2324
- Process labels to create visual prompts.
2260
+ """Process labels to create visual prompts.
2325
2261
 
2326
2262
  Args:
2327
2263
  labels (dict[str, Any]): Dictionary containing image data and annotations.
@@ -2347,8 +2283,7 @@ class LoadVisualPrompt:
2347
2283
  bboxes: np.ndarray | torch.Tensor = None,
2348
2284
  masks: np.ndarray | torch.Tensor = None,
2349
2285
  ) -> torch.Tensor:
2350
- """
2351
- Generate visual masks based on bounding boxes or masks.
2286
+ """Generate visual masks based on bounding boxes or masks.
2352
2287
 
2353
2288
  Args:
2354
2289
  category (int | np.ndarray | torch.Tensor): The category labels for the objects.
@@ -2389,12 +2324,11 @@ class LoadVisualPrompt:
2389
2324
 
2390
2325
 
2391
2326
  class RandomLoadText:
2392
- """
2393
- Randomly sample positive and negative texts and update class indices accordingly.
2327
+ """Randomly sample positive and negative texts and update class indices accordingly.
2394
2328
 
2395
- This class is responsible for sampling texts from a given set of class texts, including both positive
2396
- (present in the image) and negative (not present in the image) samples. It updates the class indices
2397
- to reflect the sampled texts and can optionally pad the text list to a fixed length.
2329
+ This class is responsible for sampling texts from a given set of class texts, including both positive (present in
2330
+ the image) and negative (not present in the image) samples. It updates the class indices to reflect the sampled
2331
+ texts and can optionally pad the text list to a fixed length.
2398
2332
 
2399
2333
  Attributes:
2400
2334
  prompt_format (str): Format string for text prompts.
@@ -2422,21 +2356,19 @@ class RandomLoadText:
2422
2356
  padding: bool = False,
2423
2357
  padding_value: list[str] = [""],
2424
2358
  ) -> None:
2425
- """
2426
- Initialize the RandomLoadText class for randomly sampling positive and negative texts.
2359
+ """Initialize the RandomLoadText class for randomly sampling positive and negative texts.
2427
2360
 
2428
- This class is designed to randomly sample positive texts and negative texts, and update the class
2429
- indices accordingly to the number of samples. It can be used for text-based object detection tasks.
2361
+ This class is designed to randomly sample positive texts and negative texts, and update the class indices
2362
+ according to the number of samples. It can be used for text-based object detection tasks.
2430
2363
 
2431
2364
  Args:
2432
- prompt_format (str): Format string for the prompt. The format string should
2433
- contain a single pair of curly braces {} where the text will be inserted.
2434
- neg_samples (tuple[int, int]): A range to randomly sample negative texts. The first integer
2435
- specifies the minimum number of negative samples, and the second integer specifies the
2436
- maximum.
2365
+ prompt_format (str): Format string for the prompt. The format string should contain a single pair of curly
2366
+ braces {} where the text will be inserted.
2367
+ neg_samples (tuple[int, int]): A range to randomly sample negative texts. The first integer specifies the
2368
+ minimum number of negative samples, and the second integer specifies the maximum.
2437
2369
  max_samples (int): The maximum number of different text samples in one image.
2438
- padding (bool): Whether to pad texts to max_samples. If True, the number of texts will always
2439
- be equal to max_samples.
2370
+ padding (bool): Whether to pad texts to max_samples. If True, the number of texts will always be equal to
2371
+ max_samples.
2440
2372
  padding_value (list[str]): The padding text to use when padding is True.
2441
2373
 
2442
2374
  Attributes:
@@ -2462,15 +2394,15 @@ class RandomLoadText:
2462
2394
  self.padding_value = padding_value
2463
2395
 
2464
2396
  def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
2465
- """
2466
- Randomly sample positive and negative texts and update class indices accordingly.
2397
+ """Randomly sample positive and negative texts and update class indices accordingly.
2467
2398
 
2468
- This method samples positive texts based on the existing class labels in the image, and randomly
2469
- selects negative texts from the remaining classes. It then updates the class indices to match the
2470
- new sampled text order.
2399
+ This method samples positive texts based on the existing class labels in the image, and randomly selects
2400
+ negative texts from the remaining classes. It then updates the class indices to match the new sampled text
2401
+ order.
2471
2402
 
2472
2403
  Args:
2473
- labels (dict[str, Any]): A dictionary containing image labels and metadata. Must include 'texts' and 'cls' keys.
2404
+ labels (dict[str, Any]): A dictionary containing image labels and metadata. Must include 'texts' and 'cls'
2405
+ keys.
2474
2406
 
2475
2407
  Returns:
2476
2408
  (dict[str, Any]): Updated labels dictionary with new 'cls' and 'texts' entries.
@@ -2528,16 +2460,16 @@ class RandomLoadText:
2528
2460
 
2529
2461
 
2530
2462
  def v8_transforms(dataset, imgsz: int, hyp: IterableSimpleNamespace, stretch: bool = False):
2531
- """
2532
- Apply a series of image transformations for training.
2463
+ """Apply a series of image transformations for training.
2533
2464
 
2534
- This function creates a composition of image augmentation techniques to prepare images for YOLO training.
2535
- It includes operations such as mosaic, copy-paste, random perspective, mixup, and various color adjustments.
2465
+ This function creates a composition of image augmentation techniques to prepare images for YOLO training. It
2466
+ includes operations such as mosaic, copy-paste, random perspective, mixup, and various color adjustments.
2536
2467
 
2537
2468
  Args:
2538
2469
  dataset (Dataset): The dataset object containing image data and annotations.
2539
2470
  imgsz (int): The target image size for resizing.
2540
- hyp (IterableSimpleNamespace): A dictionary of hyperparameters controlling various aspects of the transformations.
2471
+ hyp (IterableSimpleNamespace): A dictionary of hyperparameters controlling various aspects of the
2472
+ transformations.
2541
2473
  stretch (bool): If True, applies stretching to the image. If False, uses LetterBox resizing.
2542
2474
 
2543
2475
  Returns:
@@ -2603,12 +2535,11 @@ def classify_transforms(
2603
2535
  interpolation: str = "BILINEAR",
2604
2536
  crop_fraction: float | None = None,
2605
2537
  ):
2606
- """
2607
- Create a composition of image transforms for classification tasks.
2538
+ """Create a composition of image transforms for classification tasks.
2608
2539
 
2609
- This function generates a sequence of torchvision transforms suitable for preprocessing images
2610
- for classification models during evaluation or inference. The transforms include resizing,
2611
- center cropping, conversion to tensor, and normalization.
2540
+ This function generates a sequence of torchvision transforms suitable for preprocessing images for classification
2541
+ models during evaluation or inference. The transforms include resizing, center cropping, conversion to tensor, and
2542
+ normalization.
2612
2543
 
2613
2544
  Args:
2614
2545
  size (int | tuple): The target size for the transformed image. If an int, it defines the shortest edge. If a
@@ -2663,8 +2594,7 @@ def classify_augmentations(
2663
2594
  erasing: float = 0.0,
2664
2595
  interpolation: str = "BILINEAR",
2665
2596
  ):
2666
- """
2667
- Create a composition of image augmentation transforms for classification tasks.
2597
+ """Create a composition of image augmentation transforms for classification tasks.
2668
2598
 
2669
2599
  This function generates a set of image transformations suitable for training classification models. It includes
2670
2600
  options for resizing, flipping, color jittering, auto augmentation, and random erasing.
@@ -2752,11 +2682,10 @@ def classify_augmentations(
2752
2682
 
2753
2683
  # NOTE: keep this class for backward compatibility
2754
2684
  class ClassifyLetterBox:
2755
- """
2756
- A class for resizing and padding images for classification tasks.
2685
+ """A class for resizing and padding images for classification tasks.
2757
2686
 
2758
- This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]).
2759
- It resizes and pads images to a specified size while maintaining the original aspect ratio.
2687
+ This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]). It
2688
+ resizes and pads images to a specified size while maintaining the original aspect ratio.
2760
2689
 
2761
2690
  Attributes:
2762
2691
  h (int): Target height of the image.
@@ -2776,15 +2705,14 @@ class ClassifyLetterBox:
2776
2705
  """
2777
2706
 
2778
2707
  def __init__(self, size: int | tuple[int, int] = (640, 640), auto: bool = False, stride: int = 32):
2779
- """
2780
- Initialize the ClassifyLetterBox object for image preprocessing.
2708
+ """Initialize the ClassifyLetterBox object for image preprocessing.
2781
2709
 
2782
2710
  This class is designed to be part of a transformation pipeline for image classification tasks. It resizes and
2783
2711
  pads images to a specified size while maintaining the original aspect ratio.
2784
2712
 
2785
2713
  Args:
2786
- size (int | tuple[int, int]): Target size for the letterboxed image. If an int, a square image of
2787
- (size, size) is created. If a tuple, it should be (height, width).
2714
+ size (int | tuple[int, int]): Target size for the letterboxed image. If an int, a square image of (size,
2715
+ size) is created. If a tuple, it should be (height, width).
2788
2716
  auto (bool): If True, automatically calculates the short side based on stride.
2789
2717
  stride (int): The stride value, used when 'auto' is True.
2790
2718
 
@@ -2807,8 +2735,7 @@ class ClassifyLetterBox:
2807
2735
  self.stride = stride # used with auto
2808
2736
 
2809
2737
  def __call__(self, im: np.ndarray) -> np.ndarray:
2810
- """
2811
- Resize and pad an image using the letterbox method.
2738
+ """Resize and pad an image using the letterbox method.
2812
2739
 
2813
2740
  This method resizes the input image to fit within the specified dimensions while maintaining its aspect ratio,
2814
2741
  then pads the resized image to match the target size.
@@ -2817,8 +2744,8 @@ class ClassifyLetterBox:
2817
2744
  im (np.ndarray): Input image as a numpy array with shape (H, W, C).
2818
2745
 
2819
2746
  Returns:
2820
- (np.ndarray): Resized and padded image as a numpy array with shape (hs, ws, 3), where hs and ws are
2821
- the target height and width respectively.
2747
+ (np.ndarray): Resized and padded image as a numpy array with shape (hs, ws, 3), where hs and ws are the
2748
+ target height and width respectively.
2822
2749
 
2823
2750
  Examples:
2824
2751
  >>> letterbox = ClassifyLetterBox(size=(640, 640))
@@ -2843,8 +2770,7 @@ class ClassifyLetterBox:
2843
2770
 
2844
2771
  # NOTE: keep this class for backward compatibility
2845
2772
  class CenterCrop:
2846
- """
2847
- Apply center cropping to images for classification tasks.
2773
+ """Apply center cropping to images for classification tasks.
2848
2774
 
2849
2775
  This class performs center cropping on input images, resizing them to a specified size while maintaining the aspect
2850
2776
  ratio. It is designed to be part of a transformation pipeline, e.g., T.Compose([CenterCrop(size), ToTensor()]).
@@ -2865,15 +2791,14 @@ class CenterCrop:
2865
2791
  """
2866
2792
 
2867
2793
  def __init__(self, size: int | tuple[int, int] = (640, 640)):
2868
- """
2869
- Initialize the CenterCrop object for image preprocessing.
2794
+ """Initialize the CenterCrop object for image preprocessing.
2870
2795
 
2871
2796
  This class is designed to be part of a transformation pipeline, e.g., T.Compose([CenterCrop(size), ToTensor()]).
2872
2797
  It performs a center crop on input images to a specified size.
2873
2798
 
2874
2799
  Args:
2875
- size (int | tuple[int, int]): The desired output size of the crop. If size is an int, a square crop
2876
- (size, size) is made. If size is a sequence like (h, w), it is used as the output size.
2800
+ size (int | tuple[int, int]): The desired output size of the crop. If size is an int, a square crop (size,
2801
+ size) is made. If size is a sequence like (h, w), it is used as the output size.
2877
2802
 
2878
2803
  Returns:
2879
2804
  (None): This method initializes the object and does not return anything.
@@ -2889,15 +2814,14 @@ class CenterCrop:
2889
2814
  self.h, self.w = (size, size) if isinstance(size, int) else size
2890
2815
 
2891
2816
  def __call__(self, im: Image.Image | np.ndarray) -> np.ndarray:
2892
- """
2893
- Apply center cropping to an input image.
2817
+ """Apply center cropping to an input image.
2894
2818
 
2895
- This method resizes and crops the center of the image using a letterbox method. It maintains the aspect
2896
- ratio of the original image while fitting it into the specified dimensions.
2819
+ This method resizes and crops the center of the image using a letterbox method. It maintains the aspect ratio of
2820
+ the original image while fitting it into the specified dimensions.
2897
2821
 
2898
2822
  Args:
2899
- im (np.ndarray | PIL.Image.Image): The input image as a numpy array of shape (H, W, C) or a
2900
- PIL Image object.
2823
+ im (np.ndarray | PIL.Image.Image): The input image as a numpy array of shape (H, W, C) or a PIL Image
2824
+ object.
2901
2825
 
2902
2826
  Returns:
2903
2827
  (np.ndarray): The center-cropped and resized image as a numpy array of shape (self.h, self.w, C).
@@ -2918,8 +2842,7 @@ class CenterCrop:
2918
2842
 
2919
2843
  # NOTE: keep this class for backward compatibility
2920
2844
  class ToTensor:
2921
- """
2922
- Convert an image from a numpy array to a PyTorch tensor.
2845
+ """Convert an image from a numpy array to a PyTorch tensor.
2923
2846
 
2924
2847
  This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]).
2925
2848
 
@@ -2942,12 +2865,11 @@ class ToTensor:
2942
2865
  """
2943
2866
 
2944
2867
  def __init__(self, half: bool = False):
2945
- """
2946
- Initialize the ToTensor object for converting images to PyTorch tensors.
2868
+ """Initialize the ToTensor object for converting images to PyTorch tensors.
2947
2869
 
2948
2870
  This class is designed to be used as part of a transformation pipeline for image preprocessing in the
2949
- Ultralytics YOLO framework. It converts numpy arrays or PIL Images to PyTorch tensors, with an option
2950
- for half-precision (float16) conversion.
2871
+ Ultralytics YOLO framework. It converts numpy arrays or PIL Images to PyTorch tensors, with an option for
2872
+ half-precision (float16) conversion.
2951
2873
 
2952
2874
  Args:
2953
2875
  half (bool): If True, converts the tensor to half precision (float16).
@@ -2963,19 +2885,18 @@ class ToTensor:
2963
2885
  self.half = half
2964
2886
 
2965
2887
  def __call__(self, im: np.ndarray) -> torch.Tensor:
2966
- """
2967
- Transform an image from a numpy array to a PyTorch tensor.
2888
+ """Transform an image from a numpy array to a PyTorch tensor.
2968
2889
 
2969
- This method converts the input image from a numpy array to a PyTorch tensor, applying optional
2970
- half-precision conversion and normalization. The image is transposed from HWC to CHW format and
2971
- the color channels are reversed from BGR to RGB.
2890
+ This method converts the input image from a numpy array to a PyTorch tensor, applying optional half-precision
2891
+ conversion and normalization. The image is transposed from HWC to CHW format and the color channels are reversed
2892
+ from BGR to RGB.
2972
2893
 
2973
2894
  Args:
2974
2895
  im (np.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.
2975
2896
 
2976
2897
  Returns:
2977
- (torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized
2978
- to [0, 1] with shape (C, H, W) in RGB order.
2898
+ (torch.Tensor): The transformed image as a PyTorch tensor in float32 or float16, normalized to [0, 1] with
2899
+ shape (C, H, W) in RGB order.
2979
2900
 
2980
2901
  Examples:
2981
2902
  >>> transform = ToTensor(half=True)