dgenerate-ultralytics-headless 8.3.141__py3-none-any.whl → 8.3.144__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/METADATA +1 -1
  2. dgenerate_ultralytics_headless-8.3.144.dist-info/RECORD +272 -0
  3. tests/conftest.py +7 -24
  4. tests/test_cli.py +1 -1
  5. tests/test_cuda.py +7 -2
  6. tests/test_engine.py +7 -8
  7. tests/test_exports.py +16 -16
  8. tests/test_integrations.py +1 -1
  9. tests/test_solutions.py +12 -12
  10. ultralytics/__init__.py +1 -1
  11. ultralytics/cfg/__init__.py +22 -19
  12. ultralytics/data/annotator.py +6 -5
  13. ultralytics/data/augment.py +127 -126
  14. ultralytics/data/base.py +54 -51
  15. ultralytics/data/build.py +47 -23
  16. ultralytics/data/converter.py +47 -43
  17. ultralytics/data/dataset.py +51 -50
  18. ultralytics/data/loaders.py +77 -44
  19. ultralytics/data/split.py +22 -9
  20. ultralytics/data/split_dota.py +63 -39
  21. ultralytics/data/utils.py +59 -39
  22. ultralytics/engine/exporter.py +79 -27
  23. ultralytics/engine/model.py +39 -39
  24. ultralytics/engine/predictor.py +37 -28
  25. ultralytics/engine/results.py +187 -158
  26. ultralytics/engine/trainer.py +36 -19
  27. ultralytics/engine/tuner.py +12 -9
  28. ultralytics/engine/validator.py +7 -9
  29. ultralytics/hub/__init__.py +11 -13
  30. ultralytics/hub/auth.py +22 -2
  31. ultralytics/hub/google/__init__.py +19 -19
  32. ultralytics/hub/session.py +37 -51
  33. ultralytics/hub/utils.py +19 -5
  34. ultralytics/models/fastsam/model.py +30 -12
  35. ultralytics/models/fastsam/predict.py +5 -6
  36. ultralytics/models/fastsam/utils.py +3 -3
  37. ultralytics/models/fastsam/val.py +10 -6
  38. ultralytics/models/nas/model.py +9 -5
  39. ultralytics/models/nas/predict.py +6 -6
  40. ultralytics/models/nas/val.py +3 -3
  41. ultralytics/models/rtdetr/model.py +7 -6
  42. ultralytics/models/rtdetr/predict.py +14 -7
  43. ultralytics/models/rtdetr/train.py +10 -4
  44. ultralytics/models/rtdetr/val.py +36 -9
  45. ultralytics/models/sam/amg.py +30 -12
  46. ultralytics/models/sam/build.py +22 -22
  47. ultralytics/models/sam/model.py +10 -9
  48. ultralytics/models/sam/modules/blocks.py +76 -80
  49. ultralytics/models/sam/modules/decoders.py +6 -8
  50. ultralytics/models/sam/modules/encoders.py +23 -26
  51. ultralytics/models/sam/modules/memory_attention.py +13 -1
  52. ultralytics/models/sam/modules/sam.py +57 -26
  53. ultralytics/models/sam/modules/tiny_encoder.py +232 -237
  54. ultralytics/models/sam/modules/transformer.py +13 -13
  55. ultralytics/models/sam/modules/utils.py +11 -19
  56. ultralytics/models/sam/predict.py +114 -101
  57. ultralytics/models/utils/loss.py +98 -77
  58. ultralytics/models/utils/ops.py +116 -67
  59. ultralytics/models/yolo/classify/predict.py +5 -5
  60. ultralytics/models/yolo/classify/train.py +32 -28
  61. ultralytics/models/yolo/classify/val.py +7 -8
  62. ultralytics/models/yolo/detect/predict.py +1 -0
  63. ultralytics/models/yolo/detect/train.py +15 -14
  64. ultralytics/models/yolo/detect/val.py +37 -36
  65. ultralytics/models/yolo/model.py +106 -23
  66. ultralytics/models/yolo/obb/predict.py +3 -4
  67. ultralytics/models/yolo/obb/train.py +14 -6
  68. ultralytics/models/yolo/obb/val.py +29 -23
  69. ultralytics/models/yolo/pose/predict.py +9 -8
  70. ultralytics/models/yolo/pose/train.py +24 -16
  71. ultralytics/models/yolo/pose/val.py +44 -26
  72. ultralytics/models/yolo/segment/predict.py +5 -5
  73. ultralytics/models/yolo/segment/train.py +11 -7
  74. ultralytics/models/yolo/segment/val.py +2 -2
  75. ultralytics/models/yolo/world/train.py +33 -23
  76. ultralytics/models/yolo/world/train_world.py +11 -3
  77. ultralytics/models/yolo/yoloe/predict.py +11 -11
  78. ultralytics/models/yolo/yoloe/train.py +73 -21
  79. ultralytics/models/yolo/yoloe/train_seg.py +10 -7
  80. ultralytics/models/yolo/yoloe/val.py +42 -18
  81. ultralytics/nn/autobackend.py +59 -15
  82. ultralytics/nn/modules/__init__.py +4 -4
  83. ultralytics/nn/modules/activation.py +4 -1
  84. ultralytics/nn/modules/block.py +178 -111
  85. ultralytics/nn/modules/conv.py +6 -5
  86. ultralytics/nn/modules/head.py +469 -121
  87. ultralytics/nn/modules/transformer.py +147 -58
  88. ultralytics/nn/tasks.py +227 -20
  89. ultralytics/nn/text_model.py +30 -33
  90. ultralytics/solutions/ai_gym.py +1 -1
  91. ultralytics/solutions/analytics.py +7 -4
  92. ultralytics/solutions/config.py +10 -10
  93. ultralytics/solutions/distance_calculation.py +13 -11
  94. ultralytics/solutions/heatmap.py +1 -1
  95. ultralytics/solutions/instance_segmentation.py +6 -3
  96. ultralytics/solutions/object_blurrer.py +3 -3
  97. ultralytics/solutions/object_counter.py +18 -12
  98. ultralytics/solutions/object_cropper.py +12 -5
  99. ultralytics/solutions/parking_management.py +29 -28
  100. ultralytics/solutions/queue_management.py +6 -6
  101. ultralytics/solutions/region_counter.py +10 -3
  102. ultralytics/solutions/security_alarm.py +3 -3
  103. ultralytics/solutions/similarity_search.py +85 -24
  104. ultralytics/solutions/solutions.py +215 -85
  105. ultralytics/solutions/speed_estimation.py +28 -22
  106. ultralytics/solutions/streamlit_inference.py +17 -12
  107. ultralytics/solutions/trackzone.py +4 -4
  108. ultralytics/trackers/basetrack.py +16 -23
  109. ultralytics/trackers/bot_sort.py +30 -20
  110. ultralytics/trackers/byte_tracker.py +70 -64
  111. ultralytics/trackers/track.py +4 -8
  112. ultralytics/trackers/utils/gmc.py +31 -58
  113. ultralytics/trackers/utils/kalman_filter.py +37 -37
  114. ultralytics/trackers/utils/matching.py +1 -1
  115. ultralytics/utils/__init__.py +105 -89
  116. ultralytics/utils/autobatch.py +16 -3
  117. ultralytics/utils/autodevice.py +54 -24
  118. ultralytics/utils/benchmarks.py +42 -28
  119. ultralytics/utils/callbacks/base.py +3 -3
  120. ultralytics/utils/callbacks/clearml.py +9 -9
  121. ultralytics/utils/callbacks/comet.py +67 -25
  122. ultralytics/utils/callbacks/dvc.py +7 -10
  123. ultralytics/utils/callbacks/mlflow.py +2 -5
  124. ultralytics/utils/callbacks/neptune.py +7 -13
  125. ultralytics/utils/callbacks/raytune.py +1 -1
  126. ultralytics/utils/callbacks/tensorboard.py +5 -6
  127. ultralytics/utils/callbacks/wb.py +14 -14
  128. ultralytics/utils/checks.py +14 -13
  129. ultralytics/utils/dist.py +5 -5
  130. ultralytics/utils/downloads.py +94 -67
  131. ultralytics/utils/errors.py +5 -5
  132. ultralytics/utils/export.py +61 -47
  133. ultralytics/utils/files.py +23 -22
  134. ultralytics/utils/instance.py +48 -52
  135. ultralytics/utils/loss.py +78 -40
  136. ultralytics/utils/metrics.py +186 -130
  137. ultralytics/utils/ops.py +186 -190
  138. ultralytics/utils/patches.py +15 -17
  139. ultralytics/utils/plotting.py +84 -42
  140. ultralytics/utils/tal.py +21 -15
  141. ultralytics/utils/torch_utils.py +53 -50
  142. ultralytics/utils/triton.py +5 -4
  143. ultralytics/utils/tuner.py +5 -5
  144. dgenerate_ultralytics_headless-8.3.141.dist-info/RECORD +0 -272
  145. {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/WHEEL +0 -0
  146. {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/entry_points.txt +0 -0
  147. {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/licenses/LICENSE +0 -0
  148. {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/top_level.txt +0 -0
@@ -31,10 +31,10 @@ class BaseTransform:
31
31
  compatible with both classification and semantic segmentation tasks.
32
32
 
33
33
  Methods:
34
- apply_image: Applies image transformations to labels.
35
- apply_instances: Applies transformations to object instances in labels.
36
- apply_semantic: Applies semantic segmentation to an image.
37
- __call__: Applies all label transformations to an image, instances, and semantic masks.
34
+ apply_image: Apply image transformations to labels.
35
+ apply_instances: Apply transformations to object instances in labels.
36
+ apply_semantic: Apply semantic segmentation transformations to an image.
37
+ __call__: Apply all label transformations to an image, instances, and semantic masks.
38
38
 
39
39
  Examples:
40
40
  >>> transform = BaseTransform()
@@ -44,7 +44,7 @@ class BaseTransform:
44
44
 
45
45
  def __init__(self) -> None:
46
46
  """
47
- Initializes the BaseTransform object.
47
+ Initialize the BaseTransform object.
48
48
 
49
49
  This constructor sets up the base transformation object, which can be extended for specific image
50
50
  processing tasks. It is designed to be compatible with both classification and semantic segmentation.
@@ -56,7 +56,7 @@ class BaseTransform:
56
56
 
57
57
  def apply_image(self, labels):
58
58
  """
59
- Applies image transformations to labels.
59
+ Apply image transformations to labels.
60
60
 
61
61
  This method is intended to be overridden by subclasses to implement specific image transformation
62
62
  logic. In its base form, it returns the input labels unchanged.
@@ -79,7 +79,7 @@ class BaseTransform:
79
79
 
80
80
  def apply_instances(self, labels):
81
81
  """
82
- Applies transformations to object instances in labels.
82
+ Apply transformations to object instances in labels.
83
83
 
84
84
  This method is responsible for applying various transformations to object instances within the given
85
85
  labels. It is designed to be overridden by subclasses to implement specific instance transformation
@@ -100,7 +100,7 @@ class BaseTransform:
100
100
 
101
101
  def apply_semantic(self, labels):
102
102
  """
103
- Applies semantic segmentation transformations to an image.
103
+ Apply semantic segmentation transformations to an image.
104
104
 
105
105
  This method is intended to be overridden by subclasses to implement specific semantic segmentation
106
106
  transformations. In its base form, it does not perform any operations.
@@ -120,7 +120,7 @@ class BaseTransform:
120
120
 
121
121
  def __call__(self, labels):
122
122
  """
123
- Applies all label transformations to an image, instances, and semantic masks.
123
+ Apply all label transformations to an image, instances, and semantic masks.
124
124
 
125
125
  This method orchestrates the application of various transformations defined in the BaseTransform class
126
126
  to the input labels. It sequentially calls the apply_image and apply_instances methods to process the
@@ -151,12 +151,12 @@ class Compose:
151
151
  transforms (List[Callable]): A list of transformation functions to be applied sequentially.
152
152
 
153
153
  Methods:
154
- __call__: Applies a series of transformations to input data.
155
- append: Appends a new transform to the existing list of transforms.
156
- insert: Inserts a new transform at a specified index in the list of transforms.
157
- __getitem__: Retrieves a specific transform or a set of transforms using indexing.
158
- __setitem__: Sets a specific transform or a set of transforms using indexing.
159
- tolist: Converts the list of transforms to a standard Python list.
154
+ __call__: Apply a series of transformations to input data.
155
+ append: Append a new transform to the existing list of transforms.
156
+ insert: Insert a new transform at a specified index in the list of transforms.
157
+ __getitem__: Retrieve a specific transform or a set of transforms using indexing.
158
+ __setitem__: Set a specific transform or a set of transforms using indexing.
159
+ tolist: Convert the list of transforms to a standard Python list.
160
160
 
161
161
  Examples:
162
162
  >>> transforms = [RandomFlip(), RandomPerspective(30)]
@@ -168,7 +168,7 @@ class Compose:
168
168
 
169
169
  def __init__(self, transforms):
170
170
  """
171
- Initializes the Compose object with a list of transforms.
171
+ Initialize the Compose object with a list of transforms.
172
172
 
173
173
  Args:
174
174
  transforms (List[Callable]): A list of callable transform objects to be applied sequentially.
@@ -182,8 +182,9 @@ class Compose:
182
182
 
183
183
  def __call__(self, data):
184
184
  """
185
- Applies a series of transformations to input data. This method sequentially applies each transformation in the
186
- Compose object's list of transforms to the input data.
185
+ Apply a series of transformations to input data.
186
+
187
+ This method sequentially applies each transformation in the Compose object's transforms to the input data.
187
188
 
188
189
  Args:
189
190
  data (Any): The input data to be transformed. This can be of any type, depending on the
@@ -203,7 +204,7 @@ class Compose:
203
204
 
204
205
  def append(self, transform):
205
206
  """
206
- Appends a new transform to the existing list of transforms.
207
+ Append a new transform to the existing list of transforms.
207
208
 
208
209
  Args:
209
210
  transform (BaseTransform): The transformation to be added to the composition.
@@ -216,7 +217,7 @@ class Compose:
216
217
 
217
218
  def insert(self, index, transform):
218
219
  """
219
- Inserts a new transform at a specified index in the existing list of transforms.
220
+ Insert a new transform at a specified index in the existing list of transforms.
220
221
 
221
222
  Args:
222
223
  index (int): The index at which to insert the new transform.
@@ -232,7 +233,7 @@ class Compose:
232
233
 
233
234
  def __getitem__(self, index: Union[list, int]) -> "Compose":
234
235
  """
235
- Retrieves a specific transform or a set of transforms using indexing.
236
+ Retrieve a specific transform or a set of transforms using indexing.
236
237
 
237
238
  Args:
238
239
  index (int | List[int]): Index or list of indices of the transforms to retrieve.
@@ -255,7 +256,7 @@ class Compose:
255
256
 
256
257
  def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None:
257
258
  """
258
- Sets one or more transforms in the composition using indexing.
259
+ Set one or more transforms in the composition using indexing.
259
260
 
260
261
  Args:
261
262
  index (int | List[int]): Index or list of indices to set transforms at.
@@ -282,7 +283,7 @@ class Compose:
282
283
 
283
284
  def tolist(self):
284
285
  """
285
- Converts the list of transforms to a standard Python list.
286
+ Convert the list of transforms to a standard Python list.
286
287
 
287
288
  Returns:
288
289
  (list): A list containing all the transform objects in the Compose instance.
@@ -298,7 +299,7 @@ class Compose:
298
299
 
299
300
  def __repr__(self):
300
301
  """
301
- Returns a string representation of the Compose object.
302
+ Return a string representation of the Compose object.
302
303
 
303
304
  Returns:
304
305
  (str): A string representation of the Compose object, including the list of transforms.
@@ -328,10 +329,10 @@ class BaseMixTransform:
328
329
  p (float): Probability of applying the mix transformation.
329
330
 
330
331
  Methods:
331
- __call__: Applies the mix transformation to the input labels.
332
+ __call__: Apply the mix transformation to the input labels.
332
333
  _mix_transform: Abstract method to be implemented by subclasses for specific mix operations.
333
334
  get_indexes: Abstract method to get indexes of images to be mixed.
334
- _update_label_text: Updates label text for mixed images.
335
+ _update_label_text: Update label text for mixed images.
335
336
 
336
337
  Examples:
337
338
  >>> class CustomMixTransform(BaseMixTransform):
@@ -348,7 +349,7 @@ class BaseMixTransform:
348
349
 
349
350
  def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
350
351
  """
351
- Initializes the BaseMixTransform object for mix transformations like CutMix, MixUp and Mosaic.
352
+ Initialize the BaseMixTransform object for mix transformations like CutMix, MixUp and Mosaic.
352
353
 
353
354
  This class serves as a base for implementing mix transformations in image processing pipelines.
354
355
 
@@ -368,7 +369,7 @@ class BaseMixTransform:
368
369
 
369
370
  def __call__(self, labels):
370
371
  """
371
- Applies pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
372
+ Apply pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
372
373
 
373
374
  This method determines whether to apply the mix transform based on a probability factor. If applied, it
374
375
  selects additional images, applies pre-transforms if specified, and then performs the mix transform.
@@ -408,7 +409,7 @@ class BaseMixTransform:
408
409
 
409
410
  def _mix_transform(self, labels):
410
411
  """
411
- Applies CutMix, MixUp or Mosaic augmentation to the label dictionary.
412
+ Apply CutMix, MixUp or Mosaic augmentation to the label dictionary.
412
413
 
413
414
  This method should be implemented by subclasses to perform specific mix transformations like CutMix, MixUp or
414
415
  Mosaic. It modifies the input label dictionary in-place with the augmented data.
@@ -429,7 +430,7 @@ class BaseMixTransform:
429
430
 
430
431
  def get_indexes(self):
431
432
  """
432
- Gets a list of shuffled indexes for mosaic augmentation.
433
+ Get a list of shuffled indexes for mosaic augmentation.
433
434
 
434
435
  Returns:
435
436
  (List[int]): A list of shuffled indexes from the dataset.
@@ -444,7 +445,7 @@ class BaseMixTransform:
444
445
  @staticmethod
445
446
  def _update_label_text(labels):
446
447
  """
447
- Updates label text and class IDs for mixed labels in image augmentation.
448
+ Update label text and class IDs for mixed labels in image augmentation.
448
449
 
449
450
  This method processes the 'texts' and 'cls' fields of the input labels dictionary and any mixed labels,
450
451
  creating a unified set of text labels and updating class IDs accordingly.
@@ -502,13 +503,13 @@ class Mosaic(BaseMixTransform):
502
503
  border (Tuple[int, int]): Border size for width and height.
503
504
 
504
505
  Methods:
505
- get_indexes: Returns a list of random indexes from the dataset.
506
- _mix_transform: Applies mixup transformation to the input image and labels.
507
- _mosaic3: Creates a 1x3 image mosaic.
508
- _mosaic4: Creates a 2x2 image mosaic.
509
- _mosaic9: Creates a 3x3 image mosaic.
510
- _update_labels: Updates labels with padding.
511
- _cat_labels: Concatenates labels and clips mosaic border instances.
506
+ get_indexes: Return a list of random indexes from the dataset.
507
+ _mix_transform: Apply mosaic augmentation to the input image and labels.
508
+ _mosaic3: Create a 1x3 image mosaic.
509
+ _mosaic4: Create a 2x2 image mosaic.
510
+ _mosaic9: Create a 3x3 image mosaic.
511
+ _update_labels: Update labels with padding.
512
+ _cat_labels: Concatenate labels and clip mosaic border instances.
512
513
 
513
514
  Examples:
514
515
  >>> from ultralytics.data.augment import Mosaic
@@ -519,7 +520,7 @@ class Mosaic(BaseMixTransform):
519
520
 
520
521
  def __init__(self, dataset, imgsz=640, p=1.0, n=4):
521
522
  """
522
- Initializes the Mosaic augmentation object.
523
+ Initialize the Mosaic augmentation object.
523
524
 
524
525
  This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image.
525
526
  The augmentation is applied to a dataset with a given probability.
@@ -545,7 +546,7 @@ class Mosaic(BaseMixTransform):
545
546
 
546
547
  def get_indexes(self):
547
548
  """
548
- Returns a list of random indexes from the dataset for mosaic augmentation.
549
+ Return a list of random indexes from the dataset for mosaic augmentation.
549
550
 
550
551
  This method selects random image indexes either from a buffer or from the entire dataset, depending on
551
552
  the 'buffer' parameter. It is used to choose images for creating mosaic augmentations.
@@ -566,7 +567,7 @@ class Mosaic(BaseMixTransform):
566
567
 
567
568
  def _mix_transform(self, labels):
568
569
  """
569
- Applies mosaic augmentation to the input image and labels.
570
+ Apply mosaic augmentation to the input image and labels.
570
571
 
571
572
  This method combines multiple images (3, 4, or 9) into a single mosaic image based on the 'n' attribute.
572
573
  It ensures that rectangular annotations are not present and that there are other images available for
@@ -595,7 +596,7 @@ class Mosaic(BaseMixTransform):
595
596
 
596
597
  def _mosaic3(self, labels):
597
598
  """
598
- Creates a 1x3 image mosaic by combining three images.
599
+ Create a 1x3 image mosaic by combining three images.
599
600
 
600
601
  This method arranges three images in a horizontal layout, with the main image in the center and two
601
602
  additional images on either side. It's part of the Mosaic augmentation technique used in object detection.
@@ -654,7 +655,7 @@ class Mosaic(BaseMixTransform):
654
655
 
655
656
  def _mosaic4(self, labels):
656
657
  """
657
- Creates a 2x2 image mosaic from four input images.
658
+ Create a 2x2 image mosaic from four input images.
658
659
 
659
660
  This method combines four images into a single mosaic image by placing them in a 2x2 grid. It also
660
661
  updates the corresponding labels for each image in the mosaic.
@@ -712,7 +713,7 @@ class Mosaic(BaseMixTransform):
712
713
 
713
714
  def _mosaic9(self, labels):
714
715
  """
715
- Creates a 3x3 image mosaic from the input image and eight additional images.
716
+ Create a 3x3 image mosaic from the input image and eight additional images.
716
717
 
717
718
  This method combines nine images into a single mosaic image. The input image is placed at the center,
718
719
  and eight additional images from the dataset are placed around it in a 3x3 grid pattern.
@@ -785,7 +786,7 @@ class Mosaic(BaseMixTransform):
785
786
  @staticmethod
786
787
  def _update_labels(labels, padw, padh):
787
788
  """
788
- Updates label coordinates with padding values.
789
+ Update label coordinates with padding values.
789
790
 
790
791
  This method adjusts the bounding box coordinates of object instances in the labels by adding padding
791
792
  values. It also denormalizes the coordinates if they were previously normalized.
@@ -811,7 +812,7 @@ class Mosaic(BaseMixTransform):
811
812
 
812
813
  def _cat_labels(self, mosaic_labels):
813
814
  """
814
- Concatenates and processes labels for mosaic augmentation.
815
+ Concatenate and process labels for mosaic augmentation.
815
816
 
816
817
  This method combines labels from multiple images used in mosaic augmentation, clips instances to the
817
818
  mosaic border, and removes zero-area boxes.
@@ -863,7 +864,7 @@ class Mosaic(BaseMixTransform):
863
864
 
864
865
  class MixUp(BaseMixTransform):
865
866
  """
866
- Applies MixUp augmentation to image datasets.
867
+ Apply MixUp augmentation to image datasets.
867
868
 
868
869
  This class implements the MixUp augmentation technique as described in the paper [mixup: Beyond Empirical Risk
869
870
  Minimization](https://arxiv.org/abs/1710.09412). MixUp combines two images and their labels using a random weight.
@@ -874,7 +875,7 @@ class MixUp(BaseMixTransform):
874
875
  p (float): Probability of applying MixUp augmentation.
875
876
 
876
877
  Methods:
877
- _mix_transform: Applies MixUp augmentation to the input labels.
878
+ _mix_transform: Apply MixUp augmentation to the input labels.
878
879
 
879
880
  Examples:
880
881
  >>> from ultralytics.data.augment import MixUp
@@ -885,7 +886,7 @@ class MixUp(BaseMixTransform):
885
886
 
886
887
  def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
887
888
  """
888
- Initializes the MixUp augmentation object.
889
+ Initialize the MixUp augmentation object.
889
890
 
890
891
  MixUp is an image augmentation technique that combines two images by taking a weighted sum of their pixel
891
892
  values and labels. This implementation is designed for use with the Ultralytics YOLO framework.
@@ -904,7 +905,7 @@ class MixUp(BaseMixTransform):
904
905
 
905
906
  def _mix_transform(self, labels):
906
907
  """
907
- Applies MixUp augmentation to the input labels.
908
+ Apply MixUp augmentation to the input labels.
908
909
 
909
910
  This method implements the MixUp augmentation technique as described in the paper
910
911
  "mixup: Beyond Empirical Risk Minimization" (https://arxiv.org/abs/1710.09412).
@@ -929,7 +930,7 @@ class MixUp(BaseMixTransform):
929
930
 
930
931
  class CutMix(BaseMixTransform):
931
932
  """
932
- Applies CutMix augmentation to image datasets as described in the paper https://arxiv.org/abs/1905.04899.
933
+ Apply CutMix augmentation to image datasets as described in the paper https://arxiv.org/abs/1905.04899.
933
934
 
934
935
  CutMix combines two images by replacing a random rectangular region of one image with the corresponding region from another image,
935
936
  and adjusts the labels proportionally to the area of the mixed region.
@@ -938,12 +939,12 @@ class CutMix(BaseMixTransform):
938
939
  dataset (Any): The dataset to which CutMix augmentation will be applied.
939
940
  pre_transform (Callable | None): Optional transform to apply before CutMix.
940
941
  p (float): Probability of applying CutMix augmentation.
941
- beta (float): Beta distribution parameter for sampling the mixing ratio (default=1.0).
942
- num_areas (int): Number of areas to try to cut and mix (default=3).
942
+ beta (float): Beta distribution parameter for sampling the mixing ratio.
943
+ num_areas (int): Number of areas to try to cut and mix.
943
944
 
944
945
  Methods:
945
- _mix_transform: Applies CutMix augmentation to the input labels.
946
- _rand_bbox: Generates random bounding box coordinates for the cut region.
946
+ _mix_transform: Apply CutMix augmentation to the input labels.
947
+ _rand_bbox: Generate random bounding box coordinates for the cut region.
947
948
 
948
949
  Examples:
949
950
  >>> from ultralytics.data.augment import CutMix
@@ -954,14 +955,14 @@ class CutMix(BaseMixTransform):
954
955
 
955
956
  def __init__(self, dataset, pre_transform=None, p=0.0, beta=1.0, num_areas=3) -> None:
956
957
  """
957
- Initializes the CutMix augmentation object.
958
+ Initialize the CutMix augmentation object.
958
959
 
959
960
  Args:
960
961
  dataset (Any): The dataset to which CutMix augmentation will be applied.
961
962
  pre_transform (Callable | None): Optional transform to apply before CutMix.
962
963
  p (float): Probability of applying CutMix augmentation.
963
- beta (float): Beta distribution parameter for sampling the mixing ratio (default=1.0).
964
- num_areas (int): Number of areas to try to cut and mix (default=3).
964
+ beta (float): Beta distribution parameter for sampling the mixing ratio.
965
+ num_areas (int): Number of areas to try to cut and mix.
965
966
  """
966
967
  super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
967
968
  self.beta = beta
@@ -969,7 +970,7 @@ class CutMix(BaseMixTransform):
969
970
 
970
971
  def _rand_bbox(self, width, height):
971
972
  """
972
- Generates random bounding box coordinates for the cut region.
973
+ Generate random bounding box coordinates for the cut region.
973
974
 
974
975
  Args:
975
976
  width (int): Width of the image.
@@ -999,7 +1000,7 @@ class CutMix(BaseMixTransform):
999
1000
 
1000
1001
  def _mix_transform(self, labels):
1001
1002
  """
1002
- Applies CutMix augmentation to the input labels.
1003
+ Apply CutMix augmentation to the input labels.
1003
1004
 
1004
1005
  Args:
1005
1006
  labels (dict): A dictionary containing the original image and label information.
@@ -1021,7 +1022,7 @@ class CutMix(BaseMixTransform):
1021
1022
  return labels
1022
1023
 
1023
1024
  labels2 = labels.pop("mix_labels")[0]
1024
- area = cut_areas[np.random.choice(idx)] # randomle select one
1025
+ area = cut_areas[np.random.choice(idx)] # randomly select one
1025
1026
  ioa2 = bbox_ioa(area[None], labels2["instances"].bboxes).squeeze(0)
1026
1027
  indexes2 = np.nonzero(ioa2 >= (0.01 if len(labels["instances"].segments) else 0.1))[0]
1027
1028
  if len(indexes2) == 0:
@@ -1047,7 +1048,7 @@ class CutMix(BaseMixTransform):
1047
1048
 
1048
1049
  class RandomPerspective:
1049
1050
  """
1050
- Implements random perspective and affine transformations on images and corresponding annotations.
1051
+ Implement random perspective and affine transformations on images and corresponding annotations.
1051
1052
 
1052
1053
  This class applies random rotations, translations, scaling, shearing, and perspective transformations
1053
1054
  to images and their associated bounding boxes, segments, and keypoints. It can be used as part of an
@@ -1063,12 +1064,12 @@ class RandomPerspective:
1063
1064
  pre_transform (Callable | None): Optional transform to apply before the random perspective.
1064
1065
 
1065
1066
  Methods:
1066
- affine_transform: Applies affine transformations to the input image.
1067
- apply_bboxes: Transforms bounding boxes using the affine matrix.
1068
- apply_segments: Transforms segments and generates new bounding boxes.
1069
- apply_keypoints: Transforms keypoints using the affine matrix.
1070
- __call__: Applies the random perspective transformation to images and annotations.
1071
- box_candidates: Filters transformed bounding boxes based on size and aspect ratio.
1067
+ affine_transform: Apply affine transformations to the input image.
1068
+ apply_bboxes: Transform bounding boxes using the affine matrix.
1069
+ apply_segments: Transform segments and generate new bounding boxes.
1070
+ apply_keypoints: Transform keypoints using the affine matrix.
1071
+ __call__: Apply the random perspective transformation to images and annotations.
1072
+ box_candidates: Filter transformed bounding boxes based on size and aspect ratio.
1072
1073
 
1073
1074
  Examples:
1074
1075
  >>> transform = RandomPerspective(degrees=10, translate=0.1, scale=0.1, shear=10)
@@ -1083,7 +1084,7 @@ class RandomPerspective:
1083
1084
  self, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=(0, 0), pre_transform=None
1084
1085
  ):
1085
1086
  """
1086
- Initializes RandomPerspective object with transformation parameters.
1087
+ Initialize RandomPerspective object with transformation parameters.
1087
1088
 
1088
1089
  This class implements random perspective and affine transformations on images and corresponding bounding boxes,
1089
1090
  segments, and keypoints. Transformations include rotation, translation, scaling, and shearing.
@@ -1112,7 +1113,7 @@ class RandomPerspective:
1112
1113
 
1113
1114
  def affine_transform(self, img, border):
1114
1115
  """
1115
- Applies a sequence of affine transformations centered around the image center.
1116
+ Apply a sequence of affine transformations centered around the image center.
1116
1117
 
1117
1118
  This function performs a series of geometric transformations on the input image, including
1118
1119
  translation, perspective change, rotation, scaling, and shearing. The transformations are
@@ -1246,7 +1247,7 @@ class RandomPerspective:
1246
1247
 
1247
1248
  def apply_keypoints(self, keypoints, M):
1248
1249
  """
1249
- Applies affine transformation to keypoints.
1250
+ Apply affine transformation to keypoints.
1250
1251
 
1251
1252
  This method transforms the input keypoints using the provided affine transformation matrix. It handles
1252
1253
  perspective rescaling if necessary and updates the visibility of keypoints that fall outside the image
@@ -1280,7 +1281,7 @@ class RandomPerspective:
1280
1281
 
1281
1282
  def __call__(self, labels):
1282
1283
  """
1283
- Applies random perspective and affine transformations to an image and its associated labels.
1284
+ Apply random perspective and affine transformations to an image and its associated labels.
1284
1285
 
1285
1286
  This method performs a series of transformations including rotation, translation, scaling, shearing,
1286
1287
  and perspective distortion on the input image and adjusts the corresponding bounding boxes, segments,
@@ -1398,7 +1399,7 @@ class RandomPerspective:
1398
1399
 
1399
1400
  class RandomHSV:
1400
1401
  """
1401
- Randomly adjusts the Hue, Saturation, and Value (HSV) channels of an image.
1402
+ Randomly adjust the Hue, Saturation, and Value (HSV) channels of an image.
1402
1403
 
1403
1404
  This class applies random HSV augmentation to images within predefined limits set by hgain, sgain, and vgain.
1404
1405
 
@@ -1408,7 +1409,7 @@ class RandomHSV:
1408
1409
  vgain (float): Maximum variation for value. Range is typically [0, 1].
1409
1410
 
1410
1411
  Methods:
1411
- __call__: Applies random HSV augmentation to an image.
1412
+ __call__: Apply random HSV augmentation to an image.
1412
1413
 
1413
1414
  Examples:
1414
1415
  >>> import numpy as np
@@ -1422,7 +1423,7 @@ class RandomHSV:
1422
1423
 
1423
1424
  def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
1424
1425
  """
1425
- Initializes the RandomHSV object for random HSV (Hue, Saturation, Value) augmentation.
1426
+ Initialize the RandomHSV object for random HSV (Hue, Saturation, Value) augmentation.
1426
1427
 
1427
1428
  This class applies random adjustments to the HSV channels of an image within specified limits.
1428
1429
 
@@ -1441,7 +1442,7 @@ class RandomHSV:
1441
1442
 
1442
1443
  def __call__(self, labels):
1443
1444
  """
1444
- Applies random HSV augmentation to an image within predefined limits.
1445
+ Apply random HSV augmentation to an image within predefined limits.
1445
1446
 
1446
1447
  This method modifies the input image by randomly adjusting its Hue, Saturation, and Value (HSV) channels.
1447
1448
  The adjustments are made within the limits set by hgain, sgain, and vgain during initialization.
@@ -1482,7 +1483,7 @@ class RandomHSV:
1482
1483
 
1483
1484
  class RandomFlip:
1484
1485
  """
1485
- Applies a random horizontal or vertical flip to an image with a given probability.
1486
+ Apply a random horizontal or vertical flip to an image with a given probability.
1486
1487
 
1487
1488
  This class performs random image flipping and updates corresponding instance annotations such as
1488
1489
  bounding boxes and keypoints.
@@ -1493,7 +1494,7 @@ class RandomFlip:
1493
1494
  flip_idx (array-like): Index mapping for flipping keypoints, if applicable.
1494
1495
 
1495
1496
  Methods:
1496
- __call__: Applies the random flip transformation to an image and its annotations.
1497
+ __call__: Apply the random flip transformation to an image and its annotations.
1497
1498
 
1498
1499
  Examples:
1499
1500
  >>> transform = RandomFlip(p=0.5, direction="horizontal")
@@ -1504,7 +1505,7 @@ class RandomFlip:
1504
1505
 
1505
1506
  def __init__(self, p=0.5, direction="horizontal", flip_idx=None) -> None:
1506
1507
  """
1507
- Initializes the RandomFlip class with probability and direction.
1508
+ Initialize the RandomFlip class with probability and direction.
1508
1509
 
1509
1510
  This class applies a random horizontal or vertical flip to an image with a given probability.
1510
1511
  It also updates any instances (bounding boxes, keypoints, etc.) accordingly.
@@ -1530,7 +1531,7 @@ class RandomFlip:
1530
1531
 
1531
1532
  def __call__(self, labels):
1532
1533
  """
1533
- Applies random flip to an image and updates any instances like bounding boxes or keypoints accordingly.
1534
+ Apply random flip to an image and update any instances like bounding boxes or keypoints accordingly.
1534
1535
 
1535
1536
  This method randomly flips the input image either horizontally or vertically based on the initialized
1536
1537
  probability and direction. It also updates the corresponding instances (bounding boxes, keypoints) to
@@ -1634,7 +1635,7 @@ class LetterBox:
1634
1635
 
1635
1636
  def __call__(self, labels=None, image=None):
1636
1637
  """
1637
- Resizes and pads an image for object detection, instance segmentation, or pose estimation tasks.
1638
+ Resize and pad an image for object detection, instance segmentation, or pose estimation tasks.
1638
1639
 
1639
1640
  This method applies letterboxing to the input image, which involves resizing the image while maintaining its
1640
1641
  aspect ratio and adding padding to fit the new shape. It also updates any associated labels accordingly.
@@ -1711,7 +1712,7 @@ class LetterBox:
1711
1712
  @staticmethod
1712
1713
  def _update_labels(labels, ratio, padw, padh):
1713
1714
  """
1714
- Updates labels after applying letterboxing to an image.
1715
+ Update labels after applying letterboxing to an image.
1715
1716
 
1716
1717
  This method modifies the bounding box coordinates of instances in the labels
1717
1718
  to account for resizing and padding applied during letterboxing.
@@ -1753,8 +1754,8 @@ class CopyPaste(BaseMixTransform):
1753
1754
  p (float): Probability of applying Copy-Paste augmentation.
1754
1755
 
1755
1756
  Methods:
1756
- _mix_transform: Applies Copy-Paste augmentation to the input labels.
1757
- __call__: Applies the Copy-Paste transformation to images and annotations.
1757
+ _mix_transform: Apply Copy-Paste augmentation to the input labels.
1758
+ __call__: Apply the Copy-Paste transformation to images and annotations.
1758
1759
 
1759
1760
  Examples:
1760
1761
  >>> from ultralytics.data.augment import CopyPaste
@@ -1764,18 +1765,18 @@ class CopyPaste(BaseMixTransform):
1764
1765
  """
1765
1766
 
1766
1767
  def __init__(self, dataset=None, pre_transform=None, p=0.5, mode="flip") -> None:
1767
- """Initializes CopyPaste object with dataset, pre_transform, and probability of applying MixUp."""
1768
+ """Initialize CopyPaste object with dataset, pre_transform, and probability of applying MixUp."""
1768
1769
  super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
1769
1770
  assert mode in {"flip", "mixup"}, f"Expected `mode` to be `flip` or `mixup`, but got {mode}."
1770
1771
  self.mode = mode
1771
1772
 
1772
1773
  def _mix_transform(self, labels):
1773
- """Applies Copy-Paste augmentation to combine objects from another image into the current image."""
1774
+ """Apply Copy-Paste augmentation to combine objects from another image into the current image."""
1774
1775
  labels2 = labels["mix_labels"][0]
1775
1776
  return self._transform(labels, labels2)
1776
1777
 
1777
1778
  def __call__(self, labels):
1778
- """Applies Copy-Paste augmentation to an image and its labels."""
1779
+ """Apply Copy-Paste augmentation to an image and its labels."""
1779
1780
  if len(labels["instances"].segments) == 0 or self.p == 0:
1780
1781
  return labels
1781
1782
  if self.mode == "flip":
@@ -1802,7 +1803,7 @@ class CopyPaste(BaseMixTransform):
1802
1803
  return labels
1803
1804
 
1804
1805
  def _transform(self, labels1, labels2={}):
1805
- """Applies Copy-Paste augmentation to combine objects from another image into the current image."""
1806
+ """Apply Copy-Paste augmentation to combine objects from another image into the current image."""
1806
1807
  im = labels1["img"]
1807
1808
  cls = labels1["cls"]
1808
1809
  h, w = im.shape[:2]
@@ -1851,7 +1852,7 @@ class Albumentations:
1851
1852
  contains_spatial (bool): Indicates if the transforms include spatial operations.
1852
1853
 
1853
1854
  Methods:
1854
- __call__: Applies the Albumentations transformations to the input labels.
1855
+ __call__: Apply the Albumentations transformations to the input labels.
1855
1856
 
1856
1857
  Examples:
1857
1858
  >>> transform = Albumentations(p=0.5)
@@ -1979,7 +1980,7 @@ class Albumentations:
1979
1980
 
1980
1981
  def __call__(self, labels):
1981
1982
  """
1982
- Applies Albumentations transformations to input labels.
1983
+ Apply Albumentations transformations to input labels.
1983
1984
 
1984
1985
  This method applies a series of image augmentations using the Albumentations library. It can perform both
1985
1986
  spatial and non-spatial transformations on the input image and its corresponding labels.
@@ -2052,9 +2053,9 @@ class Format:
2052
2053
  bgr (float): The probability to return BGR images.
2053
2054
 
2054
2055
  Methods:
2055
- __call__: Formats labels dictionary with image, classes, bounding boxes, and optionally masks and keypoints.
2056
- _format_img: Converts image from Numpy array to PyTorch tensor.
2057
- _format_segments: Converts polygon points to bitmap masks.
2056
+ __call__: Format labels dictionary with image, classes, bounding boxes, and optionally masks and keypoints.
2057
+ _format_img: Convert image from Numpy array to PyTorch tensor.
2058
+ _format_segments: Convert polygon points to bitmap masks.
2058
2059
 
2059
2060
  Examples:
2060
2061
  >>> formatter = Format(bbox_format="xywh", normalize=True, return_mask=True)
@@ -2077,7 +2078,7 @@ class Format:
2077
2078
  bgr=0.0,
2078
2079
  ):
2079
2080
  """
2080
- Initializes the Format class with given parameters for image and instance annotation formatting.
2081
+ Initialize the Format class with given parameters for image and instance annotation formatting.
2081
2082
 
2082
2083
  This class standardizes image and instance annotations for object detection, instance segmentation, and pose
2083
2084
  estimation tasks, preparing them for use in PyTorch DataLoader's `collate_fn`.
@@ -2121,7 +2122,7 @@ class Format:
2121
2122
 
2122
2123
  def __call__(self, labels):
2123
2124
  """
2124
- Formats image annotations for object detection, instance segmentation, and pose estimation tasks.
2125
+ Format image annotations for object detection, instance segmentation, and pose estimation tasks.
2125
2126
 
2126
2127
  This method standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch
2127
2128
  DataLoader. It processes the input labels dictionary, converting annotations to the specified format and
@@ -2188,7 +2189,7 @@ class Format:
2188
2189
 
2189
2190
  def _format_img(self, img):
2190
2191
  """
2191
- Formats an image for YOLO from a Numpy array to a PyTorch tensor.
2192
+ Format an image for YOLO from a Numpy array to a PyTorch tensor.
2192
2193
 
2193
2194
  This function performs the following operations:
2194
2195
  1. Ensures the image has 3 dimensions (adds a channel dimension if needed).
@@ -2219,7 +2220,7 @@ class Format:
2219
2220
 
2220
2221
  def _format_segments(self, instances, cls, w, h):
2221
2222
  """
2222
- Converts polygon segments to bitmap masks.
2223
+ Convert polygon segments to bitmap masks.
2223
2224
 
2224
2225
  Args:
2225
2226
  instances (Instances): Object containing segment information.
@@ -2250,7 +2251,7 @@ class Format:
2250
2251
 
2251
2252
 
2252
2253
  class LoadVisualPrompt:
2253
- """Creates visual prompts from bounding boxes or masks for model input."""
2254
+ """Create visual prompts from bounding boxes or masks for model input."""
2254
2255
 
2255
2256
  def __init__(self, scale_factor=1 / 8):
2256
2257
  """
@@ -2307,8 +2308,8 @@ class LoadVisualPrompt:
2307
2308
  Args:
2308
2309
  category (int | np.ndarray | torch.Tensor): The category labels for the objects.
2309
2310
  shape (tuple): The shape of the image (height, width).
2310
- bboxes (np.ndarray | torch.Tensor, optional): Bounding boxes for the objects, xyxy format. Defaults to None.
2311
- masks (np.ndarray | torch.Tensor, optional): Masks for the objects. Defaults to None.
2311
+ bboxes (np.ndarray | torch.Tensor, optional): Bounding boxes for the objects, xyxy format.
2312
+ masks (np.ndarray | torch.Tensor, optional): Masks for the objects.
2312
2313
 
2313
2314
  Returns:
2314
2315
  (torch.Tensor): A tensor containing the visual masks for each category.
@@ -2344,7 +2345,7 @@ class LoadVisualPrompt:
2344
2345
 
2345
2346
  class RandomLoadText:
2346
2347
  """
2347
- Randomly samples positive and negative texts and updates class indices accordingly.
2348
+ Randomly sample positive and negative texts and update class indices accordingly.
2348
2349
 
2349
2350
  This class is responsible for sampling texts from a given set of class texts, including both positive
2350
2351
  (present in the image) and negative (not present in the image) samples. It updates the class indices
@@ -2358,7 +2359,7 @@ class RandomLoadText:
2358
2359
  padding_value (str): The text used for padding when padding is True.
2359
2360
 
2360
2361
  Methods:
2361
- __call__: Processes the input labels and returns updated classes and texts.
2362
+ __call__: Process the input labels and return updated classes and texts.
2362
2363
 
2363
2364
  Examples:
2364
2365
  >>> loader = RandomLoadText(prompt_format="Object: {}", neg_samples=(5, 10), max_samples=20)
@@ -2377,21 +2378,21 @@ class RandomLoadText:
2377
2378
  padding_value: List[str] = [""],
2378
2379
  ) -> None:
2379
2380
  """
2380
- Initializes the RandomLoadText class for randomly sampling positive and negative texts.
2381
+ Initialize the RandomLoadText class for randomly sampling positive and negative texts.
2381
2382
 
2382
2383
  This class is designed to randomly sample positive texts and negative texts, and update the class
2383
2384
  indices accordingly to the number of samples. It can be used for text-based object detection tasks.
2384
2385
 
2385
2386
  Args:
2386
- prompt_format (str): Format string for the prompt. Default is '{}'. The format string should
2387
+ prompt_format (str): Format string for the prompt. The format string should
2387
2388
  contain a single pair of curly braces {} where the text will be inserted.
2388
2389
  neg_samples (Tuple[int, int]): A range to randomly sample negative texts. The first integer
2389
2390
  specifies the minimum number of negative samples, and the second integer specifies the
2390
- maximum. Default is (80, 80).
2391
- max_samples (int): The maximum number of different text samples in one image. Default is 80.
2391
+ maximum.
2392
+ max_samples (int): The maximum number of different text samples in one image.
2392
2393
  padding (bool): Whether to pad texts to max_samples. If True, the number of texts will always
2393
- be equal to max_samples. Default is False.
2394
- padding_value (str): The padding text to use when padding is True. Default is an empty string.
2394
+ be equal to max_samples.
2395
+ padding_value (str): The padding text to use when padding is True.
2395
2396
 
2396
2397
  Attributes:
2397
2398
  prompt_format (str): The format string for the prompt.
@@ -2417,7 +2418,7 @@ class RandomLoadText:
2417
2418
 
2418
2419
  def __call__(self, labels: dict) -> dict:
2419
2420
  """
2420
- Randomly samples positive and negative texts and updates class indices accordingly.
2421
+ Randomly sample positive and negative texts and update class indices accordingly.
2421
2422
 
2422
2423
  This method samples positive texts based on the existing class labels in the image, and randomly
2423
2424
  selects negative texts from the remaining classes. It then updates the class indices to match the
@@ -2483,7 +2484,7 @@ class RandomLoadText:
2483
2484
 
2484
2485
  def v8_transforms(dataset, imgsz, hyp, stretch=False):
2485
2486
  """
2486
- Applies a series of image transformations for training.
2487
+ Apply a series of image transformations for training.
2487
2488
 
2488
2489
  This function creates a composition of image augmentation techniques to prepare images for YOLO training.
2489
2490
  It includes operations such as mosaic, copy-paste, random perspective, mixup, and various color adjustments.
@@ -2558,7 +2559,7 @@ def classify_transforms(
2558
2559
  crop_fraction=None,
2559
2560
  ):
2560
2561
  """
2561
- Creates a composition of image transforms for classification tasks.
2562
+ Create a composition of image transforms for classification tasks.
2562
2563
 
2563
2564
  This function generates a sequence of torchvision transforms suitable for preprocessing images
2564
2565
  for classification models during evaluation or inference. The transforms include resizing,
@@ -2618,7 +2619,7 @@ def classify_augmentations(
2618
2619
  interpolation="BILINEAR",
2619
2620
  ):
2620
2621
  """
2621
- Creates a composition of image augmentation transforms for classification tasks.
2622
+ Create a composition of image augmentation transforms for classification tasks.
2622
2623
 
2623
2624
  This function generates a set of image transformations suitable for training classification models. It includes
2624
2625
  options for resizing, flipping, color jittering, auto augmentation, and random erasing.
@@ -2719,7 +2720,7 @@ class ClassifyLetterBox:
2719
2720
  stride (int): The stride value, used when 'auto' is True.
2720
2721
 
2721
2722
  Methods:
2722
- __call__: Applies the letterbox transformation to an input image.
2723
+ __call__: Apply the letterbox transformation to an input image.
2723
2724
 
2724
2725
  Examples:
2725
2726
  >>> transform = ClassifyLetterBox(size=(640, 640), auto=False, stride=32)
@@ -2731,7 +2732,7 @@ class ClassifyLetterBox:
2731
2732
 
2732
2733
  def __init__(self, size=(640, 640), auto=False, stride=32):
2733
2734
  """
2734
- Initializes the ClassifyLetterBox object for image preprocessing.
2735
+ Initialize the ClassifyLetterBox object for image preprocessing.
2735
2736
 
2736
2737
  This class is designed to be part of a transformation pipeline for image classification tasks. It resizes and
2737
2738
  pads images to a specified size while maintaining the original aspect ratio.
@@ -2739,8 +2740,8 @@ class ClassifyLetterBox:
2739
2740
  Args:
2740
2741
  size (int | Tuple[int, int]): Target size for the letterboxed image. If an int, a square image of
2741
2742
  (size, size) is created. If a tuple, it should be (height, width).
2742
- auto (bool): If True, automatically calculates the short side based on stride. Default is False.
2743
- stride (int): The stride value, used when 'auto' is True. Default is 32.
2743
+ auto (bool): If True, automatically calculates the short side based on stride.
2744
+ stride (int): The stride value, used when 'auto' is True.
2744
2745
 
2745
2746
  Attributes:
2746
2747
  h (int): Target height of the letterboxed image.
@@ -2762,7 +2763,7 @@ class ClassifyLetterBox:
2762
2763
 
2763
2764
  def __call__(self, im):
2764
2765
  """
2765
- Resizes and pads an image using the letterbox method.
2766
+ Resize and pad an image using the letterbox method.
2766
2767
 
2767
2768
  This method resizes the input image to fit within the specified dimensions while maintaining its aspect ratio,
2768
2769
  then pads the resized image to match the target size.
@@ -2798,7 +2799,7 @@ class ClassifyLetterBox:
2798
2799
  # NOTE: keep this class for backward compatibility
2799
2800
  class CenterCrop:
2800
2801
  """
2801
- Applies center cropping to images for classification tasks.
2802
+ Apply center cropping to images for classification tasks.
2802
2803
 
2803
2804
  This class performs center cropping on input images, resizing them to a specified size while maintaining the aspect
2804
2805
  ratio. It is designed to be part of a transformation pipeline, e.g., T.Compose([CenterCrop(size), ToTensor()]).
@@ -2808,7 +2809,7 @@ class CenterCrop:
2808
2809
  w (int): Target width of the cropped image.
2809
2810
 
2810
2811
  Methods:
2811
- __call__: Applies the center crop transformation to an input image.
2812
+ __call__: Apply the center crop transformation to an input image.
2812
2813
 
2813
2814
  Examples:
2814
2815
  >>> transform = CenterCrop(640)
@@ -2820,7 +2821,7 @@ class CenterCrop:
2820
2821
 
2821
2822
  def __init__(self, size=640):
2822
2823
  """
2823
- Initializes the CenterCrop object for image preprocessing.
2824
+ Initialize the CenterCrop object for image preprocessing.
2824
2825
 
2825
2826
  This class is designed to be part of a transformation pipeline, e.g., T.Compose([CenterCrop(size), ToTensor()]).
2826
2827
  It performs a center crop on input images to a specified size.
@@ -2844,7 +2845,7 @@ class CenterCrop:
2844
2845
 
2845
2846
  def __call__(self, im):
2846
2847
  """
2847
- Applies center cropping to an input image.
2848
+ Apply center cropping to an input image.
2848
2849
 
2849
2850
  This method resizes and crops the center of the image using a letterbox method. It maintains the aspect
2850
2851
  ratio of the original image while fitting it into the specified dimensions.
@@ -2873,7 +2874,7 @@ class CenterCrop:
2873
2874
  # NOTE: keep this class for backward compatibility
2874
2875
  class ToTensor:
2875
2876
  """
2876
- Converts an image from a numpy array to a PyTorch tensor.
2877
+ Convert an image from a numpy array to a PyTorch tensor.
2877
2878
 
2878
2879
  This class is designed to be part of a transformation pipeline, e.g., T.Compose([LetterBox(size), ToTensor()]).
2879
2880
 
@@ -2881,7 +2882,7 @@ class ToTensor:
2881
2882
  half (bool): If True, converts the image to half precision (float16).
2882
2883
 
2883
2884
  Methods:
2884
- __call__: Applies the tensor conversion to an input image.
2885
+ __call__: Apply the tensor conversion to an input image.
2885
2886
 
2886
2887
  Examples:
2887
2888
  >>> transform = ToTensor(half=True)
@@ -2897,14 +2898,14 @@ class ToTensor:
2897
2898
 
2898
2899
  def __init__(self, half=False):
2899
2900
  """
2900
- Initializes the ToTensor object for converting images to PyTorch tensors.
2901
+ Initialize the ToTensor object for converting images to PyTorch tensors.
2901
2902
 
2902
2903
  This class is designed to be used as part of a transformation pipeline for image preprocessing in the
2903
2904
  Ultralytics YOLO framework. It converts numpy arrays or PIL Images to PyTorch tensors, with an option
2904
2905
  for half-precision (float16) conversion.
2905
2906
 
2906
2907
  Args:
2907
- half (bool): If True, converts the tensor to half precision (float16). Default is False.
2908
+ half (bool): If True, converts the tensor to half precision (float16).
2908
2909
 
2909
2910
  Examples:
2910
2911
  >>> transform = ToTensor(half=True)
@@ -2918,7 +2919,7 @@ class ToTensor:
2918
2919
 
2919
2920
  def __call__(self, im):
2920
2921
  """
2921
- Transforms an image from a numpy array to a PyTorch tensor.
2922
+ Transform an image from a numpy array to a PyTorch tensor.
2922
2923
 
2923
2924
  This method converts the input image from a numpy array to a PyTorch tensor, applying optional
2924
2925
  half-precision conversion and normalization. The image is transposed from HWC to CHW format and