dgenerate-ultralytics-headless 8.3.194__py3-none-any.whl → 8.3.195__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/METADATA +1 -2
  2. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/RECORD +97 -96
  3. tests/test_python.py +1 -1
  4. ultralytics/__init__.py +1 -1
  5. ultralytics/cfg/__init__.py +8 -8
  6. ultralytics/data/annotator.py +1 -1
  7. ultralytics/data/augment.py +75 -75
  8. ultralytics/data/base.py +12 -12
  9. ultralytics/data/converter.py +4 -4
  10. ultralytics/data/dataset.py +7 -7
  11. ultralytics/data/loaders.py +15 -15
  12. ultralytics/data/split_dota.py +10 -10
  13. ultralytics/data/utils.py +12 -12
  14. ultralytics/engine/model.py +13 -13
  15. ultralytics/engine/predictor.py +13 -13
  16. ultralytics/engine/results.py +21 -21
  17. ultralytics/hub/google/__init__.py +2 -2
  18. ultralytics/hub/session.py +7 -7
  19. ultralytics/models/fastsam/model.py +5 -5
  20. ultralytics/models/fastsam/predict.py +11 -11
  21. ultralytics/models/nas/model.py +1 -1
  22. ultralytics/models/rtdetr/predict.py +2 -2
  23. ultralytics/models/rtdetr/val.py +4 -4
  24. ultralytics/models/sam/amg.py +6 -6
  25. ultralytics/models/sam/build.py +9 -9
  26. ultralytics/models/sam/model.py +7 -7
  27. ultralytics/models/sam/modules/blocks.py +6 -6
  28. ultralytics/models/sam/modules/decoders.py +1 -1
  29. ultralytics/models/sam/modules/encoders.py +27 -27
  30. ultralytics/models/sam/modules/sam.py +4 -4
  31. ultralytics/models/sam/modules/tiny_encoder.py +18 -18
  32. ultralytics/models/sam/modules/utils.py +8 -8
  33. ultralytics/models/sam/predict.py +63 -63
  34. ultralytics/models/utils/loss.py +22 -22
  35. ultralytics/models/utils/ops.py +8 -8
  36. ultralytics/models/yolo/classify/predict.py +2 -2
  37. ultralytics/models/yolo/classify/train.py +8 -8
  38. ultralytics/models/yolo/classify/val.py +4 -4
  39. ultralytics/models/yolo/detect/predict.py +3 -3
  40. ultralytics/models/yolo/detect/train.py +6 -6
  41. ultralytics/models/yolo/detect/val.py +32 -32
  42. ultralytics/models/yolo/model.py +6 -6
  43. ultralytics/models/yolo/obb/train.py +1 -1
  44. ultralytics/models/yolo/obb/val.py +13 -13
  45. ultralytics/models/yolo/pose/val.py +11 -11
  46. ultralytics/models/yolo/segment/predict.py +4 -4
  47. ultralytics/models/yolo/segment/train.py +1 -1
  48. ultralytics/models/yolo/segment/val.py +14 -14
  49. ultralytics/models/yolo/world/train.py +9 -9
  50. ultralytics/models/yolo/world/train_world.py +1 -1
  51. ultralytics/models/yolo/yoloe/predict.py +4 -4
  52. ultralytics/models/yolo/yoloe/train.py +4 -4
  53. ultralytics/nn/autobackend.py +2 -2
  54. ultralytics/nn/modules/block.py +6 -6
  55. ultralytics/nn/modules/conv.py +2 -2
  56. ultralytics/nn/modules/head.py +4 -4
  57. ultralytics/nn/tasks.py +13 -13
  58. ultralytics/nn/text_model.py +3 -3
  59. ultralytics/solutions/ai_gym.py +2 -2
  60. ultralytics/solutions/analytics.py +3 -3
  61. ultralytics/solutions/config.py +5 -5
  62. ultralytics/solutions/distance_calculation.py +2 -2
  63. ultralytics/solutions/heatmap.py +1 -1
  64. ultralytics/solutions/instance_segmentation.py +4 -4
  65. ultralytics/solutions/object_counter.py +4 -4
  66. ultralytics/solutions/parking_management.py +7 -7
  67. ultralytics/solutions/queue_management.py +3 -3
  68. ultralytics/solutions/region_counter.py +4 -4
  69. ultralytics/solutions/similarity_search.py +2 -2
  70. ultralytics/solutions/solutions.py +48 -48
  71. ultralytics/solutions/streamlit_inference.py +1 -1
  72. ultralytics/solutions/trackzone.py +4 -4
  73. ultralytics/solutions/vision_eye.py +1 -1
  74. ultralytics/trackers/byte_tracker.py +11 -11
  75. ultralytics/trackers/utils/gmc.py +3 -3
  76. ultralytics/trackers/utils/matching.py +5 -5
  77. ultralytics/utils/autodevice.py +2 -2
  78. ultralytics/utils/benchmarks.py +10 -10
  79. ultralytics/utils/callbacks/clearml.py +1 -1
  80. ultralytics/utils/callbacks/comet.py +5 -5
  81. ultralytics/utils/checks.py +5 -5
  82. ultralytics/utils/cpu.py +90 -0
  83. ultralytics/utils/dist.py +1 -1
  84. ultralytics/utils/downloads.py +2 -2
  85. ultralytics/utils/export.py +5 -5
  86. ultralytics/utils/instance.py +2 -2
  87. ultralytics/utils/metrics.py +35 -35
  88. ultralytics/utils/nms.py +4 -4
  89. ultralytics/utils/ops.py +1 -1
  90. ultralytics/utils/patches.py +2 -2
  91. ultralytics/utils/plotting.py +9 -9
  92. ultralytics/utils/torch_utils.py +2 -6
  93. ultralytics/utils/triton.py +5 -5
  94. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/WHEEL +0 -0
  95. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/entry_points.txt +0 -0
  96. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/licenses/LICENSE +0 -0
  97. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/top_level.txt +0 -0
@@ -150,7 +150,7 @@ class Compose:
150
150
  A class for composing multiple image transformations.
151
151
 
152
152
  Attributes:
153
- transforms (List[Callable]): A list of transformation functions to be applied sequentially.
153
+ transforms (list[Callable]): A list of transformation functions to be applied sequentially.
154
154
 
155
155
  Methods:
156
156
  __call__: Apply a series of transformations to input data.
@@ -173,7 +173,7 @@ class Compose:
173
173
  Initialize the Compose object with a list of transforms.
174
174
 
175
175
  Args:
176
- transforms (List[Callable]): A list of callable transform objects to be applied sequentially.
176
+ transforms (list[Callable]): A list of callable transform objects to be applied sequentially.
177
177
 
178
178
  Examples:
179
179
  >>> from ultralytics.data.augment import Compose, RandomHSV, RandomFlip
@@ -238,7 +238,7 @@ class Compose:
238
238
  Retrieve a specific transform or a set of transforms using indexing.
239
239
 
240
240
  Args:
241
- index (int | List[int]): Index or list of indices of the transforms to retrieve.
241
+ index (int | list[int]): Index or list of indices of the transforms to retrieve.
242
242
 
243
243
  Returns:
244
244
  (Compose): A new Compose object containing the selected transform(s).
@@ -260,8 +260,8 @@ class Compose:
260
260
  Set one or more transforms in the composition using indexing.
261
261
 
262
262
  Args:
263
- index (int | List[int]): Index or list of indices to set transforms at.
264
- value (Any | List[Any]): Transform or list of transforms to set at the specified index(es).
263
+ index (int | list[int]): Index or list of indices to set transforms at.
264
+ value (Any | list[Any]): Transform or list of transforms to set at the specified index(es).
265
265
 
266
266
  Raises:
267
267
  AssertionError: If index type is invalid, value type doesn't match index type, or index is out of range.
@@ -376,10 +376,10 @@ class BaseMixTransform:
376
376
  selects additional images, applies pre-transforms if specified, and then performs the mix transform.
377
377
 
378
378
  Args:
379
- labels (Dict[str, Any]): A dictionary containing label data for an image.
379
+ labels (dict[str, Any]): A dictionary containing label data for an image.
380
380
 
381
381
  Returns:
382
- (Dict[str, Any]): The transformed labels dictionary, which may include mixed data from other images.
382
+ (dict[str, Any]): The transformed labels dictionary, which may include mixed data from other images.
383
383
 
384
384
  Examples:
385
385
  >>> transform = BaseMixTransform(dataset, pre_transform=None, p=0.5)
@@ -416,11 +416,11 @@ class BaseMixTransform:
416
416
  Mosaic. It modifies the input label dictionary in-place with the augmented data.
417
417
 
418
418
  Args:
419
- labels (Dict[str, Any]): A dictionary containing image and label data. Expected to have a 'mix_labels' key
419
+ labels (dict[str, Any]): A dictionary containing image and label data. Expected to have a 'mix_labels' key
420
420
  with a list of additional image and label data for mixing.
421
421
 
422
422
  Returns:
423
- (Dict[str, Any]): The modified labels dictionary with augmented data after applying the mix transform.
423
+ (dict[str, Any]): The modified labels dictionary with augmented data after applying the mix transform.
424
424
 
425
425
  Examples:
426
426
  >>> transform = BaseMixTransform(dataset)
@@ -434,7 +434,7 @@ class BaseMixTransform:
434
434
  Get a list of shuffled indexes for mosaic augmentation.
435
435
 
436
436
  Returns:
437
- (List[int]): A list of shuffled indexes from the dataset.
437
+ (list[int]): A list of shuffled indexes from the dataset.
438
438
 
439
439
  Examples:
440
440
  >>> transform = BaseMixTransform(dataset)
@@ -452,11 +452,11 @@ class BaseMixTransform:
452
452
  creating a unified set of text labels and updating class IDs accordingly.
453
453
 
454
454
  Args:
455
- labels (Dict[str, Any]): A dictionary containing label information, including 'texts' and 'cls' fields,
455
+ labels (dict[str, Any]): A dictionary containing label information, including 'texts' and 'cls' fields,
456
456
  and optionally a 'mix_labels' field with additional label dictionaries.
457
457
 
458
458
  Returns:
459
- (Dict[str, Any]): The updated labels dictionary with unified text labels and updated class IDs.
459
+ (dict[str, Any]): The updated labels dictionary with unified text labels and updated class IDs.
460
460
 
461
461
  Examples:
462
462
  >>> labels = {
@@ -501,7 +501,7 @@ class Mosaic(BaseMixTransform):
501
501
  imgsz (int): Image size (height and width) after mosaic pipeline of a single image.
502
502
  p (float): Probability of applying the mosaic augmentation. Must be in the range 0-1.
503
503
  n (int): The grid size, either 4 (for 2x2) or 9 (for 3x3).
504
- border (Tuple[int, int]): Border size for width and height.
504
+ border (tuple[int, int]): Border size for width and height.
505
505
 
506
506
  Methods:
507
507
  get_indexes: Return a list of random indexes from the dataset.
@@ -553,7 +553,7 @@ class Mosaic(BaseMixTransform):
553
553
  the 'buffer' parameter. It is used to choose images for creating mosaic augmentations.
554
554
 
555
555
  Returns:
556
- (List[int]): A list of random image indexes. The length of the list is n-1, where n is the number
556
+ (list[int]): A list of random image indexes. The length of the list is n-1, where n is the number
557
557
  of images used in the mosaic (either 3 or 8, depending on whether n is 4 or 9).
558
558
 
559
559
  Examples:
@@ -575,12 +575,12 @@ class Mosaic(BaseMixTransform):
575
575
  mosaic augmentation.
576
576
 
577
577
  Args:
578
- labels (Dict[str, Any]): A dictionary containing image data and annotations. Expected keys include:
578
+ labels (dict[str, Any]): A dictionary containing image data and annotations. Expected keys include:
579
579
  - 'rect_shape': Should be None as rect and mosaic are mutually exclusive.
580
580
  - 'mix_labels': A list of dictionaries containing data for other images to be used in the mosaic.
581
581
 
582
582
  Returns:
583
- (Dict[str, Any]): A dictionary containing the mosaic-augmented image and updated annotations.
583
+ (dict[str, Any]): A dictionary containing the mosaic-augmented image and updated annotations.
584
584
 
585
585
  Raises:
586
586
  AssertionError: If 'rect_shape' is not None or if 'mix_labels' is empty.
@@ -603,12 +603,12 @@ class Mosaic(BaseMixTransform):
603
603
  additional images on either side. It's part of the Mosaic augmentation technique used in object detection.
604
604
 
605
605
  Args:
606
- labels (Dict[str, Any]): A dictionary containing image and label information for the main (center) image.
606
+ labels (dict[str, Any]): A dictionary containing image and label information for the main (center) image.
607
607
  Must include 'img' key with the image array, and 'mix_labels' key with a list of two
608
608
  dictionaries containing information for the side images.
609
609
 
610
610
  Returns:
611
- (Dict[str, Any]): A dictionary with the mosaic image and updated labels. Keys include:
611
+ (dict[str, Any]): A dictionary with the mosaic image and updated labels. Keys include:
612
612
  - 'img' (np.ndarray): The mosaic image array with shape (H, W, C).
613
613
  - Other keys from the input labels, updated to reflect the new image dimensions.
614
614
 
@@ -662,11 +662,11 @@ class Mosaic(BaseMixTransform):
662
662
  updates the corresponding labels for each image in the mosaic.
663
663
 
664
664
  Args:
665
- labels (Dict[str, Any]): A dictionary containing image data and labels for the base image (index 0) and three
665
+ labels (dict[str, Any]): A dictionary containing image data and labels for the base image (index 0) and three
666
666
  additional images (indices 1-3) in the 'mix_labels' key.
667
667
 
668
668
  Returns:
669
- (Dict[str, Any]): A dictionary containing the mosaic image and updated labels. The 'img' key contains the mosaic
669
+ (dict[str, Any]): A dictionary containing the mosaic image and updated labels. The 'img' key contains the mosaic
670
670
  image as a numpy array, and other keys contain the combined and adjusted labels for all four images.
671
671
 
672
672
  Examples:
@@ -720,15 +720,15 @@ class Mosaic(BaseMixTransform):
720
720
  and eight additional images from the dataset are placed around it in a 3x3 grid pattern.
721
721
 
722
722
  Args:
723
- labels (Dict[str, Any]): A dictionary containing the input image and its associated labels. It should have
723
+ labels (dict[str, Any]): A dictionary containing the input image and its associated labels. It should have
724
724
  the following keys:
725
725
  - 'img' (np.ndarray): The input image.
726
- - 'resized_shape' (Tuple[int, int]): The shape of the resized image (height, width).
727
- - 'mix_labels' (List[Dict]): A list of dictionaries containing information for the additional
726
+ - 'resized_shape' (tuple[int, int]): The shape of the resized image (height, width).
727
+ - 'mix_labels' (list[dict]): A list of dictionaries containing information for the additional
728
728
  eight images, each with the same structure as the input labels.
729
729
 
730
730
  Returns:
731
- (Dict[str, Any]): A dictionary containing the mosaic image and updated labels. It includes the following keys:
731
+ (dict[str, Any]): A dictionary containing the mosaic image and updated labels. It includes the following keys:
732
732
  - 'img' (np.ndarray): The final mosaic image.
733
733
  - Other keys from the input labels, updated to reflect the new mosaic arrangement.
734
734
 
@@ -793,7 +793,7 @@ class Mosaic(BaseMixTransform):
793
793
  values. It also denormalizes the coordinates if they were previously normalized.
794
794
 
795
795
  Args:
796
- labels (Dict[str, Any]): A dictionary containing image and instance information.
796
+ labels (dict[str, Any]): A dictionary containing image and instance information.
797
797
  padw (int): Padding width to be added to the x-coordinates.
798
798
  padh (int): Padding height to be added to the y-coordinates.
799
799
 
@@ -819,17 +819,17 @@ class Mosaic(BaseMixTransform):
819
819
  mosaic border, and removes zero-area boxes.
820
820
 
821
821
  Args:
822
- mosaic_labels (List[Dict[str, Any]]): A list of label dictionaries for each image in the mosaic.
822
+ mosaic_labels (list[dict[str, Any]]): A list of label dictionaries for each image in the mosaic.
823
823
 
824
824
  Returns:
825
- (Dict[str, Any]): A dictionary containing concatenated and processed labels for the mosaic image, including:
825
+ (dict[str, Any]): A dictionary containing concatenated and processed labels for the mosaic image, including:
826
826
  - im_file (str): File path of the first image in the mosaic.
827
- - ori_shape (Tuple[int, int]): Original shape of the first image.
828
- - resized_shape (Tuple[int, int]): Shape of the mosaic image (imgsz * 2, imgsz * 2).
827
+ - ori_shape (tuple[int, int]): Original shape of the first image.
828
+ - resized_shape (tuple[int, int]): Shape of the mosaic image (imgsz * 2, imgsz * 2).
829
829
  - cls (np.ndarray): Concatenated class labels.
830
830
  - instances (Instances): Concatenated instance annotations.
831
- - mosaic_border (Tuple[int, int]): Mosaic border size.
832
- - texts (List[str], optional): Text labels if present in the original labels.
831
+ - mosaic_border (tuple[int, int]): Mosaic border size.
832
+ - texts (list[str], optional): Text labels if present in the original labels.
833
833
 
834
834
  Examples:
835
835
  >>> mosaic = Mosaic(dataset, imgsz=640)
@@ -912,10 +912,10 @@ class MixUp(BaseMixTransform):
912
912
  "mixup: Beyond Empirical Risk Minimization" (https://arxiv.org/abs/1710.09412).
913
913
 
914
914
  Args:
915
- labels (Dict[str, Any]): A dictionary containing the original image and label information.
915
+ labels (dict[str, Any]): A dictionary containing the original image and label information.
916
916
 
917
917
  Returns:
918
- (Dict[str, Any]): A dictionary containing the mixed-up image and combined label information.
918
+ (dict[str, Any]): A dictionary containing the mixed-up image and combined label information.
919
919
 
920
920
  Examples:
921
921
  >>> mixer = MixUp(dataset)
@@ -978,7 +978,7 @@ class CutMix(BaseMixTransform):
978
978
  height (int): Height of the image.
979
979
 
980
980
  Returns:
981
- (Tuple[int, int, int, int]): (x1, y1, x2, y2) coordinates of the bounding box.
981
+ (tuple[int, int, int, int]): (x1, y1, x2, y2) coordinates of the bounding box.
982
982
  """
983
983
  # Sample mixing ratio from Beta distribution
984
984
  lam = np.random.beta(self.beta, self.beta)
@@ -1004,10 +1004,10 @@ class CutMix(BaseMixTransform):
1004
1004
  Apply CutMix augmentation to the input labels.
1005
1005
 
1006
1006
  Args:
1007
- labels (Dict[str, Any]): A dictionary containing the original image and label information.
1007
+ labels (dict[str, Any]): A dictionary containing the original image and label information.
1008
1008
 
1009
1009
  Returns:
1010
- (Dict[str, Any]): A dictionary containing the mixed image and adjusted labels.
1010
+ (dict[str, Any]): A dictionary containing the mixed image and adjusted labels.
1011
1011
 
1012
1012
  Examples:
1013
1013
  >>> cutter = CutMix(dataset)
@@ -1061,7 +1061,7 @@ class RandomPerspective:
1061
1061
  scale (float): Scaling factor range, e.g., scale=0.1 means 0.9-1.1.
1062
1062
  shear (float): Maximum shear angle in degrees.
1063
1063
  perspective (float): Perspective distortion factor.
1064
- border (Tuple[int, int]): Mosaic border size as (x, y).
1064
+ border (tuple[int, int]): Mosaic border size as (x, y).
1065
1065
  pre_transform (Callable | None): Optional transform to apply before the random perspective.
1066
1066
 
1067
1067
  Methods:
@@ -1103,7 +1103,7 @@ class RandomPerspective:
1103
1103
  scale (float): Scaling factor interval, e.g., a scale factor of 0.5 allows a resize between 50%-150%.
1104
1104
  shear (float): Shear intensity (angle in degrees).
1105
1105
  perspective (float): Perspective distortion factor.
1106
- border (Tuple[int, int]): Tuple specifying mosaic border (top/bottom, left/right).
1106
+ border (tuple[int, int]): Tuple specifying mosaic border (top/bottom, left/right).
1107
1107
  pre_transform (Callable | None): Function/transform to apply to the image before starting the random
1108
1108
  transformation.
1109
1109
 
@@ -1129,7 +1129,7 @@ class RandomPerspective:
1129
1129
 
1130
1130
  Args:
1131
1131
  img (np.ndarray): Input image to be transformed.
1132
- border (Tuple[int, int]): Border dimensions for the transformed image.
1132
+ border (tuple[int, int]): Border dimensions for the transformed image.
1133
1133
 
1134
1134
  Returns:
1135
1135
  img (np.ndarray): Transformed image.
@@ -1296,20 +1296,20 @@ class RandomPerspective:
1296
1296
  and keypoints accordingly.
1297
1297
 
1298
1298
  Args:
1299
- labels (Dict[str, Any]): A dictionary containing image data and annotations.
1299
+ labels (dict[str, Any]): A dictionary containing image data and annotations.
1300
1300
  Must include:
1301
1301
  'img' (np.ndarray): The input image.
1302
1302
  'cls' (np.ndarray): Class labels.
1303
1303
  'instances' (Instances): Object instances with bounding boxes, segments, and keypoints.
1304
1304
  May include:
1305
- 'mosaic_border' (Tuple[int, int]): Border size for mosaic augmentation.
1305
+ 'mosaic_border' (tuple[int, int]): Border size for mosaic augmentation.
1306
1306
 
1307
1307
  Returns:
1308
- (Dict[str, Any]): Transformed labels dictionary containing:
1308
+ (dict[str, Any]): Transformed labels dictionary containing:
1309
1309
  - 'img' (np.ndarray): The transformed image.
1310
1310
  - 'cls' (np.ndarray): Updated class labels.
1311
1311
  - 'instances' (Instances): Updated object instances.
1312
- - 'resized_shape' (Tuple[int, int]): New image shape after transformation.
1312
+ - 'resized_shape' (tuple[int, int]): New image shape after transformation.
1313
1313
 
1314
1314
  Examples:
1315
1315
  >>> transform = RandomPerspective()
@@ -1463,11 +1463,11 @@ class RandomHSV:
1463
1463
  The adjustments are made within the limits set by hgain, sgain, and vgain during initialization.
1464
1464
 
1465
1465
  Args:
1466
- labels (Dict[str, Any]): A dictionary containing image data and metadata. Must include an 'img' key with
1466
+ labels (dict[str, Any]): A dictionary containing image data and metadata. Must include an 'img' key with
1467
1467
  the image as a numpy array.
1468
1468
 
1469
1469
  Returns:
1470
- (Dict[str, Any]): The labels dictionary with the HSV-adjusted image under the 'img' key.
1470
+ (dict[str, Any]): The labels dictionary with the HSV-adjusted image under the 'img' key.
1471
1471
 
1472
1472
  Examples:
1473
1473
  >>> hsv_augmenter = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
@@ -1527,7 +1527,7 @@ class RandomFlip:
1527
1527
  Args:
1528
1528
  p (float): The probability of applying the flip. Must be between 0 and 1.
1529
1529
  direction (str): The direction to apply the flip. Must be 'horizontal' or 'vertical'.
1530
- flip_idx (List[int] | None): Index mapping for flipping keypoints, if any.
1530
+ flip_idx (list[int] | None): Index mapping for flipping keypoints, if any.
1531
1531
 
1532
1532
  Raises:
1533
1533
  AssertionError: If direction is not 'horizontal' or 'vertical', or if p is not between 0 and 1.
@@ -1552,13 +1552,13 @@ class RandomFlip:
1552
1552
  match the flipped image.
1553
1553
 
1554
1554
  Args:
1555
- labels (Dict[str, Any]): A dictionary containing the following keys:
1555
+ labels (dict[str, Any]): A dictionary containing the following keys:
1556
1556
  'img' (np.ndarray): The image to be flipped.
1557
1557
  'instances' (ultralytics.utils.instance.Instances): An object containing bounding boxes and
1558
1558
  optionally keypoints.
1559
1559
 
1560
1560
  Returns:
1561
- (Dict[str, Any]): The same dictionary with the flipped image and updated instances:
1561
+ (dict[str, Any]): The same dictionary with the flipped image and updated instances:
1562
1562
  'img' (np.ndarray): The flipped image.
1563
1563
  'instances' (ultralytics.utils.instance.Instances): Updated instances matching the flipped image.
1564
1564
 
@@ -1633,7 +1633,7 @@ class LetterBox:
1633
1633
  tasks. It supports various resizing modes including auto-sizing, scale-fill, and letterboxing.
1634
1634
 
1635
1635
  Args:
1636
- new_shape (Tuple[int, int]): Target size (height, width) for the resized image.
1636
+ new_shape (tuple[int, int]): Target size (height, width) for the resized image.
1637
1637
  auto (bool): If True, use minimum rectangle to resize. If False, use new_shape directly.
1638
1638
  scale_fill (bool): If True, stretch the image to new_shape without padding.
1639
1639
  scaleup (bool): If True, allow scaling up. If False, only scale down.
@@ -1643,7 +1643,7 @@ class LetterBox:
1643
1643
  interpolation (int): Interpolation method for resizing. Default is cv2.INTER_LINEAR.
1644
1644
 
1645
1645
  Attributes:
1646
- new_shape (Tuple[int, int]): Target size for the resized image.
1646
+ new_shape (tuple[int, int]): Target size for the resized image.
1647
1647
  auto (bool): Flag for using minimum rectangle resizing.
1648
1648
  scale_fill (bool): Flag for stretching image without padding.
1649
1649
  scaleup (bool): Flag for allowing upscaling.
@@ -1672,11 +1672,11 @@ class LetterBox:
1672
1672
  aspect ratio and adding padding to fit the new shape. It also updates any associated labels accordingly.
1673
1673
 
1674
1674
  Args:
1675
- labels (Dict[str, Any] | None): A dictionary containing image data and associated labels, or empty dict if None.
1675
+ labels (dict[str, Any] | None): A dictionary containing image data and associated labels, or empty dict if None.
1676
1676
  image (np.ndarray | None): The input image as a numpy array. If None, the image is taken from 'labels'.
1677
1677
 
1678
1678
  Returns:
1679
- (Dict[str, Any] | np.ndarray): If 'labels' is provided, returns an updated dictionary with the resized and padded image,
1679
+ (dict[str, Any] | np.ndarray): If 'labels' is provided, returns an updated dictionary with the resized and padded image,
1680
1680
  updated labels, and additional metadata. If 'labels' is empty, returns the resized
1681
1681
  and padded image.
1682
1682
 
@@ -1751,13 +1751,13 @@ class LetterBox:
1751
1751
  to account for resizing and padding applied during letterboxing.
1752
1752
 
1753
1753
  Args:
1754
- labels (Dict[str, Any]): A dictionary containing image labels and instances.
1755
- ratio (Tuple[float, float]): Scaling ratios (width, height) applied to the image.
1754
+ labels (dict[str, Any]): A dictionary containing image labels and instances.
1755
+ ratio (tuple[float, float]): Scaling ratios (width, height) applied to the image.
1756
1756
  padw (float): Padding width added to the image.
1757
1757
  padh (float): Padding height added to the image.
1758
1758
 
1759
1759
  Returns:
1760
- (Dict[str, Any]): Updated labels dictionary with modified instance coordinates.
1760
+ (dict[str, Any]): Updated labels dictionary with modified instance coordinates.
1761
1761
 
1762
1762
  Examples:
1763
1763
  >>> letterbox = LetterBox(new_shape=(640, 640))
@@ -2021,13 +2021,13 @@ class Albumentations:
2021
2021
  spatial and non-spatial transformations on the input image and its corresponding labels.
2022
2022
 
2023
2023
  Args:
2024
- labels (Dict[str, Any]): A dictionary containing image data and annotations. Expected keys are:
2024
+ labels (dict[str, Any]): A dictionary containing image data and annotations. Expected keys are:
2025
2025
  - 'img': np.ndarray representing the image
2026
2026
  - 'cls': np.ndarray of class labels
2027
2027
  - 'instances': object containing bounding boxes and other instance information
2028
2028
 
2029
2029
  Returns:
2030
- (Dict[str, Any]): The input dictionary with augmented image and updated annotations.
2030
+ (dict[str, Any]): The input dictionary with augmented image and updated annotations.
2031
2031
 
2032
2032
  Examples:
2033
2033
  >>> transform = Albumentations(p=0.5)
@@ -2164,13 +2164,13 @@ class Format:
2164
2164
  applying normalization if required.
2165
2165
 
2166
2166
  Args:
2167
- labels (Dict[str, Any]): A dictionary containing image and annotation data with the following keys:
2167
+ labels (dict[str, Any]): A dictionary containing image and annotation data with the following keys:
2168
2168
  - 'img': The input image as a numpy array.
2169
2169
  - 'cls': Class labels for instances.
2170
2170
  - 'instances': An Instances object containing bounding boxes, segments, and keypoints.
2171
2171
 
2172
2172
  Returns:
2173
- (Dict[str, Any]): A dictionary with formatted data, including:
2173
+ (dict[str, Any]): A dictionary with formatted data, including:
2174
2174
  - 'img': Formatted image tensor.
2175
2175
  - 'cls': Class label's tensor.
2176
2176
  - 'bboxes': Bounding boxes tensor in the specified format.
@@ -2324,10 +2324,10 @@ class LoadVisualPrompt:
2324
2324
  Process labels to create visual prompts.
2325
2325
 
2326
2326
  Args:
2327
- labels (Dict[str, Any]): Dictionary containing image data and annotations.
2327
+ labels (dict[str, Any]): Dictionary containing image data and annotations.
2328
2328
 
2329
2329
  Returns:
2330
- (Dict[str, Any]): Updated labels with visual prompts added.
2330
+ (dict[str, Any]): Updated labels with visual prompts added.
2331
2331
  """
2332
2332
  imgsz = labels["img"].shape[1:]
2333
2333
  bboxes, masks = None, None
@@ -2352,7 +2352,7 @@ class LoadVisualPrompt:
2352
2352
 
2353
2353
  Args:
2354
2354
  category (int | np.ndarray | torch.Tensor): The category labels for the objects.
2355
- shape (Tuple[int, int]): The shape of the image (height, width).
2355
+ shape (tuple[int, int]): The shape of the image (height, width).
2356
2356
  bboxes (np.ndarray | torch.Tensor, optional): Bounding boxes for the objects, xyxy format.
2357
2357
  masks (np.ndarray | torch.Tensor, optional): Masks for the objects.
2358
2358
 
@@ -2398,7 +2398,7 @@ class RandomLoadText:
2398
2398
 
2399
2399
  Attributes:
2400
2400
  prompt_format (str): Format string for text prompts.
2401
- neg_samples (Tuple[int, int]): Range for randomly sampling negative texts.
2401
+ neg_samples (tuple[int, int]): Range for randomly sampling negative texts.
2402
2402
  max_samples (int): Maximum number of different text samples in one image.
2403
2403
  padding (bool): Whether to pad texts to max_samples.
2404
2404
  padding_value (str): The text used for padding when padding is True.
@@ -2431,7 +2431,7 @@ class RandomLoadText:
2431
2431
  Args:
2432
2432
  prompt_format (str): Format string for the prompt. The format string should
2433
2433
  contain a single pair of curly braces {} where the text will be inserted.
2434
- neg_samples (Tuple[int, int]): A range to randomly sample negative texts. The first integer
2434
+ neg_samples (tuple[int, int]): A range to randomly sample negative texts. The first integer
2435
2435
  specifies the minimum number of negative samples, and the second integer specifies the
2436
2436
  maximum.
2437
2437
  max_samples (int): The maximum number of different text samples in one image.
@@ -2441,7 +2441,7 @@ class RandomLoadText:
2441
2441
 
2442
2442
  Attributes:
2443
2443
  prompt_format (str): The format string for the prompt.
2444
- neg_samples (Tuple[int, int]): The range for sampling negative texts.
2444
+ neg_samples (tuple[int, int]): The range for sampling negative texts.
2445
2445
  max_samples (int): The maximum number of text samples.
2446
2446
  padding (bool): Whether padding is enabled.
2447
2447
  padding_value (str): The value used for padding.
@@ -2470,10 +2470,10 @@ class RandomLoadText:
2470
2470
  new sampled text order.
2471
2471
 
2472
2472
  Args:
2473
- labels (Dict[str, Any]): A dictionary containing image labels and metadata. Must include 'texts' and 'cls' keys.
2473
+ labels (dict[str, Any]): A dictionary containing image labels and metadata. Must include 'texts' and 'cls' keys.
2474
2474
 
2475
2475
  Returns:
2476
- (Dict[str, Any]): Updated labels dictionary with new 'cls' and 'texts' entries.
2476
+ (dict[str, Any]): Updated labels dictionary with new 'cls' and 'texts' entries.
2477
2477
 
2478
2478
  Examples:
2479
2479
  >>> loader = RandomLoadText(prompt_format="A photo of {}", neg_samples=(5, 10), max_samples=20)
@@ -2613,8 +2613,8 @@ def classify_transforms(
2613
2613
  Args:
2614
2614
  size (int | tuple): The target size for the transformed image. If an int, it defines the shortest edge. If a
2615
2615
  tuple, it defines (height, width).
2616
- mean (Tuple[float, float, float]): Mean values for each RGB channel used in normalization.
2617
- std (Tuple[float, float, float]): Standard deviation values for each RGB channel used in normalization.
2616
+ mean (tuple[float, float, float]): Mean values for each RGB channel used in normalization.
2617
+ std (tuple[float, float, float]): Standard deviation values for each RGB channel used in normalization.
2618
2618
  interpolation (str): Interpolation method of either 'NEAREST', 'BILINEAR' or 'BICUBIC'.
2619
2619
  crop_fraction (float): Deprecated, will be removed in a future version.
2620
2620
 
@@ -2671,10 +2671,10 @@ def classify_augmentations(
2671
2671
 
2672
2672
  Args:
2673
2673
  size (int): Target size for the image after transformations.
2674
- mean (Tuple[float, float, float]): Mean values for each RGB channel used in normalization.
2675
- std (Tuple[float, float, float]): Standard deviation values for each RGB channel used in normalization.
2676
- scale (Tuple[float, float] | None): Range of size of the origin size cropped.
2677
- ratio (Tuple[float, float] | None): Range of aspect ratio of the origin aspect ratio cropped.
2674
+ mean (tuple[float, float, float]): Mean values for each RGB channel used in normalization.
2675
+ std (tuple[float, float, float]): Standard deviation values for each RGB channel used in normalization.
2676
+ scale (tuple[float, float] | None): Range of size of the origin size cropped.
2677
+ ratio (tuple[float, float] | None): Range of aspect ratio of the origin aspect ratio cropped.
2678
2678
  hflip (float): Probability of horizontal flip.
2679
2679
  vflip (float): Probability of vertical flip.
2680
2680
  auto_augment (str | None): Auto augmentation policy. Can be 'randaugment', 'augmix', 'autoaugment' or None.
@@ -2783,7 +2783,7 @@ class ClassifyLetterBox:
2783
2783
  pads images to a specified size while maintaining the original aspect ratio.
2784
2784
 
2785
2785
  Args:
2786
- size (int | Tuple[int, int]): Target size for the letterboxed image. If an int, a square image of
2786
+ size (int | tuple[int, int]): Target size for the letterboxed image. If an int, a square image of
2787
2787
  (size, size) is created. If a tuple, it should be (height, width).
2788
2788
  auto (bool): If True, automatically calculates the short side based on stride.
2789
2789
  stride (int): The stride value, used when 'auto' is True.
@@ -2872,7 +2872,7 @@ class CenterCrop:
2872
2872
  It performs a center crop on input images to a specified size.
2873
2873
 
2874
2874
  Args:
2875
- size (int | Tuple[int, int]): The desired output size of the crop. If size is an int, a square crop
2875
+ size (int | tuple[int, int]): The desired output size of the crop. If size is an int, a square crop
2876
2876
  (size, size) is made. If size is a sequence like (h, w), it is used as the output size.
2877
2877
 
2878
2878
  Returns:
ultralytics/data/base.py CHANGED
@@ -36,8 +36,8 @@ class BaseDataset(Dataset):
36
36
  fraction (float): Fraction of dataset to utilize.
37
37
  channels (int): Number of channels in the images (1 for grayscale, 3 for RGB).
38
38
  cv2_flag (int): OpenCV flag for reading images.
39
- im_files (List[str]): List of image file paths.
40
- labels (List[Dict]): List of label data dictionaries.
39
+ im_files (list[str]): List of image file paths.
40
+ labels (list[dict]): List of label data dictionaries.
41
41
  ni (int): Number of images in the dataset.
42
42
  rect (bool): Whether to use rectangular training.
43
43
  batch_size (int): Size of batches.
@@ -48,7 +48,7 @@ class BaseDataset(Dataset):
48
48
  ims (list): List of loaded images.
49
49
  im_hw0 (list): List of original image dimensions (h, w).
50
50
  im_hw (list): List of resized image dimensions (h, w).
51
- npy_files (List[Path]): List of numpy file paths.
51
+ npy_files (list[Path]): List of numpy file paths.
52
52
  cache (str): Cache images to RAM or disk during training.
53
53
  transforms (callable): Image transformation function.
54
54
  batch_shapes (np.ndarray): Batch shapes for rectangular training.
@@ -90,18 +90,18 @@ class BaseDataset(Dataset):
90
90
  Initialize BaseDataset with given configuration and options.
91
91
 
92
92
  Args:
93
- img_path (str | List[str]): Path to the folder containing images or list of image paths.
93
+ img_path (str | list[str]): Path to the folder containing images or list of image paths.
94
94
  imgsz (int): Image size for resizing.
95
95
  cache (bool | str): Cache images to RAM or disk during training.
96
96
  augment (bool): If True, data augmentation is applied.
97
- hyp (Dict[str, Any]): Hyperparameters to apply data augmentation.
97
+ hyp (dict[str, Any]): Hyperparameters to apply data augmentation.
98
98
  prefix (str): Prefix to print in log messages.
99
99
  rect (bool): If True, rectangular training is used.
100
100
  batch_size (int): Size of batches.
101
101
  stride (int): Stride used in the model.
102
102
  pad (float): Padding value.
103
103
  single_cls (bool): If True, single class training is used.
104
- classes (List[int], optional): List of included classes.
104
+ classes (list[int], optional): List of included classes.
105
105
  fraction (float): Fraction of dataset to utilize.
106
106
  channels (int): Number of channels in the images (1 for grayscale, 3 for RGB).
107
107
  """
@@ -152,10 +152,10 @@ class BaseDataset(Dataset):
152
152
  Read image files from the specified path.
153
153
 
154
154
  Args:
155
- img_path (str | List[str]): Path or list of paths to image directories or files.
155
+ img_path (str | list[str]): Path or list of paths to image directories or files.
156
156
 
157
157
  Returns:
158
- (List[str]): List of image file paths.
158
+ (list[str]): List of image file paths.
159
159
 
160
160
  Raises:
161
161
  FileNotFoundError: If no images are found or the path doesn't exist.
@@ -190,7 +190,7 @@ class BaseDataset(Dataset):
190
190
  Update labels to include only specified classes.
191
191
 
192
192
  Args:
193
- include_class (List[int], optional): List of classes to include. If None, all classes are included.
193
+ include_class (list[int], optional): List of classes to include. If None, all classes are included.
194
194
  """
195
195
  include_class_array = np.array(include_class).reshape(1, -1)
196
196
  for i in range(len(self.labels)):
@@ -219,8 +219,8 @@ class BaseDataset(Dataset):
219
219
 
220
220
  Returns:
221
221
  im (np.ndarray): Loaded image as a NumPy array.
222
- hw_original (Tuple[int, int]): Original image dimensions in (height, width) format.
223
- hw_resized (Tuple[int, int]): Resized image dimensions in (height, width) format.
222
+ hw_original (tuple[int, int]): Original image dimensions in (height, width) format.
223
+ hw_resized (tuple[int, int]): Resized image dimensions in (height, width) format.
224
224
 
225
225
  Raises:
226
226
  FileNotFoundError: If the image file is not found.
@@ -388,7 +388,7 @@ class BaseDataset(Dataset):
388
388
  index (int): Index of the image to retrieve.
389
389
 
390
390
  Returns:
391
- (Dict[str, Any]): Label dictionary with image and metadata.
391
+ (dict[str, Any]): Label dictionary with image and metadata.
392
392
  """
393
393
  label = deepcopy(self.labels[index]) # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
394
394
  label.pop("shape", None) # shape is for rect, remove it
ultralytics/data/converter.py CHANGED
@@ -25,7 +25,7 @@ def coco91_to_coco80_class() -> list[int]:
25
25
  Convert 91-index COCO class IDs to 80-index COCO class IDs.
26
26
 
27
27
  Returns:
28
- (List[int]): A list of 91 class IDs where the index represents the 80-index class ID and the value
28
+ (list[int]): A list of 91 class IDs where the index represents the 80-index class ID and the value
29
29
  is the corresponding 91-index class ID.
30
30
  """
31
31
  return [
@@ -128,7 +128,7 @@ def coco80_to_coco91_class() -> list[int]:
128
128
  Convert 80-index (val2014) to 91-index (paper).
129
129
 
130
130
  Returns:
131
- (List[int]): A list of 80 class IDs where each value is the corresponding 91-index class ID.
131
+ (list[int]): A list of 80 class IDs where each value is the corresponding 91-index class ID.
132
132
 
133
133
  References:
134
134
  https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
@@ -539,11 +539,11 @@ def merge_multi_segment(segments: list[list]):
539
539
  This function connects these coordinates with a thin line to merge all segments into one.
540
540
 
541
541
  Args:
542
- segments (List[List]): Original segmentations in COCO's JSON file.
542
+ segments (list[list]): Original segmentations in COCO's JSON file.
543
543
  Each element is a list of coordinates, like [segmentation1, segmentation2,...].
544
544
 
545
545
  Returns:
546
- s (List[np.ndarray]): A list of connected segments represented as NumPy arrays.
546
+ s (list[np.ndarray]): A list of connected segments represented as NumPy arrays.
547
547
  """
548
548
  s = []
549
549
  segments = [np.array(i).reshape(-1, 2) for i in segments]