dgenerate-ultralytics-headless 8.3.194__py3-none-any.whl → 8.3.195__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/METADATA +1 -2
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/RECORD +97 -96
- tests/test_python.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +8 -8
- ultralytics/data/annotator.py +1 -1
- ultralytics/data/augment.py +75 -75
- ultralytics/data/base.py +12 -12
- ultralytics/data/converter.py +4 -4
- ultralytics/data/dataset.py +7 -7
- ultralytics/data/loaders.py +15 -15
- ultralytics/data/split_dota.py +10 -10
- ultralytics/data/utils.py +12 -12
- ultralytics/engine/model.py +13 -13
- ultralytics/engine/predictor.py +13 -13
- ultralytics/engine/results.py +21 -21
- ultralytics/hub/google/__init__.py +2 -2
- ultralytics/hub/session.py +7 -7
- ultralytics/models/fastsam/model.py +5 -5
- ultralytics/models/fastsam/predict.py +11 -11
- ultralytics/models/nas/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +2 -2
- ultralytics/models/rtdetr/val.py +4 -4
- ultralytics/models/sam/amg.py +6 -6
- ultralytics/models/sam/build.py +9 -9
- ultralytics/models/sam/model.py +7 -7
- ultralytics/models/sam/modules/blocks.py +6 -6
- ultralytics/models/sam/modules/decoders.py +1 -1
- ultralytics/models/sam/modules/encoders.py +27 -27
- ultralytics/models/sam/modules/sam.py +4 -4
- ultralytics/models/sam/modules/tiny_encoder.py +18 -18
- ultralytics/models/sam/modules/utils.py +8 -8
- ultralytics/models/sam/predict.py +63 -63
- ultralytics/models/utils/loss.py +22 -22
- ultralytics/models/utils/ops.py +8 -8
- ultralytics/models/yolo/classify/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +8 -8
- ultralytics/models/yolo/classify/val.py +4 -4
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +6 -6
- ultralytics/models/yolo/detect/val.py +32 -32
- ultralytics/models/yolo/model.py +6 -6
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/obb/val.py +13 -13
- ultralytics/models/yolo/pose/val.py +11 -11
- ultralytics/models/yolo/segment/predict.py +4 -4
- ultralytics/models/yolo/segment/train.py +1 -1
- ultralytics/models/yolo/segment/val.py +14 -14
- ultralytics/models/yolo/world/train.py +9 -9
- ultralytics/models/yolo/world/train_world.py +1 -1
- ultralytics/models/yolo/yoloe/predict.py +4 -4
- ultralytics/models/yolo/yoloe/train.py +4 -4
- ultralytics/nn/autobackend.py +2 -2
- ultralytics/nn/modules/block.py +6 -6
- ultralytics/nn/modules/conv.py +2 -2
- ultralytics/nn/modules/head.py +4 -4
- ultralytics/nn/tasks.py +13 -13
- ultralytics/nn/text_model.py +3 -3
- ultralytics/solutions/ai_gym.py +2 -2
- ultralytics/solutions/analytics.py +3 -3
- ultralytics/solutions/config.py +5 -5
- ultralytics/solutions/distance_calculation.py +2 -2
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +4 -4
- ultralytics/solutions/object_counter.py +4 -4
- ultralytics/solutions/parking_management.py +7 -7
- ultralytics/solutions/queue_management.py +3 -3
- ultralytics/solutions/region_counter.py +4 -4
- ultralytics/solutions/similarity_search.py +2 -2
- ultralytics/solutions/solutions.py +48 -48
- ultralytics/solutions/streamlit_inference.py +1 -1
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/solutions/vision_eye.py +1 -1
- ultralytics/trackers/byte_tracker.py +11 -11
- ultralytics/trackers/utils/gmc.py +3 -3
- ultralytics/trackers/utils/matching.py +5 -5
- ultralytics/utils/autodevice.py +2 -2
- ultralytics/utils/benchmarks.py +10 -10
- ultralytics/utils/callbacks/clearml.py +1 -1
- ultralytics/utils/callbacks/comet.py +5 -5
- ultralytics/utils/checks.py +5 -5
- ultralytics/utils/cpu.py +90 -0
- ultralytics/utils/dist.py +1 -1
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export.py +5 -5
- ultralytics/utils/instance.py +2 -2
- ultralytics/utils/metrics.py +35 -35
- ultralytics/utils/nms.py +4 -4
- ultralytics/utils/ops.py +1 -1
- ultralytics/utils/patches.py +2 -2
- ultralytics/utils/plotting.py +9 -9
- ultralytics/utils/torch_utils.py +2 -6
- ultralytics/utils/triton.py +5 -5
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/top_level.txt +0 -0
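In the per-file diffs reproduced below (augment.py, base.py, converter.py), the edits are docstring updates that rewrite parameter and return type annotations with built-in generics such as `list[int]`, `dict[str, Any]`, and `tuple[int, int]`. A minimal sketch of the pattern, using a hypothetical helper function rather than code taken from the package:

```python
# Illustrative only: a hypothetical helper (not part of ultralytics) documented in the
# updated style, where docstring types use built-in generics like dict[str, Any].
from __future__ import annotations

from typing import Any


def filter_labels(labels: dict[str, Any], include_class: list[int] | None = None) -> dict[str, Any]:
    """Keep only the requested classes in a label dictionary.

    Args:
        labels (dict[str, Any]): A dictionary containing label data for an image, with a 'cls' key.
        include_class (list[int], optional): List of classes to include. If None, all classes are kept.

    Returns:
        (dict[str, Any]): The filtered labels dictionary.
    """
    if include_class is None:
        return labels
    keep = set(include_class)
    return {**labels, "cls": [c for c in labels.get("cls", []) if c in keep]}
```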
ultralytics/data/augment.py
CHANGED
@@ -150,7 +150,7 @@ class Compose:
- transforms (
+ transforms (list[Callable]): A list of transformation functions to be applied sequentially.

@@ -173,7 +173,7 @@ class Compose:
- transforms (
+ transforms (list[Callable]): A list of callable transform objects to be applied sequentially.

@@ -238,7 +238,7 @@ class Compose:
- index (int |
+ index (int | list[int]): Index or list of indices of the transforms to retrieve.

@@ -260,8 +260,8 @@ class Compose:
- index (int |
- value (Any |
+ index (int | list[int]): Index or list of indices to set transforms at.
+ value (Any | list[Any]): Transform or list of transforms to set at the specified index(es).

@@ -376,10 +376,10 @@ class BaseMixTransform:
- labels (
+ labels (dict[str, Any]): A dictionary containing label data for an image.
- (
+ (dict[str, Any]): The transformed labels dictionary, which may include mixed data from other images.

@@ -416,11 +416,11 @@ class BaseMixTransform:
- labels (
+ labels (dict[str, Any]): A dictionary containing image and label data. Expected to have a 'mix_labels' key
  with a list of additional image and label data for mixing.
- (
+ (dict[str, Any]): The modified labels dictionary with augmented data after applying the mix transform.

@@ -434,7 +434,7 @@ class BaseMixTransform:
- (
+ (list[int]): A list of shuffled indexes from the dataset.

@@ -452,11 +452,11 @@ class BaseMixTransform:
- labels (
+ labels (dict[str, Any]): A dictionary containing label information, including 'texts' and 'cls' fields,
  and optionally a 'mix_labels' field with additional label dictionaries.
- (
+ (dict[str, Any]): The updated labels dictionary with unified text labels and updated class IDs.

@@ -501,7 +501,7 @@ class Mosaic(BaseMixTransform):
- border (
+ border (tuple[int, int]): Border size for width and height.

@@ -553,7 +553,7 @@ class Mosaic(BaseMixTransform):
- (
+ (list[int]): A list of random image indexes. The length of the list is n-1, where n is the number
  of images used in the mosaic (either 3 or 8, depending on whether n is 4 or 9).

@@ -575,12 +575,12 @@ class Mosaic(BaseMixTransform):
- labels (
+ labels (dict[str, Any]): A dictionary containing image data and annotations. Expected keys include:
  - 'rect_shape': Should be None as rect and mosaic are mutually exclusive.
  - 'mix_labels': A list of dictionaries containing data for other images to be used in the mosaic.
- (
+ (dict[str, Any]): A dictionary containing the mosaic-augmented image and updated annotations.

@@ -603,12 +603,12 @@ class Mosaic(BaseMixTransform):
- labels (
+ labels (dict[str, Any]): A dictionary containing image and label information for the main (center) image.
  Must include 'img' key with the image array, and 'mix_labels' key with a list of two
  dictionaries containing information for the side images.
- (
+ (dict[str, Any]): A dictionary with the mosaic image and updated labels. Keys include:
  - 'img' (np.ndarray): The mosaic image array with shape (H, W, C).
  - Other keys from the input labels, updated to reflect the new image dimensions.

@@ -662,11 +662,11 @@ class Mosaic(BaseMixTransform):
- labels (
+ labels (dict[str, Any]): A dictionary containing image data and labels for the base image (index 0) and three
  additional images (indices 1-3) in the 'mix_labels' key.
- (
+ (dict[str, Any]): A dictionary containing the mosaic image and updated labels. The 'img' key contains the mosaic
  image as a numpy array, and other keys contain the combined and adjusted labels for all four images.

@@ -720,15 +720,15 @@ class Mosaic(BaseMixTransform):
- labels (
+ labels (dict[str, Any]): A dictionary containing the input image and its associated labels. It should have
  the following keys:
  - 'img' (np.ndarray): The input image.
- - 'resized_shape' (
- - 'mix_labels' (
+ - 'resized_shape' (tuple[int, int]): The shape of the resized image (height, width).
+ - 'mix_labels' (list[dict]): A list of dictionaries containing information for the additional
  eight images, each with the same structure as the input labels.
- (
+ (dict[str, Any]): A dictionary containing the mosaic image and updated labels. It includes the following keys:
  - 'img' (np.ndarray): The final mosaic image.
  - Other keys from the input labels, updated to reflect the new mosaic arrangement.

@@ -793,7 +793,7 @@ class Mosaic(BaseMixTransform):
- labels (
+ labels (dict[str, Any]): A dictionary containing image and instance information.

@@ -819,17 +819,17 @@ class Mosaic(BaseMixTransform):
- mosaic_labels (
+ mosaic_labels (list[dict[str, Any]]): A list of label dictionaries for each image in the mosaic.
- (
+ (dict[str, Any]): A dictionary containing concatenated and processed labels for the mosaic image, including:
  - im_file (str): File path of the first image in the mosaic.
- - ori_shape (
- - resized_shape (
+ - ori_shape (tuple[int, int]): Original shape of the first image.
+ - resized_shape (tuple[int, int]): Shape of the mosaic image (imgsz * 2, imgsz * 2).
  - cls (np.ndarray): Concatenated class labels.
  - instances (Instances): Concatenated instance annotations.
- - mosaic_border (
- - texts (
+ - mosaic_border (tuple[int, int]): Mosaic border size.
+ - texts (list[str], optional): Text labels if present in the original labels.

@@ -912,10 +912,10 @@ class MixUp(BaseMixTransform):
- labels (
+ labels (dict[str, Any]): A dictionary containing the original image and label information.
- (
+ (dict[str, Any]): A dictionary containing the mixed-up image and combined label information.

@@ -978,7 +978,7 @@ class CutMix(BaseMixTransform):
- (
+ (tuple[int]): (x1, y1, x2, y2) coordinates of the bounding box.

@@ -1004,10 +1004,10 @@ class CutMix(BaseMixTransform):
- labels (
+ labels (dict[str, Any]): A dictionary containing the original image and label information.
- (
+ (dict[str, Any]): A dictionary containing the mixed image and adjusted labels.

@@ -1061,7 +1061,7 @@ class RandomPerspective:
- border (
+ border (tuple[int, int]): Mosaic border size as (x, y).

@@ -1103,7 +1103,7 @@ class RandomPerspective:
- border (
+ border (tuple[int, int]): Tuple specifying mosaic border (top/bottom, left/right).

@@ -1129,7 +1129,7 @@ class RandomPerspective:
- border (
+ border (tuple[int, int]): Border dimensions for the transformed image.

@@ -1296,20 +1296,20 @@ class RandomPerspective:
- labels (
+ labels (dict[str, Any]): A dictionary containing image data and annotations.
  Must include:
  'img' (np.ndarray): The input image.
  'cls' (np.ndarray): Class labels.
  'instances' (Instances): Object instances with bounding boxes, segments, and keypoints.
  May include:
- 'mosaic_border' (
+ 'mosaic_border' (tuple[int, int]): Border size for mosaic augmentation.
- (
+ (dict[str, Any]): Transformed labels dictionary containing:
  - 'img' (np.ndarray): The transformed image.
  - 'cls' (np.ndarray): Updated class labels.
  - 'instances' (Instances): Updated object instances.
- - 'resized_shape' (
+ - 'resized_shape' (tuple[int, int]): New image shape after transformation.

@@ -1463,11 +1463,11 @@ class RandomHSV:
- labels (
+ labels (dict[str, Any]): A dictionary containing image data and metadata. Must include an 'img' key with
  the image as a numpy array.
- (
+ (dict[str, Any]): A dictionary containing the mixed image and adjusted labels.

@@ -1527,7 +1527,7 @@ class RandomFlip:
- flip_idx (
+ flip_idx (list[int] | None): Index mapping for flipping keypoints, if any.

@@ -1552,13 +1552,13 @@ class RandomFlip:
- labels (
+ labels (dict[str, Any]): A dictionary containing the following keys:
  'img' (np.ndarray): The image to be flipped.
  'instances' (ultralytics.utils.instance.Instances): An object containing bounding boxes and
  optionally keypoints.
- (
+ (dict[str, Any]): The same dictionary with the flipped image and updated instances:
  'img' (np.ndarray): The flipped image.
  'instances' (ultralytics.utils.instance.Instances): Updated instances matching the flipped image.

@@ -1633,7 +1633,7 @@ class LetterBox:
- new_shape (
+ new_shape (tuple[int, int]): Target size (height, width) for the resized image.

@@ -1643,7 +1643,7 @@ class LetterBox:
- new_shape (
+ new_shape (tuple[int, int]): Target size for the resized image.

@@ -1672,11 +1672,11 @@ class LetterBox:
- labels (
+ labels (dict[str, Any] | None): A dictionary containing image data and associated labels, or empty dict if None.
- (
+ (dict[str, Any] | nd.ndarray): If 'labels' is provided, returns an updated dictionary with the resized and padded image,
  updated labels, and additional metadata. If 'labels' is empty, returns the resized
  and padded image.

@@ -1751,13 +1751,13 @@ class LetterBox:
- labels (
- ratio (
+ labels (dict[str, Any]): A dictionary containing image labels and instances.
+ ratio (tuple[float, float]): Scaling ratios (width, height) applied to the image.
- (
+ (dict[str, Any]): Updated labels dictionary with modified instance coordinates.

@@ -2021,13 +2021,13 @@ class Albumentations:
- labels (
+ labels (dict[str, Any]): A dictionary containing image data and annotations. Expected keys are:
  - 'img': np.ndarray representing the image
  - 'cls': np.ndarray of class labels
  - 'instances': object containing bounding boxes and other instance information
- (
+ (dict[str, Any]): The input dictionary with augmented image and updated annotations.

@@ -2164,13 +2164,13 @@ class Format:
- labels (
+ labels (dict[str, Any]): A dictionary containing image and annotation data with the following keys:
  - 'img': The input image as a numpy array.
  - 'cls': Class labels for instances.
  - 'instances': An Instances object containing bounding boxes, segments, and keypoints.
- (
+ (dict[str, Any]): A dictionary with formatted data, including:
  - 'img': Formatted image tensor.
  - 'cls': Class label's tensor.
  - 'bboxes': Bounding boxes tensor in the specified format.

@@ -2324,10 +2324,10 @@ class LoadVisualPrompt:
- labels (
+ labels (dict[str, Any]): Dictionary containing image data and annotations.
- (
+ (dict[str, Any]): Updated labels with visual prompts added.

@@ -2352,7 +2352,7 @@ class LoadVisualPrompt:
- shape (
+ shape (tuple[int, int]): The shape of the image (height, width).

@@ -2398,7 +2398,7 @@ class RandomLoadText:
- neg_samples (
+ neg_samples (tuple[int, int]): Range for randomly sampling negative texts.

@@ -2431,7 +2431,7 @@ class RandomLoadText:
- neg_samples (
+ neg_samples (tuple[int, int]): A range to randomly sample negative texts. The first integer
  specifies the minimum number of negative samples, and the second integer specifies the
  maximum.

@@ -2441,7 +2441,7 @@ class RandomLoadText:
- neg_samples (
+ neg_samples (tuple[int, int]): The range for sampling negative texts.

@@ -2470,10 +2470,10 @@ class RandomLoadText:
- labels (
+ labels (dict[str, Any]): A dictionary containing image labels and metadata. Must include 'texts' and 'cls' keys.
- (
+ (dict[str, Any]): Updated labels dictionary with new 'cls' and 'texts' entries.

@@ -2613,8 +2613,8 @@ def classify_transforms(
- mean (
- std (
+ mean (tuple[float, float, float]): Mean values for each RGB channel used in normalization.
+ std (tuple[float, float, float]): Standard deviation values for each RGB channel used in normalization.

@@ -2671,10 +2671,10 @@ def classify_augmentations(
- mean (
- std (
- scale (
- ratio (
+ mean (tuple[float, float, float]): Mean values for each RGB channel used in normalization.
+ std (tuple[float, float, float]): Standard deviation values for each RGB channel used in normalization.
+ scale (tuple[float, float] | None): Range of size of the origin size cropped.
+ ratio (tuple[float, float] | None): Range of aspect ratio of the origin aspect ratio cropped.

@@ -2783,7 +2783,7 @@ class ClassifyLetterBox:
- size (int |
+ size (int | tuple[int, int]): Target size for the letterboxed image. If an int, a square image of
  (size, size) is created. If a tuple, it should be (height, width).

@@ -2872,7 +2872,7 @@ class CenterCrop:
- size (int |
+ size (int | tuple[int, int]): The desired output size of the crop. If size is an int, a square crop
  (size, size) is made. If size is a sequence like (h, w), it is used as the output size.
ultralytics/data/base.py
CHANGED
@@ -36,8 +36,8 @@ class BaseDataset(Dataset):
- im_files (
- labels (
+ im_files (list[str]): List of image file paths.
+ labels (list[dict]): List of label data dictionaries.

@@ -48,7 +48,7 @@ class BaseDataset(Dataset):
- npy_files (
+ npy_files (list[Path]): List of numpy file paths.

@@ -90,18 +90,18 @@ class BaseDataset(Dataset):
- img_path (str |
+ img_path (str | list[str]): Path to the folder containing images or list of image paths.
- hyp (
+ hyp (dict[str, Any]): Hyperparameters to apply data augmentation.
- classes (
+ classes (list[int], optional): List of included classes.

@@ -152,10 +152,10 @@ class BaseDataset(Dataset):
- img_path (str |
+ img_path (str | list[str]): Path or list of paths to image directories or files.
- (
+ (list[str]): List of image file paths.

@@ -190,7 +190,7 @@ class BaseDataset(Dataset):
- include_class (
+ include_class (list[int], optional): List of classes to include. If None, all classes are included.

@@ -219,8 +219,8 @@ class BaseDataset(Dataset):
- hw_original (
- hw_resized (
+ hw_original (tuple[int, int]): Original image dimensions in (height, width) format.
+ hw_resized (tuple[int, int]): Resized image dimensions in (height, width) format.

@@ -388,7 +388,7 @@ class BaseDataset(Dataset):
- (
+ (dict[str, Any]): Label dictionary with image and metadata.
ultralytics/data/converter.py
CHANGED
@@ -25,7 +25,7 @@ def coco91_to_coco80_class() -> list[int]:
- (
+ (list[int]): A list of 91 class IDs where the index represents the 80-index class ID and the value
  is the corresponding 91-index class ID.

@@ -128,7 +128,7 @@ def coco80_to_coco91_class() -> list[int]:
- (
+ (list[int]): A list of 80 class IDs where each value is the corresponding 91-index class ID.

@@ -539,11 +539,11 @@ def merge_multi_segment(segments: list[list]):
- segments (
+ segments (list[list]): Original segmentations in COCO's JSON file.
  Each element is a list of coordinates, like [segmentation1, segmentation2,...].
- s (
+ s (list[np.ndarray]): A list of connected segments represented as NumPy arrays.