dgenerate-ultralytics-headless 8.3.189__py3-none-any.whl → 8.3.191__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/RECORD +111 -109
- tests/test_cuda.py +6 -5
- tests/test_exports.py +1 -6
- tests/test_python.py +1 -4
- tests/test_solutions.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -14
- ultralytics/cfg/datasets/VisDrone.yaml +4 -4
- ultralytics/data/annotator.py +6 -6
- ultralytics/data/augment.py +53 -51
- ultralytics/data/base.py +15 -13
- ultralytics/data/build.py +7 -4
- ultralytics/data/converter.py +9 -10
- ultralytics/data/dataset.py +24 -22
- ultralytics/data/loaders.py +13 -11
- ultralytics/data/split.py +4 -3
- ultralytics/data/split_dota.py +14 -12
- ultralytics/data/utils.py +31 -25
- ultralytics/engine/exporter.py +7 -4
- ultralytics/engine/model.py +16 -14
- ultralytics/engine/predictor.py +9 -7
- ultralytics/engine/results.py +59 -57
- ultralytics/engine/trainer.py +7 -0
- ultralytics/engine/tuner.py +4 -3
- ultralytics/engine/validator.py +3 -1
- ultralytics/hub/__init__.py +6 -2
- ultralytics/hub/auth.py +2 -2
- ultralytics/hub/google/__init__.py +9 -8
- ultralytics/hub/session.py +11 -11
- ultralytics/hub/utils.py +8 -9
- ultralytics/models/fastsam/model.py +8 -6
- ultralytics/models/nas/model.py +5 -3
- ultralytics/models/rtdetr/train.py +4 -3
- ultralytics/models/rtdetr/val.py +6 -4
- ultralytics/models/sam/amg.py +13 -10
- ultralytics/models/sam/model.py +3 -2
- ultralytics/models/sam/modules/blocks.py +21 -21
- ultralytics/models/sam/modules/decoders.py +11 -11
- ultralytics/models/sam/modules/encoders.py +25 -25
- ultralytics/models/sam/modules/memory_attention.py +9 -8
- ultralytics/models/sam/modules/sam.py +8 -10
- ultralytics/models/sam/modules/tiny_encoder.py +21 -20
- ultralytics/models/sam/modules/transformer.py +6 -5
- ultralytics/models/sam/modules/utils.py +7 -5
- ultralytics/models/sam/predict.py +32 -31
- ultralytics/models/utils/loss.py +29 -27
- ultralytics/models/utils/ops.py +10 -8
- ultralytics/models/yolo/classify/train.py +7 -5
- ultralytics/models/yolo/classify/val.py +10 -8
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +8 -6
- ultralytics/models/yolo/detect/val.py +23 -21
- ultralytics/models/yolo/model.py +14 -14
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +13 -10
- ultralytics/models/yolo/pose/train.py +7 -5
- ultralytics/models/yolo/pose/val.py +11 -9
- ultralytics/models/yolo/segment/train.py +4 -5
- ultralytics/models/yolo/segment/val.py +12 -10
- ultralytics/models/yolo/world/train.py +9 -7
- ultralytics/models/yolo/yoloe/train.py +7 -6
- ultralytics/models/yolo/yoloe/val.py +10 -8
- ultralytics/nn/autobackend.py +40 -52
- ultralytics/nn/modules/__init__.py +3 -3
- ultralytics/nn/modules/block.py +12 -12
- ultralytics/nn/modules/conv.py +4 -3
- ultralytics/nn/modules/head.py +46 -38
- ultralytics/nn/modules/transformer.py +22 -21
- ultralytics/nn/tasks.py +2 -2
- ultralytics/nn/text_model.py +6 -5
- ultralytics/solutions/analytics.py +7 -5
- ultralytics/solutions/config.py +12 -10
- ultralytics/solutions/distance_calculation.py +3 -3
- ultralytics/solutions/heatmap.py +4 -2
- ultralytics/solutions/object_counter.py +5 -3
- ultralytics/solutions/parking_management.py +4 -2
- ultralytics/solutions/region_counter.py +7 -5
- ultralytics/solutions/similarity_search.py +5 -3
- ultralytics/solutions/solutions.py +38 -36
- ultralytics/solutions/streamlit_inference.py +8 -7
- ultralytics/trackers/bot_sort.py +11 -9
- ultralytics/trackers/byte_tracker.py +17 -15
- ultralytics/trackers/utils/gmc.py +4 -3
- ultralytics/utils/__init__.py +27 -77
- ultralytics/utils/autobatch.py +3 -2
- ultralytics/utils/autodevice.py +10 -10
- ultralytics/utils/benchmarks.py +11 -10
- ultralytics/utils/callbacks/comet.py +9 -9
- ultralytics/utils/callbacks/platform.py +2 -1
- ultralytics/utils/checks.py +20 -29
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export.py +12 -11
- ultralytics/utils/files.py +8 -7
- ultralytics/utils/git.py +139 -0
- ultralytics/utils/instance.py +8 -7
- ultralytics/utils/logger.py +7 -6
- ultralytics/utils/loss.py +15 -13
- ultralytics/utils/metrics.py +62 -62
- ultralytics/utils/nms.py +346 -0
- ultralytics/utils/ops.py +83 -251
- ultralytics/utils/patches.py +6 -4
- ultralytics/utils/plotting.py +18 -16
- ultralytics/utils/tal.py +1 -1
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tqdm.py +47 -33
- ultralytics/utils/triton.py +3 -2
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/top_level.txt +0 -0
ultralytics/data/augment.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import math
|
4
6
|
import random
|
5
7
|
from copy import deepcopy
|
6
|
-
from typing import Any
|
8
|
+
from typing import Any
|
7
9
|
|
8
10
|
import cv2
|
9
11
|
import numpy as np
|
@@ -231,7 +233,7 @@ class Compose:
|
|
231
233
|
"""
|
232
234
|
self.transforms.insert(index, transform)
|
233
235
|
|
234
|
-
def __getitem__(self, index:
|
236
|
+
def __getitem__(self, index: list | int) -> Compose:
|
235
237
|
"""
|
236
238
|
Retrieve a specific transform or a set of transforms using indexing.
|
237
239
|
|
@@ -253,7 +255,7 @@ class Compose:
|
|
253
255
|
assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
|
254
256
|
return Compose([self.transforms[i] for i in index]) if isinstance(index, list) else self.transforms[index]
|
255
257
|
|
256
|
-
def __setitem__(self, index:
|
258
|
+
def __setitem__(self, index: list | int, value: list | int) -> None:
|
257
259
|
"""
|
258
260
|
Set one or more transforms in the composition using indexing.
|
259
261
|
|
@@ -366,7 +368,7 @@ class BaseMixTransform:
|
|
366
368
|
self.pre_transform = pre_transform
|
367
369
|
self.p = p
|
368
370
|
|
369
|
-
def __call__(self, labels:
|
371
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
370
372
|
"""
|
371
373
|
Apply pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
|
372
374
|
|
@@ -406,7 +408,7 @@ class BaseMixTransform:
|
|
406
408
|
labels.pop("mix_labels", None)
|
407
409
|
return labels
|
408
410
|
|
409
|
-
def _mix_transform(self, labels:
|
411
|
+
def _mix_transform(self, labels: dict[str, Any]):
|
410
412
|
"""
|
411
413
|
Apply CutMix, MixUp or Mosaic augmentation to the label dictionary.
|
412
414
|
|
@@ -442,7 +444,7 @@ class BaseMixTransform:
|
|
442
444
|
return random.randint(0, len(self.dataset) - 1)
|
443
445
|
|
444
446
|
@staticmethod
|
445
|
-
def _update_label_text(labels:
|
447
|
+
def _update_label_text(labels: dict[str, Any]) -> dict[str, Any]:
|
446
448
|
"""
|
447
449
|
Update label text and class IDs for mixed labels in image augmentation.
|
448
450
|
|
@@ -564,7 +566,7 @@ class Mosaic(BaseMixTransform):
|
|
564
566
|
else: # select any images
|
565
567
|
return [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)]
|
566
568
|
|
567
|
-
def _mix_transform(self, labels:
|
569
|
+
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
568
570
|
"""
|
569
571
|
Apply mosaic augmentation to the input image and labels.
|
570
572
|
|
@@ -587,13 +589,13 @@ class Mosaic(BaseMixTransform):
|
|
587
589
|
>>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)
|
588
590
|
>>> augmented_data = mosaic._mix_transform(labels)
|
589
591
|
"""
|
590
|
-
assert labels.get("rect_shape"
|
592
|
+
assert labels.get("rect_shape") is None, "rect and mosaic are mutually exclusive."
|
591
593
|
assert len(labels.get("mix_labels", [])), "There are no other images for mosaic augment."
|
592
594
|
return (
|
593
595
|
self._mosaic3(labels) if self.n == 3 else self._mosaic4(labels) if self.n == 4 else self._mosaic9(labels)
|
594
596
|
) # This code is modified for mosaic3 method.
|
595
597
|
|
596
|
-
def _mosaic3(self, labels:
|
598
|
+
def _mosaic3(self, labels: dict[str, Any]) -> dict[str, Any]:
|
597
599
|
"""
|
598
600
|
Create a 1x3 image mosaic by combining three images.
|
599
601
|
|
@@ -652,7 +654,7 @@ class Mosaic(BaseMixTransform):
|
|
652
654
|
final_labels["img"] = img3[-self.border[0] : self.border[0], -self.border[1] : self.border[1]]
|
653
655
|
return final_labels
|
654
656
|
|
655
|
-
def _mosaic4(self, labels:
|
657
|
+
def _mosaic4(self, labels: dict[str, Any]) -> dict[str, Any]:
|
656
658
|
"""
|
657
659
|
Create a 2x2 image mosaic from four input images.
|
658
660
|
|
@@ -710,7 +712,7 @@ class Mosaic(BaseMixTransform):
|
|
710
712
|
final_labels["img"] = img4
|
711
713
|
return final_labels
|
712
714
|
|
713
|
-
def _mosaic9(self, labels:
|
715
|
+
def _mosaic9(self, labels: dict[str, Any]) -> dict[str, Any]:
|
714
716
|
"""
|
715
717
|
Create a 3x3 image mosaic from the input image and eight additional images.
|
716
718
|
|
@@ -783,7 +785,7 @@ class Mosaic(BaseMixTransform):
|
|
783
785
|
return final_labels
|
784
786
|
|
785
787
|
@staticmethod
|
786
|
-
def _update_labels(labels, padw: int, padh: int) ->
|
788
|
+
def _update_labels(labels, padw: int, padh: int) -> dict[str, Any]:
|
787
789
|
"""
|
788
790
|
Update label coordinates with padding values.
|
789
791
|
|
@@ -809,7 +811,7 @@ class Mosaic(BaseMixTransform):
|
|
809
811
|
labels["instances"].add_padding(padw, padh)
|
810
812
|
return labels
|
811
813
|
|
812
|
-
def _cat_labels(self, mosaic_labels:
|
814
|
+
def _cat_labels(self, mosaic_labels: list[dict[str, Any]]) -> dict[str, Any]:
|
813
815
|
"""
|
814
816
|
Concatenate and process labels for mosaic augmentation.
|
815
817
|
|
@@ -836,7 +838,7 @@ class Mosaic(BaseMixTransform):
|
|
836
838
|
>>> print(result.keys())
|
837
839
|
dict_keys(['im_file', 'ori_shape', 'resized_shape', 'cls', 'instances', 'mosaic_border'])
|
838
840
|
"""
|
839
|
-
if
|
841
|
+
if not mosaic_labels:
|
840
842
|
return {}
|
841
843
|
cls = []
|
842
844
|
instances = []
|
@@ -902,7 +904,7 @@ class MixUp(BaseMixTransform):
|
|
902
904
|
"""
|
903
905
|
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
|
904
906
|
|
905
|
-
def _mix_transform(self, labels:
|
907
|
+
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
906
908
|
"""
|
907
909
|
Apply MixUp augmentation to the input labels.
|
908
910
|
|
@@ -967,7 +969,7 @@ class CutMix(BaseMixTransform):
|
|
967
969
|
self.beta = beta
|
968
970
|
self.num_areas = num_areas
|
969
971
|
|
970
|
-
def _rand_bbox(self, width: int, height: int) ->
|
972
|
+
def _rand_bbox(self, width: int, height: int) -> tuple[int, int, int, int]:
|
971
973
|
"""
|
972
974
|
Generate random bounding box coordinates for the cut region.
|
973
975
|
|
@@ -997,7 +999,7 @@ class CutMix(BaseMixTransform):
|
|
997
999
|
|
998
1000
|
return x1, y1, x2, y2
|
999
1001
|
|
1000
|
-
def _mix_transform(self, labels:
|
1002
|
+
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1001
1003
|
"""
|
1002
1004
|
Apply CutMix augmentation to the input labels.
|
1003
1005
|
|
@@ -1086,7 +1088,7 @@ class RandomPerspective:
|
|
1086
1088
|
scale: float = 0.5,
|
1087
1089
|
shear: float = 0.0,
|
1088
1090
|
perspective: float = 0.0,
|
1089
|
-
border:
|
1091
|
+
border: tuple[int, int] = (0, 0),
|
1090
1092
|
pre_transform=None,
|
1091
1093
|
):
|
1092
1094
|
"""
|
@@ -1117,7 +1119,7 @@ class RandomPerspective:
|
|
1117
1119
|
self.border = border # mosaic border
|
1118
1120
|
self.pre_transform = pre_transform
|
1119
1121
|
|
1120
|
-
def affine_transform(self, img: np.ndarray, border:
|
1122
|
+
def affine_transform(self, img: np.ndarray, border: tuple[int, int]) -> tuple[np.ndarray, np.ndarray, float]:
|
1121
1123
|
"""
|
1122
1124
|
Apply a sequence of affine transformations centered around the image center.
|
1123
1125
|
|
@@ -1215,7 +1217,7 @@ class RandomPerspective:
|
|
1215
1217
|
y = xy[:, [1, 3, 5, 7]]
|
1216
1218
|
return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).T
|
1217
1219
|
|
1218
|
-
def apply_segments(self, segments: np.ndarray, M: np.ndarray) ->
|
1220
|
+
def apply_segments(self, segments: np.ndarray, M: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
|
1219
1221
|
"""
|
1220
1222
|
Apply affine transformations to segments and generate new bounding boxes.
|
1221
1223
|
|
@@ -1285,7 +1287,7 @@ class RandomPerspective:
|
|
1285
1287
|
visible[out_mask] = 0
|
1286
1288
|
return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)
|
1287
1289
|
|
1288
|
-
def __call__(self, labels:
|
1290
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1289
1291
|
"""
|
1290
1292
|
Apply random perspective and affine transformations to an image and its associated labels.
|
1291
1293
|
|
@@ -1453,7 +1455,7 @@ class RandomHSV:
|
|
1453
1455
|
self.sgain = sgain
|
1454
1456
|
self.vgain = vgain
|
1455
1457
|
|
1456
|
-
def __call__(self, labels:
|
1458
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1457
1459
|
"""
|
1458
1460
|
Apply random HSV augmentation to an image within predefined limits.
|
1459
1461
|
|
@@ -1515,7 +1517,7 @@ class RandomFlip:
|
|
1515
1517
|
>>> flipped_instances = result["instances"]
|
1516
1518
|
"""
|
1517
1519
|
|
1518
|
-
def __init__(self, p: float = 0.5, direction: str = "horizontal", flip_idx:
|
1520
|
+
def __init__(self, p: float = 0.5, direction: str = "horizontal", flip_idx: list[int] = None) -> None:
|
1519
1521
|
"""
|
1520
1522
|
Initialize the RandomFlip class with probability and direction.
|
1521
1523
|
|
@@ -1541,7 +1543,7 @@ class RandomFlip:
|
|
1541
1543
|
self.direction = direction
|
1542
1544
|
self.flip_idx = flip_idx
|
1543
1545
|
|
1544
|
-
def __call__(self, labels:
|
1546
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1545
1547
|
"""
|
1546
1548
|
Apply random flip to an image and update any instances like bounding boxes or keypoints accordingly.
|
1547
1549
|
|
@@ -1615,7 +1617,7 @@ class LetterBox:
|
|
1615
1617
|
|
1616
1618
|
def __init__(
|
1617
1619
|
self,
|
1618
|
-
new_shape:
|
1620
|
+
new_shape: tuple[int, int] = (640, 640),
|
1619
1621
|
auto: bool = False,
|
1620
1622
|
scale_fill: bool = False,
|
1621
1623
|
scaleup: bool = True,
|
@@ -1662,7 +1664,7 @@ class LetterBox:
|
|
1662
1664
|
self.padding_value = padding_value
|
1663
1665
|
self.interpolation = interpolation
|
1664
1666
|
|
1665
|
-
def __call__(self, labels:
|
1667
|
+
def __call__(self, labels: dict[str, Any] = None, image: np.ndarray = None) -> dict[str, Any] | np.ndarray:
|
1666
1668
|
"""
|
1667
1669
|
Resize and pad an image for object detection, instance segmentation, or pose estimation tasks.
|
1668
1670
|
|
@@ -1741,7 +1743,7 @@ class LetterBox:
|
|
1741
1743
|
return img
|
1742
1744
|
|
1743
1745
|
@staticmethod
|
1744
|
-
def _update_labels(labels:
|
1746
|
+
def _update_labels(labels: dict[str, Any], ratio: tuple[float, float], padw: float, padh: float) -> dict[str, Any]:
|
1745
1747
|
"""
|
1746
1748
|
Update labels after applying letterboxing to an image.
|
1747
1749
|
|
@@ -1801,12 +1803,12 @@ class CopyPaste(BaseMixTransform):
|
|
1801
1803
|
assert mode in {"flip", "mixup"}, f"Expected `mode` to be `flip` or `mixup`, but got {mode}."
|
1802
1804
|
self.mode = mode
|
1803
1805
|
|
1804
|
-
def _mix_transform(self, labels:
|
1806
|
+
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1805
1807
|
"""Apply Copy-Paste augmentation to combine objects from another image into the current image."""
|
1806
1808
|
labels2 = labels["mix_labels"][0]
|
1807
1809
|
return self._transform(labels, labels2)
|
1808
1810
|
|
1809
|
-
def __call__(self, labels:
|
1811
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1810
1812
|
"""Apply Copy-Paste augmentation to an image and its labels."""
|
1811
1813
|
if len(labels["instances"].segments) == 0 or self.p == 0:
|
1812
1814
|
return labels
|
@@ -1833,7 +1835,7 @@ class CopyPaste(BaseMixTransform):
|
|
1833
1835
|
labels.pop("mix_labels", None)
|
1834
1836
|
return labels
|
1835
1837
|
|
1836
|
-
def _transform(self, labels1:
|
1838
|
+
def _transform(self, labels1: dict[str, Any], labels2: dict[str, Any] = {}) -> dict[str, Any]:
|
1837
1839
|
"""Apply Copy-Paste augmentation to combine objects from another image into the current image."""
|
1838
1840
|
im = labels1["img"]
|
1839
1841
|
if "mosaic_border" not in labels1:
|
@@ -2011,7 +2013,7 @@ class Albumentations:
|
|
2011
2013
|
except Exception as e:
|
2012
2014
|
LOGGER.info(f"{prefix}{e}")
|
2013
2015
|
|
2014
|
-
def __call__(self, labels:
|
2016
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
2015
2017
|
"""
|
2016
2018
|
Apply Albumentations transformations to input labels.
|
2017
2019
|
|
@@ -2153,7 +2155,7 @@ class Format:
|
|
2153
2155
|
self.batch_idx = batch_idx # keep the batch indexes
|
2154
2156
|
self.bgr = bgr
|
2155
2157
|
|
2156
|
-
def __call__(self, labels:
|
2158
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
2157
2159
|
"""
|
2158
2160
|
Format image annotations for object detection, instance segmentation, and pose estimation tasks.
|
2159
2161
|
|
@@ -2255,7 +2257,7 @@ class Format:
|
|
2255
2257
|
|
2256
2258
|
def _format_segments(
|
2257
2259
|
self, instances: Instances, cls: np.ndarray, w: int, h: int
|
2258
|
-
) ->
|
2260
|
+
) -> tuple[np.ndarray, Instances, np.ndarray]:
|
2259
2261
|
"""
|
2260
2262
|
Convert polygon segments to bitmap masks.
|
2261
2263
|
|
@@ -2317,7 +2319,7 @@ class LoadVisualPrompt:
|
|
2317
2319
|
|
2318
2320
|
return (r >= x1) * (r < x2) * (c >= y1) * (c < y2)
|
2319
2321
|
|
2320
|
-
def __call__(self, labels:
|
2322
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
2321
2323
|
"""
|
2322
2324
|
Process labels to create visual prompts.
|
2323
2325
|
|
@@ -2340,10 +2342,10 @@ class LoadVisualPrompt:
|
|
2340
2342
|
|
2341
2343
|
def get_visuals(
|
2342
2344
|
self,
|
2343
|
-
category:
|
2344
|
-
shape:
|
2345
|
-
bboxes:
|
2346
|
-
masks:
|
2345
|
+
category: int | np.ndarray | torch.Tensor,
|
2346
|
+
shape: tuple[int, int],
|
2347
|
+
bboxes: np.ndarray | torch.Tensor = None,
|
2348
|
+
masks: np.ndarray | torch.Tensor = None,
|
2347
2349
|
) -> torch.Tensor:
|
2348
2350
|
"""
|
2349
2351
|
Generate visual masks based on bounding boxes or masks.
|
@@ -2415,10 +2417,10 @@ class RandomLoadText:
|
|
2415
2417
|
def __init__(
|
2416
2418
|
self,
|
2417
2419
|
prompt_format: str = "{}",
|
2418
|
-
neg_samples:
|
2420
|
+
neg_samples: tuple[int, int] = (80, 80),
|
2419
2421
|
max_samples: int = 80,
|
2420
2422
|
padding: bool = False,
|
2421
|
-
padding_value:
|
2423
|
+
padding_value: list[str] = [""],
|
2422
2424
|
) -> None:
|
2423
2425
|
"""
|
2424
2426
|
Initialize the RandomLoadText class for randomly sampling positive and negative texts.
|
@@ -2459,7 +2461,7 @@ class RandomLoadText:
|
|
2459
2461
|
self.padding = padding
|
2460
2462
|
self.padding_value = padding_value
|
2461
2463
|
|
2462
|
-
def __call__(self, labels:
|
2464
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
2463
2465
|
"""
|
2464
2466
|
Randomly sample positive and negative texts and update class indices accordingly.
|
2465
2467
|
|
@@ -2595,9 +2597,9 @@ def v8_transforms(dataset, imgsz: int, hyp: IterableSimpleNamespace, stretch: bo
|
|
2595
2597
|
|
2596
2598
|
# Classification augmentations -----------------------------------------------------------------------------------------
|
2597
2599
|
def classify_transforms(
|
2598
|
-
size:
|
2599
|
-
mean:
|
2600
|
-
std:
|
2600
|
+
size: tuple[int, int] | int = 224,
|
2601
|
+
mean: tuple[float, float, float] = DEFAULT_MEAN,
|
2602
|
+
std: tuple[float, float, float] = DEFAULT_STD,
|
2601
2603
|
interpolation: str = "BILINEAR",
|
2602
2604
|
crop_fraction: float = None,
|
2603
2605
|
):
|
@@ -2647,10 +2649,10 @@ def classify_transforms(
|
|
2647
2649
|
# Classification training augmentations --------------------------------------------------------------------------------
|
2648
2650
|
def classify_augmentations(
|
2649
2651
|
size: int = 224,
|
2650
|
-
mean:
|
2651
|
-
std:
|
2652
|
-
scale:
|
2653
|
-
ratio:
|
2652
|
+
mean: tuple[float, float, float] = DEFAULT_MEAN,
|
2653
|
+
std: tuple[float, float, float] = DEFAULT_STD,
|
2654
|
+
scale: tuple[float, float] = None,
|
2655
|
+
ratio: tuple[float, float] = None,
|
2654
2656
|
hflip: float = 0.5,
|
2655
2657
|
vflip: float = 0.0,
|
2656
2658
|
auto_augment: str = None,
|
@@ -2773,7 +2775,7 @@ class ClassifyLetterBox:
|
|
2773
2775
|
(640, 640, 3)
|
2774
2776
|
"""
|
2775
2777
|
|
2776
|
-
def __init__(self, size:
|
2778
|
+
def __init__(self, size: int | tuple[int, int] = (640, 640), auto: bool = False, stride: int = 32):
|
2777
2779
|
"""
|
2778
2780
|
Initialize the ClassifyLetterBox object for image preprocessing.
|
2779
2781
|
|
@@ -2862,7 +2864,7 @@ class CenterCrop:
|
|
2862
2864
|
(640, 640, 3)
|
2863
2865
|
"""
|
2864
2866
|
|
2865
|
-
def __init__(self, size:
|
2867
|
+
def __init__(self, size: int | tuple[int, int] = (640, 640)):
|
2866
2868
|
"""
|
2867
2869
|
Initialize the CenterCrop object for image preprocessing.
|
2868
2870
|
|
@@ -2886,7 +2888,7 @@ class CenterCrop:
|
|
2886
2888
|
super().__init__()
|
2887
2889
|
self.h, self.w = (size, size) if isinstance(size, int) else size
|
2888
2890
|
|
2889
|
-
def __call__(self, im:
|
2891
|
+
def __call__(self, im: Image.Image | np.ndarray) -> np.ndarray:
|
2890
2892
|
"""
|
2891
2893
|
Apply center cropping to an input image.
|
2892
2894
|
|
ultralytics/data/base.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import glob
|
4
6
|
import math
|
5
7
|
import os
|
@@ -7,7 +9,7 @@ import random
|
|
7
9
|
from copy import deepcopy
|
8
10
|
from multiprocessing.pool import ThreadPool
|
9
11
|
from pathlib import Path
|
10
|
-
from typing import Any
|
12
|
+
from typing import Any
|
11
13
|
|
12
14
|
import cv2
|
13
15
|
import numpy as np
|
@@ -69,18 +71,18 @@ class BaseDataset(Dataset):
|
|
69
71
|
|
70
72
|
def __init__(
|
71
73
|
self,
|
72
|
-
img_path:
|
74
|
+
img_path: str | list[str],
|
73
75
|
imgsz: int = 640,
|
74
|
-
cache:
|
76
|
+
cache: bool | str = False,
|
75
77
|
augment: bool = True,
|
76
|
-
hyp:
|
78
|
+
hyp: dict[str, Any] = DEFAULT_CFG,
|
77
79
|
prefix: str = "",
|
78
80
|
rect: bool = False,
|
79
81
|
batch_size: int = 16,
|
80
82
|
stride: int = 32,
|
81
83
|
pad: float = 0.5,
|
82
84
|
single_cls: bool = False,
|
83
|
-
classes:
|
85
|
+
classes: list[int] | None = None,
|
84
86
|
fraction: float = 1.0,
|
85
87
|
channels: int = 3,
|
86
88
|
):
|
@@ -145,7 +147,7 @@ class BaseDataset(Dataset):
|
|
145
147
|
# Transforms
|
146
148
|
self.transforms = self.build_transforms(hyp=hyp)
|
147
149
|
|
148
|
-
def get_img_files(self, img_path:
|
150
|
+
def get_img_files(self, img_path: str | list[str]) -> list[str]:
|
149
151
|
"""
|
150
152
|
Read image files from the specified path.
|
151
153
|
|
@@ -183,7 +185,7 @@ class BaseDataset(Dataset):
|
|
183
185
|
check_file_speeds(im_files, prefix=self.prefix) # check image read speeds
|
184
186
|
return im_files
|
185
187
|
|
186
|
-
def update_labels(self, include_class:
|
188
|
+
def update_labels(self, include_class: list[int] | None) -> None:
|
187
189
|
"""
|
188
190
|
Update labels to include only specified classes.
|
189
191
|
|
@@ -207,7 +209,7 @@ class BaseDataset(Dataset):
|
|
207
209
|
if self.single_cls:
|
208
210
|
self.labels[i]["cls"][:, 0] = 0
|
209
211
|
|
210
|
-
def load_image(self, i: int, rect_mode: bool = True) ->
|
212
|
+
def load_image(self, i: int, rect_mode: bool = True) -> tuple[np.ndarray, tuple[int, int], tuple[int, int]]:
|
211
213
|
"""
|
212
214
|
Load an image from dataset index 'i'.
|
213
215
|
|
@@ -374,11 +376,11 @@ class BaseDataset(Dataset):
|
|
374
376
|
self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride
|
375
377
|
self.batch = bi # batch index of image
|
376
378
|
|
377
|
-
def __getitem__(self, index: int) ->
|
379
|
+
def __getitem__(self, index: int) -> dict[str, Any]:
|
378
380
|
"""Return transformed label information for given index."""
|
379
381
|
return self.transforms(self.get_image_and_label(index))
|
380
382
|
|
381
|
-
def get_image_and_label(self, index: int) ->
|
383
|
+
def get_image_and_label(self, index: int) -> dict[str, Any]:
|
382
384
|
"""
|
383
385
|
Get and return label information from the dataset.
|
384
386
|
|
@@ -403,11 +405,11 @@ class BaseDataset(Dataset):
|
|
403
405
|
"""Return the length of the labels list for the dataset."""
|
404
406
|
return len(self.labels)
|
405
407
|
|
406
|
-
def update_labels_info(self, label:
|
408
|
+
def update_labels_info(self, label: dict[str, Any]) -> dict[str, Any]:
|
407
409
|
"""Custom your label format here."""
|
408
410
|
return label
|
409
411
|
|
410
|
-
def build_transforms(self, hyp:
|
412
|
+
def build_transforms(self, hyp: dict[str, Any] | None = None):
|
411
413
|
"""
|
412
414
|
Users can customize augmentations here.
|
413
415
|
|
@@ -421,7 +423,7 @@ class BaseDataset(Dataset):
|
|
421
423
|
"""
|
422
424
|
raise NotImplementedError
|
423
425
|
|
424
|
-
def get_labels(self) ->
|
426
|
+
def get_labels(self) -> list[dict[str, Any]]:
|
425
427
|
"""
|
426
428
|
Users can customize their own format here.
|
427
429
|
|
ultralytics/data/build.py
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import os
|
4
6
|
import random
|
7
|
+
from collections.abc import Iterator
|
5
8
|
from pathlib import Path
|
6
|
-
from typing import Any
|
9
|
+
from typing import Any
|
7
10
|
|
8
11
|
import numpy as np
|
9
12
|
import torch
|
@@ -116,7 +119,7 @@ def build_yolo_dataset(
|
|
116
119
|
cfg: IterableSimpleNamespace,
|
117
120
|
img_path: str,
|
118
121
|
batch: int,
|
119
|
-
data:
|
122
|
+
data: dict[str, Any],
|
120
123
|
mode: str = "train",
|
121
124
|
rect: bool = False,
|
122
125
|
stride: int = 32,
|
@@ -133,7 +136,7 @@ def build_yolo_dataset(
|
|
133
136
|
rect=cfg.rect or rect, # rectangular batches
|
134
137
|
cache=cfg.cache or None,
|
135
138
|
single_cls=cfg.single_cls or False,
|
136
|
-
stride=
|
139
|
+
stride=stride,
|
137
140
|
pad=0.0 if mode == "train" else 0.5,
|
138
141
|
prefix=colorstr(f"{mode}: "),
|
139
142
|
task=cfg.task,
|
@@ -165,7 +168,7 @@ def build_grounding(
|
|
165
168
|
rect=cfg.rect or rect, # rectangular batches
|
166
169
|
cache=cfg.cache or None,
|
167
170
|
single_cls=cfg.single_cls or False,
|
168
|
-
stride=
|
171
|
+
stride=stride,
|
169
172
|
pad=0.0 if mode == "train" else 0.5,
|
170
173
|
prefix=colorstr(f"{mode}: "),
|
171
174
|
task=cfg.task,
|
ultralytics/data/converter.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import asyncio
|
4
6
|
import json
|
5
7
|
import random
|
@@ -7,7 +9,6 @@ import shutil
|
|
7
9
|
from collections import defaultdict
|
8
10
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
9
11
|
from pathlib import Path
|
10
|
-
from typing import List, Optional, Union
|
11
12
|
|
12
13
|
import cv2
|
13
14
|
import numpy as np
|
@@ -19,7 +20,7 @@ from ultralytics.utils.downloads import download, zip_directory
|
|
19
20
|
from ultralytics.utils.files import increment_path
|
20
21
|
|
21
22
|
|
22
|
-
def coco91_to_coco80_class() -> List[int]:
|
23
|
+
def coco91_to_coco80_class() -> list[int]:
|
23
24
|
"""
|
24
25
|
Convert 91-index COCO class IDs to 80-index COCO class IDs.
|
25
26
|
|
@@ -122,7 +123,7 @@ def coco91_to_coco80_class() -> List[int]:
|
|
122
123
|
]
|
123
124
|
|
124
125
|
|
125
|
-
def coco80_to_coco91_class() -> List[int]:
|
126
|
+
def coco80_to_coco91_class() -> list[int]:
|
126
127
|
r"""
|
127
128
|
Convert 80-index (val2014) to 91-index (paper).
|
128
129
|
|
@@ -531,7 +532,7 @@ def min_index(arr1: np.ndarray, arr2: np.ndarray):
|
|
531
532
|
return np.unravel_index(np.argmin(dis, axis=None), dis.shape)
|
532
533
|
|
533
534
|
|
534
|
-
def merge_multi_segment(segments: List[List]):
|
535
|
+
def merge_multi_segment(segments: list[list]):
|
535
536
|
"""
|
536
537
|
Merge multiple segments into one list by connecting the coordinates with the minimum distance between each segment.
|
537
538
|
|
@@ -582,9 +583,7 @@ def merge_multi_segment(segments: List[List]):
|
|
582
583
|
return s
|
583
584
|
|
584
585
|
|
585
|
-
def yolo_bbox2segment(
|
586
|
-
im_dir: Union[str, Path], save_dir: Optional[Union[str, Path]] = None, sam_model: str = "sam_b.pt", device=None
|
587
|
-
):
|
586
|
+
def yolo_bbox2segment(im_dir: str | Path, save_dir: str | Path | None = None, sam_model: str = "sam_b.pt", device=None):
|
588
587
|
"""
|
589
588
|
Convert existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB) in
|
590
589
|
YOLO format. Generate segmentation data using SAM auto-annotator as needed.
|
@@ -706,7 +705,7 @@ def create_synthetic_coco_dataset():
|
|
706
705
|
LOGGER.info("Synthetic COCO dataset created successfully.")
|
707
706
|
|
708
707
|
|
709
|
-
def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, replace: bool = False, zip: bool = False):
|
708
|
+
def convert_to_multispectral(path: str | Path, n_channels: int = 10, replace: bool = False, zip: bool = False):
|
710
709
|
"""
|
711
710
|
Convert RGB images to multispectral images by interpolating across wavelength bands.
|
712
711
|
|
@@ -733,7 +732,7 @@ def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, repla
|
|
733
732
|
path = Path(path)
|
734
733
|
if path.is_dir():
|
735
734
|
# Process directory
|
736
|
-
im_files = sum(
|
735
|
+
im_files = sum((list(path.rglob(f"*.{ext}")) for ext in (IMG_FORMATS - {"tif", "tiff"})), [])
|
737
736
|
for im_path in im_files:
|
738
737
|
try:
|
739
738
|
convert_to_multispectral(im_path, n_channels)
|
@@ -758,7 +757,7 @@ def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, repla
|
|
758
757
|
LOGGER.info(f"Converted {output_path}")
|
759
758
|
|
760
759
|
|
761
|
-
async def convert_ndjson_to_yolo(ndjson_path:
|
760
|
+
async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Path | None = None) -> Path:
|
762
761
|
"""
|
763
762
|
Convert NDJSON dataset format to Ultralytics YOLO11 dataset structure.
|
764
763
|
|