dgenerate-ultralytics-headless 8.3.141__py3-none-any.whl → 8.3.144__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/METADATA +1 -1
- dgenerate_ultralytics_headless-8.3.144.dist-info/RECORD +272 -0
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +12 -12
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +22 -19
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +39 -39
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +187 -158
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +13 -11
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +6 -3
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +18 -12
- ultralytics/solutions/object_cropper.py +12 -5
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +215 -85
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +42 -28
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +84 -42
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- dgenerate_ultralytics_headless-8.3.141.dist-info/RECORD +0 -272
- {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/top_level.txt +0 -0
ultralytics/data/base.py
CHANGED
@@ -7,7 +7,7 @@ import random
|
|
7
7
|
from copy import deepcopy
|
8
8
|
from multiprocessing.pool import ThreadPool
|
9
9
|
from pathlib import Path
|
10
|
-
from typing import Optional
|
10
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
11
11
|
|
12
12
|
import cv2
|
13
13
|
import numpy as np
|
@@ -32,6 +32,7 @@ class BaseDataset(Dataset):
|
|
32
32
|
single_cls (bool): Whether to treat all objects as a single class.
|
33
33
|
prefix (str): Prefix to print in log messages.
|
34
34
|
fraction (float): Fraction of dataset to utilize.
|
35
|
+
channels (int): Number of channels in the images (1 for grayscale, 3 for RGB).
|
35
36
|
cv2_flag (int): OpenCV flag for reading images.
|
36
37
|
im_files (List[str]): List of image file paths.
|
37
38
|
labels (List[Dict]): List of label data dictionaries.
|
@@ -48,6 +49,8 @@ class BaseDataset(Dataset):
|
|
48
49
|
npy_files (List[Path]): List of numpy file paths.
|
49
50
|
cache (str): Cache images to RAM or disk during training.
|
50
51
|
transforms (callable): Image transformation function.
|
52
|
+
batch_shapes (np.ndarray): Batch shapes for rectangular training.
|
53
|
+
batch (np.ndarray): Batch index of each image.
|
51
54
|
|
52
55
|
Methods:
|
53
56
|
get_img_files: Read image files from the specified path.
|
@@ -66,39 +69,39 @@ class BaseDataset(Dataset):
|
|
66
69
|
|
67
70
|
def __init__(
|
68
71
|
self,
|
69
|
-
img_path,
|
70
|
-
imgsz=640,
|
71
|
-
cache=False,
|
72
|
-
augment=True,
|
73
|
-
hyp=DEFAULT_CFG,
|
74
|
-
prefix="",
|
75
|
-
rect=False,
|
76
|
-
batch_size=16,
|
77
|
-
stride=32,
|
78
|
-
pad=0.5,
|
79
|
-
single_cls=False,
|
80
|
-
classes=None,
|
81
|
-
fraction=1.0,
|
82
|
-
channels=3,
|
72
|
+
img_path: Union[str, List[str]],
|
73
|
+
imgsz: int = 640,
|
74
|
+
cache: Union[bool, str] = False,
|
75
|
+
augment: bool = True,
|
76
|
+
hyp: Dict[str, Any] = DEFAULT_CFG,
|
77
|
+
prefix: str = "",
|
78
|
+
rect: bool = False,
|
79
|
+
batch_size: int = 16,
|
80
|
+
stride: int = 32,
|
81
|
+
pad: float = 0.5,
|
82
|
+
single_cls: bool = False,
|
83
|
+
classes: Optional[List[int]] = None,
|
84
|
+
fraction: float = 1.0,
|
85
|
+
channels: int = 3,
|
83
86
|
):
|
84
87
|
"""
|
85
88
|
Initialize BaseDataset with given configuration and options.
|
86
89
|
|
87
90
|
Args:
|
88
|
-
img_path (str): Path to the folder containing images.
|
89
|
-
imgsz (int
|
90
|
-
cache (bool | str
|
91
|
-
augment (bool
|
92
|
-
hyp (
|
93
|
-
prefix (str
|
94
|
-
rect (bool
|
95
|
-
batch_size (int
|
96
|
-
stride (int
|
97
|
-
pad (float
|
98
|
-
single_cls (bool
|
99
|
-
classes (
|
100
|
-
fraction (float
|
101
|
-
channels (int
|
91
|
+
img_path (str | List[str]): Path to the folder containing images or list of image paths.
|
92
|
+
imgsz (int): Image size for resizing.
|
93
|
+
cache (bool | str): Cache images to RAM or disk during training.
|
94
|
+
augment (bool): If True, data augmentation is applied.
|
95
|
+
hyp (Dict[str, Any]): Hyperparameters to apply data augmentation.
|
96
|
+
prefix (str): Prefix to print in log messages.
|
97
|
+
rect (bool): If True, rectangular training is used.
|
98
|
+
batch_size (int): Size of batches.
|
99
|
+
stride (int): Stride used in the model.
|
100
|
+
pad (float): Padding value.
|
101
|
+
single_cls (bool): If True, single class training is used.
|
102
|
+
classes (List[int], optional): List of included classes.
|
103
|
+
fraction (float): Fraction of dataset to utilize.
|
104
|
+
channels (int): Number of channels in the images (1 for grayscale, 3 for RGB).
|
102
105
|
"""
|
103
106
|
super().__init__()
|
104
107
|
self.img_path = img_path
|
@@ -142,7 +145,7 @@ class BaseDataset(Dataset):
|
|
142
145
|
# Transforms
|
143
146
|
self.transforms = self.build_transforms(hyp=hyp)
|
144
147
|
|
145
|
-
def get_img_files(self, img_path):
|
148
|
+
def get_img_files(self, img_path: Union[str, List[str]]) -> List[str]:
|
146
149
|
"""
|
147
150
|
Read image files from the specified path.
|
148
151
|
|
@@ -180,12 +183,12 @@ class BaseDataset(Dataset):
|
|
180
183
|
check_file_speeds(im_files, prefix=self.prefix) # check image read speeds
|
181
184
|
return im_files
|
182
185
|
|
183
|
-
def update_labels(self, include_class: Optional[
|
186
|
+
def update_labels(self, include_class: Optional[List[int]]) -> None:
|
184
187
|
"""
|
185
188
|
Update labels to include only specified classes.
|
186
189
|
|
187
190
|
Args:
|
188
|
-
include_class (
|
191
|
+
include_class (List[int], optional): List of classes to include. If None, all classes are included.
|
189
192
|
"""
|
190
193
|
include_class_array = np.array(include_class).reshape(1, -1)
|
191
194
|
for i in range(len(self.labels)):
|
@@ -204,18 +207,18 @@ class BaseDataset(Dataset):
|
|
204
207
|
if self.single_cls:
|
205
208
|
self.labels[i]["cls"][:, 0] = 0
|
206
209
|
|
207
|
-
def load_image(self, i, rect_mode=True):
|
210
|
+
def load_image(self, i: int, rect_mode: bool = True) -> Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]]:
|
208
211
|
"""
|
209
212
|
Load an image from dataset index 'i'.
|
210
213
|
|
211
214
|
Args:
|
212
215
|
i (int): Index of the image to load.
|
213
|
-
rect_mode (bool
|
216
|
+
rect_mode (bool): Whether to use rectangular resizing.
|
214
217
|
|
215
218
|
Returns:
|
216
|
-
(np.ndarray): Loaded image as a NumPy array.
|
217
|
-
(Tuple[int, int]): Original image dimensions in (height, width) format.
|
218
|
-
(Tuple[int, int]): Resized image dimensions in (height, width) format.
|
219
|
+
im (np.ndarray): Loaded image as a NumPy array.
|
220
|
+
hw_original (Tuple[int, int]): Original image dimensions in (height, width) format.
|
221
|
+
hw_resized (Tuple[int, int]): Resized image dimensions in (height, width) format.
|
219
222
|
|
220
223
|
Raises:
|
221
224
|
FileNotFoundError: If the image file is not found.
|
@@ -258,7 +261,7 @@ class BaseDataset(Dataset):
|
|
258
261
|
|
259
262
|
return self.ims[i], self.im_hw0[i], self.im_hw[i]
|
260
263
|
|
261
|
-
def cache_images(self):
|
264
|
+
def cache_images(self) -> None:
|
262
265
|
"""Cache images to memory or disk for faster training."""
|
263
266
|
b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes
|
264
267
|
fcn, storage = (self.cache_images_to_disk, "Disk") if self.cache == "disk" else (self.load_image, "RAM")
|
@@ -274,18 +277,18 @@ class BaseDataset(Dataset):
|
|
274
277
|
pbar.desc = f"{self.prefix}Caching images ({b / gb:.1f}GB {storage})"
|
275
278
|
pbar.close()
|
276
279
|
|
277
|
-
def cache_images_to_disk(self, i):
|
280
|
+
def cache_images_to_disk(self, i: int) -> None:
|
278
281
|
"""Save an image as an *.npy file for faster loading."""
|
279
282
|
f = self.npy_files[i]
|
280
283
|
if not f.exists():
|
281
284
|
np.save(f.as_posix(), imread(self.im_files[i]), allow_pickle=False)
|
282
285
|
|
283
|
-
def check_cache_disk(self, safety_margin=0.5):
|
286
|
+
def check_cache_disk(self, safety_margin: float = 0.5) -> bool:
|
284
287
|
"""
|
285
288
|
Check if there's enough disk space for caching images.
|
286
289
|
|
287
290
|
Args:
|
288
|
-
safety_margin (float
|
291
|
+
safety_margin (float): Safety margin factor for disk space calculation.
|
289
292
|
|
290
293
|
Returns:
|
291
294
|
(bool): True if there's enough disk space, False otherwise.
|
@@ -316,12 +319,12 @@ class BaseDataset(Dataset):
|
|
316
319
|
return False
|
317
320
|
return True
|
318
321
|
|
319
|
-
def check_cache_ram(self, safety_margin=0.5):
|
322
|
+
def check_cache_ram(self, safety_margin: float = 0.5) -> bool:
|
320
323
|
"""
|
321
324
|
Check if there's enough RAM for caching images.
|
322
325
|
|
323
326
|
Args:
|
324
|
-
safety_margin (float
|
327
|
+
safety_margin (float): Safety margin factor for RAM calculation.
|
325
328
|
|
326
329
|
Returns:
|
327
330
|
(bool): True if there's enough RAM, False otherwise.
|
@@ -346,7 +349,7 @@ class BaseDataset(Dataset):
|
|
346
349
|
return False
|
347
350
|
return True
|
348
351
|
|
349
|
-
def set_rectangle(self):
|
352
|
+
def set_rectangle(self) -> None:
|
350
353
|
"""Set the shape of bounding boxes for YOLO detections as rectangles."""
|
351
354
|
bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int) # batch index
|
352
355
|
nb = bi[-1] + 1 # number of batches
|
@@ -371,11 +374,11 @@ class BaseDataset(Dataset):
|
|
371
374
|
self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride
|
372
375
|
self.batch = bi # batch index of image
|
373
376
|
|
374
|
-
def __getitem__(self, index):
|
377
|
+
def __getitem__(self, index: int) -> Dict[str, Any]:
|
375
378
|
"""Return transformed label information for given index."""
|
376
379
|
return self.transforms(self.get_image_and_label(index))
|
377
380
|
|
378
|
-
def get_image_and_label(self, index):
|
381
|
+
def get_image_and_label(self, index: int) -> Dict[str, Any]:
|
379
382
|
"""
|
380
383
|
Get and return label information from the dataset.
|
381
384
|
|
@@ -383,7 +386,7 @@ class BaseDataset(Dataset):
|
|
383
386
|
index (int): Index of the image to retrieve.
|
384
387
|
|
385
388
|
Returns:
|
386
|
-
(
|
389
|
+
(Dict[str, Any]): Label dictionary with image and metadata.
|
387
390
|
"""
|
388
391
|
label = deepcopy(self.labels[index]) # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
|
389
392
|
label.pop("shape", None) # shape is for rect, remove it
|
@@ -396,15 +399,15 @@ class BaseDataset(Dataset):
|
|
396
399
|
label["rect_shape"] = self.batch_shapes[self.batch[index]]
|
397
400
|
return self.update_labels_info(label)
|
398
401
|
|
399
|
-
def __len__(self):
|
402
|
+
def __len__(self) -> int:
|
400
403
|
"""Return the length of the labels list for the dataset."""
|
401
404
|
return len(self.labels)
|
402
405
|
|
403
|
-
def update_labels_info(self, label):
|
406
|
+
def update_labels_info(self, label: Dict[str, Any]) -> Dict[str, Any]:
|
404
407
|
"""Custom your label format here."""
|
405
408
|
return label
|
406
409
|
|
407
|
-
def build_transforms(self, hyp=None):
|
410
|
+
def build_transforms(self, hyp: Optional[Dict[str, Any]] = None):
|
408
411
|
"""
|
409
412
|
Users can customize augmentations here.
|
410
413
|
|
@@ -418,7 +421,7 @@ class BaseDataset(Dataset):
|
|
418
421
|
"""
|
419
422
|
raise NotImplementedError
|
420
423
|
|
421
|
-
def get_labels(self):
|
424
|
+
def get_labels(self) -> List[Dict[str, Any]]:
|
422
425
|
"""
|
423
426
|
Users can customize their own format here.
|
424
427
|
|
ultralytics/data/build.py
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
import os
|
4
4
|
import random
|
5
5
|
from pathlib import Path
|
6
|
+
from typing import Any, Iterator
|
6
7
|
|
7
8
|
import numpy as np
|
8
9
|
import torch
|
@@ -27,33 +28,40 @@ from ultralytics.utils.checks import check_file
|
|
27
28
|
|
28
29
|
class InfiniteDataLoader(dataloader.DataLoader):
|
29
30
|
"""
|
30
|
-
Dataloader that reuses workers.
|
31
|
+
Dataloader that reuses workers for infinite iteration.
|
31
32
|
|
32
33
|
This dataloader extends the PyTorch DataLoader to provide infinite recycling of workers, which improves efficiency
|
33
|
-
for training loops that need to iterate through the dataset multiple times.
|
34
|
+
for training loops that need to iterate through the dataset multiple times without recreating workers.
|
34
35
|
|
35
36
|
Attributes:
|
36
37
|
batch_sampler (_RepeatSampler): A sampler that repeats indefinitely.
|
37
38
|
iterator (Iterator): The iterator from the parent DataLoader.
|
38
39
|
|
39
40
|
Methods:
|
40
|
-
__len__:
|
41
|
-
__iter__:
|
42
|
-
__del__:
|
43
|
-
reset:
|
41
|
+
__len__: Return the length of the batch sampler's sampler.
|
42
|
+
__iter__: Create a sampler that repeats indefinitely.
|
43
|
+
__del__: Ensure workers are properly terminated.
|
44
|
+
reset: Reset the iterator, useful when modifying dataset settings during training.
|
45
|
+
|
46
|
+
Examples:
|
47
|
+
Create an infinite dataloader for training
|
48
|
+
>>> dataset = YOLODataset(...)
|
49
|
+
>>> dataloader = InfiniteDataLoader(dataset, batch_size=16, shuffle=True)
|
50
|
+
>>> for batch in dataloader: # Infinite iteration
|
51
|
+
>>> train_step(batch)
|
44
52
|
"""
|
45
53
|
|
46
|
-
def __init__(self, *args, **kwargs):
|
54
|
+
def __init__(self, *args: Any, **kwargs: Any):
|
47
55
|
"""Initialize the InfiniteDataLoader with the same arguments as DataLoader."""
|
48
56
|
super().__init__(*args, **kwargs)
|
49
57
|
object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
|
50
58
|
self.iterator = super().__iter__()
|
51
59
|
|
52
|
-
def __len__(self):
|
60
|
+
def __len__(self) -> int:
|
53
61
|
"""Return the length of the batch sampler's sampler."""
|
54
62
|
return len(self.batch_sampler.sampler)
|
55
63
|
|
56
|
-
def __iter__(self):
|
64
|
+
def __iter__(self) -> Iterator:
|
57
65
|
"""Create an iterator that yields indefinitely from the underlying iterator."""
|
58
66
|
for _ in range(len(self)):
|
59
67
|
yield next(self.iterator)
|
@@ -77,26 +85,26 @@ class InfiniteDataLoader(dataloader.DataLoader):
|
|
77
85
|
|
78
86
|
class _RepeatSampler:
|
79
87
|
"""
|
80
|
-
Sampler that repeats forever.
|
88
|
+
Sampler that repeats forever for infinite iteration.
|
81
89
|
|
82
90
|
This sampler wraps another sampler and yields its contents indefinitely, allowing for infinite iteration
|
83
|
-
over a dataset.
|
91
|
+
over a dataset without recreating the sampler.
|
84
92
|
|
85
93
|
Attributes:
|
86
94
|
sampler (Dataset.sampler): The sampler to repeat.
|
87
95
|
"""
|
88
96
|
|
89
|
-
def __init__(self, sampler):
|
97
|
+
def __init__(self, sampler: Any):
|
90
98
|
"""Initialize the _RepeatSampler with a sampler to repeat indefinitely."""
|
91
99
|
self.sampler = sampler
|
92
100
|
|
93
|
-
def __iter__(self):
|
101
|
+
def __iter__(self) -> Iterator:
|
94
102
|
"""Iterate over the sampler indefinitely, yielding its contents."""
|
95
103
|
while True:
|
96
104
|
yield from iter(self.sampler)
|
97
105
|
|
98
106
|
|
99
|
-
def seed_worker(worker_id): # noqa
|
107
|
+
def seed_worker(worker_id: int): # noqa
|
100
108
|
"""Set dataloader worker seed for reproducibility across worker processes."""
|
101
109
|
worker_seed = torch.initial_seed() % 2**32
|
102
110
|
np.random.seed(worker_seed)
|
@@ -146,7 +154,7 @@ def build_grounding(cfg, img_path, json_file, batch, mode="train", rect=False, s
|
|
146
154
|
)
|
147
155
|
|
148
156
|
|
149
|
-
def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
|
157
|
+
def build_dataloader(dataset, batch: int, workers: int, shuffle: bool = True, rank: int = -1):
|
150
158
|
"""
|
151
159
|
Create and return an InfiniteDataLoader or DataLoader for training or validation.
|
152
160
|
|
@@ -154,11 +162,16 @@ def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
|
|
154
162
|
dataset (Dataset): Dataset to load data from.
|
155
163
|
batch (int): Batch size for the dataloader.
|
156
164
|
workers (int): Number of worker threads for loading data.
|
157
|
-
shuffle (bool): Whether to shuffle the dataset.
|
158
|
-
rank (int): Process rank in distributed training. -1 for single-GPU training.
|
165
|
+
shuffle (bool, optional): Whether to shuffle the dataset.
|
166
|
+
rank (int, optional): Process rank in distributed training. -1 for single-GPU training.
|
159
167
|
|
160
168
|
Returns:
|
161
169
|
(InfiniteDataLoader): A dataloader that can be used for training or validation.
|
170
|
+
|
171
|
+
Examples:
|
172
|
+
Create a dataloader for training
|
173
|
+
>>> dataset = YOLODataset(...)
|
174
|
+
>>> dataloader = build_dataloader(dataset, batch=16, workers=4, shuffle=True)
|
162
175
|
"""
|
163
176
|
batch = min(batch, len(dataset))
|
164
177
|
nd = torch.cuda.device_count() # number of CUDA devices
|
@@ -184,18 +197,22 @@ def check_source(source):
|
|
184
197
|
Check the type of input source and return corresponding flag values.
|
185
198
|
|
186
199
|
Args:
|
187
|
-
source (str | int | Path |
|
200
|
+
source (str | int | Path | list | tuple | np.ndarray | PIL.Image | torch.Tensor): The input source to check.
|
188
201
|
|
189
202
|
Returns:
|
190
|
-
source (str | int | Path |
|
203
|
+
source (str | int | Path | list | tuple | np.ndarray | PIL.Image | torch.Tensor): The processed source.
|
191
204
|
webcam (bool): Whether the source is a webcam.
|
192
205
|
screenshot (bool): Whether the source is a screenshot.
|
193
206
|
from_img (bool): Whether the source is an image or list of images.
|
194
207
|
in_memory (bool): Whether the source is an in-memory object.
|
195
208
|
tensor (bool): Whether the source is a torch.Tensor.
|
196
209
|
|
197
|
-
|
198
|
-
|
210
|
+
Examples:
|
211
|
+
Check a file path source
|
212
|
+
>>> source, webcam, screenshot, from_img, in_memory, tensor = check_source("image.jpg")
|
213
|
+
|
214
|
+
Check a webcam source
|
215
|
+
>>> source, webcam, screenshot, from_img, in_memory, tensor = check_source(0)
|
199
216
|
"""
|
200
217
|
webcam, screenshot, from_img, in_memory, tensor = False, False, False, False, False
|
201
218
|
if isinstance(source, (str, int, Path)): # int for local usb camera
|
@@ -222,7 +239,7 @@ def check_source(source):
|
|
222
239
|
return source, webcam, screenshot, from_img, in_memory, tensor
|
223
240
|
|
224
241
|
|
225
|
-
def load_inference_source(source=None, batch=1, vid_stride=1, buffer=False, channels=3):
|
242
|
+
def load_inference_source(source=None, batch: int = 1, vid_stride: int = 1, buffer: bool = False, channels: int = 3):
|
226
243
|
"""
|
227
244
|
Load an inference source for object detection and apply necessary transformations.
|
228
245
|
|
@@ -231,10 +248,17 @@ def load_inference_source(source=None, batch=1, vid_stride=1, buffer=False, chan
|
|
231
248
|
batch (int, optional): Batch size for dataloaders.
|
232
249
|
vid_stride (int, optional): The frame interval for video sources.
|
233
250
|
buffer (bool, optional): Whether stream frames will be buffered.
|
234
|
-
channels (int): The number of input channels for the model.
|
251
|
+
channels (int, optional): The number of input channels for the model.
|
235
252
|
|
236
253
|
Returns:
|
237
254
|
(Dataset): A dataset object for the specified input source with attached source_type attribute.
|
255
|
+
|
256
|
+
Examples:
|
257
|
+
Load an image source for inference
|
258
|
+
>>> dataset = load_inference_source("image.jpg", batch=1)
|
259
|
+
|
260
|
+
Load a video stream source
|
261
|
+
>>> dataset = load_inference_source("rtsp://example.com/stream", vid_stride=2)
|
238
262
|
"""
|
239
263
|
source, stream, screenshot, from_img, in_memory, tensor = check_source(source)
|
240
264
|
source_type = source.source_type if in_memory else SourceTypes(stream, screenshot, from_img, tensor)
|
ultralytics/data/converter.py
CHANGED
@@ -6,6 +6,7 @@ import shutil
|
|
6
6
|
from collections import defaultdict
|
7
7
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
8
8
|
from pathlib import Path
|
9
|
+
from typing import List, Optional, Union
|
9
10
|
|
10
11
|
import cv2
|
11
12
|
import numpy as np
|
@@ -16,13 +17,13 @@ from ultralytics.utils.downloads import download, zip_directory
|
|
16
17
|
from ultralytics.utils.files import increment_path
|
17
18
|
|
18
19
|
|
19
|
-
def coco91_to_coco80_class():
|
20
|
+
def coco91_to_coco80_class() -> List[int]:
|
20
21
|
"""
|
21
|
-
|
22
|
+
Convert 91-index COCO class IDs to 80-index COCO class IDs.
|
22
23
|
|
23
24
|
Returns:
|
24
|
-
(
|
25
|
-
corresponding 91-index class ID.
|
25
|
+
(List[int]): A list of 91 class IDs where the index represents the 80-index class ID and the value
|
26
|
+
is the corresponding 91-index class ID.
|
26
27
|
"""
|
27
28
|
return [
|
28
29
|
0,
|
@@ -119,10 +120,15 @@ def coco91_to_coco80_class():
|
|
119
120
|
]
|
120
121
|
|
121
122
|
|
122
|
-
def coco80_to_coco91_class():
|
123
|
+
def coco80_to_coco91_class() -> List[int]:
|
123
124
|
r"""
|
124
|
-
|
125
|
-
|
125
|
+
Convert 80-index (val2014) to 91-index (paper).
|
126
|
+
|
127
|
+
Returns:
|
128
|
+
(List[int]): A list of 80 class IDs where each value is the corresponding 91-index class ID.
|
129
|
+
|
130
|
+
References:
|
131
|
+
https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
|
126
132
|
|
127
133
|
Examples:
|
128
134
|
>>> import numpy as np
|
@@ -220,15 +226,15 @@ def coco80_to_coco91_class():
|
|
220
226
|
|
221
227
|
|
222
228
|
def convert_coco(
|
223
|
-
labels_dir="../coco/annotations/",
|
224
|
-
save_dir="coco_converted/",
|
225
|
-
use_segments=False,
|
226
|
-
use_keypoints=False,
|
227
|
-
cls91to80=True,
|
228
|
-
lvis=False,
|
229
|
+
labels_dir: str = "../coco/annotations/",
|
230
|
+
save_dir: str = "coco_converted/",
|
231
|
+
use_segments: bool = False,
|
232
|
+
use_keypoints: bool = False,
|
233
|
+
cls91to80: bool = True,
|
234
|
+
lvis: bool = False,
|
229
235
|
):
|
230
236
|
"""
|
231
|
-
|
237
|
+
Convert COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.
|
232
238
|
|
233
239
|
Args:
|
234
240
|
labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
|
@@ -246,15 +252,8 @@ def convert_coco(
|
|
246
252
|
|
247
253
|
Convert LVIS annotations to YOLO format
|
248
254
|
>>> convert_coco(
|
249
|
-
|
250
|
-
... use_segments=True,
|
251
|
-
... use_keypoints=False,
|
252
|
-
... cls91to80=False,
|
253
|
-
... lvis=True
|
255
|
+
... "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True
|
254
256
|
... )
|
255
|
-
|
256
|
-
Output:
|
257
|
-
Generates output files in the specified output directory.
|
258
257
|
"""
|
259
258
|
# Create dataset directory
|
260
259
|
save_dir = increment_path(save_dir) # increment if save directory already exists
|
@@ -347,12 +346,12 @@ def convert_coco(
|
|
347
346
|
LOGGER.info(f"{'LVIS' if lvis else 'COCO'} data converted successfully.\nResults saved to {save_dir.resolve()}")
|
348
347
|
|
349
348
|
|
350
|
-
def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
|
349
|
+
def convert_segment_masks_to_yolo_seg(masks_dir: str, output_dir: str, classes: int):
|
351
350
|
"""
|
352
|
-
|
351
|
+
Convert a dataset of segmentation mask images to the YOLO segmentation format.
|
353
352
|
|
354
|
-
This function takes the directory containing the binary format mask images and converts them into YOLO segmentation
|
355
|
-
The converted masks are saved in the specified output directory.
|
353
|
+
This function takes the directory containing the binary format mask images and converts them into YOLO segmentation
|
354
|
+
format. The converted masks are saved in the specified output directory.
|
356
355
|
|
357
356
|
Args:
|
358
357
|
masks_dir (str): The path to the directory where all mask images (png, jpg) are stored.
|
@@ -425,7 +424,7 @@ def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
|
|
425
424
|
|
426
425
|
def convert_dota_to_yolo_obb(dota_root_path: str):
|
427
426
|
"""
|
428
|
-
|
427
|
+
Convert DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.
|
429
428
|
|
430
429
|
The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each image, it reads the
|
431
430
|
associated label from the original labels directory and writes new labels in YOLO OBB format to a new directory.
|
@@ -479,8 +478,8 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
|
|
479
478
|
"helipad": 17,
|
480
479
|
}
|
481
480
|
|
482
|
-
def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
|
483
|
-
"""
|
481
|
+
def convert_label(image_name: str, image_width: int, image_height: int, orig_label_dir: Path, save_dir: Path):
|
482
|
+
"""Convert a single image's DOTA annotation to YOLO OBB format and save it to a specified directory."""
|
484
483
|
orig_label_path = orig_label_dir / f"{image_name}.txt"
|
485
484
|
save_path = save_dir / f"{image_name}.txt"
|
486
485
|
|
@@ -516,7 +515,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
|
|
516
515
|
convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir)
|
517
516
|
|
518
517
|
|
519
|
-
def min_index(arr1, arr2):
|
518
|
+
def min_index(arr1: np.ndarray, arr2: np.ndarray):
|
520
519
|
"""
|
521
520
|
Find a pair of indexes with the shortest distance between two arrays of 2D points.
|
522
521
|
|
@@ -525,15 +524,17 @@ def min_index(arr1, arr2):
|
|
525
524
|
arr2 (np.ndarray): A NumPy array of shape (M, 2) representing M 2D points.
|
526
525
|
|
527
526
|
Returns:
|
528
|
-
(
|
527
|
+
idx1 (int): Index of the point in arr1 with the shortest distance.
|
528
|
+
idx2 (int): Index of the point in arr2 with the shortest distance.
|
529
529
|
"""
|
530
530
|
dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1)
|
531
531
|
return np.unravel_index(np.argmin(dis, axis=None), dis.shape)
|
532
532
|
|
533
533
|
|
534
|
-
def merge_multi_segment(segments):
|
534
|
+
def merge_multi_segment(segments: List[List]):
|
535
535
|
"""
|
536
536
|
Merge multiple segments into one list by connecting the coordinates with the minimum distance between each segment.
|
537
|
+
|
537
538
|
This function connects these coordinates with a thin line to merge all segments into one.
|
538
539
|
|
539
540
|
Args:
|
@@ -581,17 +582,19 @@ def merge_multi_segment(segments):
|
|
581
582
|
return s
|
582
583
|
|
583
584
|
|
584
|
-
def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None):
|
585
|
+
def yolo_bbox2segment(
|
586
|
+
im_dir: Union[str, Path], save_dir: Optional[Union[str, Path]] = None, sam_model: str = "sam_b.pt", device=None
|
587
|
+
):
|
585
588
|
"""
|
586
|
-
|
587
|
-
|
589
|
+
Convert existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB) in
|
590
|
+
YOLO format. Generate segmentation data using SAM auto-annotator as needed.
|
588
591
|
|
589
592
|
Args:
|
590
593
|
im_dir (str | Path): Path to image directory to convert.
|
591
|
-
save_dir (str | Path): Path to save the generated labels, labels will be saved
|
594
|
+
save_dir (str | Path, optional): Path to save the generated labels, labels will be saved
|
592
595
|
into `labels-segment` in the same directory level of `im_dir` if save_dir is None.
|
593
596
|
sam_model (str): Segmentation model to use for intermediate segmentation data.
|
594
|
-
device (int | str): The specific device to run SAM models.
|
597
|
+
device (int | str, optional): The specific device to run SAM models.
|
595
598
|
|
596
599
|
Notes:
|
597
600
|
The input directory structure assumed for dataset:
|
@@ -647,7 +650,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None):
|
|
647
650
|
|
648
651
|
def create_synthetic_coco_dataset():
|
649
652
|
"""
|
650
|
-
|
653
|
+
Create a synthetic COCO dataset with random images based on filenames from label lists.
|
651
654
|
|
652
655
|
This function downloads COCO labels, reads image filenames from label list files,
|
653
656
|
creates synthetic images for train2017 and val2017 subsets, and organizes
|
@@ -664,8 +667,8 @@ def create_synthetic_coco_dataset():
|
|
664
667
|
- Reads image filenames from train2017.txt and val2017.txt files.
|
665
668
|
"""
|
666
669
|
|
667
|
-
def create_synthetic_image(image_file):
|
668
|
-
"""
|
670
|
+
def create_synthetic_image(image_file: Path):
|
671
|
+
"""Generate synthetic images with random sizes and colors for dataset augmentation or testing purposes."""
|
669
672
|
if not image_file.exists():
|
670
673
|
size = (random.randint(480, 640), random.randint(480, 640))
|
671
674
|
Image.new(
|
@@ -703,7 +706,7 @@ def create_synthetic_coco_dataset():
|
|
703
706
|
LOGGER.info("Synthetic COCO dataset created successfully.")
|
704
707
|
|
705
708
|
|
706
|
-
def convert_to_multispectral(path, n_channels=10, replace=False, zip=False):
|
709
|
+
def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, replace: bool = False, zip: bool = False):
|
707
710
|
"""
|
708
711
|
Convert RGB images to multispectral images by interpolating across wavelength bands.
|
709
712
|
|
@@ -717,9 +720,10 @@ def convert_to_multispectral(path, n_channels=10, replace=False, zip=False):
|
|
717
720
|
zip (bool): Whether to zip the converted images into a zip file.
|
718
721
|
|
719
722
|
Examples:
|
720
|
-
|
723
|
+
Convert a single image
|
721
724
|
>>> convert_to_multispectral("path/to/image.jpg", n_channels=10)
|
722
|
-
|
725
|
+
|
726
|
+
Convert a dataset
|
723
727
|
>>> convert_to_multispectral("../datasets/coco8", n_channels=10)
|
724
728
|
"""
|
725
729
|
from scipy.interpolate import interp1d
|