dgenerate-ultralytics-headless 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/METADATA +1 -1
  2. dgenerate_ultralytics_headless-8.3.144.dist-info/RECORD +272 -0
  3. tests/conftest.py +7 -24
  4. tests/test_cli.py +1 -1
  5. tests/test_cuda.py +7 -2
  6. tests/test_engine.py +7 -8
  7. tests/test_exports.py +16 -16
  8. tests/test_integrations.py +1 -1
  9. tests/test_solutions.py +11 -11
  10. ultralytics/__init__.py +1 -1
  11. ultralytics/cfg/__init__.py +16 -13
  12. ultralytics/data/annotator.py +6 -5
  13. ultralytics/data/augment.py +127 -126
  14. ultralytics/data/base.py +54 -51
  15. ultralytics/data/build.py +47 -23
  16. ultralytics/data/converter.py +47 -43
  17. ultralytics/data/dataset.py +51 -50
  18. ultralytics/data/loaders.py +77 -44
  19. ultralytics/data/split.py +22 -9
  20. ultralytics/data/split_dota.py +63 -39
  21. ultralytics/data/utils.py +59 -39
  22. ultralytics/engine/exporter.py +79 -27
  23. ultralytics/engine/model.py +39 -39
  24. ultralytics/engine/predictor.py +37 -28
  25. ultralytics/engine/results.py +187 -157
  26. ultralytics/engine/trainer.py +36 -19
  27. ultralytics/engine/tuner.py +12 -9
  28. ultralytics/engine/validator.py +7 -9
  29. ultralytics/hub/__init__.py +11 -13
  30. ultralytics/hub/auth.py +22 -2
  31. ultralytics/hub/google/__init__.py +19 -19
  32. ultralytics/hub/session.py +37 -51
  33. ultralytics/hub/utils.py +19 -5
  34. ultralytics/models/fastsam/model.py +30 -12
  35. ultralytics/models/fastsam/predict.py +5 -6
  36. ultralytics/models/fastsam/utils.py +3 -3
  37. ultralytics/models/fastsam/val.py +10 -6
  38. ultralytics/models/nas/model.py +9 -5
  39. ultralytics/models/nas/predict.py +6 -6
  40. ultralytics/models/nas/val.py +3 -3
  41. ultralytics/models/rtdetr/model.py +7 -6
  42. ultralytics/models/rtdetr/predict.py +14 -7
  43. ultralytics/models/rtdetr/train.py +10 -4
  44. ultralytics/models/rtdetr/val.py +36 -9
  45. ultralytics/models/sam/amg.py +30 -12
  46. ultralytics/models/sam/build.py +22 -22
  47. ultralytics/models/sam/model.py +10 -9
  48. ultralytics/models/sam/modules/blocks.py +76 -80
  49. ultralytics/models/sam/modules/decoders.py +6 -8
  50. ultralytics/models/sam/modules/encoders.py +23 -26
  51. ultralytics/models/sam/modules/memory_attention.py +13 -1
  52. ultralytics/models/sam/modules/sam.py +57 -26
  53. ultralytics/models/sam/modules/tiny_encoder.py +232 -237
  54. ultralytics/models/sam/modules/transformer.py +13 -13
  55. ultralytics/models/sam/modules/utils.py +11 -19
  56. ultralytics/models/sam/predict.py +114 -101
  57. ultralytics/models/utils/loss.py +98 -77
  58. ultralytics/models/utils/ops.py +116 -67
  59. ultralytics/models/yolo/classify/predict.py +5 -5
  60. ultralytics/models/yolo/classify/train.py +32 -28
  61. ultralytics/models/yolo/classify/val.py +7 -8
  62. ultralytics/models/yolo/detect/predict.py +1 -0
  63. ultralytics/models/yolo/detect/train.py +15 -14
  64. ultralytics/models/yolo/detect/val.py +37 -36
  65. ultralytics/models/yolo/model.py +106 -23
  66. ultralytics/models/yolo/obb/predict.py +3 -4
  67. ultralytics/models/yolo/obb/train.py +14 -6
  68. ultralytics/models/yolo/obb/val.py +29 -23
  69. ultralytics/models/yolo/pose/predict.py +9 -8
  70. ultralytics/models/yolo/pose/train.py +24 -16
  71. ultralytics/models/yolo/pose/val.py +44 -26
  72. ultralytics/models/yolo/segment/predict.py +5 -5
  73. ultralytics/models/yolo/segment/train.py +11 -7
  74. ultralytics/models/yolo/segment/val.py +2 -2
  75. ultralytics/models/yolo/world/train.py +33 -23
  76. ultralytics/models/yolo/world/train_world.py +11 -3
  77. ultralytics/models/yolo/yoloe/predict.py +11 -11
  78. ultralytics/models/yolo/yoloe/train.py +73 -21
  79. ultralytics/models/yolo/yoloe/train_seg.py +10 -7
  80. ultralytics/models/yolo/yoloe/val.py +42 -18
  81. ultralytics/nn/autobackend.py +59 -15
  82. ultralytics/nn/modules/__init__.py +4 -4
  83. ultralytics/nn/modules/activation.py +4 -1
  84. ultralytics/nn/modules/block.py +178 -111
  85. ultralytics/nn/modules/conv.py +6 -5
  86. ultralytics/nn/modules/head.py +469 -121
  87. ultralytics/nn/modules/transformer.py +147 -58
  88. ultralytics/nn/tasks.py +227 -20
  89. ultralytics/nn/text_model.py +30 -33
  90. ultralytics/solutions/ai_gym.py +1 -1
  91. ultralytics/solutions/analytics.py +7 -4
  92. ultralytics/solutions/config.py +10 -10
  93. ultralytics/solutions/distance_calculation.py +11 -10
  94. ultralytics/solutions/heatmap.py +1 -1
  95. ultralytics/solutions/instance_segmentation.py +6 -3
  96. ultralytics/solutions/object_blurrer.py +3 -3
  97. ultralytics/solutions/object_counter.py +15 -7
  98. ultralytics/solutions/object_cropper.py +3 -2
  99. ultralytics/solutions/parking_management.py +29 -28
  100. ultralytics/solutions/queue_management.py +6 -6
  101. ultralytics/solutions/region_counter.py +10 -3
  102. ultralytics/solutions/security_alarm.py +3 -3
  103. ultralytics/solutions/similarity_search.py +85 -24
  104. ultralytics/solutions/solutions.py +184 -75
  105. ultralytics/solutions/speed_estimation.py +28 -22
  106. ultralytics/solutions/streamlit_inference.py +17 -12
  107. ultralytics/solutions/trackzone.py +4 -4
  108. ultralytics/trackers/basetrack.py +16 -23
  109. ultralytics/trackers/bot_sort.py +30 -20
  110. ultralytics/trackers/byte_tracker.py +70 -64
  111. ultralytics/trackers/track.py +4 -8
  112. ultralytics/trackers/utils/gmc.py +31 -58
  113. ultralytics/trackers/utils/kalman_filter.py +37 -37
  114. ultralytics/trackers/utils/matching.py +1 -1
  115. ultralytics/utils/__init__.py +105 -89
  116. ultralytics/utils/autobatch.py +16 -3
  117. ultralytics/utils/autodevice.py +54 -24
  118. ultralytics/utils/benchmarks.py +42 -28
  119. ultralytics/utils/callbacks/base.py +3 -3
  120. ultralytics/utils/callbacks/clearml.py +9 -9
  121. ultralytics/utils/callbacks/comet.py +67 -25
  122. ultralytics/utils/callbacks/dvc.py +7 -10
  123. ultralytics/utils/callbacks/mlflow.py +2 -5
  124. ultralytics/utils/callbacks/neptune.py +7 -13
  125. ultralytics/utils/callbacks/raytune.py +1 -1
  126. ultralytics/utils/callbacks/tensorboard.py +5 -6
  127. ultralytics/utils/callbacks/wb.py +14 -14
  128. ultralytics/utils/checks.py +14 -13
  129. ultralytics/utils/dist.py +5 -5
  130. ultralytics/utils/downloads.py +94 -67
  131. ultralytics/utils/errors.py +5 -5
  132. ultralytics/utils/export.py +61 -47
  133. ultralytics/utils/files.py +23 -22
  134. ultralytics/utils/instance.py +48 -52
  135. ultralytics/utils/loss.py +78 -40
  136. ultralytics/utils/metrics.py +186 -130
  137. ultralytics/utils/ops.py +186 -190
  138. ultralytics/utils/patches.py +15 -17
  139. ultralytics/utils/plotting.py +71 -27
  140. ultralytics/utils/tal.py +21 -15
  141. ultralytics/utils/torch_utils.py +53 -50
  142. ultralytics/utils/triton.py +5 -4
  143. ultralytics/utils/tuner.py +5 -5
  144. dgenerate_ultralytics_headless-8.3.143.dist-info/RECORD +0 -272
  145. {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/WHEEL +0 -0
  146. {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/entry_points.txt +0 -0
  147. {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/licenses/LICENSE +0 -0
  148. {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/top_level.txt +0 -0
ultralytics/data/base.py CHANGED
@@ -7,7 +7,7 @@ import random
7
7
  from copy import deepcopy
8
8
  from multiprocessing.pool import ThreadPool
9
9
  from pathlib import Path
10
- from typing import Optional
10
+ from typing import Any, Dict, List, Optional, Tuple, Union
11
11
 
12
12
  import cv2
13
13
  import numpy as np
@@ -32,6 +32,7 @@ class BaseDataset(Dataset):
32
32
  single_cls (bool): Whether to treat all objects as a single class.
33
33
  prefix (str): Prefix to print in log messages.
34
34
  fraction (float): Fraction of dataset to utilize.
35
+ channels (int): Number of channels in the images (1 for grayscale, 3 for RGB).
35
36
  cv2_flag (int): OpenCV flag for reading images.
36
37
  im_files (List[str]): List of image file paths.
37
38
  labels (List[Dict]): List of label data dictionaries.
@@ -48,6 +49,8 @@ class BaseDataset(Dataset):
48
49
  npy_files (List[Path]): List of numpy file paths.
49
50
  cache (str): Cache images to RAM or disk during training.
50
51
  transforms (callable): Image transformation function.
52
+ batch_shapes (np.ndarray): Batch shapes for rectangular training.
53
+ batch (np.ndarray): Batch index of each image.
51
54
 
52
55
  Methods:
53
56
  get_img_files: Read image files from the specified path.
@@ -66,39 +69,39 @@ class BaseDataset(Dataset):
66
69
 
67
70
  def __init__(
68
71
  self,
69
- img_path,
70
- imgsz=640,
71
- cache=False,
72
- augment=True,
73
- hyp=DEFAULT_CFG,
74
- prefix="",
75
- rect=False,
76
- batch_size=16,
77
- stride=32,
78
- pad=0.5,
79
- single_cls=False,
80
- classes=None,
81
- fraction=1.0,
82
- channels=3,
72
+ img_path: Union[str, List[str]],
73
+ imgsz: int = 640,
74
+ cache: Union[bool, str] = False,
75
+ augment: bool = True,
76
+ hyp: Dict[str, Any] = DEFAULT_CFG,
77
+ prefix: str = "",
78
+ rect: bool = False,
79
+ batch_size: int = 16,
80
+ stride: int = 32,
81
+ pad: float = 0.5,
82
+ single_cls: bool = False,
83
+ classes: Optional[List[int]] = None,
84
+ fraction: float = 1.0,
85
+ channels: int = 3,
83
86
  ):
84
87
  """
85
88
  Initialize BaseDataset with given configuration and options.
86
89
 
87
90
  Args:
88
- img_path (str): Path to the folder containing images.
89
- imgsz (int, optional): Image size for resizing.
90
- cache (bool | str, optional): Cache images to RAM or disk during training.
91
- augment (bool, optional): If True, data augmentation is applied.
92
- hyp (dict, optional): Hyperparameters to apply data augmentation.
93
- prefix (str, optional): Prefix to print in log messages.
94
- rect (bool, optional): If True, rectangular training is used.
95
- batch_size (int, optional): Size of batches.
96
- stride (int, optional): Stride used in the model.
97
- pad (float, optional): Padding value.
98
- single_cls (bool, optional): If True, single class training is used.
99
- classes (list, optional): List of included classes.
100
- fraction (float, optional): Fraction of dataset to utilize.
101
- channels (int, optional): Number of channels in the images (1 for grayscale, 3 for RGB).
91
+ img_path (str | List[str]): Path to the folder containing images or list of image paths.
92
+ imgsz (int): Image size for resizing.
93
+ cache (bool | str): Cache images to RAM or disk during training.
94
+ augment (bool): If True, data augmentation is applied.
95
+ hyp (Dict[str, Any]): Hyperparameters to apply data augmentation.
96
+ prefix (str): Prefix to print in log messages.
97
+ rect (bool): If True, rectangular training is used.
98
+ batch_size (int): Size of batches.
99
+ stride (int): Stride used in the model.
100
+ pad (float): Padding value.
101
+ single_cls (bool): If True, single class training is used.
102
+ classes (List[int], optional): List of included classes.
103
+ fraction (float): Fraction of dataset to utilize.
104
+ channels (int): Number of channels in the images (1 for grayscale, 3 for RGB).
102
105
  """
103
106
  super().__init__()
104
107
  self.img_path = img_path
@@ -142,7 +145,7 @@ class BaseDataset(Dataset):
142
145
  # Transforms
143
146
  self.transforms = self.build_transforms(hyp=hyp)
144
147
 
145
- def get_img_files(self, img_path):
148
+ def get_img_files(self, img_path: Union[str, List[str]]) -> List[str]:
146
149
  """
147
150
  Read image files from the specified path.
148
151
 
@@ -180,12 +183,12 @@ class BaseDataset(Dataset):
180
183
  check_file_speeds(im_files, prefix=self.prefix) # check image read speeds
181
184
  return im_files
182
185
 
183
- def update_labels(self, include_class: Optional[list]):
186
+ def update_labels(self, include_class: Optional[List[int]]) -> None:
184
187
  """
185
188
  Update labels to include only specified classes.
186
189
 
187
190
  Args:
188
- include_class (list, optional): List of classes to include. If None, all classes are included.
191
+ include_class (List[int], optional): List of classes to include. If None, all classes are included.
189
192
  """
190
193
  include_class_array = np.array(include_class).reshape(1, -1)
191
194
  for i in range(len(self.labels)):
@@ -204,18 +207,18 @@ class BaseDataset(Dataset):
204
207
  if self.single_cls:
205
208
  self.labels[i]["cls"][:, 0] = 0
206
209
 
207
- def load_image(self, i, rect_mode=True):
210
+ def load_image(self, i: int, rect_mode: bool = True) -> Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]]:
208
211
  """
209
212
  Load an image from dataset index 'i'.
210
213
 
211
214
  Args:
212
215
  i (int): Index of the image to load.
213
- rect_mode (bool, optional): Whether to use rectangular resizing.
216
+ rect_mode (bool): Whether to use rectangular resizing.
214
217
 
215
218
  Returns:
216
- (np.ndarray): Loaded image as a NumPy array.
217
- (Tuple[int, int]): Original image dimensions in (height, width) format.
218
- (Tuple[int, int]): Resized image dimensions in (height, width) format.
219
+ im (np.ndarray): Loaded image as a NumPy array.
220
+ hw_original (Tuple[int, int]): Original image dimensions in (height, width) format.
221
+ hw_resized (Tuple[int, int]): Resized image dimensions in (height, width) format.
219
222
 
220
223
  Raises:
221
224
  FileNotFoundError: If the image file is not found.
@@ -258,7 +261,7 @@ class BaseDataset(Dataset):
258
261
 
259
262
  return self.ims[i], self.im_hw0[i], self.im_hw[i]
260
263
 
261
- def cache_images(self):
264
+ def cache_images(self) -> None:
262
265
  """Cache images to memory or disk for faster training."""
263
266
  b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes
264
267
  fcn, storage = (self.cache_images_to_disk, "Disk") if self.cache == "disk" else (self.load_image, "RAM")
@@ -274,18 +277,18 @@ class BaseDataset(Dataset):
274
277
  pbar.desc = f"{self.prefix}Caching images ({b / gb:.1f}GB {storage})"
275
278
  pbar.close()
276
279
 
277
- def cache_images_to_disk(self, i):
280
+ def cache_images_to_disk(self, i: int) -> None:
278
281
  """Save an image as an *.npy file for faster loading."""
279
282
  f = self.npy_files[i]
280
283
  if not f.exists():
281
284
  np.save(f.as_posix(), imread(self.im_files[i]), allow_pickle=False)
282
285
 
283
- def check_cache_disk(self, safety_margin=0.5):
286
+ def check_cache_disk(self, safety_margin: float = 0.5) -> bool:
284
287
  """
285
288
  Check if there's enough disk space for caching images.
286
289
 
287
290
  Args:
288
- safety_margin (float, optional): Safety margin factor for disk space calculation.
291
+ safety_margin (float): Safety margin factor for disk space calculation.
289
292
 
290
293
  Returns:
291
294
  (bool): True if there's enough disk space, False otherwise.
@@ -316,12 +319,12 @@ class BaseDataset(Dataset):
316
319
  return False
317
320
  return True
318
321
 
319
- def check_cache_ram(self, safety_margin=0.5):
322
+ def check_cache_ram(self, safety_margin: float = 0.5) -> bool:
320
323
  """
321
324
  Check if there's enough RAM for caching images.
322
325
 
323
326
  Args:
324
- safety_margin (float, optional): Safety margin factor for RAM calculation.
327
+ safety_margin (float): Safety margin factor for RAM calculation.
325
328
 
326
329
  Returns:
327
330
  (bool): True if there's enough RAM, False otherwise.
@@ -346,7 +349,7 @@ class BaseDataset(Dataset):
346
349
  return False
347
350
  return True
348
351
 
349
- def set_rectangle(self):
352
+ def set_rectangle(self) -> None:
350
353
  """Set the shape of bounding boxes for YOLO detections as rectangles."""
351
354
  bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int) # batch index
352
355
  nb = bi[-1] + 1 # number of batches
@@ -371,11 +374,11 @@ class BaseDataset(Dataset):
371
374
  self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride
372
375
  self.batch = bi # batch index of image
373
376
 
374
- def __getitem__(self, index):
377
+ def __getitem__(self, index: int) -> Dict[str, Any]:
375
378
  """Return transformed label information for given index."""
376
379
  return self.transforms(self.get_image_and_label(index))
377
380
 
378
- def get_image_and_label(self, index):
381
+ def get_image_and_label(self, index: int) -> Dict[str, Any]:
379
382
  """
380
383
  Get and return label information from the dataset.
381
384
 
@@ -383,7 +386,7 @@ class BaseDataset(Dataset):
383
386
  index (int): Index of the image to retrieve.
384
387
 
385
388
  Returns:
386
- (dict): Label dictionary with image and metadata.
389
+ (Dict[str, Any]): Label dictionary with image and metadata.
387
390
  """
388
391
  label = deepcopy(self.labels[index]) # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
389
392
  label.pop("shape", None) # shape is for rect, remove it
@@ -396,15 +399,15 @@ class BaseDataset(Dataset):
396
399
  label["rect_shape"] = self.batch_shapes[self.batch[index]]
397
400
  return self.update_labels_info(label)
398
401
 
399
- def __len__(self):
402
+ def __len__(self) -> int:
400
403
  """Return the length of the labels list for the dataset."""
401
404
  return len(self.labels)
402
405
 
403
- def update_labels_info(self, label):
406
+ def update_labels_info(self, label: Dict[str, Any]) -> Dict[str, Any]:
404
407
  """Custom your label format here."""
405
408
  return label
406
409
 
407
- def build_transforms(self, hyp=None):
410
+ def build_transforms(self, hyp: Optional[Dict[str, Any]] = None):
408
411
  """
409
412
  Users can customize augmentations here.
410
413
 
@@ -418,7 +421,7 @@ class BaseDataset(Dataset):
418
421
  """
419
422
  raise NotImplementedError
420
423
 
421
- def get_labels(self):
424
+ def get_labels(self) -> List[Dict[str, Any]]:
422
425
  """
423
426
  Users can customize their own format here.
424
427
 
ultralytics/data/build.py CHANGED
@@ -3,6 +3,7 @@
3
3
  import os
4
4
  import random
5
5
  from pathlib import Path
6
+ from typing import Any, Iterator
6
7
 
7
8
  import numpy as np
8
9
  import torch
@@ -27,33 +28,40 @@ from ultralytics.utils.checks import check_file
27
28
 
28
29
  class InfiniteDataLoader(dataloader.DataLoader):
29
30
  """
30
- Dataloader that reuses workers.
31
+ Dataloader that reuses workers for infinite iteration.
31
32
 
32
33
  This dataloader extends the PyTorch DataLoader to provide infinite recycling of workers, which improves efficiency
33
- for training loops that need to iterate through the dataset multiple times.
34
+ for training loops that need to iterate through the dataset multiple times without recreating workers.
34
35
 
35
36
  Attributes:
36
37
  batch_sampler (_RepeatSampler): A sampler that repeats indefinitely.
37
38
  iterator (Iterator): The iterator from the parent DataLoader.
38
39
 
39
40
  Methods:
40
- __len__: Returns the length of the batch sampler's sampler.
41
- __iter__: Creates a sampler that repeats indefinitely.
42
- __del__: Ensures workers are properly terminated.
43
- reset: Resets the iterator, useful when modifying dataset settings during training.
41
+ __len__: Return the length of the batch sampler's sampler.
42
+ __iter__: Create a sampler that repeats indefinitely.
43
+ __del__: Ensure workers are properly terminated.
44
+ reset: Reset the iterator, useful when modifying dataset settings during training.
45
+
46
+ Examples:
47
+ Create an infinite dataloader for training
48
+ >>> dataset = YOLODataset(...)
49
+ >>> dataloader = InfiniteDataLoader(dataset, batch_size=16, shuffle=True)
50
+ >>> for batch in dataloader: # Infinite iteration
51
+ >>> train_step(batch)
44
52
  """
45
53
 
46
- def __init__(self, *args, **kwargs):
54
+ def __init__(self, *args: Any, **kwargs: Any):
47
55
  """Initialize the InfiniteDataLoader with the same arguments as DataLoader."""
48
56
  super().__init__(*args, **kwargs)
49
57
  object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
50
58
  self.iterator = super().__iter__()
51
59
 
52
- def __len__(self):
60
+ def __len__(self) -> int:
53
61
  """Return the length of the batch sampler's sampler."""
54
62
  return len(self.batch_sampler.sampler)
55
63
 
56
- def __iter__(self):
64
+ def __iter__(self) -> Iterator:
57
65
  """Create an iterator that yields indefinitely from the underlying iterator."""
58
66
  for _ in range(len(self)):
59
67
  yield next(self.iterator)
@@ -77,26 +85,26 @@ class InfiniteDataLoader(dataloader.DataLoader):
77
85
 
78
86
  class _RepeatSampler:
79
87
  """
80
- Sampler that repeats forever.
88
+ Sampler that repeats forever for infinite iteration.
81
89
 
82
90
  This sampler wraps another sampler and yields its contents indefinitely, allowing for infinite iteration
83
- over a dataset.
91
+ over a dataset without recreating the sampler.
84
92
 
85
93
  Attributes:
86
94
  sampler (Dataset.sampler): The sampler to repeat.
87
95
  """
88
96
 
89
- def __init__(self, sampler):
97
+ def __init__(self, sampler: Any):
90
98
  """Initialize the _RepeatSampler with a sampler to repeat indefinitely."""
91
99
  self.sampler = sampler
92
100
 
93
- def __iter__(self):
101
+ def __iter__(self) -> Iterator:
94
102
  """Iterate over the sampler indefinitely, yielding its contents."""
95
103
  while True:
96
104
  yield from iter(self.sampler)
97
105
 
98
106
 
99
- def seed_worker(worker_id): # noqa
107
+ def seed_worker(worker_id: int): # noqa
100
108
  """Set dataloader worker seed for reproducibility across worker processes."""
101
109
  worker_seed = torch.initial_seed() % 2**32
102
110
  np.random.seed(worker_seed)
@@ -146,7 +154,7 @@ def build_grounding(cfg, img_path, json_file, batch, mode="train", rect=False, s
146
154
  )
147
155
 
148
156
 
149
- def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
157
+ def build_dataloader(dataset, batch: int, workers: int, shuffle: bool = True, rank: int = -1):
150
158
  """
151
159
  Create and return an InfiniteDataLoader or DataLoader for training or validation.
152
160
 
@@ -154,11 +162,16 @@ def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
154
162
  dataset (Dataset): Dataset to load data from.
155
163
  batch (int): Batch size for the dataloader.
156
164
  workers (int): Number of worker threads for loading data.
157
- shuffle (bool): Whether to shuffle the dataset.
158
- rank (int): Process rank in distributed training. -1 for single-GPU training.
165
+ shuffle (bool, optional): Whether to shuffle the dataset.
166
+ rank (int, optional): Process rank in distributed training. -1 for single-GPU training.
159
167
 
160
168
  Returns:
161
169
  (InfiniteDataLoader): A dataloader that can be used for training or validation.
170
+
171
+ Examples:
172
+ Create a dataloader for training
173
+ >>> dataset = YOLODataset(...)
174
+ >>> dataloader = build_dataloader(dataset, batch=16, workers=4, shuffle=True)
162
175
  """
163
176
  batch = min(batch, len(dataset))
164
177
  nd = torch.cuda.device_count() # number of CUDA devices
@@ -184,18 +197,22 @@ def check_source(source):
184
197
  Check the type of input source and return corresponding flag values.
185
198
 
186
199
  Args:
187
- source (str | int | Path | List | Tuple | np.ndarray | PIL.Image | torch.Tensor): The input source to check.
200
+ source (str | int | Path | list | tuple | np.ndarray | PIL.Image | torch.Tensor): The input source to check.
188
201
 
189
202
  Returns:
190
- source (str | int | Path | List | Tuple | np.ndarray | PIL.Image | torch.Tensor): The processed source.
203
+ source (str | int | Path | list | tuple | np.ndarray | PIL.Image | torch.Tensor): The processed source.
191
204
  webcam (bool): Whether the source is a webcam.
192
205
  screenshot (bool): Whether the source is a screenshot.
193
206
  from_img (bool): Whether the source is an image or list of images.
194
207
  in_memory (bool): Whether the source is an in-memory object.
195
208
  tensor (bool): Whether the source is a torch.Tensor.
196
209
 
197
- Raises:
198
- TypeError: If the source type is unsupported.
210
+ Examples:
211
+ Check a file path source
212
+ >>> source, webcam, screenshot, from_img, in_memory, tensor = check_source("image.jpg")
213
+
214
+ Check a webcam source
215
+ >>> source, webcam, screenshot, from_img, in_memory, tensor = check_source(0)
199
216
  """
200
217
  webcam, screenshot, from_img, in_memory, tensor = False, False, False, False, False
201
218
  if isinstance(source, (str, int, Path)): # int for local usb camera
@@ -222,7 +239,7 @@ def check_source(source):
222
239
  return source, webcam, screenshot, from_img, in_memory, tensor
223
240
 
224
241
 
225
- def load_inference_source(source=None, batch=1, vid_stride=1, buffer=False, channels=3):
242
+ def load_inference_source(source=None, batch: int = 1, vid_stride: int = 1, buffer: bool = False, channels: int = 3):
226
243
  """
227
244
  Load an inference source for object detection and apply necessary transformations.
228
245
 
@@ -231,10 +248,17 @@ def load_inference_source(source=None, batch=1, vid_stride=1, buffer=False, chan
231
248
  batch (int, optional): Batch size for dataloaders.
232
249
  vid_stride (int, optional): The frame interval for video sources.
233
250
  buffer (bool, optional): Whether stream frames will be buffered.
234
- channels (int): The number of input channels for the model.
251
+ channels (int, optional): The number of input channels for the model.
235
252
 
236
253
  Returns:
237
254
  (Dataset): A dataset object for the specified input source with attached source_type attribute.
255
+
256
+ Examples:
257
+ Load an image source for inference
258
+ >>> dataset = load_inference_source("image.jpg", batch=1)
259
+
260
+ Load a video stream source
261
+ >>> dataset = load_inference_source("rtsp://example.com/stream", vid_stride=2)
238
262
  """
239
263
  source, stream, screenshot, from_img, in_memory, tensor = check_source(source)
240
264
  source_type = source.source_type if in_memory else SourceTypes(stream, screenshot, from_img, tensor)
ultralytics/data/converter.py CHANGED
@@ -6,6 +6,7 @@ import shutil
6
6
  from collections import defaultdict
7
7
  from concurrent.futures import ThreadPoolExecutor, as_completed
8
8
  from pathlib import Path
9
+ from typing import List, Optional, Union
9
10
 
10
11
  import cv2
11
12
  import numpy as np
@@ -16,13 +17,13 @@ from ultralytics.utils.downloads import download, zip_directory
16
17
  from ultralytics.utils.files import increment_path
17
18
 
18
19
 
19
- def coco91_to_coco80_class():
20
+ def coco91_to_coco80_class() -> List[int]:
20
21
  """
21
- Converts 91-index COCO class IDs to 80-index COCO class IDs.
22
+ Convert 91-index COCO class IDs to 80-index COCO class IDs.
22
23
 
23
24
  Returns:
24
- (list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
25
- corresponding 91-index class ID.
25
+ (List[int]): A list of 91 class IDs where the index represents the 80-index class ID and the value
26
+ is the corresponding 91-index class ID.
26
27
  """
27
28
  return [
28
29
  0,
@@ -119,10 +120,15 @@ def coco91_to_coco80_class():
119
120
  ]
120
121
 
121
122
 
122
- def coco80_to_coco91_class():
123
+ def coco80_to_coco91_class() -> List[int]:
123
124
  r"""
124
- Converts 80-index (val2014) to 91-index (paper).
125
- For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/.
125
+ Convert 80-index (val2014) to 91-index (paper).
126
+
127
+ Returns:
128
+ (List[int]): A list of 80 class IDs where each value is the corresponding 91-index class ID.
129
+
130
+ References:
131
+ https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
126
132
 
127
133
  Examples:
128
134
  >>> import numpy as np
@@ -220,15 +226,15 @@ def coco80_to_coco91_class():
220
226
 
221
227
 
222
228
  def convert_coco(
223
- labels_dir="../coco/annotations/",
224
- save_dir="coco_converted/",
225
- use_segments=False,
226
- use_keypoints=False,
227
- cls91to80=True,
228
- lvis=False,
229
+ labels_dir: str = "../coco/annotations/",
230
+ save_dir: str = "coco_converted/",
231
+ use_segments: bool = False,
232
+ use_keypoints: bool = False,
233
+ cls91to80: bool = True,
234
+ lvis: bool = False,
229
235
  ):
230
236
  """
231
- Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.
237
+ Convert COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.
232
238
 
233
239
  Args:
234
240
  labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
@@ -246,15 +252,8 @@ def convert_coco(
246
252
 
247
253
  Convert LVIS annotations to YOLO format
248
254
  >>> convert_coco(
249
- >>> "../datasets/lvis/annotations/",
250
- ... use_segments=True,
251
- ... use_keypoints=False,
252
- ... cls91to80=False,
253
- ... lvis=True
255
+ ... "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True
254
256
  ... )
255
-
256
- Output:
257
- Generates output files in the specified output directory.
258
257
  """
259
258
  # Create dataset directory
260
259
  save_dir = increment_path(save_dir) # increment if save directory already exists
@@ -347,12 +346,12 @@ def convert_coco(
347
346
  LOGGER.info(f"{'LVIS' if lvis else 'COCO'} data converted successfully.\nResults saved to {save_dir.resolve()}")
348
347
 
349
348
 
350
- def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
349
+ def convert_segment_masks_to_yolo_seg(masks_dir: str, output_dir: str, classes: int):
351
350
  """
352
- Converts a dataset of segmentation mask images to the YOLO segmentation format.
351
+ Convert a dataset of segmentation mask images to the YOLO segmentation format.
353
352
 
354
- This function takes the directory containing the binary format mask images and converts them into YOLO segmentation format.
355
- The converted masks are saved in the specified output directory.
353
+ This function takes the directory containing the binary format mask images and converts them into YOLO segmentation
354
+ format. The converted masks are saved in the specified output directory.
356
355
 
357
356
  Args:
358
357
  masks_dir (str): The path to the directory where all mask images (png, jpg) are stored.
@@ -425,7 +424,7 @@ def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
425
424
 
426
425
  def convert_dota_to_yolo_obb(dota_root_path: str):
427
426
  """
428
- Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.
427
+ Convert DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.
429
428
 
430
429
  The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each image, it reads the
431
430
  associated label from the original labels directory and writes new labels in YOLO OBB format to a new directory.
@@ -479,8 +478,8 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
479
478
  "helipad": 17,
480
479
  }
481
480
 
482
- def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
483
- """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
481
+ def convert_label(image_name: str, image_width: int, image_height: int, orig_label_dir: Path, save_dir: Path):
482
+ """Convert a single image's DOTA annotation to YOLO OBB format and save it to a specified directory."""
484
483
  orig_label_path = orig_label_dir / f"{image_name}.txt"
485
484
  save_path = save_dir / f"{image_name}.txt"
486
485
 
@@ -516,7 +515,7 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
516
515
  convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir)
517
516
 
518
517
 
519
- def min_index(arr1, arr2):
518
+ def min_index(arr1: np.ndarray, arr2: np.ndarray):
520
519
  """
521
520
  Find a pair of indexes with the shortest distance between two arrays of 2D points.
522
521
 
@@ -525,15 +524,17 @@ def min_index(arr1, arr2):
525
524
  arr2 (np.ndarray): A NumPy array of shape (M, 2) representing M 2D points.
526
525
 
527
526
  Returns:
528
- (tuple): A tuple containing the indexes of the points with the shortest distance in arr1 and arr2 respectively.
527
+ idx1 (int): Index of the point in arr1 with the shortest distance.
528
+ idx2 (int): Index of the point in arr2 with the shortest distance.
529
529
  """
530
530
  dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1)
531
531
  return np.unravel_index(np.argmin(dis, axis=None), dis.shape)
532
532
 
533
533
 
534
- def merge_multi_segment(segments):
534
+ def merge_multi_segment(segments: List[List]):
535
535
  """
536
536
  Merge multiple segments into one list by connecting the coordinates with the minimum distance between each segment.
537
+
537
538
  This function connects these coordinates with a thin line to merge all segments into one.
538
539
 
539
540
  Args:
@@ -581,17 +582,19 @@ def merge_multi_segment(segments):
581
582
  return s
582
583
 
583
584
 
584
- def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None):
585
+ def yolo_bbox2segment(
586
+ im_dir: Union[str, Path], save_dir: Optional[Union[str, Path]] = None, sam_model: str = "sam_b.pt", device=None
587
+ ):
585
588
  """
586
- Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB)
587
- in YOLO format. Generates segmentation data using SAM auto-annotator as needed.
589
+ Convert existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB) in
590
+ YOLO format. Generate segmentation data using SAM auto-annotator as needed.
588
591
 
589
592
  Args:
590
593
  im_dir (str | Path): Path to image directory to convert.
591
- save_dir (str | Path): Path to save the generated labels, labels will be saved
594
+ save_dir (str | Path, optional): Path to save the generated labels, labels will be saved
592
595
  into `labels-segment` in the same directory level of `im_dir` if save_dir is None.
593
596
  sam_model (str): Segmentation model to use for intermediate segmentation data.
594
- device (int | str): The specific device to run SAM models.
597
+ device (int | str, optional): The specific device to run SAM models.
595
598
 
596
599
  Notes:
597
600
  The input directory structure assumed for dataset:
@@ -647,7 +650,7 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None):
647
650
 
648
651
  def create_synthetic_coco_dataset():
649
652
  """
650
- Creates a synthetic COCO dataset with random images based on filenames from label lists.
653
+ Create a synthetic COCO dataset with random images based on filenames from label lists.
651
654
 
652
655
  This function downloads COCO labels, reads image filenames from label list files,
653
656
  creates synthetic images for train2017 and val2017 subsets, and organizes
@@ -664,8 +667,8 @@ def create_synthetic_coco_dataset():
664
667
  - Reads image filenames from train2017.txt and val2017.txt files.
665
668
  """
666
669
 
667
- def create_synthetic_image(image_file):
668
- """Generates synthetic images with random sizes and colors for dataset augmentation or testing purposes."""
670
+ def create_synthetic_image(image_file: Path):
671
+ """Generate synthetic images with random sizes and colors for dataset augmentation or testing purposes."""
669
672
  if not image_file.exists():
670
673
  size = (random.randint(480, 640), random.randint(480, 640))
671
674
  Image.new(
@@ -703,7 +706,7 @@ def create_synthetic_coco_dataset():
703
706
  LOGGER.info("Synthetic COCO dataset created successfully.")
704
707
 
705
708
 
706
- def convert_to_multispectral(path, n_channels=10, replace=False, zip=False):
709
+ def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, replace: bool = False, zip: bool = False):
707
710
  """
708
711
  Convert RGB images to multispectral images by interpolating across wavelength bands.
709
712
 
@@ -717,9 +720,10 @@ def convert_to_multispectral(path, n_channels=10, replace=False, zip=False):
717
720
  zip (bool): Whether to zip the converted images into a zip file.
718
721
 
719
722
  Examples:
720
- >>> # Convert a single image
723
+ Convert a single image
721
724
  >>> convert_to_multispectral("path/to/image.jpg", n_channels=10)
722
- >>> # Convert a dataset
725
+
726
+ Convert a dataset
723
727
  >>> convert_to_multispectral("../datasets/coco8", n_channels=10)
724
728
  """
725
729
  from scipy.interpolate import interp1d