dgenerate-ultralytics-headless 8.3.196__py3-none-any.whl → 8.3.248__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +33 -34
  2. dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
  3. tests/__init__.py +5 -7
  4. tests/conftest.py +8 -15
  5. tests/test_cli.py +8 -10
  6. tests/test_cuda.py +9 -10
  7. tests/test_engine.py +29 -2
  8. tests/test_exports.py +69 -21
  9. tests/test_integrations.py +8 -11
  10. tests/test_python.py +109 -71
  11. tests/test_solutions.py +170 -159
  12. ultralytics/__init__.py +27 -9
  13. ultralytics/cfg/__init__.py +57 -64
  14. ultralytics/cfg/datasets/Argoverse.yaml +7 -6
  15. ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
  16. ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
  17. ultralytics/cfg/datasets/ImageNet.yaml +1 -1
  18. ultralytics/cfg/datasets/Objects365.yaml +19 -15
  19. ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
  20. ultralytics/cfg/datasets/VOC.yaml +19 -21
  21. ultralytics/cfg/datasets/VisDrone.yaml +5 -5
  22. ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
  23. ultralytics/cfg/datasets/coco-pose.yaml +24 -2
  24. ultralytics/cfg/datasets/coco.yaml +2 -2
  25. ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
  26. ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
  27. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  28. ultralytics/cfg/datasets/dog-pose.yaml +28 -0
  29. ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
  30. ultralytics/cfg/datasets/dota8.yaml +2 -2
  31. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
  32. ultralytics/cfg/datasets/kitti.yaml +27 -0
  33. ultralytics/cfg/datasets/lvis.yaml +7 -7
  34. ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
  35. ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
  36. ultralytics/cfg/datasets/xView.yaml +16 -16
  37. ultralytics/cfg/default.yaml +96 -94
  38. ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
  39. ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
  40. ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
  41. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
  42. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
  43. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
  44. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
  45. ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
  46. ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
  47. ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
  48. ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
  49. ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
  50. ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
  51. ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
  52. ultralytics/cfg/models/v6/yolov6.yaml +1 -1
  53. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
  54. ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
  55. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
  56. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
  57. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
  58. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
  59. ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
  60. ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
  61. ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
  62. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
  63. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
  64. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
  65. ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
  66. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
  67. ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
  68. ultralytics/cfg/trackers/botsort.yaml +16 -17
  69. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  70. ultralytics/data/__init__.py +4 -4
  71. ultralytics/data/annotator.py +3 -4
  72. ultralytics/data/augment.py +286 -476
  73. ultralytics/data/base.py +18 -26
  74. ultralytics/data/build.py +151 -26
  75. ultralytics/data/converter.py +38 -50
  76. ultralytics/data/dataset.py +47 -75
  77. ultralytics/data/loaders.py +42 -49
  78. ultralytics/data/split.py +5 -6
  79. ultralytics/data/split_dota.py +8 -15
  80. ultralytics/data/utils.py +41 -45
  81. ultralytics/engine/exporter.py +462 -462
  82. ultralytics/engine/model.py +150 -191
  83. ultralytics/engine/predictor.py +30 -40
  84. ultralytics/engine/results.py +177 -311
  85. ultralytics/engine/trainer.py +193 -120
  86. ultralytics/engine/tuner.py +77 -63
  87. ultralytics/engine/validator.py +39 -22
  88. ultralytics/hub/__init__.py +16 -19
  89. ultralytics/hub/auth.py +6 -12
  90. ultralytics/hub/google/__init__.py +7 -10
  91. ultralytics/hub/session.py +15 -25
  92. ultralytics/hub/utils.py +5 -8
  93. ultralytics/models/__init__.py +1 -1
  94. ultralytics/models/fastsam/__init__.py +1 -1
  95. ultralytics/models/fastsam/model.py +8 -10
  96. ultralytics/models/fastsam/predict.py +19 -30
  97. ultralytics/models/fastsam/utils.py +1 -2
  98. ultralytics/models/fastsam/val.py +5 -7
  99. ultralytics/models/nas/__init__.py +1 -1
  100. ultralytics/models/nas/model.py +5 -8
  101. ultralytics/models/nas/predict.py +7 -9
  102. ultralytics/models/nas/val.py +1 -2
  103. ultralytics/models/rtdetr/__init__.py +1 -1
  104. ultralytics/models/rtdetr/model.py +7 -8
  105. ultralytics/models/rtdetr/predict.py +15 -19
  106. ultralytics/models/rtdetr/train.py +10 -13
  107. ultralytics/models/rtdetr/val.py +21 -23
  108. ultralytics/models/sam/__init__.py +15 -2
  109. ultralytics/models/sam/amg.py +14 -20
  110. ultralytics/models/sam/build.py +26 -19
  111. ultralytics/models/sam/build_sam3.py +377 -0
  112. ultralytics/models/sam/model.py +29 -32
  113. ultralytics/models/sam/modules/blocks.py +83 -144
  114. ultralytics/models/sam/modules/decoders.py +22 -40
  115. ultralytics/models/sam/modules/encoders.py +44 -101
  116. ultralytics/models/sam/modules/memory_attention.py +16 -30
  117. ultralytics/models/sam/modules/sam.py +206 -79
  118. ultralytics/models/sam/modules/tiny_encoder.py +64 -83
  119. ultralytics/models/sam/modules/transformer.py +18 -28
  120. ultralytics/models/sam/modules/utils.py +174 -50
  121. ultralytics/models/sam/predict.py +2268 -366
  122. ultralytics/models/sam/sam3/__init__.py +3 -0
  123. ultralytics/models/sam/sam3/decoder.py +546 -0
  124. ultralytics/models/sam/sam3/encoder.py +529 -0
  125. ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
  126. ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
  127. ultralytics/models/sam/sam3/model_misc.py +199 -0
  128. ultralytics/models/sam/sam3/necks.py +129 -0
  129. ultralytics/models/sam/sam3/sam3_image.py +339 -0
  130. ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
  131. ultralytics/models/sam/sam3/vitdet.py +547 -0
  132. ultralytics/models/sam/sam3/vl_combiner.py +160 -0
  133. ultralytics/models/utils/loss.py +14 -26
  134. ultralytics/models/utils/ops.py +13 -17
  135. ultralytics/models/yolo/__init__.py +1 -1
  136. ultralytics/models/yolo/classify/predict.py +9 -12
  137. ultralytics/models/yolo/classify/train.py +15 -41
  138. ultralytics/models/yolo/classify/val.py +34 -32
  139. ultralytics/models/yolo/detect/predict.py +8 -11
  140. ultralytics/models/yolo/detect/train.py +13 -32
  141. ultralytics/models/yolo/detect/val.py +75 -63
  142. ultralytics/models/yolo/model.py +37 -53
  143. ultralytics/models/yolo/obb/predict.py +5 -14
  144. ultralytics/models/yolo/obb/train.py +11 -14
  145. ultralytics/models/yolo/obb/val.py +42 -39
  146. ultralytics/models/yolo/pose/__init__.py +1 -1
  147. ultralytics/models/yolo/pose/predict.py +7 -22
  148. ultralytics/models/yolo/pose/train.py +10 -22
  149. ultralytics/models/yolo/pose/val.py +40 -59
  150. ultralytics/models/yolo/segment/predict.py +16 -20
  151. ultralytics/models/yolo/segment/train.py +3 -12
  152. ultralytics/models/yolo/segment/val.py +106 -56
  153. ultralytics/models/yolo/world/train.py +12 -16
  154. ultralytics/models/yolo/world/train_world.py +11 -34
  155. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  156. ultralytics/models/yolo/yoloe/predict.py +16 -23
  157. ultralytics/models/yolo/yoloe/train.py +31 -56
  158. ultralytics/models/yolo/yoloe/train_seg.py +5 -10
  159. ultralytics/models/yolo/yoloe/val.py +16 -21
  160. ultralytics/nn/__init__.py +7 -7
  161. ultralytics/nn/autobackend.py +152 -80
  162. ultralytics/nn/modules/__init__.py +60 -60
  163. ultralytics/nn/modules/activation.py +4 -6
  164. ultralytics/nn/modules/block.py +133 -217
  165. ultralytics/nn/modules/conv.py +52 -97
  166. ultralytics/nn/modules/head.py +64 -116
  167. ultralytics/nn/modules/transformer.py +79 -89
  168. ultralytics/nn/modules/utils.py +16 -21
  169. ultralytics/nn/tasks.py +111 -156
  170. ultralytics/nn/text_model.py +40 -67
  171. ultralytics/solutions/__init__.py +12 -12
  172. ultralytics/solutions/ai_gym.py +11 -17
  173. ultralytics/solutions/analytics.py +15 -16
  174. ultralytics/solutions/config.py +5 -6
  175. ultralytics/solutions/distance_calculation.py +10 -13
  176. ultralytics/solutions/heatmap.py +7 -13
  177. ultralytics/solutions/instance_segmentation.py +5 -8
  178. ultralytics/solutions/object_blurrer.py +7 -10
  179. ultralytics/solutions/object_counter.py +12 -19
  180. ultralytics/solutions/object_cropper.py +8 -14
  181. ultralytics/solutions/parking_management.py +33 -31
  182. ultralytics/solutions/queue_management.py +10 -12
  183. ultralytics/solutions/region_counter.py +9 -12
  184. ultralytics/solutions/security_alarm.py +15 -20
  185. ultralytics/solutions/similarity_search.py +13 -17
  186. ultralytics/solutions/solutions.py +75 -74
  187. ultralytics/solutions/speed_estimation.py +7 -10
  188. ultralytics/solutions/streamlit_inference.py +4 -7
  189. ultralytics/solutions/templates/similarity-search.html +7 -18
  190. ultralytics/solutions/trackzone.py +7 -10
  191. ultralytics/solutions/vision_eye.py +5 -8
  192. ultralytics/trackers/__init__.py +1 -1
  193. ultralytics/trackers/basetrack.py +3 -5
  194. ultralytics/trackers/bot_sort.py +10 -27
  195. ultralytics/trackers/byte_tracker.py +14 -30
  196. ultralytics/trackers/track.py +3 -6
  197. ultralytics/trackers/utils/gmc.py +11 -22
  198. ultralytics/trackers/utils/kalman_filter.py +37 -48
  199. ultralytics/trackers/utils/matching.py +12 -15
  200. ultralytics/utils/__init__.py +116 -116
  201. ultralytics/utils/autobatch.py +2 -4
  202. ultralytics/utils/autodevice.py +17 -18
  203. ultralytics/utils/benchmarks.py +70 -70
  204. ultralytics/utils/callbacks/base.py +8 -10
  205. ultralytics/utils/callbacks/clearml.py +5 -13
  206. ultralytics/utils/callbacks/comet.py +32 -46
  207. ultralytics/utils/callbacks/dvc.py +13 -18
  208. ultralytics/utils/callbacks/mlflow.py +4 -5
  209. ultralytics/utils/callbacks/neptune.py +7 -15
  210. ultralytics/utils/callbacks/platform.py +314 -38
  211. ultralytics/utils/callbacks/raytune.py +3 -4
  212. ultralytics/utils/callbacks/tensorboard.py +23 -31
  213. ultralytics/utils/callbacks/wb.py +10 -13
  214. ultralytics/utils/checks.py +151 -87
  215. ultralytics/utils/cpu.py +3 -8
  216. ultralytics/utils/dist.py +19 -15
  217. ultralytics/utils/downloads.py +29 -41
  218. ultralytics/utils/errors.py +6 -14
  219. ultralytics/utils/events.py +2 -4
  220. ultralytics/utils/export/__init__.py +7 -0
  221. ultralytics/utils/{export.py → export/engine.py} +16 -16
  222. ultralytics/utils/export/imx.py +325 -0
  223. ultralytics/utils/export/tensorflow.py +231 -0
  224. ultralytics/utils/files.py +24 -28
  225. ultralytics/utils/git.py +9 -11
  226. ultralytics/utils/instance.py +30 -51
  227. ultralytics/utils/logger.py +212 -114
  228. ultralytics/utils/loss.py +15 -24
  229. ultralytics/utils/metrics.py +131 -160
  230. ultralytics/utils/nms.py +21 -30
  231. ultralytics/utils/ops.py +107 -165
  232. ultralytics/utils/patches.py +33 -21
  233. ultralytics/utils/plotting.py +122 -119
  234. ultralytics/utils/tal.py +28 -44
  235. ultralytics/utils/torch_utils.py +70 -187
  236. ultralytics/utils/tqdm.py +20 -20
  237. ultralytics/utils/triton.py +13 -19
  238. ultralytics/utils/tuner.py +17 -5
  239. dgenerate_ultralytics_headless-8.3.196.dist-info/RECORD +0 -281
  240. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
  241. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
  242. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
  243. {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
ultralytics/data/dataset.py CHANGED
@@ -47,8 +47,7 @@ DATASET_CACHE_VERSION = "1.0.3"
 
 
 class YOLODataset(BaseDataset):
-    """
-    Dataset class for loading object detection and/or segmentation labels in YOLO format.
+    """Dataset class for loading object detection and/or segmentation labels in YOLO format.
 
     This class supports loading data for object detection, segmentation, pose estimation, and oriented bounding box
     (OBB) tasks using the YOLO format.
@@ -73,8 +72,7 @@ class YOLODataset(BaseDataset):
     """
 
     def __init__(self, *args, data: dict | None = None, task: str = "detect", **kwargs):
-        """
-        Initialize the YOLODataset.
+        """Initialize the YOLODataset.
 
         Args:
             data (dict, optional): Dataset configuration dictionary.
@@ -90,8 +88,7 @@ class YOLODataset(BaseDataset):
         super().__init__(*args, channels=self.data.get("channels", 3), **kwargs)
 
     def cache_labels(self, path: Path = Path("./labels.cache")) -> dict:
-        """
-        Cache dataset labels, check images and read shapes.
+        """Cache dataset labels, check images and read shapes.
 
         Args:
             path (Path): Path where to save the cache file.
@@ -158,8 +155,7 @@ class YOLODataset(BaseDataset):
         return x
 
     def get_labels(self) -> list[dict]:
-        """
-        Return dictionary of labels for YOLO training.
+        """Return dictionary of labels for YOLO training.
 
         This method loads labels from disk or cache, verifies their integrity, and prepares them for training.
 
@@ -172,7 +168,7 @@ class YOLODataset(BaseDataset):
             cache, exists = load_dataset_cache_file(cache_path), True  # attempt to load a *.cache file
             assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
             assert cache["hash"] == get_hash(self.label_files + self.im_files)  # identical hash
-        except (FileNotFoundError, AssertionError, AttributeError):
+        except (FileNotFoundError, AssertionError, AttributeError, ModuleNotFoundError):
            cache, exists = self.cache_labels(cache_path), False  # run cache ops
 
        # Display cache
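
Note on the widened except clause above: loading a label cache is an unpickling step, so a cache written by an environment whose modules are no longer importable can now raise ModuleNotFoundError, and catching it makes the loader fall back to rebuilding the cache instead of crashing. A minimal sketch of the load-or-rebuild pattern (illustrative names, not the library's actual helpers):

    import pickle
    from pathlib import Path

    def load_or_rebuild_cache(cache_path: Path, rebuild, version: str, expected_hash: str) -> dict:
        """Return the cached dict when valid; rebuild it on any recoverable failure."""
        try:
            cache = pickle.loads(cache_path.read_bytes())  # unpickling may raise ModuleNotFoundError
            assert cache["version"] == version  # cache schema matches the current code
            assert cache["hash"] == expected_hash  # source files are unchanged
            return cache
        except (FileNotFoundError, AssertionError, AttributeError, ModuleNotFoundError):
            return rebuild(cache_path)  # regenerate the cache from the raw labels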
@@ -208,8 +204,7 @@ class YOLODataset(BaseDataset):
         return labels
 
     def build_transforms(self, hyp: dict | None = None) -> Compose:
-        """
-        Build and append transforms to the list.
+        """Build and append transforms to the list.
 
         Args:
             hyp (dict, optional): Hyperparameters for transforms.
@@ -240,8 +235,7 @@ class YOLODataset(BaseDataset):
         return transforms
 
     def close_mosaic(self, hyp: dict) -> None:
-        """
-        Disable mosaic, copy_paste, mixup and cutmix augmentations by setting their probabilities to 0.0.
+        """Disable mosaic, copy_paste, mixup and cutmix augmentations by setting their probabilities to 0.0.
 
         Args:
             hyp (dict): Hyperparameters for transforms.
@@ -253,8 +247,7 @@ class YOLODataset(BaseDataset):
         self.transforms = self.build_transforms(hyp)
 
     def update_labels_info(self, label: dict) -> dict:
-        """
-        Update label format for different tasks.
+        """Update label format for different tasks.
 
         Args:
             label (dict): Label dictionary containing bboxes, segments, keypoints, etc.
@@ -262,7 +255,7 @@ class YOLODataset(BaseDataset):
         Returns:
             (dict): Updated label dictionary with instances.
 
-        Note:
+        Notes:
             cls is not with bboxes now, classification and semantic segmentation need an independent cls label
             Can also support classification and semantic segmentation by adding or removing dict keys there.
         """
@@ -287,8 +280,7 @@ class YOLODataset(BaseDataset):
 
     @staticmethod
     def collate_fn(batch: list[dict]) -> dict:
-        """
-        Collate data samples into batches.
+        """Collate data samples into batches.
 
         Args:
             batch (list[dict]): List of dictionaries containing sample data.
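
A hedged sketch of what dict-style collation generally looks like for detection batches (the real collate_fn handles more keys; names here are illustrative): stack the fixed-size image tensors, concatenate the variable-length label tensors, and record a batch index so every box can be traced back to its sample.

    import torch

    def collate(batch: list[dict]) -> dict:
        """Toy collate for dicts with an 'img' tensor and variable-length 'bboxes'."""
        return {
            "img": torch.stack([b["img"] for b in batch]),  # (B, C, H, W)
            "bboxes": torch.cat([b["bboxes"] for b in batch]),  # (sum(n_i), 4)
            "batch_idx": torch.cat(
                [torch.full((len(b["bboxes"]),), i) for i, b in enumerate(batch)]
            ),  # maps each box to its sample index
        }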
@@ -317,11 +309,10 @@ class YOLODataset(BaseDataset):
 
 
 class YOLOMultiModalDataset(YOLODataset):
-    """
-    Dataset class for loading object detection and/or segmentation labels in YOLO format with multi-modal support.
+    """Dataset class for loading object detection and/or segmentation labels in YOLO format with multi-modal support.
 
-    This class extends YOLODataset to add text information for multi-modal model training, enabling models to
-    process both image and text data.
+    This class extends YOLODataset to add text information for multi-modal model training, enabling models to process
+    both image and text data.
 
     Methods:
         update_labels_info: Add text information for multi-modal model training.
@@ -334,8 +325,7 @@ class YOLOMultiModalDataset(YOLODataset):
     """
 
     def __init__(self, *args, data: dict | None = None, task: str = "detect", **kwargs):
-        """
-        Initialize a YOLOMultiModalDataset.
+        """Initialize a YOLOMultiModalDataset.
 
         Args:
             data (dict, optional): Dataset configuration dictionary.
@@ -346,8 +336,7 @@ class YOLOMultiModalDataset(YOLODataset):
         super().__init__(*args, data=data, task=task, **kwargs)
 
     def update_labels_info(self, label: dict) -> dict:
-        """
-        Add text information for multi-modal model training.
+        """Add text information for multi-modal model training.
 
         Args:
             label (dict): Label dictionary containing bboxes, segments, keypoints, etc.
@@ -363,8 +352,7 @@ class YOLOMultiModalDataset(YOLODataset):
         return labels
 
     def build_transforms(self, hyp: dict | None = None) -> Compose:
-        """
-        Enhance data transformations with optional text augmentation for multi-modal training.
+        """Enhance data transformations with optional text augmentation for multi-modal training.
 
         Args:
             hyp (dict, optional): Hyperparameters for transforms.
@@ -388,8 +376,7 @@ class YOLOMultiModalDataset(YOLODataset):
 
     @property
     def category_names(self):
-        """
-        Return category names for the dataset.
+        """Return category names for the dataset.
 
         Returns:
             (set[str]): List of class names.
@@ -418,11 +405,10 @@ class YOLOMultiModalDataset(YOLODataset):
 
 
 class GroundingDataset(YOLODataset):
-    """
-    Dataset class for object detection tasks using annotations from a JSON file in grounding format.
+    """Dataset class for object detection tasks using annotations from a JSON file in grounding format.
 
-    This dataset is designed for grounding tasks where annotations are provided in a JSON file rather than
-    the standard YOLO format text files.
+    This dataset is designed for grounding tasks where annotations are provided in a JSON file rather than the standard
+    YOLO format text files.
 
     Attributes:
         json_file (str): Path to the JSON file containing annotations.
@@ -438,8 +424,7 @@ class GroundingDataset(YOLODataset):
     """
 
     def __init__(self, *args, task: str = "detect", json_file: str = "", max_samples: int = 80, **kwargs):
-        """
-        Initialize a GroundingDataset for object detection.
+        """Initialize a GroundingDataset for object detection.
 
         Args:
             json_file (str): Path to the JSON file containing annotations.
@@ -454,8 +439,7 @@ class GroundingDataset(YOLODataset):
         super().__init__(*args, task=task, data={"channels": 3}, **kwargs)
 
     def get_img_files(self, img_path: str) -> list:
-        """
-        The image files would be read in `get_labels` function, return empty list here.
+        """The image files would be read in `get_labels` function, return empty list here.
 
         Args:
             img_path (str): Path to the directory containing images.
@@ -466,23 +450,21 @@ class GroundingDataset(YOLODataset):
         return []
 
     def verify_labels(self, labels: list[dict[str, Any]]) -> None:
-        """
-        Verify the number of instances in the dataset matches expected counts.
+        """Verify the number of instances in the dataset matches expected counts.
 
-        This method checks if the total number of bounding box instances in the provided
-        labels matches the expected count for known datasets. It performs validation
-        against a predefined set of datasets with known instance counts.
+        This method checks if the total number of bounding box instances in the provided labels matches the expected
+        count for known datasets. It performs validation against a predefined set of datasets with known instance
+        counts.
 
         Args:
-            labels (list[dict[str, Any]]): List of label dictionaries, where each dictionary
-                contains dataset annotations. Each label dict must have a 'bboxes' key with
-                a numpy array or tensor containing bounding box coordinates.
+            labels (list[dict[str, Any]]): List of label dictionaries, where each dictionary contains dataset
+                annotations. Each label dict must have a 'bboxes' key with a numpy array or tensor containing bounding
+                box coordinates.
 
         Raises:
-            AssertionError: If the actual instance count doesn't match the expected count
-                for a recognized dataset.
+            AssertionError: If the actual instance count doesn't match the expected count for a recognized dataset.
 
-        Note:
+        Notes:
             For unrecognized datasets (those not in the predefined expected_counts),
             a warning is logged and verification is skipped.
         """
@@ -501,8 +483,7 @@ class GroundingDataset(YOLODataset):
         LOGGER.warning(f"Skipping instance count verification for unrecognized dataset '{self.json_file}'")
 
     def cache_labels(self, path: Path = Path("./labels.cache")) -> dict[str, Any]:
-        """
-        Load annotations from a JSON file, filter, and normalize bounding boxes for each image.
+        """Load annotations from a JSON file, filter, and normalize bounding boxes for each image.
 
         Args:
             path (Path): Path where to save the cache file.
@@ -548,7 +529,7 @@ class GroundingDataset(YOLODataset):
                     cat2id[cat_name] = len(cat2id)
                     texts.append([cat_name])
                 cls = cat2id[cat_name]  # class
-                box = [cls] + box.tolist()
+                box = [cls, *box.tolist()]
                 if box not in bboxes:
                     bboxes.append(box)
                     if ann.get("segmentation") is not None:
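
The `[cls] + box.tolist()` to `[cls, *box.tolist()]` change here (and the matching `s = [cls, *s]` change in the next hunk) is behavior-preserving: iterable unpacking builds the same list without the intermediate single-element list, and is the spelling ruff's RUF005 rule prefers. For example:

    cls, box = 3, [0.5, 0.5, 0.2, 0.2]  # illustrative class id and normalized box values
    assert [cls] + box == [cls, *box] == [3, 0.5, 0.5, 0.2, 0.2]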
@@ -565,7 +546,7 @@ class GroundingDataset(YOLODataset):
                         .reshape(-1)
                         .tolist()
                     )
-                    s = [cls] + s
+                    s = [cls, *s]
                     segments.append(s)
             lb = np.array(bboxes, dtype=np.float32) if len(bboxes) else np.zeros((0, 5), dtype=np.float32)
 
@@ -592,8 +573,7 @@ class GroundingDataset(YOLODataset):
         return x
 
     def get_labels(self) -> list[dict]:
-        """
-        Load labels from cache or generate them from JSON file.
+        """Load labels from cache or generate them from JSON file.
 
         Returns:
             (list[dict]): List of label dictionaries, each containing information about an image and its annotations.
@@ -614,8 +594,7 @@ class GroundingDataset(YOLODataset):
         return labels
 
     def build_transforms(self, hyp: dict | None = None) -> Compose:
-        """
-        Configure augmentations for training with optional text loading.
+        """Configure augmentations for training with optional text loading.
 
         Args:
             hyp (dict, optional): Hyperparameters for transforms.
@@ -661,11 +640,10 @@ class GroundingDataset(YOLODataset):
 
 
 class YOLOConcatDataset(ConcatDataset):
-    """
-    Dataset as a concatenation of multiple datasets.
+    """Dataset as a concatenation of multiple datasets.
 
-    This class is useful to assemble different existing datasets for YOLO training, ensuring they use the same
-    collation function.
+    This class is useful to assemble different existing datasets for YOLO training, ensuring they use the same collation
+    function.
 
     Methods:
         collate_fn: Static method that collates data samples into batches using YOLODataset's collation function.
@@ -678,8 +656,7 @@ class YOLOConcatDataset(ConcatDataset):
 
     @staticmethod
     def collate_fn(batch: list[dict]) -> dict:
-        """
-        Collate data samples into batches.
+        """Collate data samples into batches.
 
         Args:
             batch (list[dict]): List of dictionaries containing sample data.
@@ -690,8 +667,7 @@ class YOLOConcatDataset(ConcatDataset):
         return YOLODataset.collate_fn(batch)
 
     def close_mosaic(self, hyp: dict) -> None:
-        """
-        Set mosaic, copy_paste and mixup options to 0.0 and build transformations.
+        """Set mosaic, copy_paste and mixup options to 0.0 and build transformations.
 
         Args:
             hyp (dict): Hyperparameters for transforms.
@@ -712,8 +688,7 @@ class SemanticDataset(BaseDataset):
 
 
 class ClassificationDataset:
-    """
-    Dataset class for image classification tasks extending torchvision ImageFolder functionality.
+    """Dataset class for image classification tasks extending torchvision ImageFolder functionality.
 
     This class offers functionalities like image augmentation, caching, and verification. It's designed to efficiently
     handle large datasets for training deep learning models, with optional image transformations and caching mechanisms
@@ -723,7 +698,7 @@ class ClassificationDataset:
         cache_ram (bool): Indicates if caching in RAM is enabled.
         cache_disk (bool): Indicates if caching on disk is enabled.
         samples (list): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
-                        file (if caching on disk), and optionally the loaded image array (if caching in RAM).
+            file (if caching on disk), and optionally the loaded image array (if caching in RAM).
         torch_transforms (callable): PyTorch transforms to be applied to the images.
         root (str): Root directory of the dataset.
         prefix (str): Prefix for logging and cache filenames.
@@ -735,8 +710,7 @@ class ClassificationDataset:
     """
 
     def __init__(self, root: str, args, augment: bool = False, prefix: str = ""):
-        """
-        Initialize YOLO classification dataset with root directory, arguments, augmentations, and cache settings.
+        """Initialize YOLO classification dataset with root directory, arguments, augmentations, and cache settings.
 
         Args:
             root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
@@ -768,7 +742,7 @@ class ClassificationDataset:
         self.cache_ram = False
         self.cache_disk = str(args.cache).lower() == "disk"  # cache images on hard drive as uncompressed *.npy files
         self.samples = self.verify_images()  # filter out bad images
-        self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
+        self.samples = [[*list(x), Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
         scale = (1.0 - args.scale, 1.0)  # (0.08, 1.0)
         self.torch_transforms = (
             classify_augmentations(
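
The rewritten comprehension extends each (file, class-index) pair from `verify_images()` into the four-slot `[file, index, npy, im]` layout, again via unpacking rather than list concatenation. A toy illustration with hypothetical values:

    from pathlib import Path

    samples = [("images/cat/001.jpg", 0)]  # (file, class index) pairs, as from verify_images()
    samples = [[*list(x), Path(x[0]).with_suffix(".npy"), None] for x in samples]  # file, index, npy, im
    assert samples[0] == ["images/cat/001.jpg", 0, Path("images/cat/001.npy"), None]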
@@ -787,8 +761,7 @@ class ClassificationDataset:
         )
 
     def __getitem__(self, i: int) -> dict:
-        """
-        Return subset of data and targets corresponding to given indices.
+        """Return subset of data and targets corresponding to given indices.
 
         Args:
             i (int): Index of the sample to retrieve.
@@ -816,8 +789,7 @@ class ClassificationDataset:
         return len(self.samples)
 
     def verify_images(self) -> list[tuple]:
-        """
-        Verify all images in dataset.
+        """Verify all images in dataset.
 
         Returns:
             (list): List of valid samples after verification.
ultralytics/data/loaders.py CHANGED
@@ -25,11 +25,10 @@ from ultralytics.utils.patches import imread
 
 @dataclass
 class SourceTypes:
-    """
-    Class to represent various types of input sources for predictions.
+    """Class to represent various types of input sources for predictions.
 
-    This class uses dataclass to define boolean flags for different types of input sources that can be used for
-    making predictions with YOLO models.
+    This class uses dataclass to define boolean flags for different types of input sources that can be used for making
+    predictions with YOLO models.
 
     Attributes:
         stream (bool): Flag indicating if the input source is a video stream.
@@ -52,11 +51,10 @@ class SourceTypes:
 
 
 class LoadStreams:
-    """
-    Stream Loader for various types of video streams.
+    """Stream Loader for various types of video streams.
 
-    Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video
-    streams simultaneously, making it suitable for real-time video analysis tasks.
+    Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video streams
+    simultaneously, making it suitable for real-time video analysis tasks.
 
     Attributes:
         sources (list[str]): The source input paths or URLs for the video streams.
@@ -71,7 +69,7 @@ class LoadStreams:
         shape (list[tuple[int, int, int]]): List of shapes for each stream.
         caps (list[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream.
         bs (int): Batch size for processing.
-        cv2_flag (int): OpenCV flag for image reading (grayscale or RGB).
+        cv2_flag (int): OpenCV flag for image reading (grayscale or color/BGR).
 
     Methods:
         update: Read stream frames in daemon thread.
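
The docstring fix from "grayscale or RGB" to "grayscale or color/BGR" (repeated in several hunks below) reflects actual OpenCV behavior: `cv2.IMREAD_COLOR` decodes to BGR channel order, not RGB. A quick self-contained check:

    import cv2
    import numpy as np

    bgr = np.zeros((2, 2, 3), dtype=np.uint8)
    bgr[..., 2] = 255  # red pixel in BGR layout (last channel is R)
    ok, buf = cv2.imencode(".png", bgr)  # OpenCV treats the input as BGR
    decoded = cv2.imdecode(buf, cv2.IMREAD_COLOR)
    assert decoded[0, 0].tolist() == [0, 0, 255]  # still B, G, R after decoding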
@@ -94,21 +92,20 @@
     """
 
     def __init__(self, sources: str = "file.streams", vid_stride: int = 1, buffer: bool = False, channels: int = 3):
-        """
-        Initialize stream loader for multiple video sources, supporting various stream types.
+        """Initialize stream loader for multiple video sources, supporting various stream types.
 
         Args:
             sources (str): Path to streams file or single stream URL.
             vid_stride (int): Video frame-rate stride.
             buffer (bool): Whether to buffer input streams.
-            channels (int): Number of image channels (1 for grayscale, 3 for RGB).
+            channels (int): Number of image channels (1 for grayscale, 3 for color).
         """
         torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
         self.buffer = buffer  # buffer input streams
         self.running = True  # running flag for Thread
         self.mode = "stream"
         self.vid_stride = vid_stride  # video frame-rate stride
-        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or RGB
+        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or color (BGR)
 
         sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
         n = len(sources)
@@ -126,7 +123,7 @@ class LoadStreams:
             if urllib.parse.urlparse(s).hostname in {"www.youtube.com", "youtube.com", "youtu.be"}:  # YouTube video
                 # YouTube format i.e. 'https://www.youtube.com/watch?v=Jsn8D3aC840' or 'https://youtu.be/Jsn8D3aC840'
                 s = get_best_youtube_url(s)
-            s = eval(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
+            s = int(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
             if s == 0 and (IS_COLAB or IS_KAGGLE):
                 raise NotImplementedError(
                     "'source=0' webcam not supported in Colab and Kaggle notebooks. "
@@ -227,11 +224,10 @@ class LoadStreams:
 
 
 class LoadScreenshots:
-    """
-    Ultralytics screenshot dataloader for capturing and processing screen images.
+    """Ultralytics screenshot dataloader for capturing and processing screen images.
 
-    This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with
-    `yolo predict source=screen`.
+    This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with `yolo
+    predict source=screen`.
 
     Attributes:
         source (str): The source input indicating which screen to capture.
@@ -246,7 +242,7 @@ class LoadScreenshots:
         bs (int): Batch size, set to 1.
         fps (int): Frames per second, set to 30.
         monitor (dict[str, int]): Monitor configuration details.
-        cv2_flag (int): OpenCV flag for image reading (grayscale or RGB).
+        cv2_flag (int): OpenCV flag for image reading (grayscale or color/BGR).
 
     Methods:
         __iter__: Returns an iterator object.
@@ -259,15 +255,14 @@ class LoadScreenshots:
     """
 
     def __init__(self, source: str, channels: int = 3):
-        """
-        Initialize screenshot capture with specified screen and region parameters.
+        """Initialize screenshot capture with specified screen and region parameters.
 
         Args:
             source (str): Screen capture source string in format "screen_num left top width height".
-            channels (int): Number of image channels (1 for grayscale, 3 for RGB).
+            channels (int): Number of image channels (1 for grayscale, 3 for color).
         """
         check_requirements("mss")
-        import mss  # noqa
+        import mss
 
         source, *params = source.split()
         self.screen, left, top, width, height = 0, None, None, None, None  # default to full screen 0
@@ -282,7 +277,7 @@ class LoadScreenshots:
         self.sct = mss.mss()
         self.bs = 1
         self.fps = 30
-        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or RGB
+        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or color (BGR)
 
         # Parse monitor shape
         monitor = self.sct.monitors[self.screen]
@@ -307,11 +302,10 @@ class LoadScreenshots:
 
 
 class LoadImagesAndVideos:
-    """
-    A class for loading and processing images and videos for YOLO object detection.
+    """A class for loading and processing images and videos for YOLO object detection.
 
-    This class manages the loading and pre-processing of image and video data from various sources, including
-    single image files, video files, and lists of image and video paths.
+    This class manages the loading and pre-processing of image and video data from various sources, including single
+    image files, video files, and lists of image and video paths.
 
     Attributes:
         files (list[str]): List of image and video file paths.
@@ -325,7 +319,7 @@ class LoadImagesAndVideos:
         frames (int): Total number of frames in the video.
         count (int): Counter for iteration, initialized at 0 during __iter__().
         ni (int): Number of images.
-        cv2_flag (int): OpenCV flag for image reading (grayscale or RGB).
+        cv2_flag (int): OpenCV flag for image reading (grayscale or color/BGR).
 
     Methods:
         __init__: Initialize the LoadImagesAndVideos object.
@@ -347,14 +341,13 @@ class LoadImagesAndVideos:
     """
 
     def __init__(self, path: str | Path | list, batch: int = 1, vid_stride: int = 1, channels: int = 3):
-        """
-        Initialize dataloader for images and videos, supporting various input formats.
+        """Initialize dataloader for images and videos, supporting various input formats.
 
         Args:
             path (str | Path | list): Path to images/videos, directory, or list of paths.
            batch (int): Batch size for processing.
            vid_stride (int): Video frame-rate stride.
-           channels (int): Number of image channels (1 for grayscale, 3 for RGB).
+           channels (int): Number of image channels (1 for grayscale, 3 for color).
        """
        parent = None
        if isinstance(path, str) and Path(path).suffix in {".txt", ".csv"}:  # txt/csv file with source paths
@@ -392,7 +385,7 @@ class LoadImagesAndVideos:
         self.mode = "video" if ni == 0 else "image"  # default to video if no images
         self.vid_stride = vid_stride  # video frame-rate stride
         self.bs = batch
-        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or RGB
+        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR  # grayscale or color (BGR)
         if any(videos):
             self._new_video(videos[0])  # new video
         else:
@@ -490,8 +483,7 @@ class LoadImagesAndVideos:
 
 
 class LoadPilAndNumpy:
-    """
-    Load images from PIL and Numpy arrays for batch processing.
+    """Load images from PIL and Numpy arrays for batch processing.
 
     This class manages loading and pre-processing of image data from both PIL and Numpy formats. It performs basic
     validation and format conversion to ensure that the images are in the required format for downstream processing.
@@ -517,12 +509,11 @@ class LoadPilAndNumpy:
     """
 
     def __init__(self, im0: Image.Image | np.ndarray | list, channels: int = 3):
-        """
-        Initialize a loader for PIL and Numpy images, converting inputs to a standardized format.
+        """Initialize a loader for PIL and Numpy images, converting inputs to a standardized format.
 
         Args:
             im0 (PIL.Image.Image | np.ndarray | list): Single image or list of images in PIL or numpy format.
-            channels (int): Number of image channels (1 for grayscale, 3 for RGB).
+            channels (int): Number of image channels (1 for grayscale, 3 for color).
         """
         if not isinstance(im0, list):
             im0 = [im0]
@@ -535,11 +526,16 @@ class LoadPilAndNumpy:
 
     @staticmethod
     def _single_check(im: Image.Image | np.ndarray, flag: str = "RGB") -> np.ndarray:
-        """Validate and format an image to numpy array, ensuring RGB order and contiguous memory."""
+        """Validate and format an image to a NumPy array.
+
+        Notes:
+            - PIL inputs are converted to NumPy and returned in OpenCV-compatible BGR order for color images.
+            - NumPy inputs are returned as-is (no channel-order conversion is applied).
+        """
         assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
         if isinstance(im, Image.Image):
             im = np.asarray(im.convert(flag))
-            # adding new axis if it's grayscale, and converting to BGR if it's RGB
+            # Add a new axis if grayscale; convert RGB -> BGR for OpenCV compatibility.
             im = im[..., None] if flag == "L" else im[..., ::-1]
             im = np.ascontiguousarray(im)  # contiguous
         elif im.ndim == 2:  # grayscale in numpy form
564
560
 
565
561
 
566
562
  class LoadTensor:
567
- """
568
- A class for loading and processing tensor data for object detection tasks.
563
+ """A class for loading and processing tensor data for object detection tasks.
569
564
 
570
- This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for
571
- further processing in object detection pipelines.
565
+ This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for further
566
+ processing in object detection pipelines.
572
567
 
573
568
  Attributes:
574
569
  im0 (torch.Tensor): The input tensor containing the image(s) with shape (B, C, H, W).
@@ -588,8 +583,7 @@ class LoadTensor:
588
583
  """
589
584
 
590
585
  def __init__(self, im0: torch.Tensor) -> None:
591
- """
592
- Initialize LoadTensor object for processing torch.Tensor image data.
586
+ """Initialize LoadTensor object for processing torch.Tensor image data.
593
587
 
594
588
  Args:
595
589
  im0 (torch.Tensor): Input tensor with shape (B, C, H, W).
@@ -656,8 +650,7 @@ def autocast_list(source: list[Any]) -> list[Image.Image | np.ndarray]:
656
650
 
657
651
 
658
652
  def get_best_youtube_url(url: str, method: str = "pytube") -> str | None:
659
- """
660
- Retrieve the URL of the best quality MP4 video stream from a given YouTube video.
653
+ """Retrieve the URL of the best quality MP4 video stream from a given YouTube video.
661
654
 
662
655
  Args:
663
656
  url (str): The URL of the YouTube video.
@@ -690,7 +683,7 @@ def get_best_youtube_url(url: str, method: str = "pytube") -> str | None:
690
683
 
691
684
  elif method == "pafy":
692
685
  check_requirements(("pafy", "youtube_dl==2020.12.2"))
693
- import pafy # noqa
686
+ import pafy
694
687
 
695
688
  return pafy.new(url).getbestvideo(preftype="mp4").url
696
689
 
ultralytics/data/split.py CHANGED
@@ -11,11 +11,10 @@ from ultralytics.utils import DATASETS_DIR, LOGGER, TQDM
 
 
 def split_classify_dataset(source_dir: str | Path, train_ratio: float = 0.8) -> Path:
-    """
-    Split classification dataset into train and val directories in a new directory.
+    """Split classification dataset into train and val directories in a new directory.
 
-    Creates a new directory '{source_dir}_split' with train/val subdirectories, preserving the original class
-    structure with an 80/20 split by default.
+    Creates a new directory '{source_dir}_split' with train/val subdirectories, preserving the original class structure
+    with an 80/20 split by default.
 
     Directory structure:
         Before:
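
Based on the signature above, a usage sketch (the dataset path is hypothetical):

    from pathlib import Path
    from ultralytics.data.split import split_classify_dataset

    split_dir = split_classify_dataset(Path("datasets/flowers"), train_ratio=0.8)
    print(split_dir)  # e.g. datasets/flowers_split, containing train/ and val/ class folders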
@@ -101,8 +100,7 @@ def autosplit(
     weights: tuple[float, float, float] = (0.9, 0.1, 0.0),
     annotated_only: bool = False,
 ) -> None:
-    """
-    Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.
+    """Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt
+    files.
 
     Args:
         path (Path): Path to images directory.
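
And a matching usage sketch for `autosplit`, using the defaults shown in the signature (the images directory is hypothetical):

    from pathlib import Path
    from ultralytics.data.split import autosplit

    autosplit(
        path=Path("datasets/coco8/images"),  # directory of images to index
        weights=(0.9, 0.1, 0.0),  # train/val/test fractions
        annotated_only=False,  # keep images even if they lack a label file
    )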