dgenerate-ultralytics-headless 8.3.137__py3-none-any.whl → 8.3.224__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
  2. dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
  3. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
  4. tests/__init__.py +7 -6
  5. tests/conftest.py +15 -39
  6. tests/test_cli.py +17 -17
  7. tests/test_cuda.py +17 -8
  8. tests/test_engine.py +36 -10
  9. tests/test_exports.py +98 -37
  10. tests/test_integrations.py +12 -15
  11. tests/test_python.py +126 -82
  12. tests/test_solutions.py +319 -135
  13. ultralytics/__init__.py +27 -9
  14. ultralytics/cfg/__init__.py +83 -87
  15. ultralytics/cfg/datasets/Argoverse.yaml +4 -4
  16. ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
  17. ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
  18. ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
  19. ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
  20. ultralytics/cfg/datasets/ImageNet.yaml +3 -3
  21. ultralytics/cfg/datasets/Objects365.yaml +24 -20
  22. ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
  23. ultralytics/cfg/datasets/VOC.yaml +10 -13
  24. ultralytics/cfg/datasets/VisDrone.yaml +43 -33
  25. ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
  26. ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
  27. ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
  28. ultralytics/cfg/datasets/coco-pose.yaml +26 -4
  29. ultralytics/cfg/datasets/coco.yaml +4 -4
  30. ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
  31. ultralytics/cfg/datasets/coco128.yaml +2 -2
  32. ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
  33. ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
  34. ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
  35. ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
  36. ultralytics/cfg/datasets/coco8.yaml +2 -2
  37. ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
  38. ultralytics/cfg/datasets/crack-seg.yaml +5 -5
  39. ultralytics/cfg/datasets/dog-pose.yaml +32 -4
  40. ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
  41. ultralytics/cfg/datasets/dota8.yaml +2 -2
  42. ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
  43. ultralytics/cfg/datasets/lvis.yaml +9 -9
  44. ultralytics/cfg/datasets/medical-pills.yaml +4 -5
  45. ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
  46. ultralytics/cfg/datasets/package-seg.yaml +5 -5
  47. ultralytics/cfg/datasets/signature.yaml +4 -4
  48. ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
  49. ultralytics/cfg/datasets/xView.yaml +5 -5
  50. ultralytics/cfg/default.yaml +96 -93
  51. ultralytics/cfg/trackers/botsort.yaml +16 -17
  52. ultralytics/cfg/trackers/bytetrack.yaml +9 -11
  53. ultralytics/data/__init__.py +4 -4
  54. ultralytics/data/annotator.py +12 -12
  55. ultralytics/data/augment.py +531 -564
  56. ultralytics/data/base.py +76 -81
  57. ultralytics/data/build.py +206 -42
  58. ultralytics/data/converter.py +179 -78
  59. ultralytics/data/dataset.py +121 -121
  60. ultralytics/data/loaders.py +114 -91
  61. ultralytics/data/split.py +28 -15
  62. ultralytics/data/split_dota.py +67 -48
  63. ultralytics/data/utils.py +110 -89
  64. ultralytics/engine/exporter.py +422 -460
  65. ultralytics/engine/model.py +224 -252
  66. ultralytics/engine/predictor.py +94 -89
  67. ultralytics/engine/results.py +345 -595
  68. ultralytics/engine/trainer.py +231 -134
  69. ultralytics/engine/tuner.py +279 -73
  70. ultralytics/engine/validator.py +53 -46
  71. ultralytics/hub/__init__.py +26 -28
  72. ultralytics/hub/auth.py +30 -16
  73. ultralytics/hub/google/__init__.py +34 -36
  74. ultralytics/hub/session.py +53 -77
  75. ultralytics/hub/utils.py +23 -109
  76. ultralytics/models/__init__.py +1 -1
  77. ultralytics/models/fastsam/__init__.py +1 -1
  78. ultralytics/models/fastsam/model.py +36 -18
  79. ultralytics/models/fastsam/predict.py +33 -44
  80. ultralytics/models/fastsam/utils.py +4 -5
  81. ultralytics/models/fastsam/val.py +12 -14
  82. ultralytics/models/nas/__init__.py +1 -1
  83. ultralytics/models/nas/model.py +16 -20
  84. ultralytics/models/nas/predict.py +12 -14
  85. ultralytics/models/nas/val.py +4 -5
  86. ultralytics/models/rtdetr/__init__.py +1 -1
  87. ultralytics/models/rtdetr/model.py +9 -9
  88. ultralytics/models/rtdetr/predict.py +22 -17
  89. ultralytics/models/rtdetr/train.py +20 -16
  90. ultralytics/models/rtdetr/val.py +79 -59
  91. ultralytics/models/sam/__init__.py +8 -2
  92. ultralytics/models/sam/amg.py +53 -38
  93. ultralytics/models/sam/build.py +29 -31
  94. ultralytics/models/sam/model.py +33 -38
  95. ultralytics/models/sam/modules/blocks.py +159 -182
  96. ultralytics/models/sam/modules/decoders.py +38 -47
  97. ultralytics/models/sam/modules/encoders.py +114 -133
  98. ultralytics/models/sam/modules/memory_attention.py +38 -31
  99. ultralytics/models/sam/modules/sam.py +114 -93
  100. ultralytics/models/sam/modules/tiny_encoder.py +268 -291
  101. ultralytics/models/sam/modules/transformer.py +59 -66
  102. ultralytics/models/sam/modules/utils.py +55 -72
  103. ultralytics/models/sam/predict.py +745 -341
  104. ultralytics/models/utils/loss.py +118 -107
  105. ultralytics/models/utils/ops.py +118 -71
  106. ultralytics/models/yolo/__init__.py +1 -1
  107. ultralytics/models/yolo/classify/predict.py +28 -26
  108. ultralytics/models/yolo/classify/train.py +50 -81
  109. ultralytics/models/yolo/classify/val.py +68 -61
  110. ultralytics/models/yolo/detect/predict.py +12 -15
  111. ultralytics/models/yolo/detect/train.py +56 -46
  112. ultralytics/models/yolo/detect/val.py +279 -223
  113. ultralytics/models/yolo/model.py +167 -86
  114. ultralytics/models/yolo/obb/predict.py +7 -11
  115. ultralytics/models/yolo/obb/train.py +23 -25
  116. ultralytics/models/yolo/obb/val.py +107 -99
  117. ultralytics/models/yolo/pose/__init__.py +1 -1
  118. ultralytics/models/yolo/pose/predict.py +12 -14
  119. ultralytics/models/yolo/pose/train.py +31 -69
  120. ultralytics/models/yolo/pose/val.py +119 -254
  121. ultralytics/models/yolo/segment/predict.py +21 -25
  122. ultralytics/models/yolo/segment/train.py +12 -66
  123. ultralytics/models/yolo/segment/val.py +126 -305
  124. ultralytics/models/yolo/world/train.py +53 -45
  125. ultralytics/models/yolo/world/train_world.py +51 -32
  126. ultralytics/models/yolo/yoloe/__init__.py +7 -7
  127. ultralytics/models/yolo/yoloe/predict.py +30 -37
  128. ultralytics/models/yolo/yoloe/train.py +89 -71
  129. ultralytics/models/yolo/yoloe/train_seg.py +15 -17
  130. ultralytics/models/yolo/yoloe/val.py +56 -41
  131. ultralytics/nn/__init__.py +9 -11
  132. ultralytics/nn/autobackend.py +179 -107
  133. ultralytics/nn/modules/__init__.py +67 -67
  134. ultralytics/nn/modules/activation.py +8 -7
  135. ultralytics/nn/modules/block.py +302 -323
  136. ultralytics/nn/modules/conv.py +61 -104
  137. ultralytics/nn/modules/head.py +488 -186
  138. ultralytics/nn/modules/transformer.py +183 -123
  139. ultralytics/nn/modules/utils.py +15 -20
  140. ultralytics/nn/tasks.py +327 -203
  141. ultralytics/nn/text_model.py +81 -65
  142. ultralytics/py.typed +1 -0
  143. ultralytics/solutions/__init__.py +12 -12
  144. ultralytics/solutions/ai_gym.py +19 -27
  145. ultralytics/solutions/analytics.py +36 -26
  146. ultralytics/solutions/config.py +29 -28
  147. ultralytics/solutions/distance_calculation.py +23 -24
  148. ultralytics/solutions/heatmap.py +17 -19
  149. ultralytics/solutions/instance_segmentation.py +21 -19
  150. ultralytics/solutions/object_blurrer.py +16 -17
  151. ultralytics/solutions/object_counter.py +48 -53
  152. ultralytics/solutions/object_cropper.py +22 -16
  153. ultralytics/solutions/parking_management.py +61 -58
  154. ultralytics/solutions/queue_management.py +19 -19
  155. ultralytics/solutions/region_counter.py +63 -50
  156. ultralytics/solutions/security_alarm.py +22 -25
  157. ultralytics/solutions/similarity_search.py +107 -60
  158. ultralytics/solutions/solutions.py +343 -262
  159. ultralytics/solutions/speed_estimation.py +35 -31
  160. ultralytics/solutions/streamlit_inference.py +104 -40
  161. ultralytics/solutions/templates/similarity-search.html +31 -24
  162. ultralytics/solutions/trackzone.py +24 -24
  163. ultralytics/solutions/vision_eye.py +11 -12
  164. ultralytics/trackers/__init__.py +1 -1
  165. ultralytics/trackers/basetrack.py +18 -27
  166. ultralytics/trackers/bot_sort.py +48 -39
  167. ultralytics/trackers/byte_tracker.py +94 -94
  168. ultralytics/trackers/track.py +7 -16
  169. ultralytics/trackers/utils/gmc.py +37 -69
  170. ultralytics/trackers/utils/kalman_filter.py +68 -76
  171. ultralytics/trackers/utils/matching.py +13 -17
  172. ultralytics/utils/__init__.py +251 -275
  173. ultralytics/utils/autobatch.py +19 -7
  174. ultralytics/utils/autodevice.py +68 -38
  175. ultralytics/utils/benchmarks.py +169 -130
  176. ultralytics/utils/callbacks/base.py +12 -13
  177. ultralytics/utils/callbacks/clearml.py +14 -15
  178. ultralytics/utils/callbacks/comet.py +139 -66
  179. ultralytics/utils/callbacks/dvc.py +19 -27
  180. ultralytics/utils/callbacks/hub.py +8 -6
  181. ultralytics/utils/callbacks/mlflow.py +6 -10
  182. ultralytics/utils/callbacks/neptune.py +11 -19
  183. ultralytics/utils/callbacks/platform.py +73 -0
  184. ultralytics/utils/callbacks/raytune.py +3 -4
  185. ultralytics/utils/callbacks/tensorboard.py +9 -12
  186. ultralytics/utils/callbacks/wb.py +33 -30
  187. ultralytics/utils/checks.py +163 -114
  188. ultralytics/utils/cpu.py +89 -0
  189. ultralytics/utils/dist.py +24 -20
  190. ultralytics/utils/downloads.py +176 -146
  191. ultralytics/utils/errors.py +11 -13
  192. ultralytics/utils/events.py +113 -0
  193. ultralytics/utils/export/__init__.py +7 -0
  194. ultralytics/utils/{export.py → export/engine.py} +81 -63
  195. ultralytics/utils/export/imx.py +294 -0
  196. ultralytics/utils/export/tensorflow.py +217 -0
  197. ultralytics/utils/files.py +33 -36
  198. ultralytics/utils/git.py +137 -0
  199. ultralytics/utils/instance.py +105 -120
  200. ultralytics/utils/logger.py +404 -0
  201. ultralytics/utils/loss.py +99 -61
  202. ultralytics/utils/metrics.py +649 -478
  203. ultralytics/utils/nms.py +337 -0
  204. ultralytics/utils/ops.py +263 -451
  205. ultralytics/utils/patches.py +70 -31
  206. ultralytics/utils/plotting.py +253 -223
  207. ultralytics/utils/tal.py +48 -61
  208. ultralytics/utils/torch_utils.py +244 -251
  209. ultralytics/utils/tqdm.py +438 -0
  210. ultralytics/utils/triton.py +22 -23
  211. ultralytics/utils/tuner.py +11 -10
  212. dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
  213. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
  214. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
  215. {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,7 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import glob
4
6
  import math
5
7
  import os
@@ -8,6 +10,7 @@ import urllib
8
10
  from dataclasses import dataclass
9
11
  from pathlib import Path
10
12
  from threading import Thread
13
+ from typing import Any
11
14
 
12
15
  import cv2
13
16
  import numpy as np
@@ -22,11 +25,10 @@ from ultralytics.utils.patches import imread
22
25
 
23
26
  @dataclass
24
27
  class SourceTypes:
25
- """
26
- Class to represent various types of input sources for predictions.
28
+ """Class to represent various types of input sources for predictions.
27
29
 
28
- This class uses dataclass to define boolean flags for different types of input sources that can be used for
29
- making predictions with YOLO models.
30
+ This class uses dataclass to define boolean flags for different types of input sources that can be used for making
31
+ predictions with YOLO models.
30
32
 
31
33
  Attributes:
32
34
  stream (bool): Flag indicating if the input source is a video stream.
@@ -49,24 +51,23 @@ class SourceTypes:
49
51
 
50
52
 
51
53
  class LoadStreams:
52
- """
53
- Stream Loader for various types of video streams.
54
+ """Stream Loader for various types of video streams.
54
55
 
55
- Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video
56
- streams simultaneously, making it suitable for real-time video analysis tasks.
56
+ Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video streams
57
+ simultaneously, making it suitable for real-time video analysis tasks.
57
58
 
58
59
  Attributes:
59
- sources (List[str]): The source input paths or URLs for the video streams.
60
+ sources (list[str]): The source input paths or URLs for the video streams.
60
61
  vid_stride (int): Video frame-rate stride.
61
62
  buffer (bool): Whether to buffer input streams.
62
63
  running (bool): Flag to indicate if the streaming thread is running.
63
64
  mode (str): Set to 'stream' indicating real-time capture.
64
- imgs (List[List[np.ndarray]]): List of image frames for each stream.
65
- fps (List[float]): List of FPS for each stream.
66
- frames (List[int]): List of total frames for each stream.
67
- threads (List[Thread]): List of threads for each stream.
68
- shape (List[Tuple[int, int, int]]): List of shapes for each stream.
69
- caps (List[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream.
65
+ imgs (list[list[np.ndarray]]): List of image frames for each stream.
66
+ fps (list[float]): List of FPS for each stream.
67
+ frames (list[int]): List of total frames for each stream.
68
+ threads (list[Thread]): List of threads for each stream.
69
+ shape (list[tuple[int, int, int]]): List of shapes for each stream.
70
+ caps (list[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream.
70
71
  bs (int): Batch size for processing.
71
72
  cv2_flag (int): OpenCV flag for image reading (grayscale or RGB).
72
73
 
@@ -90,8 +91,15 @@ class LoadStreams:
90
91
  - The class implements a buffer system to manage frame storage and retrieval.
91
92
  """
92
93
 
93
- def __init__(self, sources="file.streams", vid_stride=1, buffer=False, channels=3):
94
- """Initialize stream loader for multiple video sources, supporting various stream types."""
94
+ def __init__(self, sources: str = "file.streams", vid_stride: int = 1, buffer: bool = False, channels: int = 3):
95
+ """Initialize stream loader for multiple video sources, supporting various stream types.
96
+
97
+ Args:
98
+ sources (str): Path to streams file or single stream URL.
99
+ vid_stride (int): Video frame-rate stride.
100
+ buffer (bool): Whether to buffer input streams.
101
+ channels (int): Number of image channels (1 for grayscale, 3 for RGB).
102
+ """
95
103
  torch.backends.cudnn.benchmark = True # faster for fixed-size inference
96
104
  self.buffer = buffer # buffer input streams
97
105
  self.running = True # running flag for Thread
@@ -143,7 +151,7 @@ class LoadStreams:
143
151
  self.threads[i].start()
144
152
  LOGGER.info("") # newline
145
153
 
146
- def update(self, i, cap, stream):
154
+ def update(self, i: int, cap: cv2.VideoCapture, stream: str):
147
155
  """Read stream frames in daemon thread and update image buffer."""
148
156
  n, f = 0, self.frames[i] # frame number, frame array
149
157
  while self.running and cap.isOpened() and n < (f - 1):
@@ -167,7 +175,7 @@ class LoadStreams:
167
175
  time.sleep(0.01) # wait until the buffer is empty
168
176
 
169
177
  def close(self):
170
- """Terminates stream loader, stops threads, and releases video capture resources."""
178
+ """Terminate stream loader, stop threads, and release video capture resources."""
171
179
  self.running = False # stop flag for Thread
172
180
  for thread in self.threads:
173
181
  if thread.is_alive():
@@ -177,22 +185,21 @@ class LoadStreams:
177
185
  cap.release() # release video capture
178
186
  except Exception as e:
179
187
  LOGGER.warning(f"Could not release VideoCapture object: {e}")
180
- cv2.destroyAllWindows()
181
188
 
182
189
  def __iter__(self):
183
- """Iterates through YOLO image feed and re-opens unresponsive streams."""
190
+ """Iterate through YOLO image feed and re-open unresponsive streams."""
184
191
  self.count = -1
185
192
  return self
186
193
 
187
- def __next__(self):
188
- """Returns the next batch of frames from multiple video streams for processing."""
194
+ def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
195
+ """Return the next batch of frames from multiple video streams for processing."""
189
196
  self.count += 1
190
197
 
191
198
  images = []
192
199
  for i, x in enumerate(self.imgs):
193
200
  # Wait until a frame is available in each buffer
194
201
  while not x:
195
- if not self.threads[i].is_alive() or cv2.waitKey(1) == ord("q"): # q to quit
202
+ if not self.threads[i].is_alive():
196
203
  self.close()
197
204
  raise StopIteration
198
205
  time.sleep(1 / min(self.fps))
@@ -211,17 +218,16 @@ class LoadStreams:
211
218
 
212
219
  return self.sources, images, [""] * self.bs
213
220
 
214
- def __len__(self):
221
+ def __len__(self) -> int:
215
222
  """Return the number of video streams in the LoadStreams object."""
216
223
  return self.bs # 1E12 frames = 32 streams at 30 FPS for 30 years
217
224
 
218
225
 
219
226
  class LoadScreenshots:
220
- """
221
- Ultralytics screenshot dataloader for capturing and processing screen images.
227
+ """Ultralytics screenshot dataloader for capturing and processing screen images.
222
228
 
223
- This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with
224
- `yolo predict source=screen`.
229
+ This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with `yolo
230
+ predict source=screen`.
225
231
 
226
232
  Attributes:
227
233
  source (str): The source input indicating which screen to capture.
@@ -235,7 +241,7 @@ class LoadScreenshots:
235
241
  sct (mss.mss): Screen capture object from `mss` library.
236
242
  bs (int): Batch size, set to 1.
237
243
  fps (int): Frames per second, set to 30.
238
- monitor (Dict[str, int]): Monitor configuration details.
244
+ monitor (dict[str, int]): Monitor configuration details.
239
245
  cv2_flag (int): OpenCV flag for image reading (grayscale or RGB).
240
246
 
241
247
  Methods:
@@ -248,10 +254,15 @@ class LoadScreenshots:
248
254
  ... print(f"Captured frame: {im.shape}")
249
255
  """
250
256
 
251
- def __init__(self, source, channels=3):
252
- """Initialize screenshot capture with specified screen and region parameters."""
257
+ def __init__(self, source: str, channels: int = 3):
258
+ """Initialize screenshot capture with specified screen and region parameters.
259
+
260
+ Args:
261
+ source (str): Screen capture source string in format "screen_num left top width height".
262
+ channels (int): Number of image channels (1 for grayscale, 3 for RGB).
263
+ """
253
264
  check_requirements("mss")
254
- import mss # noqa
265
+ import mss
255
266
 
256
267
  source, *params = source.split()
257
268
  self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0
@@ -277,11 +288,11 @@ class LoadScreenshots:
277
288
  self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
278
289
 
279
290
  def __iter__(self):
280
- """Yields the next screenshot image from the specified screen or region for processing."""
291
+ """Yield the next screenshot image from the specified screen or region for processing."""
281
292
  return self
282
293
 
283
- def __next__(self):
284
- """Captures and returns the next screenshot as a numpy array using the mss library."""
294
+ def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
295
+ """Capture and return the next screenshot as a numpy array using the mss library."""
285
296
  im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
286
297
  im0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)[..., None] if self.cv2_flag == cv2.IMREAD_GRAYSCALE else im0
287
298
  s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
@@ -291,16 +302,15 @@ class LoadScreenshots:
291
302
 
292
303
 
293
304
  class LoadImagesAndVideos:
294
- """
295
- A class for loading and processing images and videos for YOLO object detection.
305
+ """A class for loading and processing images and videos for YOLO object detection.
296
306
 
297
- This class manages the loading and pre-processing of image and video data from various sources, including
298
- single image files, video files, and lists of image and video paths.
307
+ This class manages the loading and pre-processing of image and video data from various sources, including single
308
+ image files, video files, and lists of image and video paths.
299
309
 
300
310
  Attributes:
301
- files (List[str]): List of image and video file paths.
311
+ files (list[str]): List of image and video file paths.
302
312
  nf (int): Total number of files (images and videos).
303
- video_flag (List[bool]): Flags indicating whether a file is a video (True) or an image (False).
313
+ video_flag (list[bool]): Flags indicating whether a file is a video (True) or an image (False).
304
314
  mode (str): Current mode, 'image' or 'video'.
305
315
  vid_stride (int): Stride for video frame-rate.
306
316
  bs (int): Batch size.
@@ -330,12 +340,20 @@ class LoadImagesAndVideos:
330
340
  - Can read from a text file containing paths to images and videos.
331
341
  """
332
342
 
333
- def __init__(self, path, batch=1, vid_stride=1, channels=3):
334
- """Initialize dataloader for images and videos, supporting various input formats."""
343
+ def __init__(self, path: str | Path | list, batch: int = 1, vid_stride: int = 1, channels: int = 3):
344
+ """Initialize dataloader for images and videos, supporting various input formats.
345
+
346
+ Args:
347
+ path (str | Path | list): Path to images/videos, directory, or list of paths.
348
+ batch (int): Batch size for processing.
349
+ vid_stride (int): Video frame-rate stride.
350
+ channels (int): Number of image channels (1 for grayscale, 3 for RGB).
351
+ """
335
352
  parent = None
336
- if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line
337
- parent = Path(path).parent
338
- path = Path(path).read_text().splitlines() # list of sources
353
+ if isinstance(path, str) and Path(path).suffix in {".txt", ".csv"}: # txt/csv file with source paths
354
+ parent, content = Path(path).parent, Path(path).read_text()
355
+ path = content.splitlines() if Path(path).suffix == ".txt" else content.split(",") # list of sources
356
+ path = [p.strip() for p in path]
339
357
  files = []
340
358
  for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
341
359
  a = str(Path(p).absolute()) # do not use .resolve() https://github.com/ultralytics/ultralytics/issues/2912
@@ -353,7 +371,7 @@ class LoadImagesAndVideos:
353
371
  # Define files as images or videos
354
372
  images, videos = [], []
355
373
  for f in files:
356
- suffix = f.split(".")[-1].lower() # Get file extension without the dot and lowercase
374
+ suffix = f.rpartition(".")[-1].lower() # Get file extension without the dot and lowercase
357
375
  if suffix in IMG_FORMATS:
358
376
  images.append(f)
359
377
  elif suffix in VID_FORMATS:
@@ -376,12 +394,12 @@ class LoadImagesAndVideos:
376
394
  raise FileNotFoundError(f"No images or videos found in {p}. {FORMATS_HELP_MSG}")
377
395
 
378
396
  def __iter__(self):
379
- """Iterates through image/video files, yielding source paths, images, and metadata."""
397
+ """Iterate through image/video files, yielding source paths, images, and metadata."""
380
398
  self.count = 0
381
399
  return self
382
400
 
383
- def __next__(self):
384
- """Returns the next batch of images or video frames with their paths and metadata."""
401
+ def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
402
+ """Return the next batch of images or video frames with their paths and metadata."""
385
403
  paths, imgs, info = [], [], []
386
404
  while len(imgs) < self.bs:
387
405
  if self.count >= self.nf: # end of file list
@@ -427,11 +445,11 @@ class LoadImagesAndVideos:
427
445
  else:
428
446
  # Handle image files (including HEIC)
429
447
  self.mode = "image"
430
- if path.split(".")[-1].lower() == "heic":
448
+ if path.rpartition(".")[-1].lower() == "heic":
431
449
  # Load HEIC image using Pillow with pillow-heif
432
- check_requirements("pillow-heif")
450
+ check_requirements("pi-heif")
433
451
 
434
- from pillow_heif import register_heif_opener
452
+ from pi_heif import register_heif_opener
435
453
 
436
454
  register_heif_opener() # Register HEIF opener with Pillow
437
455
  with Image.open(path) as img:
@@ -450,8 +468,8 @@ class LoadImagesAndVideos:
450
468
 
451
469
  return paths, imgs, info
452
470
 
453
- def _new_video(self, path):
454
- """Creates a new video capture object for the given path and initializes video-related attributes."""
471
+ def _new_video(self, path: str):
472
+ """Create a new video capture object for the given path and initialize video-related attributes."""
455
473
  self.frame = 0
456
474
  self.cap = cv2.VideoCapture(path)
457
475
  self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
@@ -459,21 +477,20 @@ class LoadImagesAndVideos:
459
477
  raise FileNotFoundError(f"Failed to open video {path}")
460
478
  self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
461
479
 
462
- def __len__(self):
463
- """Returns the number of files (images and videos) in the dataset."""
480
+ def __len__(self) -> int:
481
+ """Return the number of files (images and videos) in the dataset."""
464
482
  return math.ceil(self.nf / self.bs) # number of batches
465
483
 
466
484
 
467
485
  class LoadPilAndNumpy:
468
- """
469
- Load images from PIL and Numpy arrays for batch processing.
486
+ """Load images from PIL and Numpy arrays for batch processing.
470
487
 
471
488
  This class manages loading and pre-processing of image data from both PIL and Numpy formats. It performs basic
472
489
  validation and format conversion to ensure that the images are in the required format for downstream processing.
473
490
 
474
491
  Attributes:
475
- paths (List[str]): List of image paths or autogenerated filenames.
476
- im0 (List[np.ndarray]): List of images stored as Numpy arrays.
492
+ paths (list[str]): List of image paths or autogenerated filenames.
493
+ im0 (list[np.ndarray]): List of images stored as Numpy arrays.
477
494
  mode (str): Type of data being processed, set to 'image'.
478
495
  bs (int): Batch size, equivalent to the length of `im0`.
479
496
 
@@ -491,8 +508,13 @@ class LoadPilAndNumpy:
491
508
  Loaded 2 images
492
509
  """
493
510
 
494
- def __init__(self, im0, channels=3):
495
- """Initializes a loader for PIL and Numpy images, converting inputs to a standardized format."""
511
+ def __init__(self, im0: Image.Image | np.ndarray | list, channels: int = 3):
512
+ """Initialize a loader for PIL and Numpy images, converting inputs to a standardized format.
513
+
514
+ Args:
515
+ im0 (PIL.Image.Image | np.ndarray | list): Single image or list of images in PIL or numpy format.
516
+ channels (int): Number of image channels (1 for grayscale, 3 for RGB).
517
+ """
496
518
  if not isinstance(im0, list):
497
519
  im0 = [im0]
498
520
  # use `image{i}.jpg` when Image.filename returns an empty path.
@@ -503,7 +525,7 @@ class LoadPilAndNumpy:
503
525
  self.bs = len(self.im0)
504
526
 
505
527
  @staticmethod
506
- def _single_check(im, flag="RGB"):
528
+ def _single_check(im: Image.Image | np.ndarray, flag: str = "RGB") -> np.ndarray:
507
529
  """Validate and format an image to numpy array, ensuring RGB order and contiguous memory."""
508
530
  assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
509
531
  if isinstance(im, Image.Image):
@@ -515,35 +537,34 @@ class LoadPilAndNumpy:
515
537
  im = im[..., None]
516
538
  return im
517
539
 
518
- def __len__(self):
519
- """Returns the length of the 'im0' attribute, representing the number of loaded images."""
540
+ def __len__(self) -> int:
541
+ """Return the length of the 'im0' attribute, representing the number of loaded images."""
520
542
  return len(self.im0)
521
543
 
522
- def __next__(self):
523
- """Returns the next batch of images, paths, and metadata for processing."""
544
+ def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
545
+ """Return the next batch of images, paths, and metadata for processing."""
524
546
  if self.count == 1: # loop only once as it's batch inference
525
547
  raise StopIteration
526
548
  self.count += 1
527
549
  return self.paths, self.im0, [""] * self.bs
528
550
 
529
551
  def __iter__(self):
530
- """Iterates through PIL/numpy images, yielding paths, raw images, and metadata for processing."""
552
+ """Iterate through PIL/numpy images, yielding paths, raw images, and metadata for processing."""
531
553
  self.count = 0
532
554
  return self
533
555
 
534
556
 
535
557
  class LoadTensor:
536
- """
537
- A class for loading and processing tensor data for object detection tasks.
558
+ """A class for loading and processing tensor data for object detection tasks.
538
559
 
539
- This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for
540
- further processing in object detection pipelines.
560
+ This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for further
561
+ processing in object detection pipelines.
541
562
 
542
563
  Attributes:
543
564
  im0 (torch.Tensor): The input tensor containing the image(s) with shape (B, C, H, W).
544
565
  bs (int): Batch size, inferred from the shape of `im0`.
545
566
  mode (str): Current processing mode, set to 'image'.
546
- paths (List[str]): List of image paths or auto-generated filenames.
567
+ paths (list[str]): List of image paths or auto-generated filenames.
547
568
 
548
569
  Methods:
549
570
  _single_check: Validates and formats an input tensor.
@@ -556,16 +577,20 @@ class LoadTensor:
556
577
  >>> print(f"Processed {len(images)} images")
557
578
  """
558
579
 
559
- def __init__(self, im0) -> None:
560
- """Initialize LoadTensor object for processing torch.Tensor image data."""
580
+ def __init__(self, im0: torch.Tensor) -> None:
581
+ """Initialize LoadTensor object for processing torch.Tensor image data.
582
+
583
+ Args:
584
+ im0 (torch.Tensor): Input tensor with shape (B, C, H, W).
585
+ """
561
586
  self.im0 = self._single_check(im0)
562
587
  self.bs = self.im0.shape[0]
563
588
  self.mode = "image"
564
589
  self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
565
590
 
566
591
  @staticmethod
567
- def _single_check(im, stride=32):
568
- """Validates and formats a single image tensor, ensuring correct shape and normalization."""
592
+ def _single_check(im: torch.Tensor, stride: int = 32) -> torch.Tensor:
593
+ """Validate and format a single image tensor, ensuring correct shape and normalization."""
569
594
  s = (
570
595
  f"torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
571
596
  f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
@@ -586,24 +611,24 @@ class LoadTensor:
586
611
  return im
587
612
 
588
613
  def __iter__(self):
589
- """Yields an iterator object for iterating through tensor image data."""
614
+ """Yield an iterator object for iterating through tensor image data."""
590
615
  self.count = 0
591
616
  return self
592
617
 
593
- def __next__(self):
594
- """Yields the next batch of tensor images and metadata for processing."""
618
+ def __next__(self) -> tuple[list[str], torch.Tensor, list[str]]:
619
+ """Yield the next batch of tensor images and metadata for processing."""
595
620
  if self.count == 1:
596
621
  raise StopIteration
597
622
  self.count += 1
598
623
  return self.paths, self.im0, [""] * self.bs
599
624
 
600
- def __len__(self):
601
- """Returns the batch size of the tensor input."""
625
+ def __len__(self) -> int:
626
+ """Return the batch size of the tensor input."""
602
627
  return self.bs
603
628
 
604
629
 
605
- def autocast_list(source):
606
- """Merges a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction."""
630
+ def autocast_list(source: list[Any]) -> list[Image.Image | np.ndarray]:
631
+ """Merge a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction."""
607
632
  files = []
608
633
  for im in source:
609
634
  if isinstance(im, (str, Path)): # filename or uri
@@ -619,14 +644,12 @@ def autocast_list(source):
619
644
  return files
620
645
 
621
646
 
622
- def get_best_youtube_url(url, method="pytube"):
623
- """
624
- Retrieves the URL of the best quality MP4 video stream from a given YouTube video.
647
+ def get_best_youtube_url(url: str, method: str = "pytube") -> str | None:
648
+ """Retrieve the URL of the best quality MP4 video stream from a given YouTube video.
625
649
 
626
650
  Args:
627
651
  url (str): The URL of the YouTube video.
628
652
  method (str): The method to use for extracting video info. Options are "pytube", "pafy", and "yt-dlp".
629
- Defaults to "pytube".
630
653
 
631
654
  Returns:
632
655
  (str | None): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
@@ -655,7 +678,7 @@ def get_best_youtube_url(url, method="pytube"):
655
678
 
656
679
  elif method == "pafy":
657
680
  check_requirements(("pafy", "youtube_dl==2020.12.2"))
658
- import pafy # noqa
681
+ import pafy
659
682
 
660
683
  return pafy.new(url).getbestvideo(preftype="mp4").url
661
684
 
ultralytics/data/split.py CHANGED
@@ -1,5 +1,7 @@
1
1
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import random
4
6
  import shutil
5
7
  from pathlib import Path
@@ -8,12 +10,11 @@ from ultralytics.data.utils import IMG_FORMATS, img2label_paths
8
10
  from ultralytics.utils import DATASETS_DIR, LOGGER, TQDM
9
11
 
10
12
 
11
- def split_classify_dataset(source_dir, train_ratio=0.8):
12
- """
13
- Split dataset into train and val directories in a new directory.
13
+ def split_classify_dataset(source_dir: str | Path, train_ratio: float = 0.8) -> Path:
14
+ """Split classification dataset into train and val directories in a new directory.
14
15
 
15
- Creates a new directory '{source_dir}_split' with train/val subdirectories, preserving the original class
16
- structure with an 80/20 split by default.
16
+ Creates a new directory '{source_dir}_split' with train/val subdirectories, preserving the original class structure
17
+ with an 80/20 split by default.
17
18
 
18
19
  Directory structure:
19
20
  Before:
@@ -46,13 +47,17 @@ def split_classify_dataset(source_dir, train_ratio=0.8):
46
47
  └── ...
47
48
 
48
49
  Args:
49
- source_dir (str | Path): Path to Caltech dataset root directory.
50
+ source_dir (str | Path): Path to classification dataset root directory.
50
51
  train_ratio (float): Ratio for train split, between 0 and 1.
51
52
 
53
+ Returns:
54
+ (Path): Path to the created split directory.
55
+
52
56
  Examples:
53
- >>> # Split dataset with default 80/20 ratio
57
+ Split dataset with default 80/20 ratio
54
58
  >>> split_classify_dataset("path/to/caltech")
55
- >>> # Split with custom ratio
59
+
60
+ Split with custom ratio
56
61
  >>> split_classify_dataset("path/to/caltech", 0.75)
57
62
  """
58
63
  source_path = Path(source_dir)
@@ -90,18 +95,26 @@ def split_classify_dataset(source_dir, train_ratio=0.8):
90
95
  return split_path
91
96
 
92
97
 
93
- def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annotated_only=False):
94
- """
95
- Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.
98
+ def autosplit(
99
+ path: Path = DATASETS_DIR / "coco8/images",
100
+ weights: tuple[float, float, float] = (0.9, 0.1, 0.0),
101
+ annotated_only: bool = False,
102
+ ) -> None:
103
+ """Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt
104
+ files.
96
105
 
97
106
  Args:
98
- path (Path, optional): Path to images directory.
99
- weights (list | tuple, optional): Train, validation, and test split fractions.
100
- annotated_only (bool, optional): If True, only images with an associated txt file are used.
107
+ path (Path): Path to images directory.
108
+ weights (tuple): Train, validation, and test split fractions.
109
+ annotated_only (bool): If True, only images with an associated txt file are used.
101
110
 
102
111
  Examples:
112
+ Split images with default weights
103
113
  >>> from ultralytics.data.split import autosplit
104
114
  >>> autosplit()
115
+
116
+ Split with custom weights and annotated images only
117
+ >>> autosplit(path="path/to/images", weights=(0.8, 0.15, 0.05), annotated_only=True)
105
118
  """
106
119
  path = Path(path) # images dir
107
120
  files = sorted(x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS) # image files only
@@ -122,4 +135,4 @@ def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annot
122
135
 
123
136
 
124
137
  if __name__ == "__main__":
125
- split_classify_dataset("../datasets/caltech101")
138
+ split_classify_dataset("caltech101")