dgenerate-ultralytics-headless 8.3.137__py3-none-any.whl → 8.3.224__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/METADATA +41 -34
- dgenerate_ultralytics_headless-8.3.224.dist-info/RECORD +285 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/WHEEL +1 -1
- tests/__init__.py +7 -6
- tests/conftest.py +15 -39
- tests/test_cli.py +17 -17
- tests/test_cuda.py +17 -8
- tests/test_engine.py +36 -10
- tests/test_exports.py +98 -37
- tests/test_integrations.py +12 -15
- tests/test_python.py +126 -82
- tests/test_solutions.py +319 -135
- ultralytics/__init__.py +27 -9
- ultralytics/cfg/__init__.py +83 -87
- ultralytics/cfg/datasets/Argoverse.yaml +4 -4
- ultralytics/cfg/datasets/DOTAv1.5.yaml +2 -2
- ultralytics/cfg/datasets/DOTAv1.yaml +2 -2
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +2 -2
- ultralytics/cfg/datasets/HomeObjects-3K.yaml +4 -5
- ultralytics/cfg/datasets/ImageNet.yaml +3 -3
- ultralytics/cfg/datasets/Objects365.yaml +24 -20
- ultralytics/cfg/datasets/SKU-110K.yaml +9 -9
- ultralytics/cfg/datasets/VOC.yaml +10 -13
- ultralytics/cfg/datasets/VisDrone.yaml +43 -33
- ultralytics/cfg/datasets/african-wildlife.yaml +5 -5
- ultralytics/cfg/datasets/brain-tumor.yaml +4 -5
- ultralytics/cfg/datasets/carparts-seg.yaml +5 -5
- ultralytics/cfg/datasets/coco-pose.yaml +26 -4
- ultralytics/cfg/datasets/coco.yaml +4 -4
- ultralytics/cfg/datasets/coco128-seg.yaml +2 -2
- ultralytics/cfg/datasets/coco128.yaml +2 -2
- ultralytics/cfg/datasets/coco8-grayscale.yaml +103 -0
- ultralytics/cfg/datasets/coco8-multispectral.yaml +2 -2
- ultralytics/cfg/datasets/coco8-pose.yaml +23 -2
- ultralytics/cfg/datasets/coco8-seg.yaml +2 -2
- ultralytics/cfg/datasets/coco8.yaml +2 -2
- ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
- ultralytics/cfg/datasets/crack-seg.yaml +5 -5
- ultralytics/cfg/datasets/dog-pose.yaml +32 -4
- ultralytics/cfg/datasets/dota8-multispectral.yaml +2 -2
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +29 -4
- ultralytics/cfg/datasets/lvis.yaml +9 -9
- ultralytics/cfg/datasets/medical-pills.yaml +4 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +7 -10
- ultralytics/cfg/datasets/package-seg.yaml +5 -5
- ultralytics/cfg/datasets/signature.yaml +4 -4
- ultralytics/cfg/datasets/tiger-pose.yaml +20 -4
- ultralytics/cfg/datasets/xView.yaml +5 -5
- ultralytics/cfg/default.yaml +96 -93
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +12 -12
- ultralytics/data/augment.py +531 -564
- ultralytics/data/base.py +76 -81
- ultralytics/data/build.py +206 -42
- ultralytics/data/converter.py +179 -78
- ultralytics/data/dataset.py +121 -121
- ultralytics/data/loaders.py +114 -91
- ultralytics/data/split.py +28 -15
- ultralytics/data/split_dota.py +67 -48
- ultralytics/data/utils.py +110 -89
- ultralytics/engine/exporter.py +422 -460
- ultralytics/engine/model.py +224 -252
- ultralytics/engine/predictor.py +94 -89
- ultralytics/engine/results.py +345 -595
- ultralytics/engine/trainer.py +231 -134
- ultralytics/engine/tuner.py +279 -73
- ultralytics/engine/validator.py +53 -46
- ultralytics/hub/__init__.py +26 -28
- ultralytics/hub/auth.py +30 -16
- ultralytics/hub/google/__init__.py +34 -36
- ultralytics/hub/session.py +53 -77
- ultralytics/hub/utils.py +23 -109
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +36 -18
- ultralytics/models/fastsam/predict.py +33 -44
- ultralytics/models/fastsam/utils.py +4 -5
- ultralytics/models/fastsam/val.py +12 -14
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +16 -20
- ultralytics/models/nas/predict.py +12 -14
- ultralytics/models/nas/val.py +4 -5
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +9 -9
- ultralytics/models/rtdetr/predict.py +22 -17
- ultralytics/models/rtdetr/train.py +20 -16
- ultralytics/models/rtdetr/val.py +79 -59
- ultralytics/models/sam/__init__.py +8 -2
- ultralytics/models/sam/amg.py +53 -38
- ultralytics/models/sam/build.py +29 -31
- ultralytics/models/sam/model.py +33 -38
- ultralytics/models/sam/modules/blocks.py +159 -182
- ultralytics/models/sam/modules/decoders.py +38 -47
- ultralytics/models/sam/modules/encoders.py +114 -133
- ultralytics/models/sam/modules/memory_attention.py +38 -31
- ultralytics/models/sam/modules/sam.py +114 -93
- ultralytics/models/sam/modules/tiny_encoder.py +268 -291
- ultralytics/models/sam/modules/transformer.py +59 -66
- ultralytics/models/sam/modules/utils.py +55 -72
- ultralytics/models/sam/predict.py +745 -341
- ultralytics/models/utils/loss.py +118 -107
- ultralytics/models/utils/ops.py +118 -71
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +28 -26
- ultralytics/models/yolo/classify/train.py +50 -81
- ultralytics/models/yolo/classify/val.py +68 -61
- ultralytics/models/yolo/detect/predict.py +12 -15
- ultralytics/models/yolo/detect/train.py +56 -46
- ultralytics/models/yolo/detect/val.py +279 -223
- ultralytics/models/yolo/model.py +167 -86
- ultralytics/models/yolo/obb/predict.py +7 -11
- ultralytics/models/yolo/obb/train.py +23 -25
- ultralytics/models/yolo/obb/val.py +107 -99
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +12 -14
- ultralytics/models/yolo/pose/train.py +31 -69
- ultralytics/models/yolo/pose/val.py +119 -254
- ultralytics/models/yolo/segment/predict.py +21 -25
- ultralytics/models/yolo/segment/train.py +12 -66
- ultralytics/models/yolo/segment/val.py +126 -305
- ultralytics/models/yolo/world/train.py +53 -45
- ultralytics/models/yolo/world/train_world.py +51 -32
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +30 -37
- ultralytics/models/yolo/yoloe/train.py +89 -71
- ultralytics/models/yolo/yoloe/train_seg.py +15 -17
- ultralytics/models/yolo/yoloe/val.py +56 -41
- ultralytics/nn/__init__.py +9 -11
- ultralytics/nn/autobackend.py +179 -107
- ultralytics/nn/modules/__init__.py +67 -67
- ultralytics/nn/modules/activation.py +8 -7
- ultralytics/nn/modules/block.py +302 -323
- ultralytics/nn/modules/conv.py +61 -104
- ultralytics/nn/modules/head.py +488 -186
- ultralytics/nn/modules/transformer.py +183 -123
- ultralytics/nn/modules/utils.py +15 -20
- ultralytics/nn/tasks.py +327 -203
- ultralytics/nn/text_model.py +81 -65
- ultralytics/py.typed +1 -0
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +19 -27
- ultralytics/solutions/analytics.py +36 -26
- ultralytics/solutions/config.py +29 -28
- ultralytics/solutions/distance_calculation.py +23 -24
- ultralytics/solutions/heatmap.py +17 -19
- ultralytics/solutions/instance_segmentation.py +21 -19
- ultralytics/solutions/object_blurrer.py +16 -17
- ultralytics/solutions/object_counter.py +48 -53
- ultralytics/solutions/object_cropper.py +22 -16
- ultralytics/solutions/parking_management.py +61 -58
- ultralytics/solutions/queue_management.py +19 -19
- ultralytics/solutions/region_counter.py +63 -50
- ultralytics/solutions/security_alarm.py +22 -25
- ultralytics/solutions/similarity_search.py +107 -60
- ultralytics/solutions/solutions.py +343 -262
- ultralytics/solutions/speed_estimation.py +35 -31
- ultralytics/solutions/streamlit_inference.py +104 -40
- ultralytics/solutions/templates/similarity-search.html +31 -24
- ultralytics/solutions/trackzone.py +24 -24
- ultralytics/solutions/vision_eye.py +11 -12
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +18 -27
- ultralytics/trackers/bot_sort.py +48 -39
- ultralytics/trackers/byte_tracker.py +94 -94
- ultralytics/trackers/track.py +7 -16
- ultralytics/trackers/utils/gmc.py +37 -69
- ultralytics/trackers/utils/kalman_filter.py +68 -76
- ultralytics/trackers/utils/matching.py +13 -17
- ultralytics/utils/__init__.py +251 -275
- ultralytics/utils/autobatch.py +19 -7
- ultralytics/utils/autodevice.py +68 -38
- ultralytics/utils/benchmarks.py +169 -130
- ultralytics/utils/callbacks/base.py +12 -13
- ultralytics/utils/callbacks/clearml.py +14 -15
- ultralytics/utils/callbacks/comet.py +139 -66
- ultralytics/utils/callbacks/dvc.py +19 -27
- ultralytics/utils/callbacks/hub.py +8 -6
- ultralytics/utils/callbacks/mlflow.py +6 -10
- ultralytics/utils/callbacks/neptune.py +11 -19
- ultralytics/utils/callbacks/platform.py +73 -0
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +9 -12
- ultralytics/utils/callbacks/wb.py +33 -30
- ultralytics/utils/checks.py +163 -114
- ultralytics/utils/cpu.py +89 -0
- ultralytics/utils/dist.py +24 -20
- ultralytics/utils/downloads.py +176 -146
- ultralytics/utils/errors.py +11 -13
- ultralytics/utils/events.py +113 -0
- ultralytics/utils/export/__init__.py +7 -0
- ultralytics/utils/{export.py → export/engine.py} +81 -63
- ultralytics/utils/export/imx.py +294 -0
- ultralytics/utils/export/tensorflow.py +217 -0
- ultralytics/utils/files.py +33 -36
- ultralytics/utils/git.py +137 -0
- ultralytics/utils/instance.py +105 -120
- ultralytics/utils/logger.py +404 -0
- ultralytics/utils/loss.py +99 -61
- ultralytics/utils/metrics.py +649 -478
- ultralytics/utils/nms.py +337 -0
- ultralytics/utils/ops.py +263 -451
- ultralytics/utils/patches.py +70 -31
- ultralytics/utils/plotting.py +253 -223
- ultralytics/utils/tal.py +48 -61
- ultralytics/utils/torch_utils.py +244 -251
- ultralytics/utils/tqdm.py +438 -0
- ultralytics/utils/triton.py +22 -23
- ultralytics/utils/tuner.py +11 -10
- dgenerate_ultralytics_headless-8.3.137.dist-info/RECORD +0 -272
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.137.dist-info → dgenerate_ultralytics_headless-8.3.224.dist-info}/top_level.txt +0 -0
ultralytics/data/loaders.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import glob
|
|
4
6
|
import math
|
|
5
7
|
import os
|
|
@@ -8,6 +10,7 @@ import urllib
|
|
|
8
10
|
from dataclasses import dataclass
|
|
9
11
|
from pathlib import Path
|
|
10
12
|
from threading import Thread
|
|
13
|
+
from typing import Any
|
|
11
14
|
|
|
12
15
|
import cv2
|
|
13
16
|
import numpy as np
|
|
@@ -22,11 +25,10 @@ from ultralytics.utils.patches import imread
|
|
|
22
25
|
|
|
23
26
|
@dataclass
|
|
24
27
|
class SourceTypes:
|
|
25
|
-
"""
|
|
26
|
-
Class to represent various types of input sources for predictions.
|
|
28
|
+
"""Class to represent various types of input sources for predictions.
|
|
27
29
|
|
|
28
|
-
This class uses dataclass to define boolean flags for different types of input sources that can be used for
|
|
29
|
-
|
|
30
|
+
This class uses dataclass to define boolean flags for different types of input sources that can be used for making
|
|
31
|
+
predictions with YOLO models.
|
|
30
32
|
|
|
31
33
|
Attributes:
|
|
32
34
|
stream (bool): Flag indicating if the input source is a video stream.
|
|
@@ -49,24 +51,23 @@ class SourceTypes:
|
|
|
49
51
|
|
|
50
52
|
|
|
51
53
|
class LoadStreams:
|
|
52
|
-
"""
|
|
53
|
-
Stream Loader for various types of video streams.
|
|
54
|
+
"""Stream Loader for various types of video streams.
|
|
54
55
|
|
|
55
|
-
Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video
|
|
56
|
-
|
|
56
|
+
Supports RTSP, RTMP, HTTP, and TCP streams. This class handles the loading and processing of multiple video streams
|
|
57
|
+
simultaneously, making it suitable for real-time video analysis tasks.
|
|
57
58
|
|
|
58
59
|
Attributes:
|
|
59
|
-
sources (
|
|
60
|
+
sources (list[str]): The source input paths or URLs for the video streams.
|
|
60
61
|
vid_stride (int): Video frame-rate stride.
|
|
61
62
|
buffer (bool): Whether to buffer input streams.
|
|
62
63
|
running (bool): Flag to indicate if the streaming thread is running.
|
|
63
64
|
mode (str): Set to 'stream' indicating real-time capture.
|
|
64
|
-
imgs (
|
|
65
|
-
fps (
|
|
66
|
-
frames (
|
|
67
|
-
threads (
|
|
68
|
-
shape (
|
|
69
|
-
caps (
|
|
65
|
+
imgs (list[list[np.ndarray]]): List of image frames for each stream.
|
|
66
|
+
fps (list[float]): List of FPS for each stream.
|
|
67
|
+
frames (list[int]): List of total frames for each stream.
|
|
68
|
+
threads (list[Thread]): List of threads for each stream.
|
|
69
|
+
shape (list[tuple[int, int, int]]): List of shapes for each stream.
|
|
70
|
+
caps (list[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream.
|
|
70
71
|
bs (int): Batch size for processing.
|
|
71
72
|
cv2_flag (int): OpenCV flag for image reading (grayscale or RGB).
|
|
72
73
|
|
|
@@ -90,8 +91,15 @@ class LoadStreams:
|
|
|
90
91
|
- The class implements a buffer system to manage frame storage and retrieval.
|
|
91
92
|
"""
|
|
92
93
|
|
|
93
|
-
def __init__(self, sources="file.streams", vid_stride=1, buffer=False, channels=3):
|
|
94
|
-
"""Initialize stream loader for multiple video sources, supporting various stream types.
|
|
94
|
+
def __init__(self, sources: str = "file.streams", vid_stride: int = 1, buffer: bool = False, channels: int = 3):
|
|
95
|
+
"""Initialize stream loader for multiple video sources, supporting various stream types.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
sources (str): Path to streams file or single stream URL.
|
|
99
|
+
vid_stride (int): Video frame-rate stride.
|
|
100
|
+
buffer (bool): Whether to buffer input streams.
|
|
101
|
+
channels (int): Number of image channels (1 for grayscale, 3 for RGB).
|
|
102
|
+
"""
|
|
95
103
|
torch.backends.cudnn.benchmark = True # faster for fixed-size inference
|
|
96
104
|
self.buffer = buffer # buffer input streams
|
|
97
105
|
self.running = True # running flag for Thread
|
|
@@ -143,7 +151,7 @@ class LoadStreams:
|
|
|
143
151
|
self.threads[i].start()
|
|
144
152
|
LOGGER.info("") # newline
|
|
145
153
|
|
|
146
|
-
def update(self, i, cap, stream):
|
|
154
|
+
def update(self, i: int, cap: cv2.VideoCapture, stream: str):
|
|
147
155
|
"""Read stream frames in daemon thread and update image buffer."""
|
|
148
156
|
n, f = 0, self.frames[i] # frame number, frame array
|
|
149
157
|
while self.running and cap.isOpened() and n < (f - 1):
|
|
@@ -167,7 +175,7 @@ class LoadStreams:
|
|
|
167
175
|
time.sleep(0.01) # wait until the buffer is empty
|
|
168
176
|
|
|
169
177
|
def close(self):
|
|
170
|
-
"""
|
|
178
|
+
"""Terminate stream loader, stop threads, and release video capture resources."""
|
|
171
179
|
self.running = False # stop flag for Thread
|
|
172
180
|
for thread in self.threads:
|
|
173
181
|
if thread.is_alive():
|
|
@@ -177,22 +185,21 @@ class LoadStreams:
|
|
|
177
185
|
cap.release() # release video capture
|
|
178
186
|
except Exception as e:
|
|
179
187
|
LOGGER.warning(f"Could not release VideoCapture object: {e}")
|
|
180
|
-
cv2.destroyAllWindows()
|
|
181
188
|
|
|
182
189
|
def __iter__(self):
|
|
183
|
-
"""
|
|
190
|
+
"""Iterate through YOLO image feed and re-open unresponsive streams."""
|
|
184
191
|
self.count = -1
|
|
185
192
|
return self
|
|
186
193
|
|
|
187
|
-
def __next__(self):
|
|
188
|
-
"""
|
|
194
|
+
def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
|
|
195
|
+
"""Return the next batch of frames from multiple video streams for processing."""
|
|
189
196
|
self.count += 1
|
|
190
197
|
|
|
191
198
|
images = []
|
|
192
199
|
for i, x in enumerate(self.imgs):
|
|
193
200
|
# Wait until a frame is available in each buffer
|
|
194
201
|
while not x:
|
|
195
|
-
if not self.threads[i].is_alive()
|
|
202
|
+
if not self.threads[i].is_alive():
|
|
196
203
|
self.close()
|
|
197
204
|
raise StopIteration
|
|
198
205
|
time.sleep(1 / min(self.fps))
|
|
@@ -211,17 +218,16 @@ class LoadStreams:
|
|
|
211
218
|
|
|
212
219
|
return self.sources, images, [""] * self.bs
|
|
213
220
|
|
|
214
|
-
def __len__(self):
|
|
221
|
+
def __len__(self) -> int:
|
|
215
222
|
"""Return the number of video streams in the LoadStreams object."""
|
|
216
223
|
return self.bs # 1E12 frames = 32 streams at 30 FPS for 30 years
|
|
217
224
|
|
|
218
225
|
|
|
219
226
|
class LoadScreenshots:
|
|
220
|
-
"""
|
|
221
|
-
Ultralytics screenshot dataloader for capturing and processing screen images.
|
|
227
|
+
"""Ultralytics screenshot dataloader for capturing and processing screen images.
|
|
222
228
|
|
|
223
|
-
This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with
|
|
224
|
-
|
|
229
|
+
This class manages the loading of screenshot images for processing with YOLO. It is suitable for use with `yolo
|
|
230
|
+
predict source=screen`.
|
|
225
231
|
|
|
226
232
|
Attributes:
|
|
227
233
|
source (str): The source input indicating which screen to capture.
|
|
@@ -235,7 +241,7 @@ class LoadScreenshots:
|
|
|
235
241
|
sct (mss.mss): Screen capture object from `mss` library.
|
|
236
242
|
bs (int): Batch size, set to 1.
|
|
237
243
|
fps (int): Frames per second, set to 30.
|
|
238
|
-
monitor (
|
|
244
|
+
monitor (dict[str, int]): Monitor configuration details.
|
|
239
245
|
cv2_flag (int): OpenCV flag for image reading (grayscale or RGB).
|
|
240
246
|
|
|
241
247
|
Methods:
|
|
@@ -248,10 +254,15 @@ class LoadScreenshots:
|
|
|
248
254
|
... print(f"Captured frame: {im.shape}")
|
|
249
255
|
"""
|
|
250
256
|
|
|
251
|
-
def __init__(self, source, channels=3):
|
|
252
|
-
"""Initialize screenshot capture with specified screen and region parameters.
|
|
257
|
+
def __init__(self, source: str, channels: int = 3):
|
|
258
|
+
"""Initialize screenshot capture with specified screen and region parameters.
|
|
259
|
+
|
|
260
|
+
Args:
|
|
261
|
+
source (str): Screen capture source string in format "screen_num left top width height".
|
|
262
|
+
channels (int): Number of image channels (1 for grayscale, 3 for RGB).
|
|
263
|
+
"""
|
|
253
264
|
check_requirements("mss")
|
|
254
|
-
import mss
|
|
265
|
+
import mss
|
|
255
266
|
|
|
256
267
|
source, *params = source.split()
|
|
257
268
|
self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0
|
|
@@ -277,11 +288,11 @@ class LoadScreenshots:
|
|
|
277
288
|
self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
|
|
278
289
|
|
|
279
290
|
def __iter__(self):
|
|
280
|
-
"""
|
|
291
|
+
"""Yield the next screenshot image from the specified screen or region for processing."""
|
|
281
292
|
return self
|
|
282
293
|
|
|
283
|
-
def __next__(self):
|
|
284
|
-
"""
|
|
294
|
+
def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
|
|
295
|
+
"""Capture and return the next screenshot as a numpy array using the mss library."""
|
|
285
296
|
im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
|
|
286
297
|
im0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)[..., None] if self.cv2_flag == cv2.IMREAD_GRAYSCALE else im0
|
|
287
298
|
s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
|
|
@@ -291,16 +302,15 @@ class LoadScreenshots:
|
|
|
291
302
|
|
|
292
303
|
|
|
293
304
|
class LoadImagesAndVideos:
|
|
294
|
-
"""
|
|
295
|
-
A class for loading and processing images and videos for YOLO object detection.
|
|
305
|
+
"""A class for loading and processing images and videos for YOLO object detection.
|
|
296
306
|
|
|
297
|
-
This class manages the loading and pre-processing of image and video data from various sources, including
|
|
298
|
-
|
|
307
|
+
This class manages the loading and pre-processing of image and video data from various sources, including single
|
|
308
|
+
image files, video files, and lists of image and video paths.
|
|
299
309
|
|
|
300
310
|
Attributes:
|
|
301
|
-
files (
|
|
311
|
+
files (list[str]): List of image and video file paths.
|
|
302
312
|
nf (int): Total number of files (images and videos).
|
|
303
|
-
video_flag (
|
|
313
|
+
video_flag (list[bool]): Flags indicating whether a file is a video (True) or an image (False).
|
|
304
314
|
mode (str): Current mode, 'image' or 'video'.
|
|
305
315
|
vid_stride (int): Stride for video frame-rate.
|
|
306
316
|
bs (int): Batch size.
|
|
@@ -330,12 +340,20 @@ class LoadImagesAndVideos:
|
|
|
330
340
|
- Can read from a text file containing paths to images and videos.
|
|
331
341
|
"""
|
|
332
342
|
|
|
333
|
-
def __init__(self, path, batch=1, vid_stride=1, channels=3):
|
|
334
|
-
"""Initialize dataloader for images and videos, supporting various input formats.
|
|
343
|
+
def __init__(self, path: str | Path | list, batch: int = 1, vid_stride: int = 1, channels: int = 3):
|
|
344
|
+
"""Initialize dataloader for images and videos, supporting various input formats.
|
|
345
|
+
|
|
346
|
+
Args:
|
|
347
|
+
path (str | Path | list): Path to images/videos, directory, or list of paths.
|
|
348
|
+
batch (int): Batch size for processing.
|
|
349
|
+
vid_stride (int): Video frame-rate stride.
|
|
350
|
+
channels (int): Number of image channels (1 for grayscale, 3 for RGB).
|
|
351
|
+
"""
|
|
335
352
|
parent = None
|
|
336
|
-
if isinstance(path, str) and Path(path).suffix
|
|
337
|
-
parent = Path(path).parent
|
|
338
|
-
path = Path(path).
|
|
353
|
+
if isinstance(path, str) and Path(path).suffix in {".txt", ".csv"}: # txt/csv file with source paths
|
|
354
|
+
parent, content = Path(path).parent, Path(path).read_text()
|
|
355
|
+
path = content.splitlines() if Path(path).suffix == ".txt" else content.split(",") # list of sources
|
|
356
|
+
path = [p.strip() for p in path]
|
|
339
357
|
files = []
|
|
340
358
|
for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
|
|
341
359
|
a = str(Path(p).absolute()) # do not use .resolve() https://github.com/ultralytics/ultralytics/issues/2912
|
|
@@ -353,7 +371,7 @@ class LoadImagesAndVideos:
|
|
|
353
371
|
# Define files as images or videos
|
|
354
372
|
images, videos = [], []
|
|
355
373
|
for f in files:
|
|
356
|
-
suffix = f.
|
|
374
|
+
suffix = f.rpartition(".")[-1].lower() # Get file extension without the dot and lowercase
|
|
357
375
|
if suffix in IMG_FORMATS:
|
|
358
376
|
images.append(f)
|
|
359
377
|
elif suffix in VID_FORMATS:
|
|
@@ -376,12 +394,12 @@ class LoadImagesAndVideos:
|
|
|
376
394
|
raise FileNotFoundError(f"No images or videos found in {p}. {FORMATS_HELP_MSG}")
|
|
377
395
|
|
|
378
396
|
def __iter__(self):
|
|
379
|
-
"""
|
|
397
|
+
"""Iterate through image/video files, yielding source paths, images, and metadata."""
|
|
380
398
|
self.count = 0
|
|
381
399
|
return self
|
|
382
400
|
|
|
383
|
-
def __next__(self):
|
|
384
|
-
"""
|
|
401
|
+
def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
|
|
402
|
+
"""Return the next batch of images or video frames with their paths and metadata."""
|
|
385
403
|
paths, imgs, info = [], [], []
|
|
386
404
|
while len(imgs) < self.bs:
|
|
387
405
|
if self.count >= self.nf: # end of file list
|
|
@@ -427,11 +445,11 @@ class LoadImagesAndVideos:
|
|
|
427
445
|
else:
|
|
428
446
|
# Handle image files (including HEIC)
|
|
429
447
|
self.mode = "image"
|
|
430
|
-
if path.
|
|
448
|
+
if path.rpartition(".")[-1].lower() == "heic":
|
|
431
449
|
# Load HEIC image using Pillow with pillow-heif
|
|
432
|
-
check_requirements("
|
|
450
|
+
check_requirements("pi-heif")
|
|
433
451
|
|
|
434
|
-
from
|
|
452
|
+
from pi_heif import register_heif_opener
|
|
435
453
|
|
|
436
454
|
register_heif_opener() # Register HEIF opener with Pillow
|
|
437
455
|
with Image.open(path) as img:
|
|
@@ -450,8 +468,8 @@ class LoadImagesAndVideos:
|
|
|
450
468
|
|
|
451
469
|
return paths, imgs, info
|
|
452
470
|
|
|
453
|
-
def _new_video(self, path):
|
|
454
|
-
"""
|
|
471
|
+
def _new_video(self, path: str):
|
|
472
|
+
"""Create a new video capture object for the given path and initialize video-related attributes."""
|
|
455
473
|
self.frame = 0
|
|
456
474
|
self.cap = cv2.VideoCapture(path)
|
|
457
475
|
self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
|
|
@@ -459,21 +477,20 @@ class LoadImagesAndVideos:
|
|
|
459
477
|
raise FileNotFoundError(f"Failed to open video {path}")
|
|
460
478
|
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
|
|
461
479
|
|
|
462
|
-
def __len__(self):
|
|
463
|
-
"""
|
|
480
|
+
def __len__(self) -> int:
|
|
481
|
+
"""Return the number of files (images and videos) in the dataset."""
|
|
464
482
|
return math.ceil(self.nf / self.bs) # number of batches
|
|
465
483
|
|
|
466
484
|
|
|
467
485
|
class LoadPilAndNumpy:
|
|
468
|
-
"""
|
|
469
|
-
Load images from PIL and Numpy arrays for batch processing.
|
|
486
|
+
"""Load images from PIL and Numpy arrays for batch processing.
|
|
470
487
|
|
|
471
488
|
This class manages loading and pre-processing of image data from both PIL and Numpy formats. It performs basic
|
|
472
489
|
validation and format conversion to ensure that the images are in the required format for downstream processing.
|
|
473
490
|
|
|
474
491
|
Attributes:
|
|
475
|
-
paths (
|
|
476
|
-
im0 (
|
|
492
|
+
paths (list[str]): List of image paths or autogenerated filenames.
|
|
493
|
+
im0 (list[np.ndarray]): List of images stored as Numpy arrays.
|
|
477
494
|
mode (str): Type of data being processed, set to 'image'.
|
|
478
495
|
bs (int): Batch size, equivalent to the length of `im0`.
|
|
479
496
|
|
|
@@ -491,8 +508,13 @@ class LoadPilAndNumpy:
|
|
|
491
508
|
Loaded 2 images
|
|
492
509
|
"""
|
|
493
510
|
|
|
494
|
-
def __init__(self, im0, channels=3):
|
|
495
|
-
"""
|
|
511
|
+
def __init__(self, im0: Image.Image | np.ndarray | list, channels: int = 3):
|
|
512
|
+
"""Initialize a loader for PIL and Numpy images, converting inputs to a standardized format.
|
|
513
|
+
|
|
514
|
+
Args:
|
|
515
|
+
im0 (PIL.Image.Image | np.ndarray | list): Single image or list of images in PIL or numpy format.
|
|
516
|
+
channels (int): Number of image channels (1 for grayscale, 3 for RGB).
|
|
517
|
+
"""
|
|
496
518
|
if not isinstance(im0, list):
|
|
497
519
|
im0 = [im0]
|
|
498
520
|
# use `image{i}.jpg` when Image.filename returns an empty path.
|
|
@@ -503,7 +525,7 @@ class LoadPilAndNumpy:
|
|
|
503
525
|
self.bs = len(self.im0)
|
|
504
526
|
|
|
505
527
|
@staticmethod
|
|
506
|
-
def _single_check(im, flag="RGB"):
|
|
528
|
+
def _single_check(im: Image.Image | np.ndarray, flag: str = "RGB") -> np.ndarray:
|
|
507
529
|
"""Validate and format an image to numpy array, ensuring RGB order and contiguous memory."""
|
|
508
530
|
assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
|
|
509
531
|
if isinstance(im, Image.Image):
|
|
@@ -515,35 +537,34 @@ class LoadPilAndNumpy:
|
|
|
515
537
|
im = im[..., None]
|
|
516
538
|
return im
|
|
517
539
|
|
|
518
|
-
def __len__(self):
|
|
519
|
-
"""
|
|
540
|
+
def __len__(self) -> int:
|
|
541
|
+
"""Return the length of the 'im0' attribute, representing the number of loaded images."""
|
|
520
542
|
return len(self.im0)
|
|
521
543
|
|
|
522
|
-
def __next__(self):
|
|
523
|
-
"""
|
|
544
|
+
def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
|
|
545
|
+
"""Return the next batch of images, paths, and metadata for processing."""
|
|
524
546
|
if self.count == 1: # loop only once as it's batch inference
|
|
525
547
|
raise StopIteration
|
|
526
548
|
self.count += 1
|
|
527
549
|
return self.paths, self.im0, [""] * self.bs
|
|
528
550
|
|
|
529
551
|
def __iter__(self):
|
|
530
|
-
"""
|
|
552
|
+
"""Iterate through PIL/numpy images, yielding paths, raw images, and metadata for processing."""
|
|
531
553
|
self.count = 0
|
|
532
554
|
return self
|
|
533
555
|
|
|
534
556
|
|
|
535
557
|
class LoadTensor:
|
|
536
|
-
"""
|
|
537
|
-
A class for loading and processing tensor data for object detection tasks.
|
|
558
|
+
"""A class for loading and processing tensor data for object detection tasks.
|
|
538
559
|
|
|
539
|
-
This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for
|
|
540
|
-
|
|
560
|
+
This class handles the loading and pre-processing of image data from PyTorch tensors, preparing them for further
|
|
561
|
+
processing in object detection pipelines.
|
|
541
562
|
|
|
542
563
|
Attributes:
|
|
543
564
|
im0 (torch.Tensor): The input tensor containing the image(s) with shape (B, C, H, W).
|
|
544
565
|
bs (int): Batch size, inferred from the shape of `im0`.
|
|
545
566
|
mode (str): Current processing mode, set to 'image'.
|
|
546
|
-
paths (
|
|
567
|
+
paths (list[str]): List of image paths or auto-generated filenames.
|
|
547
568
|
|
|
548
569
|
Methods:
|
|
549
570
|
_single_check: Validates and formats an input tensor.
|
|
@@ -556,16 +577,20 @@ class LoadTensor:
|
|
|
556
577
|
>>> print(f"Processed {len(images)} images")
|
|
557
578
|
"""
|
|
558
579
|
|
|
559
|
-
def __init__(self, im0) -> None:
|
|
560
|
-
"""Initialize LoadTensor object for processing torch.Tensor image data.
|
|
580
|
+
def __init__(self, im0: torch.Tensor) -> None:
|
|
581
|
+
"""Initialize LoadTensor object for processing torch.Tensor image data.
|
|
582
|
+
|
|
583
|
+
Args:
|
|
584
|
+
im0 (torch.Tensor): Input tensor with shape (B, C, H, W).
|
|
585
|
+
"""
|
|
561
586
|
self.im0 = self._single_check(im0)
|
|
562
587
|
self.bs = self.im0.shape[0]
|
|
563
588
|
self.mode = "image"
|
|
564
589
|
self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
|
|
565
590
|
|
|
566
591
|
@staticmethod
|
|
567
|
-
def _single_check(im, stride=32):
|
|
568
|
-
"""
|
|
592
|
+
def _single_check(im: torch.Tensor, stride: int = 32) -> torch.Tensor:
|
|
593
|
+
"""Validate and format a single image tensor, ensuring correct shape and normalization."""
|
|
569
594
|
s = (
|
|
570
595
|
f"torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
|
|
571
596
|
f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
|
|
@@ -586,24 +611,24 @@ class LoadTensor:
|
|
|
586
611
|
return im
|
|
587
612
|
|
|
588
613
|
def __iter__(self):
|
|
589
|
-
"""
|
|
614
|
+
"""Yield an iterator object for iterating through tensor image data."""
|
|
590
615
|
self.count = 0
|
|
591
616
|
return self
|
|
592
617
|
|
|
593
|
-
def __next__(self):
|
|
594
|
-
"""
|
|
618
|
+
def __next__(self) -> tuple[list[str], torch.Tensor, list[str]]:
|
|
619
|
+
"""Yield the next batch of tensor images and metadata for processing."""
|
|
595
620
|
if self.count == 1:
|
|
596
621
|
raise StopIteration
|
|
597
622
|
self.count += 1
|
|
598
623
|
return self.paths, self.im0, [""] * self.bs
|
|
599
624
|
|
|
600
|
-
def __len__(self):
|
|
601
|
-
"""
|
|
625
|
+
def __len__(self) -> int:
|
|
626
|
+
"""Return the batch size of the tensor input."""
|
|
602
627
|
return self.bs
|
|
603
628
|
|
|
604
629
|
|
|
605
|
-
def autocast_list(source):
|
|
606
|
-
"""
|
|
630
|
+
def autocast_list(source: list[Any]) -> list[Image.Image | np.ndarray]:
|
|
631
|
+
"""Merge a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction."""
|
|
607
632
|
files = []
|
|
608
633
|
for im in source:
|
|
609
634
|
if isinstance(im, (str, Path)): # filename or uri
|
|
@@ -619,14 +644,12 @@ def autocast_list(source):
|
|
|
619
644
|
return files
|
|
620
645
|
|
|
621
646
|
|
|
622
|
-
def get_best_youtube_url(url, method="pytube"):
|
|
623
|
-
"""
|
|
624
|
-
Retrieves the URL of the best quality MP4 video stream from a given YouTube video.
|
|
647
|
+
def get_best_youtube_url(url: str, method: str = "pytube") -> str | None:
|
|
648
|
+
"""Retrieve the URL of the best quality MP4 video stream from a given YouTube video.
|
|
625
649
|
|
|
626
650
|
Args:
|
|
627
651
|
url (str): The URL of the YouTube video.
|
|
628
652
|
method (str): The method to use for extracting video info. Options are "pytube", "pafy", and "yt-dlp".
|
|
629
|
-
Defaults to "pytube".
|
|
630
653
|
|
|
631
654
|
Returns:
|
|
632
655
|
(str | None): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
|
|
@@ -655,7 +678,7 @@ def get_best_youtube_url(url, method="pytube"):
|
|
|
655
678
|
|
|
656
679
|
elif method == "pafy":
|
|
657
680
|
check_requirements(("pafy", "youtube_dl==2020.12.2"))
|
|
658
|
-
import pafy
|
|
681
|
+
import pafy
|
|
659
682
|
|
|
660
683
|
return pafy.new(url).getbestvideo(preftype="mp4").url
|
|
661
684
|
|
ultralytics/data/split.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import random
|
|
4
6
|
import shutil
|
|
5
7
|
from pathlib import Path
|
|
@@ -8,12 +10,11 @@ from ultralytics.data.utils import IMG_FORMATS, img2label_paths
|
|
|
8
10
|
from ultralytics.utils import DATASETS_DIR, LOGGER, TQDM
|
|
9
11
|
|
|
10
12
|
|
|
11
|
-
def split_classify_dataset(source_dir, train_ratio=0.8):
|
|
12
|
-
"""
|
|
13
|
-
Split dataset into train and val directories in a new directory.
|
|
13
|
+
def split_classify_dataset(source_dir: str | Path, train_ratio: float = 0.8) -> Path:
|
|
14
|
+
"""Split classification dataset into train and val directories in a new directory.
|
|
14
15
|
|
|
15
|
-
Creates a new directory '{source_dir}_split' with train/val subdirectories, preserving the original class
|
|
16
|
-
|
|
16
|
+
Creates a new directory '{source_dir}_split' with train/val subdirectories, preserving the original class structure
|
|
17
|
+
with an 80/20 split by default.
|
|
17
18
|
|
|
18
19
|
Directory structure:
|
|
19
20
|
Before:
|
|
@@ -46,13 +47,17 @@ def split_classify_dataset(source_dir, train_ratio=0.8):
|
|
|
46
47
|
└── ...
|
|
47
48
|
|
|
48
49
|
Args:
|
|
49
|
-
source_dir (str | Path): Path to
|
|
50
|
+
source_dir (str | Path): Path to classification dataset root directory.
|
|
50
51
|
train_ratio (float): Ratio for train split, between 0 and 1.
|
|
51
52
|
|
|
53
|
+
Returns:
|
|
54
|
+
(Path): Path to the created split directory.
|
|
55
|
+
|
|
52
56
|
Examples:
|
|
53
|
-
|
|
57
|
+
Split dataset with default 80/20 ratio
|
|
54
58
|
>>> split_classify_dataset("path/to/caltech")
|
|
55
|
-
|
|
59
|
+
|
|
60
|
+
Split with custom ratio
|
|
56
61
|
>>> split_classify_dataset("path/to/caltech", 0.75)
|
|
57
62
|
"""
|
|
58
63
|
source_path = Path(source_dir)
|
|
@@ -90,18 +95,26 @@ def split_classify_dataset(source_dir, train_ratio=0.8):
|
|
|
90
95
|
return split_path
|
|
91
96
|
|
|
92
97
|
|
|
93
|
-
def autosplit(
|
|
94
|
-
""
|
|
95
|
-
|
|
98
|
+
def autosplit(
|
|
99
|
+
path: Path = DATASETS_DIR / "coco8/images",
|
|
100
|
+
weights: tuple[float, float, float] = (0.9, 0.1, 0.0),
|
|
101
|
+
annotated_only: bool = False,
|
|
102
|
+
) -> None:
|
|
103
|
+
"""Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt
|
|
104
|
+
files.
|
|
96
105
|
|
|
97
106
|
Args:
|
|
98
|
-
path (Path
|
|
99
|
-
weights (
|
|
100
|
-
annotated_only (bool
|
|
107
|
+
path (Path): Path to images directory.
|
|
108
|
+
weights (tuple): Train, validation, and test split fractions.
|
|
109
|
+
annotated_only (bool): If True, only images with an associated txt file are used.
|
|
101
110
|
|
|
102
111
|
Examples:
|
|
112
|
+
Split images with default weights
|
|
103
113
|
>>> from ultralytics.data.split import autosplit
|
|
104
114
|
>>> autosplit()
|
|
115
|
+
|
|
116
|
+
Split with custom weights and annotated images only
|
|
117
|
+
>>> autosplit(path="path/to/images", weights=(0.8, 0.15, 0.05), annotated_only=True)
|
|
105
118
|
"""
|
|
106
119
|
path = Path(path) # images dir
|
|
107
120
|
files = sorted(x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS) # image files only
|
|
@@ -122,4 +135,4 @@ def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annot
|
|
|
122
135
|
|
|
123
136
|
|
|
124
137
|
if __name__ == "__main__":
|
|
125
|
-
split_classify_dataset("
|
|
138
|
+
split_classify_dataset("caltech101")
|