dgenerate_ultralytics_headless-8.3.194-py3-none-any.whl → dgenerate_ultralytics_headless-8.3.195-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/METADATA +1 -2
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/RECORD +97 -96
- tests/test_python.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +8 -8
- ultralytics/data/annotator.py +1 -1
- ultralytics/data/augment.py +75 -75
- ultralytics/data/base.py +12 -12
- ultralytics/data/converter.py +4 -4
- ultralytics/data/dataset.py +7 -7
- ultralytics/data/loaders.py +15 -15
- ultralytics/data/split_dota.py +10 -10
- ultralytics/data/utils.py +12 -12
- ultralytics/engine/model.py +13 -13
- ultralytics/engine/predictor.py +13 -13
- ultralytics/engine/results.py +21 -21
- ultralytics/hub/google/__init__.py +2 -2
- ultralytics/hub/session.py +7 -7
- ultralytics/models/fastsam/model.py +5 -5
- ultralytics/models/fastsam/predict.py +11 -11
- ultralytics/models/nas/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +2 -2
- ultralytics/models/rtdetr/val.py +4 -4
- ultralytics/models/sam/amg.py +6 -6
- ultralytics/models/sam/build.py +9 -9
- ultralytics/models/sam/model.py +7 -7
- ultralytics/models/sam/modules/blocks.py +6 -6
- ultralytics/models/sam/modules/decoders.py +1 -1
- ultralytics/models/sam/modules/encoders.py +27 -27
- ultralytics/models/sam/modules/sam.py +4 -4
- ultralytics/models/sam/modules/tiny_encoder.py +18 -18
- ultralytics/models/sam/modules/utils.py +8 -8
- ultralytics/models/sam/predict.py +63 -63
- ultralytics/models/utils/loss.py +22 -22
- ultralytics/models/utils/ops.py +8 -8
- ultralytics/models/yolo/classify/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +8 -8
- ultralytics/models/yolo/classify/val.py +4 -4
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +6 -6
- ultralytics/models/yolo/detect/val.py +32 -32
- ultralytics/models/yolo/model.py +6 -6
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/obb/val.py +13 -13
- ultralytics/models/yolo/pose/val.py +11 -11
- ultralytics/models/yolo/segment/predict.py +4 -4
- ultralytics/models/yolo/segment/train.py +1 -1
- ultralytics/models/yolo/segment/val.py +14 -14
- ultralytics/models/yolo/world/train.py +9 -9
- ultralytics/models/yolo/world/train_world.py +1 -1
- ultralytics/models/yolo/yoloe/predict.py +4 -4
- ultralytics/models/yolo/yoloe/train.py +4 -4
- ultralytics/nn/autobackend.py +2 -2
- ultralytics/nn/modules/block.py +6 -6
- ultralytics/nn/modules/conv.py +2 -2
- ultralytics/nn/modules/head.py +4 -4
- ultralytics/nn/tasks.py +13 -13
- ultralytics/nn/text_model.py +3 -3
- ultralytics/solutions/ai_gym.py +2 -2
- ultralytics/solutions/analytics.py +3 -3
- ultralytics/solutions/config.py +5 -5
- ultralytics/solutions/distance_calculation.py +2 -2
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +4 -4
- ultralytics/solutions/object_counter.py +4 -4
- ultralytics/solutions/parking_management.py +7 -7
- ultralytics/solutions/queue_management.py +3 -3
- ultralytics/solutions/region_counter.py +4 -4
- ultralytics/solutions/similarity_search.py +2 -2
- ultralytics/solutions/solutions.py +48 -48
- ultralytics/solutions/streamlit_inference.py +1 -1
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/solutions/vision_eye.py +1 -1
- ultralytics/trackers/byte_tracker.py +11 -11
- ultralytics/trackers/utils/gmc.py +3 -3
- ultralytics/trackers/utils/matching.py +5 -5
- ultralytics/utils/autodevice.py +2 -2
- ultralytics/utils/benchmarks.py +10 -10
- ultralytics/utils/callbacks/clearml.py +1 -1
- ultralytics/utils/callbacks/comet.py +5 -5
- ultralytics/utils/checks.py +5 -5
- ultralytics/utils/cpu.py +90 -0
- ultralytics/utils/dist.py +1 -1
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export.py +5 -5
- ultralytics/utils/instance.py +2 -2
- ultralytics/utils/metrics.py +35 -35
- ultralytics/utils/nms.py +4 -4
- ultralytics/utils/ops.py +1 -1
- ultralytics/utils/patches.py +2 -2
- ultralytics/utils/plotting.py +9 -9
- ultralytics/utils/torch_utils.py +2 -6
- ultralytics/utils/triton.py +5 -5
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/top_level.txt +0 -0
ultralytics/data/dataset.py
CHANGED
@@ -164,7 +164,7 @@ class YOLODataset(BaseDataset):
         This method loads labels from disk or cache, verifies their integrity, and prepares them for training.

         Returns:
-            (
+            (list[dict]): List of label dictionaries, each containing information about an image and its annotations.
         """
         self.label_files = img2label_paths(self.im_files)
         cache_path = Path(self.label_files[0]).parent.with_suffix(".cache")
@@ -291,7 +291,7 @@ class YOLODataset(BaseDataset):
         Collate data samples into batches.

         Args:
-            batch (
+            batch (list[dict]): List of dictionaries containing sample data.

         Returns:
             (dict): Collated batch with stacked tensors.
@@ -392,7 +392,7 @@ class YOLOMultiModalDataset(YOLODataset):
         Return category names for the dataset.

         Returns:
-            (
+            (set[str]): List of class names.
         """
         names = self.data["names"].values()
         return {n.strip() for name in names for n in name.split("/")}  # category names
@@ -474,7 +474,7 @@ class GroundingDataset(YOLODataset):
         against a predefined set of datasets with known instance counts.

         Args:
-            labels (
+            labels (list[dict[str, Any]]): List of label dictionaries, where each dictionary
                 contains dataset annotations. Each label dict must have a 'bboxes' key with
                 a numpy array or tensor containing bounding box coordinates.

@@ -508,7 +508,7 @@ class GroundingDataset(YOLODataset):
             path (Path): Path where to save the cache file.

         Returns:
-            (
+            (dict[str, Any]): Dictionary containing cached labels and related information.
         """
         x = {"labels": []}
         LOGGER.info("Loading annotation file...")
@@ -596,7 +596,7 @@ class GroundingDataset(YOLODataset):
         Load labels from cache or generate them from JSON file.

         Returns:
-            (
+            (list[dict]): List of label dictionaries, each containing information about an image and its annotations.
         """
         cache_path = Path(self.json_file).with_suffix(".cache")
         try:
@@ -682,7 +682,7 @@ class YOLOConcatDataset(ConcatDataset):
         Collate data samples into batches.

         Args:
-            batch (
+            batch (list[dict]): List of dictionaries containing sample data.

         Returns:
             (dict): Collated batch with stacked tensors.
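
The collate changes above document the batch as a plain list of per-image dicts. A minimal sketch of that contract, assuming YOLODataset.collate_fn is used as the DataLoader collate function (the dict keys shown are illustrative, not exhaustive):

import torch
from ultralytics.data.dataset import YOLODataset

# One dict per image; collate_fn stacks "img" and concatenates the label tensors.
batch = [
    {"img": torch.zeros(3, 640, 640), "cls": torch.zeros(2, 1), "bboxes": torch.zeros(2, 4), "batch_idx": torch.zeros(2)},
    {"img": torch.zeros(3, 640, 640), "cls": torch.zeros(1, 1), "bboxes": torch.zeros(1, 4), "batch_idx": torch.zeros(1)},
]
collated = YOLODataset.collate_fn(batch)
print(collated["img"].shape)  # torch.Size([2, 3, 640, 640])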
ultralytics/data/loaders.py
CHANGED
@@ -59,17 +59,17 @@ class LoadStreams:
     streams simultaneously, making it suitable for real-time video analysis tasks.

     Attributes:
-        sources (
+        sources (list[str]): The source input paths or URLs for the video streams.
         vid_stride (int): Video frame-rate stride.
         buffer (bool): Whether to buffer input streams.
         running (bool): Flag to indicate if the streaming thread is running.
         mode (str): Set to 'stream' indicating real-time capture.
-        imgs (
-        fps (
-        frames (
-        threads (
-        shape (
-        caps (
+        imgs (list[list[np.ndarray]]): List of image frames for each stream.
+        fps (list[float]): List of FPS for each stream.
+        frames (list[int]): List of total frames for each stream.
+        threads (list[Thread]): List of threads for each stream.
+        shape (list[tuple[int, int, int]]): List of shapes for each stream.
+        caps (list[cv2.VideoCapture]): List of cv2.VideoCapture objects for each stream.
         bs (int): Batch size for processing.
         cv2_flag (int): OpenCV flag for image reading (grayscale or RGB).

@@ -245,7 +245,7 @@ class LoadScreenshots:
         sct (mss.mss): Screen capture object from `mss` library.
         bs (int): Batch size, set to 1.
         fps (int): Frames per second, set to 30.
-        monitor (
+        monitor (dict[str, int]): Monitor configuration details.
         cv2_flag (int): OpenCV flag for image reading (grayscale or RGB).

     Methods:
@@ -314,9 +314,9 @@ class LoadImagesAndVideos:
     single image files, video files, and lists of image and video paths.

     Attributes:
-        files (
+        files (list[str]): List of image and video file paths.
         nf (int): Total number of files (images and videos).
-        video_flag (
+        video_flag (list[bool]): Flags indicating whether a file is a video (True) or an image (False).
         mode (str): Current mode, 'image' or 'video'.
         vid_stride (int): Stride for video frame-rate.
         bs (int): Batch size.
@@ -351,7 +351,7 @@ class LoadImagesAndVideos:
         Initialize dataloader for images and videos, supporting various input formats.

         Args:
-            path (str | Path |
+            path (str | Path | list): Path to images/videos, directory, or list of paths.
             batch (int): Batch size for processing.
             vid_stride (int): Video frame-rate stride.
             channels (int): Number of image channels (1 for grayscale, 3 for RGB).
@@ -497,8 +497,8 @@ class LoadPilAndNumpy:
     validation and format conversion to ensure that the images are in the required format for downstream processing.

     Attributes:
-        paths (
-        im0 (
+        paths (list[str]): List of image paths or autogenerated filenames.
+        im0 (list[np.ndarray]): List of images stored as Numpy arrays.
         mode (str): Type of data being processed, set to 'image'.
         bs (int): Batch size, equivalent to the length of `im0`.

@@ -521,7 +521,7 @@ class LoadPilAndNumpy:
         Initialize a loader for PIL and Numpy images, converting inputs to a standardized format.

         Args:
-            im0 (PIL.Image.Image | np.ndarray |
+            im0 (PIL.Image.Image | np.ndarray | list): Single image or list of images in PIL or numpy format.
             channels (int): Number of image channels (1 for grayscale, 3 for RGB).
         """
         if not isinstance(im0, list):
@@ -574,7 +574,7 @@ class LoadTensor:
         im0 (torch.Tensor): The input tensor containing the image(s) with shape (B, C, H, W).
         bs (int): Batch size, inferred from the shape of `im0`.
         mode (str): Current processing mode, set to 'image'.
-        paths (
+        paths (list[str]): List of image paths or auto-generated filenames.

     Methods:
         _single_check: Validates and formats an input tensor.
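
The loader attributes above are all per-stream or per-file lists. A short usage sketch, assuming local files at the placeholder paths below:

from ultralytics.data.loaders import LoadImagesAndVideos

# "assets/bus.jpg" and "assets/traffic.mp4" are hypothetical local files.
loader = LoadImagesAndVideos(["assets/bus.jpg", "assets/traffic.mp4"], batch=1, vid_stride=1, channels=3)
for paths, imgs, info in loader:  # imgs: list of HWC np.ndarray frames, one per batch element
    print(paths[0], imgs[0].shape, info[0])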
ultralytics/data/split_dota.py
CHANGED
@@ -73,7 +73,7 @@ def load_yolo_dota(data_root: str, split: str = "train") -> list[dict[str, Any]]
         split (str, optional): The split data set, could be 'train' or 'val'.

     Returns:
-        (
+        (list[dict[str, Any]]): List of annotation dictionaries containing image information.

     Notes:
         The directory structure assumed for the DOTA dataset:
@@ -111,9 +111,9 @@ def get_windows(
     Get the coordinates of sliding windows for image cropping.

     Args:
-        im_size (
-        crop_sizes (
-        gaps (
+        im_size (tuple[int, int]): Original image size, (H, W).
+        crop_sizes (tuple[int, ...], optional): Crop size of windows.
+        gaps (tuple[int, ...], optional): Gap between crops.
         im_rate_thr (float, optional): Threshold of windows areas divided by image areas.
         eps (float, optional): Epsilon value for math operations.

@@ -179,9 +179,9 @@ def crop_and_save(
     Crop images and save new labels for each window.

     Args:
-        anno (
+        anno (dict[str, Any]): Annotation dict, including 'filepath', 'label', 'ori_size' as its keys.
         windows (np.ndarray): Array of windows coordinates with shape (N, 4).
-        window_objs (
+        window_objs (list[np.ndarray]): A list of labels inside each window.
         im_dir (str): The output directory path of images.
         lb_dir (str): The output directory path of labels.
         allow_background_images (bool, optional): Whether to include background images without labels.
@@ -233,8 +233,8 @@ def split_images_and_labels(
         data_root (str): Root directory of the dataset.
         save_dir (str): Directory to save the split dataset.
         split (str, optional): The split data set, could be 'train' or 'val'.
-        crop_sizes (
-        gaps (
+        crop_sizes (tuple[int, ...], optional): Tuple of crop sizes.
+        gaps (tuple[int, ...], optional): Tuple of gaps between crops.

     Notes:
         The directory structure assumed for the DOTA dataset:
@@ -273,7 +273,7 @@ def split_trainval(
         save_dir (str): Directory to save the split dataset.
         crop_size (int, optional): Base crop size.
         gap (int, optional): Base gap between crops.
-        rates (
+        rates (tuple[float, ...], optional): Scaling rates for crop_size and gap.

     Notes:
         The directory structure assumed for the DOTA dataset:
@@ -312,7 +312,7 @@ def split_test(
         save_dir (str): Directory to save the split dataset.
         crop_size (int, optional): Base crop size.
         gap (int, optional): Base gap between crops.
-        rates (
+        rates (tuple[float, ...], optional): Scaling rates for crop_size and gap.

     Notes:
         The directory structure assumed for the DOTA dataset:
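
The rates tuple documented above rescales crop_size and gap to produce multi-scale sliding windows. A usage sketch with placeholder DOTA directory paths:

from ultralytics.data.split_dota import split_test, split_trainval

# Each rate rescales crop_size and gap, so three rates give three window scales per image.
split_trainval(data_root="DOTAv1.0/", save_dir="DOTAv1.0-split/", crop_size=1024, gap=200, rates=(0.5, 1.0, 1.5))
split_test(data_root="DOTAv1.0/", save_dir="DOTAv1.0-split/", crop_size=1024, gap=200)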
ultralytics/data/utils.py
CHANGED
@@ -57,7 +57,7 @@ def check_file_speeds(
     It samples up to 5 files from the provided list and warns if access times exceed the threshold.

     Args:
-        files (
+        files (list[str]): List of file paths to check for access speed.
         threshold_ms (float, optional): Threshold in milliseconds for ping time warnings.
         threshold_mb (float, optional): Threshold in megabytes per second for read speed warnings.
         max_files (int, optional): The maximum number of files to check.
@@ -235,7 +235,7 @@ def verify_image_label(args: tuple) -> list:
                 lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32)
         else:
             nm = 1  # label missing
-            lb = np.zeros((0, (5 + nkpt * ndim) if
+            lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32)
         if keypoint:
             keypoints = lb[:, 5:].reshape(-1, nkpt, ndim)
             if ndim == 2:
@@ -261,7 +261,7 @@ def visualize_image_annotations(image_path: str, txt_path: str, label_map: dict[
     Args:
         image_path (str): The path to the image file to annotate, and it can be in formats supported by PIL.
         txt_path (str): The path to the annotation file in YOLO format, that should contain one line per object.
-        label_map (
+        label_map (dict[int, str]): A dictionary that maps class IDs (integers) to class labels (strings).

     Examples:
         >>> label_map = {0: "cat", 1: "dog", 2: "bird"}  # It should include all annotated classes details
@@ -300,8 +300,8 @@ def polygon2mask(
     Convert a list of polygons to a binary mask of the specified image size.

     Args:
-        imgsz (
-        polygons (
+        imgsz (tuple[int, int]): The size of the image as (height, width).
+        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape (N, M), where
             N is the number of polygons, and M is the number of points such that M % 2 = 0.
         color (int, optional): The color value to fill in the polygons on the mask.
         downsample_ratio (int, optional): Factor by which to downsample the mask.
@@ -325,8 +325,8 @@ def polygons2masks(
     Convert a list of polygons to a set of binary masks of the specified image size.

     Args:
-        imgsz (
-        polygons (
+        imgsz (tuple[int, int]): The size of the image as (height, width).
+        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape (N, M), where
             N is the number of polygons, and M is the number of points such that M % 2 = 0.
         color (int): The color value to fill in the polygons on the masks.
         downsample_ratio (int, optional): Factor by which to downsample each mask.
@@ -400,7 +400,7 @@ def check_det_dataset(dataset: str, autodownload: bool = True) -> dict[str, Any]
         autodownload (bool, optional): Whether to automatically download the dataset if not found.

     Returns:
-        (
+        (dict[str, Any]): Parsed dataset information and paths.
     """
     file = check_file(dataset)

@@ -494,13 +494,13 @@ def check_cls_dataset(dataset: str | Path, split: str = "") -> dict[str, Any]:
         split (str, optional): The split of the dataset. Either 'val', 'test', or ''.

     Returns:
-        (
+        (dict[str, Any]): A dictionary containing the following keys:

         - 'train' (Path): The directory path containing the training set of the dataset.
         - 'val' (Path): The directory path containing the validation set of the dataset.
         - 'test' (Path): The directory path containing the test set of the dataset.
         - 'nc' (int): The number of classes in the dataset.
-        - 'names' (
+        - 'names' (dict[int, str]): A dictionary of class names in the dataset.
     """
     # Download (optional if dataset=https://file.zip is passed directly)
     if str(dataset).startswith(("http:/", "https:/")):
@@ -588,8 +588,8 @@ class HUBDatasetStats:
         task (str): Dataset task type.
         hub_dir (Path): Directory path for HUB dataset files.
         im_dir (Path): Directory path for compressed images.
-        stats (
-        data (
+        stats (dict): Statistics dictionary containing dataset information.
+        data (dict): Dataset configuration data.

     Methods:
         get_json: Return dataset JSON for Ultralytics HUB.
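
The polygon2mask signature above takes a (height, width) tuple and a list of flattened polygon arrays. A small sketch of that input shape:

import numpy as np
from ultralytics.data.utils import polygon2mask

# One polygon as a flattened (x1, y1, x2, y2, ...) row; M % 2 == 0.
triangle = np.array([[10, 10, 100, 10, 55, 90]], dtype=np.float32)  # shape (1, 6)
mask = polygon2mask((128, 128), [triangle], color=1, downsample_ratio=1)
print(mask.shape, mask.max())  # (128, 128) 1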
ultralytics/engine/model.py
CHANGED
@@ -168,14 +168,14 @@ class Model(torch.nn.Module):
         directly with the required arguments.

         Args:
-            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor |
+            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | list | tuple): The source of
                 the image(s) to make predictions on. Can be a file path, URL, PIL image, numpy array, PyTorch
                 tensor, or a list/tuple of these.
             stream (bool): If True, treat the input source as a continuous stream for predictions.
             **kwargs (Any): Additional keyword arguments to configure the prediction process.

         Returns:
-            (
+            (list[ultralytics.engine.results.Results]): A list of prediction results, each encapsulated in a
                 Results object.

         Examples:
@@ -433,7 +433,7 @@ class Model(torch.nn.Module):
             verbose (bool): If True, prints the information. If False, returns the information as a list.

         Returns:
-            (
+            (list[str]): A list of strings containing various types of information about the model, including
                 model summary, layer details, and parameter counts. Empty if verbose is True.

         Examples:
@@ -477,13 +477,13 @@ class Model(torch.nn.Module):
         source. It allows customization of the embedding process through various keyword arguments.

         Args:
-            source (str | Path | int |
+            source (str | Path | int | list | tuple | np.ndarray | torch.Tensor): The source of the image for
                 generating embeddings. Can be a file path, URL, PIL image, numpy array, etc.
             stream (bool): If True, predictions are streamed.
             **kwargs (Any): Additional keyword arguments for configuring the embedding process.

         Returns:
-            (
+            (list[torch.Tensor]): A list containing the image embeddings.

         Examples:
             >>> model = YOLO("yolo11n.pt")
@@ -510,7 +510,7 @@ class Model(torch.nn.Module):
         types of image sources and can operate in a streaming mode.

         Args:
-            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor |
+            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | list | tuple): The source
                 of the image(s) to make predictions on. Accepts various types including file paths, URLs, PIL
                 images, numpy arrays, and torch tensors.
             stream (bool): If True, treats the input source as a continuous stream for predictions.
@@ -519,7 +519,7 @@ class Model(torch.nn.Module):
             **kwargs (Any): Additional keyword arguments for configuring the prediction process.

         Returns:
-            (
+            (list[ultralytics.engine.results.Results]): A list of prediction results, each encapsulated in a
                 Results object.

         Examples:
@@ -571,14 +571,14 @@ class Model(torch.nn.Module):
         The method registers trackers if not already present and can persist them between calls.

         Args:
-            source (str | Path | int |
+            source (str | Path | int | list | tuple | np.ndarray | torch.Tensor, optional): Input source for object
                 tracking. Can be a file path, URL, or video stream.
             stream (bool): If True, treats the input source as a continuous video stream.
             persist (bool): If True, persists trackers between different calls to this method.
             **kwargs (Any): Additional keyword arguments for configuring the tracking process.

         Returns:
-            (
+            (list[ultralytics.engine.results.Results]): A list of tracking results, each a Results object.

         Examples:
             >>> model = YOLO("yolo11n.pt")
@@ -650,7 +650,7 @@ class Model(torch.nn.Module):
             verbose (bool): Whether to print detailed benchmark information.
             format (str): Export format name for specific benchmarking.
             **kwargs (Any): Arbitrary keyword arguments to customize the benchmarking process. Common options include:
-                - imgsz (int |
+                - imgsz (int | list[int]): Image size for benchmarking.
                 - half (bool): Whether to use half-precision (FP16) mode.
                 - int8 (bool): Whether to use int8 precision mode.
                 - device (str): Device to run the benchmark on (e.g., 'cpu', 'cuda').
@@ -765,7 +765,7 @@ class Model(torch.nn.Module):
             patience (int): Epochs to wait for no observable improvement for early stopping of training.

         Returns:
-            (
+            (dict | None): Training metrics if available and training is successful; otherwise, None.

         Examples:
             >>> model = YOLO("yolo11n.pt")
@@ -893,7 +893,7 @@ class Model(torch.nn.Module):
         initialized, it sets it up before retrieving the names.

         Returns:
-            (
+            (dict[int, str]): A dictionary of class names associated with the model, where keys are class indices and
                 values are the corresponding class names.

         Raises:
@@ -1109,7 +1109,7 @@ class Model(torch.nn.Module):
         various tasks and modes within the Ultralytics framework.

         Returns:
-            (
+            (dict[str, dict[str, Any]]): A dictionary mapping task names to nested dictionaries. Each nested dictionary
                 contains mappings for 'model', 'trainer', 'validator', and 'predictor' keys to their respective class
                 implementations for that task.

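
The Model docstrings above now spell out concrete container types. A brief sketch of those return types; "bus.jpg" is a placeholder image path:

from ultralytics import YOLO

model = YOLO("yolo11n.pt")
results = model.predict("bus.jpg")   # list[Results], one entry per input image
print(model.names[0])                # names is dict[int, str], e.g. "person"
embeddings = model.embed("bus.jpg")  # list[torch.Tensor] of image embeddings
print(len(results), embeddings[0].shape)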
ultralytics/engine/predictor.py
CHANGED
@@ -81,15 +81,15 @@ class BasePredictor:
         data (dict): Data configuration.
         device (torch.device): Device used for prediction.
         dataset (Dataset): Dataset used for prediction.
-        vid_writer (
+        vid_writer (dict[str, cv2.VideoWriter]): Dictionary of {save_path: video_writer} for saving video output.
         plotted_img (np.ndarray): Last plotted image.
         source_type (SimpleNamespace): Type of input source.
         seen (int): Number of images processed.
-        windows (
+        windows (list[str]): List of window names for visualization.
         batch (tuple): Current batch data.
-        results (
+        results (list[Any]): Current batch results.
         transforms (callable): Image transforms for classification.
-        callbacks (
+        callbacks (dict[str, list[callable]]): Callback functions for different events.
         txt_path (Path): Path to save text results.
         _lock (threading.Lock): Lock for thread-safe inference.

@@ -154,7 +154,7 @@ class BasePredictor:
         Prepare input image before inference.

         Args:
-            im (torch.Tensor |
+            im (torch.Tensor | list[np.ndarray]): Images of shape (N, 3, H, W) for tensor, [(H, W, 3) x N] for list.

         Returns:
             (torch.Tensor): Preprocessed image tensor of shape (N, 3, H, W).
@@ -188,10 +188,10 @@ class BasePredictor:
         Pre-transform input image before inference.

         Args:
-            im (
+            im (list[np.ndarray]): List of images with shape [(H, W, 3) x N].

         Returns:
-            (
+            (list[np.ndarray]): List of transformed images.
         """
         same_shapes = len({x.shape for x in im}) == 1
         letterbox = LetterBox(
@@ -212,7 +212,7 @@ class BasePredictor:
         Perform inference on an image or stream.

         Args:
-            source (str | Path |
+            source (str | Path | list[str] | list[Path] | list[np.ndarray] | np.ndarray | torch.Tensor, optional):
                 Source for inference.
             model (str | Path | torch.nn.Module, optional): Model for inference.
             stream (bool): Whether to stream the inference results. If True, returns a generator.
@@ -220,7 +220,7 @@ class BasePredictor:
             **kwargs (Any): Additional keyword arguments for the inference method.

         Returns:
-            (
+            (list[ultralytics.engine.results.Results] | generator): Results objects or generator of Results objects.
         """
         self.stream = stream
         if stream:
@@ -237,7 +237,7 @@ class BasePredictor:
         generator without storing results.

         Args:
-            source (str | Path |
+            source (str | Path | list[str] | list[Path] | list[np.ndarray] | np.ndarray | torch.Tensor, optional):
                 Source for inference.
             model (str | Path | torch.nn.Module, optional): Model for inference.

@@ -254,7 +254,7 @@ class BasePredictor:
         Set up source and inference mode.

         Args:
-            source (str | Path |
+            source (str | Path | list[str] | list[Path] | list[np.ndarray] | np.ndarray | torch.Tensor):
                 Source for inference.
         """
         self.imgsz = check_imgsz(self.args.imgsz, stride=self.model.stride, min_dim=2)  # check image size
@@ -285,7 +285,7 @@ class BasePredictor:
         Stream real-time inference on camera feed and save results to file.

         Args:
-            source (str | Path |
+            source (str | Path | list[str] | list[Path] | list[np.ndarray] | np.ndarray | torch.Tensor, optional):
                 Source for inference.
             model (str | Path | torch.nn.Module, optional): Model for inference.
             *args (Any): Additional arguments for the inference method.
@@ -418,7 +418,7 @@ class BasePredictor:
             i (int): Index of the current image in the batch.
             p (Path): Path to the current image.
             im (torch.Tensor): Preprocessed image tensor.
-            s (
+            s (list[str]): List of result strings.

         Returns:
             (str): String with result information.
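
pre_transform above is documented as list-in/list-out over HWC arrays. A minimal sketch of that contract using LetterBox directly, with illustrative shapes:

import numpy as np
from ultralytics.data.augment import LetterBox

frames = [np.zeros((480, 640, 3), dtype=np.uint8), np.zeros((720, 1280, 3), dtype=np.uint8)]
letterbox = LetterBox((640, 640), auto=False)  # resize and pad each image to 640x640
out = [letterbox(image=x) for x in frames]     # list[np.ndarray], one transformed image per input
print([x.shape for x in out])                  # [(640, 640, 3), (640, 640, 3)]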