dgenerate-ultralytics-headless 8.3.236__py3-none-any.whl → 8.3.239__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/RECORD +117 -105
- tests/test_exports.py +3 -1
- tests/test_python.py +2 -2
- tests/test_solutions.py +6 -6
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +4 -4
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +15 -16
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/kitti.yaml +1 -1
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/data/augment.py +1 -1
- ultralytics/data/base.py +4 -2
- ultralytics/data/build.py +4 -4
- ultralytics/data/loaders.py +17 -12
- ultralytics/data/utils.py +4 -4
- ultralytics/engine/exporter.py +40 -25
- ultralytics/engine/predictor.py +8 -6
- ultralytics/engine/results.py +12 -13
- ultralytics/engine/trainer.py +10 -2
- ultralytics/engine/tuner.py +2 -3
- ultralytics/engine/validator.py +2 -2
- ultralytics/models/fastsam/model.py +2 -2
- ultralytics/models/fastsam/predict.py +2 -3
- ultralytics/models/fastsam/val.py +4 -4
- ultralytics/models/rtdetr/predict.py +2 -3
- ultralytics/models/rtdetr/val.py +10 -5
- ultralytics/models/sam/__init__.py +14 -1
- ultralytics/models/sam/build.py +22 -13
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +13 -5
- ultralytics/models/sam/modules/blocks.py +20 -8
- ultralytics/models/sam/modules/decoders.py +2 -3
- ultralytics/models/sam/modules/encoders.py +4 -1
- ultralytics/models/sam/modules/memory_attention.py +6 -2
- ultralytics/models/sam/modules/sam.py +159 -10
- ultralytics/models/sam/modules/utils.py +134 -4
- ultralytics/models/sam/predict.py +2073 -139
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +535 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +198 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +546 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/yolo/classify/val.py +1 -1
- ultralytics/models/yolo/detect/train.py +1 -1
- ultralytics/models/yolo/detect/val.py +7 -7
- ultralytics/models/yolo/obb/val.py +19 -8
- ultralytics/models/yolo/pose/val.py +1 -1
- ultralytics/models/yolo/segment/val.py +1 -1
- ultralytics/nn/autobackend.py +9 -9
- ultralytics/nn/modules/block.py +1 -1
- ultralytics/nn/modules/transformer.py +21 -1
- ultralytics/nn/tasks.py +3 -3
- ultralytics/nn/text_model.py +2 -7
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +6 -6
- ultralytics/solutions/config.py +1 -1
- ultralytics/solutions/distance_calculation.py +1 -1
- ultralytics/solutions/object_counter.py +1 -1
- ultralytics/solutions/object_cropper.py +3 -6
- ultralytics/solutions/parking_management.py +21 -17
- ultralytics/solutions/queue_management.py +5 -5
- ultralytics/solutions/region_counter.py +2 -2
- ultralytics/solutions/security_alarm.py +1 -1
- ultralytics/solutions/solutions.py +45 -22
- ultralytics/solutions/speed_estimation.py +1 -1
- ultralytics/trackers/basetrack.py +1 -1
- ultralytics/trackers/bot_sort.py +4 -3
- ultralytics/trackers/byte_tracker.py +4 -4
- ultralytics/trackers/utils/gmc.py +6 -7
- ultralytics/trackers/utils/kalman_filter.py +2 -1
- ultralytics/trackers/utils/matching.py +4 -3
- ultralytics/utils/__init__.py +12 -3
- ultralytics/utils/benchmarks.py +2 -2
- ultralytics/utils/callbacks/tensorboard.py +19 -25
- ultralytics/utils/checks.py +4 -3
- ultralytics/utils/downloads.py +1 -1
- ultralytics/utils/export/tensorflow.py +16 -2
- ultralytics/utils/files.py +13 -12
- ultralytics/utils/logger.py +62 -27
- ultralytics/utils/metrics.py +1 -1
- ultralytics/utils/ops.py +7 -9
- ultralytics/utils/patches.py +3 -3
- ultralytics/utils/plotting.py +7 -12
- ultralytics/utils/tuner.py +1 -1
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.236.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,160 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""Provides utility to combine a vision backbone with a language backbone."""
+
+from __future__ import annotations
+
+from copy import copy
+
+import torch
+import torch.nn as nn
+from torch.nn.attention import SDPBackend, sdpa_kernel
+
+from .necks import Sam3DualViTDetNeck
+
+
+class SAM3VLBackbone(nn.Module):
+    """This backbone combines a vision backbone and a language backbone without fusion. As such it is more of a
+    convenience wrapper to handle the two backbones together.
+
+    It adds support for activation checkpointing and compilation.
+    """
+
+    def __init__(
+        self,
+        visual: Sam3DualViTDetNeck,
+        text,
+        compile_visual: bool = False,
+        act_ckpt_whole_vision_backbone: bool = False,
+        act_ckpt_whole_language_backbone: bool = False,
+        scalp=0,
+    ):
+        """Initialize the backbone combiner.
+
+        :param visual: The vision backbone to use
+        :param text: The text encoder to use
+        """
+        super().__init__()
+        self.vision_backbone: Sam3DualViTDetNeck = torch.compile(visual) if compile_visual else visual
+        self.language_backbone = text
+        self.scalp = scalp
+        # allow running activation checkpointing on the entire vision and language backbones
+        self.act_ckpt_whole_vision_backbone = act_ckpt_whole_vision_backbone
+        self.act_ckpt_whole_language_backbone = act_ckpt_whole_language_backbone
+
+    def forward(
+        self,
+        samples: torch.Tensor,
+        captions: list[str],
+        input_boxes: torch.Tensor = None,
+        additional_text: list[str] | None = None,
+    ):
+        """Forward pass of the backbone combiner.
+
+        :param samples: The input images
+        :param captions: The input captions
+        :param input_boxes: If the text contains place-holders for boxes, this
+            parameter contains the tensor containing their spatial features
+        :param additional_text: This can be used to encode some additional text
+            (different from the captions) in the same forward of the backbone
+        :return: Output dictionary with the following keys:
+            - vision_features: The output of the vision backbone
+            - language_features: The output of the language backbone
+            - language_mask: The attention mask of the language backbone
+            - vision_pos_enc: The positional encoding of the vision backbone
+            - (optional) additional_text_features: The output of the language
+              backbone for the additional text
+            - (optional) additional_text_mask: The attention mask of the
+              language backbone for the additional text
+        """
+        output = self.forward_image(samples)
+        output.update(self.forward_text(captions, input_boxes, additional_text))
+        return output
+
+    def forward_image(self, samples: torch.Tensor):
+        """Forward pass of the vision backbone and get both SAM3 and SAM2 features."""
+        # Forward through backbone
+        sam3_features, sam3_pos, sam2_features, sam2_pos = self.vision_backbone.forward(samples)
+        if self.scalp > 0:
+            # Discard the lowest resolution features
+            sam3_features, sam3_pos = (
+                sam3_features[: -self.scalp],
+                sam3_pos[: -self.scalp],
+            )
+            if sam2_features is not None and sam2_pos is not None:
+                sam2_features, sam2_pos = (
+                    sam2_features[: -self.scalp],
+                    sam2_pos[: -self.scalp],
+                )
+
+        sam2_output = None
+
+        if sam2_features is not None and sam2_pos is not None:
+            sam2_src = sam2_features[-1]
+            sam2_output = {
+                "vision_features": sam2_src,
+                "vision_pos_enc": sam2_pos,
+                "backbone_fpn": sam2_features,
+            }
+
+        sam3_src = sam3_features[-1]
+        return {
+            "vision_features": sam3_src,
+            "vision_pos_enc": sam3_pos,
+            "backbone_fpn": sam3_features,
+            "sam2_backbone_out": sam2_output,
+        }
+
+    def forward_image_sam2(self, samples: torch.Tensor):
+        """Forward pass of the vision backbone to get SAM2 features only."""
+        xs = self.vision_backbone.trunk(samples)
+        x = xs[-1]  # simpleFPN
+
+        assert self.vision_backbone.sam2_convs is not None, "SAM2 neck is not available."
+        sam2_features, sam2_pos = self.vision_backbone.sam_forward_feature_levels(x, self.vision_backbone.sam2_convs)
+
+        if self.scalp > 0:
+            # Discard the lowest resolution features
+            sam2_features, sam2_pos = (
+                sam2_features[: -self.scalp],
+                sam2_pos[: -self.scalp],
+            )
+
+        return {
+            "vision_features": sam2_features[-1],
+            "vision_pos_enc": sam2_pos,
+            "backbone_fpn": sam2_features,
+        }
+
+    def forward_text(self, captions, input_boxes=None, additional_text=None):
+        """Forward pass of the text encoder."""
+        output = {}
+
+        # Forward through text_encoder
+        text_to_encode = copy(captions)
+        if additional_text is not None:
+            # if there are additional_text, we piggy-back them into this forward.
+            # They'll be used later for output alignment
+            text_to_encode += additional_text
+
+        with sdpa_kernel([SDPBackend.MATH, SDPBackend.EFFICIENT_ATTENTION, SDPBackend.FLASH_ATTENTION]):
+            text_attention_mask, text_memory, text_embeds = self.language_backbone(text_to_encode, input_boxes)
+
+        if additional_text is not None:
+            output["additional_text_features"] = text_memory[:, -len(additional_text) :]
+            output["additional_text_mask"] = text_attention_mask[-len(additional_text) :]
+
+        text_memory = text_memory[:, : len(captions)]
+        text_attention_mask = text_attention_mask[: len(captions)]
+        text_embeds = text_embeds[:, : len(captions)]
+        output["language_features"] = text_memory
+        output["language_mask"] = text_attention_mask
+        output["language_embeds"] = text_embeds  # Text embeddings before forward to the encoder
+
+        return output
+
+    def set_imgsz(self, imgsz: list[int] = [1008, 1008]):
+        """Set the image size for the vision backbone."""
+        self.vision_backbone.set_imgsz(imgsz)
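
The hunk above adds the new SAM3 vision-language backbone combiner (listed as ultralytics/models/sam/sam3/vl_combiner.py +160 in the file list). For orientation, the following Python sketch reproduces the combiner pattern with stub modules so the shape of the returned dictionary is easy to see; VisionStub, TextStub, and Combiner are hypothetical stand-ins written for this note, not classes shipped in the package.

# Minimal sketch of the combiner pattern above, using hypothetical stub backbones
# (VisionStub/TextStub/Combiner are illustrative stand-ins, not part of this package).
import torch
import torch.nn as nn


class VisionStub(nn.Module):
    """Return two FPN levels plus positional encodings, mimicking the dual-neck interface."""

    def forward(self, images: torch.Tensor):
        b = images.shape[0]
        feats = [torch.randn(b, 256, 72, 72), torch.randn(b, 256, 36, 36)]
        pos = [torch.randn_like(f) for f in feats]
        return feats, pos, None, None  # no SAM2 branch in this stub


class TextStub(nn.Module):
    """Return (attention_mask, token_features, token_embeddings) for a list of strings."""

    def forward(self, texts: list[str], input_boxes=None):
        n, seq, dim = len(texts), 16, 256
        mask = torch.ones(n, seq, dtype=torch.bool)
        memory = torch.randn(seq, n, dim)  # sequence-first token features
        embeds = torch.randn(seq, n, dim)
        return mask, memory, embeds


class Combiner(nn.Module):
    """Merge the two stubs the same way the combiner's forward merges its backbones."""

    def __init__(self, visual: nn.Module, text: nn.Module):
        super().__init__()
        self.visual, self.text = visual, text

    def forward(self, images: torch.Tensor, captions: list[str]):
        feats, pos, _, _ = self.visual(images)
        mask, memory, embeds = self.text(captions)
        return {
            "vision_features": feats[-1],
            "vision_pos_enc": pos,
            "backbone_fpn": feats,
            "language_features": memory,
            "language_mask": mask,
            "language_embeds": embeds,
        }


out = Combiner(VisionStub(), TextStub())(torch.randn(2, 3, 1008, 1008), ["a red car", "a person"])
print(sorted(out.keys()))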
ultralytics/models/yolo/classify/val.py
CHANGED
@@ -57,7 +57,7 @@ class ClassificationValidator(BaseValidator):
         """Initialize ClassificationValidator with dataloader, save directory, and other parameters.
 
         Args:
-            dataloader (torch.utils.data.DataLoader, optional):
+            dataloader (torch.utils.data.DataLoader, optional): DataLoader to use for validation.
             save_dir (str | Path, optional): Directory to save results.
             args (dict, optional): Arguments containing model and validation configuration.
             _callbacks (list, optional): List of callback functions to be called during validation.
ultralytics/models/yolo/detect/train.py
CHANGED
@@ -53,7 +53,7 @@ class DetectionTrainer(BaseTrainer):
     """
 
     def __init__(self, cfg=DEFAULT_CFG, overrides: dict[str, Any] | None = None, _callbacks=None):
-        """Initialize a DetectionTrainer object for training YOLO object detection
+        """Initialize a DetectionTrainer object for training YOLO object detection models.
 
         Args:
             cfg (dict, optional): Default configuration dictionary containing training parameters.
ultralytics/models/yolo/detect/val.py
CHANGED
@@ -46,7 +46,7 @@ class DetectionValidator(BaseValidator):
         """Initialize detection validator with necessary variables and settings.
 
         Args:
-            dataloader (torch.utils.data.DataLoader, optional):
+            dataloader (torch.utils.data.DataLoader, optional): DataLoader to use for validation.
             save_dir (Path, optional): Directory to save results.
             args (dict[str, Any], optional): Arguments for the validator.
             _callbacks (list[Any], optional): List of callback functions.
@@ -256,7 +256,7 @@ class DetectionValidator(BaseValidator):
         pf = "%22s" + "%11i" * 2 + "%11.3g" * len(self.metrics.keys)  # print format
         LOGGER.info(pf % ("all", self.seen, self.metrics.nt_per_class.sum(), *self.metrics.mean_results()))
         if self.metrics.nt_per_class.sum() == 0:
-            LOGGER.warning(f"no labels found in {self.args.task} set,
+            LOGGER.warning(f"no labels found in {self.args.task} set, cannot compute metrics without labels")
 
         # Print results per class
         if self.args.verbose and not self.training and self.nc > 1 and len(self.metrics.stats):
@@ -308,7 +308,7 @@ class DetectionValidator(BaseValidator):
             batch_size (int): Size of each batch.
 
         Returns:
-            (torch.utils.data.DataLoader):
+            (torch.utils.data.DataLoader): DataLoader for validation.
         """
         dataset = self.build_dataset(dataset_path, batch=batch_size, mode="val")
         return build_dataloader(
@@ -460,11 +460,11 @@ class DetectionValidator(BaseValidator):
 
         Args:
             stats (dict[str, Any]): Dictionary to store computed metrics and statistics.
-            pred_json (str | Path
-            anno_json (str | Path
-            iou_types (str | list[str]
+            pred_json (str | Path): Path to JSON file containing predictions in COCO format.
+            anno_json (str | Path): Path to JSON file containing ground truth annotations in COCO format.
+            iou_types (str | list[str]): IoU type(s) for evaluation. Can be single string or list of strings. Common
                 values include "bbox", "segm", "keypoints". Defaults to "bbox".
-            suffix (str | list[str]
+            suffix (str | list[str]): Suffix to append to metric names in stats dictionary. Should correspond to
                 iou_types if multiple types provided. Defaults to "Box".
 
         Returns:
ultralytics/models/yolo/obb/val.py
CHANGED
@@ -12,6 +12,7 @@ from ultralytics.models.yolo.detect import DetectionValidator
 from ultralytics.utils import LOGGER, ops
 from ultralytics.utils.metrics import OBBMetrics, batch_probiou
 from ultralytics.utils.nms import TorchNMS
+from ultralytics.utils.plotting import plot_images
 
 
 class OBBValidator(DetectionValidator):
@@ -49,7 +50,7 @@ class OBBValidator(DetectionValidator):
         extends the DetectionValidator class and configures it specifically for the OBB task.
 
         Args:
-            dataloader (torch.utils.data.DataLoader, optional):
+            dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
             save_dir (str | Path, optional): Directory to save results.
             args (dict | SimpleNamespace, optional): Arguments containing validation parameters.
             _callbacks (list, optional): List of callback functions to be called during validation.
@@ -141,24 +142,34 @@ class OBBValidator(DetectionValidator):
             "im_file": batch["im_file"][si],
         }
 
-    def plot_predictions(self, batch: dict[str, Any], preds: list[torch.Tensor], ni: int) -> None:
+    def plot_predictions(self, batch: dict[str, Any], preds: list[dict[str, torch.Tensor]], ni: int) -> None:
         """Plot predicted bounding boxes on input images and save the result.
 
         Args:
             batch (dict[str, Any]): Batch data containing images, file paths, and other metadata.
-            preds (list[torch.Tensor]): List of prediction
+            preds (list[dict[str, torch.Tensor]]): List of prediction dictionaries for each image in the batch.
             ni (int): Batch index used for naming the output file.
 
         Examples:
             >>> validator = OBBValidator()
             >>> batch = {"img": images, "im_file": paths}
-            >>> preds = [torch.rand(10,
+            >>> preds = [{"bboxes": torch.rand(10, 5), "cls": torch.zeros(10), "conf": torch.rand(10)}]
             >>> validator.plot_predictions(batch, preds, 0)
         """
-
-
-
-
+        if not preds:
+            return
+        for i, pred in enumerate(preds):
+            pred["batch_idx"] = torch.ones_like(pred["conf"]) * i
+        keys = preds[0].keys()
+        batched_preds = {k: torch.cat([x[k] for x in preds], dim=0) for k in keys}
+        plot_images(
+            images=batch["img"],
+            labels=batched_preds,
+            paths=batch["im_file"],
+            fname=self.save_dir / f"val_batch{ni}_pred.jpg",
+            names=self.names,
+            on_plot=self.on_plot,
+        )
 
     def pred_to_json(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> None:
         """Convert YOLO predictions to COCO JSON format with rotated bounding box information.
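
The rewritten OBBValidator.plot_predictions above consumes per-image prediction dictionaries instead of raw tensors. The dict-batching step it relies on can be illustrated in isolation; the snippet below is a self-contained Python sketch with dummy tensors and no Ultralytics imports.

# Standalone sketch of the dict-batching step used in plot_predictions above:
# each per-image dict gains a "batch_idx" column, then matching keys are concatenated.
import torch

preds = [
    {"bboxes": torch.rand(3, 5), "conf": torch.rand(3), "cls": torch.zeros(3)},
    {"bboxes": torch.rand(2, 5), "conf": torch.rand(2), "cls": torch.ones(2)},
]
for i, pred in enumerate(preds):
    pred["batch_idx"] = torch.ones_like(pred["conf"]) * i  # tag each row with its image index
batched = {k: torch.cat([p[k] for p in preds], dim=0) for k in preds[0].keys()}
print(batched["bboxes"].shape, batched["batch_idx"].tolist())  # torch.Size([5, 5]) [0.0, 0.0, 0.0, 1.0, 1.0]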
ultralytics/models/yolo/pose/val.py
CHANGED
@@ -59,7 +59,7 @@ class PoseValidator(DetectionValidator):
         specialized metrics for pose evaluation.
 
         Args:
-            dataloader (torch.utils.data.DataLoader, optional):
+            dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
             save_dir (Path | str, optional): Directory to save results.
             args (dict, optional): Arguments for the validator including task set to "pose".
             _callbacks (list, optional): List of callback functions to be executed during validation.
ultralytics/models/yolo/segment/val.py
CHANGED
@@ -39,7 +39,7 @@ class SegmentationValidator(DetectionValidator):
         """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics.
 
         Args:
-            dataloader (torch.utils.data.DataLoader, optional):
+            dataloader (torch.utils.data.DataLoader, optional): DataLoader to use for validation.
             save_dir (Path, optional): Directory to save results.
             args (namespace, optional): Arguments for the validator.
             _callbacks (list, optional): List of callback functions.
ultralytics/nn/autobackend.py
CHANGED
@@ -127,7 +127,7 @@ class AutoBackend(nn.Module):
 
     Methods:
         forward: Run inference on an input image.
-        from_numpy: Convert
+        from_numpy: Convert NumPy arrays to tensors on the model device.
         warmup: Warm up the model with a dummy input.
         _model_type: Determine the model type from file path.
 
@@ -182,7 +182,7 @@ class AutoBackend(nn.Module):
             triton,
         ) = self._model_type("" if nn_module else model)
        fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
-        nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn  # BHWC formats (vs torch
+        nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn  # BHWC formats (vs torch BCHW)
        stride, ch = 32, 3  # default stride and channels
        end2end, dynamic = False, False
        metadata, task = None, None
@@ -894,14 +894,14 @@ class AutoBackend(nn.Module):
         else:
             return self.from_numpy(y)
 
-    def from_numpy(self, x: np.ndarray) -> torch.Tensor:
-        """Convert a
+    def from_numpy(self, x: np.ndarray | torch.Tensor) -> torch.Tensor:
+        """Convert a NumPy array to a torch tensor on the model device.
 
         Args:
-            x (np.ndarray):
+            x (np.ndarray | torch.Tensor): Input array or tensor.
 
         Returns:
-            (torch.Tensor):
+            (torch.Tensor): Tensor on `self.device`.
         """
         return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x
 
@@ -909,7 +909,7 @@ class AutoBackend(nn.Module):
         """Warm up the model by running one forward pass with a dummy input.
 
         Args:
-            imgsz (tuple
+            imgsz (tuple[int, int, int, int]): Dummy input shape in (batch, channels, height, width) format.
         """
         warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module
         if any(warmup_types) and (self.device.type != "cpu" or self.triton):
@@ -931,8 +931,8 @@ class AutoBackend(nn.Module):
             (list[bool]): List of booleans indicating the model type.
 
         Examples:
-            >>>
-            >>>
+            >>> types = AutoBackend._model_type("path/to/model.onnx")
+            >>> assert types[2]  # onnx
         """
         from ultralytics.engine.exporter import export_formats
 
ultralytics/nn/modules/block.py
CHANGED
@@ -1812,7 +1812,7 @@ class A2C2f(nn.Module):
         """
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
-        assert c_ % 32 == 0, "Dimension of ABlock be a multiple of 32."
+        assert c_ % 32 == 0, "Dimension of ABlock must be a multiple of 32."
 
         self.cv1 = Conv(c1, c_, 1, 1)
         self.cv2 = Conv((1 + n) * c_, c2, 1)
ultralytics/nn/modules/transformer.py
CHANGED
@@ -359,7 +359,15 @@ class MLP(nn.Module):
     """
 
     def __init__(
-        self,
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        output_dim: int,
+        num_layers: int,
+        act=nn.ReLU,
+        sigmoid: bool = False,
+        residual: bool = False,
+        out_norm: nn.Module = None,
     ):
         """Initialize the MLP with specified input, hidden, output dimensions and number of layers.
 
@@ -370,6 +378,8 @@ class MLP(nn.Module):
             num_layers (int): Number of layers.
             act (nn.Module): Activation function.
             sigmoid (bool): Whether to apply sigmoid to the output.
+            residual (bool): Whether to use residual connections.
+            out_norm (nn.Module, optional): Normalization layer for the output.
         """
         super().__init__()
         self.num_layers = num_layers
@@ -377,6 +387,12 @@ class MLP(nn.Module):
         self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim, *h], [*h, output_dim]))
         self.sigmoid = sigmoid
         self.act = act()
+        if residual and input_dim != output_dim:
+            raise ValueError("residual is only supported if input_dim == output_dim")
+        self.residual = residual
+        # whether to apply a normalization layer to the output
+        assert isinstance(out_norm, nn.Module) or out_norm is None
+        self.out_norm = out_norm or nn.Identity()
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass for the entire MLP.
@@ -387,8 +403,12 @@ class MLP(nn.Module):
         Returns:
             (torch.Tensor): Output tensor after MLP.
         """
+        orig_x = x
         for i, layer in enumerate(self.layers):
             x = getattr(self, "act", nn.ReLU())(layer(x)) if i < self.num_layers - 1 else layer(x)
+        if getattr(self, "residual", False):
+            x = x + orig_x
+        x = getattr(self, "out_norm", nn.Identity())(x)
         return x.sigmoid() if getattr(self, "sigmoid", False) else x
 
 
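
A brief usage sketch of the new residual and out_norm options added to MLP above; it assumes the updated package is installed and that MLP remains importable from the changed module path (ultralytics.nn.modules.transformer).

# Hedged usage sketch of the new MLP options shown in the hunk above
# (import path is an assumption based on the changed file's location).
import torch
import torch.nn as nn

from ultralytics.nn.modules.transformer import MLP

# residual=True requires input_dim == output_dim, as enforced by the new ValueError check.
mlp = MLP(input_dim=256, hidden_dim=512, output_dim=256, num_layers=3, residual=True, out_norm=nn.LayerNorm(256))
x = torch.randn(4, 10, 256)
y = mlp(x)  # residual add, then the output normalization layer, applied after the final linear layer
print(y.shape)  # torch.Size([4, 10, 256])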
ultralytics/nn/tasks.py
CHANGED
@@ -866,7 +866,7 @@ class WorldModel(DetectionModel):
         self.model[-1].nc = len(text)
 
     def get_text_pe(self, text, batch=80, cache_clip_model=True):
-        """
+        """Get text positional embeddings for offline inference without CLIP model.
 
         Args:
             text (list[str]): List of class names.
@@ -987,13 +987,13 @@ class YOLOEModel(DetectionModel):
 
     @smart_inference_mode()
     def get_text_pe(self, text, batch=80, cache_clip_model=False, without_reprta=False):
-        """
+        """Get text positional embeddings for offline inference without CLIP model.
 
         Args:
             text (list[str]): List of class names.
             batch (int): Batch size for processing text tokens.
             cache_clip_model (bool): Whether to cache the CLIP model.
-            without_reprta (bool): Whether to return text embeddings
+            without_reprta (bool): Whether to return text embeddings without reprta module processing.
 
         Returns:
             (torch.Tensor): Text positional embeddings.
ultralytics/nn/text_model.py
CHANGED
@@ -196,12 +196,7 @@ class MobileCLIP(TextModel):
             device (torch.device): Device to load the model on.
         """
         try:
-            import
-
-            # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
-            with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", category=FutureWarning)
-                import mobileclip
+            import mobileclip
         except ImportError:
             # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
             checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
@@ -308,7 +303,7 @@ class MobileCLIPTS(TextModel):
             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
 
         Examples:
-            >>> model = MobileCLIPTS("cpu")
+            >>> model = MobileCLIPTS(device=torch.device("cpu"))
             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
             >>> strict_tokens = model.tokenize(
             ...     ["a very long caption"], truncate=False
ultralytics/solutions/ai_gym.py
CHANGED
@@ -13,7 +13,7 @@ class AIGym(BaseSolution):
     repetitions of exercises based on predefined angle thresholds for up and down positions.
 
     Attributes:
-        states (dict[
+        states (dict[int, dict[str, float | int | str]]): Per-track angle, rep count, and stage for workout monitoring.
         up_angle (float): Angle threshold for considering the 'up' position of an exercise.
         down_angle (float): Angle threshold for considering the 'down' position of an exercise.
         kpts (list[int]): Indices of keypoints used for angle calculation.
ultralytics/solutions/analytics.py
CHANGED
@@ -56,7 +56,7 @@ class Analytics(BaseSolution):
         from matplotlib.backends.backend_agg import FigureCanvasAgg
         from matplotlib.figure import Figure
 
-        self.type = self.CFG["analytics_type"]  # type
+        self.type = self.CFG["analytics_type"]  # Chart type: "line", "pie", "bar", or "area".
         self.x_label = "Classes" if self.type in {"bar", "pie"} else "Frame#"
         self.y_label = "Total Counts"
 
@@ -66,10 +66,10 @@ class Analytics(BaseSolution):
         self.title = "Ultralytics Solutions"  # window name
         self.max_points = 45  # maximum points to be drawn on window
         self.fontsize = 25  # text font size for display
-        figsize = self.CFG["figsize"]  #
+        figsize = self.CFG["figsize"]  # Output size, e.g. (12.8, 7.2) -> 1280x720.
         self.color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])
 
-        self.total_counts = 0  #
+        self.total_counts = 0  # Stores total counts for line charts.
         self.clswise_count = {}  # dictionary for class-wise counts
         self.update_every = kwargs.get("update_every", 30)  # Only update graph every 30 frames by default
         self.last_plot_im = None  # Cache of the last rendered chart
@@ -104,7 +104,7 @@ class Analytics(BaseSolution):
             and 'classwise_count' (dict, per-class object count).
 
         Raises:
-
+            ValueError: If an unsupported chart type is specified.
 
         Examples:
             >>> analytics = Analytics(analytics_type="line")
@@ -131,9 +131,9 @@ class Analytics(BaseSolution):
             )
             plot_im = self.last_plot_im
         else:
-            raise
+            raise ValueError(f"Unsupported analytics_type='{self.type}'. Supported types: line, bar, pie, area.")
 
-        #
+        # Return results for downstream use.
         return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)
 
     def update_graph(
ultralytics/solutions/config.py
CHANGED
@@ -35,7 +35,7 @@ class SolutionConfig:
         vision_point (tuple[int, int]): Reference point for directional tracking or perspective drawing.
         crop_dir (str): Directory path to save cropped detection images.
         json_file (str): Path to a JSON file containing data for parking areas.
-        line_width (int): Width for visual display
+        line_width (int): Width for visual display, e.g. bounding boxes, keypoints, and counts.
         records (int): Number of detection records to send email alerts.
         fps (float): Frame rate (Frames Per Second) for speed estimation calculation.
         max_hist (int): Maximum number of historical points or states stored per tracked object for speed estimation.
ultralytics/solutions/distance_calculation.py
CHANGED
@@ -17,7 +17,7 @@ class DistanceCalculation(BaseSolution):
 
     Attributes:
         left_mouse_count (int): Counter for left mouse button clicks.
-        selected_boxes (dict[int,
+        selected_boxes (dict[int, Any]): Dictionary to store selected bounding boxes keyed by track ID.
         centroids (list[list[int]]): List to store centroids of selected bounding boxes.
 
     Methods:
ultralytics/solutions/object_counter.py
CHANGED
@@ -19,7 +19,7 @@ class ObjectCounter(BaseSolution):
         in_count (int): Counter for objects moving inward.
         out_count (int): Counter for objects moving outward.
         counted_ids (list[int]): List of IDs of objects that have been counted.
-
+        classwise_count (dict[str, dict[str, int]]): Dictionary for counts, categorized by object class.
         region_initialized (bool): Flag indicating whether the counting region has been initialized.
         show_in (bool): Flag to control display of inward count.
         show_out (bool): Flag to control display of outward count.
ultralytics/solutions/object_cropper.py
CHANGED
@@ -1,6 +1,5 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-import os
 from pathlib import Path
 from typing import Any
 
@@ -40,12 +39,10 @@ class ObjectCropper(BaseSolution):
         super().__init__(**kwargs)
 
         self.crop_dir = self.CFG["crop_dir"]  # Directory for storing cropped detections
-
-        os.mkdir(self.crop_dir)  # Create directory if it does not exist
+        Path(self.crop_dir).mkdir(parents=True, exist_ok=True)
         if self.CFG["show"]:
-            self.LOGGER.warning(
-
-            )
+            self.LOGGER.warning(f"show=True is not supported for ObjectCropper; saving crops to '{self.crop_dir}'.")
+            self.CFG["show"] = False
         self.crop_idx = 0  # Initialize counter for total cropped objects
         self.iou = self.CFG["iou"]
         self.conf = self.CFG["conf"]