dgenerate_ultralytics_headless-8.3.194-py3-none-any.whl → dgenerate_ultralytics_headless-8.3.196-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/METADATA +1 -2
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/RECORD +107 -106
- tests/test_python.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +9 -8
- ultralytics/cfg/default.yaml +1 -0
- ultralytics/data/annotator.py +1 -1
- ultralytics/data/augment.py +76 -76
- ultralytics/data/base.py +12 -12
- ultralytics/data/build.py +5 -1
- ultralytics/data/converter.py +4 -4
- ultralytics/data/dataset.py +7 -7
- ultralytics/data/loaders.py +15 -15
- ultralytics/data/split_dota.py +10 -10
- ultralytics/data/utils.py +12 -12
- ultralytics/engine/exporter.py +19 -31
- ultralytics/engine/model.py +13 -13
- ultralytics/engine/predictor.py +16 -14
- ultralytics/engine/results.py +21 -21
- ultralytics/engine/trainer.py +15 -4
- ultralytics/engine/validator.py +6 -2
- ultralytics/hub/google/__init__.py +2 -2
- ultralytics/hub/session.py +7 -7
- ultralytics/models/fastsam/model.py +5 -5
- ultralytics/models/fastsam/predict.py +11 -11
- ultralytics/models/nas/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +2 -2
- ultralytics/models/rtdetr/val.py +4 -4
- ultralytics/models/sam/amg.py +6 -6
- ultralytics/models/sam/build.py +9 -9
- ultralytics/models/sam/model.py +7 -7
- ultralytics/models/sam/modules/blocks.py +6 -6
- ultralytics/models/sam/modules/decoders.py +1 -1
- ultralytics/models/sam/modules/encoders.py +27 -27
- ultralytics/models/sam/modules/sam.py +4 -4
- ultralytics/models/sam/modules/tiny_encoder.py +18 -18
- ultralytics/models/sam/modules/utils.py +8 -8
- ultralytics/models/sam/predict.py +63 -63
- ultralytics/models/utils/loss.py +22 -22
- ultralytics/models/utils/ops.py +8 -8
- ultralytics/models/yolo/classify/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +9 -19
- ultralytics/models/yolo/classify/val.py +4 -4
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +38 -12
- ultralytics/models/yolo/detect/val.py +38 -37
- ultralytics/models/yolo/model.py +6 -6
- ultralytics/models/yolo/obb/train.py +1 -10
- ultralytics/models/yolo/obb/val.py +13 -13
- ultralytics/models/yolo/pose/train.py +1 -9
- ultralytics/models/yolo/pose/val.py +12 -12
- ultralytics/models/yolo/segment/predict.py +4 -4
- ultralytics/models/yolo/segment/train.py +2 -10
- ultralytics/models/yolo/segment/val.py +15 -15
- ultralytics/models/yolo/world/train.py +13 -13
- ultralytics/models/yolo/world/train_world.py +3 -3
- ultralytics/models/yolo/yoloe/predict.py +4 -4
- ultralytics/models/yolo/yoloe/train.py +7 -16
- ultralytics/models/yolo/yoloe/val.py +0 -7
- ultralytics/nn/autobackend.py +2 -2
- ultralytics/nn/modules/block.py +6 -6
- ultralytics/nn/modules/conv.py +2 -2
- ultralytics/nn/modules/head.py +6 -5
- ultralytics/nn/tasks.py +17 -15
- ultralytics/nn/text_model.py +3 -3
- ultralytics/solutions/ai_gym.py +2 -2
- ultralytics/solutions/analytics.py +3 -3
- ultralytics/solutions/config.py +5 -5
- ultralytics/solutions/distance_calculation.py +2 -2
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +4 -4
- ultralytics/solutions/object_counter.py +4 -4
- ultralytics/solutions/parking_management.py +7 -7
- ultralytics/solutions/queue_management.py +3 -3
- ultralytics/solutions/region_counter.py +4 -4
- ultralytics/solutions/similarity_search.py +2 -2
- ultralytics/solutions/solutions.py +48 -48
- ultralytics/solutions/streamlit_inference.py +1 -1
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/solutions/vision_eye.py +1 -1
- ultralytics/trackers/byte_tracker.py +11 -11
- ultralytics/trackers/utils/gmc.py +3 -3
- ultralytics/trackers/utils/matching.py +5 -5
- ultralytics/utils/__init__.py +30 -19
- ultralytics/utils/autodevice.py +2 -2
- ultralytics/utils/benchmarks.py +10 -10
- ultralytics/utils/callbacks/clearml.py +1 -1
- ultralytics/utils/callbacks/comet.py +5 -5
- ultralytics/utils/callbacks/tensorboard.py +2 -2
- ultralytics/utils/checks.py +7 -5
- ultralytics/utils/cpu.py +90 -0
- ultralytics/utils/dist.py +1 -1
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export.py +5 -5
- ultralytics/utils/instance.py +2 -2
- ultralytics/utils/loss.py +14 -8
- ultralytics/utils/metrics.py +35 -35
- ultralytics/utils/nms.py +4 -4
- ultralytics/utils/ops.py +1 -1
- ultralytics/utils/patches.py +2 -2
- ultralytics/utils/plotting.py +10 -9
- ultralytics/utils/torch_utils.py +113 -15
- ultralytics/utils/triton.py +5 -5
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/top_level.txt +0 -0
@@ -57,13 +57,13 @@ class SegmentationValidator(DetectionValidator):
         Preprocess batch of images for YOLO segmentation validation.

         Args:
-            batch (
+            batch (dict[str, Any]): Batch containing images and annotations.

         Returns:
-            (
+            (dict[str, Any]): Preprocessed batch.
         """
         batch = super().preprocess(batch)
-        batch["masks"] = batch["masks"].
+        batch["masks"] = batch["masks"].float()
         return batch

     def init_metrics(self, model: torch.nn.Module) -> None:
@@ -100,10 +100,10 @@ class SegmentationValidator(DetectionValidator):
         Post-process YOLO predictions and return output detections with proto.

         Args:
-            preds (
+            preds (list[torch.Tensor]): Raw predictions from the model.

         Returns:
-
+            list[dict[str, torch.Tensor]]: Processed detection predictions with masks.
         """
         proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
         preds = super().postprocess(preds[0])
@@ -127,10 +127,10 @@ class SegmentationValidator(DetectionValidator):

         Args:
             si (int): Batch index.
-            batch (
+            batch (dict[str, Any]): Batch data containing images and annotations.

         Returns:
-            (
+            (dict[str, Any]): Prepared batch with processed annotations.
         """
         prepared_batch = super()._prepare_batch(si, batch)
         nl = len(prepared_batch["cls"])
@@ -151,11 +151,11 @@ class SegmentationValidator(DetectionValidator):
         Compute correct prediction matrix for a batch based on bounding boxes and optional masks.

         Args:
-            preds (
-            batch (
+            preds (dict[str, torch.Tensor]): Dictionary containing predictions with keys like 'cls' and 'masks'.
+            batch (dict[str, Any]): Dictionary containing batch data with keys like 'cls' and 'masks'.

         Returns:
-            (
+            (dict[str, np.ndarray]): A dictionary containing correct prediction matrices including 'tp_m' for mask IoU.

         Notes:
             - If `masks` is True, the function computes IoU between predicted and ground truth masks.
@@ -181,8 +181,8 @@ class SegmentationValidator(DetectionValidator):
         Plot batch predictions with masks and bounding boxes.

         Args:
-            batch (
-            preds (
+            batch (dict[str, Any]): Batch containing images and annotations.
+            preds (list[dict[str, torch.Tensor]]): List of predictions from the model.
             ni (int): Batch index.
         """
         for p in preds:
@@ -199,7 +199,7 @@ class SegmentationValidator(DetectionValidator):
         Args:
             predn (torch.Tensor): Predictions in the format (x1, y1, x2, y2, conf, class).
             save_conf (bool): Whether to save confidence scores.
-            shape (
+            shape (tuple[int, int]): Shape of the original image.
             file (Path): File path to save the detections.
         """
         from ultralytics.engine.results import Results
@@ -217,8 +217,8 @@ class SegmentationValidator(DetectionValidator):
         Save one JSON result for COCO evaluation.

         Args:
-            predn (
-            pbatch (
+            predn (dict[str, torch.Tensor]): Predictions containing bboxes, masks, confidence scores, and classes.
+            pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
         """
         from faster_coco_eval.core.mask import encode  # noqa

ultralytics/models/yolo/world/train.py
CHANGED
@@ -12,7 +12,7 @@ from ultralytics.data import build_yolo_dataset
 from ultralytics.models.yolo.detect import DetectionTrainer
 from ultralytics.nn.tasks import WorldModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.torch_utils import unwrap_model


 def on_pretrain_routine_end(trainer) -> None:
@@ -20,7 +20,7 @@ def on_pretrain_routine_end(trainer) -> None:
     if RANK in {-1, 0}:
         # Set class names for evaluation
         names = [name.split("/", 1)[0] for name in list(trainer.test_loader.dataset.data["names"].values())]
-
+        unwrap_model(trainer.ema.ema).set_classes(names, cache_clip_model=False)


 class WorldTrainer(DetectionTrainer):
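Several hunks in this file (and in the YOLOE trainers below) wrap model access in `unwrap_model(...)` before calling model-level methods such as `set_classes` and `get_text_pe`. A minimal sketch of the wrapper-stripping idiom this implies is shown below; the helper name `unwrap_model_sketch` is illustrative, and the real `ultralytics.utils.torch_utils.unwrap_model` may differ in details.

```python
import torch.nn as nn


def unwrap_model_sketch(m: nn.Module) -> nn.Module:
    """Peel DataParallel/DistributedDataParallel and torch.compile wrappers to reach the inner module."""
    while isinstance(getattr(m, "module", None), nn.Module):
        m = m.module  # DataParallel / DistributedDataParallel expose the wrapped model as .module
    return getattr(m, "_orig_mod", m)  # torch.compile() stores the original module as ._orig_mod


# Usage mirroring the hunk above (trainer is assumed to exist):
# unwrap_model_sketch(trainer.ema.ema).set_classes(names, cache_clip_model=False)
```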
@@ -32,10 +32,10 @@ class WorldTrainer(DetectionTrainer):
     accelerate training with multi-modal data.

     Attributes:
-        text_embeddings (
+        text_embeddings (dict[str, torch.Tensor] | None): Cached text embeddings for category names to accelerate
             training.
         model (WorldModel): The YOLO World model being trained.
-        data (
+        data (dict[str, Any]): Dataset configuration containing class information.
         args (Any): Training arguments and configuration.

     Methods:
@@ -58,9 +58,9 @@ class WorldTrainer(DetectionTrainer):
         Initialize a WorldTrainer object with given arguments.

         Args:
-            cfg (
-            overrides (
-            _callbacks (
+            cfg (dict[str, Any]): Configuration for the trainer.
+            overrides (dict[str, Any], optional): Configuration overrides.
+            _callbacks (list[Any], optional): List of callback functions.
         """
         if overrides is None:
             overrides = {}
@@ -72,7 +72,7 @@ class WorldTrainer(DetectionTrainer):
         Return WorldModel initialized with specified config and weights.

         Args:
-            cfg (
+            cfg (dict[str, Any] | str, optional): Model configuration.
             weights (str, optional): Path to pretrained weights.
             verbose (bool): Whether to display model info.

@@ -105,7 +105,7 @@ class WorldTrainer(DetectionTrainer):
         Returns:
             (Any): YOLO dataset configured for training or validation.
         """
-        gs = max(int(
+        gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32)
         dataset = build_yolo_dataset(
             self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
         )
@@ -121,7 +121,7 @@ class WorldTrainer(DetectionTrainer):
         for these categories to improve training efficiency.

         Args:
-            datasets (
+            datasets (list[Any]): List of datasets from which to extract category names.
             batch (int | None): Batch size used for processing.

         Notes:
@@ -144,12 +144,12 @@ class WorldTrainer(DetectionTrainer):
         Generate text embeddings for a list of text samples.

         Args:
-            texts (
+            texts (list[str]): List of text samples to encode.
             batch (int): Batch size for processing.
             cache_dir (Path): Directory to save/load cached embeddings.

         Returns:
-            (
+            (dict[str, torch.Tensor]): Dictionary mapping text samples to their embeddings.
         """
         model = "clip:ViT-B/32"
         cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
@@ -160,7 +160,7 @@ class WorldTrainer(DetectionTrainer):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats =
+        txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
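The `generate_text_embeddings` hunk above (and its YOLOE counterpart further down) follows a compute-or-load cache pattern around `torch.save`. A minimal, self-contained sketch of that pattern, with an illustrative `encode_fn` standing in for `get_text_pe`:

```python
from pathlib import Path

import torch


def cached_text_embeddings(texts: list[str], cache_path: Path, encode_fn) -> dict[str, torch.Tensor]:
    """Return {text: embedding}, loading from cache_path when it exists and persisting otherwise."""
    if cache_path.exists():
        return torch.load(cache_path)
    feats = encode_fn(texts)  # expected to return one embedding tensor per text
    txt_map = dict(zip(texts, feats))
    torch.save(txt_map, cache_path)
    return txt_map
```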
ultralytics/models/yolo/world/train_world.py
CHANGED
@@ -6,7 +6,7 @@ from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_data
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.world import WorldTrainer
 from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.torch_utils import unwrap_model


 class WorldTrainerFromScratch(WorldTrainer):
@@ -94,14 +94,14 @@ class WorldTrainerFromScratch(WorldTrainer):
         standard YOLO datasets and grounding datasets with different formats.

         Args:
-            img_path (
+            img_path (list[str] | str): Path to the folder containing images or list of paths.
             mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
             batch (int, optional): Size of batches, used for rectangular training/validation.

         Returns:
             (YOLOConcatDataset | Dataset): The constructed dataset for training or validation.
         """
-        gs = max(int(
+        gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32)
         if mode != "train":
             return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=False, stride=gs)
         datasets = [
ultralytics/models/yolo/yoloe/predict.py
CHANGED
@@ -75,12 +75,12 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
         else:
             # NOTE: only supports bboxes as prompts for now
             assert bboxes is not None, f"Expected bboxes, but got {bboxes}!"
-            # NOTE: needs
+            # NOTE: needs list[np.ndarray]
             assert isinstance(bboxes, list) and all(isinstance(b, np.ndarray) for b in bboxes), (
-                f"Expected
+                f"Expected list[np.ndarray], but got {bboxes}!"
             )
             assert isinstance(category, list) and all(isinstance(b, np.ndarray) for b in category), (
-                f"Expected
+                f"Expected list[np.ndarray], but got {category}!"
             )
             assert len(im) == len(category) == len(bboxes), (
                 f"Expected same length for all inputs, but got {len(im)}vs{len(category)}vs{len(bboxes)}!"
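For reference, a small sketch of inputs that would satisfy the assertions in this hunk; the shapes and values are illustrative only, not taken from the package.

```python
import numpy as np

im = [np.zeros((640, 640, 3), dtype=np.uint8)]                # one image
bboxes = [np.array([[120, 80, 360, 420]], dtype=np.float32)]  # one xyxy box array per image
category = [np.array([0])]                                    # one class-id array per image

# Same checks as the hunk above: parallel lists of np.ndarray, one entry per image.
assert isinstance(bboxes, list) and all(isinstance(b, np.ndarray) for b in bboxes)
assert isinstance(category, list) and all(isinstance(c, np.ndarray) for c in category)
assert len(im) == len(category) == len(bboxes)
```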
@@ -149,7 +149,7 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
         Process the source to get the visual prompt embeddings (VPE).

         Args:
-            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor |
+            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | list | tuple): The source
                 of the image to make predictions on. Accepts various types including file paths, URLs, PIL
                 images, numpy arrays, and torch tensors.

ultralytics/models/yolo/yoloe/train.py
CHANGED
@@ -13,7 +13,7 @@ from ultralytics.data.augment import LoadVisualPrompt
 from ultralytics.models.yolo.detect import DetectionTrainer, DetectionValidator
 from ultralytics.nn.tasks import YOLOEModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.torch_utils import unwrap_model

 from ..world.train_world import WorldTrainerFromScratch
 from .val import YOLOEDetectValidator
@@ -39,9 +39,6 @@ class YOLOETrainer(DetectionTrainer):
         """
         Initialize the YOLOE Trainer with specified configurations.

-        This method sets up the YOLOE trainer with the provided configuration and overrides, initializing
-        the training environment, model, and callbacks for YOLOE object detection training.
-
         Args:
             cfg (dict): Configuration dictionary with default training settings from DEFAULT_CFG.
             overrides (dict, optional): Dictionary of parameter overrides for the default configuration.
@@ -102,7 +99,7 @@ class YOLOETrainer(DetectionTrainer):
         Returns:
             (Dataset): YOLO dataset configured for training or validation.
         """
-        gs = max(int(
+        gs = max(int(unwrap_model(self.model).stride.max() if self.model else 0), 32)
         return build_yolo_dataset(
             self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
         )
@@ -183,7 +180,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         standard YOLO datasets and grounding datasets with different formats.

         Args:
-            img_path (
+            img_path (list[str] | str): Path to the folder containing images or list of paths.
             mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
             batch (int, optional): Size of batches, used for rectangular training/validation.

@@ -207,7 +204,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         Generate text embeddings for a list of text samples.

         Args:
-            texts (
+            texts (list[str]): List of text samples to encode.
             batch (int): Batch size for processing.
             cache_dir (Path): Directory to save/load cached embeddings.

@@ -223,7 +220,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
         assert self.model is not None
-        txt_feats =
+        txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
         return txt_map
@@ -262,7 +259,7 @@ class YOLOEPEFreeTrainer(YOLOEPETrainer, YOLOETrainerFromScratch):
         in the parent directory of the first dataset's image path.

         Args:
-            datasets (
+            datasets (list[Dataset]): List of datasets containing category names to process.
             batch (int): Batch size for processing text embeddings.

         Notes:
@@ -290,7 +287,7 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch):
         Build YOLO Dataset for training or validation with visual prompts.

         Args:
-            img_path (
+            img_path (list[str] | str): Path to the folder containing images or list of paths.
             mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
             batch (int, optional): Size of batches, used for rectangular training/validation.

@@ -313,9 +310,3 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch):
                 d.transforms.append(LoadVisualPrompt())
         else:
             self.train_loader.dataset.transforms.append(LoadVisualPrompt())
-
-    def preprocess_batch(self, batch):
-        """Preprocess a batch of images for YOLOE training, moving visual prompts to the appropriate device."""
-        batch = super().preprocess_batch(batch)
-        batch["visuals"] = batch["visuals"].to(self.device, non_blocking=True)
-        return batch
ultralytics/models/yolo/yoloe/val.py
CHANGED
@@ -98,13 +98,6 @@ class YOLOEDetectValidator(DetectionValidator):
         visual_pe[cls_visual_num == 0] = 0
         return visual_pe.unsqueeze(0)

-    def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
-        """Preprocess batch data, ensuring visuals are on the same device as images."""
-        batch = super().preprocess(batch)
-        if "visuals" in batch:
-            batch["visuals"] = batch["visuals"].to(batch["img"].device, non_blocking=True)
-        return batch
-
     def get_vpe_dataloader(self, data: dict[str, Any]) -> torch.utils.data.DataLoader:
         """
         Create a dataloader for LVIS training visual prompt samples.
ultralytics/nn/autobackend.py
CHANGED
@@ -624,7 +624,7 @@ class AutoBackend(nn.Module):
             **kwargs (Any): Additional keyword arguments for model configuration.

         Returns:
-            (torch.Tensor |
+            (torch.Tensor | list[torch.Tensor]): The raw output tensor(s) from the model.
         """
         b, ch, h, w = im.shape  # batch, channel, height, width
         if self.fp16 and im.dtype != torch.float16:
@@ -860,7 +860,7 @@ class AutoBackend(nn.Module):
             p (str): Path to the model file.

         Returns:
-            (
+            (list[bool]): List of booleans indicating the model type.

         Examples:
             >>> model = AutoBackend(model="path/to/model.onnx")
ultralytics/nn/modules/block.py
CHANGED
@@ -745,7 +745,7 @@ class ImagePoolingAttn(nn.Module):
         Forward pass of ImagePoolingAttn.

         Args:
-            x (
+            x (list[torch.Tensor]): List of input feature maps.
             text (torch.Tensor): Text embeddings.

         Returns:
@@ -1032,7 +1032,7 @@ class CBLinear(nn.Module):

         Args:
             c1 (int): Input channels.
-            c2s (
+            c2s (list[int]): List of output channel sizes.
             k (int): Kernel size.
             s (int): Stride.
             p (int | None): Padding.
@@ -1055,7 +1055,7 @@ class CBFuse(nn.Module):
         Initialize CBFuse module.

         Args:
-            idx (
+            idx (list[int]): Indices for feature selection.
         """
         super().__init__()
         self.idx = idx
@@ -1065,7 +1065,7 @@ class CBFuse(nn.Module):
         Forward pass through CBFuse layer.

         Args:
-            xs (
+            xs (list[torch.Tensor]): List of input tensors.

         Returns:
             (torch.Tensor): Fused output tensor.
@@ -1676,7 +1676,7 @@ class TorchVision(nn.Module):
             x (torch.Tensor): Input tensor.

         Returns:
-            (torch.Tensor |
+            (torch.Tensor | list[torch.Tensor]): Output tensor or list of tensors.
         """
         if self.split:
             y = [x]
@@ -1979,7 +1979,7 @@ class SAVPE(nn.Module):
         Initialize SAVPE module with channels, intermediate channels, and embedding dimension.

         Args:
-            ch (
+            ch (list[int]): List of input channel dimensions.
             c3 (int): Intermediate channels.
             embed (int): Embedding dimension.
         """
ultralytics/nn/modules/conv.py
CHANGED
@@ -675,7 +675,7 @@ class Concat(nn.Module):
         Concatenate input tensors along specified dimension.

         Args:
-            x (
+            x (list[torch.Tensor]): List of input tensors.

         Returns:
             (torch.Tensor): Concatenated tensor.
@@ -706,7 +706,7 @@ class Index(nn.Module):
         Select and return a particular index from input.

         Args:
-            x (
+            x (list[torch.Tensor]): List of input tensors.

         Returns:
             (torch.Tensor): Selected tensor.
ultralytics/nn/modules/head.py
CHANGED
@@ -13,7 +13,7 @@ from torch.nn.init import constant_, xavier_uniform_

 from ultralytics.utils import NOT_MACOS14
 from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
-from ultralytics.utils.torch_utils import fuse_conv_and_bn, smart_inference_mode
+from ultralytics.utils.torch_utils import disable_dynamo, fuse_conv_and_bn, smart_inference_mode

 from .block import DFL, SAVPE, BNContrastiveHead, ContrastiveHead, Proto, Residual, SwiGLUFFN
 from .conv import Conv, DWConv
@@ -130,7 +130,7 @@ class Detect(nn.Module):
         Perform forward pass of the v10Detect module.

         Args:
-            x (
+            x (list[torch.Tensor]): Input feature maps from different levels.

         Returns:
             outputs (dict | tuple): Training mode returns dict with one2many and one2one outputs.
@@ -149,12 +149,13 @@ class Detect(nn.Module):
             y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
         return y if self.export else (y, {"one2many": x, "one2one": one2one})

+    @disable_dynamo
     def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
         """
         Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.

         Args:
-            x (
+            x (list[torch.Tensor]): List of feature maps from different detection layers.

         Returns:
             (torch.Tensor): Concatenated tensor of decoded bounding boxes and class probabilities.
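`Detect._inference` is newly decorated with `disable_dynamo`. Its implementation is not shown in this diff; a plausible shape for such a decorator, assuming it simply opts a function out of `torch.compile` graph capture when TorchDynamo is available, might look like the following sketch (the name `disable_dynamo_sketch` is illustrative):

```python
import torch


def disable_dynamo_sketch(fn):
    """Illustrative only: exclude fn from TorchDynamo tracing, or return it unchanged on older torch."""
    if hasattr(torch, "_dynamo"):
        return torch._dynamo.disable(fn)
    return fn
```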
@@ -985,7 +986,7 @@ class RTDETRDecoder(nn.Module):
         Run the forward pass of the module, returning bounding box and classification scores for the input.

         Args:
-            x (
+            x (list[torch.Tensor]): List of feature maps from the backbone.
             batch (dict, optional): Batch information for training.

         Returns:
@@ -1075,7 +1076,7 @@ class RTDETRDecoder(nn.Module):
         Process and return encoder inputs by getting projection features from input and concatenating them.

         Args:
-            x (
+            x (list[torch.Tensor]): List of feature maps from the backbone.

         Returns:
             feats (torch.Tensor): Processed features.
ultralytics/nn/tasks.py
CHANGED
@@ -69,7 +69,7 @@ from ultralytics.nn.modules import (
     YOLOESegment,
     v10Detect,
 )
-from ultralytics.utils import DEFAULT_CFG_DICT,
+from ultralytics.utils import DEFAULT_CFG_DICT, LOGGER, YAML, colorstr, emojis
 from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
 from ultralytics.utils.loss import (
     E2EDetectLoss,
@@ -329,12 +329,13 @@ class BaseModel(torch.nn.Module):

         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor |
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if getattr(self, "criterion", None) is None:
             self.criterion = self.init_criterion()

-
+        if preds is None:
+            preds = self.forward(batch["img"])
         return self.criterion(preds, batch)

     def init_criterion(self):
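The change in `BaseModel.loss` (and the matching ones in `RTDETRDetectionModel`, `WorldModel`, `YOLOEModel`, and `YOLOESegModel` below) makes the forward pass conditional on `preds` being absent. A stripped-down sketch of the pattern, with hypothetical names:

```python
def loss_sketch(model, criterion, batch, preds=None):
    """Compute the training loss, reusing caller-supplied predictions when they are provided."""
    if preds is None:
        preds = model(batch["img"])  # only run the forward pass if the caller did not
    return criterion(preds, batch)
```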
@@ -480,10 +481,10 @@ class DetectionModel(BaseModel):
         Clip YOLO augmented inference tails.

         Args:
-            y (
+            y (list[torch.Tensor]): List of detection tensors.

         Returns:
-            (
+            (list[torch.Tensor]): Clipped detection tensors.
         """
         nl = self.model[-1].nl  # number of detection layers (P3-P5)
         g = sum(4**x for x in range(nl))  # grid points
@@ -775,7 +776,8 @@ class RTDETRDetectionModel(DetectionModel):
             "gt_groups": gt_groups,
         }

-
+        if preds is None:
+            preds = self.predict(img, batch=targets)
         dec_bboxes, dec_scores, enc_bboxes, enc_scores, dn_meta = preds if self.training else preds[1]
         if dn_meta is None:
             dn_bboxes, dn_scores = None, None
@@ -874,7 +876,7 @@ class WorldModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.

         Args:
-            text (
+            text (list[str]): List of class names.
             batch (int): Batch size for processing text tokens.
             cache_clip_model (bool): Whether to cache the CLIP model.
         """
@@ -886,7 +888,7 @@ class WorldModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.

         Args:
-            text (
+            text (list[str]): List of class names.
             batch (int): Batch size for processing text tokens.
             cache_clip_model (bool): Whether to cache the CLIP model.

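At the user level, `set_classes` is what enables the offline-vocabulary workflow these docstrings describe. A minimal usage sketch, assuming the public `YOLOWorld` wrapper; the weights file and class names are illustrative:

```python
from ultralytics import YOLOWorld

model = YOLOWorld("yolov8s-world.pt")  # illustrative checkpoint name
model.set_classes(["person", "bus"])   # embed the prompt text once
results = model.predict("bus.jpg")     # subsequent inference needs no CLIP text encoder
```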
@@ -956,7 +958,7 @@ class WorldModel(DetectionModel):

         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor |
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if not hasattr(self, "criterion"):
             self.criterion = self.init_criterion()
@@ -1012,7 +1014,7 @@ class YOLOEModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.

         Args:
-            text (
+            text (list[str]): List of class names.
             batch (int): Batch size for processing text tokens.
             cache_clip_model (bool): Whether to cache the CLIP model.
             without_reprta (bool): Whether to return text embeddings cooperated with reprta module.
@@ -1060,7 +1062,7 @@ class YOLOEModel(DetectionModel):

         Args:
             vocab (nn.ModuleList): List of vocabulary items.
-            names (
+            names (list[str]): List of class names.
         """
         assert not self.training
         head = self.model[-1]
@@ -1114,7 +1116,7 @@ class YOLOEModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.

         Args:
-            names (
+            names (list[str]): List of class names.
             embeddings (torch.Tensor): Embeddings tensor.
         """
         assert not hasattr(self.model[-1], "lrpc"), (
@@ -1203,7 +1205,7 @@ class YOLOEModel(DetectionModel):

         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor |
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if not hasattr(self, "criterion"):
             from ultralytics.utils.loss import TVPDetectLoss
@@ -1251,7 +1253,7 @@ class YOLOESegModel(YOLOEModel, SegmentationModel):

         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor |
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if not hasattr(self, "criterion"):
             from ultralytics.utils.loss import TVPSegmentLoss
@@ -1502,7 +1504,7 @@ def load_checkpoint(weight, device=None, inplace=True, fuse=False):
     model = (ckpt.get("ema") or ckpt["model"]).float()  # FP32 model

     # Model compatibility updates
-    model.args =
+    model.args = args  # attach args to model
     model.pt_path = weight  # attach *.pt file path to model
     model.task = getattr(model, "task", guess_model_task(model))
     if not hasattr(model, "stride"):
ultralytics/nn/text_model.py
CHANGED
@@ -97,7 +97,7 @@ class CLIP(TextModel):
         Convert input texts to CLIP tokens.

         Args:
-            texts (str |
+            texts (str | list[str]): Input text or list of texts to tokenize.

         Returns:
             (torch.Tensor): Tokenized text tensor with shape (batch_size, context_length) ready for model processing.
@@ -240,7 +240,7 @@ class MobileCLIP(TextModel):
         Convert input texts to MobileCLIP tokens.

         Args:
-            texts (
+            texts (list[str]): List of text strings to tokenize.

         Returns:
             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
@@ -325,7 +325,7 @@ class MobileCLIPTS(TextModel):
         Convert input texts to MobileCLIP tokens.

         Args:
-            texts (
+            texts (list[str]): List of text strings to tokenize.

         Returns:
             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
ultralytics/solutions/ai_gym.py
CHANGED
@@ -14,10 +14,10 @@ class AIGym(BaseSolution):
     repetitions of exercises based on predefined angle thresholds for up and down positions.

     Attributes:
-        states (
+        states (dict[float, int, str]): Stores per-track angle, count, and stage for workout monitoring.
         up_angle (float): Angle threshold for considering the 'up' position of an exercise.
         down_angle (float): Angle threshold for considering the 'down' position of an exercise.
-        kpts (
+        kpts (list[int]): Indices of keypoints used for angle calculation.

     Methods:
         process: Process a frame to detect poses, calculate angles, and count repetitions.
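A minimal usage sketch for the AIGym solution described by this docstring, assuming the public `ultralytics.solutions` API; the keypoint indices, angle thresholds, and video path are illustrative:

```python
import cv2
from ultralytics import solutions

gym = solutions.AIGym(kpts=[6, 8, 10], up_angle=145.0, down_angle=90.0)  # illustrative values
cap = cv2.VideoCapture("workout.mp4")  # illustrative path
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = gym.process(frame)  # counts repetitions from the configured keypoints
cap.release()
```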
|