dgenerate-ultralytics-headless 8.3.190__py3-none-any.whl → 8.3.192__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/RECORD +103 -102
- tests/test_cuda.py +6 -5
- tests/test_exports.py +1 -6
- tests/test_python.py +1 -4
- tests/test_solutions.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -14
- ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
- ultralytics/cfg/datasets/VisDrone.yaml +4 -4
- ultralytics/data/annotator.py +6 -6
- ultralytics/data/augment.py +53 -51
- ultralytics/data/base.py +15 -13
- ultralytics/data/build.py +7 -4
- ultralytics/data/converter.py +9 -10
- ultralytics/data/dataset.py +24 -22
- ultralytics/data/loaders.py +13 -11
- ultralytics/data/split.py +4 -3
- ultralytics/data/split_dota.py +14 -12
- ultralytics/data/utils.py +29 -23
- ultralytics/engine/exporter.py +2 -2
- ultralytics/engine/model.py +16 -14
- ultralytics/engine/predictor.py +8 -6
- ultralytics/engine/results.py +54 -52
- ultralytics/engine/trainer.py +8 -3
- ultralytics/engine/tuner.py +230 -42
- ultralytics/hub/google/__init__.py +7 -6
- ultralytics/hub/session.py +8 -6
- ultralytics/hub/utils.py +3 -4
- ultralytics/models/fastsam/model.py +8 -6
- ultralytics/models/nas/model.py +5 -3
- ultralytics/models/rtdetr/train.py +4 -3
- ultralytics/models/rtdetr/val.py +6 -4
- ultralytics/models/sam/amg.py +13 -10
- ultralytics/models/sam/model.py +3 -2
- ultralytics/models/sam/modules/blocks.py +21 -21
- ultralytics/models/sam/modules/decoders.py +11 -11
- ultralytics/models/sam/modules/encoders.py +25 -25
- ultralytics/models/sam/modules/memory_attention.py +9 -8
- ultralytics/models/sam/modules/sam.py +8 -10
- ultralytics/models/sam/modules/tiny_encoder.py +21 -20
- ultralytics/models/sam/modules/transformer.py +6 -5
- ultralytics/models/sam/modules/utils.py +7 -5
- ultralytics/models/sam/predict.py +32 -31
- ultralytics/models/utils/loss.py +29 -27
- ultralytics/models/utils/ops.py +10 -8
- ultralytics/models/yolo/classify/train.py +9 -7
- ultralytics/models/yolo/classify/val.py +11 -9
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +8 -6
- ultralytics/models/yolo/detect/val.py +22 -20
- ultralytics/models/yolo/model.py +14 -14
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +11 -9
- ultralytics/models/yolo/pose/train.py +7 -5
- ultralytics/models/yolo/pose/val.py +12 -10
- ultralytics/models/yolo/segment/train.py +4 -5
- ultralytics/models/yolo/segment/val.py +13 -11
- ultralytics/models/yolo/world/train.py +10 -8
- ultralytics/models/yolo/yoloe/train.py +10 -10
- ultralytics/models/yolo/yoloe/val.py +11 -9
- ultralytics/nn/autobackend.py +17 -19
- ultralytics/nn/modules/block.py +12 -12
- ultralytics/nn/modules/conv.py +4 -3
- ultralytics/nn/modules/head.py +41 -37
- ultralytics/nn/modules/transformer.py +22 -21
- ultralytics/nn/tasks.py +2 -2
- ultralytics/nn/text_model.py +6 -5
- ultralytics/solutions/analytics.py +7 -5
- ultralytics/solutions/config.py +12 -10
- ultralytics/solutions/distance_calculation.py +3 -3
- ultralytics/solutions/heatmap.py +4 -2
- ultralytics/solutions/object_counter.py +5 -3
- ultralytics/solutions/parking_management.py +4 -2
- ultralytics/solutions/region_counter.py +7 -5
- ultralytics/solutions/similarity_search.py +5 -3
- ultralytics/solutions/solutions.py +38 -36
- ultralytics/solutions/streamlit_inference.py +8 -7
- ultralytics/trackers/bot_sort.py +11 -9
- ultralytics/trackers/byte_tracker.py +17 -15
- ultralytics/trackers/utils/gmc.py +4 -3
- ultralytics/utils/__init__.py +16 -88
- ultralytics/utils/autobatch.py +3 -2
- ultralytics/utils/autodevice.py +10 -10
- ultralytics/utils/benchmarks.py +11 -10
- ultralytics/utils/callbacks/comet.py +9 -9
- ultralytics/utils/checks.py +17 -26
- ultralytics/utils/export.py +12 -11
- ultralytics/utils/files.py +8 -7
- ultralytics/utils/git.py +139 -0
- ultralytics/utils/instance.py +8 -7
- ultralytics/utils/loss.py +15 -13
- ultralytics/utils/metrics.py +62 -62
- ultralytics/utils/ops.py +3 -2
- ultralytics/utils/patches.py +6 -4
- ultralytics/utils/plotting.py +20 -18
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tqdm.py +18 -14
- ultralytics/utils/triton.py +3 -2
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/top_level.txt +0 -0
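Most of the per-file churn below follows a single pattern: each module gains `from __future__ import annotations` and drops its `typing.Dict`/`List`/`Optional`/`Union` imports in favor of PEP 585/604 syntax (`dict[str, Any]`, `int | None`). As a rough illustration (toy function, not Ultralytics code), the future import makes the new syntax safe even on Python versions that predate it, because annotations are stored as strings rather than evaluated at runtime:

```python
# Toy sketch of the annotation style this release migrates to (not Ultralytics code).
from __future__ import annotations  # annotations become strings, not runtime objects

from typing import Any


def preprocess(batch: dict[str, Any], imgsz: int | None = None) -> dict[str, Any]:
    """PEP 585 built-in generics and PEP 604 unions replace typing.Dict/Optional."""
    if imgsz is not None:
        batch["imgsz"] = imgsz
    return batch


print(preprocess({"img": []}, imgsz=640))  # {'img': [], 'imgsz': 640}
```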
ultralytics/models/yolo/segment/val.py CHANGED

```diff
@@ -1,8 +1,10 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
-from typing import Any, Dict, List, Tuple
+from typing import Any
 
 import numpy as np
 import torch
@@ -50,7 +52,7 @@ class SegmentationValidator(DetectionValidator):
         self.args.task = "segment"
         self.metrics = SegmentMetrics()
 
-    def preprocess(self, batch: Dict[str, Any]) -> Dict[str, Any]:
+    def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
         """
         Preprocess batch of images for YOLO segmentation validation.
 
@@ -61,7 +63,7 @@ class SegmentationValidator(DetectionValidator):
             (Dict[str, Any]): Preprocessed batch.
         """
         batch = super().preprocess(batch)
-        batch["masks"] = batch["masks"].to(self.device).float()
+        batch["masks"] = batch["masks"].to(self.device, non_blocking=True).float()
         return batch
 
     def init_metrics(self, model: torch.nn.Module) -> None:
@@ -93,7 +95,7 @@ class SegmentationValidator(DetectionValidator):
             "mAP50-95)",
         )
 
-    def postprocess(self, preds: List[torch.Tensor]) -> List[Dict[str, torch.Tensor]]:
+    def postprocess(self, preds: list[torch.Tensor]) -> list[dict[str, torch.Tensor]]:
         """
         Post-process YOLO predictions and return output detections with proto.
 
@@ -119,7 +121,7 @@ class SegmentationValidator(DetectionValidator):
         )
         return preds
 
-    def _prepare_batch(self, si: int, batch: Dict[str, Any]) -> Dict[str, Any]:
+    def _prepare_batch(self, si: int, batch: dict[str, Any]) -> dict[str, Any]:
         """
         Prepare a batch for training or inference by processing images and targets.
 
@@ -135,7 +137,7 @@ class SegmentationValidator(DetectionValidator):
         prepared_batch["masks"] = batch["masks"][midx]
         return prepared_batch
 
-    def _process_batch(self, preds: Dict[str, torch.Tensor], batch: Dict[str, Any]) -> Dict[str, np.ndarray]:
+    def _process_batch(self, preds: dict[str, torch.Tensor], batch: dict[str, Any]) -> dict[str, np.ndarray]:
         """
         Compute correct prediction matrix for a batch based on bounding boxes and optional masks.
 
@@ -174,7 +176,7 @@ class SegmentationValidator(DetectionValidator):
             tp.update({"tp_m": tp_m})  # update tp with mask IoU
         return tp
 
-    def plot_predictions(self, batch: Dict[str, Any], preds: List[Dict[str, torch.Tensor]], ni: int) -> None:
+    def plot_predictions(self, batch: dict[str, Any], preds: list[dict[str, torch.Tensor]], ni: int) -> None:
         """
         Plot batch predictions with masks and bounding boxes.
 
@@ -190,7 +192,7 @@ class SegmentationValidator(DetectionValidator):
             p["masks"] = torch.as_tensor(masks[:50], dtype=torch.uint8).cpu()
         super().plot_predictions(batch, preds, ni, max_det=50)  # plot bboxes
 
-    def save_one_txt(self, predn: torch.Tensor, save_conf: bool, shape: Tuple[int, int], file: Path) -> None:
+    def save_one_txt(self, predn: torch.Tensor, save_conf: bool, shape: tuple[int, int], file: Path) -> None:
         """
         Save YOLO detections to a txt file in normalized coordinates in a specific format.
 
@@ -210,7 +212,7 @@ class SegmentationValidator(DetectionValidator):
             masks=torch.as_tensor(predn["masks"], dtype=torch.uint8),
         ).save_txt(file, save_conf=save_conf)
 
-    def pred_to_json(self, predn: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> None:
+    def pred_to_json(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> None:
         """
         Save one JSON result for COCO evaluation.
 
@@ -233,7 +235,7 @@ class SegmentationValidator(DetectionValidator):
         for i, r in enumerate(rles):
             self.jdict[-len(rles) + i]["segmentation"] = r  # segmentation
 
-    def scale_preds(self, predn: Dict[str, torch.Tensor], pbatch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
+    def scale_preds(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> dict[str, torch.Tensor]:
         """Scales predictions to the original image size."""
         return {
             **super().scale_preds(predn, pbatch),
@@ -244,7 +246,7 @@ class SegmentationValidator(DetectionValidator):
             ),
         }
 
-    def eval_json(self, stats: Dict[str, Any]) -> Dict[str, Any]:
+    def eval_json(self, stats: dict[str, Any]) -> dict[str, Any]:
         """Return COCO-style instance segmentation evaluation metrics."""
         pred_json = self.save_dir / "predictions.json"  # predictions
         anno_json = (
```
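The other recurring change in this release swaps `.to(device)` for `.to(device, non_blocking=True)` when moving batch tensors to the GPU. A minimal sketch of why that matters (illustrative, not Ultralytics code): the asynchronous copy can overlap with other GPU work, but only when the source tensor lives in pinned (page-locked) host memory; otherwise PyTorch quietly falls back to a synchronous copy.

```python
# Minimal sketch of the non_blocking transfer pattern (illustrative only).
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
masks = torch.zeros(8, 160, 160)
if device.type == "cuda":
    masks = masks.pin_memory()  # page-locked memory enables true async host-to-GPU DMA
masks = masks.to(device, non_blocking=True).float()  # copy may overlap with GPU compute
```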
ultralytics/models/yolo/world/train.py CHANGED

```diff
@@ -1,8 +1,10 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import itertools
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 import torch
 
@@ -51,7 +53,7 @@ class WorldTrainer(DetectionTrainer):
         >>> trainer.train()
     """
 
-    def __init__(self, cfg=DEFAULT_CFG, overrides: Optional[Dict[str, Any]] = None, _callbacks=None):
+    def __init__(self, cfg=DEFAULT_CFG, overrides: dict[str, Any] | None = None, _callbacks=None):
         """
         Initialize a WorldTrainer object with given arguments.
 
@@ -65,7 +67,7 @@ class WorldTrainer(DetectionTrainer):
         super().__init__(cfg, overrides, _callbacks)
         self.text_embeddings = None
 
-    def get_model(self, cfg=None, weights: Optional[str] = None, verbose: bool = True) -> WorldModel:
+    def get_model(self, cfg=None, weights: str | None = None, verbose: bool = True) -> WorldModel:
         """
         Return WorldModel initialized with specified config and weights.
 
@@ -91,7 +93,7 @@ class WorldTrainer(DetectionTrainer):
 
         return model
 
-    def build_dataset(self, img_path: str, mode: str = "train", batch: Optional[int] = None):
+    def build_dataset(self, img_path: str, mode: str = "train", batch: int | None = None):
         """
         Build YOLO Dataset for training or validation.
 
@@ -111,7 +113,7 @@ class WorldTrainer(DetectionTrainer):
         self.set_text_embeddings([dataset], batch)  # cache text embeddings to accelerate training
         return dataset
 
-    def set_text_embeddings(self, datasets: List[Any], batch: Optional[int]) -> None:
+    def set_text_embeddings(self, datasets: list[Any], batch: int | None) -> None:
         """
         Set text embeddings for datasets to accelerate training by caching category names.
 
@@ -137,7 +139,7 @@ class WorldTrainer(DetectionTrainer):
         )
         self.text_embeddings = text_embeddings
 
-    def generate_text_embeddings(self, texts: List[str], batch: int, cache_dir: Path) -> Dict[str, torch.Tensor]:
+    def generate_text_embeddings(self, texts: list[str], batch: int, cache_dir: Path) -> dict[str, torch.Tensor]:
         """
         Generate text embeddings for a list of text samples.
 
@@ -163,13 +165,13 @@ class WorldTrainer(DetectionTrainer):
         torch.save(txt_map, cache_path)
         return txt_map
 
-    def preprocess_batch(self, batch: Dict[str, Any]) -> Dict[str, Any]:
+    def preprocess_batch(self, batch: dict[str, Any]) -> dict[str, Any]:
         """Preprocess a batch of images and text for YOLOWorld training."""
         batch = DetectionTrainer.preprocess_batch(self, batch)
 
         # Add text features
         texts = list(itertools.chain(*batch["texts"]))
-        txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to(self.device)
+        txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to(self.device, non_blocking=True)
         txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
         batch["txt_feats"] = txt_feats.reshape(len(batch["texts"]), -1, txt_feats.shape[-1])
         return batch
```
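The `preprocess_batch` hunk above looks up pre-computed text embeddings from a cache and L2-normalizes them. A toy version of that lookup (hypothetical `cache` dict standing in for `self.text_embeddings`):

```python
# Toy version of the cached text-embedding lookup (hypothetical cache).
import torch

cache = {"person": torch.randn(512), "bus": torch.randn(512)}  # text -> embedding
texts = ["person", "bus", "person"]

txt_feats = torch.stack([cache[t] for t in texts])  # shape (3, 512)
txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)  # unit-length rows
print(txt_feats.norm(dim=-1))  # tensor([1.0000, 1.0000, 1.0000])
```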
ultralytics/models/yolo/yoloe/train.py CHANGED

```diff
@@ -1,9 +1,10 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import itertools
 from copy import copy, deepcopy
 from pathlib import Path
-from typing import Dict, List, Optional, Union
 
 import torch
 
@@ -34,7 +35,7 @@ class YOLOETrainer(DetectionTrainer):
         build_dataset: Build YOLO dataset with multi-modal support for training.
     """
 
-    def __init__(self, cfg=DEFAULT_CFG, overrides: Optional[Dict] = None, _callbacks=None):
+    def __init__(self, cfg=DEFAULT_CFG, overrides: dict | None = None, _callbacks=None):
         """
         Initialize the YOLOE Trainer with specified configurations.
 
@@ -89,7 +90,7 @@ class YOLOETrainer(DetectionTrainer):
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
 
-    def build_dataset(self, img_path: str, mode: str = "train", batch: Optional[int] = None):
+    def build_dataset(self, img_path: str, mode: str = "train", batch: int | None = None):
         """
         Build YOLO Dataset.
 
@@ -174,7 +175,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         generate_text_embeddings: Generate and cache text embeddings for training.
     """
 
-    def build_dataset(self, img_path: Union[List[str], str], mode: str = "train", batch: Optional[int] = None):
+    def build_dataset(self, img_path: list[str] | str, mode: str = "train", batch: int | None = None):
         """
         Build YOLO Dataset for training or validation.
 
@@ -196,12 +197,12 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         batch = DetectionTrainer.preprocess_batch(self, batch)
 
         texts = list(itertools.chain(*batch["texts"]))
-        txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to(self.device)
+        txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to(self.device, non_blocking=True)
         txt_feats = txt_feats.reshape(len(batch["texts"]), -1, txt_feats.shape[-1])
         batch["txt_feats"] = txt_feats
         return batch
 
-    def generate_text_embeddings(self, texts: List[str], batch: int, cache_dir: Path):
+    def generate_text_embeddings(self, texts: list[str], batch: int, cache_dir: Path):
         """
         Generate text embeddings for a list of text samples.
 
@@ -250,8 +251,7 @@ class YOLOEPEFreeTrainer(YOLOEPETrainer, YOLOETrainerFromScratch):
 
     def preprocess_batch(self, batch):
         """Preprocess a batch of images for YOLOE training, adjusting formatting and dimensions as needed."""
-
-        return batch
+        return DetectionTrainer.preprocess_batch(self, batch)
 
     def set_text_embeddings(self, datasets, batch: int):
         """
@@ -285,7 +285,7 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch):
         preprocess_batch: Preprocess batches with visual prompts.
     """
 
-    def build_dataset(self, img_path: Union[List[str], str], mode: str = "train", batch: Optional[int] = None):
+    def build_dataset(self, img_path: list[str] | str, mode: str = "train", batch: int | None = None):
         """
         Build YOLO Dataset for training or validation with visual prompts.
 
@@ -317,5 +317,5 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch):
     def preprocess_batch(self, batch):
         """Preprocess a batch of images for YOLOE training, moving visual prompts to the appropriate device."""
         batch = super().preprocess_batch(batch)
-        batch["visuals"] = batch["visuals"].to(self.device)
+        batch["visuals"] = batch["visuals"].to(self.device, non_blocking=True)
         return batch
```
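The `YOLOEPEFreeTrainer.preprocess_batch` fix above replaces a plain `return batch` with an explicit unbound call on `DetectionTrainer`, which skips the intermediate overrides in the MRO. A toy illustration (simplified class names mirroring the shape of the hierarchy, not the real trainers):

```python
# Toy illustration of bypassing intermediate overrides with an unbound call.
class DetectionTrainer:
    def preprocess_batch(self, batch):
        batch["preprocessed"] = True
        return batch


class TextTrainer(DetectionTrainer):
    def preprocess_batch(self, batch):
        batch = super().preprocess_batch(batch)
        batch["txt_feats"] = "..."  # extra step a subclass may want to skip
        return batch


class PEFreeTrainer(TextTrainer):
    def preprocess_batch(self, batch):
        # Jump straight to DetectionTrainer, skipping TextTrainer's override.
        return DetectionTrainer.preprocess_batch(self, batch)


print(PEFreeTrainer().preprocess_batch({}))  # {'preprocessed': True}
```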
ultralytics/models/yolo/yoloe/val.py CHANGED

```diff
@@ -1,8 +1,10 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from copy import deepcopy
 from pathlib import Path
-from typing import Any, Dict, Optional, Union
+from typing import Any
 
 import torch
 from torch.nn import functional as F
@@ -96,14 +98,14 @@ class YOLOEDetectValidator(DetectionValidator):
         visual_pe[cls_visual_num == 0] = 0
         return visual_pe.unsqueeze(0)
 
-    def preprocess(self, batch: Dict[str, Any]) -> Dict[str, Any]:
+    def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
         """Preprocess batch data, ensuring visuals are on the same device as images."""
         batch = super().preprocess(batch)
         if "visuals" in batch:
-            batch["visuals"] = batch["visuals"].to(batch["img"].device)
+            batch["visuals"] = batch["visuals"].to(batch["img"].device, non_blocking=True)
         return batch
 
-    def get_vpe_dataloader(self, data: Dict[str, Any]) -> torch.utils.data.DataLoader:
+    def get_vpe_dataloader(self, data: dict[str, Any]) -> torch.utils.data.DataLoader:
         """
         Create a dataloader for LVIS training visual prompt samples.
 
@@ -141,11 +143,11 @@ class YOLOEDetectValidator(DetectionValidator):
     @smart_inference_mode()
     def __call__(
         self,
-        trainer: Optional[Any] = None,
-        model: Optional[Union[YOLOEModel, str]] = None,
-        refer_data: Optional[str] = None,
+        trainer: Any | None = None,
+        model: YOLOEModel | str | None = None,
+        refer_data: str | None = None,
         load_vp: bool = False,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """
         Run validation on the model using either text or visual prompt embeddings.
 
@@ -186,7 +188,7 @@ class YOLOEDetectValidator(DetectionValidator):
         if isinstance(model, (str, Path)):
             from ultralytics.nn.tasks import attempt_load_weights
 
-            model = attempt_load_weights(model, device=self.device, inplace=True)
+            model = attempt_load_weights(model, device=self.device)
         model.eval().to(self.device)
         data = check_det_dataset(refer_data or self.args.data)
         names = [name.split("/", 1)[0] for name in list(data["names"].values())]
```
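Note that the validator moves `batch["visuals"]` to `batch["img"].device` rather than to a hardcoded device, so auxiliary tensors always follow wherever the images already live. In miniature (illustrative only):

```python
# Illustrative: auxiliary batch tensors follow the device of the main images.
import torch

batch = {"img": torch.zeros(2, 3, 64, 64), "visuals": torch.zeros(2, 1, 64, 64)}
batch["visuals"] = batch["visuals"].to(batch["img"].device, non_blocking=True)
print(batch["visuals"].device)  # same device as batch["img"]
```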
ultralytics/nn/autobackend.py CHANGED

```diff
@@ -1,12 +1,14 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import ast
 import json
 import platform
 import zipfile
 from collections import OrderedDict, namedtuple
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any
 
 import cv2
 import numpy as np
@@ -19,7 +21,7 @@ from ultralytics.utils.checks import check_requirements, check_suffix, check_ver
 from ultralytics.utils.downloads import attempt_download_asset, is_url
 
 
-def check_class_names(names: Union[List, Dict]) -> Dict[int, str]:
+def check_class_names(names: list | dict) -> dict[int, str]:
     """
     Check class names and convert to dict format if needed.
 
@@ -49,7 +51,7 @@ def check_class_names(names: Union[List, Dict]) -> Dict[int, str]:
     return names
 
 
-def default_class_names(data: Optional[Union[str, Path]] = None) -> Dict[int, str]:
+def default_class_names(data: str | Path | None = None) -> dict[int, str]:
     """
     Apply default class names to an input YAML file or return numerical class names.
 
@@ -134,10 +136,10 @@ class AutoBackend(nn.Module):
     @torch.no_grad()
     def __init__(
         self,
-        model: Union[str, torch.nn.Module] = "yolo11n.pt",
+        model: str | torch.nn.Module = "yolo11n.pt",
         device: torch.device = torch.device("cpu"),
         dnn: bool = False,
-        data: Optional[Union[str, Path]] = None,
+        data: str | Path | None = None,
         fp16: bool = False,
         fuse: bool = True,
         verbose: bool = True,
@@ -146,7 +148,7 @@ class AutoBackend(nn.Module):
         Initialize the AutoBackend for inference.
 
         Args:
-            model (str |
+            model (str | torch.nn.Module): Path to the model weights file or a module instance.
             device (torch.device): Device to run the model on.
             dnn (bool): Use OpenCV DNN module for ONNX inference.
             data (str | Path, optional): Path to the additional data.yaml file containing class names.
@@ -155,7 +157,6 @@ class AutoBackend(nn.Module):
             verbose (bool): Enable verbose logging.
         """
         super().__init__()
-        w = str(model[0] if isinstance(model, list) else model)
         nn_module = isinstance(model, torch.nn.Module)
         (
             pt,
@@ -175,7 +176,7 @@ class AutoBackend(nn.Module):
             imx,
             rknn,
             triton,
-        ) = self._model_type(w)
+        ) = self._model_type("" if nn_module else model)
         fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
         nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn  # BHWC formats (vs torch BCWH)
         stride, ch = 32, 3  # default stride and channels
@@ -189,8 +190,7 @@ class AutoBackend(nn.Module):
             cuda = False
 
         # Download if not local
-
-        w = attempt_download_asset(w)
+        w = attempt_download_asset(model) if pt else model  # weights path
 
         # PyTorch (in-memory or file)
         if nn_module or pt:
@@ -203,11 +203,9 @@ class AutoBackend(nn.Module):
                 model = model.fuse(verbose=verbose)
                 model = model.to(device)
             else:  # pt file
-                from ultralytics.nn.tasks import attempt_load_weights
+                from ultralytics.nn.tasks import attempt_load_one_weight
 
-                model = attempt_load_weights(
-                    model if isinstance(model, list) else w, device=device, inplace=True, fuse=fuse
-                )
+                model, _ = attempt_load_one_weight(model, device=device, fuse=fuse)  # load model, ckpt
 
             # Common PyTorch model processing
             if hasattr(model, "kpt_shape"):
@@ -480,7 +478,7 @@ class AutoBackend(nn.Module):
 
         # TF.js
         elif tfjs:
-            raise NotImplementedError("YOLOv8 TF.js inference is not currently supported.")
+            raise NotImplementedError("Ultralytics TF.js inference is not currently supported.")
 
         # PaddlePaddle
         elif paddle:
@@ -612,9 +610,9 @@ class AutoBackend(nn.Module):
         im: torch.Tensor,
         augment: bool = False,
         visualize: bool = False,
-        embed: Optional[List] = None,
+        embed: list | None = None,
         **kwargs: Any,
-    ) -> Union[torch.Tensor, List[torch.Tensor]]:
+    ) -> torch.Tensor | list[torch.Tensor]:
         """
         Run inference on an AutoBackend model.
 
@@ -843,7 +841,7 @@ class AutoBackend(nn.Module):
         """
         return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x
 
-    def warmup(self, imgsz: Tuple[int, int, int, int] = (1, 3, 640, 640)) -> None:
+    def warmup(self, imgsz: tuple[int, int, int, int] = (1, 3, 640, 640)) -> None:
         """
         Warm up the model by running one forward pass with a dummy input.
 
@@ -857,7 +855,7 @@ class AutoBackend(nn.Module):
         self.forward(im)  # warmup
 
     @staticmethod
-    def _model_type(p: str = "path/to/model.pt") -> List[bool]:
+    def _model_type(p: str = "path/to/model.pt") -> list[bool]:
         """
         Take a path to a model file and return the model type.
 
```
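`_model_type` now receives an empty string for in-memory modules instead of a stringified weights variable. A simplified sketch of this kind of suffix-based format detection (toy function, deliberately incomplete; the real method recognizes many more formats, including Triton URLs):

```python
# Simplified sketch of suffix-based model-type detection (toy, incomplete).
from __future__ import annotations

from pathlib import Path


def model_type(p: str = "") -> list[bool]:
    """Return one flag per supported format; all False for in-memory modules (p='')."""
    suffixes = [".pt", ".torchscript", ".onnx", ".engine", ".tflite"]
    suffix = Path(p).suffix.lower()
    return [suffix == s for s in suffixes]


print(model_type("yolo11n.onnx"))  # [False, False, True, False, False]
print(model_type(""))              # [False, False, False, False, False]
```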
ultralytics/nn/modules/block.py CHANGED

```diff
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Block modules."""
 
-from typing import List, Optional, Tuple
+from __future__ import annotations
 
 import torch
 import torch.nn as nn
@@ -192,7 +192,7 @@ class HGBlock(nn.Module):
 class SPP(nn.Module):
     """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
 
-    def __init__(self, c1: int, c2: int, k: Tuple[int, ...] = (5, 9, 13)):
+    def __init__(self, c1: int, c2: int, k: tuple[int, ...] = (5, 9, 13)):
         """
         Initialize the SPP layer with input/output channels and pooling kernel sizes.
 
@@ -471,7 +471,7 @@ class Bottleneck(nn.Module):
     """Standard bottleneck."""
 
     def __init__(
-        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5
+        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
     ):
         """
         Initialize a standard bottleneck module.
@@ -711,7 +711,7 @@ class ImagePoolingAttn(nn.Module):
     """ImagePoolingAttn: Enhance the text embeddings with image-aware information."""
 
     def __init__(
-        self, ec: int = 256, ch: Tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
+        self, ec: int = 256, ch: tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
     ):
         """
         Initialize ImagePoolingAttn module.
@@ -740,7 +740,7 @@ class ImagePoolingAttn(nn.Module):
         self.hc = ec // nh
         self.k = k
 
-    def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
+    def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
         """
         Forward pass of ImagePoolingAttn.
 
@@ -856,7 +856,7 @@ class RepBottleneck(Bottleneck):
     """Rep bottleneck."""
 
     def __init__(
-        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5
+        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
     ):
         """
         Initialize RepBottleneck.
@@ -1026,7 +1026,7 @@ class SPPELAN(nn.Module):
 class CBLinear(nn.Module):
     """CBLinear."""
 
-    def __init__(self, c1: int, c2s: List[int], k: int = 1, s: int = 1, p: Optional[int] = None, g: int = 1):
+    def __init__(self, c1: int, c2s: list[int], k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
         """
         Initialize CBLinear module.
 
@@ -1042,7 +1042,7 @@ class CBLinear(nn.Module):
         self.c2s = c2s
         self.conv = nn.Conv2d(c1, sum(c2s), k, s, autopad(k, p), groups=g, bias=True)
 
-    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
+    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
         """Forward pass through CBLinear layer."""
         return self.conv(x).split(self.c2s, dim=1)
 
@@ -1050,7 +1050,7 @@ class CBLinear(nn.Module):
 class CBFuse(nn.Module):
     """CBFuse."""
 
-    def __init__(self, idx: List[int]):
+    def __init__(self, idx: list[int]):
         """
         Initialize CBFuse module.
 
@@ -1060,7 +1060,7 @@ class CBFuse(nn.Module):
         super().__init__()
         self.idx = idx
 
-    def forward(self, xs: List[torch.Tensor]) -> torch.Tensor:
+    def forward(self, xs: list[torch.Tensor]) -> torch.Tensor:
         """
         Forward pass through CBFuse layer.
 
@@ -1974,7 +1974,7 @@ class Residual(nn.Module):
 class SAVPE(nn.Module):
     """Spatial-Aware Visual Prompt Embedding module for feature enhancement."""
 
-    def __init__(self, ch: List[int], c3: int, embed: int):
+    def __init__(self, ch: list[int], c3: int, embed: int):
         """
         Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
 
@@ -2002,7 +2002,7 @@ class SAVPE(nn.Module):
         self.cv5 = nn.Conv2d(1, self.c, 3, padding=1)
         self.cv6 = nn.Sequential(Conv(2 * self.c, self.c, 3), nn.Conv2d(self.c, self.c, 3, padding=1))
 
-    def forward(self, x: List[torch.Tensor], vp: torch.Tensor) -> torch.Tensor:
+    def forward(self, x: list[torch.Tensor], vp: torch.Tensor) -> torch.Tensor:
         """Process input features and visual prompts to generate enhanced embeddings."""
         y = [self.cv2[i](xi) for i, xi in enumerate(x)]
         y = self.cv4(torch.cat(y, dim=1))
```
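The `SPP.__init__` hunk above types the kernel argument as `tuple[int, ...] = (5, 9, 13)`. A compact sketch of what those kernels do (simplified from the SPP idea only; the Ultralytics block also wraps the pooling in 1x1 convolutions):

```python
# Compact SPP-style pooling sketch (simplified; not the Ultralytics block).
from __future__ import annotations

import torch
import torch.nn as nn


class TinySPP(nn.Module):
    def __init__(self, k: tuple[int, ...] = (5, 9, 13)):
        super().__init__()
        # stride-1 max pools with "same" padding keep the spatial size constant
        self.pools = nn.ModuleList(nn.MaxPool2d(x, stride=1, padding=x // 2) for x in k)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.cat([x] + [p(x) for p in self.pools], dim=1)  # concat on channels


print(TinySPP()(torch.zeros(1, 8, 32, 32)).shape)  # torch.Size([1, 32, 32, 32])
```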
ultralytics/nn/modules/conv.py CHANGED

```diff
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Convolution modules."""
 
+from __future__ import annotations
+
 import math
-from typing import List
 
 import numpy as np
 import torch
@@ -669,7 +670,7 @@ class Concat(nn.Module):
         super().__init__()
         self.d = dimension
 
-    def forward(self, x: List[torch.Tensor]):
+    def forward(self, x: list[torch.Tensor]):
         """
         Concatenate input tensors along specified dimension.
 
@@ -700,7 +701,7 @@ class Index(nn.Module):
         super().__init__()
         self.index = index
 
-    def forward(self, x: List[torch.Tensor]):
+    def forward(self, x: list[torch.Tensor]):
         """
         Select and return a particular index from input.
 
```
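Both hunks here annotate modules whose `forward` consumes a `list[torch.Tensor]` rather than a single tensor, which is how multi-branch feature maps are merged or selected in the YOLO graph. A toy pair mirroring those signatures (illustrative, close to but not copied from the real modules):

```python
# Toy list-input modules mirroring the Concat/Index signatures in the diff.
from __future__ import annotations

import torch
import torch.nn as nn


class Concat(nn.Module):
    def __init__(self, dimension: int = 1):
        super().__init__()
        self.d = dimension

    def forward(self, x: list[torch.Tensor]) -> torch.Tensor:
        return torch.cat(x, dim=self.d)  # merge branches along one dimension


class Index(nn.Module):
    def __init__(self, index: int = 0):
        super().__init__()
        self.index = index

    def forward(self, x: list[torch.Tensor]) -> torch.Tensor:
        return x[self.index]  # pick a single branch


feats = [torch.zeros(1, 2, 4, 4), torch.zeros(1, 3, 4, 4)]
print(Concat(1)(feats).shape)  # torch.Size([1, 5, 4, 4])
print(Index(1)(feats).shape)   # torch.Size([1, 3, 4, 4])
```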