dgenerate-ultralytics-headless 8.3.253__py3-none-any.whl → 8.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/METADATA +41 -49
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/RECORD +85 -74
- tests/__init__.py +2 -2
- tests/conftest.py +1 -1
- tests/test_cuda.py +8 -2
- tests/test_engine.py +8 -8
- tests/test_exports.py +11 -4
- tests/test_integrations.py +9 -9
- tests/test_python.py +14 -14
- tests/test_solutions.py +3 -3
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +25 -27
- ultralytics/cfg/default.yaml +3 -1
- ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
- ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
- ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
- ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
- ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
- ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
- ultralytics/cfg/models/26/yolo26.yaml +52 -0
- ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
- ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
- ultralytics/data/annotator.py +2 -2
- ultralytics/data/augment.py +7 -0
- ultralytics/data/converter.py +57 -38
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/exporter.py +31 -26
- ultralytics/engine/model.py +34 -34
- ultralytics/engine/predictor.py +17 -17
- ultralytics/engine/results.py +14 -12
- ultralytics/engine/trainer.py +59 -29
- ultralytics/engine/tuner.py +19 -11
- ultralytics/engine/validator.py +16 -16
- ultralytics/models/fastsam/predict.py +1 -1
- ultralytics/models/yolo/classify/predict.py +1 -1
- ultralytics/models/yolo/classify/train.py +1 -1
- ultralytics/models/yolo/classify/val.py +1 -1
- ultralytics/models/yolo/detect/predict.py +2 -2
- ultralytics/models/yolo/detect/train.py +4 -3
- ultralytics/models/yolo/detect/val.py +7 -1
- ultralytics/models/yolo/model.py +8 -8
- ultralytics/models/yolo/obb/predict.py +2 -2
- ultralytics/models/yolo/obb/train.py +3 -3
- ultralytics/models/yolo/obb/val.py +1 -1
- ultralytics/models/yolo/pose/predict.py +1 -1
- ultralytics/models/yolo/pose/train.py +3 -1
- ultralytics/models/yolo/pose/val.py +1 -1
- ultralytics/models/yolo/segment/predict.py +3 -3
- ultralytics/models/yolo/segment/train.py +4 -4
- ultralytics/models/yolo/segment/val.py +4 -2
- ultralytics/models/yolo/yoloe/train.py +6 -1
- ultralytics/models/yolo/yoloe/train_seg.py +6 -1
- ultralytics/nn/autobackend.py +5 -5
- ultralytics/nn/modules/__init__.py +8 -0
- ultralytics/nn/modules/block.py +128 -8
- ultralytics/nn/modules/head.py +788 -203
- ultralytics/nn/tasks.py +86 -41
- ultralytics/nn/text_model.py +5 -2
- ultralytics/optim/__init__.py +5 -0
- ultralytics/optim/muon.py +338 -0
- ultralytics/solutions/ai_gym.py +3 -3
- ultralytics/solutions/config.py +1 -1
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +2 -2
- ultralytics/solutions/parking_management.py +1 -1
- ultralytics/solutions/solutions.py +2 -2
- ultralytics/trackers/track.py +1 -1
- ultralytics/utils/__init__.py +8 -8
- ultralytics/utils/benchmarks.py +23 -23
- ultralytics/utils/callbacks/platform.py +11 -7
- ultralytics/utils/checks.py +6 -6
- ultralytics/utils/downloads.py +5 -3
- ultralytics/utils/export/engine.py +19 -10
- ultralytics/utils/export/imx.py +19 -13
- ultralytics/utils/export/tensorflow.py +21 -21
- ultralytics/utils/files.py +2 -2
- ultralytics/utils/loss.py +587 -203
- ultralytics/utils/metrics.py +1 -0
- ultralytics/utils/ops.py +11 -2
- ultralytics/utils/tal.py +98 -19
- ultralytics/utils/tuner.py +2 -2
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import torch
|
|
6
|
+
from torch import optim
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def zeropower_via_newtonschulz5(G: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    """Approximately orthogonalize a 2D matrix via a fixed 5-step quintic Newton-Schulz iteration.

    Produces a matrix close to UV^T (where USV^T = G is the SVD) without computing an SVD. The
    coefficients maximize the iteration's slope at zero, trading exactness for fast convergence:
    the output behaves like US'V^T with diag(S') roughly in Uniform(0.5, 1.5), which works well
    empirically as an optimizer update direction.

    Args:
        G (torch.Tensor): 2D matrix to orthogonalize.
        eps (float, optional): Stabilizer added to the norm before scaling. Default: 1e-7.

    Returns:
        (torch.Tensor): Orthogonalized matrix in bfloat16 with the same shape as G.

    Examples:
        >>> G = torch.randn(128, 64)
        >>> G_ortho = zeropower_via_newtonschulz5(G)
        >>> print(G_ortho.shape)
        torch.Size([128, 64])

    Notes:
        - All computation is done in bfloat16.
        - Exactly 5 iterations with fixed coefficients are performed.
        - Tall matrices are transposed first so the cheaper orientation is iterated.
    """
    assert len(G.shape) == 2
    X = G.bfloat16()
    X /= X.norm() + eps  # scale so the top singular value is at most ~1
    tall = G.size(0) > G.size(1)
    if tall:
        X = X.T  # iterate in the wide orientation for cheaper X @ X.T products
    a, b, c = 3.4445, -4.7750, 2.0315  # quintic coefficients tuned for steep slope at zero
    for _ in range(5):  # fixed number of Newton-Schulz steps
        A = X @ X.T
        B = b * A + c * A @ A
        X = a * X + B @ X
    if tall:
        X = X.T
    return X
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def muon_update(grad: torch.Tensor, momentum: torch.Tensor, beta: float = 0.95, nesterov: bool = True) -> torch.Tensor:
    """Build a Muon update: momentum-accumulated gradient, orthogonalized and dimension-scaled.

    The momentum buffer is updated in place as an EMA of the gradient, optionally combined with
    the raw gradient for Nesterov acceleration, orthogonalized via Newton-Schulz iterations, and
    finally rescaled by the parameter's aspect ratio.

    Args:
        grad (torch.Tensor): Gradient tensor, 2D or 4D (convolution filters).
        momentum (torch.Tensor): Momentum buffer; mutated in place via lerp.
        beta (float, optional): EMA coefficient for the momentum buffer. Default: 0.95.
        nesterov (bool, optional): Apply Nesterov-style lookahead. Default: True.

    Returns:
        (torch.Tensor): Orthogonalized update. 4D inputs are flattened to 2D for the
            orthogonalization, so callers should reshape back to the parameter shape.

    Examples:
        >>> grad = torch.randn(64, 128)
        >>> momentum = torch.zeros_like(grad)
        >>> update = muon_update(grad, momentum, beta=0.95, nesterov=True)
        >>> print(update.shape)
        torch.Size([64, 128])

    Notes:
        - In-place buffer update: momentum = beta * momentum + (1 - beta) * grad.
        - Final scale is sqrt(max(1, rows / cols)) of the original gradient's last two dims.
    """
    momentum.lerp_(grad, 1 - beta)  # EMA accumulation, mutates the caller's buffer
    if nesterov:
        update = grad.lerp(momentum, beta)  # lookahead blend of gradient and momentum
    else:
        update = momentum
    if update.ndim == 4:  # conv filter: flatten to (out_channels, -1) before orthogonalizing
        update = update.view(len(update), -1)
    update = zeropower_via_newtonschulz5(update)
    # Rescale by the parameter aspect ratio so update magnitude tracks dimensions
    return update * max(1, grad.size(-2) / grad.size(-1)) ** 0.5
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class MuSGD(optim.Optimizer):
    """Hybrid optimizer blending orthogonalized Muon steps with classic SGD momentum.

    Parameter groups flagged with ``use_muon=True`` receive two updates per step: an
    orthogonalized Muon update scaled by ``muon * lr``, followed by a standard SGD-with-momentum
    update scaled by ``sgd * lr``. Groups with ``use_muon=False`` receive plain SGD with
    momentum, optional Nesterov acceleration, and weight decay.

    Args:
        params: Iterable of parameters to optimize or dicts defining parameter groups.
        lr (float, optional): Learning rate. Default: 1e-3.
        momentum (float, optional): Momentum factor. Default: 0.0.
        weight_decay (float, optional): L2 penalty coefficient. Default: 0.0.
        nesterov (bool, optional): Enable Nesterov momentum. Default: False.
        use_muon (bool, optional): Default Muon flag for groups without an explicit one. Default: False.
        muon (float, optional): Learning-rate scale of the Muon component. Default: 0.5.
        sgd (float, optional): Learning-rate scale of the SGD component in hybrid mode. Default: 0.5.

    Attributes:
        muon (float): Multiplier applied to the learning rate for Muon updates.
        sgd (float): Multiplier applied to the learning rate for SGD updates in hybrid mode.

    Examples:
        >>> groups = [
        ...     {"params": model.conv_params, "lr": 0.02, "use_muon": True, "momentum": 0.95, "nesterov": True, "weight_decay": 0.01},
        ...     {"params": model.other_params, "lr": 0.01, "use_muon": False, "momentum": 0.9, "nesterov": False, "weight_decay": 0},
        ... ]
        >>> optimizer = MuSGD(groups, muon=0.5, sgd=0.5)
        >>> loss = model(data)
        >>> loss.backward()
        >>> optimizer.step()

    Notes:
        - Muon orthogonalization is intended for parameters with 2+ dimensions.
        - In hybrid mode, weight decay is applied only to the SGD component.
    """

    def __init__(
        self,
        params,
        lr: float = 1e-3,
        momentum: float = 0.0,
        weight_decay: float = 0.0,
        nesterov: bool = False,
        use_muon: bool = False,
        muon: float = 0.5,
        sgd: float = 0.5,
    ):
        """Set up parameter groups and store the Muon/SGD mixing factors.

        Args:
            params: Iterable of parameters to optimize or dicts defining parameter groups.
            lr (float): Learning rate.
            momentum (float): Momentum factor for SGD.
            weight_decay (float): Weight decay (L2 penalty).
            nesterov (bool): Whether to use Nesterov momentum.
            use_muon (bool): Whether to enable Muon updates by default.
            muon (float): Scaling factor for the Muon component.
            sgd (float): Scaling factor for the SGD component.
        """
        group_defaults = dict(
            lr=lr,
            momentum=momentum,
            weight_decay=weight_decay,
            nesterov=nesterov,
            use_muon=use_muon,
        )
        super().__init__(params, group_defaults)
        self.muon = muon
        self.sgd = sgd

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Hybrid groups (``use_muon=True``) get an orthogonalized Muon update followed by an SGD
        momentum update; all other groups get a standard SGD momentum update.

        Args:
            closure (Callable, optional): Closure that reevaluates the model and returns the
                loss. Default: None.

        Returns:
            (torch.Tensor | None): Loss from the closure if one was given, otherwise None.

        Notes:
            - Parameters with no gradient are skipped.
            - In hybrid mode, weight decay affects only the SGD component, and it reads the
              parameter value *after* the Muon update has been applied.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            beta, use_nesterov, decay = group["momentum"], group["nesterov"], group["weight_decay"]
            if group["use_muon"]:
                # Hybrid path: Muon + SGD applied to every parameter in the group
                for p in group["params"]:
                    step_size = group["lr"]
                    if p.grad is None:
                        continue
                    g = p.grad
                    buffers = self.state[p]
                    if not buffers:
                        buffers["momentum_buffer"] = torch.zeros_like(p)
                        buffers["momentum_buffer_SGD"] = torch.zeros_like(p)

                    # Muon component: orthogonalized momentum direction
                    ortho = muon_update(g, buffers["momentum_buffer"], beta=beta, nesterov=use_nesterov)
                    p.add_(ortho.reshape(p.shape), alpha=-(step_size * self.muon))

                    # SGD component (the only place weight decay enters in hybrid mode)
                    if decay != 0:
                        g = g.add(p, alpha=decay)
                    sgd_buf = buffers["momentum_buffer_SGD"]
                    sgd_buf.mul_(beta).add_(g)
                    direction = g.add(sgd_buf, alpha=beta) if use_nesterov else sgd_buf
                    p.add_(direction, alpha=-(step_size * self.sgd))
            else:
                # Pure SGD path with momentum / Nesterov / weight decay
                for p in group["params"]:
                    step_size = group["lr"]
                    if p.grad is None:
                        continue
                    g = p.grad
                    if decay != 0:
                        g = g.add(p, alpha=decay)
                    buffers = self.state[p]
                    if not buffers:
                        buffers["momentum_buffer"] = torch.zeros_like(p)
                    buf = buffers["momentum_buffer"]
                    buf.mul_(beta).add_(g)
                    direction = g.add(buf, alpha=beta) if use_nesterov else buf
                    p.add_(direction, alpha=-step_size)
        return loss
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class Muon(optim.Optimizer):
    """Muon optimizer for non-distributed training.

    Applies momentum-accumulated, Newton-Schulz-orthogonalized updates to every parameter,
    with decoupled multiplicative weight decay applied immediately before each update.

    Args:
        params (iterable): Iterable of parameters to optimize or dicts defining parameter groups.
        lr (float, optional): Learning rate. Default: 0.02.
        weight_decay (float, optional): Weight decay (L2 penalty) coefficient. Default: 0.
        momentum (float, optional): EMA coefficient for the momentum buffer. Default: 0.95.

    Attributes:
        param_groups (list): Parameter groups with their optimization settings.
        state (dict): Per-parameter optimizer state (momentum buffers).

    Examples:
        >>> model = YourModel()
        >>> optimizer = Muon(model.parameters(), lr=0.02, weight_decay=0.01, momentum=0.95)
        >>> loss = model(data)
        >>> loss.backward()
        >>> optimizer.step()

    Notes:
        - Intended for non-distributed environments.
        - Orthogonalized updates work best on parameters with 2+ dimensions.
        - Parameters with None gradients are given zero gradients to force synchronization.
    """

    def __init__(self, params, lr: float = 0.02, weight_decay: float = 0, momentum: float = 0.95):
        """Initialize Muon optimizer with orthogonalization-based updates.

        Args:
            params: Iterable of parameters to optimize or dicts defining parameter groups.
            lr (float): Learning rate.
            weight_decay (float): Weight decay factor applied multiplicatively.
            momentum (float): Momentum factor for gradient accumulation.
        """
        super().__init__(params, dict(lr=lr, weight_decay=weight_decay, momentum=momentum))

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Every parameter receives a Muon update (momentum + orthogonalization); weight decay is
        applied multiplicatively (p *= 1 - lr * weight_decay) just before the update.

        Args:
            closure (Callable[[], torch.Tensor] | None, optional): Closure that reevaluates the
                model and returns the loss. Default: None.

        Returns:
            (torch.Tensor | None): Loss from the closure if one was given, otherwise None.

        Examples:
            >>> optimizer = Muon(model.parameters())
            >>> loss = model(inputs)
            >>> loss.backward()
            >>> optimizer.step()
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            step_size, decay, beta = group["lr"], group["weight_decay"], group["momentum"]
            for p in group["params"]:
                if p.grad is None:
                    # Substitute a zero gradient rather than skipping, to force synchronization
                    p.grad = torch.zeros_like(p)
                buffers = self.state[p]
                if not buffers:
                    buffers["momentum_buffer"] = torch.zeros_like(p)
                direction = muon_update(p.grad, buffers["momentum_buffer"], beta=beta)
                p.mul_(1 - step_size * decay)  # decoupled multiplicative weight decay
                p.add_(direction.reshape(p.shape), alpha=-step_size)

        return loss
|
ultralytics/solutions/ai_gym.py
CHANGED
|
@@ -22,7 +22,7 @@ class AIGym(BaseSolution):
|
|
|
22
22
|
process: Process a frame to detect poses, calculate angles, and count repetitions.
|
|
23
23
|
|
|
24
24
|
Examples:
|
|
25
|
-
>>> gym = AIGym(model="
|
|
25
|
+
>>> gym = AIGym(model="yolo26n-pose.pt")
|
|
26
26
|
>>> image = cv2.imread("gym_scene.jpg")
|
|
27
27
|
>>> results = gym.process(image)
|
|
28
28
|
>>> processed_image = results.plot_im
|
|
@@ -35,9 +35,9 @@ class AIGym(BaseSolution):
|
|
|
35
35
|
|
|
36
36
|
Args:
|
|
37
37
|
**kwargs (Any): Keyword arguments passed to the parent class constructor including:
|
|
38
|
-
- model (str): Model name or path, defaults to "
|
|
38
|
+
- model (str): Model name or path, defaults to "yolo26n-pose.pt".
|
|
39
39
|
"""
|
|
40
|
-
kwargs["model"] = kwargs.get("model", "
|
|
40
|
+
kwargs["model"] = kwargs.get("model", "yolo26n-pose.pt")
|
|
41
41
|
super().__init__(**kwargs)
|
|
42
42
|
self.states = defaultdict(lambda: {"angle": 0, "count": 0, "stage": "-"}) # Dict for count, angle and stage
|
|
43
43
|
|
ultralytics/solutions/config.py
CHANGED
|
@@ -56,7 +56,7 @@ class SolutionConfig:
|
|
|
56
56
|
|
|
57
57
|
Examples:
|
|
58
58
|
>>> from ultralytics.solutions.config import SolutionConfig
|
|
59
|
-
>>> cfg = SolutionConfig(model="
|
|
59
|
+
>>> cfg = SolutionConfig(model="yolo26n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])
|
|
60
60
|
>>> cfg.update(show=False, conf=0.3)
|
|
61
61
|
>>> print(cfg.model)
|
|
62
62
|
"""
|
ultralytics/solutions/heatmap.py
CHANGED
|
@@ -29,7 +29,7 @@ class Heatmap(ObjectCounter):
|
|
|
29
29
|
|
|
30
30
|
Examples:
|
|
31
31
|
>>> from ultralytics.solutions import Heatmap
|
|
32
|
-
>>> heatmap = Heatmap(model="
|
|
32
|
+
>>> heatmap = Heatmap(model="yolo26n.pt", colormap=cv2.COLORMAP_JET)
|
|
33
33
|
>>> frame = cv2.imread("frame.jpg")
|
|
34
34
|
>>> processed_frame = heatmap.process(frame)
|
|
35
35
|
"""
|
|
@@ -39,9 +39,9 @@ class InstanceSegmentation(BaseSolution):
|
|
|
39
39
|
|
|
40
40
|
Args:
|
|
41
41
|
**kwargs (Any): Keyword arguments passed to the BaseSolution parent class including:
|
|
42
|
-
- model (str): Model name or path, defaults to "
|
|
42
|
+
- model (str): Model name or path, defaults to "yolo26n-seg.pt".
|
|
43
43
|
"""
|
|
44
|
-
kwargs["model"] = kwargs.get("model", "
|
|
44
|
+
kwargs["model"] = kwargs.get("model", "yolo26n-seg.pt")
|
|
45
45
|
super().__init__(**kwargs)
|
|
46
46
|
|
|
47
47
|
self.show_conf = self.CFG.get("show_conf", True)
|
|
@@ -195,7 +195,7 @@ class ParkingManagement(BaseSolution):
|
|
|
195
195
|
|
|
196
196
|
Examples:
|
|
197
197
|
>>> from ultralytics.solutions import ParkingManagement
|
|
198
|
-
>>> parking_manager = ParkingManagement(model="
|
|
198
|
+
>>> parking_manager = ParkingManagement(model="yolo26n.pt", json_file="parking_regions.json")
|
|
199
199
|
>>> print(f"Occupied spaces: {parking_manager.pr_info['Occupancy']}")
|
|
200
200
|
>>> print(f"Available spaces: {parking_manager.pr_info['Available']}")
|
|
201
201
|
"""
|
|
@@ -64,7 +64,7 @@ class BaseSolution:
|
|
|
64
64
|
process: Process method to be implemented by each Solution subclass.
|
|
65
65
|
|
|
66
66
|
Examples:
|
|
67
|
-
>>> solution = BaseSolution(model="
|
|
67
|
+
>>> solution = BaseSolution(model="yolo26n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])
|
|
68
68
|
>>> solution.initialize_region()
|
|
69
69
|
>>> image = cv2.imread("image.jpg")
|
|
70
70
|
>>> solution.extract_tracks(image)
|
|
@@ -106,7 +106,7 @@ class BaseSolution:
|
|
|
106
106
|
|
|
107
107
|
# Load Model and store additional information (classes, show_conf, show_label)
|
|
108
108
|
if self.CFG["model"] is None:
|
|
109
|
-
self.CFG["model"] = "
|
|
109
|
+
self.CFG["model"] = "yolo26n.pt"
|
|
110
110
|
self.model = YOLO(self.CFG["model"])
|
|
111
111
|
self.names = self.model.names
|
|
112
112
|
self.classes = self.CFG["classes"]
|
ultralytics/trackers/track.py
CHANGED
|
@@ -50,7 +50,7 @@ def on_predict_start(predictor: object, persist: bool = False) -> None:
|
|
|
50
50
|
and isinstance(predictor.model.model.model[-1], Detect)
|
|
51
51
|
and not predictor.model.model.model[-1].end2end
|
|
52
52
|
):
|
|
53
|
-
cfg.model = "
|
|
53
|
+
cfg.model = "yolo26n-cls.pt"
|
|
54
54
|
else:
|
|
55
55
|
# Register hook to extract input of Detect layer
|
|
56
56
|
def pre_hook(module, input):
|
ultralytics/utils/__init__.py
CHANGED
|
@@ -80,8 +80,8 @@ HELP_MSG = """
|
|
|
80
80
|
from ultralytics import YOLO
|
|
81
81
|
|
|
82
82
|
# Load a model
|
|
83
|
-
model = YOLO("
|
|
84
|
-
model = YOLO("
|
|
83
|
+
model = YOLO("yolo26n.yaml") # build a new model from scratch
|
|
84
|
+
model = YOLO("yolo26n.pt") # load a pretrained model (recommended for training)
|
|
85
85
|
|
|
86
86
|
# Use the model
|
|
87
87
|
results = model.train(data="coco8.yaml", epochs=3) # train the model
|
|
@@ -101,16 +101,16 @@ HELP_MSG = """
|
|
|
101
101
|
See all ARGS at https://docs.ultralytics.com/usage/cfg or with "yolo cfg"
|
|
102
102
|
|
|
103
103
|
- Train a detection model for 10 epochs with an initial learning_rate of 0.01
|
|
104
|
-
yolo detect train data=coco8.yaml model=
|
|
104
|
+
yolo detect train data=coco8.yaml model=yolo26n.pt epochs=10 lr0=0.01
|
|
105
105
|
|
|
106
106
|
- Predict a YouTube video using a pretrained segmentation model at image size 320:
|
|
107
|
-
yolo segment predict model=
|
|
107
|
+
yolo segment predict model=yolo26n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320
|
|
108
108
|
|
|
109
109
|
- Val a pretrained detection model at batch-size 1 and image size 640:
|
|
110
|
-
yolo detect val model=
|
|
110
|
+
yolo detect val model=yolo26n.pt data=coco8.yaml batch=1 imgsz=640
|
|
111
111
|
|
|
112
|
-
- Export a
|
|
113
|
-
yolo export model=
|
|
112
|
+
- Export a YOLO26n classification model to ONNX format at image size 224 by 128 (no TASK required)
|
|
113
|
+
yolo export model=yolo26n-cls.pt format=onnx imgsz=224,128
|
|
114
114
|
|
|
115
115
|
- Run special commands:
|
|
116
116
|
yolo help
|
|
@@ -161,7 +161,7 @@ class DataExportMixin:
|
|
|
161
161
|
tojson: Deprecated alias for `to_json()`.
|
|
162
162
|
|
|
163
163
|
Examples:
|
|
164
|
-
>>> model = YOLO("
|
|
164
|
+
>>> model = YOLO("yolo26n.pt")
|
|
165
165
|
>>> results = model("image.jpg")
|
|
166
166
|
>>> df = results.to_df()
|
|
167
167
|
>>> print(df)
|
ultralytics/utils/benchmarks.py
CHANGED
|
@@ -4,28 +4,28 @@ Benchmark YOLO model formats for speed and accuracy.
|
|
|
4
4
|
|
|
5
5
|
Usage:
|
|
6
6
|
from ultralytics.utils.benchmarks import ProfileModels, benchmark
|
|
7
|
-
ProfileModels(['
|
|
8
|
-
benchmark(model='
|
|
7
|
+
ProfileModels(['yolo26n.yaml', 'yolov8s.yaml']).run()
|
|
8
|
+
benchmark(model='yolo26n.pt', imgsz=160)
|
|
9
9
|
|
|
10
10
|
Format | `format=argument` | Model
|
|
11
11
|
--- | --- | ---
|
|
12
|
-
PyTorch | - |
|
|
13
|
-
TorchScript | `torchscript` |
|
|
14
|
-
ONNX | `onnx` |
|
|
15
|
-
OpenVINO | `openvino` |
|
|
16
|
-
TensorRT | `engine` |
|
|
17
|
-
CoreML | `coreml` |
|
|
18
|
-
TensorFlow SavedModel | `saved_model` |
|
|
19
|
-
TensorFlow GraphDef | `pb` |
|
|
20
|
-
TensorFlow Lite | `tflite` |
|
|
21
|
-
TensorFlow Edge TPU | `edgetpu` |
|
|
22
|
-
TensorFlow.js | `tfjs` |
|
|
23
|
-
PaddlePaddle | `paddle` |
|
|
24
|
-
MNN | `mnn` |
|
|
25
|
-
NCNN | `ncnn` |
|
|
26
|
-
IMX | `imx` |
|
|
27
|
-
RKNN | `rknn` |
|
|
28
|
-
ExecuTorch | `executorch` |
|
|
12
|
+
PyTorch | - | yolo26n.pt
|
|
13
|
+
TorchScript | `torchscript` | yolo26n.torchscript
|
|
14
|
+
ONNX | `onnx` | yolo26n.onnx
|
|
15
|
+
OpenVINO | `openvino` | yolo26n_openvino_model/
|
|
16
|
+
TensorRT | `engine` | yolo26n.engine
|
|
17
|
+
CoreML | `coreml` | yolo26n.mlpackage
|
|
18
|
+
TensorFlow SavedModel | `saved_model` | yolo26n_saved_model/
|
|
19
|
+
TensorFlow GraphDef | `pb` | yolo26n.pb
|
|
20
|
+
TensorFlow Lite | `tflite` | yolo26n.tflite
|
|
21
|
+
TensorFlow Edge TPU | `edgetpu` | yolo26n_edgetpu.tflite
|
|
22
|
+
TensorFlow.js | `tfjs` | yolo26n_web_model/
|
|
23
|
+
PaddlePaddle | `paddle` | yolo26n_paddle_model/
|
|
24
|
+
MNN | `mnn` | yolo26n.mnn
|
|
25
|
+
NCNN | `ncnn` | yolo26n_ncnn_model/
|
|
26
|
+
IMX | `imx` | yolo26n_imx_model/
|
|
27
|
+
RKNN | `rknn` | yolo26n_rknn_model/
|
|
28
|
+
ExecuTorch | `executorch` | yolo26n_executorch_model/
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
31
|
from __future__ import annotations
|
|
@@ -52,7 +52,7 @@ from ultralytics.utils.torch_utils import get_cpu_info, select_device
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
def benchmark(
|
|
55
|
-
model=WEIGHTS_DIR / "
|
|
55
|
+
model=WEIGHTS_DIR / "yolo26n.pt",
|
|
56
56
|
data=None,
|
|
57
57
|
imgsz=160,
|
|
58
58
|
half=False,
|
|
@@ -84,7 +84,7 @@ def benchmark(
|
|
|
84
84
|
Examples:
|
|
85
85
|
Benchmark a YOLO model with default settings:
|
|
86
86
|
>>> from ultralytics.utils.benchmarks import benchmark
|
|
87
|
-
>>> benchmark(model="
|
|
87
|
+
>>> benchmark(model="yolo26n.pt", imgsz=640)
|
|
88
88
|
"""
|
|
89
89
|
imgsz = check_imgsz(imgsz)
|
|
90
90
|
assert imgsz[0] == imgsz[1] if isinstance(imgsz, list) else True, "benchmark() only supports square imgsz."
|
|
@@ -396,7 +396,7 @@ class ProfileModels:
|
|
|
396
396
|
Examples:
|
|
397
397
|
Profile models and print results
|
|
398
398
|
>>> from ultralytics.utils.benchmarks import ProfileModels
|
|
399
|
-
>>> profiler = ProfileModels(["
|
|
399
|
+
>>> profiler = ProfileModels(["yolo26n.yaml", "yolov8s.yaml"], imgsz=640)
|
|
400
400
|
>>> profiler.run()
|
|
401
401
|
"""
|
|
402
402
|
|
|
@@ -444,7 +444,7 @@ class ProfileModels:
|
|
|
444
444
|
Examples:
|
|
445
445
|
Profile models and print results
|
|
446
446
|
>>> from ultralytics.utils.benchmarks import ProfileModels
|
|
447
|
-
>>> profiler = ProfileModels(["
|
|
447
|
+
>>> profiler = ProfileModels(["yolo26n.yaml", "yolo11s.yaml"])
|
|
448
448
|
>>> results = profiler.run()
|
|
449
449
|
"""
|
|
450
450
|
files = self.get_files()
|
|
@@ -13,6 +13,10 @@ from ultralytics.utils import ENVIRONMENT, GIT, LOGGER, PYTHON_VERSION, RANK, SE
|
|
|
13
13
|
|
|
14
14
|
PREFIX = colorstr("Platform: ")
|
|
15
15
|
|
|
16
|
+
# Configurable platform URL for debugging (e.g. ULTRALYTICS_PLATFORM_URL=http://localhost:3000)
|
|
17
|
+
PLATFORM_URL = os.getenv("ULTRALYTICS_PLATFORM_URL", "https://platform.ultralytics.com").rstrip("/")
|
|
18
|
+
PLATFORM_API_URL = f"{PLATFORM_URL}/api/webhooks"
|
|
19
|
+
|
|
16
20
|
|
|
17
21
|
def slugify(text):
|
|
18
22
|
"""Convert text to URL-safe slug (e.g., 'My Project 1' -> 'my-project-1')."""
|
|
@@ -66,9 +70,9 @@ def resolve_platform_uri(uri, hard=True):
|
|
|
66
70
|
|
|
67
71
|
api_key = os.getenv("ULTRALYTICS_API_KEY") or SETTINGS.get("api_key")
|
|
68
72
|
if not api_key:
|
|
69
|
-
raise ValueError(f"ULTRALYTICS_API_KEY required for '{uri}'. Get key at
|
|
73
|
+
raise ValueError(f"ULTRALYTICS_API_KEY required for '{uri}'. Get key at {PLATFORM_URL}/settings")
|
|
70
74
|
|
|
71
|
-
base =
|
|
75
|
+
base = PLATFORM_API_URL
|
|
72
76
|
headers = {"Authorization": f"Bearer {api_key}"}
|
|
73
77
|
|
|
74
78
|
# ul://username/datasets/slug
|
|
@@ -150,7 +154,7 @@ def _send(event, data, project, name, model_id=None):
|
|
|
150
154
|
if model_id:
|
|
151
155
|
payload["modelId"] = model_id
|
|
152
156
|
r = requests.post(
|
|
153
|
-
"
|
|
157
|
+
f"{PLATFORM_API_URL}/training/metrics",
|
|
154
158
|
json=payload,
|
|
155
159
|
headers={"Authorization": f"Bearer {_api_key}"},
|
|
156
160
|
timeout=10,
|
|
@@ -176,7 +180,7 @@ def _upload_model(model_path, project, name):
|
|
|
176
180
|
|
|
177
181
|
# Get signed upload URL
|
|
178
182
|
response = requests.post(
|
|
179
|
-
"
|
|
183
|
+
f"{PLATFORM_API_URL}/models/upload",
|
|
180
184
|
json={"project": project, "name": name, "filename": model_path.name},
|
|
181
185
|
headers={"Authorization": f"Bearer {_api_key}"},
|
|
182
186
|
timeout=10,
|
|
@@ -193,7 +197,7 @@ def _upload_model(model_path, project, name):
|
|
|
193
197
|
timeout=600, # 10 min timeout for large models
|
|
194
198
|
).raise_for_status()
|
|
195
199
|
|
|
196
|
-
# url = f"
|
|
200
|
+
# url = f"{PLATFORM_URL}/{project}/{name}"
|
|
197
201
|
# LOGGER.info(f"{PREFIX}Model uploaded to {url}")
|
|
198
202
|
return data.get("gcsPath")
|
|
199
203
|
|
|
@@ -276,7 +280,7 @@ def on_pretrain_routine_start(trainer):
|
|
|
276
280
|
trainer._platform_last_upload = time()
|
|
277
281
|
|
|
278
282
|
project, name = _get_project_name(trainer)
|
|
279
|
-
url = f"
|
|
283
|
+
url = f"{PLATFORM_URL}/{project}/{name}"
|
|
280
284
|
LOGGER.info(f"{PREFIX}Streaming to {url}")
|
|
281
285
|
|
|
282
286
|
# Create callback to send console output to Platform
|
|
@@ -437,7 +441,7 @@ def on_train_end(trainer):
|
|
|
437
441
|
name,
|
|
438
442
|
getattr(trainer, "_platform_model_id", None),
|
|
439
443
|
)
|
|
440
|
-
url = f"
|
|
444
|
+
url = f"{PLATFORM_URL}/{project}/{name}"
|
|
441
445
|
LOGGER.info(f"{PREFIX}View results at {url}")
|
|
442
446
|
|
|
443
447
|
|