dgenerate-ultralytics-headless 8.3.253-py3-none-any.whl → 8.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/METADATA +31 -39
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/RECORD +61 -50
- tests/__init__.py +2 -2
- tests/conftest.py +1 -1
- tests/test_cuda.py +8 -2
- tests/test_engine.py +8 -8
- tests/test_exports.py +13 -4
- tests/test_integrations.py +9 -9
- tests/test_python.py +14 -14
- tests/test_solutions.py +3 -3
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +6 -6
- ultralytics/cfg/default.yaml +3 -1
- ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
- ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
- ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
- ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
- ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
- ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
- ultralytics/cfg/models/26/yolo26.yaml +52 -0
- ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
- ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
- ultralytics/data/augment.py +7 -0
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/exporter.py +9 -4
- ultralytics/engine/model.py +1 -1
- ultralytics/engine/trainer.py +40 -15
- ultralytics/engine/tuner.py +15 -7
- ultralytics/models/fastsam/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +3 -2
- ultralytics/models/yolo/detect/val.py +6 -0
- ultralytics/models/yolo/model.py +1 -1
- ultralytics/models/yolo/obb/predict.py +1 -1
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/pose/train.py +1 -1
- ultralytics/models/yolo/segment/predict.py +1 -1
- ultralytics/models/yolo/segment/train.py +1 -1
- ultralytics/models/yolo/segment/val.py +3 -1
- ultralytics/models/yolo/yoloe/train.py +6 -1
- ultralytics/models/yolo/yoloe/train_seg.py +6 -1
- ultralytics/nn/autobackend.py +3 -3
- ultralytics/nn/modules/__init__.py +8 -0
- ultralytics/nn/modules/block.py +128 -8
- ultralytics/nn/modules/head.py +789 -204
- ultralytics/nn/tasks.py +74 -29
- ultralytics/nn/text_model.py +5 -2
- ultralytics/optim/__init__.py +5 -0
- ultralytics/optim/muon.py +338 -0
- ultralytics/utils/callbacks/platform.py +9 -7
- ultralytics/utils/downloads.py +3 -1
- ultralytics/utils/export/engine.py +19 -10
- ultralytics/utils/export/imx.py +22 -11
- ultralytics/utils/export/tensorflow.py +21 -21
- ultralytics/utils/loss.py +587 -203
- ultralytics/utils/metrics.py +1 -0
- ultralytics/utils/ops.py +11 -2
- ultralytics/utils/tal.py +98 -19
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.1.dist-info}/top_level.txt +0 -0
ultralytics/cfg/models/26/yoloe-26-seg.yaml
ADDED
@@ -0,0 +1,53 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOE-26 open-vocabulary instance segmentation model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/segment
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+text_model: mobileclip2:b
+scales: # model compound scaling constants, i.e. 'model=yoloe-26n-seg.yaml' will call yoloe-26-seg.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 347 layers, 5,615,540 parameters, 5,615,540 gradients, 11.7 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 347 layers, 15,272,852 parameters, 15,272,852 gradients, 39.3 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 367 layers, 34,922,132 parameters, 34,922,132 gradients, 136.3 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 479 layers, 39,325,588 parameters, 39,325,588 gradients, 154.7 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 479 layers, 85,397,684 parameters, 85,397,684 gradients, 343.3 GFLOPs
+
+# YOLOE26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLOE26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, YOLOESegment26, [nc, 32, 256, 512, True]] # YOLOESegment26(P3, P4, P5)
ultralytics/cfg/models/26/yoloe-26.yaml
ADDED
@@ -0,0 +1,53 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Ultralytics YOLOE-26 open-vocabulary object detection model with P3/8 - P5/32 outputs
+# Model docs: https://docs.ultralytics.com/models/yolo26
+# Task docs: https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+end2end: True # whether to use end-to-end mode
+reg_max: 1 # DFL bins
+text_model: mobileclip2:b
+scales: # model compound scaling constants, i.e. 'model=yoloe-26n.yaml' will call yoloe-26.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.50, 0.25, 1024] # summary: 298 layers, 5,061,540 parameters, 5,061,540 gradients, 7.3 GFLOPs
+  s: [0.50, 0.50, 1024] # summary: 298 layers, 13,776,836 parameters, 13,776,836 gradients, 24.8 GFLOPs
+  m: [0.50, 1.00, 512] # summary: 318 layers, 29,706,308 parameters, 29,706,308 gradients, 79.2 GFLOPs
+  l: [1.00, 1.00, 512] # summary: 430 layers, 34,109,764 parameters, 34,109,764 gradients, 97.6 GFLOPs
+  x: [1.00, 1.50, 512] # summary: 430 layers, 73,697,252 parameters, 73,697,252 gradients, 215.2 GFLOPs
+
+# YOLOE26n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 2, C3k2, [256, False, 0.25]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 2, C3k2, [512, False, 0.25]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 2, C3k2, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 2, C3k2, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5, 3, True]] # 9
+  - [-1, 2, C2PSA, [1024]] # 10
+
+# YOLOE26n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 2, C3k2, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 2, C3k2, [256, True]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 2, C3k2, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 1, C3k2, [1024, True, 0.5, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, YOLOEDetect, [nc, 512, True]] # YOLOEDetect(P3, P4, P5)
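For context, both files set end2end: True and reg_max: 1, i.e. NMS-free heads with a single DFL bin. These YAMLs should load through the standard Ultralytics entry point, with the scale suffix in the filename selecting a row of the scales table; a hedged sketch, assuming the YAMLs resolve via the usual model loader:

from ultralytics import YOLO

det = YOLO("yoloe-26n.yaml")      # 'n' scale of yoloe-26.yaml -> YOLOEDetect head
seg = YOLO("yoloe-26s-seg.yaml")  # 's' scale of yoloe-26-seg.yaml -> YOLOESegment26 head
det.info()                        # layer/parameter summary, cf. the scale comments above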
ultralytics/data/augment.py
CHANGED
@@ -2062,11 +2062,18 @@ class Format:
         if nl:
             masks, instances, cls = self._format_segments(instances, cls, w, h)
             masks = torch.from_numpy(masks)
+            cls_tensor = torch.from_numpy(cls.squeeze(1))
+            if self.mask_overlap:
+                sem_masks = cls_tensor[masks[0].long() - 1]  # (H, W) from (1, H, W) instance indices
+            else:
+                sem_masks = (masks * cls_tensor[:, None, None]).max(0).values  # (H, W) from (N, H, W) binary
         else:
             masks = torch.zeros(
                 1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio
             )
+            sem_masks = torch.zeros(img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio)
         labels["masks"] = masks
+        labels["sem_masks"] = sem_masks.float()
         labels["img"] = self._format_img(img)
         labels["cls"] = torch.from_numpy(cls) if nl else torch.zeros(nl, 1)
         labels["bboxes"] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
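The non-overlap branch collapses N binary instance masks into one per-pixel class-id map by scaling each mask by its class and taking a pixel-wise max. A small numeric sketch of that reduction (shapes are illustrative):

import torch

masks = torch.tensor([[[1.0, 1.0, 0.0], [0.0, 0.0, 0.0]],
                      [[0.0, 0.0, 0.0], [1.0, 1.0, 0.0]]])  # (N=2, H=2, W=3) binary instance masks
cls_tensor = torch.tensor([5.0, 7.0])  # class id per instance
sem_masks = (masks * cls_tensor[:, None, None]).max(0).values
print(sem_masks)  # tensor([[5., 5., 0.], [7., 7., 0.]]) -- uncovered pixels stay 0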
ultralytics/data/dataset.py
CHANGED
@@ -294,7 +294,7 @@ class YOLODataset(BaseDataset):
         values = list(zip(*[list(b.values()) for b in batch]))
         for i, k in enumerate(keys):
             value = values[i]
-            if k in {"img", "text_feats"}:
+            if k in {"img", "text_feats", "sem_masks"}:
                 value = torch.stack(value, 0)
             elif k == "visuals":
                 value = torch.nn.utils.rnn.pad_sequence(value, batch_first=True)
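Adding "sem_masks" to the stack set works because each image contributes exactly one fixed-size (H, W) semantic map, so the batch can be stacked like "img" rather than padded like the variable-length "visuals". For example:

import torch

batch_sem_masks = [torch.zeros(160, 160), torch.ones(160, 160)]  # one (H, W) map per image
print(torch.stack(batch_sem_masks, 0).shape)  # torch.Size([2, 160, 160])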
ultralytics/engine/exporter.py
CHANGED
@@ -463,6 +463,9 @@ class Exporter:
         )
         if tfjs and (ARM64 and LINUX):
             raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
+        if ncnn and hasattr(model.model[-1], "one2one_cv2"):
+            del model.model[-1].one2one_cv2  # Disable end2end branch for NCNN export as it does not support topk
+            LOGGER.warning("NCNN export does not support end2end models, disabling end2end branch.")
         # Recommend OpenVINO if export and Intel CPU
         if SETTINGS.get("openvino_msg"):
             if is_intel():
@@ -503,7 +506,9 @@ class Exporter:
                 m.dynamic = self.args.dynamic
                 m.export = True
                 m.format = self.args.format
-
+                # Clamp max_det to anchor count for small image sizes (required for TensorRT compatibility)
+                anchors = sum(int(self.imgsz[0] / s) * int(self.imgsz[1] / s) for s in model.stride.tolist())
+                m.max_det = min(self.args.max_det, anchors)
                 m.xyxy = self.args.nms and not coreml
                 m.shape = None  # reset cached shape for new export input size
             if hasattr(model, "pe") and hasattr(m, "fuse"):  # for YOLOE models
@@ -551,6 +556,8 @@ class Exporter:
             self.metadata["kpt_shape"] = model.model[-1].kpt_shape
         if hasattr(model, "kpt_names"):
             self.metadata["kpt_names"] = model.kpt_names
+        if getattr(model.model[-1], "end2end", False):
+            self.metadata["end2end"] = True

         LOGGER.info(
             f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
@@ -787,7 +794,6 @@ class Exporter:
                     f".*{head_module_name}/.*/Sub*",
                     f".*{head_module_name}/.*/Mul*",
                     f".*{head_module_name}/.*/Div*",
-                    f".*{head_module_name}\\.dfl.*",
                 ],
                 types=["Sigmoid"],
             )
@@ -860,8 +866,7 @@ class Exporter:
     @try_export
     def export_ncnn(self, prefix=colorstr("NCNN:")):
         """Export YOLO model to NCNN format using PNNX https://github.com/pnnx/pnnx."""
-        #
-        check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn", cmds="--no-deps")
+        check_requirements("ncnn", cmds="--no-deps")  # no deps to avoid installing opencv-python
         check_requirements("pnnx")
         import ncnn
         import pnnx
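The max_det clamp is simple arithmetic over the anchor grid: a stride-S level on an HxW input contributes (H/S)*(W/S) anchors, and max_det can never usefully exceed their sum. A worked example with assumed strides (8, 16, 32) and the default max_det of 300:

imgsz, strides, max_det = (640, 640), (8, 16, 32), 300

anchors = sum(int(imgsz[0] / s) * int(imgsz[1] / s) for s in strides)
print(anchors, min(max_det, anchors))  # 8400 300 -- unchanged at 640x640

small = sum(int(64 / s) * int(64 / s) for s in strides)
print(small, min(max_det, small))  # 84 84 -- clamped for a 64x64 export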
ultralytics/engine/model.py
CHANGED
@@ -825,7 +825,7 @@ class Model(torch.nn.Module):

         custom = {}  # method defaults
         args = {**self.overrides, **custom, **kwargs, "mode": "train"}  # highest priority args on the right
-        return Tuner(args=args, _callbacks=self.callbacks)(
+        return Tuner(args=args, _callbacks=self.callbacks)(iterations=iterations)

     def _apply(self, fn) -> Model:
         """Apply a function to model tensors that are not parameters or registered buffers.
ultralytics/engine/trainer.py
CHANGED
@@ -27,6 +27,7 @@ from ultralytics import __version__
 from ultralytics.cfg import get_cfg, get_save_dir
 from ultralytics.data.utils import check_cls_dataset, check_det_dataset
 from ultralytics.nn.tasks import load_checkpoint
+from ultralytics.optim import MuSGD
 from ultralytics.utils import (
     DEFAULT_CFG,
     GIT,
@@ -464,6 +465,9 @@ class BaseTrainer:

                 self.run_callbacks("on_train_batch_end")

+            if hasattr(unwrap_model(self.model).criterion, "update"):
+                unwrap_model(self.model).criterion.update()
+
             self.lr = {f"lr/pg{ir}": x["lr"] for ir, x in enumerate(self.optimizer.param_groups)}  # for loggers

             self.run_callbacks("on_train_epoch_end")
@@ -930,7 +934,7 @@ class BaseTrainer:
         Returns:
             (torch.optim.Optimizer): The constructed optimizer.
         """
-        g = [
+        g = [{}, {}, {}, {}]  # optimizer parameter groups
         bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k)  # normalization layers, i.e. BatchNorm2d()
         if name == "auto":
             LOGGER.info(
@@ -940,38 +944,59 @@ class BaseTrainer:
             )
             nc = self.data.get("nc", 10)  # number of classes
             lr_fit = round(0.002 * 5 / (4 + nc), 6)  # lr0 fit equation to 6 decimal places
-            name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("
+            name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("MuSGD", lr_fit, 0.9)
             self.args.warmup_bias_lr = 0.0  # no higher than 0.01 for Adam

-
+        use_muon = name == "MuSGD"
+        for module_name, module in unwrap_model(model).named_modules():
             for param_name, param in module.named_parameters(recurse=False):
                 fullname = f"{module_name}.{param_name}" if module_name else param_name
-                if
-                    g[
+                if param.ndim >= 2 and use_muon:
+                    g[3][fullname] = param  # muon params
+                elif "bias" in fullname:  # bias (no decay)
+                    g[2][fullname] = param
                 elif isinstance(module, bn) or "logit_scale" in fullname:  # weight (no decay)
                     # ContrastiveHead and BNContrastiveHead included here with 'logit_scale'
-                    g[1]
+                    g[1][fullname] = param
                 else:  # weight (with decay)
-                    g[0]
+                    g[0][fullname] = param
+        if not use_muon:
+            g = [x.values() for x in g[:3]]  # convert to list of params

-        optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "auto"}
+        optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "MuSGD", "auto"}
         name = {x.lower(): x for x in optimizers}.get(name.lower())
         if name in {"Adam", "Adamax", "AdamW", "NAdam", "RAdam"}:
-
+            optim_args = dict(lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
         elif name == "RMSProp":
-
-        elif name == "SGD":
-
+            optim_args = dict(lr=lr, momentum=momentum)
+        elif name == "SGD" or name == "MuSGD":
+            optim_args = dict(lr=lr, momentum=momentum, nesterov=True)
         else:
             raise NotImplementedError(
                 f"Optimizer '{name}' not found in list of available optimizers {optimizers}. "
                 "Request support for addition optimizers at https://github.com/ultralytics/ultralytics."
             )

-
-
+        g[2] = {"params": g[2], **optim_args}
+        g[0] = {"params": g[0], **optim_args, "weight_decay": decay}
+        g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0}
+        if name == "MuSGD":
+            g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True}
+            import re
+
+            # higher lr for certain parameters in MuSGD
+            pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg|flow_model")
+            g_ = []  # new param groups
+            for x in g:
+                p = x.pop("params")
+                p1 = [v for k, v in p.items() if pattern.search(k)]
+                p2 = [v for k, v in p.items() if not pattern.search(k)]
+                g_.extend([{"params": p1, **x, "lr": lr * 3}, {"params": p2, **x}])
+            g = g_
+        optimizer = getattr(optim, name, MuSGD)(params=g)
+
         LOGGER.info(
             f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
-            f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)"
+            f"{len(g[1]['params'])} weight(decay=0.0), {len(g[0]['params']) if len(g[0]) else len(g[3]['params'])} weight(decay={decay}), {len(g[2]['params'])} bias(decay=0.0)"
         )
         return optimizer
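The reworked build_optimizer keeps the three classic groups (decayed weights, no-decay norm weights, biases) but stores them as name-keyed dicts and adds a fourth Muon group for any parameter tensor with ndim >= 2; the name-keyed storage is what lets the regex above route selected parameters to a higher learning rate later. A minimal sketch of the routing rules on a toy model (not the actual trainer code):

import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.Linear(8, 4))
bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k)
g = [{}, {}, {}, {}]  # decay weights, no-decay weights, biases, muon
for module_name, module in model.named_modules():
    for param_name, param in module.named_parameters(recurse=False):
        fullname = f"{module_name}.{param_name}" if module_name else param_name
        if param.ndim >= 2:  # the use_muon=True branch
            g[3][fullname] = param
        elif "bias" in fullname:
            g[2][fullname] = param
        elif isinstance(module, bn):
            g[1][fullname] = param
        else:
            g[0][fullname] = param
print({i: sorted(d) for i, d in enumerate(g)})
# conv/linear weights land in g[3] (Muon), biases in g[2], the BatchNorm weight in g[1]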
ultralytics/engine/tuner.py
CHANGED
@@ -90,15 +90,15 @@ class Tuner:
         """
         self.space = args.pop("space", None) or {  # key: (min, max, gain(optional))
             # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
-            "lr0": (1e-5, 1e-
-            "lrf": (0.
+            "lr0": (1e-5, 1e-2),  # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
+            "lrf": (0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
             "momentum": (0.7, 0.98, 0.3),  # SGD momentum/Adam beta1
             "weight_decay": (0.0, 0.001),  # optimizer weight decay 5e-4
             "warmup_epochs": (0.0, 5.0),  # warmup epochs (fractions ok)
             "warmup_momentum": (0.0, 0.95),  # warmup initial momentum
             "box": (1.0, 20.0),  # box loss gain
             "cls": (0.1, 4.0),  # cls loss gain (scale with pixels)
-            "dfl": (0.4,
+            "dfl": (0.4, 12.0),  # dfl loss gain
             "hsv_h": (0.0, 0.1),  # image HSV-Hue augmentation (fraction)
             "hsv_s": (0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
             "hsv_v": (0.0, 0.9),  # image HSV-Value augmentation (fraction)
@@ -254,7 +254,7 @@ class Tuner:
                 f.write(headers)
                 for result in all_results:
                     fitness = result["fitness"]
-                    hyp_values = [result["hyperparameters"]
+                    hyp_values = [result["hyperparameters"].get(k, self.args.get(k)) for k in self.space.keys()]
                     log_row = [round(fitness, 5), *hyp_values]
                     f.write(",".join(map(str, log_row)) + "\n")

@@ -273,6 +273,8 @@ class Tuner:
         parents_mat = np.stack([x[i][1:] for i in idxs], 0)  # (k, ng) strip fitness
         lo, hi = parents_mat.min(0), parents_mat.max(0)
         span = hi - lo
+        # given a small value when span is zero to avoid no mutation
+        span = np.where(span == 0, np.random.uniform(0.01, 0.1, span.shape), span)
         return np.random.uniform(lo - alpha * span, hi + alpha * span)

     def _mutate(
@@ -297,7 +299,12 @@ class Tuner:
         if self.mongodb:
             if results := self._get_mongodb_results(n):
                 # MongoDB already sorted by fitness DESC, so results[0] is best
-                x = np.array(
+                x = np.array(
+                    [
+                        [r["fitness"]] + [r["hyperparameters"].get(k, self.args.get(k)) for k in self.space.keys()]
+                        for r in results
+                    ]
+                )
             elif self.collection.name in self.collection.database.list_collection_names():  # Tuner started elsewhere
                 x = np.array([[0.0] + [getattr(self.args, k) for k in self.space.keys()]])

@@ -335,10 +342,12 @@ class Tuner:
         # Update types
         if "close_mosaic" in hyp:
             hyp["close_mosaic"] = round(hyp["close_mosaic"])
+        if "epochs" in hyp:
+            hyp["epochs"] = round(hyp["epochs"])

         return hyp

-    def __call__(self,
+    def __call__(self, iterations: int = 10, cleanup: bool = True):
         """Execute the hyperparameter evolution process when the Tuner instance is called.

         This method iterates through the specified number of iterations, performing the following steps:
@@ -349,7 +358,6 @@ class Tuner:
         5. Track the best performing configuration across all iterations

         Args:
-            model (Model | None, optional): A pre-initialized YOLO model to be used for training.
             iterations (int): The number of generations to run the evolution for.
             cleanup (bool): Whether to delete iteration weights to reduce storage space during tuning.
         """
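The span fix in the crossover sampler guards a degenerate case: when all selected parents agree on a gene, hi - lo is zero and the uniform draw could never move off that value. A quick demonstration (alpha chosen arbitrarily):

import numpy as np

lo = np.array([0.01, 0.5])
hi = np.array([0.01, 0.9])  # first gene identical across all parents
span = hi - lo              # -> [0.0, 0.4]; uniform(lo, hi) could never mutate gene 0
span = np.where(span == 0, np.random.uniform(0.01, 0.1, span.shape), span)
alpha = 0.2                 # illustrative expansion factor
print(np.random.uniform(lo - alpha * span, hi + alpha * span))  # gene 0 can now mutate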
ultralytics/models/fastsam/predict.py
CHANGED
@@ -63,7 +63,7 @@ class FastSAMPredictor(SegmentationPredictor):
         results = super().postprocess(preds, img, orig_imgs)
         for result in results:
             full_box = torch.tensor(
-                [0, 0, result.orig_shape[1], result.orig_shape[0]], device=
+                [0, 0, result.orig_shape[1], result.orig_shape[0]], device=result.boxes.data.device, dtype=torch.float32
             )
             boxes = adjust_bboxes_to_image_border(result.boxes.xyxy, result.orig_shape)
             idx = torch.nonzero(box_iou(full_box[None], boxes) > 0.9).flatten()
ultralytics/models/yolo/detect/train.py
CHANGED
@@ -117,10 +117,11 @@ class DetectionTrainer(BaseTrainer):
             if isinstance(v, torch.Tensor):
                 batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
         batch["img"] = batch["img"].float() / 255
-
+        multi_scale = self.args.multi_scale
+        if random.random() < multi_scale:
             imgs = batch["img"]
             sz = (
-                random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1
+                random.randrange(int(self.args.imgsz * 0.5), int(self.args.imgsz * 1 + self.stride))
                 // self.stride
                 * self.stride
             )  # size
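With this change multi_scale acts as a per-batch probability (random.random() < multi_scale), and the sampled size is snapped to a stride multiple in [0.5 * imgsz, imgsz]. A sketch of the size math for imgsz=640, stride=32:

import random

imgsz, stride = 640, 32
sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1 + stride)) // stride * stride
assert 320 <= sz <= 640 and sz % stride == 0  # multiples of 32 up to and including 640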
ultralytics/models/yolo/detect/val.py
CHANGED
@@ -494,6 +494,12 @@ class DetectionValidator(BaseValidator):
             # update mAP50-95 and mAP50
             stats[f"metrics/mAP50({suffix[i][0]})"] = val.stats_as_dict["AP_50"]
             stats[f"metrics/mAP50-95({suffix[i][0]})"] = val.stats_as_dict["AP_all"]
+            # record mAP for small, medium, large objects as well
+            stats["metrics/mAP_small(B)"] = val.stats_as_dict["AP_small"]
+            stats["metrics/mAP_medium(B)"] = val.stats_as_dict["AP_medium"]
+            stats["metrics/mAP_large(B)"] = val.stats_as_dict["AP_large"]
+            # update fitness
+            stats["fitness"] = 0.9 * val.stats_as_dict["AP_all"] + 0.1 * val.stats_as_dict["AP_50"]

             if self.is_lvis:
                 stats[f"metrics/APr({suffix[i][0]})"] = val.stats_as_dict["APr"]
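The new fitness entry reuses the long-standing YOLO weighting of 0.9 * mAP50-95 + 0.1 * mAP50. With hypothetical eval values:

ap_all, ap_50 = 0.40, 0.55  # hypothetical AP_all (mAP50-95) and AP_50
print(0.9 * ap_all + 0.1 * ap_50)  # 0.415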
ultralytics/models/yolo/model.py
CHANGED
@@ -399,7 +399,7 @@ class YOLOE(Model):
                 "batch": 1,
                 "device": kwargs.get("device", None),
                 "half": kwargs.get("half", False),
-                "imgsz": kwargs.get("imgsz", self.overrides
+                "imgsz": kwargs.get("imgsz", self.overrides.get("imgsz", 640)),
             },
             _callbacks=self.callbacks,
         )
ultralytics/models/yolo/obb/predict.py
CHANGED
@@ -50,7 +50,7 @@ class OBBPredictor(DetectionPredictor):
             (Results): The result object containing the original image, image path, class names, and oriented bounding
                 boxes.
         """
-        rboxes =
+        rboxes = torch.cat([pred[:, :4], pred[:, -1:]], dim=-1)
         rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
         obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
         return Results(orig_img, path=img_path, names=self.model.names, obb=obb)
ultralytics/models/yolo/obb/train.py
CHANGED
@@ -73,7 +73,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):

     def get_validator(self):
         """Return an instance of OBBValidator for validation of YOLO model."""
-        self.loss_names = "box_loss", "cls_loss", "dfl_loss"
+        self.loss_names = "box_loss", "cls_loss", "dfl_loss", "angle_loss"
         return yolo.obb.OBBValidator(
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
ultralytics/models/yolo/pose/train.py
CHANGED
@@ -90,7 +90,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):

     def get_validator(self):
         """Return an instance of the PoseValidator class for validation."""
-        self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
+        self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss", "rle_loss"
         return yolo.pose.PoseValidator(
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
ultralytics/models/yolo/segment/predict.py
CHANGED
@@ -60,7 +60,7 @@ class SegmentationPredictor(DetectionPredictor):
             >>> results = predictor.postprocess(preds, img, orig_img)
         """
         # Extract protos - tuple if PyTorch model or array if exported
-        protos = preds[
+        protos = preds[0][-1] if isinstance(preds[0], tuple) else preds[-1]
         return super().postprocess(preds[0], img, orig_imgs, protos=protos)

     def construct_results(self, preds, img, orig_imgs, protos):
ultralytics/models/yolo/segment/train.py
CHANGED
@@ -63,7 +63,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):

     def get_validator(self):
         """Return an instance of SegmentationValidator for validation of YOLO model."""
-        self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss"
+        self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss", "sem_loss"
         return yolo.segment.SegmentationValidator(
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
ultralytics/models/yolo/segment/val.py
CHANGED
@@ -99,7 +99,9 @@ class SegmentationValidator(DetectionValidator):
         Returns:
             list[dict[str, torch.Tensor]]: Processed detection predictions with masks.
         """
-        proto =
+        proto = (
+            preds[0][-1] if isinstance(preds[0], tuple) else preds[-1]
+        )  # second output is len 3 if pt, but only 1 if exported
         preds = super().postprocess(preds[0])
         imgsz = [4 * x for x in proto.shape[2:]]  # get image size from proto
         for i, pred in enumerate(preds):
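Both segmentation call sites now pick the prototype tensor the same way: PyTorch checkpoints return a nested tuple whose last element holds the protos, while exported backends return a flat sequence. A sketch with hypothetical shapes and layouts:

import torch

def extract_proto(preds):
    # mirrors the patched selection logic above
    return preds[0][-1] if isinstance(preds[0], tuple) else preds[-1]

proto = torch.zeros(1, 32, 160, 160)
pt_preds = [(torch.zeros(1, 116, 8400), None, proto), None]  # hypothetical .pt layout
export_preds = [torch.zeros(1, 116, 8400), proto]            # hypothetical exported layout
assert extract_proto(pt_preds) is proto and extract_proto(export_preds) is proto
print([4 * x for x in proto.shape[2:]])  # [640, 640] -- image size recovered from protos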
ultralytics/models/yolo/yoloe/train.py
CHANGED
@@ -147,7 +147,12 @@ class YOLOEPETrainer(DetectionTrainer):
         model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
         model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
         model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
-
+
+        if getattr(model.model[-1], "one2one_cv3", None) is not None:
+            model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
+            model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
+            model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
+
         model.train()

         return model
ultralytics/models/yolo/yoloe/train_seg.py
CHANGED
@@ -104,7 +104,12 @@ class YOLOEPESegTrainer(SegmentationTrainer):
         model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
         model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
         model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
-
+
+        if getattr(model.model[-1], "one2one_cv3", None) is not None:
+            model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
+            model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
+            model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
+
         model.train()

         return model
ultralytics/nn/autobackend.py
CHANGED
@@ -221,6 +221,7 @@ class AutoBackend(nn.Module):
             for p in model.parameters():
                 p.requires_grad = False
             self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
+            end2end = getattr(model, "end2end", False)

         # TorchScript
         elif jit:
@@ -545,8 +546,7 @@ class AutoBackend(nn.Module):
         # NCNN
         elif ncnn:
             LOGGER.info(f"Loading {w} for NCNN inference...")
-
-            check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn", cmds="--no-deps")
+            check_requirements("ncnn", cmds="--no-deps")
             import ncnn as pyncnn

             net = pyncnn.Net()
@@ -657,7 +657,7 @@ class AutoBackend(nn.Module):
             names = metadata["names"]
             kpt_shape = metadata.get("kpt_shape")
             kpt_names = metadata.get("kpt_names")
-            end2end = metadata.get("args", {}).get("nms", False)
+            end2end = metadata.get("end2end", False) or metadata.get("args", {}).get("nms", False)
             dynamic = metadata.get("args", {}).get("dynamic", dynamic)
             ch = metadata.get("channels", 3)
         elif not (pt or triton or nn_module):
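Together with the exporter change above, end2end detection now works across both metadata generations: new exports carry an explicit "end2end" key, while older ones only imply it through args.nms. The fallback accepts either:

for metadata in ({"args": {"nms": True}}, {"end2end": True, "args": {}}):  # old vs new export metadata
    end2end = metadata.get("end2end", False) or metadata.get("args", {}).get("nms", False)
    print(end2end)  # True in both cases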
ultralytics/nn/modules/__init__.py
CHANGED
@@ -78,15 +78,19 @@ from .conv import (
 )
 from .head import (
     OBB,
+    OBB26,
     Classify,
     Detect,
     LRPCHead,
     Pose,
+    Pose26,
     RTDETRDecoder,
     Segment,
+    Segment26,
     WorldDetect,
     YOLOEDetect,
     YOLOESegment,
+    YOLOESegment26,
     v10Detect,
 )
 from .transformer import (
@@ -115,6 +119,7 @@ __all__ = (
     "ELAN1",
     "MLP",
     "OBB",
+    "OBB26",
     "PSA",
     "SPP",
     "SPPELAN",
@@ -161,6 +166,7 @@ __all__ = (
     "MSDeformAttn",
     "MaxSigmoidAttnBlock",
     "Pose",
+    "Pose26",
     "Proto",
     "RTDETRDecoder",
     "RepC3",
@@ -170,6 +176,7 @@ __all__ = (
     "ResNetLayer",
     "SCDown",
     "Segment",
+    "Segment26",
     "SpatialAttention",
     "TorchVision",
     "TransformerBlock",
@@ -178,5 +185,6 @@ __all__ = (
     "WorldDetect",
     "YOLOEDetect",
     "YOLOESegment",
+    "YOLOESegment26",
     "v10Detect",
 )