dgenerate-ultralytics-headless 8.3.248__py3-none-any.whl → 8.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/METADATA +52 -61
- {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/RECORD +97 -84
- {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/WHEEL +1 -1
- tests/__init__.py +2 -2
- tests/conftest.py +1 -1
- tests/test_cuda.py +8 -2
- tests/test_engine.py +8 -8
- tests/test_exports.py +11 -4
- tests/test_integrations.py +9 -9
- tests/test_python.py +41 -16
- tests/test_solutions.py +3 -3
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +31 -31
- ultralytics/cfg/datasets/TT100K.yaml +346 -0
- ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
- ultralytics/cfg/default.yaml +3 -1
- ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
- ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
- ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
- ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
- ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
- ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
- ultralytics/cfg/models/26/yolo26.yaml +52 -0
- ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
- ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
- ultralytics/data/annotator.py +2 -2
- ultralytics/data/augment.py +15 -0
- ultralytics/data/converter.py +76 -45
- ultralytics/data/dataset.py +1 -1
- ultralytics/data/utils.py +2 -2
- ultralytics/engine/exporter.py +34 -28
- ultralytics/engine/model.py +38 -37
- ultralytics/engine/predictor.py +17 -17
- ultralytics/engine/results.py +22 -15
- ultralytics/engine/trainer.py +83 -48
- ultralytics/engine/tuner.py +20 -11
- ultralytics/engine/validator.py +16 -16
- ultralytics/models/fastsam/predict.py +1 -1
- ultralytics/models/yolo/classify/predict.py +1 -1
- ultralytics/models/yolo/classify/train.py +1 -1
- ultralytics/models/yolo/classify/val.py +1 -1
- ultralytics/models/yolo/detect/predict.py +2 -2
- ultralytics/models/yolo/detect/train.py +6 -3
- ultralytics/models/yolo/detect/val.py +7 -1
- ultralytics/models/yolo/model.py +8 -8
- ultralytics/models/yolo/obb/predict.py +2 -2
- ultralytics/models/yolo/obb/train.py +3 -3
- ultralytics/models/yolo/obb/val.py +1 -1
- ultralytics/models/yolo/pose/predict.py +1 -1
- ultralytics/models/yolo/pose/train.py +3 -1
- ultralytics/models/yolo/pose/val.py +1 -1
- ultralytics/models/yolo/segment/predict.py +3 -3
- ultralytics/models/yolo/segment/train.py +4 -4
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/yoloe/train.py +6 -1
- ultralytics/models/yolo/yoloe/train_seg.py +6 -1
- ultralytics/nn/autobackend.py +14 -8
- ultralytics/nn/modules/__init__.py +8 -0
- ultralytics/nn/modules/block.py +128 -8
- ultralytics/nn/modules/head.py +788 -203
- ultralytics/nn/tasks.py +86 -41
- ultralytics/nn/text_model.py +5 -2
- ultralytics/optim/__init__.py +5 -0
- ultralytics/optim/muon.py +338 -0
- ultralytics/solutions/ai_gym.py +3 -3
- ultralytics/solutions/config.py +1 -1
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +2 -2
- ultralytics/solutions/object_counter.py +1 -1
- ultralytics/solutions/parking_management.py +1 -1
- ultralytics/solutions/solutions.py +2 -2
- ultralytics/trackers/byte_tracker.py +7 -7
- ultralytics/trackers/track.py +1 -1
- ultralytics/utils/__init__.py +8 -8
- ultralytics/utils/benchmarks.py +26 -26
- ultralytics/utils/callbacks/platform.py +173 -64
- ultralytics/utils/callbacks/tensorboard.py +2 -0
- ultralytics/utils/callbacks/wb.py +6 -1
- ultralytics/utils/checks.py +28 -9
- ultralytics/utils/dist.py +1 -0
- ultralytics/utils/downloads.py +5 -3
- ultralytics/utils/export/engine.py +19 -10
- ultralytics/utils/export/imx.py +38 -20
- ultralytics/utils/export/tensorflow.py +21 -21
- ultralytics/utils/files.py +2 -2
- ultralytics/utils/loss.py +597 -203
- ultralytics/utils/metrics.py +2 -1
- ultralytics/utils/ops.py +11 -2
- ultralytics/utils/patches.py +42 -0
- ultralytics/utils/plotting.py +3 -0
- ultralytics/utils/tal.py +100 -20
- ultralytics/utils/torch_utils.py +1 -1
- ultralytics/utils/tqdm.py +4 -1
- ultralytics/utils/tuner.py +2 -5
- {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.248.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/top_level.txt +0 -0
|
@@ -21,7 +21,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
|
|
|
21
21
|
|
|
22
22
|
Examples:
|
|
23
23
|
>>> from ultralytics.models.yolo.segment import SegmentationTrainer
|
|
24
|
-
>>> args = dict(model="
|
|
24
|
+
>>> args = dict(model="yolo26n-seg.pt", data="coco8-seg.yaml", epochs=3)
|
|
25
25
|
>>> trainer = SegmentationTrainer(overrides=args)
|
|
26
26
|
>>> trainer.train()
|
|
27
27
|
"""
|
|
@@ -52,8 +52,8 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
|
|
|
52
52
|
|
|
53
53
|
Examples:
|
|
54
54
|
>>> trainer = SegmentationTrainer()
|
|
55
|
-
>>> model = trainer.get_model(cfg="
|
|
56
|
-
>>> model = trainer.get_model(weights="
|
|
55
|
+
>>> model = trainer.get_model(cfg="yolo26n-seg.yaml")
|
|
56
|
+
>>> model = trainer.get_model(weights="yolo26n-seg.pt", verbose=False)
|
|
57
57
|
"""
|
|
58
58
|
model = SegmentationModel(cfg, nc=self.data["nc"], ch=self.data["channels"], verbose=verbose and RANK == -1)
|
|
59
59
|
if weights:
|
|
@@ -63,7 +63,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
|
|
|
63
63
|
|
|
64
64
|
def get_validator(self):
|
|
65
65
|
"""Return an instance of SegmentationValidator for validation of YOLO model."""
|
|
66
|
-
self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss"
|
|
66
|
+
self.loss_names = "box_loss", "seg_loss", "cls_loss", "dfl_loss", "sem_loss"
|
|
67
67
|
return yolo.segment.SegmentationValidator(
|
|
68
68
|
self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
|
|
69
69
|
)
|
|
@@ -30,7 +30,7 @@ class SegmentationValidator(DetectionValidator):
|
|
|
30
30
|
|
|
31
31
|
Examples:
|
|
32
32
|
>>> from ultralytics.models.yolo.segment import SegmentationValidator
|
|
33
|
-
>>> args = dict(model="
|
|
33
|
+
>>> args = dict(model="yolo26n-seg.pt", data="coco8-seg.yaml")
|
|
34
34
|
>>> validator = SegmentationValidator(args=args)
|
|
35
35
|
>>> validator()
|
|
36
36
|
"""
|
|
@@ -99,7 +99,7 @@ class SegmentationValidator(DetectionValidator):
|
|
|
99
99
|
Returns:
|
|
100
100
|
list[dict[str, torch.Tensor]]: Processed detection predictions with masks.
|
|
101
101
|
"""
|
|
102
|
-
proto = preds[
|
|
102
|
+
proto = preds[0][1] if isinstance(preds[0], tuple) else preds[1]
|
|
103
103
|
preds = super().postprocess(preds[0])
|
|
104
104
|
imgsz = [4 * x for x in proto.shape[2:]] # get image size from proto
|
|
105
105
|
for i, pred in enumerate(preds):
|
|
@@ -147,7 +147,12 @@ class YOLOEPETrainer(DetectionTrainer):
|
|
|
147
147
|
model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
|
|
148
148
|
model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
|
|
149
149
|
model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
|
|
150
|
-
|
|
150
|
+
|
|
151
|
+
if getattr(model.model[-1], "one2one_cv3", None) is not None:
|
|
152
|
+
model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
|
|
153
|
+
model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
|
|
154
|
+
model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
|
|
155
|
+
|
|
151
156
|
model.train()
|
|
152
157
|
|
|
153
158
|
return model
|
|
@@ -104,7 +104,12 @@ class YOLOEPESegTrainer(SegmentationTrainer):
|
|
|
104
104
|
model.model[-1].cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
|
|
105
105
|
model.model[-1].cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
|
|
106
106
|
model.model[-1].cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
|
|
107
|
-
|
|
107
|
+
|
|
108
|
+
if getattr(model.model[-1], "one2one_cv3", None) is not None:
|
|
109
|
+
model.model[-1].one2one_cv3[0][2] = deepcopy(model.model[-1].cv3[0][2]).requires_grad_(True)
|
|
110
|
+
model.model[-1].one2one_cv3[1][2] = deepcopy(model.model[-1].cv3[1][2]).requires_grad_(True)
|
|
111
|
+
model.model[-1].one2one_cv3[2][2] = deepcopy(model.model[-1].cv3[2][2]).requires_grad_(True)
|
|
112
|
+
|
|
108
113
|
model.train()
|
|
109
114
|
|
|
110
115
|
return model
|
ultralytics/nn/autobackend.py
CHANGED
|
@@ -132,14 +132,14 @@ class AutoBackend(nn.Module):
|
|
|
132
132
|
_model_type: Determine the model type from file path.
|
|
133
133
|
|
|
134
134
|
Examples:
|
|
135
|
-
>>> model = AutoBackend(model="
|
|
135
|
+
>>> model = AutoBackend(model="yolo26n.pt", device="cuda")
|
|
136
136
|
>>> results = model(img)
|
|
137
137
|
"""
|
|
138
138
|
|
|
139
139
|
@torch.no_grad()
|
|
140
140
|
def __init__(
|
|
141
141
|
self,
|
|
142
|
-
model: str | torch.nn.Module = "
|
|
142
|
+
model: str | torch.nn.Module = "yolo26n.pt",
|
|
143
143
|
device: torch.device = torch.device("cpu"),
|
|
144
144
|
dnn: bool = False,
|
|
145
145
|
data: str | Path | None = None,
|
|
@@ -221,6 +221,7 @@ class AutoBackend(nn.Module):
|
|
|
221
221
|
for p in model.parameters():
|
|
222
222
|
p.requires_grad = False
|
|
223
223
|
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
|
|
224
|
+
end2end = getattr(model, "end2end", False)
|
|
224
225
|
|
|
225
226
|
# TorchScript
|
|
226
227
|
elif jit:
|
|
@@ -497,11 +498,11 @@ class AutoBackend(nn.Module):
|
|
|
497
498
|
elif paddle:
|
|
498
499
|
LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
|
|
499
500
|
check_requirements(
|
|
500
|
-
"paddlepaddle-gpu"
|
|
501
|
+
"paddlepaddle-gpu>=3.0.0,!=3.3.0" # exclude 3.3.0 https://github.com/PaddlePaddle/Paddle/issues/77340
|
|
501
502
|
if torch.cuda.is_available()
|
|
502
503
|
else "paddlepaddle==3.0.0" # pin 3.0.0 for ARM64
|
|
503
504
|
if ARM64
|
|
504
|
-
else "paddlepaddle>=3.0.0"
|
|
505
|
+
else "paddlepaddle>=3.0.0,!=3.3.0" # exclude 3.3.0 https://github.com/PaddlePaddle/Paddle/issues/77340
|
|
505
506
|
)
|
|
506
507
|
import paddle.inference as pdi
|
|
507
508
|
|
|
@@ -545,11 +546,16 @@ class AutoBackend(nn.Module):
|
|
|
545
546
|
# NCNN
|
|
546
547
|
elif ncnn:
|
|
547
548
|
LOGGER.info(f"Loading {w} for NCNN inference...")
|
|
548
|
-
check_requirements("
|
|
549
|
+
check_requirements("ncnn", cmds="--no-deps")
|
|
549
550
|
import ncnn as pyncnn
|
|
550
551
|
|
|
551
552
|
net = pyncnn.Net()
|
|
552
|
-
|
|
553
|
+
if isinstance(cuda, torch.device):
|
|
554
|
+
net.opt.use_vulkan_compute = cuda
|
|
555
|
+
elif isinstance(device, str) and device.startswith("vulkan"):
|
|
556
|
+
net.opt.use_vulkan_compute = True
|
|
557
|
+
net.set_vulkan_device(int(device.split(":")[1]))
|
|
558
|
+
device = torch.device("cpu")
|
|
553
559
|
w = Path(w)
|
|
554
560
|
if not w.is_file(): # if not *.param
|
|
555
561
|
w = next(w.glob("*.param")) # get *.param file from *_ncnn_model dir
|
|
@@ -651,7 +657,7 @@ class AutoBackend(nn.Module):
|
|
|
651
657
|
names = metadata["names"]
|
|
652
658
|
kpt_shape = metadata.get("kpt_shape")
|
|
653
659
|
kpt_names = metadata.get("kpt_names")
|
|
654
|
-
end2end = metadata.get("args", {}).get("nms", False)
|
|
660
|
+
end2end = metadata.get("end2end", False) or metadata.get("args", {}).get("nms", False)
|
|
655
661
|
dynamic = metadata.get("args", {}).get("dynamic", dynamic)
|
|
656
662
|
ch = metadata.get("channels", 3)
|
|
657
663
|
elif not (pt or triton or nn_module):
|
|
@@ -881,7 +887,7 @@ class AutoBackend(nn.Module):
|
|
|
881
887
|
x[:, 6::3] *= h
|
|
882
888
|
y.append(x)
|
|
883
889
|
# TF segment fixes: export is reversed vs ONNX export and protos are transposed
|
|
884
|
-
if
|
|
890
|
+
if self.task == "segment": # segment with (det, proto) output order reversed
|
|
885
891
|
if len(y[1].shape) != 4:
|
|
886
892
|
y = list(reversed(y)) # should be y = (1, 116, 8400), (1, 160, 160, 32)
|
|
887
893
|
if y[1].shape[-1] == 6: # end-to-end model
|
|
@@ -78,15 +78,19 @@ from .conv import (
|
|
|
78
78
|
)
|
|
79
79
|
from .head import (
|
|
80
80
|
OBB,
|
|
81
|
+
OBB26,
|
|
81
82
|
Classify,
|
|
82
83
|
Detect,
|
|
83
84
|
LRPCHead,
|
|
84
85
|
Pose,
|
|
86
|
+
Pose26,
|
|
85
87
|
RTDETRDecoder,
|
|
86
88
|
Segment,
|
|
89
|
+
Segment26,
|
|
87
90
|
WorldDetect,
|
|
88
91
|
YOLOEDetect,
|
|
89
92
|
YOLOESegment,
|
|
93
|
+
YOLOESegment26,
|
|
90
94
|
v10Detect,
|
|
91
95
|
)
|
|
92
96
|
from .transformer import (
|
|
@@ -115,6 +119,7 @@ __all__ = (
|
|
|
115
119
|
"ELAN1",
|
|
116
120
|
"MLP",
|
|
117
121
|
"OBB",
|
|
122
|
+
"OBB26",
|
|
118
123
|
"PSA",
|
|
119
124
|
"SPP",
|
|
120
125
|
"SPPELAN",
|
|
@@ -161,6 +166,7 @@ __all__ = (
|
|
|
161
166
|
"MSDeformAttn",
|
|
162
167
|
"MaxSigmoidAttnBlock",
|
|
163
168
|
"Pose",
|
|
169
|
+
"Pose26",
|
|
164
170
|
"Proto",
|
|
165
171
|
"RTDETRDecoder",
|
|
166
172
|
"RepC3",
|
|
@@ -170,6 +176,7 @@ __all__ = (
|
|
|
170
176
|
"ResNetLayer",
|
|
171
177
|
"SCDown",
|
|
172
178
|
"Segment",
|
|
179
|
+
"Segment26",
|
|
173
180
|
"SpatialAttention",
|
|
174
181
|
"TorchVision",
|
|
175
182
|
"TransformerBlock",
|
|
@@ -178,5 +185,6 @@ __all__ = (
|
|
|
178
185
|
"WorldDetect",
|
|
179
186
|
"YOLOEDetect",
|
|
180
187
|
"YOLOESegment",
|
|
188
|
+
"YOLOESegment26",
|
|
181
189
|
"v10Detect",
|
|
182
190
|
)
|
ultralytics/nn/modules/block.py
CHANGED
|
@@ -208,28 +208,33 @@ class SPP(nn.Module):
|
|
|
208
208
|
class SPPF(nn.Module):
|
|
209
209
|
"""Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
|
|
210
210
|
|
|
211
|
-
def __init__(self, c1: int, c2: int, k: int = 5):
|
|
211
|
+
def __init__(self, c1: int, c2: int, k: int = 5, n: int = 3, shortcut: bool = False):
|
|
212
212
|
"""Initialize the SPPF layer with given input/output channels and kernel size.
|
|
213
213
|
|
|
214
214
|
Args:
|
|
215
215
|
c1 (int): Input channels.
|
|
216
216
|
c2 (int): Output channels.
|
|
217
217
|
k (int): Kernel size.
|
|
218
|
+
n (int): Number of pooling iterations.
|
|
219
|
+
shortcut (bool): Whether to use shortcut connection.
|
|
218
220
|
|
|
219
221
|
Notes:
|
|
220
222
|
This module is equivalent to SPP(k=(5, 9, 13)).
|
|
221
223
|
"""
|
|
222
224
|
super().__init__()
|
|
223
225
|
c_ = c1 // 2 # hidden channels
|
|
224
|
-
self.cv1 = Conv(c1, c_, 1, 1)
|
|
225
|
-
self.cv2 = Conv(c_ *
|
|
226
|
+
self.cv1 = Conv(c1, c_, 1, 1, act=False)
|
|
227
|
+
self.cv2 = Conv(c_ * (n + 1), c2, 1, 1)
|
|
226
228
|
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
|
|
229
|
+
self.n = n
|
|
230
|
+
self.add = shortcut and c1 == c2
|
|
227
231
|
|
|
228
232
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
229
233
|
"""Apply sequential pooling operations to input and return concatenated feature maps."""
|
|
230
234
|
y = [self.cv1(x)]
|
|
231
|
-
y.extend(self.m(y[-1]) for _ in range(3))
|
|
232
|
-
|
|
235
|
+
y.extend(self.m(y[-1]) for _ in range(getattr(self, "n", 3)))
|
|
236
|
+
y = self.cv2(torch.cat(y, 1))
|
|
237
|
+
return y + x if getattr(self, "add", False) else y
|
|
233
238
|
|
|
234
239
|
|
|
235
240
|
class C1(nn.Module):
|
|
@@ -1065,7 +1070,15 @@ class C3k2(C2f):
|
|
|
1065
1070
|
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
|
|
1066
1071
|
|
|
1067
1072
|
def __init__(
|
|
1068
|
-
self,
|
|
1073
|
+
self,
|
|
1074
|
+
c1: int,
|
|
1075
|
+
c2: int,
|
|
1076
|
+
n: int = 1,
|
|
1077
|
+
c3k: bool = False,
|
|
1078
|
+
e: float = 0.5,
|
|
1079
|
+
attn: bool = False,
|
|
1080
|
+
g: int = 1,
|
|
1081
|
+
shortcut: bool = True,
|
|
1069
1082
|
):
|
|
1070
1083
|
"""Initialize C3k2 module.
|
|
1071
1084
|
|
|
@@ -1075,12 +1088,21 @@ class C3k2(C2f):
|
|
|
1075
1088
|
n (int): Number of blocks.
|
|
1076
1089
|
c3k (bool): Whether to use C3k blocks.
|
|
1077
1090
|
e (float): Expansion ratio.
|
|
1091
|
+
attn (bool): Whether to use attention blocks.
|
|
1078
1092
|
g (int): Groups for convolutions.
|
|
1079
1093
|
shortcut (bool): Whether to use shortcut connections.
|
|
1080
1094
|
"""
|
|
1081
1095
|
super().__init__(c1, c2, n, shortcut, g, e)
|
|
1082
1096
|
self.m = nn.ModuleList(
|
|
1083
|
-
|
|
1097
|
+
nn.Sequential(
|
|
1098
|
+
Bottleneck(self.c, self.c, shortcut, g),
|
|
1099
|
+
PSABlock(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1)),
|
|
1100
|
+
)
|
|
1101
|
+
if attn
|
|
1102
|
+
else C3k(self.c, self.c, 2, shortcut, g)
|
|
1103
|
+
if c3k
|
|
1104
|
+
else Bottleneck(self.c, self.c, shortcut, g)
|
|
1105
|
+
for _ in range(n)
|
|
1084
1106
|
)
|
|
1085
1107
|
|
|
1086
1108
|
|
|
@@ -1148,6 +1170,8 @@ class RepVGGDW(torch.nn.Module):
|
|
|
1148
1170
|
|
|
1149
1171
|
This method fuses the convolutional layers and updates the weights and biases accordingly.
|
|
1150
1172
|
"""
|
|
1173
|
+
if not hasattr(self, "conv1"):
|
|
1174
|
+
return # already fused
|
|
1151
1175
|
conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
|
|
1152
1176
|
conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)
|
|
1153
1177
|
|
|
@@ -1391,7 +1415,7 @@ class PSA(nn.Module):
|
|
|
1391
1415
|
self.cv1 = Conv(c1, 2 * self.c, 1, 1)
|
|
1392
1416
|
self.cv2 = Conv(2 * self.c, c1, 1)
|
|
1393
1417
|
|
|
1394
|
-
self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
|
|
1418
|
+
self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1))
|
|
1395
1419
|
self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
|
|
1396
1420
|
|
|
1397
1421
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
@@ -1945,3 +1969,99 @@ class SAVPE(nn.Module):
|
|
|
1945
1969
|
aggregated = score.transpose(-2, -3) @ x.reshape(B, self.c, C // self.c, -1).transpose(-1, -2)
|
|
1946
1970
|
|
|
1947
1971
|
return F.normalize(aggregated.transpose(-2, -3).reshape(B, Q, -1), dim=-1, p=2)
|
|
1972
|
+
|
|
1973
|
+
|
|
1974
|
+
class Proto26(Proto):
|
|
1975
|
+
"""Ultralytics YOLO26 models mask Proto module for segmentation models."""
|
|
1976
|
+
|
|
1977
|
+
def __init__(self, ch: tuple = (), c_: int = 256, c2: int = 32, nc: int = 80):
|
|
1978
|
+
"""Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
|
|
1979
|
+
|
|
1980
|
+
Args:
|
|
1981
|
+
ch (tuple): Tuple of channel sizes from backbone feature maps.
|
|
1982
|
+
c_ (int): Intermediate channels.
|
|
1983
|
+
c2 (int): Output channels (number of protos).
|
|
1984
|
+
nc (int): Number of classes for semantic segmentation.
|
|
1985
|
+
"""
|
|
1986
|
+
super().__init__(c_, c_, c2)
|
|
1987
|
+
self.feat_refine = nn.ModuleList(Conv(x, ch[0], k=1) for x in ch[1:])
|
|
1988
|
+
self.feat_fuse = Conv(ch[0], c_, k=3)
|
|
1989
|
+
self.semseg = nn.Sequential(Conv(ch[0], c_, k=3), Conv(c_, c_, k=3), nn.Conv2d(c_, nc, 1))
|
|
1990
|
+
|
|
1991
|
+
def forward(self, x: torch.Tensor, return_semseg: bool = True) -> torch.Tensor:
|
|
1992
|
+
"""Perform a forward pass through layers using an upsampled input image."""
|
|
1993
|
+
feat = x[0]
|
|
1994
|
+
for i, f in enumerate(self.feat_refine):
|
|
1995
|
+
up_feat = f(x[i + 1])
|
|
1996
|
+
up_feat = F.interpolate(up_feat, size=feat.shape[2:], mode="nearest")
|
|
1997
|
+
feat = feat + up_feat
|
|
1998
|
+
p = super().forward(self.feat_fuse(feat))
|
|
1999
|
+
if self.training and return_semseg:
|
|
2000
|
+
semseg = self.semseg(feat)
|
|
2001
|
+
return (p, semseg)
|
|
2002
|
+
return p
|
|
2003
|
+
|
|
2004
|
+
def fuse(self):
|
|
2005
|
+
"""Fuse the model for inference by removing the semantic segmentation head."""
|
|
2006
|
+
self.semseg = None
|
|
2007
|
+
|
|
2008
|
+
|
|
2009
|
+
class RealNVP(nn.Module):
|
|
2010
|
+
"""RealNVP: a flow-based generative model.
|
|
2011
|
+
|
|
2012
|
+
References:
|
|
2013
|
+
https://arxiv.org/abs/1605.08803
|
|
2014
|
+
https://github.com/open-mmlab/mmpose/blob/main/mmpose/models/utils/realnvp.py
|
|
2015
|
+
"""
|
|
2016
|
+
|
|
2017
|
+
@staticmethod
|
|
2018
|
+
def nets():
|
|
2019
|
+
"""Get the scale model in a single invertible mapping."""
|
|
2020
|
+
return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2), nn.Tanh())
|
|
2021
|
+
|
|
2022
|
+
@staticmethod
|
|
2023
|
+
def nett():
|
|
2024
|
+
"""Get the translation model in a single invertible mapping."""
|
|
2025
|
+
return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2))
|
|
2026
|
+
|
|
2027
|
+
@property
|
|
2028
|
+
def prior(self):
|
|
2029
|
+
"""The prior distribution."""
|
|
2030
|
+
return torch.distributions.MultivariateNormal(self.loc, self.cov)
|
|
2031
|
+
|
|
2032
|
+
def __init__(self):
|
|
2033
|
+
super().__init__()
|
|
2034
|
+
|
|
2035
|
+
self.register_buffer("loc", torch.zeros(2))
|
|
2036
|
+
self.register_buffer("cov", torch.eye(2))
|
|
2037
|
+
self.register_buffer("mask", torch.tensor([[0, 1], [1, 0]] * 3, dtype=torch.float32))
|
|
2038
|
+
|
|
2039
|
+
self.s = torch.nn.ModuleList([self.nets() for _ in range(len(self.mask))])
|
|
2040
|
+
self.t = torch.nn.ModuleList([self.nett() for _ in range(len(self.mask))])
|
|
2041
|
+
self.init_weights()
|
|
2042
|
+
|
|
2043
|
+
def init_weights(self):
|
|
2044
|
+
"""Initialize model weights."""
|
|
2045
|
+
for m in self.modules():
|
|
2046
|
+
if isinstance(m, nn.Linear):
|
|
2047
|
+
nn.init.xavier_uniform_(m.weight, gain=0.01)
|
|
2048
|
+
|
|
2049
|
+
def backward_p(self, x):
|
|
2050
|
+
"""Apply mapping from the data space to the latent space and calculate the log determinant of the Jacobian
|
|
2051
|
+
matrix.
|
|
2052
|
+
"""
|
|
2053
|
+
log_det_jacob, z = x.new_zeros(x.shape[0]), x
|
|
2054
|
+
for i in reversed(range(len(self.t))):
|
|
2055
|
+
z_ = self.mask[i] * z
|
|
2056
|
+
s = self.s[i](z_) * (1 - self.mask[i])
|
|
2057
|
+
t = self.t[i](z_) * (1 - self.mask[i])
|
|
2058
|
+
z = (1 - self.mask[i]) * (z - t) * torch.exp(-s) + z_
|
|
2059
|
+
log_det_jacob -= s.sum(dim=1)
|
|
2060
|
+
return z, log_det_jacob
|
|
2061
|
+
|
|
2062
|
+
def log_prob(self, x):
|
|
2063
|
+
"""Calculate the log probability of a given sample in data space."""
|
|
2064
|
+
if x.dtype == torch.float32 and self.s[0][0].weight.dtype != torch.float32:
|
|
2065
|
+
self.float()
|
|
2066
|
+
z, log_det = self.backward_p(x)
|
|
2067
|
+
return self.prior.log_prob(z) + log_det
|