dgenerate-ultralytics-headless 8.3.214__py3-none-any.whl → 8.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/METADATA +64 -74
- dgenerate_ultralytics_headless-8.4.7.dist-info/RECORD +311 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/WHEEL +1 -1
- tests/__init__.py +7 -9
- tests/conftest.py +8 -15
- tests/test_cli.py +1 -1
- tests/test_cuda.py +13 -10
- tests/test_engine.py +9 -9
- tests/test_exports.py +65 -13
- tests/test_integrations.py +13 -13
- tests/test_python.py +125 -69
- tests/test_solutions.py +161 -152
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +86 -92
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/TT100K.yaml +346 -0
- ultralytics/cfg/datasets/VOC.yaml +15 -16
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco-pose.yaml +21 -0
- ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
- ultralytics/cfg/datasets/dog-pose.yaml +28 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +5 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/default.yaml +4 -2
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
- ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
- ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
- ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
- ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
- ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
- ultralytics/cfg/models/26/yolo26.yaml +52 -0
- ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
- ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
- ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
- ultralytics/cfg/models/v6/yolov6.yaml +1 -1
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +5 -6
- ultralytics/data/augment.py +300 -475
- ultralytics/data/base.py +18 -26
- ultralytics/data/build.py +147 -25
- ultralytics/data/converter.py +108 -87
- ultralytics/data/dataset.py +47 -75
- ultralytics/data/loaders.py +42 -49
- ultralytics/data/split.py +5 -6
- ultralytics/data/split_dota.py +8 -15
- ultralytics/data/utils.py +36 -45
- ultralytics/engine/exporter.py +351 -263
- ultralytics/engine/model.py +186 -225
- ultralytics/engine/predictor.py +45 -54
- ultralytics/engine/results.py +198 -325
- ultralytics/engine/trainer.py +165 -106
- ultralytics/engine/tuner.py +41 -43
- ultralytics/engine/validator.py +55 -38
- ultralytics/hub/__init__.py +16 -19
- ultralytics/hub/auth.py +6 -12
- ultralytics/hub/google/__init__.py +7 -10
- ultralytics/hub/session.py +15 -25
- ultralytics/hub/utils.py +5 -8
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +8 -10
- ultralytics/models/fastsam/predict.py +18 -30
- ultralytics/models/fastsam/utils.py +1 -2
- ultralytics/models/fastsam/val.py +5 -7
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +5 -8
- ultralytics/models/nas/predict.py +7 -9
- ultralytics/models/nas/val.py +1 -2
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +5 -8
- ultralytics/models/rtdetr/predict.py +15 -19
- ultralytics/models/rtdetr/train.py +10 -13
- ultralytics/models/rtdetr/val.py +21 -23
- ultralytics/models/sam/__init__.py +15 -2
- ultralytics/models/sam/amg.py +14 -20
- ultralytics/models/sam/build.py +26 -19
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +29 -32
- ultralytics/models/sam/modules/blocks.py +83 -144
- ultralytics/models/sam/modules/decoders.py +19 -37
- ultralytics/models/sam/modules/encoders.py +44 -101
- ultralytics/models/sam/modules/memory_attention.py +16 -30
- ultralytics/models/sam/modules/sam.py +200 -73
- ultralytics/models/sam/modules/tiny_encoder.py +64 -83
- ultralytics/models/sam/modules/transformer.py +18 -28
- ultralytics/models/sam/modules/utils.py +174 -50
- ultralytics/models/sam/predict.py +2248 -350
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +529 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +199 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +547 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/utils/loss.py +14 -26
- ultralytics/models/utils/ops.py +13 -17
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +10 -13
- ultralytics/models/yolo/classify/train.py +12 -33
- ultralytics/models/yolo/classify/val.py +30 -29
- ultralytics/models/yolo/detect/predict.py +9 -12
- ultralytics/models/yolo/detect/train.py +17 -23
- ultralytics/models/yolo/detect/val.py +77 -59
- ultralytics/models/yolo/model.py +43 -60
- ultralytics/models/yolo/obb/predict.py +7 -16
- ultralytics/models/yolo/obb/train.py +14 -17
- ultralytics/models/yolo/obb/val.py +40 -37
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +7 -22
- ultralytics/models/yolo/pose/train.py +13 -16
- ultralytics/models/yolo/pose/val.py +39 -58
- ultralytics/models/yolo/segment/predict.py +17 -21
- ultralytics/models/yolo/segment/train.py +7 -10
- ultralytics/models/yolo/segment/val.py +95 -47
- ultralytics/models/yolo/world/train.py +8 -14
- ultralytics/models/yolo/world/train_world.py +11 -34
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +16 -23
- ultralytics/models/yolo/yoloe/train.py +36 -44
- ultralytics/models/yolo/yoloe/train_seg.py +11 -11
- ultralytics/models/yolo/yoloe/val.py +15 -20
- ultralytics/nn/__init__.py +7 -7
- ultralytics/nn/autobackend.py +159 -85
- ultralytics/nn/modules/__init__.py +68 -60
- ultralytics/nn/modules/activation.py +4 -6
- ultralytics/nn/modules/block.py +260 -224
- ultralytics/nn/modules/conv.py +52 -97
- ultralytics/nn/modules/head.py +831 -299
- ultralytics/nn/modules/transformer.py +76 -88
- ultralytics/nn/modules/utils.py +16 -21
- ultralytics/nn/tasks.py +180 -195
- ultralytics/nn/text_model.py +45 -69
- ultralytics/optim/__init__.py +5 -0
- ultralytics/optim/muon.py +338 -0
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +13 -19
- ultralytics/solutions/analytics.py +15 -16
- ultralytics/solutions/config.py +6 -7
- ultralytics/solutions/distance_calculation.py +10 -13
- ultralytics/solutions/heatmap.py +8 -14
- ultralytics/solutions/instance_segmentation.py +6 -9
- ultralytics/solutions/object_blurrer.py +7 -10
- ultralytics/solutions/object_counter.py +12 -19
- ultralytics/solutions/object_cropper.py +8 -14
- ultralytics/solutions/parking_management.py +34 -32
- ultralytics/solutions/queue_management.py +10 -12
- ultralytics/solutions/region_counter.py +9 -12
- ultralytics/solutions/security_alarm.py +15 -20
- ultralytics/solutions/similarity_search.py +10 -15
- ultralytics/solutions/solutions.py +77 -76
- ultralytics/solutions/speed_estimation.py +7 -10
- ultralytics/solutions/streamlit_inference.py +2 -4
- ultralytics/solutions/templates/similarity-search.html +7 -18
- ultralytics/solutions/trackzone.py +7 -10
- ultralytics/solutions/vision_eye.py +5 -8
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +3 -5
- ultralytics/trackers/bot_sort.py +10 -27
- ultralytics/trackers/byte_tracker.py +21 -37
- ultralytics/trackers/track.py +4 -7
- ultralytics/trackers/utils/gmc.py +11 -22
- ultralytics/trackers/utils/kalman_filter.py +37 -48
- ultralytics/trackers/utils/matching.py +12 -15
- ultralytics/utils/__init__.py +124 -124
- ultralytics/utils/autobatch.py +2 -4
- ultralytics/utils/autodevice.py +17 -18
- ultralytics/utils/benchmarks.py +57 -71
- ultralytics/utils/callbacks/base.py +8 -10
- ultralytics/utils/callbacks/clearml.py +5 -13
- ultralytics/utils/callbacks/comet.py +32 -46
- ultralytics/utils/callbacks/dvc.py +13 -18
- ultralytics/utils/callbacks/mlflow.py +4 -5
- ultralytics/utils/callbacks/neptune.py +7 -15
- ultralytics/utils/callbacks/platform.py +423 -38
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +25 -31
- ultralytics/utils/callbacks/wb.py +16 -14
- ultralytics/utils/checks.py +127 -85
- ultralytics/utils/cpu.py +3 -8
- ultralytics/utils/dist.py +9 -12
- ultralytics/utils/downloads.py +25 -33
- ultralytics/utils/errors.py +6 -14
- ultralytics/utils/events.py +2 -4
- ultralytics/utils/export/__init__.py +4 -236
- ultralytics/utils/export/engine.py +246 -0
- ultralytics/utils/export/imx.py +117 -63
- ultralytics/utils/export/tensorflow.py +231 -0
- ultralytics/utils/files.py +26 -30
- ultralytics/utils/git.py +9 -11
- ultralytics/utils/instance.py +30 -51
- ultralytics/utils/logger.py +212 -114
- ultralytics/utils/loss.py +601 -215
- ultralytics/utils/metrics.py +128 -156
- ultralytics/utils/nms.py +13 -16
- ultralytics/utils/ops.py +117 -166
- ultralytics/utils/patches.py +75 -21
- ultralytics/utils/plotting.py +75 -80
- ultralytics/utils/tal.py +125 -59
- ultralytics/utils/torch_utils.py +53 -79
- ultralytics/utils/tqdm.py +24 -21
- ultralytics/utils/triton.py +13 -19
- ultralytics/utils/tuner.py +19 -10
- dgenerate_ultralytics_headless-8.3.214.dist-info/RECORD +0 -283
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/block.py
CHANGED
|
@@ -13,58 +13,56 @@ from .conv import Conv, DWConv, GhostConv, LightConv, RepConv, autopad
|
|
|
13
13
|
from .transformer import TransformerBlock
|
|
14
14
|
|
|
15
15
|
__all__ = (
|
|
16
|
-
"DFL",
|
|
17
|
-
"HGBlock",
|
|
18
|
-
"HGStem",
|
|
19
|
-
"SPP",
|
|
20
|
-
"SPPF",
|
|
21
16
|
"C1",
|
|
22
17
|
"C2",
|
|
18
|
+
"C2PSA",
|
|
23
19
|
"C3",
|
|
20
|
+
"C3TR",
|
|
21
|
+
"CIB",
|
|
22
|
+
"DFL",
|
|
23
|
+
"ELAN1",
|
|
24
|
+
"PSA",
|
|
25
|
+
"SPP",
|
|
26
|
+
"SPPELAN",
|
|
27
|
+
"SPPF",
|
|
28
|
+
"AConv",
|
|
29
|
+
"ADown",
|
|
30
|
+
"Attention",
|
|
31
|
+
"BNContrastiveHead",
|
|
32
|
+
"Bottleneck",
|
|
33
|
+
"BottleneckCSP",
|
|
24
34
|
"C2f",
|
|
25
35
|
"C2fAttn",
|
|
26
|
-
"ImagePoolingAttn",
|
|
27
|
-
"ContrastiveHead",
|
|
28
|
-
"BNContrastiveHead",
|
|
29
|
-
"C3x",
|
|
30
|
-
"C3TR",
|
|
36
|
+
"C2fCIB",
|
|
37
|
+
"C2fPSA",
|
|
31
38
|
"C3Ghost",
|
|
39
|
+
"C3k2",
|
|
40
|
+
"C3x",
|
|
41
|
+
"CBFuse",
|
|
42
|
+
"CBLinear",
|
|
43
|
+
"ContrastiveHead",
|
|
32
44
|
"GhostBottleneck",
|
|
33
|
-
"Bottleneck",
|
|
34
|
-
"BottleneckCSP",
|
|
45
|
+
"HGBlock",
|
|
46
|
+
"HGStem",
|
|
47
|
+
"ImagePoolingAttn",
|
|
35
48
|
"Proto",
|
|
36
49
|
"RepC3",
|
|
37
|
-
"ResNetLayer",
|
|
38
50
|
"RepNCSPELAN4",
|
|
39
|
-
"ELAN1",
|
|
40
|
-
"ADown",
|
|
41
|
-
"AConv",
|
|
42
|
-
"SPPELAN",
|
|
43
|
-
"CBFuse",
|
|
44
|
-
"CBLinear",
|
|
45
|
-
"C3k2",
|
|
46
|
-
"C2fPSA",
|
|
47
|
-
"C2PSA",
|
|
48
51
|
"RepVGGDW",
|
|
49
|
-
"CIB",
|
|
50
|
-
"C2fCIB",
|
|
51
|
-
"Attention",
|
|
52
|
-
"PSA",
|
|
52
|
+
"ResNetLayer",
|
|
53
53
|
"SCDown",
|
|
54
54
|
"TorchVision",
|
|
55
55
|
)
|
|
56
56
|
|
|
57
57
|
|
|
58
58
|
class DFL(nn.Module):
|
|
59
|
-
"""
|
|
60
|
-
Integral module of Distribution Focal Loss (DFL).
|
|
59
|
+
"""Integral module of Distribution Focal Loss (DFL).
|
|
61
60
|
|
|
62
61
|
Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
|
|
63
62
|
"""
|
|
64
63
|
|
|
65
64
|
def __init__(self, c1: int = 16):
|
|
66
|
-
"""
|
|
67
|
-
Initialize a convolutional layer with a given number of input channels.
|
|
65
|
+
"""Initialize a convolutional layer with a given number of input channels.
|
|
68
66
|
|
|
69
67
|
Args:
|
|
70
68
|
c1 (int): Number of input channels.
|
|
@@ -86,8 +84,7 @@ class Proto(nn.Module):
|
|
|
86
84
|
"""Ultralytics YOLO models mask Proto module for segmentation models."""
|
|
87
85
|
|
|
88
86
|
def __init__(self, c1: int, c_: int = 256, c2: int = 32):
|
|
89
|
-
"""
|
|
90
|
-
Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
|
|
87
|
+
"""Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
|
|
91
88
|
|
|
92
89
|
Args:
|
|
93
90
|
c1 (int): Input channels.
|
|
@@ -106,15 +103,13 @@ class Proto(nn.Module):
|
|
|
106
103
|
|
|
107
104
|
|
|
108
105
|
class HGStem(nn.Module):
|
|
109
|
-
"""
|
|
110
|
-
StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
|
|
106
|
+
"""StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
|
|
111
107
|
|
|
112
108
|
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
|
|
113
109
|
"""
|
|
114
110
|
|
|
115
111
|
def __init__(self, c1: int, cm: int, c2: int):
|
|
116
|
-
"""
|
|
117
|
-
Initialize the StemBlock of PPHGNetV2.
|
|
112
|
+
"""Initialize the StemBlock of PPHGNetV2.
|
|
118
113
|
|
|
119
114
|
Args:
|
|
120
115
|
c1 (int): Input channels.
|
|
@@ -144,8 +139,7 @@ class HGStem(nn.Module):
|
|
|
144
139
|
|
|
145
140
|
|
|
146
141
|
class HGBlock(nn.Module):
|
|
147
|
-
"""
|
|
148
|
-
HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
|
|
142
|
+
"""HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
|
|
149
143
|
|
|
150
144
|
https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
|
|
151
145
|
"""
|
|
@@ -161,8 +155,7 @@ class HGBlock(nn.Module):
|
|
|
161
155
|
shortcut: bool = False,
|
|
162
156
|
act: nn.Module = nn.ReLU(),
|
|
163
157
|
):
|
|
164
|
-
"""
|
|
165
|
-
Initialize HGBlock with specified parameters.
|
|
158
|
+
"""Initialize HGBlock with specified parameters.
|
|
166
159
|
|
|
167
160
|
Args:
|
|
168
161
|
c1 (int): Input channels.
|
|
@@ -193,8 +186,7 @@ class SPP(nn.Module):
|
|
|
193
186
|
"""Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
|
|
194
187
|
|
|
195
188
|
def __init__(self, c1: int, c2: int, k: tuple[int, ...] = (5, 9, 13)):
|
|
196
|
-
"""
|
|
197
|
-
Initialize the SPP layer with input/output channels and pooling kernel sizes.
|
|
189
|
+
"""Initialize the SPP layer with input/output channels and pooling kernel sizes.
|
|
198
190
|
|
|
199
191
|
Args:
|
|
200
192
|
c1 (int): Input channels.
|
|
@@ -216,37 +208,40 @@ class SPP(nn.Module):
|
|
|
216
208
|
class SPPF(nn.Module):
|
|
217
209
|
"""Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
|
|
218
210
|
|
|
219
|
-
def __init__(self, c1: int, c2: int, k: int = 5):
|
|
220
|
-
"""
|
|
221
|
-
Initialize the SPPF layer with given input/output channels and kernel size.
|
|
211
|
+
def __init__(self, c1: int, c2: int, k: int = 5, n: int = 3, shortcut: bool = False):
|
|
212
|
+
"""Initialize the SPPF layer with given input/output channels and kernel size.
|
|
222
213
|
|
|
223
214
|
Args:
|
|
224
215
|
c1 (int): Input channels.
|
|
225
216
|
c2 (int): Output channels.
|
|
226
217
|
k (int): Kernel size.
|
|
218
|
+
n (int): Number of pooling iterations.
|
|
219
|
+
shortcut (bool): Whether to use shortcut connection.
|
|
227
220
|
|
|
228
221
|
Notes:
|
|
229
222
|
This module is equivalent to SPP(k=(5, 9, 13)).
|
|
230
223
|
"""
|
|
231
224
|
super().__init__()
|
|
232
225
|
c_ = c1 // 2 # hidden channels
|
|
233
|
-
self.cv1 = Conv(c1, c_, 1, 1)
|
|
234
|
-
self.cv2 = Conv(c_ * 4, c2, 1, 1)
|
|
226
|
+
self.cv1 = Conv(c1, c_, 1, 1, act=False)
|
|
227
|
+
self.cv2 = Conv(c_ * (n + 1), c2, 1, 1)
|
|
235
228
|
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
|
|
229
|
+
self.n = n
|
|
230
|
+
self.add = shortcut and c1 == c2
|
|
236
231
|
|
|
237
232
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
238
233
|
"""Apply sequential pooling operations to input and return concatenated feature maps."""
|
|
239
234
|
y = [self.cv1(x)]
|
|
240
|
-
y.extend(self.m(y[-1]) for _ in range(3))
|
|
241
|
-
return self.cv2(torch.cat(y, 1))
|
|
235
|
+
y.extend(self.m(y[-1]) for _ in range(getattr(self, "n", 3)))
|
|
236
|
+
y = self.cv2(torch.cat(y, 1))
|
|
237
|
+
return y + x if getattr(self, "add", False) else y
|
|
242
238
|
|
|
243
239
|
|
|
244
240
|
class C1(nn.Module):
|
|
245
241
|
"""CSP Bottleneck with 1 convolution."""
|
|
246
242
|
|
|
247
243
|
def __init__(self, c1: int, c2: int, n: int = 1):
|
|
248
|
-
"""
|
|
249
|
-
Initialize the CSP Bottleneck with 1 convolution.
|
|
244
|
+
"""Initialize the CSP Bottleneck with 1 convolution.
|
|
250
245
|
|
|
251
246
|
Args:
|
|
252
247
|
c1 (int): Input channels.
|
|
@@ -267,8 +262,7 @@ class C2(nn.Module):
|
|
|
267
262
|
"""CSP Bottleneck with 2 convolutions."""
|
|
268
263
|
|
|
269
264
|
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
|
270
|
-
"""
|
|
271
|
-
Initialize a CSP Bottleneck with 2 convolutions.
|
|
265
|
+
"""Initialize a CSP Bottleneck with 2 convolutions.
|
|
272
266
|
|
|
273
267
|
Args:
|
|
274
268
|
c1 (int): Input channels.
|
|
@@ -295,8 +289,7 @@ class C2f(nn.Module):
|
|
|
295
289
|
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
|
|
296
290
|
|
|
297
291
|
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5):
|
|
298
|
-
"""
|
|
299
|
-
Initialize a CSP bottleneck with 2 convolutions.
|
|
292
|
+
"""Initialize a CSP bottleneck with 2 convolutions.
|
|
300
293
|
|
|
301
294
|
Args:
|
|
302
295
|
c1 (int): Input channels.
|
|
@@ -330,8 +323,7 @@ class C3(nn.Module):
|
|
|
330
323
|
"""CSP Bottleneck with 3 convolutions."""
|
|
331
324
|
|
|
332
325
|
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
|
333
|
-
"""
|
|
334
|
-
Initialize the CSP Bottleneck with 3 convolutions.
|
|
326
|
+
"""Initialize the CSP Bottleneck with 3 convolutions.
|
|
335
327
|
|
|
336
328
|
Args:
|
|
337
329
|
c1 (int): Input channels.
|
|
@@ -357,8 +349,7 @@ class C3x(C3):
|
|
|
357
349
|
"""C3 module with cross-convolutions."""
|
|
358
350
|
|
|
359
351
|
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
|
360
|
-
"""
|
|
361
|
-
Initialize C3 module with cross-convolutions.
|
|
352
|
+
"""Initialize C3 module with cross-convolutions.
|
|
362
353
|
|
|
363
354
|
Args:
|
|
364
355
|
c1 (int): Input channels.
|
|
@@ -377,8 +368,7 @@ class RepC3(nn.Module):
|
|
|
377
368
|
"""Rep C3."""
|
|
378
369
|
|
|
379
370
|
def __init__(self, c1: int, c2: int, n: int = 3, e: float = 1.0):
|
|
380
|
-
"""
|
|
381
|
-
Initialize CSP Bottleneck with a single convolution.
|
|
371
|
+
"""Initialize CSP Bottleneck with a single convolution.
|
|
382
372
|
|
|
383
373
|
Args:
|
|
384
374
|
c1 (int): Input channels.
|
|
@@ -402,8 +392,7 @@ class C3TR(C3):
|
|
|
402
392
|
"""C3 module with TransformerBlock()."""
|
|
403
393
|
|
|
404
394
|
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
|
405
|
-
"""
|
|
406
|
-
Initialize C3 module with TransformerBlock.
|
|
395
|
+
"""Initialize C3 module with TransformerBlock.
|
|
407
396
|
|
|
408
397
|
Args:
|
|
409
398
|
c1 (int): Input channels.
|
|
@@ -422,8 +411,7 @@ class C3Ghost(C3):
|
|
|
422
411
|
"""C3 module with GhostBottleneck()."""
|
|
423
412
|
|
|
424
413
|
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
|
425
|
-
"""
|
|
426
|
-
Initialize C3 module with GhostBottleneck.
|
|
414
|
+
"""Initialize C3 module with GhostBottleneck.
|
|
427
415
|
|
|
428
416
|
Args:
|
|
429
417
|
c1 (int): Input channels.
|
|
@@ -442,8 +430,7 @@ class GhostBottleneck(nn.Module):
|
|
|
442
430
|
"""Ghost Bottleneck https://github.com/huawei-noah/Efficient-AI-Backbones."""
|
|
443
431
|
|
|
444
432
|
def __init__(self, c1: int, c2: int, k: int = 3, s: int = 1):
|
|
445
|
-
"""
|
|
446
|
-
Initialize Ghost Bottleneck module.
|
|
433
|
+
"""Initialize Ghost Bottleneck module.
|
|
447
434
|
|
|
448
435
|
Args:
|
|
449
436
|
c1 (int): Input channels.
|
|
@@ -473,8 +460,7 @@ class Bottleneck(nn.Module):
|
|
|
473
460
|
def __init__(
|
|
474
461
|
self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
|
|
475
462
|
):
|
|
476
|
-
"""
|
|
477
|
-
Initialize a standard bottleneck module.
|
|
463
|
+
"""Initialize a standard bottleneck module.
|
|
478
464
|
|
|
479
465
|
Args:
|
|
480
466
|
c1 (int): Input channels.
|
|
@@ -499,8 +485,7 @@ class BottleneckCSP(nn.Module):
|
|
|
499
485
|
"""CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
|
|
500
486
|
|
|
501
487
|
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
|
502
|
-
"""
|
|
503
|
-
Initialize CSP Bottleneck.
|
|
488
|
+
"""Initialize CSP Bottleneck.
|
|
504
489
|
|
|
505
490
|
Args:
|
|
506
491
|
c1 (int): Input channels.
|
|
@@ -531,8 +516,7 @@ class ResNetBlock(nn.Module):
|
|
|
531
516
|
"""ResNet block with standard convolution layers."""
|
|
532
517
|
|
|
533
518
|
def __init__(self, c1: int, c2: int, s: int = 1, e: int = 4):
|
|
534
|
-
"""
|
|
535
|
-
Initialize ResNet block.
|
|
519
|
+
"""Initialize ResNet block.
|
|
536
520
|
|
|
537
521
|
Args:
|
|
538
522
|
c1 (int): Input channels.
|
|
@@ -556,8 +540,7 @@ class ResNetLayer(nn.Module):
|
|
|
556
540
|
"""ResNet layer with multiple ResNet blocks."""
|
|
557
541
|
|
|
558
542
|
def __init__(self, c1: int, c2: int, s: int = 1, is_first: bool = False, n: int = 1, e: int = 4):
|
|
559
|
-
"""
|
|
560
|
-
Initialize ResNet layer.
|
|
543
|
+
"""Initialize ResNet layer.
|
|
561
544
|
|
|
562
545
|
Args:
|
|
563
546
|
c1 (int): Input channels.
|
|
@@ -588,8 +571,7 @@ class MaxSigmoidAttnBlock(nn.Module):
|
|
|
588
571
|
"""Max Sigmoid attention block."""
|
|
589
572
|
|
|
590
573
|
def __init__(self, c1: int, c2: int, nh: int = 1, ec: int = 128, gc: int = 512, scale: bool = False):
|
|
591
|
-
"""
|
|
592
|
-
Initialize MaxSigmoidAttnBlock.
|
|
574
|
+
"""Initialize MaxSigmoidAttnBlock.
|
|
593
575
|
|
|
594
576
|
Args:
|
|
595
577
|
c1 (int): Input channels.
|
|
@@ -609,8 +591,7 @@ class MaxSigmoidAttnBlock(nn.Module):
|
|
|
609
591
|
self.scale = nn.Parameter(torch.ones(1, nh, 1, 1)) if scale else 1.0
|
|
610
592
|
|
|
611
593
|
def forward(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
|
|
612
|
-
"""
|
|
613
|
-
Forward pass of MaxSigmoidAttnBlock.
|
|
594
|
+
"""Forward pass of MaxSigmoidAttnBlock.
|
|
614
595
|
|
|
615
596
|
Args:
|
|
616
597
|
x (torch.Tensor): Input tensor.
|
|
@@ -653,8 +634,7 @@ class C2fAttn(nn.Module):
|
|
|
653
634
|
g: int = 1,
|
|
654
635
|
e: float = 0.5,
|
|
655
636
|
):
|
|
656
|
-
"""
|
|
657
|
-
Initialize C2f module with attention mechanism.
|
|
637
|
+
"""Initialize C2f module with attention mechanism.
|
|
658
638
|
|
|
659
639
|
Args:
|
|
660
640
|
c1 (int): Input channels.
|
|
@@ -675,8 +655,7 @@ class C2fAttn(nn.Module):
|
|
|
675
655
|
self.attn = MaxSigmoidAttnBlock(self.c, self.c, gc=gc, ec=ec, nh=nh)
|
|
676
656
|
|
|
677
657
|
def forward(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
|
|
678
|
-
"""
|
|
679
|
-
Forward pass through C2f layer with attention.
|
|
658
|
+
"""Forward pass through C2f layer with attention.
|
|
680
659
|
|
|
681
660
|
Args:
|
|
682
661
|
x (torch.Tensor): Input tensor.
|
|
@@ -691,8 +670,7 @@ class C2fAttn(nn.Module):
|
|
|
691
670
|
return self.cv2(torch.cat(y, 1))
|
|
692
671
|
|
|
693
672
|
def forward_split(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
|
|
694
|
-
"""
|
|
695
|
-
Forward pass using split() instead of chunk().
|
|
673
|
+
"""Forward pass using split() instead of chunk().
|
|
696
674
|
|
|
697
675
|
Args:
|
|
698
676
|
x (torch.Tensor): Input tensor.
|
|
@@ -713,8 +691,7 @@ class ImagePoolingAttn(nn.Module):
|
|
|
713
691
|
def __init__(
|
|
714
692
|
self, ec: int = 256, ch: tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
|
|
715
693
|
):
|
|
716
|
-
"""
|
|
717
|
-
Initialize ImagePoolingAttn module.
|
|
694
|
+
"""Initialize ImagePoolingAttn module.
|
|
718
695
|
|
|
719
696
|
Args:
|
|
720
697
|
ec (int): Embedding channels.
|
|
@@ -741,8 +718,7 @@ class ImagePoolingAttn(nn.Module):
|
|
|
741
718
|
self.k = k
|
|
742
719
|
|
|
743
720
|
def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
|
|
744
|
-
"""
|
|
745
|
-
Forward pass of ImagePoolingAttn.
|
|
721
|
+
"""Forward pass of ImagePoolingAttn.
|
|
746
722
|
|
|
747
723
|
Args:
|
|
748
724
|
x (list[torch.Tensor]): List of input feature maps.
|
|
@@ -785,8 +761,7 @@ class ContrastiveHead(nn.Module):
|
|
|
785
761
|
self.logit_scale = nn.Parameter(torch.ones([]) * torch.tensor(1 / 0.07).log())
|
|
786
762
|
|
|
787
763
|
def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
|
|
788
|
-
"""
|
|
789
|
-
Forward function of contrastive learning.
|
|
764
|
+
"""Forward function of contrastive learning.
|
|
790
765
|
|
|
791
766
|
Args:
|
|
792
767
|
x (torch.Tensor): Image features.
|
|
@@ -802,16 +777,14 @@ class ContrastiveHead(nn.Module):
|
|
|
802
777
|
|
|
803
778
|
|
|
804
779
|
class BNContrastiveHead(nn.Module):
|
|
805
|
-
"""
|
|
806
|
-
Batch Norm Contrastive Head using batch norm instead of l2-normalization.
|
|
780
|
+
"""Batch Norm Contrastive Head using batch norm instead of l2-normalization.
|
|
807
781
|
|
|
808
782
|
Args:
|
|
809
783
|
embed_dims (int): Embed dimensions of text and image features.
|
|
810
784
|
"""
|
|
811
785
|
|
|
812
786
|
def __init__(self, embed_dims: int):
|
|
813
|
-
"""
|
|
814
|
-
Initialize BNContrastiveHead.
|
|
787
|
+
"""Initialize BNContrastiveHead.
|
|
815
788
|
|
|
816
789
|
Args:
|
|
817
790
|
embed_dims (int): Embedding dimensions for features.
|
|
@@ -830,13 +803,13 @@ class BNContrastiveHead(nn.Module):
|
|
|
830
803
|
del self.logit_scale
|
|
831
804
|
self.forward = self.forward_fuse
|
|
832
805
|
|
|
833
|
-
def forward_fuse(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
|
|
806
|
+
@staticmethod
|
|
807
|
+
def forward_fuse(x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
|
|
834
808
|
"""Passes input out unchanged."""
|
|
835
809
|
return x
|
|
836
810
|
|
|
837
811
|
def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
|
|
838
|
-
"""
|
|
839
|
-
Forward function of contrastive learning with batch normalization.
|
|
812
|
+
"""Forward function of contrastive learning with batch normalization.
|
|
840
813
|
|
|
841
814
|
Args:
|
|
842
815
|
x (torch.Tensor): Image features.
|
|
@@ -858,8 +831,7 @@ class RepBottleneck(Bottleneck):
|
|
|
858
831
|
def __init__(
|
|
859
832
|
self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
|
|
860
833
|
):
|
|
861
|
-
"""
|
|
862
|
-
Initialize RepBottleneck.
|
|
834
|
+
"""Initialize RepBottleneck.
|
|
863
835
|
|
|
864
836
|
Args:
|
|
865
837
|
c1 (int): Input channels.
|
|
@@ -878,8 +850,7 @@ class RepCSP(C3):
|
|
|
878
850
|
"""Repeatable Cross Stage Partial Network (RepCSP) module for efficient feature extraction."""
|
|
879
851
|
|
|
880
852
|
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
|
|
881
|
-
"""
|
|
882
|
-
Initialize RepCSP layer.
|
|
853
|
+
"""Initialize RepCSP layer.
|
|
883
854
|
|
|
884
855
|
Args:
|
|
885
856
|
c1 (int): Input channels.
|
|
@@ -898,8 +869,7 @@ class RepNCSPELAN4(nn.Module):
|
|
|
898
869
|
"""CSP-ELAN."""
|
|
899
870
|
|
|
900
871
|
def __init__(self, c1: int, c2: int, c3: int, c4: int, n: int = 1):
|
|
901
|
-
"""
|
|
902
|
-
Initialize CSP-ELAN layer.
|
|
872
|
+
"""Initialize CSP-ELAN layer.
|
|
903
873
|
|
|
904
874
|
Args:
|
|
905
875
|
c1 (int): Input channels.
|
|
@@ -932,8 +902,7 @@ class ELAN1(RepNCSPELAN4):
|
|
|
932
902
|
"""ELAN1 module with 4 convolutions."""
|
|
933
903
|
|
|
934
904
|
def __init__(self, c1: int, c2: int, c3: int, c4: int):
|
|
935
|
-
"""
|
|
936
|
-
Initialize ELAN1 layer.
|
|
905
|
+
"""Initialize ELAN1 layer.
|
|
937
906
|
|
|
938
907
|
Args:
|
|
939
908
|
c1 (int): Input channels.
|
|
@@ -953,8 +922,7 @@ class AConv(nn.Module):
|
|
|
953
922
|
"""AConv."""
|
|
954
923
|
|
|
955
924
|
def __init__(self, c1: int, c2: int):
|
|
956
|
-
"""
|
|
957
|
-
Initialize AConv module.
|
|
925
|
+
"""Initialize AConv module.
|
|
958
926
|
|
|
959
927
|
Args:
|
|
960
928
|
c1 (int): Input channels.
|
|
@@ -973,8 +941,7 @@ class ADown(nn.Module):
|
|
|
973
941
|
"""ADown."""
|
|
974
942
|
|
|
975
943
|
def __init__(self, c1: int, c2: int):
|
|
976
|
-
"""
|
|
977
|
-
Initialize ADown module.
|
|
944
|
+
"""Initialize ADown module.
|
|
978
945
|
|
|
979
946
|
Args:
|
|
980
947
|
c1 (int): Input channels.
|
|
@@ -999,8 +966,7 @@ class SPPELAN(nn.Module):
|
|
|
999
966
|
"""SPP-ELAN."""
|
|
1000
967
|
|
|
1001
968
|
def __init__(self, c1: int, c2: int, c3: int, k: int = 5):
|
|
1002
|
-
"""
|
|
1003
|
-
Initialize SPP-ELAN block.
|
|
969
|
+
"""Initialize SPP-ELAN block.
|
|
1004
970
|
|
|
1005
971
|
Args:
|
|
1006
972
|
c1 (int): Input channels.
|
|
@@ -1027,8 +993,7 @@ class CBLinear(nn.Module):
|
|
|
1027
993
|
"""CBLinear."""
|
|
1028
994
|
|
|
1029
995
|
def __init__(self, c1: int, c2s: list[int], k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
|
|
1030
|
-
"""
|
|
1031
|
-
Initialize CBLinear module.
|
|
996
|
+
"""Initialize CBLinear module.
|
|
1032
997
|
|
|
1033
998
|
Args:
|
|
1034
999
|
c1 (int): Input channels.
|
|
@@ -1051,8 +1016,7 @@ class CBFuse(nn.Module):
|
|
|
1051
1016
|
"""CBFuse."""
|
|
1052
1017
|
|
|
1053
1018
|
def __init__(self, idx: list[int]):
|
|
1054
|
-
"""
|
|
1055
|
-
Initialize CBFuse module.
|
|
1019
|
+
"""Initialize CBFuse module.
|
|
1056
1020
|
|
|
1057
1021
|
Args:
|
|
1058
1022
|
idx (list[int]): Indices for feature selection.
|
|
@@ -1061,8 +1025,7 @@ class CBFuse(nn.Module):
|
|
|
1061
1025
|
self.idx = idx
|
|
1062
1026
|
|
|
1063
1027
|
def forward(self, xs: list[torch.Tensor]) -> torch.Tensor:
|
|
1064
|
-
"""
|
|
1065
|
-
Forward pass through CBFuse layer.
|
|
1028
|
+
"""Forward pass through CBFuse layer.
|
|
1066
1029
|
|
|
1067
1030
|
Args:
|
|
1068
1031
|
xs (list[torch.Tensor]): List of input tensors.
|
|
@@ -1079,8 +1042,7 @@ class C3f(nn.Module):
|
|
|
1079
1042
|
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
|
|
1080
1043
|
|
|
1081
1044
|
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5):
|
|
1082
|
-
"""
|
|
1083
|
-
Initialize CSP bottleneck layer with two convolutions.
|
|
1045
|
+
"""Initialize CSP bottleneck layer with two convolutions.
|
|
1084
1046
|
|
|
1085
1047
|
Args:
|
|
1086
1048
|
c1 (int): Input channels.
|
|
@@ -1108,10 +1070,17 @@ class C3k2(C2f):
|
|
|
1108
1070
|
"""Faster Implementation of CSP Bottleneck with 2 convolutions."""
|
|
1109
1071
|
|
|
1110
1072
|
def __init__(
|
|
1111
|
-
self,
|
|
1073
|
+
self,
|
|
1074
|
+
c1: int,
|
|
1075
|
+
c2: int,
|
|
1076
|
+
n: int = 1,
|
|
1077
|
+
c3k: bool = False,
|
|
1078
|
+
e: float = 0.5,
|
|
1079
|
+
attn: bool = False,
|
|
1080
|
+
g: int = 1,
|
|
1081
|
+
shortcut: bool = True,
|
|
1112
1082
|
):
|
|
1113
|
-
"""
|
|
1114
|
-
Initialize C3k2 module.
|
|
1083
|
+
"""Initialize C3k2 module.
|
|
1115
1084
|
|
|
1116
1085
|
Args:
|
|
1117
1086
|
c1 (int): Input channels.
|
|
@@ -1119,12 +1088,21 @@ class C3k2(C2f):
|
|
|
1119
1088
|
n (int): Number of blocks.
|
|
1120
1089
|
c3k (bool): Whether to use C3k blocks.
|
|
1121
1090
|
e (float): Expansion ratio.
|
|
1091
|
+
attn (bool): Whether to use attention blocks.
|
|
1122
1092
|
g (int): Groups for convolutions.
|
|
1123
1093
|
shortcut (bool): Whether to use shortcut connections.
|
|
1124
1094
|
"""
|
|
1125
1095
|
super().__init__(c1, c2, n, shortcut, g, e)
|
|
1126
1096
|
self.m = nn.ModuleList(
|
|
1127
|
-
|
|
1097
|
+
nn.Sequential(
|
|
1098
|
+
Bottleneck(self.c, self.c, shortcut, g),
|
|
1099
|
+
PSABlock(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1)),
|
|
1100
|
+
)
|
|
1101
|
+
if attn
|
|
1102
|
+
else C3k(self.c, self.c, 2, shortcut, g)
|
|
1103
|
+
if c3k
|
|
1104
|
+
else Bottleneck(self.c, self.c, shortcut, g)
|
|
1105
|
+
for _ in range(n)
|
|
1128
1106
|
)
|
|
1129
1107
|
|
|
1130
1108
|
|
|
@@ -1132,8 +1110,7 @@ class C3k(C3):
|
|
|
1132
1110
|
"""C3k is a CSP bottleneck module with customizable kernel sizes for feature extraction in neural networks."""
|
|
1133
1111
|
|
|
1134
1112
|
def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5, k: int = 3):
|
|
1135
|
-
"""
|
|
1136
|
-
Initialize C3k module.
|
|
1113
|
+
"""Initialize C3k module.
|
|
1137
1114
|
|
|
1138
1115
|
Args:
|
|
1139
1116
|
c1 (int): Input channels.
|
|
@@ -1154,8 +1131,7 @@ class RepVGGDW(torch.nn.Module):
|
|
|
1154
1131
|
"""RepVGGDW is a class that represents a depth wise separable convolutional block in RepVGG architecture."""
|
|
1155
1132
|
|
|
1156
1133
|
def __init__(self, ed: int) -> None:
|
|
1157
|
-
"""
|
|
1158
|
-
Initialize RepVGGDW module.
|
|
1134
|
+
"""Initialize RepVGGDW module.
|
|
1159
1135
|
|
|
1160
1136
|
Args:
|
|
1161
1137
|
ed (int): Input and output channels.
|
|
@@ -1167,8 +1143,7 @@ class RepVGGDW(torch.nn.Module):
|
|
|
1167
1143
|
self.act = nn.SiLU()
|
|
1168
1144
|
|
|
1169
1145
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1170
|
-
"""
|
|
1171
|
-
Perform a forward pass of the RepVGGDW block.
|
|
1146
|
+
"""Perform a forward pass of the RepVGGDW block.
|
|
1172
1147
|
|
|
1173
1148
|
Args:
|
|
1174
1149
|
x (torch.Tensor): Input tensor.
|
|
@@ -1179,8 +1154,7 @@ class RepVGGDW(torch.nn.Module):
|
|
|
1179
1154
|
return self.act(self.conv(x) + self.conv1(x))
|
|
1180
1155
|
|
|
1181
1156
|
def forward_fuse(self, x: torch.Tensor) -> torch.Tensor:
|
|
1182
|
-
"""
|
|
1183
|
-
Perform a forward pass of the RepVGGDW block without fusing the convolutions.
|
|
1157
|
+
"""Perform a forward pass of the RepVGGDW block without fusing the convolutions.
|
|
1184
1158
|
|
|
1185
1159
|
Args:
|
|
1186
1160
|
x (torch.Tensor): Input tensor.
|
|
@@ -1192,11 +1166,12 @@ class RepVGGDW(torch.nn.Module):
|
|
|
1192
1166
|
|
|
1193
1167
|
@torch.no_grad()
|
|
1194
1168
|
def fuse(self):
|
|
1195
|
-
"""
|
|
1196
|
-
Fuse the convolutional layers in the RepVGGDW block.
|
|
1169
|
+
"""Fuse the convolutional layers in the RepVGGDW block.
|
|
1197
1170
|
|
|
1198
1171
|
This method fuses the convolutional layers and updates the weights and biases accordingly.
|
|
1199
1172
|
"""
|
|
1173
|
+
if not hasattr(self, "conv1"):
|
|
1174
|
+
return # already fused
|
|
1200
1175
|
conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)
|
|
1201
1176
|
conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)
|
|
1202
1177
|
|
|
@@ -1218,8 +1193,7 @@ class RepVGGDW(torch.nn.Module):
|
|
|
1218
1193
|
|
|
1219
1194
|
|
|
1220
1195
|
class CIB(nn.Module):
|
|
1221
|
-
"""
|
|
1222
|
-
Conditional Identity Block (CIB) module.
|
|
1196
|
+
"""Compact Inverted Block (CIB) module.
|
|
1223
1197
|
|
|
1224
1198
|
Args:
|
|
1225
1199
|
c1 (int): Number of input channels.
|
|
@@ -1230,8 +1204,7 @@ class CIB(nn.Module):
|
|
|
1230
1204
|
"""
|
|
1231
1205
|
|
|
1232
1206
|
def __init__(self, c1: int, c2: int, shortcut: bool = True, e: float = 0.5, lk: bool = False):
|
|
1233
|
-
"""
|
|
1234
|
-
Initialize the CIB module.
|
|
1207
|
+
"""Initialize the CIB module.
|
|
1235
1208
|
|
|
1236
1209
|
Args:
|
|
1237
1210
|
c1 (int): Input channels.
|
|
@@ -1253,8 +1226,7 @@ class CIB(nn.Module):
|
|
|
1253
1226
|
self.add = shortcut and c1 == c2
|
|
1254
1227
|
|
|
1255
1228
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1256
|
-
"""
|
|
1257
|
-
Forward pass of the CIB module.
|
|
1229
|
+
"""Forward pass of the CIB module.
|
|
1258
1230
|
|
|
1259
1231
|
Args:
|
|
1260
1232
|
x (torch.Tensor): Input tensor.
|
|
@@ -1266,15 +1238,14 @@ class CIB(nn.Module):
|
|
|
1266
1238
|
|
|
1267
1239
|
|
|
1268
1240
|
class C2fCIB(C2f):
|
|
1269
|
-
"""
|
|
1270
|
-
C2fCIB class represents a convolutional block with C2f and CIB modules.
|
|
1241
|
+
"""C2fCIB class represents a convolutional block with C2f and CIB modules.
|
|
1271
1242
|
|
|
1272
1243
|
Args:
|
|
1273
1244
|
c1 (int): Number of input channels.
|
|
1274
1245
|
c2 (int): Number of output channels.
|
|
1275
1246
|
n (int, optional): Number of CIB modules to stack. Defaults to 1.
|
|
1276
1247
|
shortcut (bool, optional): Whether to use shortcut connection. Defaults to False.
|
|
1277
|
-
lk (bool, optional): Whether to use
|
|
1248
|
+
lk (bool, optional): Whether to use large kernel. Defaults to False.
|
|
1278
1249
|
g (int, optional): Number of groups for grouped convolution. Defaults to 1.
|
|
1279
1250
|
e (float, optional): Expansion ratio for CIB modules. Defaults to 0.5.
|
|
1280
1251
|
"""
|
|
@@ -1282,15 +1253,14 @@ class C2fCIB(C2f):
|
|
|
1282
1253
|
def __init__(
|
|
1283
1254
|
self, c1: int, c2: int, n: int = 1, shortcut: bool = False, lk: bool = False, g: int = 1, e: float = 0.5
|
|
1284
1255
|
):
|
|
1285
|
-
"""
|
|
1286
|
-
Initialize C2fCIB module.
|
|
1256
|
+
"""Initialize C2fCIB module.
|
|
1287
1257
|
|
|
1288
1258
|
Args:
|
|
1289
1259
|
c1 (int): Input channels.
|
|
1290
1260
|
c2 (int): Output channels.
|
|
1291
1261
|
n (int): Number of CIB modules.
|
|
1292
1262
|
shortcut (bool): Whether to use shortcut connection.
|
|
1293
|
-
lk (bool): Whether to use
|
|
1263
|
+
lk (bool): Whether to use large kernel.
|
|
1294
1264
|
g (int): Groups for convolutions.
|
|
1295
1265
|
e (float): Expansion ratio.
|
|
1296
1266
|
"""
|
|
@@ -1299,8 +1269,7 @@ class C2fCIB(C2f):
|
|
|
1299
1269
|
|
|
1300
1270
|
|
|
1301
1271
|
class Attention(nn.Module):
|
|
1302
|
-
"""
|
|
1303
|
-
Attention module that performs self-attention on the input tensor.
|
|
1272
|
+
"""Attention module that performs self-attention on the input tensor.
|
|
1304
1273
|
|
|
1305
1274
|
Args:
|
|
1306
1275
|
dim (int): The input tensor dimension.
|
|
@@ -1318,8 +1287,7 @@ class Attention(nn.Module):
|
|
|
1318
1287
|
"""
|
|
1319
1288
|
|
|
1320
1289
|
def __init__(self, dim: int, num_heads: int = 8, attn_ratio: float = 0.5):
|
|
1321
|
-
"""
|
|
1322
|
-
Initialize multi-head attention module.
|
|
1290
|
+
"""Initialize multi-head attention module.
|
|
1323
1291
|
|
|
1324
1292
|
Args:
|
|
1325
1293
|
dim (int): Input dimension.
|
|
@@ -1338,8 +1306,7 @@ class Attention(nn.Module):
|
|
|
1338
1306
|
self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)
|
|
1339
1307
|
|
|
1340
1308
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1341
|
-
"""
|
|
1342
|
-
Forward pass of the Attention module.
|
|
1309
|
+
"""Forward pass of the Attention module.
|
|
1343
1310
|
|
|
1344
1311
|
Args:
|
|
1345
1312
|
x (torch.Tensor): The input tensor.
|
|
@@ -1362,8 +1329,7 @@ class Attention(nn.Module):
|
|
|
1362
1329
|
|
|
1363
1330
|
|
|
1364
1331
|
class PSABlock(nn.Module):
|
|
1365
|
-
"""
|
|
1366
|
-
PSABlock class implementing a Position-Sensitive Attention block for neural networks.
|
|
1332
|
+
"""PSABlock class implementing a Position-Sensitive Attention block for neural networks.
|
|
1367
1333
|
|
|
1368
1334
|
This class encapsulates the functionality for applying multi-head attention and feed-forward neural network layers
|
|
1369
1335
|
with optional shortcut connections.
|
|
@@ -1384,8 +1350,7 @@ class PSABlock(nn.Module):
|
|
|
1384
1350
|
"""
|
|
1385
1351
|
|
|
1386
1352
|
def __init__(self, c: int, attn_ratio: float = 0.5, num_heads: int = 4, shortcut: bool = True) -> None:
|
|
1387
|
-
"""
|
|
1388
|
-
Initialize the PSABlock.
|
|
1353
|
+
"""Initialize the PSABlock.
|
|
1389
1354
|
|
|
1390
1355
|
Args:
|
|
1391
1356
|
c (int): Input and output channels.
|
|
@@ -1400,8 +1365,7 @@ class PSABlock(nn.Module):
|
|
|
1400
1365
|
self.add = shortcut
|
|
1401
1366
|
|
|
1402
1367
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1403
|
-
"""
|
|
1404
|
-
Execute a forward pass through PSABlock.
|
|
1368
|
+
"""Execute a forward pass through PSABlock.
|
|
1405
1369
|
|
|
1406
1370
|
Args:
|
|
1407
1371
|
x (torch.Tensor): Input tensor.
|
|
@@ -1415,8 +1379,7 @@ class PSABlock(nn.Module):
|
|
|
1415
1379
|
|
|
1416
1380
|
|
|
1417
1381
|
class PSA(nn.Module):
|
|
1418
|
-
"""
|
|
1419
|
-
PSA class for implementing Position-Sensitive Attention in neural networks.
|
|
1382
|
+
"""PSA class for implementing Position-Sensitive Attention in neural networks.
|
|
1420
1383
|
|
|
1421
1384
|
This class encapsulates the functionality for applying position-sensitive attention and feed-forward networks to
|
|
1422
1385
|
input tensors, enhancing feature extraction and processing capabilities.
|
|
@@ -1439,8 +1402,7 @@ class PSA(nn.Module):
|
|
|
1439
1402
|
"""
|
|
1440
1403
|
|
|
1441
1404
|
def __init__(self, c1: int, c2: int, e: float = 0.5):
|
|
1442
|
-
"""
|
|
1443
|
-
Initialize PSA module.
|
|
1405
|
+
"""Initialize PSA module.
|
|
1444
1406
|
|
|
1445
1407
|
Args:
|
|
1446
1408
|
c1 (int): Input channels.
|
|
@@ -1453,12 +1415,11 @@ class PSA(nn.Module):
|
|
|
1453
1415
|
self.cv1 = Conv(c1, 2 * self.c, 1, 1)
|
|
1454
1416
|
self.cv2 = Conv(2 * self.c, c1, 1)
|
|
1455
1417
|
|
|
1456
|
-
self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
|
|
1418
|
+
self.attn = Attention(self.c, attn_ratio=0.5, num_heads=max(self.c // 64, 1))
|
|
1457
1419
|
self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
|
|
1458
1420
|
|
|
1459
1421
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1460
|
-
"""
|
|
1461
|
-
Execute forward pass in PSA module.
|
|
1422
|
+
"""Execute forward pass in PSA module.
|
|
1462
1423
|
|
|
1463
1424
|
Args:
|
|
1464
1425
|
x (torch.Tensor): Input tensor.
|
|
@@ -1473,8 +1434,7 @@ class PSA(nn.Module):
|
|
|
1473
1434
|
|
|
1474
1435
|
|
|
1475
1436
|
class C2PSA(nn.Module):
|
|
1476
|
-
"""
|
|
1477
|
-
C2PSA module with attention mechanism for enhanced feature extraction and processing.
|
|
1437
|
+
"""C2PSA module with attention mechanism for enhanced feature extraction and processing.
|
|
1478
1438
|
|
|
1479
1439
|
This module implements a convolutional block with attention mechanisms to enhance feature extraction and processing
|
|
1480
1440
|
capabilities. It includes a series of PSABlock modules for self-attention and feed-forward operations.
|
|
@@ -1488,18 +1448,17 @@ class C2PSA(nn.Module):
|
|
|
1488
1448
|
Methods:
|
|
1489
1449
|
forward: Performs a forward pass through the C2PSA module, applying attention and feed-forward operations.
|
|
1490
1450
|
|
|
1491
|
-
Notes:
|
|
1492
|
-
This module essentially is the same as PSA module, but refactored to allow stacking more PSABlock modules.
|
|
1493
|
-
|
|
1494
1451
|
Examples:
|
|
1495
1452
|
>>> c2psa = C2PSA(c1=256, c2=256, n=3, e=0.5)
|
|
1496
1453
|
>>> input_tensor = torch.randn(1, 256, 64, 64)
|
|
1497
1454
|
>>> output_tensor = c2psa(input_tensor)
|
|
1455
|
+
|
|
1456
|
+
Notes:
|
|
1457
|
+
This module essentially is the same as PSA module, but refactored to allow stacking more PSABlock modules.
|
|
1498
1458
|
"""
|
|
1499
1459
|
|
|
1500
1460
|
def __init__(self, c1: int, c2: int, n: int = 1, e: float = 0.5):
|
|
1501
|
-
"""
|
|
1502
|
-
Initialize C2PSA module.
|
|
1461
|
+
"""Initialize C2PSA module.
|
|
1503
1462
|
|
|
1504
1463
|
Args:
|
|
1505
1464
|
c1 (int): Input channels.
|
|
@@ -1516,8 +1475,7 @@ class C2PSA(nn.Module):
|
|
|
1516
1475
|
self.m = nn.Sequential(*(PSABlock(self.c, attn_ratio=0.5, num_heads=self.c // 64) for _ in range(n)))
|
|
1517
1476
|
|
|
1518
1477
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1519
|
-
"""
|
|
1520
|
-
Process the input tensor through a series of PSA blocks.
|
|
1478
|
+
"""Process the input tensor through a series of PSA blocks.
|
|
1521
1479
|
|
|
1522
1480
|
Args:
|
|
1523
1481
|
x (torch.Tensor): Input tensor.
|
|
@@ -1531,10 +1489,10 @@ class C2PSA(nn.Module):
|
|
|
1531
1489
|
|
|
1532
1490
|
|
|
1533
1491
|
class C2fPSA(C2f):
|
|
1534
|
-
"""
|
|
1535
|
-
C2fPSA module with enhanced feature extraction using PSA blocks.
|
|
1492
|
+
"""C2fPSA module with enhanced feature extraction using PSA blocks.
|
|
1536
1493
|
|
|
1537
|
-
This class extends the C2f module by incorporating PSA blocks for improved attention mechanisms and feature
|
|
1494
|
+
This class extends the C2f module by incorporating PSA blocks for improved attention mechanisms and feature
|
|
1495
|
+
extraction.
|
|
1538
1496
|
|
|
1539
1497
|
Attributes:
|
|
1540
1498
|
c (int): Number of hidden channels.
|
|
@@ -1556,8 +1514,7 @@ class C2fPSA(C2f):
|
|
|
1556
1514
|
"""
|
|
1557
1515
|
|
|
1558
1516
|
def __init__(self, c1: int, c2: int, n: int = 1, e: float = 0.5):
|
|
1559
|
-
"""
|
|
1560
|
-
Initialize C2fPSA module.
|
|
1517
|
+
"""Initialize C2fPSA module.
|
|
1561
1518
|
|
|
1562
1519
|
Args:
|
|
1563
1520
|
c1 (int): Input channels.
|
|
@@ -1571,8 +1528,7 @@ class C2fPSA(C2f):
|
|
|
1571
1528
|
|
|
1572
1529
|
|
|
1573
1530
|
class SCDown(nn.Module):
|
|
1574
|
-
"""
|
|
1575
|
-
SCDown module for downsampling with separable convolutions.
|
|
1531
|
+
"""SCDown module for downsampling with separable convolutions.
|
|
1576
1532
|
|
|
1577
1533
|
This module performs downsampling using a combination of pointwise and depthwise convolutions, which helps in
|
|
1578
1534
|
efficiently reducing the spatial dimensions of the input tensor while maintaining the channel information.
|
|
@@ -1595,8 +1551,7 @@ class SCDown(nn.Module):
|
|
|
1595
1551
|
"""
|
|
1596
1552
|
|
|
1597
1553
|
def __init__(self, c1: int, c2: int, k: int, s: int):
|
|
1598
|
-
"""
|
|
1599
|
-
Initialize SCDown module.
|
|
1554
|
+
"""Initialize SCDown module.
|
|
1600
1555
|
|
|
1601
1556
|
Args:
|
|
1602
1557
|
c1 (int): Input channels.
|
|
@@ -1609,8 +1564,7 @@ class SCDown(nn.Module):
|
|
|
1609
1564
|
self.cv2 = Conv(c2, c2, k=k, s=s, g=c2, act=False)
|
|
1610
1565
|
|
|
1611
1566
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1612
|
-
"""
|
|
1613
|
-
Apply convolution and downsampling to the input tensor.
|
|
1567
|
+
"""Apply convolution and downsampling to the input tensor.
|
|
1614
1568
|
|
|
1615
1569
|
Args:
|
|
1616
1570
|
x (torch.Tensor): Input tensor.
|
|
@@ -1622,27 +1576,26 @@ class SCDown(nn.Module):
|
|
|
1622
1576
|
|
|
1623
1577
|
|
|
1624
1578
|
class TorchVision(nn.Module):
|
|
1625
|
-
"""
|
|
1626
|
-
TorchVision module to allow loading any torchvision model.
|
|
1579
|
+
"""TorchVision module to allow loading any torchvision model.
|
|
1627
1580
|
|
|
1628
|
-
This class provides a way to load a model from the torchvision library, optionally load pre-trained weights, and
|
|
1629
|
-
|
|
1630
|
-
Attributes:
|
|
1631
|
-
m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped.
|
|
1581
|
+
This class provides a way to load a model from the torchvision library, optionally load pre-trained weights, and
|
|
1582
|
+
customize the model by truncating or unwrapping layers.
|
|
1632
1583
|
|
|
1633
1584
|
Args:
|
|
1634
1585
|
model (str): Name of the torchvision model to load.
|
|
1635
1586
|
weights (str, optional): Pre-trained weights to load. Default is "DEFAULT".
|
|
1636
|
-
unwrap (bool, optional):
|
|
1587
|
+
unwrap (bool, optional): Unwraps the model to a sequential containing all but the last `truncate` layers.
|
|
1637
1588
|
truncate (int, optional): Number of layers to truncate from the end if `unwrap` is True. Default is 2.
|
|
1638
1589
|
split (bool, optional): Returns output from intermediate child modules as list. Default is False.
|
|
1590
|
+
|
|
1591
|
+
Attributes:
|
|
1592
|
+
m (nn.Module): The loaded torchvision model, possibly truncated and unwrapped.
|
|
1639
1593
|
"""
|
|
1640
1594
|
|
|
1641
1595
|
def __init__(
|
|
1642
1596
|
self, model: str, weights: str = "DEFAULT", unwrap: bool = True, truncate: int = 2, split: bool = False
|
|
1643
1597
|
):
|
|
1644
|
-
"""
|
|
1645
|
-
Load the model and weights from torchvision.
|
|
1598
|
+
"""Load the model and weights from torchvision.
|
|
1646
1599
|
|
|
1647
1600
|
Args:
|
|
1648
1601
|
model (str): Name of the torchvision model to load.
|
|
@@ -1669,8 +1622,7 @@ class TorchVision(nn.Module):
|
|
|
1669
1622
|
self.m.head = self.m.heads = nn.Identity()
|
|
1670
1623
|
|
|
1671
1624
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1672
|
-
"""
|
|
1673
|
-
Forward pass through the model.
|
|
1625
|
+
"""Forward pass through the model.
|
|
1674
1626
|
|
|
1675
1627
|
Args:
|
|
1676
1628
|
x (torch.Tensor): Input tensor.
|
|
@@ -1687,8 +1639,7 @@ class TorchVision(nn.Module):
|
|
|
1687
1639
|
|
|
1688
1640
|
|
|
1689
1641
|
class AAttn(nn.Module):
|
|
1690
|
-
"""
|
|
1691
|
-
Area-attention module for YOLO models, providing efficient attention mechanisms.
|
|
1642
|
+
"""Area-attention module for YOLO models, providing efficient attention mechanisms.
|
|
1692
1643
|
|
|
1693
1644
|
This module implements an area-based attention mechanism that processes input features in a spatially-aware manner,
|
|
1694
1645
|
making it particularly effective for object detection tasks.
|
|
@@ -1713,8 +1664,7 @@ class AAttn(nn.Module):
|
|
|
1713
1664
|
"""
|
|
1714
1665
|
|
|
1715
1666
|
def __init__(self, dim: int, num_heads: int, area: int = 1):
|
|
1716
|
-
"""
|
|
1717
|
-
Initialize an Area-attention module for YOLO models.
|
|
1667
|
+
"""Initialize an Area-attention module for YOLO models.
|
|
1718
1668
|
|
|
1719
1669
|
Args:
|
|
1720
1670
|
dim (int): Number of hidden channels.
|
|
@@ -1733,8 +1683,7 @@ class AAttn(nn.Module):
|
|
|
1733
1683
|
self.pe = Conv(all_head_dim, dim, 7, 1, 3, g=dim, act=False)
|
|
1734
1684
|
|
|
1735
1685
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1736
|
-
"""
|
|
1737
|
-
Process the input tensor through the area-attention.
|
|
1686
|
+
"""Process the input tensor through the area-attention.
|
|
1738
1687
|
|
|
1739
1688
|
Args:
|
|
1740
1689
|
x (torch.Tensor): Input tensor.
|
|
@@ -1773,8 +1722,7 @@ class AAttn(nn.Module):
|
|
|
1773
1722
|
|
|
1774
1723
|
|
|
1775
1724
|
class ABlock(nn.Module):
|
|
1776
|
-
"""
|
|
1777
|
-
Area-attention block module for efficient feature extraction in YOLO models.
|
|
1725
|
+
"""Area-attention block module for efficient feature extraction in YOLO models.
|
|
1778
1726
|
|
|
1779
1727
|
This module implements an area-attention mechanism combined with a feed-forward network for processing feature maps.
|
|
1780
1728
|
It uses a novel area-based attention approach that is more efficient than traditional self-attention while
|
|
@@ -1797,8 +1745,7 @@ class ABlock(nn.Module):
|
|
|
1797
1745
|
"""
|
|
1798
1746
|
|
|
1799
1747
|
def __init__(self, dim: int, num_heads: int, mlp_ratio: float = 1.2, area: int = 1):
|
|
1800
|
-
"""
|
|
1801
|
-
Initialize an Area-attention block module.
|
|
1748
|
+
"""Initialize an Area-attention block module.
|
|
1802
1749
|
|
|
1803
1750
|
Args:
|
|
1804
1751
|
dim (int): Number of input channels.
|
|
@@ -1814,9 +1761,9 @@ class ABlock(nn.Module):
|
|
|
1814
1761
|
|
|
1815
1762
|
self.apply(self._init_weights)
|
|
1816
1763
|
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
Initialize weights using a truncated normal distribution.
|
|
1764
|
+
@staticmethod
|
|
1765
|
+
def _init_weights(m: nn.Module):
|
|
1766
|
+
"""Initialize weights using a truncated normal distribution.
|
|
1820
1767
|
|
|
1821
1768
|
Args:
|
|
1822
1769
|
m (nn.Module): Module to initialize.
|
|
@@ -1827,8 +1774,7 @@ class ABlock(nn.Module):
|
|
|
1827
1774
|
nn.init.constant_(m.bias, 0)
|
|
1828
1775
|
|
|
1829
1776
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1830
|
-
"""
|
|
1831
|
-
Forward pass through ABlock.
|
|
1777
|
+
"""Forward pass through ABlock.
|
|
1832
1778
|
|
|
1833
1779
|
Args:
|
|
1834
1780
|
x (torch.Tensor): Input tensor.
|
|
@@ -1841,8 +1787,7 @@ class ABlock(nn.Module):
|
|
|
1841
1787
|
|
|
1842
1788
|
|
|
1843
1789
|
class A2C2f(nn.Module):
|
|
1844
|
-
"""
|
|
1845
|
-
Area-Attention C2f module for enhanced feature extraction with area-based attention mechanisms.
|
|
1790
|
+
"""Area-Attention C2f module for enhanced feature extraction with area-based attention mechanisms.
|
|
1846
1791
|
|
|
1847
1792
|
This module extends the C2f architecture by incorporating area-attention and ABlock layers for improved feature
|
|
1848
1793
|
processing. It supports both area-attention and standard convolution modes.
|
|
@@ -1877,8 +1822,7 @@ class A2C2f(nn.Module):
|
|
|
1877
1822
|
g: int = 1,
|
|
1878
1823
|
shortcut: bool = True,
|
|
1879
1824
|
):
|
|
1880
|
-
"""
|
|
1881
|
-
Initialize Area-Attention C2f module.
|
|
1825
|
+
"""Initialize Area-Attention C2f module.
|
|
1882
1826
|
|
|
1883
1827
|
Args:
|
|
1884
1828
|
c1 (int): Number of input channels.
|
|
@@ -1894,7 +1838,7 @@ class A2C2f(nn.Module):
|
|
|
1894
1838
|
"""
|
|
1895
1839
|
super().__init__()
|
|
1896
1840
|
c_ = int(c2 * e) # hidden channels
|
|
1897
|
-
assert c_ % 32 == 0, "Dimension of ABlock be a multiple of 32."
|
|
1841
|
+
assert c_ % 32 == 0, "Dimension of ABlock must be a multiple of 32."
|
|
1898
1842
|
|
|
1899
1843
|
self.cv1 = Conv(c1, c_, 1, 1)
|
|
1900
1844
|
self.cv2 = Conv((1 + n) * c_, c2, 1)
|
|
@@ -1908,8 +1852,7 @@ class A2C2f(nn.Module):
|
|
|
1908
1852
|
)
|
|
1909
1853
|
|
|
1910
1854
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
1911
|
-
"""
|
|
1912
|
-
Forward pass through A2C2f layer.
|
|
1855
|
+
"""Forward pass through A2C2f layer.
|
|
1913
1856
|
|
|
1914
1857
|
Args:
|
|
1915
1858
|
x (torch.Tensor): Input tensor.
|
|
@@ -1929,8 +1872,7 @@ class SwiGLUFFN(nn.Module):
|
|
|
1929
1872
|
"""SwiGLU Feed-Forward Network for transformer-based architectures."""
|
|
1930
1873
|
|
|
1931
1874
|
def __init__(self, gc: int, ec: int, e: int = 4) -> None:
|
|
1932
|
-
"""
|
|
1933
|
-
Initialize SwiGLU FFN with input dimension, output dimension, and expansion factor.
|
|
1875
|
+
"""Initialize SwiGLU FFN with input dimension, output dimension, and expansion factor.
|
|
1934
1876
|
|
|
1935
1877
|
Args:
|
|
1936
1878
|
gc (int): Guide channels.
|
|
@@ -1953,8 +1895,7 @@ class Residual(nn.Module):
|
|
|
1953
1895
|
"""Residual connection wrapper for neural network modules."""
|
|
1954
1896
|
|
|
1955
1897
|
def __init__(self, m: nn.Module) -> None:
|
|
1956
|
-
"""
|
|
1957
|
-
Initialize residual module with the wrapped module.
|
|
1898
|
+
"""Initialize residual module with the wrapped module.
|
|
1958
1899
|
|
|
1959
1900
|
Args:
|
|
1960
1901
|
m (nn.Module): Module to wrap with residual connection.
|
|
@@ -1975,8 +1916,7 @@ class SAVPE(nn.Module):
|
|
|
1975
1916
|
"""Spatial-Aware Visual Prompt Embedding module for feature enhancement."""
|
|
1976
1917
|
|
|
1977
1918
|
def __init__(self, ch: list[int], c3: int, embed: int):
|
|
1978
|
-
"""
|
|
1979
|
-
Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
|
|
1919
|
+
"""Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
|
|
1980
1920
|
|
|
1981
1921
|
Args:
|
|
1982
1922
|
ch (list[int]): List of input channel dimensions.
|
|
@@ -2029,3 +1969,99 @@ class SAVPE(nn.Module):
|
|
|
2029
1969
|
aggregated = score.transpose(-2, -3) @ x.reshape(B, self.c, C // self.c, -1).transpose(-1, -2)
|
|
2030
1970
|
|
|
2031
1971
|
return F.normalize(aggregated.transpose(-2, -3).reshape(B, Q, -1), dim=-1, p=2)
|
|
1972
|
+
|
|
1973
|
+
|
|
1974
|
+
class Proto26(Proto):
|
|
1975
|
+
"""Ultralytics YOLO26 models mask Proto module for segmentation models."""
|
|
1976
|
+
|
|
1977
|
+
def __init__(self, ch: tuple = (), c_: int = 256, c2: int = 32, nc: int = 80):
|
|
1978
|
+
"""Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
|
|
1979
|
+
|
|
1980
|
+
Args:
|
|
1981
|
+
ch (tuple): Tuple of channel sizes from backbone feature maps.
|
|
1982
|
+
c_ (int): Intermediate channels.
|
|
1983
|
+
c2 (int): Output channels (number of protos).
|
|
1984
|
+
nc (int): Number of classes for semantic segmentation.
|
|
1985
|
+
"""
|
|
1986
|
+
super().__init__(c_, c_, c2)
|
|
1987
|
+
self.feat_refine = nn.ModuleList(Conv(x, ch[0], k=1) for x in ch[1:])
|
|
1988
|
+
self.feat_fuse = Conv(ch[0], c_, k=3)
|
|
1989
|
+
self.semseg = nn.Sequential(Conv(ch[0], c_, k=3), Conv(c_, c_, k=3), nn.Conv2d(c_, nc, 1))
|
|
1990
|
+
|
|
1991
|
+
def forward(self, x: torch.Tensor, return_semseg: bool = True) -> torch.Tensor:
|
|
1992
|
+
"""Perform a forward pass through layers using an upsampled input image."""
|
|
1993
|
+
feat = x[0]
|
|
1994
|
+
for i, f in enumerate(self.feat_refine):
|
|
1995
|
+
up_feat = f(x[i + 1])
|
|
1996
|
+
up_feat = F.interpolate(up_feat, size=feat.shape[2:], mode="nearest")
|
|
1997
|
+
feat = feat + up_feat
|
|
1998
|
+
p = super().forward(self.feat_fuse(feat))
|
|
1999
|
+
if self.training and return_semseg:
|
|
2000
|
+
semseg = self.semseg(feat)
|
|
2001
|
+
return (p, semseg)
|
|
2002
|
+
return p
|
|
2003
|
+
|
|
2004
|
+
def fuse(self):
|
|
2005
|
+
"""Fuse the model for inference by removing the semantic segmentation head."""
|
|
2006
|
+
self.semseg = None
|
|
2007
|
+
|
|
2008
|
+
|
|
2009
|
+
class RealNVP(nn.Module):
|
|
2010
|
+
"""RealNVP: a flow-based generative model.
|
|
2011
|
+
|
|
2012
|
+
References:
|
|
2013
|
+
https://arxiv.org/abs/1605.08803
|
|
2014
|
+
https://github.com/open-mmlab/mmpose/blob/main/mmpose/models/utils/realnvp.py
|
|
2015
|
+
"""
|
|
2016
|
+
|
|
2017
|
+
@staticmethod
|
|
2018
|
+
def nets():
|
|
2019
|
+
"""Get the scale model in a single invertable mapping."""
|
|
2020
|
+
return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2), nn.Tanh())
|
|
2021
|
+
|
|
2022
|
+
@staticmethod
|
|
2023
|
+
def nett():
|
|
2024
|
+
"""Get the translation model in a single invertable mapping."""
|
|
2025
|
+
return nn.Sequential(nn.Linear(2, 64), nn.SiLU(), nn.Linear(64, 64), nn.SiLU(), nn.Linear(64, 2))
|
|
2026
|
+
|
|
2027
|
+
@property
|
|
2028
|
+
def prior(self):
|
|
2029
|
+
"""The prior distribution."""
|
|
2030
|
+
return torch.distributions.MultivariateNormal(self.loc, self.cov)
|
|
2031
|
+
|
|
2032
|
+
def __init__(self):
|
|
2033
|
+
super().__init__()
|
|
2034
|
+
|
|
2035
|
+
self.register_buffer("loc", torch.zeros(2))
|
|
2036
|
+
self.register_buffer("cov", torch.eye(2))
|
|
2037
|
+
self.register_buffer("mask", torch.tensor([[0, 1], [1, 0]] * 3, dtype=torch.float32))
|
|
2038
|
+
|
|
2039
|
+
self.s = torch.nn.ModuleList([self.nets() for _ in range(len(self.mask))])
|
|
2040
|
+
self.t = torch.nn.ModuleList([self.nett() for _ in range(len(self.mask))])
|
|
2041
|
+
self.init_weights()
|
|
2042
|
+
|
|
2043
|
+
def init_weights(self):
|
|
2044
|
+
"""Initialization model weights."""
|
|
2045
|
+
for m in self.modules():
|
|
2046
|
+
if isinstance(m, nn.Linear):
|
|
2047
|
+
nn.init.xavier_uniform_(m.weight, gain=0.01)
|
|
2048
|
+
|
|
2049
|
+
def backward_p(self, x):
|
|
2050
|
+
"""Apply mapping form the data space to the latent space and calculate the log determinant of the Jacobian
|
|
2051
|
+
matrix.
|
|
2052
|
+
"""
|
|
2053
|
+
log_det_jacob, z = x.new_zeros(x.shape[0]), x
|
|
2054
|
+
for i in reversed(range(len(self.t))):
|
|
2055
|
+
z_ = self.mask[i] * z
|
|
2056
|
+
s = self.s[i](z_) * (1 - self.mask[i])
|
|
2057
|
+
t = self.t[i](z_) * (1 - self.mask[i])
|
|
2058
|
+
z = (1 - self.mask[i]) * (z - t) * torch.exp(-s) + z_
|
|
2059
|
+
log_det_jacob -= s.sum(dim=1)
|
|
2060
|
+
return z, log_det_jacob
|
|
2061
|
+
|
|
2062
|
+
def log_prob(self, x):
|
|
2063
|
+
"""Calculate the log probability of given sample in data space."""
|
|
2064
|
+
if x.dtype == torch.float32 and self.s[0][0].weight.dtype != torch.float32:
|
|
2065
|
+
self.float()
|
|
2066
|
+
z, log_det = self.backward_p(x)
|
|
2067
|
+
return self.prior.log_prob(z) + log_det
|