dgenerate-ultralytics-headless 8.3.214__py3-none-any.whl → 8.4.7__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/METADATA +64 -74
- dgenerate_ultralytics_headless-8.4.7.dist-info/RECORD +311 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/WHEEL +1 -1
- tests/__init__.py +7 -9
- tests/conftest.py +8 -15
- tests/test_cli.py +1 -1
- tests/test_cuda.py +13 -10
- tests/test_engine.py +9 -9
- tests/test_exports.py +65 -13
- tests/test_integrations.py +13 -13
- tests/test_python.py +125 -69
- tests/test_solutions.py +161 -152
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +86 -92
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/TT100K.yaml +346 -0
- ultralytics/cfg/datasets/VOC.yaml +15 -16
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco-pose.yaml +21 -0
- ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
- ultralytics/cfg/datasets/dog-pose.yaml +28 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +5 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/default.yaml +4 -2
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
- ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
- ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
- ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
- ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
- ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
- ultralytics/cfg/models/26/yolo26.yaml +52 -0
- ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
- ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
- ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
- ultralytics/cfg/models/v6/yolov6.yaml +1 -1
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +5 -6
- ultralytics/data/augment.py +300 -475
- ultralytics/data/base.py +18 -26
- ultralytics/data/build.py +147 -25
- ultralytics/data/converter.py +108 -87
- ultralytics/data/dataset.py +47 -75
- ultralytics/data/loaders.py +42 -49
- ultralytics/data/split.py +5 -6
- ultralytics/data/split_dota.py +8 -15
- ultralytics/data/utils.py +36 -45
- ultralytics/engine/exporter.py +351 -263
- ultralytics/engine/model.py +186 -225
- ultralytics/engine/predictor.py +45 -54
- ultralytics/engine/results.py +198 -325
- ultralytics/engine/trainer.py +165 -106
- ultralytics/engine/tuner.py +41 -43
- ultralytics/engine/validator.py +55 -38
- ultralytics/hub/__init__.py +16 -19
- ultralytics/hub/auth.py +6 -12
- ultralytics/hub/google/__init__.py +7 -10
- ultralytics/hub/session.py +15 -25
- ultralytics/hub/utils.py +5 -8
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +8 -10
- ultralytics/models/fastsam/predict.py +18 -30
- ultralytics/models/fastsam/utils.py +1 -2
- ultralytics/models/fastsam/val.py +5 -7
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +5 -8
- ultralytics/models/nas/predict.py +7 -9
- ultralytics/models/nas/val.py +1 -2
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +5 -8
- ultralytics/models/rtdetr/predict.py +15 -19
- ultralytics/models/rtdetr/train.py +10 -13
- ultralytics/models/rtdetr/val.py +21 -23
- ultralytics/models/sam/__init__.py +15 -2
- ultralytics/models/sam/amg.py +14 -20
- ultralytics/models/sam/build.py +26 -19
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +29 -32
- ultralytics/models/sam/modules/blocks.py +83 -144
- ultralytics/models/sam/modules/decoders.py +19 -37
- ultralytics/models/sam/modules/encoders.py +44 -101
- ultralytics/models/sam/modules/memory_attention.py +16 -30
- ultralytics/models/sam/modules/sam.py +200 -73
- ultralytics/models/sam/modules/tiny_encoder.py +64 -83
- ultralytics/models/sam/modules/transformer.py +18 -28
- ultralytics/models/sam/modules/utils.py +174 -50
- ultralytics/models/sam/predict.py +2248 -350
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +529 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +199 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +547 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/utils/loss.py +14 -26
- ultralytics/models/utils/ops.py +13 -17
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +10 -13
- ultralytics/models/yolo/classify/train.py +12 -33
- ultralytics/models/yolo/classify/val.py +30 -29
- ultralytics/models/yolo/detect/predict.py +9 -12
- ultralytics/models/yolo/detect/train.py +17 -23
- ultralytics/models/yolo/detect/val.py +77 -59
- ultralytics/models/yolo/model.py +43 -60
- ultralytics/models/yolo/obb/predict.py +7 -16
- ultralytics/models/yolo/obb/train.py +14 -17
- ultralytics/models/yolo/obb/val.py +40 -37
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +7 -22
- ultralytics/models/yolo/pose/train.py +13 -16
- ultralytics/models/yolo/pose/val.py +39 -58
- ultralytics/models/yolo/segment/predict.py +17 -21
- ultralytics/models/yolo/segment/train.py +7 -10
- ultralytics/models/yolo/segment/val.py +95 -47
- ultralytics/models/yolo/world/train.py +8 -14
- ultralytics/models/yolo/world/train_world.py +11 -34
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +16 -23
- ultralytics/models/yolo/yoloe/train.py +36 -44
- ultralytics/models/yolo/yoloe/train_seg.py +11 -11
- ultralytics/models/yolo/yoloe/val.py +15 -20
- ultralytics/nn/__init__.py +7 -7
- ultralytics/nn/autobackend.py +159 -85
- ultralytics/nn/modules/__init__.py +68 -60
- ultralytics/nn/modules/activation.py +4 -6
- ultralytics/nn/modules/block.py +260 -224
- ultralytics/nn/modules/conv.py +52 -97
- ultralytics/nn/modules/head.py +831 -299
- ultralytics/nn/modules/transformer.py +76 -88
- ultralytics/nn/modules/utils.py +16 -21
- ultralytics/nn/tasks.py +180 -195
- ultralytics/nn/text_model.py +45 -69
- ultralytics/optim/__init__.py +5 -0
- ultralytics/optim/muon.py +338 -0
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +13 -19
- ultralytics/solutions/analytics.py +15 -16
- ultralytics/solutions/config.py +6 -7
- ultralytics/solutions/distance_calculation.py +10 -13
- ultralytics/solutions/heatmap.py +8 -14
- ultralytics/solutions/instance_segmentation.py +6 -9
- ultralytics/solutions/object_blurrer.py +7 -10
- ultralytics/solutions/object_counter.py +12 -19
- ultralytics/solutions/object_cropper.py +8 -14
- ultralytics/solutions/parking_management.py +34 -32
- ultralytics/solutions/queue_management.py +10 -12
- ultralytics/solutions/region_counter.py +9 -12
- ultralytics/solutions/security_alarm.py +15 -20
- ultralytics/solutions/similarity_search.py +10 -15
- ultralytics/solutions/solutions.py +77 -76
- ultralytics/solutions/speed_estimation.py +7 -10
- ultralytics/solutions/streamlit_inference.py +2 -4
- ultralytics/solutions/templates/similarity-search.html +7 -18
- ultralytics/solutions/trackzone.py +7 -10
- ultralytics/solutions/vision_eye.py +5 -8
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +3 -5
- ultralytics/trackers/bot_sort.py +10 -27
- ultralytics/trackers/byte_tracker.py +21 -37
- ultralytics/trackers/track.py +4 -7
- ultralytics/trackers/utils/gmc.py +11 -22
- ultralytics/trackers/utils/kalman_filter.py +37 -48
- ultralytics/trackers/utils/matching.py +12 -15
- ultralytics/utils/__init__.py +124 -124
- ultralytics/utils/autobatch.py +2 -4
- ultralytics/utils/autodevice.py +17 -18
- ultralytics/utils/benchmarks.py +57 -71
- ultralytics/utils/callbacks/base.py +8 -10
- ultralytics/utils/callbacks/clearml.py +5 -13
- ultralytics/utils/callbacks/comet.py +32 -46
- ultralytics/utils/callbacks/dvc.py +13 -18
- ultralytics/utils/callbacks/mlflow.py +4 -5
- ultralytics/utils/callbacks/neptune.py +7 -15
- ultralytics/utils/callbacks/platform.py +423 -38
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +25 -31
- ultralytics/utils/callbacks/wb.py +16 -14
- ultralytics/utils/checks.py +127 -85
- ultralytics/utils/cpu.py +3 -8
- ultralytics/utils/dist.py +9 -12
- ultralytics/utils/downloads.py +25 -33
- ultralytics/utils/errors.py +6 -14
- ultralytics/utils/events.py +2 -4
- ultralytics/utils/export/__init__.py +4 -236
- ultralytics/utils/export/engine.py +246 -0
- ultralytics/utils/export/imx.py +117 -63
- ultralytics/utils/export/tensorflow.py +231 -0
- ultralytics/utils/files.py +26 -30
- ultralytics/utils/git.py +9 -11
- ultralytics/utils/instance.py +30 -51
- ultralytics/utils/logger.py +212 -114
- ultralytics/utils/loss.py +601 -215
- ultralytics/utils/metrics.py +128 -156
- ultralytics/utils/nms.py +13 -16
- ultralytics/utils/ops.py +117 -166
- ultralytics/utils/patches.py +75 -21
- ultralytics/utils/plotting.py +75 -80
- ultralytics/utils/tal.py +125 -59
- ultralytics/utils/torch_utils.py +53 -79
- ultralytics/utils/tqdm.py +24 -21
- ultralytics/utils/triton.py +13 -19
- ultralytics/utils/tuner.py +19 -10
- dgenerate_ultralytics_headless-8.3.214.dist-info/RECORD +0 -283
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.4.7.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/transformer.py CHANGED

```diff
@@ -16,22 +16,21 @@ from .conv import Conv
 from .utils import _get_clones, inverse_sigmoid, multi_scale_deformable_attn_pytorch
 
 __all__ = (
-    "TransformerEncoderLayer",
-    "TransformerLayer",
-    "TransformerBlock",
-    "MLPBlock",
-    "LayerNorm2d",
     "AIFI",
+    "MLP",
     "DeformableTransformerDecoder",
     "DeformableTransformerDecoderLayer",
+    "LayerNorm2d",
+    "MLPBlock",
     "MSDeformAttn",
-    "MLP",
+    "TransformerBlock",
+    "TransformerEncoderLayer",
+    "TransformerLayer",
 )
 
 
 class TransformerEncoderLayer(nn.Module):
-    """
-    A single layer of the transformer encoder.
+    """A single layer of the transformer encoder.
 
     This class implements a standard transformer encoder layer with multi-head attention and feedforward network,
     supporting both pre-normalization and post-normalization configurations.
@@ -58,8 +57,7 @@ class TransformerEncoderLayer(nn.Module):
         act: nn.Module = nn.GELU(),
         normalize_before: bool = False,
     ):
-        """
-        Initialize the TransformerEncoderLayer with specified parameters.
+        """Initialize the TransformerEncoderLayer with specified parameters.
 
         Args:
             c1 (int): Input dimension.
@@ -102,8 +100,7 @@ class TransformerEncoderLayer(nn.Module):
         src_key_padding_mask: torch.Tensor | None = None,
         pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        """
-        Perform forward pass with post-normalization.
+        """Perform forward pass with post-normalization.
 
         Args:
             src (torch.Tensor): Input tensor.
@@ -129,8 +126,7 @@ class TransformerEncoderLayer(nn.Module):
         src_key_padding_mask: torch.Tensor | None = None,
         pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        """
-        Perform forward pass with pre-normalization.
+        """Perform forward pass with pre-normalization.
 
         Args:
             src (torch.Tensor): Input tensor.
@@ -156,8 +152,7 @@ class TransformerEncoderLayer(nn.Module):
         src_key_padding_mask: torch.Tensor | None = None,
         pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        """
-        Forward propagate the input through the encoder module.
+        """Forward propagate the input through the encoder module.
 
         Args:
             src (torch.Tensor): Input tensor.
@@ -174,8 +169,7 @@ class TransformerEncoderLayer(nn.Module):
 
 
 class AIFI(TransformerEncoderLayer):
-    """
-    AIFI transformer layer for 2D data with positional embeddings.
+    """AIFI transformer layer for 2D data with positional embeddings.
 
     This class extends TransformerEncoderLayer to work with 2D feature maps by adding 2D sine-cosine positional
     embeddings and handling the spatial dimensions appropriately.
@@ -190,8 +184,7 @@ class AIFI(TransformerEncoderLayer):
         act: nn.Module = nn.GELU(),
         normalize_before: bool = False,
     ):
-        """
-        Initialize the AIFI instance with specified parameters.
+        """Initialize the AIFI instance with specified parameters.
 
         Args:
             c1 (int): Input dimension.
@@ -204,8 +197,7 @@ class AIFI(TransformerEncoderLayer):
         super().__init__(c1, cm, num_heads, dropout, act, normalize_before)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Forward pass for the AIFI transformer layer.
+        """Forward pass for the AIFI transformer layer.
 
         Args:
             x (torch.Tensor): Input tensor with shape [B, C, H, W].
@@ -223,8 +215,7 @@ class AIFI(TransformerEncoderLayer):
     def build_2d_sincos_position_embedding(
         w: int, h: int, embed_dim: int = 256, temperature: float = 10000.0
     ) -> torch.Tensor:
-        """
-        Build 2D sine-cosine position embedding.
+        """Build 2D sine-cosine position embedding.
 
         Args:
             w (int): Width of the feature map.
```
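As context for the AIFI hunks above: the layer flattens a [B, C, H, W] feature map into a token sequence, adds a 2D sine-cosine embedding, and restores the spatial layout on output. A minimal shape sketch, assuming the signatures shown in the hunks and default values for the constructor arguments not visible here:

```python
import torch

from ultralytics.nn.modules.transformer import AIFI

x = torch.randn(1, 256, 20, 20)  # [B, C, H, W], illustrative sizes
# Static helper from the hunk above; embed_dim matches the channel count
# (2D sin-cos embeddings conventionally need embed_dim divisible by 4).
pe = AIFI.build_2d_sincos_position_embedding(20, 20, embed_dim=256)
layer = AIFI(256)  # assumes the remaining constructor arguments have defaults
out = layer(x)
print(out.shape)  # torch.Size([1, 256, 20, 20]) per the forward docstring
```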
```diff
@@ -253,8 +244,7 @@ class TransformerLayer(nn.Module):
     """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""
 
     def __init__(self, c: int, num_heads: int):
-        """
-        Initialize a self-attention mechanism using linear transformations and multi-head attention.
+        """Initialize a self-attention mechanism using linear transformations and multi-head attention.
 
         Args:
             c (int): Input and output channel dimension.
@@ -269,8 +259,7 @@ class TransformerLayer(nn.Module):
         self.fc2 = nn.Linear(c, c, bias=False)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Apply a transformer block to the input x and return the output.
+        """Apply a transformer block to the input x and return the output.
 
         Args:
             x (torch.Tensor): Input tensor.
@@ -283,11 +272,10 @@ class TransformerLayer(nn.Module):
 
 
 class TransformerBlock(nn.Module):
-    """
-    Vision Transformer block based on https://arxiv.org/abs/2010.11929.
+    """Vision Transformer block based on https://arxiv.org/abs/2010.11929.
 
-    This class implements a complete transformer block with optional convolution layer for channel adjustment,
-    learnable position embedding, and multiple transformer layers.
+    This class implements a complete transformer block with optional convolution layer for channel adjustment, learnable
+    position embedding, and multiple transformer layers.
 
     Attributes:
         conv (Conv, optional): Convolution layer if input and output channels differ.
@@ -297,8 +285,7 @@ class TransformerBlock(nn.Module):
     """
 
     def __init__(self, c1: int, c2: int, num_heads: int, num_layers: int):
-        """
-        Initialize a Transformer module with position embedding and specified number of heads and layers.
+        """Initialize a Transformer module with position embedding and specified number of heads and layers.
 
         Args:
             c1 (int): Input channel dimension.
@@ -315,28 +302,26 @@ class TransformerBlock(nn.Module):
         self.c2 = c2
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Forward propagate the input through the transformer block.
+        """Forward propagate the input through the transformer block.
 
         Args:
-            x (torch.Tensor): Input tensor with shape [b, c1, w, h].
+            x (torch.Tensor): Input tensor with shape [b, c1, h, w].
 
         Returns:
-            (torch.Tensor): Output tensor with shape [b, c2, w, h].
+            (torch.Tensor): Output tensor with shape [b, c2, h, w].
         """
         if self.conv is not None:
             x = self.conv(x)
-        b, _, w, h = x.shape
+        b, _, h, w = x.shape
         p = x.flatten(2).permute(2, 0, 1)
-        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
+        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, h, w)
 
 
 class MLPBlock(nn.Module):
     """A single block of a multi-layer perceptron."""
 
     def __init__(self, embedding_dim: int, mlp_dim: int, act=nn.GELU):
-        """
-        Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function.
+        """Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function.
 
         Args:
             embedding_dim (int): Input and output dimension.
```
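The only behavioral content in the TransformerBlock.forward hunk is a shape round trip; the h/w rename corrects variable naming without changing the math. A quick self-contained check of that round trip (illustrative sizes):

```python
import torch

# flatten(2) turns [b, c, h, w] into [b, c, h*w]; permute(2, 0, 1) makes one
# token per spatial cell; the inverse permute + reshape restores the layout.
b, c, h, w = 2, 64, 8, 10
x = torch.randn(b, c, h, w)
p = x.flatten(2).permute(2, 0, 1)                 # [h*w, b, c]
restored = p.permute(1, 2, 0).reshape(b, c, h, w)
assert torch.equal(x, restored)                   # the round trip is lossless
```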
```diff
@@ -349,8 +334,7 @@ class MLPBlock(nn.Module):
         self.act = act()
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Forward pass for the MLPBlock.
+        """Forward pass for the MLPBlock.
 
         Args:
             x (torch.Tensor): Input tensor.
@@ -362,11 +346,10 @@ class MLPBlock(nn.Module):
 
 
 class MLP(nn.Module):
-    """
-    A simple multi-layer perceptron (also called FFN).
+    """A simple multi-layer perceptron (also called FFN).
 
-    This class implements a configurable MLP with multiple linear layers, activation functions, and optional
-    sigmoid output activation.
+    This class implements a configurable MLP with multiple linear layers, activation functions, and optional sigmoid
+    output activation.
 
     Attributes:
         num_layers (int): Number of layers in the MLP.
@@ -376,10 +359,17 @@ class MLP(nn.Module):
     """
 
     def __init__(
-        self, input_dim: int, hidden_dim: int, output_dim: int, num_layers: int, act=nn.ReLU, sigmoid: bool = False
+        self,
+        input_dim: int,
+        hidden_dim: int,
+        output_dim: int,
+        num_layers: int,
+        act=nn.ReLU,
+        sigmoid: bool = False,
+        residual: bool = False,
+        out_norm: nn.Module = None,
     ):
-        """
-        Initialize the MLP with specified input, hidden, output dimensions and number of layers.
+        """Initialize the MLP with specified input, hidden, output dimensions and number of layers.
 
         Args:
             input_dim (int): Input dimension.
@@ -388,17 +378,24 @@ class MLP(nn.Module):
             num_layers (int): Number of layers.
             act (nn.Module): Activation function.
             sigmoid (bool): Whether to apply sigmoid to the output.
+            residual (bool): Whether to use residual connections.
+            out_norm (nn.Module, optional): Normalization layer for the output.
         """
         super().__init__()
        self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)
-        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim, *h], [*h, output_dim]))
         self.sigmoid = sigmoid
         self.act = act()
+        if residual and input_dim != output_dim:
+            raise ValueError("residual is only supported if input_dim == output_dim")
+        self.residual = residual
+        # whether to apply a normalization layer to the output
+        assert isinstance(out_norm, nn.Module) or out_norm is None
+        self.out_norm = out_norm or nn.Identity()
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Forward pass for the entire MLP.
+        """Forward pass for the entire MLP.
 
         Args:
             x (torch.Tensor): Input tensor.
@@ -406,17 +403,20 @@ class MLP(nn.Module):
         Returns:
             (torch.Tensor): Output tensor after MLP.
         """
+        orig_x = x
         for i, layer in enumerate(self.layers):
             x = getattr(self, "act", nn.ReLU())(layer(x)) if i < self.num_layers - 1 else layer(x)
+        if getattr(self, "residual", False):
+            x = x + orig_x
+        x = getattr(self, "out_norm", nn.Identity())(x)
         return x.sigmoid() if getattr(self, "sigmoid", False) else x
 
 
 class LayerNorm2d(nn.Module):
-    """
-    2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
+    """2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.
 
-    This class implements layer normalization for 2D feature maps, normalizing across the channel dimension
-    while preserving spatial dimensions.
+    This class implements layer normalization for 2D feature maps, normalizing across the channel dimension while
+    preserving spatial dimensions.
 
     Attributes:
         weight (nn.Parameter): Learnable scale parameter.
```
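A minimal usage sketch of the two MLP options introduced here, based only on the signature and checks visible in the hunk above (residual requires input_dim == output_dim; out_norm defaults to nn.Identity()):

```python
import torch
import torch.nn as nn

from ultralytics.nn.modules.transformer import MLP

# residual adds the input back onto the MLP output, so the dims must match;
# out_norm is applied after the (optional) residual add.
mlp = MLP(input_dim=256, hidden_dim=512, output_dim=256, num_layers=3,
          residual=True, out_norm=nn.LayerNorm(256))
x = torch.randn(4, 256)
print(mlp(x).shape)  # torch.Size([4, 256])

# Mismatched dims with residual=True raise at construction time:
# MLP(256, 512, 4, num_layers=3, residual=True)  -> ValueError
```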
```diff
@@ -429,8 +429,7 @@ class LayerNorm2d(nn.Module):
     """
 
     def __init__(self, num_channels: int, eps: float = 1e-6):
-        """
-        Initialize LayerNorm2d with the given parameters.
+        """Initialize LayerNorm2d with the given parameters.
 
         Args:
             num_channels (int): Number of channels in the input.
@@ -442,8 +441,7 @@ class LayerNorm2d(nn.Module):
         self.eps = eps
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Perform forward pass for 2D layer normalization.
+        """Perform forward pass for 2D layer normalization.
 
         Args:
             x (torch.Tensor): Input tensor.
@@ -458,11 +456,10 @@ class LayerNorm2d(nn.Module):
 
 
 class MSDeformAttn(nn.Module):
-    """
-    Multiscale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
+    """Multiscale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.
 
-    This module implements multiscale deformable attention that can attend to features at multiple scales
-    with learnable sampling locations and attention weights.
+    This module implements multiscale deformable attention that can attend to features at multiple scales with learnable
+    sampling locations and attention weights.
 
     Attributes:
         im2col_step (int): Step size for im2col operations.
@@ -480,8 +477,7 @@ class MSDeformAttn(nn.Module):
     """
 
     def __init__(self, d_model: int = 256, n_levels: int = 4, n_heads: int = 8, n_points: int = 4):
-        """
-        Initialize MSDeformAttn with the given parameters.
+        """Initialize MSDeformAttn with the given parameters.
 
         Args:
             d_model (int): Model dimension.
@@ -539,13 +535,12 @@ class MSDeformAttn(nn.Module):
         value_shapes: list,
         value_mask: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        """
-        Perform forward pass for multiscale deformable attention.
+        """Perform forward pass for multiscale deformable attention.
 
         Args:
             query (torch.Tensor): Query tensor with shape [bs, query_length, C].
-            refer_bbox (torch.Tensor): Reference bounding boxes with shape [bs, query_length, n_levels, 2],
-                range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area.
+            refer_bbox (torch.Tensor): Reference bounding boxes with shape [bs, query_length, n_levels, 2], range in [0,
+                1], top-left (0,0), bottom-right (1, 1), including padding area.
             value (torch.Tensor): Value tensor with shape [bs, value_length, C].
             value_shapes (list): List with shape [n_levels, 2], [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})].
             value_mask (torch.Tensor, optional): Mask tensor with shape [bs, value_length], True for non-padding
```
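A shape sketch for MSDeformAttn.forward, assuming the constructor defaults and the argument shapes documented in this hunk (one 8x8 feature level, so value_length = 64; values are illustrative only):

```python
import torch

from ultralytics.nn.modules.transformer import MSDeformAttn

bs, n_queries, c = 2, 10, 256
attn = MSDeformAttn(d_model=c, n_levels=1, n_heads=8, n_points=4)
query = torch.randn(bs, n_queries, c)
refer_bbox = torch.rand(bs, n_queries, 1, 2)  # normalized (x, y) per level
value = torch.randn(bs, 64, c)                # 8 * 8 = 64 spatial positions
out = attn(query, refer_bbox, value, [(8, 8)])
print(out.shape)  # torch.Size([2, 10, 256]), i.e. [bs, query_length, C]
```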
```diff
@@ -584,8 +579,7 @@ class MSDeformAttn(nn.Module):
 
 
 class DeformableTransformerDecoderLayer(nn.Module):
-    """
-    Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
+    """Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.
 
     This class implements a single decoder layer with self-attention, cross-attention using multiscale deformable
     attention, and a feedforward network.
@@ -619,8 +613,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
         n_levels: int = 4,
         n_points: int = 4,
     ):
-        """
-        Initialize the DeformableTransformerDecoderLayer with the given parameters.
+        """Initialize the DeformableTransformerDecoderLayer with the given parameters.
 
         Args:
             d_model (int): Model dimension.
@@ -657,8 +650,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
         return tensor if pos is None else tensor + pos
 
     def forward_ffn(self, tgt: torch.Tensor) -> torch.Tensor:
-        """
-        Perform forward pass through the Feed-Forward Network part of the layer.
+        """Perform forward pass through the Feed-Forward Network part of the layer.
 
         Args:
             tgt (torch.Tensor): Input tensor.
@@ -680,8 +672,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
         attn_mask: torch.Tensor | None = None,
         query_pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        """
-        Perform the forward pass through the entire decoder layer.
+        """Perform the forward pass through the entire decoder layer.
 
         Args:
             embed (torch.Tensor): Input embeddings.
@@ -715,11 +706,10 @@ class DeformableTransformerDecoderLayer(nn.Module):
 
 
 class DeformableTransformerDecoder(nn.Module):
-    """
-    Deformable Transformer Decoder based on PaddleDetection implementation.
+    """Deformable Transformer Decoder based on PaddleDetection implementation.
 
-    This class implements a complete deformable transformer decoder with multiple decoder layers and prediction
-    heads for bounding box regression and classification.
+    This class implements a complete deformable transformer decoder with multiple decoder layers and prediction heads
+    for bounding box regression and classification.
 
     Attributes:
         layers (nn.ModuleList): List of decoder layers.
@@ -732,8 +722,7 @@ class DeformableTransformerDecoder(nn.Module):
     """
 
     def __init__(self, hidden_dim: int, decoder_layer: nn.Module, num_layers: int, eval_idx: int = -1):
-        """
-        Initialize the DeformableTransformerDecoder with the given parameters.
+        """Initialize the DeformableTransformerDecoder with the given parameters.
 
         Args:
             hidden_dim (int): Hidden dimension.
@@ -759,8 +748,7 @@ class DeformableTransformerDecoder(nn.Module):
         attn_mask: torch.Tensor | None = None,
         padding_mask: torch.Tensor | None = None,
     ):
-        """
-        Perform the forward pass through the entire decoder.
+        """Perform the forward pass through the entire decoder.
 
         Args:
             embed (torch.Tensor): Decoder embeddings.
```
ultralytics/nn/modules/utils.py CHANGED
```diff
@@ -9,12 +9,11 @@ import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn.init import uniform_
 
-__all__ = "multi_scale_deformable_attn_pytorch", "inverse_sigmoid"
+__all__ = "inverse_sigmoid", "multi_scale_deformable_attn_pytorch"
 
 
 def _get_clones(module, n):
-    """
-    Create a list of cloned modules from the given module.
+    """Create a list of cloned modules from the given module.
 
     Args:
         module (nn.Module): The module to be cloned.
@@ -34,12 +33,11 @@ def bias_init_with_prob(prior_prob=0.01):
 
 
 def bias_init_with_prob(prior_prob=0.01):
-    """
-    Initialize conv/fc bias value according to a given probability value.
+    """Initialize conv/fc bias value according to a given probability value.
 
-    This function calculates the bias initialization value based on a prior probability using the inverse error
-    It's commonly used in object detection models to initialize classification layers with a specific positive
-    probability.
+    This function calculates the bias initialization value based on a prior probability using the inverse error
+    function. It's commonly used in object detection models to initialize classification layers with a specific positive
+    prediction probability.
 
     Args:
         prior_prob (float, optional): Prior probability for bias initialization.
```
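For reference, this kind of bias initialization is conventionally the negative log-odds of the prior (the sketch below assumes that formula; the function body itself is not shown in this hunk):

```python
import math

# Choosing b = -log((1 - p) / p) makes sigmoid(b) equal the prior probability
# p, so a freshly initialized classification head predicts positives at rate p.
p = 0.01
b = -math.log((1 - p) / p)
print(b)                        # ~ -4.595
print(1 / (1 + math.exp(-b)))   # ~ 0.01, recovering the prior
```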
```diff
@@ -56,11 +54,10 @@ def bias_init_with_prob(prior_prob=0.01):
 
 
 def linear_init(module):
-    """
-    Initialize the weights and biases of a linear module.
+    """Initialize the weights and biases of a linear module.
 
-    This function initializes the weights of a linear module using a uniform distribution within bounds calculated
-    from the input dimension. If the module has a bias, it is also initialized.
+    This function initializes the weights of a linear module using a uniform distribution within bounds calculated from
+    the input dimension. If the module has a bias, it is also initialized.
 
     Args:
         module (nn.Module): Linear module to initialize.
@@ -80,8 +77,7 @@ def linear_init(module):
 
 
 def inverse_sigmoid(x, eps=1e-5):
-    """
-    Calculate the inverse sigmoid function for a tensor.
+    """Calculate the inverse sigmoid function for a tensor.
 
     This function applies the inverse of the sigmoid function to a tensor, which is useful in various neural network
     operations, particularly in attention mechanisms and coordinate transformations.
```
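A reference re-implementation of the documented behavior, to show the clamped-logit relationship (a sketch, not necessarily the library's exact code):

```python
import torch

def inverse_sigmoid_ref(x: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:
    # Clamp into [0, 1], then take the logit; eps guards against log(0).
    x = x.clamp(min=0, max=1)
    return torch.log(x.clamp(min=eps) / (1 - x).clamp(min=eps))

y = torch.tensor([-2.0, 0.0, 3.0])
# Away from saturation, inverse_sigmoid(sigmoid(y)) recovers y.
print(inverse_sigmoid_ref(torch.sigmoid(y)))  # ~ tensor([-2., 0., 3.])
```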
```diff
@@ -110,8 +106,7 @@ def multi_scale_deformable_attn_pytorch(
     sampling_locations: torch.Tensor,
     attention_weights: torch.Tensor,
 ) -> torch.Tensor:
-    """
-    Implement multi-scale deformable attention in PyTorch.
+    """Implement multi-scale deformable attention in PyTorch.
 
     This function performs deformable attention across multiple feature map scales, allowing the model to attend to
     different spatial locations with learned offsets.
@@ -119,10 +114,10 @@ def multi_scale_deformable_attn_pytorch(
     Args:
         value (torch.Tensor): The value tensor with shape (bs, num_keys, num_heads, embed_dims).
         value_spatial_shapes (torch.Tensor): Spatial shapes of the value tensor with shape (num_levels, 2).
-        sampling_locations (torch.Tensor): The sampling locations with shape
-            (bs, num_queries, num_heads, num_levels, num_points, 2).
-        attention_weights (torch.Tensor): The attention weights with shape
-            (bs, num_queries, num_heads, num_levels, num_points).
+        sampling_locations (torch.Tensor): The sampling locations with shape (bs, num_queries, num_heads, num_levels,
+            num_points, 2).
+        attention_weights (torch.Tensor): The attention weights with shape (bs, num_queries, num_heads, num_levels,
+            num_points).
 
     Returns:
         (torch.Tensor): The output tensor with shape (bs, num_queries, embed_dims).
```
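A shape sketch exercising the signature documented above (illustrative sizes; num_keys must equal the sum of H*W over levels, here 8*8 + 4*4 = 80):

```python
import torch

from ultralytics.nn.modules.utils import multi_scale_deformable_attn_pytorch

bs, heads, dims, queries, points = 2, 8, 32, 10, 4
shapes = torch.tensor([[8, 8], [4, 4]])               # two feature levels
value = torch.randn(bs, 80, heads, dims)              # 80 = 64 + 16 keys
locs = torch.rand(bs, queries, heads, 2, points, 2)   # normalized (x, y)
weights = torch.softmax(torch.randn(bs, queries, heads, 2 * points), -1)
weights = weights.view(bs, queries, heads, 2, points)
out = multi_scale_deformable_attn_pytorch(value, shapes, locs, weights)
print(out.shape)  # torch.Size([2, 10, 256]); the last dim is heads * dims
```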
```diff
@@ -152,7 +147,7 @@ def multi_scale_deformable_attn_pytorch(
         sampling_value_list.append(sampling_value_l_)
     # (bs, num_queries, num_heads, num_levels, num_points) ->
     # (bs, num_heads, num_queries, num_levels, num_points) ->
-    # (bs, num_heads, 1, num_queries, num_levels*num_points)
+    # (bs*num_heads, 1, num_queries, num_levels*num_points)
     attention_weights = attention_weights.transpose(1, 2).reshape(
         bs * num_heads, 1, num_queries, num_levels * num_points
     )
```