dgenerate-ultralytics-headless 8.3.196 → 8.3.248 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +33 -34
- dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
- tests/__init__.py +5 -7
- tests/conftest.py +8 -15
- tests/test_cli.py +8 -10
- tests/test_cuda.py +9 -10
- tests/test_engine.py +29 -2
- tests/test_exports.py +69 -21
- tests/test_integrations.py +8 -11
- tests/test_python.py +109 -71
- tests/test_solutions.py +170 -159
- ultralytics/__init__.py +27 -9
- ultralytics/cfg/__init__.py +57 -64
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/Objects365.yaml +19 -15
- ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +19 -21
- ultralytics/cfg/datasets/VisDrone.yaml +5 -5
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco-pose.yaml +24 -2
- ultralytics/cfg/datasets/coco.yaml +2 -2
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
- ultralytics/cfg/datasets/construction-ppe.yaml +32 -0
- ultralytics/cfg/datasets/dog-pose.yaml +28 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +7 -7
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/default.yaml +96 -94
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
- ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
- ultralytics/cfg/models/v6/yolov6.yaml +1 -1
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +3 -4
- ultralytics/data/augment.py +286 -476
- ultralytics/data/base.py +18 -26
- ultralytics/data/build.py +151 -26
- ultralytics/data/converter.py +38 -50
- ultralytics/data/dataset.py +47 -75
- ultralytics/data/loaders.py +42 -49
- ultralytics/data/split.py +5 -6
- ultralytics/data/split_dota.py +8 -15
- ultralytics/data/utils.py +41 -45
- ultralytics/engine/exporter.py +462 -462
- ultralytics/engine/model.py +150 -191
- ultralytics/engine/predictor.py +30 -40
- ultralytics/engine/results.py +177 -311
- ultralytics/engine/trainer.py +193 -120
- ultralytics/engine/tuner.py +77 -63
- ultralytics/engine/validator.py +39 -22
- ultralytics/hub/__init__.py +16 -19
- ultralytics/hub/auth.py +6 -12
- ultralytics/hub/google/__init__.py +7 -10
- ultralytics/hub/session.py +15 -25
- ultralytics/hub/utils.py +5 -8
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +8 -10
- ultralytics/models/fastsam/predict.py +19 -30
- ultralytics/models/fastsam/utils.py +1 -2
- ultralytics/models/fastsam/val.py +5 -7
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +5 -8
- ultralytics/models/nas/predict.py +7 -9
- ultralytics/models/nas/val.py +1 -2
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +7 -8
- ultralytics/models/rtdetr/predict.py +15 -19
- ultralytics/models/rtdetr/train.py +10 -13
- ultralytics/models/rtdetr/val.py +21 -23
- ultralytics/models/sam/__init__.py +15 -2
- ultralytics/models/sam/amg.py +14 -20
- ultralytics/models/sam/build.py +26 -19
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +29 -32
- ultralytics/models/sam/modules/blocks.py +83 -144
- ultralytics/models/sam/modules/decoders.py +22 -40
- ultralytics/models/sam/modules/encoders.py +44 -101
- ultralytics/models/sam/modules/memory_attention.py +16 -30
- ultralytics/models/sam/modules/sam.py +206 -79
- ultralytics/models/sam/modules/tiny_encoder.py +64 -83
- ultralytics/models/sam/modules/transformer.py +18 -28
- ultralytics/models/sam/modules/utils.py +174 -50
- ultralytics/models/sam/predict.py +2268 -366
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +529 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +199 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +547 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/utils/loss.py +14 -26
- ultralytics/models/utils/ops.py +13 -17
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +9 -12
- ultralytics/models/yolo/classify/train.py +15 -41
- ultralytics/models/yolo/classify/val.py +34 -32
- ultralytics/models/yolo/detect/predict.py +8 -11
- ultralytics/models/yolo/detect/train.py +13 -32
- ultralytics/models/yolo/detect/val.py +75 -63
- ultralytics/models/yolo/model.py +37 -53
- ultralytics/models/yolo/obb/predict.py +5 -14
- ultralytics/models/yolo/obb/train.py +11 -14
- ultralytics/models/yolo/obb/val.py +42 -39
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +7 -22
- ultralytics/models/yolo/pose/train.py +10 -22
- ultralytics/models/yolo/pose/val.py +40 -59
- ultralytics/models/yolo/segment/predict.py +16 -20
- ultralytics/models/yolo/segment/train.py +3 -12
- ultralytics/models/yolo/segment/val.py +106 -56
- ultralytics/models/yolo/world/train.py +12 -16
- ultralytics/models/yolo/world/train_world.py +11 -34
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +16 -23
- ultralytics/models/yolo/yoloe/train.py +31 -56
- ultralytics/models/yolo/yoloe/train_seg.py +5 -10
- ultralytics/models/yolo/yoloe/val.py +16 -21
- ultralytics/nn/__init__.py +7 -7
- ultralytics/nn/autobackend.py +152 -80
- ultralytics/nn/modules/__init__.py +60 -60
- ultralytics/nn/modules/activation.py +4 -6
- ultralytics/nn/modules/block.py +133 -217
- ultralytics/nn/modules/conv.py +52 -97
- ultralytics/nn/modules/head.py +64 -116
- ultralytics/nn/modules/transformer.py +79 -89
- ultralytics/nn/modules/utils.py +16 -21
- ultralytics/nn/tasks.py +111 -156
- ultralytics/nn/text_model.py +40 -67
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +11 -17
- ultralytics/solutions/analytics.py +15 -16
- ultralytics/solutions/config.py +5 -6
- ultralytics/solutions/distance_calculation.py +10 -13
- ultralytics/solutions/heatmap.py +7 -13
- ultralytics/solutions/instance_segmentation.py +5 -8
- ultralytics/solutions/object_blurrer.py +7 -10
- ultralytics/solutions/object_counter.py +12 -19
- ultralytics/solutions/object_cropper.py +8 -14
- ultralytics/solutions/parking_management.py +33 -31
- ultralytics/solutions/queue_management.py +10 -12
- ultralytics/solutions/region_counter.py +9 -12
- ultralytics/solutions/security_alarm.py +15 -20
- ultralytics/solutions/similarity_search.py +13 -17
- ultralytics/solutions/solutions.py +75 -74
- ultralytics/solutions/speed_estimation.py +7 -10
- ultralytics/solutions/streamlit_inference.py +4 -7
- ultralytics/solutions/templates/similarity-search.html +7 -18
- ultralytics/solutions/trackzone.py +7 -10
- ultralytics/solutions/vision_eye.py +5 -8
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +3 -5
- ultralytics/trackers/bot_sort.py +10 -27
- ultralytics/trackers/byte_tracker.py +14 -30
- ultralytics/trackers/track.py +3 -6
- ultralytics/trackers/utils/gmc.py +11 -22
- ultralytics/trackers/utils/kalman_filter.py +37 -48
- ultralytics/trackers/utils/matching.py +12 -15
- ultralytics/utils/__init__.py +116 -116
- ultralytics/utils/autobatch.py +2 -4
- ultralytics/utils/autodevice.py +17 -18
- ultralytics/utils/benchmarks.py +70 -70
- ultralytics/utils/callbacks/base.py +8 -10
- ultralytics/utils/callbacks/clearml.py +5 -13
- ultralytics/utils/callbacks/comet.py +32 -46
- ultralytics/utils/callbacks/dvc.py +13 -18
- ultralytics/utils/callbacks/mlflow.py +4 -5
- ultralytics/utils/callbacks/neptune.py +7 -15
- ultralytics/utils/callbacks/platform.py +314 -38
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +23 -31
- ultralytics/utils/callbacks/wb.py +10 -13
- ultralytics/utils/checks.py +151 -87
- ultralytics/utils/cpu.py +3 -8
- ultralytics/utils/dist.py +19 -15
- ultralytics/utils/downloads.py +29 -41
- ultralytics/utils/errors.py +6 -14
- ultralytics/utils/events.py +2 -4
- ultralytics/utils/export/__init__.py +7 -0
- ultralytics/utils/{export.py → export/engine.py} +16 -16
- ultralytics/utils/export/imx.py +325 -0
- ultralytics/utils/export/tensorflow.py +231 -0
- ultralytics/utils/files.py +24 -28
- ultralytics/utils/git.py +9 -11
- ultralytics/utils/instance.py +30 -51
- ultralytics/utils/logger.py +212 -114
- ultralytics/utils/loss.py +15 -24
- ultralytics/utils/metrics.py +131 -160
- ultralytics/utils/nms.py +21 -30
- ultralytics/utils/ops.py +107 -165
- ultralytics/utils/patches.py +33 -21
- ultralytics/utils/plotting.py +122 -119
- ultralytics/utils/tal.py +28 -44
- ultralytics/utils/torch_utils.py +70 -187
- ultralytics/utils/tqdm.py +20 -20
- ultralytics/utils/triton.py +13 -19
- ultralytics/utils/tuner.py +17 -5
- dgenerate_ultralytics_headless-8.3.196.dist-info/RECORD +0 -281
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.196.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
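The per-file `+`/`-` line counts above come from the registry's diff view. For local verification, a rough stdlib-only sketch along the following lines can reproduce a similar per-file summary, assuming both wheels have already been downloaded under the hypothetical filenames used below (wheels are ordinary zip archives):

```python
import difflib
import zipfile

# Hypothetical local paths; adjust to wherever the two wheels were downloaded.
OLD = "dgenerate_ultralytics_headless-8.3.196-py3-none-any.whl"
NEW = "dgenerate_ultralytics_headless-8.3.248-py3-none-any.whl"

def read_text_members(path: str) -> dict[str, list[str]]:
    """Return {member name: lines} for every UTF-8 decodable file in a wheel."""
    out = {}
    with zipfile.ZipFile(path) as zf:
        for name in zf.namelist():
            try:
                out[name] = zf.read(name).decode("utf-8").splitlines()
            except UnicodeDecodeError:
                pass  # skip binary members
    return out

old, new = read_text_members(OLD), read_text_members(NEW)
for name in sorted(old.keys() & new.keys()):
    diff = list(difflib.unified_diff(old[name], new[name], lineterm=""))
    added = sum(1 for ln in diff if ln.startswith("+") and not ln.startswith("+++"))
    removed = sum(1 for ln in diff if ln.startswith("-") and not ln.startswith("---"))
    if added or removed:
        print(f"{name} +{added} -{removed}")
```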
ultralytics/nn/modules/head.py
CHANGED
@@ -12,20 +12,19 @@ import torch.nn.functional as F
 from torch.nn.init import constant_, xavier_uniform_

 from ultralytics.utils import NOT_MACOS14
-from ultralytics.utils.tal import
-from ultralytics.utils.torch_utils import
+from ultralytics.utils.tal import dist2bbox, dist2rbox, make_anchors
+from ultralytics.utils.torch_utils import TORCH_1_11, fuse_conv_and_bn, smart_inference_mode

 from .block import DFL, SAVPE, BNContrastiveHead, ContrastiveHead, Proto, Residual, SwiGLUFFN
 from .conv import Conv, DWConv
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init

-__all__ = "
+__all__ = "OBB", "Classify", "Detect", "Pose", "RTDETRDecoder", "Segment", "YOLOEDetect", "YOLOESegment", "v10Detect"


 class Detect(nn.Module):
-    """
-    YOLO Detect head for object detection models.
+    """YOLO Detect head for object detection models.

     This class implements the detection head used in YOLO models for predicting bounding boxes and class probabilities.
     It supports both training and inference modes, with optional end-to-end detection capabilities.
@@ -78,8 +77,7 @@ class Detect(nn.Module):
     xyxy = False  # xyxy or xywh output

     def __init__(self, nc: int = 80, ch: tuple = ()):
-        """
-        Initialize the YOLO detection layer with specified number of classes and channels.
+        """Initialize the YOLO detection layer with specified number of classes and channels.

         Args:
             nc (int): Number of classes.
@@ -126,15 +124,14 @@ class Detect(nn.Module):
         return y if self.export else (y, x)

     def forward_end2end(self, x: list[torch.Tensor]) -> dict | tuple:
-        """
-        Perform forward pass of the v10Detect module.
+        """Perform forward pass of the v10Detect module.

         Args:
             x (list[torch.Tensor]): Input feature maps from different levels.

         Returns:
-            outputs (dict | tuple): Training mode returns dict with one2many and one2one outputs.
-
+            outputs (dict | tuple): Training mode returns dict with one2many and one2one outputs. Inference mode returns
+                processed detections or tuple with detections and raw outputs.
         """
         x_detach = [xi.detach() for xi in x]
         one2one = [
@@ -149,10 +146,8 @@ class Detect(nn.Module):
         y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
         return y if self.export else (y, {"one2many": x, "one2one": one2one})

-    @disable_dynamo
     def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
-        """
-        Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.
+        """Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.

         Args:
             x (list[torch.Tensor]): List of feature maps from different detection layers.
@@ -163,28 +158,12 @@ class Detect(nn.Module):
         # Inference path
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
-        if self.
+        if self.dynamic or self.shape != shape:
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape

-
-
-            cls = x_cat[:, self.reg_max * 4 :]
-        else:
-            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
-
-        if self.export and self.format in {"tflite", "edgetpu"}:
-            # Precompute normalization factor to increase numerical stability
-            # See https://github.com/ultralytics/ultralytics/issues/7371
-            grid_h = shape[2]
-            grid_w = shape[3]
-            grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
-            norm = self.strides / (self.stride[0] * grid_size)
-            dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
-        else:
-            dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
-        if self.export and self.format == "imx":
-            return dbox.transpose(1, 2), cls.sigmoid().permute(0, 2, 1)
+        box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
+        dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
         return torch.cat((dbox, cls.sigmoid()), 1)

     def bias_init(self):
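The slimmed-down `_inference` above keeps only the generic decode path: split the concatenated head outputs into DFL box distances and class logits, turn the distances into boxes around the anchors, scale by the per-anchor strides, and append sigmoid class scores. The TFLite/EdgeTPU normalization and the IMX early return are gone from the head (the manifest above adds new `ultralytics/utils/export/` modules, which may be where format-specific handling now lives, though this hunk only shows the removal). A rough, self-contained sketch of that decode, with `dfl_expectation` and `dist2bbox_xywh` as illustrative stand-ins for the package's `DFL` module and `dist2bbox` helper:

```python
import torch

def dfl_expectation(box: torch.Tensor, reg_max: int = 16) -> torch.Tensor:
    """Stand-in for the DFL module: turn 4*reg_max bin logits into 4 expected distances (l, t, r, b)."""
    b, _, a = box.shape
    proj = torch.arange(reg_max, dtype=box.dtype, device=box.device)
    return (box.reshape(b, 4, reg_max, a).softmax(2) * proj.view(1, 1, -1, 1)).sum(2)  # (B, 4, A)

def dist2bbox_xywh(dist: torch.Tensor, anchor_points: torch.Tensor) -> torch.Tensor:
    """Stand-in for dist2bbox: convert (l, t, r, b) distances from anchor centres to (cx, cy, w, h)."""
    lt, rb = dist.chunk(2, 1)
    x1y1, x2y2 = anchor_points - lt, anchor_points + rb
    return torch.cat(((x1y1 + x2y2) / 2, x2y2 - x1y1), 1)

# Toy shapes: batch 1, reg_max 16, 80 classes, 8400 anchors (640x640 input with strides 8/16/32).
B, reg_max, nc, A = 1, 16, 80, 8400
x_cat = torch.randn(B, 4 * reg_max + nc, A)   # concatenated head outputs
anchors = torch.rand(2, A) * 80               # anchor centres in feature-grid units
strides = torch.full((1, A), 8.0)             # per-anchor stride

box, cls = x_cat.split((4 * reg_max, nc), 1)
dbox = dist2bbox_xywh(dfl_expectation(box, reg_max), anchors.unsqueeze(0)) * strides
y = torch.cat((dbox, cls.sigmoid()), 1)       # (B, 4 + nc, A) decoded predictions
print(y.shape)
```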
@@ -211,8 +190,7 @@ class Detect(nn.Module):

     @staticmethod
     def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80) -> torch.Tensor:
-        """
-        Post-process YOLO model predictions.
+        """Post-process YOLO model predictions.

         Args:
             preds (torch.Tensor): Raw predictions with shape (batch_size, num_anchors, 4 + nc) with last dimension
@@ -235,8 +213,7 @@ class Detect(nn.Module):


 class Segment(Detect):
-    """
-    YOLO Segment head for segmentation models.
+    """YOLO Segment head for segmentation models.

     This class extends the Detect head to include mask prediction capabilities for instance segmentation tasks.

@@ -257,8 +234,7 @@ class Segment(Detect):
     """

     def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch: tuple = ()):
-        """
-        Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.
+        """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.

         Args:
             nc (int): Number of classes.
@@ -287,8 +263,7 @@ class Segment(Detect):


 class OBB(Detect):
-    """
-    YOLO OBB detection head for detection with rotation models.
+    """YOLO OBB detection head for detection with rotation models.

     This class extends the Detect head to include oriented bounding box prediction with rotation angles.

@@ -309,8 +284,7 @@ class OBB(Detect):
     """

     def __init__(self, nc: int = 80, ne: int = 1, ch: tuple = ()):
-        """
-        Initialize OBB with number of classes `nc` and layer channels `ch`.
+        """Initialize OBB with number of classes `nc` and layer channels `ch`.

         Args:
             nc (int): Number of classes.
@@ -343,8 +317,7 @@ class OBB(Detect):


 class Pose(Detect):
-    """
-    YOLO Pose head for keypoints models.
+    """YOLO Pose head for keypoints models.

     This class extends the Detect head to include keypoint prediction capabilities for pose estimation tasks.

@@ -365,8 +338,7 @@ class Pose(Detect):
     """

     def __init__(self, nc: int = 80, kpt_shape: tuple = (17, 3), ch: tuple = ()):
-        """
-        Initialize YOLO network with default parameters and Convolutional Layers.
+        """Initialize YOLO network with default parameters and Convolutional Layers.

         Args:
             nc (int): Number of classes.
@@ -388,28 +360,15 @@ class Pose(Detect):
         if self.training:
             return x, kpt
         pred_kpt = self.kpts_decode(bs, kpt)
-        if self.export and self.format == "imx":
-            return (*x, pred_kpt.permute(0, 2, 1))
         return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))

     def kpts_decode(self, bs: int, kpts: torch.Tensor) -> torch.Tensor:
         """Decode keypoints from predictions."""
         ndim = self.kpt_shape[1]
         if self.export:
-
-
-
-            }:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
-                # Precompute normalization factor to increase numerical stability
-                y = kpts.view(bs, *self.kpt_shape, -1)
-                grid_h, grid_w = self.shape[2], self.shape[3]
-                grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
-                norm = self.strides / (self.stride[0] * grid_size)
-                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm
-            else:
-                # NCNN fix
-                y = kpts.view(bs, *self.kpt_shape, -1)
-                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
+            # NCNN fix
+            y = kpts.view(bs, *self.kpt_shape, -1)
+            a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
             if ndim == 3:
                 a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
             return a.view(bs, self.nk, -1)
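In `kpts_decode`, only the NCNN-friendly branch survives under `self.export`: raw keypoint offsets are doubled, re-centred on `anchors - 0.5`, and scaled by the stride. A toy numeric check of that formula with made-up values (not taken from the package):

```python
import torch

# Toy numbers: one anchor at grid cell (10, 7) on a stride-16 level of a 640x640 input.
anchor = torch.tensor([10.5, 7.5])   # anchor centre in grid units (cell-centre convention)
stride = 16.0
raw = torch.tensor([0.25, -0.10])    # raw keypoint x/y prediction for that anchor

# Same formula as the surviving export path: (raw * 2 + (anchor - 0.5)) * stride
xy = (raw * 2.0 + (anchor - 0.5)) * stride
print(xy)  # tensor([168.0000, 108.8000]) -> keypoint at roughly (168, 109) px
```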
@@ -426,8 +385,7 @@ class Pose(Detect):


 class Classify(nn.Module):
-    """
-    YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2).
+    """YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2).

     This class implements a classification head that transforms feature maps into class predictions.

@@ -451,8 +409,7 @@ class Classify(nn.Module):
     export = False  # export mode

     def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
-        """
-        Initialize YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.
+        """Initialize YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.

         Args:
             c1 (int): Number of input channels.
@@ -481,11 +438,10 @@ class Classify(nn.Module):


 class WorldDetect(Detect):
-    """
-    Head for integrating YOLO detection models with semantic understanding from text embeddings.
+    """Head for integrating YOLO detection models with semantic understanding from text embeddings.

-    This class extends the standard Detect head to incorporate text embeddings for enhanced semantic understanding
-
+    This class extends the standard Detect head to incorporate text embeddings for enhanced semantic understanding in
+    object detection tasks.

     Attributes:
         cv3 (nn.ModuleList): Convolution layers for embedding features.
@@ -504,8 +460,7 @@ class WorldDetect(Detect):
     """

     def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
-        """
-        Initialize YOLO detection layer with nc classes and layer channels ch.
+        """Initialize YOLO detection layer with nc classes and layer channels ch.

         Args:
             nc (int): Number of classes.
@@ -539,11 +494,10 @@ class WorldDetect(Detect):


 class LRPCHead(nn.Module):
-    """
-    Lightweight Region Proposal and Classification Head for efficient object detection.
+    """Lightweight Region Proposal and Classification Head for efficient object detection.

-    This head combines region proposal filtering with classification to enable efficient detection with
-
+    This head combines region proposal filtering with classification to enable efficient detection with dynamic
+    vocabulary support.

     Attributes:
         vocab (nn.Module): Vocabulary/classification layer.
@@ -564,8 +518,7 @@ class LRPCHead(nn.Module):
     """

     def __init__(self, vocab: nn.Module, pf: nn.Module, loc: nn.Module, enabled: bool = True):
-        """
-        Initialize LRPCHead with vocabulary, proposal filter, and localization components.
+        """Initialize LRPCHead with vocabulary, proposal filter, and localization components.

         Args:
             vocab (nn.Module): Vocabulary/classification module.
@@ -579,7 +532,8 @@ class LRPCHead(nn.Module):
         self.loc = loc
         self.enabled = enabled

-
+    @staticmethod
+    def conv2linear(conv: nn.Conv2d) -> nn.Linear:
         """Convert a 1x1 convolutional layer to a linear layer."""
         assert isinstance(conv, nn.Conv2d) and conv.kernel_size == (1, 1)
         linear = nn.Linear(conv.in_channels, conv.out_channels)
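`conv2linear` is now a `@staticmethod`; the hunk ends before the weight copy, but the visible lines re-express a 1x1 convolution as an equivalent `nn.Linear`. A self-contained sketch of that idea, where the copy details are an assumption rather than the packaged code:

```python
import torch
import torch.nn as nn

def conv1x1_to_linear(conv: nn.Conv2d) -> nn.Linear:
    """Sketch: re-express a 1x1 Conv2d as an equivalent Linear layer by copying its weights."""
    assert conv.kernel_size == (1, 1)
    linear = nn.Linear(conv.in_channels, conv.out_channels, bias=conv.bias is not None)
    linear.weight.data.copy_(conv.weight.data.view(conv.out_channels, conv.in_channels))
    if conv.bias is not None:
        linear.bias.data.copy_(conv.bias.data)
    return linear

# Quick equivalence check on random data: run the conv, then the linear on flattened positions.
conv = nn.Conv2d(8, 4, 1)
lin = conv1x1_to_linear(conv)
x = torch.randn(2, 8, 5, 5)
out_conv = conv(x).permute(0, 2, 3, 1).reshape(-1, 4)
out_lin = lin(x.permute(0, 2, 3, 1).reshape(-1, 8))
print(torch.allclose(out_conv, out_lin, atol=1e-6))  # True
```

A 1x1 convolution applies the same in-channels-to-out-channels matrix at every spatial position, which is exactly what a linear layer does on flattened positions, so the two outputs match.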
@@ -604,8 +558,7 @@ class LRPCHead(nn.Module):


 class YOLOEDetect(Detect):
-    """
-    Head for integrating YOLO detection models with semantic understanding from text embeddings.
+    """Head for integrating YOLO detection models with semantic understanding from text embeddings.

     This class extends the standard Detect head to support text-guided detection with enhanced semantic understanding
     through text embeddings and visual prompt embeddings.
@@ -637,8 +590,7 @@ class YOLOEDetect(Detect):
     is_fused = False

     def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
-        """
-        Initialize YOLO detection layer with nc classes and layer channels ch.
+        """Initialize YOLO detection layer with nc classes and layer channels ch.

         Args:
             nc (int): Number of classes.
@@ -792,11 +744,10 @@ class YOLOEDetect(Detect):


 class YOLOESegment(YOLOEDetect):
-    """
-    YOLO segmentation head with text embedding capabilities.
+    """YOLO segmentation head with text embedding capabilities.

-    This class extends YOLOEDetect to include mask prediction capabilities for instance segmentation tasks
-
+    This class extends YOLOEDetect to include mask prediction capabilities for instance segmentation tasks with
+    text-guided semantic understanding.

     Attributes:
         nm (int): Number of masks.
@@ -818,8 +769,7 @@ class YOLOESegment(YOLOEDetect):
     def __init__(
         self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch: tuple = ()
     ):
-        """
-        Initialize YOLOESegment with class count, mask parameters, and embedding dimensions.
+        """Initialize YOLOESegment with class count, mask parameters, and embedding dimensions.

         Args:
             nc (int): Number of classes.
@@ -860,8 +810,7 @@ class YOLOESegment(YOLOEDetect):


 class RTDETRDecoder(nn.Module):
-    """
-    Real-Time Deformable Transformer Decoder (RTDETRDecoder) module for object detection.
+    """Real-Time Deformable Transformer Decoder (RTDETRDecoder) module for object detection.

     This decoder module utilizes Transformer architecture along with deformable convolutions to predict bounding boxes
     and class labels for objects in an image. It integrates features from multiple layers and runs through a series of
@@ -901,6 +850,10 @@ class RTDETRDecoder(nn.Module):
     """

     export = False  # export mode
+    shapes = []
+    anchors = torch.empty(0)
+    valid_mask = torch.empty(0)
+    dynamic = False

     def __init__(
         self,
@@ -921,8 +874,7 @@ class RTDETRDecoder(nn.Module):
         box_noise_scale: float = 1.0,
         learnt_init_query: bool = False,
     ):
-        """
-        Initialize the RTDETRDecoder module with the given parameters.
+        """Initialize the RTDETRDecoder module with the given parameters.

         Args:
             nc (int): Number of classes.
@@ -982,8 +934,7 @@ class RTDETRDecoder(nn.Module):
         self._reset_parameters()

     def forward(self, x: list[torch.Tensor], batch: dict | None = None) -> tuple | torch.Tensor:
-        """
-        Run the forward pass of the module, returning bounding box and classification scores for the input.
+        """Run the forward pass of the module, returning bounding box and classification scores for the input.

         Args:
             x (list[torch.Tensor]): List of feature maps from the backbone.
@@ -1031,16 +982,15 @@ class RTDETRDecoder(nn.Module):
         y = torch.cat((dec_bboxes.squeeze(0), dec_scores.squeeze(0).sigmoid()), -1)
         return y if self.export else (y, x)

+    @staticmethod
     def _generate_anchors(
-        self,
         shapes: list[list[int]],
         grid_size: float = 0.05,
         dtype: torch.dtype = torch.float32,
         device: str = "cpu",
         eps: float = 1e-2,
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        """
-        Generate anchor bounding boxes for given shapes with specific grid size and validate them.
+        """Generate anchor bounding boxes for given shapes with specific grid size and validate them.

         Args:
             shapes (list): List of feature map shapes.
@@ -1057,7 +1007,7 @@ class RTDETRDecoder(nn.Module):
         for i, (h, w) in enumerate(shapes):
             sy = torch.arange(end=h, dtype=dtype, device=device)
             sx = torch.arange(end=w, dtype=dtype, device=device)
-            grid_y, grid_x = torch.meshgrid(sy, sx, indexing="ij") if
+            grid_y, grid_x = torch.meshgrid(sy, sx, indexing="ij") if TORCH_1_11 else torch.meshgrid(sy, sx)
             grid_xy = torch.stack([grid_x, grid_y], -1)  # (h, w, 2)

             valid_WH = torch.tensor([w, h], dtype=dtype, device=device)
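The surviving `torch.meshgrid(..., indexing="ij")` call stays guarded by the package's `TORCH_1_11` flag because older PyTorch releases do not accept the `indexing` keyword. A minimal sketch of the same guard, computing a local version flag instead of importing the package constant:

```python
import torch

# Hypothetical local flag; the package imports TORCH_1_11 from ultralytics.utils.torch_utils instead.
TORCH_GE_1_11 = tuple(int(v) for v in torch.__version__.split(".")[:2]) >= (1, 11)

sy, sx = torch.arange(4.0), torch.arange(6.0)
# Older releases lack the indexing kwarg but already behave like indexing="ij" by default.
grid_y, grid_x = torch.meshgrid(sy, sx, indexing="ij") if TORCH_GE_1_11 else torch.meshgrid(sy, sx)
print(grid_y.shape, grid_x.shape)  # torch.Size([4, 6]) torch.Size([4, 6])
```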
@@ -1072,8 +1022,7 @@ class RTDETRDecoder(nn.Module):
         return anchors, valid_mask

     def _get_encoder_input(self, x: list[torch.Tensor]) -> tuple[torch.Tensor, list[list[int]]]:
-        """
-        Process and return encoder inputs by getting projection features from input and concatenating them.
+        """Process and return encoder inputs by getting projection features from input and concatenating them.

         Args:
             x (list[torch.Tensor]): List of feature maps from the backbone.
@@ -1105,8 +1054,7 @@ class RTDETRDecoder(nn.Module):
         dn_embed: torch.Tensor | None = None,
         dn_bbox: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        """
-        Generate and prepare the input required for the decoder from the provided features and shapes.
+        """Generate and prepare the input required for the decoder from the provided features and shapes.

         Args:
             feats (torch.Tensor): Processed features from encoder.
@@ -1121,22 +1069,24 @@ class RTDETRDecoder(nn.Module):
             enc_scores (torch.Tensor): Encoded scores.
         """
         bs = feats.shape[0]
-
-
-
+        if self.dynamic or self.shapes != shapes:
+            self.anchors, self.valid_mask = self._generate_anchors(shapes, dtype=feats.dtype, device=feats.device)
+            self.shapes = shapes

+        # Prepare input for decoder
+        features = self.enc_output(self.valid_mask * feats)  # bs, h*w, 256
         enc_outputs_scores = self.enc_score_head(features)  # (bs, h*w, nc)

         # Query selection
-        # (bs,
+        # (bs*num_queries,)
         topk_ind = torch.topk(enc_outputs_scores.max(-1).values, self.num_queries, dim=1).indices.view(-1)
-        # (bs,
+        # (bs*num_queries,)
         batch_ind = torch.arange(end=bs, dtype=topk_ind.dtype).unsqueeze(-1).repeat(1, self.num_queries).view(-1)

         # (bs, num_queries, 256)
         top_k_features = features[batch_ind, topk_ind].view(bs, self.num_queries, -1)
         # (bs, num_queries, 4)
-        top_k_anchors = anchors[:, topk_ind].view(bs, self.num_queries, -1)
+        top_k_anchors = self.anchors[:, topk_ind].view(bs, self.num_queries, -1)

         # Dynamic anchors + static content
         refer_bbox = self.enc_bbox_head(top_k_features) + top_k_anchors
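Together with the new `shapes`, `anchors`, `valid_mask`, and `dynamic` class attributes added a few hunks up, the guard above caches the anchor grid across forward passes and rebuilds it only when the incoming feature-map shapes change. A stripped-down illustration of the same shape-keyed caching pattern, with hypothetical names (not the packaged implementation):

```python
import torch
import torch.nn as nn

class AnchorCache(nn.Module):
    """Illustrative shape-keyed cache: rebuild anchors only when feature-map shapes change."""

    def __init__(self, dynamic: bool = False):
        super().__init__()
        self.dynamic = dynamic      # force a rebuild every call, e.g. for dynamic-shape export
        self.shapes = []            # shapes the cached anchors were built for
        self.anchors = torch.empty(0)

    @staticmethod
    def _generate_anchors(shapes: list[list[int]]) -> torch.Tensor:
        # One (x, y) centre per grid cell, normalized to [0, 1], concatenated over all levels.
        pts = []
        for h, w in shapes:
            gy, gx = torch.meshgrid(torch.arange(h), torch.arange(w), indexing="ij")
            pts.append((torch.stack([gx, gy], -1).reshape(-1, 2) + 0.5) / torch.tensor([w, h]))
        return torch.cat(pts, 0)

    def forward(self, shapes: list[list[int]]) -> torch.Tensor:
        if self.dynamic or self.shapes != shapes:   # cache miss: shapes changed
            self.anchors = self._generate_anchors(shapes)
            self.shapes = shapes
        return self.anchors

cache = AnchorCache()
a1 = cache([[80, 80], [40, 40], [20, 20]])  # built once
a2 = cache([[80, 80], [40, 40], [20, 20]])  # reused, no recompute
print(a1.shape, a1 is a2)                   # torch.Size([8400, 2]) True
```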
@@ -1182,11 +1132,10 @@ class RTDETRDecoder(nn.Module):


 class v10Detect(Detect):
-    """
-    v10 Detection head from https://arxiv.org/pdf/2405.14458.
+    """v10 Detection head from https://arxiv.org/pdf/2405.14458.

-    This class implements the YOLOv10 detection head with dual-assignment training and consistent dual predictions
-
+    This class implements the YOLOv10 detection head with dual-assignment training and consistent dual predictions for
+    improved efficiency and performance.

     Attributes:
         end2end (bool): End-to-end detection mode.
@@ -1210,8 +1159,7 @@ class v10Detect(Detect):
     end2end = True

     def __init__(self, nc: int = 80, ch: tuple = ()):
-        """
-        Initialize the v10Detect object with the specified number of classes and input channels.
+        """Initialize the v10Detect object with the specified number of classes and input channels.

         Args:
             nc (int): Number of classes.