dgenerate-ultralytics-headless 8.3.190-py3-none-any.whl → 8.3.192-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/RECORD +103 -102
- tests/test_cuda.py +6 -5
- tests/test_exports.py +1 -6
- tests/test_python.py +1 -4
- tests/test_solutions.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -14
- ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
- ultralytics/cfg/datasets/VisDrone.yaml +4 -4
- ultralytics/data/annotator.py +6 -6
- ultralytics/data/augment.py +53 -51
- ultralytics/data/base.py +15 -13
- ultralytics/data/build.py +7 -4
- ultralytics/data/converter.py +9 -10
- ultralytics/data/dataset.py +24 -22
- ultralytics/data/loaders.py +13 -11
- ultralytics/data/split.py +4 -3
- ultralytics/data/split_dota.py +14 -12
- ultralytics/data/utils.py +29 -23
- ultralytics/engine/exporter.py +2 -2
- ultralytics/engine/model.py +16 -14
- ultralytics/engine/predictor.py +8 -6
- ultralytics/engine/results.py +54 -52
- ultralytics/engine/trainer.py +8 -3
- ultralytics/engine/tuner.py +230 -42
- ultralytics/hub/google/__init__.py +7 -6
- ultralytics/hub/session.py +8 -6
- ultralytics/hub/utils.py +3 -4
- ultralytics/models/fastsam/model.py +8 -6
- ultralytics/models/nas/model.py +5 -3
- ultralytics/models/rtdetr/train.py +4 -3
- ultralytics/models/rtdetr/val.py +6 -4
- ultralytics/models/sam/amg.py +13 -10
- ultralytics/models/sam/model.py +3 -2
- ultralytics/models/sam/modules/blocks.py +21 -21
- ultralytics/models/sam/modules/decoders.py +11 -11
- ultralytics/models/sam/modules/encoders.py +25 -25
- ultralytics/models/sam/modules/memory_attention.py +9 -8
- ultralytics/models/sam/modules/sam.py +8 -10
- ultralytics/models/sam/modules/tiny_encoder.py +21 -20
- ultralytics/models/sam/modules/transformer.py +6 -5
- ultralytics/models/sam/modules/utils.py +7 -5
- ultralytics/models/sam/predict.py +32 -31
- ultralytics/models/utils/loss.py +29 -27
- ultralytics/models/utils/ops.py +10 -8
- ultralytics/models/yolo/classify/train.py +9 -7
- ultralytics/models/yolo/classify/val.py +11 -9
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +8 -6
- ultralytics/models/yolo/detect/val.py +22 -20
- ultralytics/models/yolo/model.py +14 -14
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +11 -9
- ultralytics/models/yolo/pose/train.py +7 -5
- ultralytics/models/yolo/pose/val.py +12 -10
- ultralytics/models/yolo/segment/train.py +4 -5
- ultralytics/models/yolo/segment/val.py +13 -11
- ultralytics/models/yolo/world/train.py +10 -8
- ultralytics/models/yolo/yoloe/train.py +10 -10
- ultralytics/models/yolo/yoloe/val.py +11 -9
- ultralytics/nn/autobackend.py +17 -19
- ultralytics/nn/modules/block.py +12 -12
- ultralytics/nn/modules/conv.py +4 -3
- ultralytics/nn/modules/head.py +41 -37
- ultralytics/nn/modules/transformer.py +22 -21
- ultralytics/nn/tasks.py +2 -2
- ultralytics/nn/text_model.py +6 -5
- ultralytics/solutions/analytics.py +7 -5
- ultralytics/solutions/config.py +12 -10
- ultralytics/solutions/distance_calculation.py +3 -3
- ultralytics/solutions/heatmap.py +4 -2
- ultralytics/solutions/object_counter.py +5 -3
- ultralytics/solutions/parking_management.py +4 -2
- ultralytics/solutions/region_counter.py +7 -5
- ultralytics/solutions/similarity_search.py +5 -3
- ultralytics/solutions/solutions.py +38 -36
- ultralytics/solutions/streamlit_inference.py +8 -7
- ultralytics/trackers/bot_sort.py +11 -9
- ultralytics/trackers/byte_tracker.py +17 -15
- ultralytics/trackers/utils/gmc.py +4 -3
- ultralytics/utils/__init__.py +16 -88
- ultralytics/utils/autobatch.py +3 -2
- ultralytics/utils/autodevice.py +10 -10
- ultralytics/utils/benchmarks.py +11 -10
- ultralytics/utils/callbacks/comet.py +9 -9
- ultralytics/utils/checks.py +17 -26
- ultralytics/utils/export.py +12 -11
- ultralytics/utils/files.py +8 -7
- ultralytics/utils/git.py +139 -0
- ultralytics/utils/instance.py +8 -7
- ultralytics/utils/loss.py +15 -13
- ultralytics/utils/metrics.py +62 -62
- ultralytics/utils/ops.py +3 -2
- ultralytics/utils/patches.py +6 -4
- ultralytics/utils/plotting.py +20 -18
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tqdm.py +18 -14
- ultralytics/utils/triton.py +3 -2
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.190.dist-info → dgenerate_ultralytics_headless-8.3.192.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/head.py
CHANGED
@@ -1,9 +1,10 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Model head modules."""

+from __future__ import annotations
+
 import copy
 import math
-from typing import List, Optional, Tuple, Union

 import torch
 import torch.nn as nn
@@ -76,7 +77,7 @@ class Detect(nn.Module):
 legacy = False # backward compatibility for v3/v5/v8/v9 models
 xyxy = False # xyxy or xywh output

-def __init__(self, nc: int = 80, ch:
+def __init__(self, nc: int = 80, ch: tuple = ()):
 """
 Initialize the YOLO detection layer with specified number of classes and channels.

@@ -112,7 +113,7 @@ class Detect(nn.Module):
 self.one2one_cv2 = copy.deepcopy(self.cv2)
 self.one2one_cv3 = copy.deepcopy(self.cv3)

-def forward(self, x:
+def forward(self, x: list[torch.Tensor]) -> list[torch.Tensor] | tuple:
 """Concatenate and return predicted bounding boxes and class probabilities."""
 if self.end2end:
 return self.forward_end2end(x)
@@ -124,7 +125,7 @@ class Detect(nn.Module):
 y = self._inference(x)
 return y if self.export else (y, x)

-def forward_end2end(self, x:
+def forward_end2end(self, x: list[torch.Tensor]) -> dict | tuple:
 """
 Perform forward pass of the v10Detect module.

@@ -148,7 +149,7 @@ class Detect(nn.Module):
 y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
 return y if self.export else (y, {"one2many": x, "one2one": one2one})

-def _inference(self, x:
+def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
 """
 Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.

@@ -200,7 +201,12 @@ class Detect(nn.Module):

 def decode_bboxes(self, bboxes: torch.Tensor, anchors: torch.Tensor, xywh: bool = True) -> torch.Tensor:
 """Decode bounding boxes from predictions."""
-return dist2bbox(
+return dist2bbox(
+bboxes,
+anchors,
+xywh=xywh and not self.end2end and not self.xyxy,
+dim=1,
+)

 @staticmethod
 def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80) -> torch.Tensor:
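
The reformatted `decode_bboxes` call above only wraps the arguments onto separate lines and gates the `xywh` conversion on both `end2end` and the `xyxy` flag. As background, here is a minimal, self-contained sketch of what a `dist2bbox`-style decode does; the helper name and tensor shapes are illustrative, not the library's implementation:

```python
import torch


def decode_distances(distance: torch.Tensor, anchor_points: torch.Tensor, xywh: bool = True, dim: int = 1) -> torch.Tensor:
    """Turn (left, top, right, bottom) distances around anchor points into boxes."""
    lt, rb = distance.chunk(2, dim)      # split into left/top and right/bottom offsets
    x1y1 = anchor_points - lt            # top-left corner
    x2y2 = anchor_points + rb            # bottom-right corner
    if xywh:                             # convert corners to center-x, center-y, width, height
        return torch.cat(((x1y1 + x2y2) / 2, x2y2 - x1y1), dim)
    return torch.cat((x1y1, x2y2), dim)  # keep xyxy corners (end2end / xyxy heads skip the conversion)


boxes = decode_distances(torch.rand(1, 4, 8400), torch.rand(1, 2, 8400), xywh=True, dim=1)
```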
@@ -249,7 +255,7 @@ class Segment(Detect):
 >>> outputs = segment(x)
 """

-def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch:
+def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch: tuple = ()):
 """
 Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.

@@ -267,7 +273,7 @@ class Segment(Detect):
 c4 = max(ch[0] // 4, self.nm)
 self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)

-def forward(self, x:
+def forward(self, x: list[torch.Tensor]) -> tuple | list[torch.Tensor]:
 """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
 p = self.proto(x[0]) # mask protos
 bs = p.shape[0] # batch size
@@ -301,7 +307,7 @@ class OBB(Detect):
 >>> outputs = obb(x)
 """

-def __init__(self, nc: int = 80, ne: int = 1, ch:
+def __init__(self, nc: int = 80, ne: int = 1, ch: tuple = ()):
 """
 Initialize OBB with number of classes `nc` and layer channels `ch`.

@@ -316,7 +322,7 @@ class OBB(Detect):
 c4 = max(ch[0] // 4, self.ne)
 self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.ne, 1)) for x in ch)

-def forward(self, x:
+def forward(self, x: list[torch.Tensor]) -> torch.Tensor | tuple:
 """Concatenate and return predicted bounding boxes and class probabilities."""
 bs = x[0].shape[0] # batch size
 angle = torch.cat([self.cv4[i](x[i]).view(bs, self.ne, -1) for i in range(self.nl)], 2) # OBB theta logits
@@ -357,7 +363,7 @@ class Pose(Detect):
 >>> outputs = pose(x)
 """

-def __init__(self, nc: int = 80, kpt_shape:
+def __init__(self, nc: int = 80, kpt_shape: tuple = (17, 3), ch: tuple = ()):
 """
 Initialize YOLO network with default parameters and Convolutional Layers.

@@ -373,7 +379,7 @@ class Pose(Detect):
 c4 = max(ch[0] // 4, self.nk)
 self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)

-def forward(self, x:
+def forward(self, x: list[torch.Tensor]) -> torch.Tensor | tuple:
 """Perform forward pass through YOLO model and return predictions."""
 bs = x[0].shape[0] # batch size
 kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1) # (bs, 17*3, h*w)
@@ -443,7 +449,7 @@ class Classify(nn.Module):

 export = False # export mode

-def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p:
+def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
 """
 Initialize YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.

@@ -462,7 +468,7 @@ class Classify(nn.Module):
 self.drop = nn.Dropout(p=0.0, inplace=True)
 self.linear = nn.Linear(c_, c2) # to x(b,c2)

-def forward(self, x:
+def forward(self, x: list[torch.Tensor] | torch.Tensor) -> torch.Tensor | tuple:
 """Perform forward pass of the YOLO model on input image data."""
 if isinstance(x, list):
 x = torch.cat(x, 1)
@@ -496,7 +502,7 @@ class WorldDetect(Detect):
 >>> outputs = world_detect(x, text)
 """

-def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch:
+def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
 """
 Initialize YOLO detection layer with nc classes and layer channels ch.

@@ -511,7 +517,7 @@ class WorldDetect(Detect):
 self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
 self.cv4 = nn.ModuleList(BNContrastiveHead(embed) if with_bn else ContrastiveHead() for _ in ch)

-def forward(self, x:
+def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> list[torch.Tensor] | tuple:
 """Concatenate and return predicted bounding boxes and class probabilities."""
 for i in range(self.nl):
 x[i] = torch.cat((self.cv2[i](x[i]), self.cv4[i](self.cv3[i](x[i]), text)), 1)
@@ -580,7 +586,7 @@ class LRPCHead(nn.Module):
 linear.bias.data = conv.bias.data
 return linear

-def forward(self, cls_feat: torch.Tensor, loc_feat: torch.Tensor, conf: float) ->
+def forward(self, cls_feat: torch.Tensor, loc_feat: torch.Tensor, conf: float) -> tuple[tuple, torch.Tensor]:
 """Process classification and localization features to generate detection proposals."""
 if self.enabled:
 pf_score = self.pf(cls_feat)[0, 0].flatten(0)
@@ -629,7 +635,7 @@ class YOLOEDetect(Detect):

 is_fused = False

-def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch:
+def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
 """
 Initialize YOLO detection layer with nc classes and layer channels ch.

@@ -642,7 +648,7 @@ class YOLOEDetect(Detect):
 super().__init__(nc, ch)
 c3 = max(ch[0], min(self.nc, 100))
 assert c3 <= embed
-assert with_bn
+assert with_bn
 self.cv3 = (
 nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
 if self.legacy
@@ -709,11 +715,11 @@ class YOLOEDetect(Detect):
 self.reprta = nn.Identity()
 self.is_fused = True

-def get_tpe(self, tpe:
+def get_tpe(self, tpe: torch.Tensor | None) -> torch.Tensor | None:
 """Get text prompt embeddings with normalization."""
 return None if tpe is None else F.normalize(self.reprta(tpe), dim=-1, p=2)

-def get_vpe(self, x:
+def get_vpe(self, x: list[torch.Tensor], vpe: torch.Tensor) -> torch.Tensor:
 """Get visual prompt embeddings with spatial awareness."""
 if vpe.shape[1] == 0: # no visual prompt embeddings
 return torch.zeros(x[0].shape[0], 0, self.embed, device=x[0].device)
@@ -722,7 +728,7 @@ class YOLOEDetect(Detect):
 assert vpe.ndim == 3 # (B, N, D)
 return vpe

-def forward_lrpc(self, x:
+def forward_lrpc(self, x: list[torch.Tensor], return_mask: bool = False) -> torch.Tensor | tuple:
 """Process features with fused text embeddings to generate detections for prompt-free model."""
 masks = []
 assert self.is_fused, "Prompt-free inference requires model to be fused!"
@@ -760,9 +766,7 @@ class YOLOEDetect(Detect):
 else:
 return y if self.export else (y, x)

-def forward(
-self, x: List[torch.Tensor], cls_pe: torch.Tensor, return_mask: bool = False
-) -> Union[torch.Tensor, Tuple]:
+def forward(self, x: list[torch.Tensor], cls_pe: torch.Tensor, return_mask: bool = False) -> torch.Tensor | tuple:
 """Process features with class prompt embeddings to generate detections."""
 if hasattr(self, "lrpc"): # for prompt-free inference
 return self.forward_lrpc(x, return_mask)
@@ -811,7 +815,7 @@ class YOLOESegment(YOLOEDetect):
 """

 def __init__(
-self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch:
+self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch: tuple = ()
 ):
 """
 Initialize YOLOESegment with class count, mask parameters, and embedding dimensions.
@@ -832,7 +836,7 @@ class YOLOESegment(YOLOEDetect):
 c5 = max(ch[0] // 4, self.nm)
 self.cv5 = nn.ModuleList(nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nm, 1)) for x in ch)

-def forward(self, x:
+def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> tuple | torch.Tensor:
 """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
 p = self.proto(x[0]) # mask protos
 bs = p.shape[0] # batch size
@@ -900,7 +904,7 @@ class RTDETRDecoder(nn.Module):
 def __init__(
 self,
 nc: int = 80,
-ch:
+ch: tuple = (512, 1024, 2048),
 hd: int = 256, # hidden dim
 nq: int = 300, # num queries
 ndp: int = 4, # num decoder points
@@ -976,7 +980,7 @@ class RTDETRDecoder(nn.Module):

 self._reset_parameters()

-def forward(self, x:
+def forward(self, x: list[torch.Tensor], batch: dict | None = None) -> tuple | torch.Tensor:
 """
 Run the forward pass of the module, returning bounding box and classification scores for the input.

@@ -1028,12 +1032,12 @@ class RTDETRDecoder(nn.Module):

 def _generate_anchors(
 self,
-shapes:
+shapes: list[list[int]],
 grid_size: float = 0.05,
 dtype: torch.dtype = torch.float32,
 device: str = "cpu",
 eps: float = 1e-2,
-) ->
+) -> tuple[torch.Tensor, torch.Tensor]:
 """
 Generate anchor bounding boxes for given shapes with specific grid size and validate them.

@@ -1066,7 +1070,7 @@ class RTDETRDecoder(nn.Module):
 anchors = anchors.masked_fill(~valid_mask, float("inf"))
 return anchors, valid_mask

-def _get_encoder_input(self, x:
+def _get_encoder_input(self, x: list[torch.Tensor]) -> tuple[torch.Tensor, list[list[int]]]:
 """
 Process and return encoder inputs by getting projection features from input and concatenating them.

@@ -1096,10 +1100,10 @@ class RTDETRDecoder(nn.Module):
 def _get_decoder_input(
 self,
 feats: torch.Tensor,
-shapes:
-dn_embed:
-dn_bbox:
-) ->
+shapes: list[list[int]],
+dn_embed: torch.Tensor | None = None,
+dn_bbox: torch.Tensor | None = None,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
 """
 Generate and prepare the input required for the decoder from the provided features and shapes.

@@ -1204,7 +1208,7 @@ class v10Detect(Detect):

 end2end = True

-def __init__(self, nc: int = 80, ch:
+def __init__(self, nc: int = 80, ch: tuple = ()):
 """
 Initialize the v10Detect object with the specified number of classes and input channels.

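Nearly every hunk in ultralytics/nn/modules/head.py above follows the same pattern: the `typing` aliases (`List`, `Optional`, `Tuple`, `Union`) are replaced by built-in generics and PEP 604 unions, with `from __future__ import annotations` keeping the new syntax valid as annotations on interpreters older than 3.10. A small before/after sketch (the "before" signature is representative of the removed annotations, which are clipped in the diff rendering):

```python
from __future__ import annotations  # annotations become lazily evaluated strings

import torch

# Before (typing-module style):
#   from typing import List, Tuple, Union
#   def forward(self, x: List[torch.Tensor]) -> Union[List[torch.Tensor], Tuple]: ...

# After (built-in generics and PEP 604 unions):
def forward(x: list[torch.Tensor]) -> list[torch.Tensor] | tuple:
    """Same runtime behavior; only the annotation syntax changes."""
    return x
```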
ultralytics/nn/modules/transformer.py
CHANGED
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Transformer modules."""

+from __future__ import annotations
+
 import math
-from typing import List, Optional

 import torch
 import torch.nn as nn
@@ -88,16 +89,16 @@ class TransformerEncoderLayer(nn.Module):
 self.normalize_before = normalize_before

 @staticmethod
-def with_pos_embed(tensor: torch.Tensor, pos:
+def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None = None) -> torch.Tensor:
 """Add position embeddings to the tensor if provided."""
 return tensor if pos is None else tensor + pos

 def forward_post(
 self,
 src: torch.Tensor,
-src_mask:
-src_key_padding_mask:
-pos:
+src_mask: torch.Tensor | None = None,
+src_key_padding_mask: torch.Tensor | None = None,
+pos: torch.Tensor | None = None,
 ) -> torch.Tensor:
 """
 Perform forward pass with post-normalization.
@@ -122,9 +123,9 @@ class TransformerEncoderLayer(nn.Module):
 def forward_pre(
 self,
 src: torch.Tensor,
-src_mask:
-src_key_padding_mask:
-pos:
+src_mask: torch.Tensor | None = None,
+src_key_padding_mask: torch.Tensor | None = None,
+pos: torch.Tensor | None = None,
 ) -> torch.Tensor:
 """
 Perform forward pass with pre-normalization.
@@ -149,9 +150,9 @@ class TransformerEncoderLayer(nn.Module):
 def forward(
 self,
 src: torch.Tensor,
-src_mask:
-src_key_padding_mask:
-pos:
+src_mask: torch.Tensor | None = None,
+src_key_padding_mask: torch.Tensor | None = None,
+pos: torch.Tensor | None = None,
 ) -> torch.Tensor:
 """
 Forward propagate the input through the encoder module.
@@ -533,8 +534,8 @@ class MSDeformAttn(nn.Module):
 query: torch.Tensor,
 refer_bbox: torch.Tensor,
 value: torch.Tensor,
-value_shapes:
-value_mask:
+value_shapes: list,
+value_mask: torch.Tensor | None = None,
 ) -> torch.Tensor:
 """
 Perform forward pass for multiscale deformable attention.
@@ -649,7 +650,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
 self.norm3 = nn.LayerNorm(d_model)

 @staticmethod
-def with_pos_embed(tensor: torch.Tensor, pos:
+def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None) -> torch.Tensor:
 """Add positional embeddings to the input tensor, if provided."""
 return tensor if pos is None else tensor + pos

@@ -672,10 +673,10 @@
 embed: torch.Tensor,
 refer_bbox: torch.Tensor,
 feats: torch.Tensor,
-shapes:
-padding_mask:
-attn_mask:
-query_pos:
+shapes: list,
+padding_mask: torch.Tensor | None = None,
+attn_mask: torch.Tensor | None = None,
+query_pos: torch.Tensor | None = None,
 ) -> torch.Tensor:
 """
 Perform the forward pass through the entire decoder layer.
@@ -749,12 +750,12 @@ class DeformableTransformerDecoder(nn.Module):
 embed: torch.Tensor, # decoder embeddings
 refer_bbox: torch.Tensor, # anchor
 feats: torch.Tensor, # image features
-shapes:
+shapes: list, # feature shapes
 bbox_head: nn.Module,
 score_head: nn.Module,
 pos_mlp: nn.Module,
-attn_mask:
-padding_mask:
+attn_mask: torch.Tensor | None = None,
+padding_mask: torch.Tensor | None = None,
 ):
 """
 Perform the forward pass through the entire decoder.
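
Beyond the annotation cleanup, the touched methods are the encoder layer's post-norm and pre-norm paths selected by `normalize_before`. As a reference for that distinction, a compact sketch (class name, dimensions, and layer layout are assumptions for illustration, not the library's `TransformerEncoderLayer`):

```python
from __future__ import annotations

import torch
import torch.nn as nn


class TinyEncoderLayer(nn.Module):
    """Minimal pre-/post-norm toggle, analogous to a normalize_before flag."""

    def __init__(self, dim: int = 64, heads: int = 4, normalize_before: bool = False):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)
        self.ffn = nn.Sequential(nn.Linear(dim, dim * 4), nn.GELU(), nn.Linear(dim * 4, dim))
        self.norm1, self.norm2 = nn.LayerNorm(dim), nn.LayerNorm(dim)
        self.normalize_before = normalize_before

    def forward(self, src: torch.Tensor, pos: torch.Tensor | None = None) -> torch.Tensor:
        if self.normalize_before:                  # pre-norm: normalize, then attention/FFN
            s = self.norm1(src)
            q = k = s if pos is None else s + pos  # add position embeddings if provided
            src = src + self.attn(q, k, s, need_weights=False)[0]
            return src + self.ffn(self.norm2(src))
        q = k = src if pos is None else src + pos  # post-norm: attention/FFN, then normalize
        src = self.norm1(src + self.attn(q, k, src, need_weights=False)[0])
        return self.norm2(src + self.ffn(src))


out = TinyEncoderLayer(normalize_before=True)(torch.rand(2, 10, 64))
```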
ultralytics/nn/tasks.py
CHANGED
@@ -1548,7 +1548,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
 """
 ckpt, weight = torch_safe_load(weight) # load ckpt
 args = {**DEFAULT_CFG_DICT, **(ckpt.get("train_args", {}))} # combine model and default args, preferring model args
-model = (ckpt.get("ema") or ckpt["model"]).
+model = (ckpt.get("ema") or ckpt["model"]).float() # FP32 model

 # Model compatibility updates
 model.args = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS} # attach args to model
@@ -1557,7 +1557,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
 if not hasattr(model, "stride"):
 model.stride = torch.tensor([32.0])

-model = model.fuse()
+model = (model.fuse() if fuse and hasattr(model, "fuse") else model).eval().to(device) # model in eval mode

 # Module updates
 for m in model.modules():
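
The two hunks change how the checkpoint's model is materialized: the EMA weights (when present) are cast to FP32 immediately, and fusing becomes optional, with the model always switched to eval mode and moved to the requested device. A hedged sketch of that loading pattern outside the Ultralytics codebase (the helper name is illustrative):

```python
import torch.nn as nn


def materialize_checkpoint(ckpt: dict, device: str = "cpu", fuse: bool = False) -> nn.Module:
    """Mirror of the pattern in the hunks above, assuming ckpt holds 'model' and optional 'ema' modules."""
    model = (ckpt.get("ema") or ckpt["model"]).float()                  # prefer EMA weights, force FP32
    model = model.fuse() if fuse and hasattr(model, "fuse") else model  # fuse Conv+BN only when asked
    return model.eval().to(device)                                      # inference mode on the target device
```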
ultralytics/nn/text_model.py
CHANGED
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 from abc import abstractmethod
 from pathlib import Path
-from typing import List, Union

 import torch
 import torch.nn as nn
@@ -91,7 +92,7 @@ class CLIP(TextModel):
 self.device = device
 self.eval()

-def tokenize(self, texts:
+def tokenize(self, texts: str | list[str]) -> torch.Tensor:
 """
 Convert input texts to CLIP tokens.

@@ -135,7 +136,7 @@ class CLIP(TextModel):
 return txt_feats

 @smart_inference_mode()
-def encode_image(self, image:
+def encode_image(self, image: Image.Image | torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
 """
 Encode preprocessed images into normalized feature vectors.

@@ -234,7 +235,7 @@ class MobileCLIP(TextModel):
 self.device = device
 self.eval()

-def tokenize(self, texts:
+def tokenize(self, texts: list[str]) -> torch.Tensor:
 """
 Convert input texts to MobileCLIP tokens.

@@ -319,7 +320,7 @@ class MobileCLIPTS(TextModel):
 self.tokenizer = clip.clip.tokenize
 self.device = device

-def tokenize(self, texts:
+def tokenize(self, texts: list[str]) -> torch.Tensor:
 """
 Convert input texts to MobileCLIP tokens.

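The tokenizer wrappers now advertise `str | list[str]` (CLIP) and `list[str]` (MobileCLIP) inputs. A toy sketch of how a union-typed tokenize can promote a single prompt to a batch; the vocabulary and padding scheme here are invented for illustration and are not the library's tokenizers:

```python
from __future__ import annotations

import torch


def tokenize_texts(texts: str | list[str], vocab: dict[str, int], length: int = 8) -> torch.Tensor:
    """Accept one prompt or a batch of prompts and return a (batch, length) id tensor."""
    if isinstance(texts, str):                         # promote a single string to a batch of one
        texts = [texts]
    rows = [[vocab.get(w, 0) for w in t.split()][:length] for t in texts]
    rows = [r + [0] * (length - len(r)) for r in rows]  # right-pad with the unknown/pad id
    return torch.tensor(rows, dtype=torch.long)


tokens = tokenize_texts("a photo of a dog", {"a": 1, "photo": 2, "of": 3, "dog": 4})
```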
ultralytics/solutions/analytics.py
CHANGED
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 from itertools import cycle
-from typing import Any
+from typing import Any

 import cv2
 import numpy as np
@@ -135,7 +137,7 @@ class Analytics(BaseSolution):
 return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)

 def update_graph(
-self, frame_number: int, count_dict:
+self, frame_number: int, count_dict: dict[str, int] | None = None, plot: str = "line"
 ) -> np.ndarray:
 """
 Update the graph with new data for single or multiple classes.
@@ -204,7 +206,7 @@
 markersize=self.line_width * 5,
 label=f"{key} Data Points",
 )
-
+elif plot == "bar":
 self.ax.clear() # clear bar data
 for label in labels: # Map labels to colors
 if label not in self.color_mapping:
@@ -224,12 +226,12 @@
 for bar, label in zip(bars, labels):
 bar.set_label(label) # Assign label to each bar
 self.ax.legend(loc="upper left", fontsize=13, facecolor=self.fg_color, edgecolor=self.fg_color)
-
+elif plot == "pie":
 total = sum(counts)
 percentages = [size / total * 100 for size in counts]
-start_angle = 90
 self.ax.clear()

+start_angle = 90
 # Create pie chart and create legend labels with percentages
 wedges, _ = self.ax.pie(
 counts, labels=labels, startangle=start_angle, textprops={"color": self.fg_color}, autopct=None
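
The two `update_graph` hunks add `elif plot == "bar":` / `elif plot == "pie":` branch heads in place of the removed lines and move `start_angle = 90` below `self.ax.clear()`. A stripped-down sketch of that plot-type dispatch using plain Matplotlib, with invented data:

```python
import matplotlib

matplotlib.use("Agg")  # headless backend so the sketch runs without a display
import matplotlib.pyplot as plt


def update_chart(ax, labels: list, counts: list, plot: str = "line") -> None:
    """Toy line/bar/pie dispatch mirroring the branch structure above."""
    if plot == "line":
        ax.plot(range(len(counts)), counts, marker="o")
    elif plot == "bar":
        ax.clear()  # bar charts are redrawn from scratch each update
        ax.bar(labels, counts)
    elif plot == "pie":
        ax.clear()
        start_angle = 90  # set after clear(), matching the new ordering
        ax.pie(counts, labels=labels, startangle=start_angle, autopct=None)


fig, ax = plt.subplots()
update_chart(ax, ["car", "person"], [3, 5], plot="pie")
```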
ultralytics/solutions/config.py
CHANGED
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 from dataclasses import dataclass, field
-from typing import Any
+from typing import Any

 import cv2

@@ -60,22 +62,22 @@ class SolutionConfig:
 >>> print(cfg.model)
 """

-source:
-model:
-classes:
+source: str | None = None
+model: str | None = None
+classes: list[int] | None = None
 show_conf: bool = True
 show_labels: bool = True
-region:
-colormap:
+region: list[tuple[int, int]] | None = None
+colormap: int | None = cv2.COLORMAP_DEEPGREEN
 show_in: bool = True
 show_out: bool = True
 up_angle: float = 145.0
 down_angle: int = 90
-kpts:
+kpts: list[int] = field(default_factory=lambda: [6, 8, 10])
 analytics_type: str = "line"
-figsize:
+figsize: tuple[int, int] | None = (12.8, 7.2)
 blur_ratio: float = 0.5
-vision_point:
+vision_point: tuple[int, int] = (20, 20)
 crop_dir: str = "cropped-detections"
 json_file: str = None
 line_width: int = 2
@@ -87,7 +89,7 @@ class SolutionConfig:
 show: bool = False
 iou: float = 0.7
 conf: float = 0.25
-device:
+device: str | None = None
 max_det: int = 300
 half: bool = False
 tracker: str = "botsort.yaml"
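
The dataclass fields gain explicit optional types, and `kpts` keeps its list default through `field(default_factory=...)`, which is needed because dataclasses reject bare mutable defaults and a shared list would otherwise leak state between instances. A minimal standalone illustration (the class and fields below are examples, not `SolutionConfig` itself):

```python
from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class DemoConfig:
    kpts: list[int] = field(default_factory=lambda: [6, 8, 10])  # fresh list per instance
    device: str | None = None                                     # immutable default needs no factory


a, b = DemoConfig(), DemoConfig()
a.kpts.append(12)
assert b.kpts == [6, 8, 10]  # instances do not share the mutable default
```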
ultralytics/solutions/distance_calculation.py
CHANGED
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 import math
-from typing import Any
+from typing import Any

 import cv2

@@ -39,8 +39,8 @@ class DistanceCalculation(BaseSolution):

 # Mouse event information
 self.left_mouse_count = 0
-self.selected_boxes:
-self.centroids:
+self.selected_boxes: dict[int, list[float]] = {}
+self.centroids: list[list[int]] = [] # Store centroids of selected objects

 def mouse_event_for_distance(self, event: int, x: int, y: int, flags: int, param: Any) -> None:
 """
ultralytics/solutions/heatmap.py
CHANGED
@@ -1,6 +1,8 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

-from
+from __future__ import annotations
+
+from typing import Any

 import cv2
 import numpy as np
@@ -50,7 +52,7 @@ class Heatmap(ObjectCounter):
 self.colormap = self.CFG["colormap"]
 self.heatmap = None

-def heatmap_effect(self, box:
+def heatmap_effect(self, box: list[float]) -> None:
 """
 Efficiently calculate heatmap area and effect location for applying colormap.

ultralytics/solutions/object_counter.py
CHANGED
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 from collections import defaultdict
-from typing import Any
+from typing import Any

 from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
 from ultralytics.utils.plotting import colors
@@ -52,9 +54,9 @@ class ObjectCounter(BaseSolution):

 def count_objects(
 self,
-current_centroid:
+current_centroid: tuple[float, float],
 track_id: int,
-prev_position:
+prev_position: tuple[float, float] | None,
 cls: int,
 ) -> None:
 """
ultralytics/solutions/parking_management.py
CHANGED
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 import json
-from typing import Any
+from typing import Any

 import cv2
 import numpy as np
@@ -141,7 +143,7 @@ class ParkingPtsSelection:
 self.draw_box(self.current_box)
 self.current_box.clear()

-def draw_box(self, box:
+def draw_box(self, box: list[tuple[int, int]]) -> None:
 """Draw a bounding box on the canvas using the provided coordinates."""
 for i in range(4):
 self.canvas.create_line(box[i], box[(i + 1) % 4], fill="blue", width=2)
ultralytics/solutions/region_counter.py
CHANGED
@@ -1,6 +1,8 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

-from
+from __future__ import annotations
+
+from typing import Any

 import numpy as np

@@ -53,10 +55,10 @@ class RegionCounter(BaseSolution):
 def add_region(
 self,
 name: str,
-polygon_points:
-region_color:
-text_color:
-) ->
+polygon_points: list[tuple],
+region_color: tuple[int, int, int],
+text_color: tuple[int, int, int],
+) -> dict[str, Any]:
 """
 Add a new region to the counting list based on the provided template with specific attributes.
