dgenerate-ultralytics-headless 8.3.189-py3-none-any.whl → 8.3.191-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/RECORD +111 -109
- tests/test_cuda.py +6 -5
- tests/test_exports.py +1 -6
- tests/test_python.py +1 -4
- tests/test_solutions.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -14
- ultralytics/cfg/datasets/VisDrone.yaml +4 -4
- ultralytics/data/annotator.py +6 -6
- ultralytics/data/augment.py +53 -51
- ultralytics/data/base.py +15 -13
- ultralytics/data/build.py +7 -4
- ultralytics/data/converter.py +9 -10
- ultralytics/data/dataset.py +24 -22
- ultralytics/data/loaders.py +13 -11
- ultralytics/data/split.py +4 -3
- ultralytics/data/split_dota.py +14 -12
- ultralytics/data/utils.py +31 -25
- ultralytics/engine/exporter.py +7 -4
- ultralytics/engine/model.py +16 -14
- ultralytics/engine/predictor.py +9 -7
- ultralytics/engine/results.py +59 -57
- ultralytics/engine/trainer.py +7 -0
- ultralytics/engine/tuner.py +4 -3
- ultralytics/engine/validator.py +3 -1
- ultralytics/hub/__init__.py +6 -2
- ultralytics/hub/auth.py +2 -2
- ultralytics/hub/google/__init__.py +9 -8
- ultralytics/hub/session.py +11 -11
- ultralytics/hub/utils.py +8 -9
- ultralytics/models/fastsam/model.py +8 -6
- ultralytics/models/nas/model.py +5 -3
- ultralytics/models/rtdetr/train.py +4 -3
- ultralytics/models/rtdetr/val.py +6 -4
- ultralytics/models/sam/amg.py +13 -10
- ultralytics/models/sam/model.py +3 -2
- ultralytics/models/sam/modules/blocks.py +21 -21
- ultralytics/models/sam/modules/decoders.py +11 -11
- ultralytics/models/sam/modules/encoders.py +25 -25
- ultralytics/models/sam/modules/memory_attention.py +9 -8
- ultralytics/models/sam/modules/sam.py +8 -10
- ultralytics/models/sam/modules/tiny_encoder.py +21 -20
- ultralytics/models/sam/modules/transformer.py +6 -5
- ultralytics/models/sam/modules/utils.py +7 -5
- ultralytics/models/sam/predict.py +32 -31
- ultralytics/models/utils/loss.py +29 -27
- ultralytics/models/utils/ops.py +10 -8
- ultralytics/models/yolo/classify/train.py +7 -5
- ultralytics/models/yolo/classify/val.py +10 -8
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +8 -6
- ultralytics/models/yolo/detect/val.py +23 -21
- ultralytics/models/yolo/model.py +14 -14
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +13 -10
- ultralytics/models/yolo/pose/train.py +7 -5
- ultralytics/models/yolo/pose/val.py +11 -9
- ultralytics/models/yolo/segment/train.py +4 -5
- ultralytics/models/yolo/segment/val.py +12 -10
- ultralytics/models/yolo/world/train.py +9 -7
- ultralytics/models/yolo/yoloe/train.py +7 -6
- ultralytics/models/yolo/yoloe/val.py +10 -8
- ultralytics/nn/autobackend.py +40 -52
- ultralytics/nn/modules/__init__.py +3 -3
- ultralytics/nn/modules/block.py +12 -12
- ultralytics/nn/modules/conv.py +4 -3
- ultralytics/nn/modules/head.py +46 -38
- ultralytics/nn/modules/transformer.py +22 -21
- ultralytics/nn/tasks.py +2 -2
- ultralytics/nn/text_model.py +6 -5
- ultralytics/solutions/analytics.py +7 -5
- ultralytics/solutions/config.py +12 -10
- ultralytics/solutions/distance_calculation.py +3 -3
- ultralytics/solutions/heatmap.py +4 -2
- ultralytics/solutions/object_counter.py +5 -3
- ultralytics/solutions/parking_management.py +4 -2
- ultralytics/solutions/region_counter.py +7 -5
- ultralytics/solutions/similarity_search.py +5 -3
- ultralytics/solutions/solutions.py +38 -36
- ultralytics/solutions/streamlit_inference.py +8 -7
- ultralytics/trackers/bot_sort.py +11 -9
- ultralytics/trackers/byte_tracker.py +17 -15
- ultralytics/trackers/utils/gmc.py +4 -3
- ultralytics/utils/__init__.py +27 -77
- ultralytics/utils/autobatch.py +3 -2
- ultralytics/utils/autodevice.py +10 -10
- ultralytics/utils/benchmarks.py +11 -10
- ultralytics/utils/callbacks/comet.py +9 -9
- ultralytics/utils/callbacks/platform.py +2 -1
- ultralytics/utils/checks.py +20 -29
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export.py +12 -11
- ultralytics/utils/files.py +8 -7
- ultralytics/utils/git.py +139 -0
- ultralytics/utils/instance.py +8 -7
- ultralytics/utils/logger.py +7 -6
- ultralytics/utils/loss.py +15 -13
- ultralytics/utils/metrics.py +62 -62
- ultralytics/utils/nms.py +346 -0
- ultralytics/utils/ops.py +83 -251
- ultralytics/utils/patches.py +6 -4
- ultralytics/utils/plotting.py +18 -16
- ultralytics/utils/tal.py +1 -1
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tqdm.py +47 -33
- ultralytics/utils/triton.py +3 -2
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.189.dist-info → dgenerate_ultralytics_headless-8.3.191.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/head.py
CHANGED
```diff
@@ -1,15 +1,17 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Model head modules."""
 
+from __future__ import annotations
+
 import copy
 import math
-from typing import List, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn.init import constant_, xavier_uniform_
 
+from ultralytics.utils import NOT_MACOS14
 from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
 from ultralytics.utils.torch_utils import fuse_conv_and_bn, smart_inference_mode
 
```
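Most of this release is a mechanical type-hint migration: `typing.List/Optional/Tuple/Union` become the builtin generics and `X | None` unions of PEP 585/604, with `from __future__ import annotations` (PEP 563) added so the new syntax stays importable on older interpreters. A minimal sketch of why the future import matters (`scale_boxes` is a hypothetical function, not from the diff):

```python
from __future__ import annotations  # all annotations become lazy strings (PEP 563)

import torch


def scale_boxes(boxes: list[torch.Tensor], ratio: float | None = None) -> list[torch.Tensor]:
    """Without the future import, `list[...]` raises TypeError at import time on
    Python < 3.9 and `float | None` on < 3.10; deferred, they are never evaluated."""
    r = 1.0 if ratio is None else ratio
    return [b * r for b in boxes]
```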
```diff
@@ -75,7 +77,7 @@ class Detect(nn.Module):
     legacy = False  # backward compatibility for v3/v5/v8/v9 models
     xyxy = False  # xyxy or xywh output
 
-    def __init__(self, nc: int = 80, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, ch: tuple = ()):
         """
         Initialize the YOLO detection layer with specified number of classes and channels.
 
@@ -111,7 +113,7 @@ class Detect(nn.Module):
             self.one2one_cv2 = copy.deepcopy(self.cv2)
             self.one2one_cv3 = copy.deepcopy(self.cv3)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[List[torch.Tensor], Tuple]:
+    def forward(self, x: list[torch.Tensor]) -> list[torch.Tensor] | tuple:
         """Concatenate and return predicted bounding boxes and class probabilities."""
         if self.end2end:
             return self.forward_end2end(x)
@@ -123,7 +125,7 @@ class Detect(nn.Module):
         y = self._inference(x)
         return y if self.export else (y, x)
 
-    def forward_end2end(self, x: List[torch.Tensor]) -> Union[dict, Tuple]:
+    def forward_end2end(self, x: list[torch.Tensor]) -> dict | tuple:
         """
         Perform forward pass of the v10Detect module.
 
@@ -147,7 +149,7 @@ class Detect(nn.Module):
         y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
         return y if self.export else (y, {"one2many": x, "one2one": one2one})
 
-    def _inference(self, x: List[torch.Tensor]) -> torch.Tensor:
+    def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
         """
         Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.
 
@@ -199,7 +201,12 @@ class Detect(nn.Module):
 
     def decode_bboxes(self, bboxes: torch.Tensor, anchors: torch.Tensor, xywh: bool = True) -> torch.Tensor:
         """Decode bounding boxes from predictions."""
-        return dist2bbox(bboxes, anchors, xywh=xywh and not self.end2end and not self.xyxy, dim=1)
+        return dist2bbox(
+            bboxes,
+            anchors,
+            xywh=xywh and not self.end2end and not self.xyxy,
+            dim=1,
+        )
 
     @staticmethod
     def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80) -> torch.Tensor:
```
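The `decode_bboxes` hunk is a pure reflow of the same call. For reference, `dist2bbox` (from `ultralytics.utils.tal`) turns per-anchor left/top/right/bottom distances into boxes, and the `xywh` flag picks the output convention; end-to-end and `xyxy` heads short-circuit the flag to force corner output. A minimal numeric sketch of the two conventions (illustrative values, mirroring the math, not library code):

```python
import torch

anchor = torch.tensor([8.0, 8.0])          # anchor point (x, y)
ltrb = torch.tensor([2.0, 3.0, 4.0, 1.0])  # predicted distances to the four box sides

x1y1, x2y2 = anchor - ltrb[:2], anchor + ltrb[2:]
print(torch.cat([x1y1, x2y2]))                      # xywh=False: corners -> tensor([ 6.,  5., 12.,  9.])
print(torch.cat([(x1y1 + x2y2) / 2, x2y2 - x1y1]))  # xywh=True: center+size -> tensor([9., 7., 6., 4.])
```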
```diff
@@ -248,7 +255,7 @@ class Segment(Detect):
         >>> outputs = segment(x)
         """
 
-    def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch: tuple = ()):
         """
         Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.
 
@@ -266,7 +273,7 @@ class Segment(Detect):
         c4 = max(ch[0] // 4, self.nm)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[Tuple, List[torch.Tensor]]:
+    def forward(self, x: list[torch.Tensor]) -> tuple | list[torch.Tensor]:
         """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
         p = self.proto(x[0])  # mask protos
         bs = p.shape[0]  # batch size
@@ -300,7 +307,7 @@ class OBB(Detect):
         >>> outputs = obb(x)
         """
 
-    def __init__(self, nc: int = 80, ne: int = 1, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, ne: int = 1, ch: tuple = ()):
         """
         Initialize OBB with number of classes `nc` and layer channels `ch`.
 
@@ -315,7 +322,7 @@ class OBB(Detect):
         c4 = max(ch[0] // 4, self.ne)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.ne, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor]) -> torch.Tensor | tuple:
         """Concatenate and return predicted bounding boxes and class probabilities."""
         bs = x[0].shape[0]  # batch size
         angle = torch.cat([self.cv4[i](x[i]).view(bs, self.ne, -1) for i in range(self.nl)], 2)  # OBB theta logits
@@ -356,7 +363,7 @@ class Pose(Detect):
         >>> outputs = pose(x)
         """
 
-    def __init__(self, nc: int = 80, kpt_shape: Tuple = (17, 3), ch: Tuple = ()):
+    def __init__(self, nc: int = 80, kpt_shape: tuple = (17, 3), ch: tuple = ()):
         """
         Initialize YOLO network with default parameters and Convolutional Layers.
 
@@ -372,7 +379,7 @@ class Pose(Detect):
         c4 = max(ch[0] // 4, self.nk)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor]) -> torch.Tensor | tuple:
         """Perform forward pass through YOLO model and return predictions."""
         bs = x[0].shape[0]  # batch size
         kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
```
```diff
@@ -408,7 +415,10 @@ class Pose(Detect):
         else:
             y = kpts.clone()
             if ndim == 3:
-                y[:, 2::ndim] = y[:, 2::ndim].sigmoid()  # sigmoid (WARNING: inplace .sigmoid_() Apple MPS bug)
+                if NOT_MACOS14:
+                    y[:, 2::ndim].sigmoid_()
+                else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
+                    y[:, 2::ndim] = y[:, 2::ndim].sigmoid()
             y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
             y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
             return y
```
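This is the one behavioral change in the file: keypoint-confidence decoding now uses the faster in-place `sigmoid_()` everywhere except macOS 14, where the linked PR reports that in-place ops on strided MPS views are unreliable, so the out-of-place fallback is kept there. The pattern as a standalone sketch (`decode_conf` and the `inplace_ok` flag, standing in for `NOT_MACOS14`, are illustrative):

```python
import torch


def decode_conf(y: torch.Tensor, ndim: int, inplace_ok: bool) -> torch.Tensor:
    """Apply sigmoid to every ndim-th channel starting at 2 (the keypoint confidence)."""
    if inplace_ok:
        y[:, 2::ndim].sigmoid_()  # in-place on a strided view: no extra allocation
    else:
        y[:, 2::ndim] = y[:, 2::ndim].sigmoid()  # out-of-place: compute a new tensor, copy back
    return y
```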
```diff
@@ -439,7 +449,7 @@ class Classify(nn.Module):
 
     export = False  # export mode
 
-    def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p: Optional[int] = None, g: int = 1):
+    def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
         """
         Initialize YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.
 
@@ -458,7 +468,7 @@ class Classify(nn.Module):
         self.drop = nn.Dropout(p=0.0, inplace=True)
         self.linear = nn.Linear(c_, c2)  # to x(b,c2)
 
-    def forward(self, x: Union[List[torch.Tensor], torch.Tensor]) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor] | torch.Tensor) -> torch.Tensor | tuple:
         """Perform forward pass of the YOLO model on input image data."""
         if isinstance(x, list):
             x = torch.cat(x, 1)
@@ -492,7 +502,7 @@ class WorldDetect(Detect):
         >>> outputs = world_detect(x, text)
         """
 
-    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
         """
         Initialize YOLO detection layer with nc classes and layer channels ch.
 
@@ -507,7 +517,7 @@ class WorldDetect(Detect):
         self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
         self.cv4 = nn.ModuleList(BNContrastiveHead(embed) if with_bn else ContrastiveHead() for _ in ch)
 
-    def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> Union[List[torch.Tensor], Tuple]:
+    def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> list[torch.Tensor] | tuple:
         """Concatenate and return predicted bounding boxes and class probabilities."""
         for i in range(self.nl):
             x[i] = torch.cat((self.cv2[i](x[i]), self.cv4[i](self.cv3[i](x[i]), text)), 1)
@@ -576,7 +586,7 @@ class LRPCHead(nn.Module):
             linear.bias.data = conv.bias.data
         return linear
 
-    def forward(self, cls_feat: torch.Tensor, loc_feat: torch.Tensor, conf: float) -> Tuple[Tuple, torch.Tensor]:
+    def forward(self, cls_feat: torch.Tensor, loc_feat: torch.Tensor, conf: float) -> tuple[tuple, torch.Tensor]:
         """Process classification and localization features to generate detection proposals."""
         if self.enabled:
             pf_score = self.pf(cls_feat)[0, 0].flatten(0)
@@ -625,7 +635,7 @@ class YOLOEDetect(Detect):
 
     is_fused = False
 
-    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
         """
         Initialize YOLO detection layer with nc classes and layer channels ch.
 
@@ -638,7 +648,7 @@ class YOLOEDetect(Detect):
         super().__init__(nc, ch)
         c3 = max(ch[0], min(self.nc, 100))
         assert c3 <= embed
-        assert with_bn is True
+        assert with_bn
         self.cv3 = (
             nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
             if self.legacy
@@ -705,11 +715,11 @@ class YOLOEDetect(Detect):
         self.reprta = nn.Identity()
         self.is_fused = True
 
-    def get_tpe(self, tpe: Optional[torch.Tensor]) -> Optional[torch.Tensor]:
+    def get_tpe(self, tpe: torch.Tensor | None) -> torch.Tensor | None:
         """Get text prompt embeddings with normalization."""
         return None if tpe is None else F.normalize(self.reprta(tpe), dim=-1, p=2)
 
-    def get_vpe(self, x: List[torch.Tensor], vpe: torch.Tensor) -> torch.Tensor:
+    def get_vpe(self, x: list[torch.Tensor], vpe: torch.Tensor) -> torch.Tensor:
         """Get visual prompt embeddings with spatial awareness."""
         if vpe.shape[1] == 0:  # no visual prompt embeddings
             return torch.zeros(x[0].shape[0], 0, self.embed, device=x[0].device)
@@ -718,7 +728,7 @@ class YOLOEDetect(Detect):
         assert vpe.ndim == 3  # (B, N, D)
         return vpe
 
-    def forward_lrpc(self, x: List[torch.Tensor], return_mask: bool = False) -> Union[torch.Tensor, Tuple]:
+    def forward_lrpc(self, x: list[torch.Tensor], return_mask: bool = False) -> torch.Tensor | tuple:
         """Process features with fused text embeddings to generate detections for prompt-free model."""
         masks = []
         assert self.is_fused, "Prompt-free inference requires model to be fused!"
@@ -756,9 +766,7 @@ class YOLOEDetect(Detect):
         else:
             return y if self.export else (y, x)
 
-    def forward(
-        self, x: List[torch.Tensor], cls_pe: torch.Tensor, return_mask: bool = False
-    ) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor], cls_pe: torch.Tensor, return_mask: bool = False) -> torch.Tensor | tuple:
         """Process features with class prompt embeddings to generate detections."""
         if hasattr(self, "lrpc"):  # for prompt-free inference
             return self.forward_lrpc(x, return_mask)
@@ -807,7 +815,7 @@ class YOLOESegment(YOLOEDetect):
     """
 
     def __init__(
-        self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch: Tuple = ()
+        self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch: tuple = ()
     ):
         """
         Initialize YOLOESegment with class count, mask parameters, and embedding dimensions.
@@ -828,7 +836,7 @@ class YOLOESegment(YOLOEDetect):
         c5 = max(ch[0] // 4, self.nm)
         self.cv5 = nn.ModuleList(nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nm, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> Union[Tuple, torch.Tensor]:
+    def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> tuple | torch.Tensor:
         """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
         p = self.proto(x[0])  # mask protos
         bs = p.shape[0]  # batch size
@@ -896,7 +904,7 @@ class RTDETRDecoder(nn.Module):
     def __init__(
         self,
         nc: int = 80,
-        ch: Tuple = (512, 1024, 2048),
+        ch: tuple = (512, 1024, 2048),
         hd: int = 256,  # hidden dim
         nq: int = 300,  # num queries
         ndp: int = 4,  # num decoder points
@@ -972,7 +980,7 @@ class RTDETRDecoder(nn.Module):
 
         self._reset_parameters()
 
-    def forward(self, x: List[torch.Tensor], batch: Optional[dict] = None) -> Union[Tuple, torch.Tensor]:
+    def forward(self, x: list[torch.Tensor], batch: dict | None = None) -> tuple | torch.Tensor:
         """
         Run the forward pass of the module, returning bounding box and classification scores for the input.
 
@@ -1024,12 +1032,12 @@ class RTDETRDecoder(nn.Module):
 
     def _generate_anchors(
         self,
-        shapes: List[List[int]],
+        shapes: list[list[int]],
         grid_size: float = 0.05,
         dtype: torch.dtype = torch.float32,
         device: str = "cpu",
         eps: float = 1e-2,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Generate anchor bounding boxes for given shapes with specific grid size and validate them.
 
@@ -1062,7 +1070,7 @@ class RTDETRDecoder(nn.Module):
         anchors = anchors.masked_fill(~valid_mask, float("inf"))
         return anchors, valid_mask
 
-    def _get_encoder_input(self, x: List[torch.Tensor]) -> Tuple[torch.Tensor, List[List[int]]]:
+    def _get_encoder_input(self, x: list[torch.Tensor]) -> tuple[torch.Tensor, list[list[int]]]:
         """
         Process and return encoder inputs by getting projection features from input and concatenating them.
 
@@ -1092,10 +1100,10 @@ class RTDETRDecoder(nn.Module):
     def _get_decoder_input(
         self,
         feats: torch.Tensor,
-        shapes: List[List[int]],
-        dn_embed: Optional[torch.Tensor] = None,
-        dn_bbox: Optional[torch.Tensor] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        shapes: list[list[int]],
+        dn_embed: torch.Tensor | None = None,
+        dn_bbox: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         Generate and prepare the input required for the decoder from the provided features and shapes.
 
@@ -1200,7 +1208,7 @@ class v10Detect(Detect):
 
     end2end = True
 
-    def __init__(self, nc: int = 80, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, ch: tuple = ()):
         """
         Initialize the v10Detect object with the specified number of classes and input channels.
 
```
ultralytics/nn/modules/transformer.py
CHANGED
```diff
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Transformer modules."""
 
+from __future__ import annotations
+
 import math
-from typing import List, Optional
 
 import torch
 import torch.nn as nn
@@ -88,16 +89,16 @@ class TransformerEncoderLayer(nn.Module):
         self.normalize_before = normalize_before
 
     @staticmethod
-    def with_pos_embed(tensor: torch.Tensor, pos: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None = None) -> torch.Tensor:
         """Add position embeddings to the tensor if provided."""
         return tensor if pos is None else tensor + pos
 
     def forward_post(
         self,
         src: torch.Tensor,
-        src_mask: Optional[torch.Tensor] = None,
-        src_key_padding_mask: Optional[torch.Tensor] = None,
-        pos: Optional[torch.Tensor] = None,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform forward pass with post-normalization.
@@ -122,9 +123,9 @@ class TransformerEncoderLayer(nn.Module):
     def forward_pre(
         self,
         src: torch.Tensor,
-        src_mask: Optional[torch.Tensor] = None,
-        src_key_padding_mask: Optional[torch.Tensor] = None,
-        pos: Optional[torch.Tensor] = None,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform forward pass with pre-normalization.
@@ -149,9 +150,9 @@ class TransformerEncoderLayer(nn.Module):
     def forward(
         self,
         src: torch.Tensor,
-        src_mask: Optional[torch.Tensor] = None,
-        src_key_padding_mask: Optional[torch.Tensor] = None,
-        pos: Optional[torch.Tensor] = None,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Forward propagate the input through the encoder module.
@@ -533,8 +534,8 @@ class MSDeformAttn(nn.Module):
         query: torch.Tensor,
         refer_bbox: torch.Tensor,
         value: torch.Tensor,
-        value_shapes: List,
-        value_mask: Optional[torch.Tensor] = None,
+        value_shapes: list,
+        value_mask: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform forward pass for multiscale deformable attention.
@@ -649,7 +650,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
         self.norm3 = nn.LayerNorm(d_model)
 
     @staticmethod
-    def with_pos_embed(tensor: torch.Tensor, pos: Optional[torch.Tensor]) -> torch.Tensor:
+    def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None) -> torch.Tensor:
         """Add positional embeddings to the input tensor, if provided."""
         return tensor if pos is None else tensor + pos
 
@@ -672,10 +673,10 @@ class DeformableTransformerDecoderLayer(nn.Module):
         embed: torch.Tensor,
         refer_bbox: torch.Tensor,
         feats: torch.Tensor,
-        shapes: List,
-        padding_mask: Optional[torch.Tensor] = None,
-        attn_mask: Optional[torch.Tensor] = None,
-        query_pos: Optional[torch.Tensor] = None,
+        shapes: list,
+        padding_mask: torch.Tensor | None = None,
+        attn_mask: torch.Tensor | None = None,
+        query_pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform the forward pass through the entire decoder layer.
@@ -749,12 +750,12 @@ class DeformableTransformerDecoder(nn.Module):
         embed: torch.Tensor,  # decoder embeddings
         refer_bbox: torch.Tensor,  # anchor
         feats: torch.Tensor,  # image features
-        shapes: List,  # feature shapes
+        shapes: list,  # feature shapes
         bbox_head: nn.Module,
         score_head: nn.Module,
         pos_mlp: nn.Module,
-        attn_mask: Optional[torch.Tensor] = None,
-        padding_mask: Optional[torch.Tensor] = None,
+        attn_mask: torch.Tensor | None = None,
+        padding_mask: torch.Tensor | None = None,
     ):
         """
         Perform the forward pass through the entire decoder.
```
ultralytics/nn/tasks.py
CHANGED
```diff
@@ -1548,7 +1548,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
     """
     ckpt, weight = torch_safe_load(weight)  # load ckpt
     args = {**DEFAULT_CFG_DICT, **(ckpt.get("train_args", {}))}  # combine model and default args, preferring model args
-    model = (ckpt.get("ema") or ckpt["model"]).to(device).float()  # FP32 model
+    model = (ckpt.get("ema") or ckpt["model"]).float()  # FP32 model
 
     # Model compatibility updates
     model.args = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS}  # attach args to model
@@ -1557,7 +1557,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
     if not hasattr(model, "stride"):
         model.stride = torch.tensor([32.0])
 
-    model = model.fuse().eval() if fuse and hasattr(model, "fuse") else model.eval()  # model in eval mode
+    model = (model.fuse() if fuse and hasattr(model, "fuse") else model).eval().to(device)  # model in eval mode
 
     # Module updates
     for m in model.modules():
```
ultralytics/nn/text_model.py
CHANGED
```diff
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from abc import abstractmethod
 from pathlib import Path
-from typing import List, Union
 
 import torch
 import torch.nn as nn
@@ -91,7 +92,7 @@ class CLIP(TextModel):
         self.device = device
         self.eval()
 
-    def tokenize(self, texts: Union[str, List[str]]) -> torch.Tensor:
+    def tokenize(self, texts: str | list[str]) -> torch.Tensor:
         """
         Convert input texts to CLIP tokens.
 
@@ -135,7 +136,7 @@ class CLIP(TextModel):
         return txt_feats
 
     @smart_inference_mode()
-    def encode_image(self, image: Union[Image.Image, torch.Tensor], dtype: torch.dtype = torch.float32) -> torch.Tensor:
+    def encode_image(self, image: Image.Image | torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
         """
         Encode preprocessed images into normalized feature vectors.
 
@@ -234,7 +235,7 @@ class MobileCLIP(TextModel):
         self.device = device
         self.eval()
 
-    def tokenize(self, texts: List[str]) -> torch.Tensor:
+    def tokenize(self, texts: list[str]) -> torch.Tensor:
         """
         Convert input texts to MobileCLIP tokens.
 
@@ -319,7 +320,7 @@ class MobileCLIPTS(TextModel):
         self.tokenizer = clip.clip.tokenize
         self.device = device
 
-    def tokenize(self, texts: List[str]) -> torch.Tensor:
+    def tokenize(self, texts: list[str]) -> torch.Tensor:
         """
         Convert input texts to MobileCLIP tokens.
 
```
ultralytics/solutions/analytics.py
CHANGED
```diff
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from itertools import cycle
-from typing import Any, Dict, Optional
+from typing import Any
 
 import cv2
 import numpy as np
@@ -135,7 +137,7 @@ class Analytics(BaseSolution):
         return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)
 
     def update_graph(
-        self, frame_number: int, count_dict: Optional[Dict[str, int]] = None, plot: str = "line"
+        self, frame_number: int, count_dict: dict[str, int] | None = None, plot: str = "line"
     ) -> np.ndarray:
         """
         Update the graph with new data for single or multiple classes.
@@ -204,7 +206,7 @@ class Analytics(BaseSolution):
                     markersize=self.line_width * 5,
                     label=f"{key} Data Points",
                 )
-        if plot == "bar":
+        elif plot == "bar":
             self.ax.clear()  # clear bar data
             for label in labels:  # Map labels to colors
                 if label not in self.color_mapping:
@@ -224,12 +226,12 @@ class Analytics(BaseSolution):
             for bar, label in zip(bars, labels):
                 bar.set_label(label)  # Assign label to each bar
             self.ax.legend(loc="upper left", fontsize=13, facecolor=self.fg_color, edgecolor=self.fg_color)
-        if plot == "pie":
+        elif plot == "pie":
             total = sum(counts)
             percentages = [size / total * 100 for size in counts]
-            start_angle = 90
             self.ax.clear()
 
+            start_angle = 90
             # Create pie chart and create legend labels with percentages
             wedges, _ = self.ax.pie(
                 counts, labels=labels, startangle=start_angle, textprops={"color": self.fg_color}, autopct=None
```
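In `update_graph`, the `bar` and `pie` branches were previously independent `if` statements chained after the `line` handling; switching them to `elif` makes the three plot types mutually exclusive per call, and `start_angle` now sits next to the pie drawing it configures. The dispatch shape, reduced to a sketch (hypothetical function):

```python
from __future__ import annotations


def update_graph_dispatch(plot: str) -> list[str]:
    ops: list[str] = []
    if plot == "line":
        ops.append("update line data")
    elif plot == "bar":  # previously a separate `if`, re-evaluated even when the line branch had run
        ops.append("clear axes and redraw bars")
    elif plot == "pie":
        ops.append("clear axes and redraw pie")
    return ops
```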
ultralytics/solutions/config.py
CHANGED
```diff
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from dataclasses import dataclass, field
-from typing import Any, List, Optional, Tuple
+from typing import Any
 
 import cv2
 
@@ -60,22 +62,22 @@ class SolutionConfig:
         >>> print(cfg.model)
     """
 
-    source: Optional[str] = None
-    model: Optional[str] = None
-    classes: Optional[List[int]] = None
+    source: str | None = None
+    model: str | None = None
+    classes: list[int] | None = None
     show_conf: bool = True
     show_labels: bool = True
-    region: Optional[List[Tuple[int, int]]] = None
-    colormap: Optional[int] = cv2.COLORMAP_DEEPGREEN
+    region: list[tuple[int, int]] | None = None
+    colormap: int | None = cv2.COLORMAP_DEEPGREEN
     show_in: bool = True
     show_out: bool = True
     up_angle: float = 145.0
     down_angle: int = 90
-    kpts: List[int] = field(default_factory=lambda: [6, 8, 10])
+    kpts: list[int] = field(default_factory=lambda: [6, 8, 10])
     analytics_type: str = "line"
-    figsize: Optional[Tuple[int, int]] = (12.8, 7.2)
+    figsize: tuple[int, int] | None = (12.8, 7.2)
     blur_ratio: float = 0.5
-    vision_point: Tuple[int, int] = (20, 20)
+    vision_point: tuple[int, int] = (20, 20)
     crop_dir: str = "cropped-detections"
     json_file: str = None
     line_width: int = 2
@@ -87,7 +89,7 @@ class SolutionConfig:
     show: bool = False
     iou: float = 0.7
     conf: float = 0.25
-    device: Optional[str] = None
+    device: str | None = None
     max_det: int = 300
     half: bool = False
     tracker: str = "botsort.yaml"
```
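`SolutionConfig` keeps `field(default_factory=...)` for its one mutable default while the annotations move to builtin generics. Dataclasses reject bare mutable defaults precisely because they would be shared across every instance; a minimal demonstration (`Cfg` is a hypothetical stand-in):

```python
from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class Cfg:
    # A bare `kpts: list[int] = [6, 8, 10]` raises ValueError at class-creation time:
    # dataclasses disallow mutable defaults because one list would back all instances.
    kpts: list[int] = field(default_factory=lambda: [6, 8, 10])


a, b = Cfg(), Cfg()
a.kpts.append(12)
print(b.kpts)  # [6, 8, 10] — each instance gets its own list
```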
ultralytics/solutions/distance_calculation.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 import math
-from typing import Any, Dict, List
+from typing import Any
 
 import cv2
 
@@ -39,8 +39,8 @@ class DistanceCalculation(BaseSolution):
 
         # Mouse event information
         self.left_mouse_count = 0
-        self.selected_boxes: Dict[int, List[float]] = {}
-        self.centroids: List[List[int]] = []  # Store centroids of selected objects
+        self.selected_boxes: dict[int, list[float]] = {}
+        self.centroids: list[list[int]] = []  # Store centroids of selected objects
 
     def mouse_event_for_distance(self, event: int, x: int, y: int, flags: int, param: Any) -> None:
         """
```
ultralytics/solutions/heatmap.py
CHANGED
```diff
@@ -1,6 +1,8 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-from typing import Any, List
+from __future__ import annotations
+
+from typing import Any
 
 import cv2
 import numpy as np
@@ -50,7 +52,7 @@ class Heatmap(ObjectCounter):
         self.colormap = self.CFG["colormap"]
         self.heatmap = None
 
-    def heatmap_effect(self, box: List[float]) -> None:
+    def heatmap_effect(self, box: list[float]) -> None:
         """
         Efficiently calculate heatmap area and effect location for applying colormap.
 
```
ultralytics/solutions/object_counter.py
CHANGED
```diff
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from collections import defaultdict
-from typing import Any, Optional, Tuple
+from typing import Any
 
 from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
 from ultralytics.utils.plotting import colors
@@ -52,9 +54,9 @@ class ObjectCounter(BaseSolution):
 
     def count_objects(
         self,
-        current_centroid: Tuple[float, float],
+        current_centroid: tuple[float, float],
         track_id: int,
-        prev_position: Optional[Tuple[float, float]],
+        prev_position: tuple[float, float] | None,
         cls: int,
     ) -> None:
         """
```
ultralytics/solutions/parking_management.py
CHANGED
```diff
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 import json
-from typing import Any, List, Tuple
+from typing import Any
 
 import cv2
 import numpy as np
@@ -141,7 +143,7 @@ class ParkingPtsSelection:
         self.draw_box(self.current_box)
         self.current_box.clear()
 
-    def draw_box(self, box: List[Tuple[int, int]]) -> None:
+    def draw_box(self, box: list[tuple[int, int]]) -> None:
         """Draw a bounding box on the canvas using the provided coordinates."""
         for i in range(4):
             self.canvas.create_line(box[i], box[(i + 1) % 4], fill="blue", width=2)
```