dgenerate-ultralytics-headless 8.3.253__py3-none-any.whl → 8.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/METADATA +41 -49
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/RECORD +85 -74
- tests/__init__.py +2 -2
- tests/conftest.py +1 -1
- tests/test_cuda.py +8 -2
- tests/test_engine.py +8 -8
- tests/test_exports.py +11 -4
- tests/test_integrations.py +9 -9
- tests/test_python.py +14 -14
- tests/test_solutions.py +3 -3
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +25 -27
- ultralytics/cfg/default.yaml +3 -1
- ultralytics/cfg/models/26/yolo26-cls.yaml +33 -0
- ultralytics/cfg/models/26/yolo26-obb.yaml +52 -0
- ultralytics/cfg/models/26/yolo26-p2.yaml +60 -0
- ultralytics/cfg/models/26/yolo26-p6.yaml +62 -0
- ultralytics/cfg/models/26/yolo26-pose.yaml +53 -0
- ultralytics/cfg/models/26/yolo26-seg.yaml +52 -0
- ultralytics/cfg/models/26/yolo26.yaml +52 -0
- ultralytics/cfg/models/26/yoloe-26-seg.yaml +53 -0
- ultralytics/cfg/models/26/yoloe-26.yaml +53 -0
- ultralytics/data/annotator.py +2 -2
- ultralytics/data/augment.py +7 -0
- ultralytics/data/converter.py +57 -38
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/exporter.py +31 -26
- ultralytics/engine/model.py +34 -34
- ultralytics/engine/predictor.py +17 -17
- ultralytics/engine/results.py +14 -12
- ultralytics/engine/trainer.py +59 -29
- ultralytics/engine/tuner.py +19 -11
- ultralytics/engine/validator.py +16 -16
- ultralytics/models/fastsam/predict.py +1 -1
- ultralytics/models/yolo/classify/predict.py +1 -1
- ultralytics/models/yolo/classify/train.py +1 -1
- ultralytics/models/yolo/classify/val.py +1 -1
- ultralytics/models/yolo/detect/predict.py +2 -2
- ultralytics/models/yolo/detect/train.py +4 -3
- ultralytics/models/yolo/detect/val.py +7 -1
- ultralytics/models/yolo/model.py +8 -8
- ultralytics/models/yolo/obb/predict.py +2 -2
- ultralytics/models/yolo/obb/train.py +3 -3
- ultralytics/models/yolo/obb/val.py +1 -1
- ultralytics/models/yolo/pose/predict.py +1 -1
- ultralytics/models/yolo/pose/train.py +3 -1
- ultralytics/models/yolo/pose/val.py +1 -1
- ultralytics/models/yolo/segment/predict.py +3 -3
- ultralytics/models/yolo/segment/train.py +4 -4
- ultralytics/models/yolo/segment/val.py +4 -2
- ultralytics/models/yolo/yoloe/train.py +6 -1
- ultralytics/models/yolo/yoloe/train_seg.py +6 -1
- ultralytics/nn/autobackend.py +5 -5
- ultralytics/nn/modules/__init__.py +8 -0
- ultralytics/nn/modules/block.py +128 -8
- ultralytics/nn/modules/head.py +788 -203
- ultralytics/nn/tasks.py +86 -41
- ultralytics/nn/text_model.py +5 -2
- ultralytics/optim/__init__.py +5 -0
- ultralytics/optim/muon.py +338 -0
- ultralytics/solutions/ai_gym.py +3 -3
- ultralytics/solutions/config.py +1 -1
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +2 -2
- ultralytics/solutions/parking_management.py +1 -1
- ultralytics/solutions/solutions.py +2 -2
- ultralytics/trackers/track.py +1 -1
- ultralytics/utils/__init__.py +8 -8
- ultralytics/utils/benchmarks.py +23 -23
- ultralytics/utils/callbacks/platform.py +11 -7
- ultralytics/utils/checks.py +6 -6
- ultralytics/utils/downloads.py +5 -3
- ultralytics/utils/export/engine.py +19 -10
- ultralytics/utils/export/imx.py +19 -13
- ultralytics/utils/export/tensorflow.py +21 -21
- ultralytics/utils/files.py +2 -2
- ultralytics/utils/loss.py +587 -203
- ultralytics/utils/metrics.py +1 -0
- ultralytics/utils/ops.py +11 -2
- ultralytics/utils/tal.py +98 -19
- ultralytics/utils/tuner.py +2 -2
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.253.dist-info → dgenerate_ultralytics_headless-8.4.3.dist-info}/top_level.txt +0 -0
ultralytics/nn/tasks.py
CHANGED
@@ -20,6 +20,7 @@ from ultralytics.nn.modules import (
     C3TR,
     ELAN1,
     OBB,
+    OBB26,
     PSA,
     SPP,
     SPPELAN,
@@ -55,6 +56,7 @@ from ultralytics.nn.modules import (
     Index,
     LRPCHead,
     Pose,
+    Pose26,
     RepC3,
     RepConv,
     RepNCSPELAN4,
@@ -63,16 +65,19 @@ from ultralytics.nn.modules import (
     RTDETRDecoder,
     SCDown,
     Segment,
+    Segment26,
     TorchVision,
     WorldDetect,
     YOLOEDetect,
     YOLOESegment,
+    YOLOESegment26,
     v10Detect,
 )
 from ultralytics.utils import DEFAULT_CFG_DICT, LOGGER, YAML, colorstr, emojis
 from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
 from ultralytics.utils.loss import (
-
+    E2ELoss,
+    PoseLoss26,
     v8ClassificationLoss,
     v8DetectionLoss,
     v8OBBLoss,
@@ -241,7 +246,7 @@ class BaseModel(torch.nn.Module):
             if isinstance(m, RepVGGDW):
                 m.fuse()
                 m.forward = m.forward_fuse
-            if isinstance(m,
+            if isinstance(m, Detect) and getattr(m, "end2end", False):
                 m.fuse()  # remove one2many head
         self.info(verbose=verbose)

@@ -356,11 +361,11 @@ class DetectionModel(BaseModel):

     Examples:
         Initialize a detection model
-        >>> model = DetectionModel("
+        >>> model = DetectionModel("yolo26n.yaml", ch=3, nc=80)
         >>> results = model.predict(image_tensor)
     """

-    def __init__(self, cfg="
+    def __init__(self, cfg="yolo26n.yaml", ch=3, nc=None, verbose=True):
         """Initialize the YOLO detection model with the given config and parameters.

         Args:
@@ -386,7 +391,6 @@ class DetectionModel(BaseModel):
         self.model, self.save = parse_model(deepcopy(self.yaml), ch=ch, verbose=verbose)  # model, savelist
         self.names = {i: f"{i}" for i in range(self.yaml["nc"])}  # default names dict
         self.inplace = self.yaml.get("inplace", True)
-        self.end2end = getattr(self.model[-1], "end2end", False)

         # Build strides
         m = self.model[-1]  # Detect()
@@ -396,9 +400,10 @@ class DetectionModel(BaseModel):

             def _forward(x):
                 """Perform a forward pass through the model, handling different Detect subclass types accordingly."""
+                output = self.forward(x)
                 if self.end2end:
-
-                return
+                    output = output["one2many"]
+                return output["feats"]

             self.model.eval()  # Avoid changing batch statistics until training begins
             m.training = True  # Setting it to True to properly return strides
@@ -415,6 +420,11 @@ class DetectionModel(BaseModel):
             self.info()
             LOGGER.info("")

+    @property
+    def end2end(self):
+        """Return whether the model uses end-to-end NMS-free detection."""
+        return getattr(self.model[-1], "end2end", False)
+
     def _predict_augment(self, x):
         """Perform augmentations on input image x and return augmented inference and train outputs.

@@ -481,7 +491,7 @@ class DetectionModel(BaseModel):

     def init_criterion(self):
         """Initialize the loss criterion for the DetectionModel."""
-        return
+        return E2ELoss(self) if getattr(self, "end2end", False) else v8DetectionLoss(self)


 class OBBModel(DetectionModel):
@@ -496,11 +506,11 @@ class OBBModel(DetectionModel):

     Examples:
         Initialize an OBB model
-        >>> model = OBBModel("
+        >>> model = OBBModel("yolo26n-obb.yaml", ch=3, nc=80)
         >>> results = model.predict(image_tensor)
     """

-    def __init__(self, cfg="
+    def __init__(self, cfg="yolo26n-obb.yaml", ch=3, nc=None, verbose=True):
         """Initialize YOLO OBB model with given config and parameters.

         Args:
@@ -513,7 +523,7 @@ class OBBModel(DetectionModel):

     def init_criterion(self):
         """Initialize the loss criterion for the model."""
-        return v8OBBLoss(self)
+        return E2ELoss(self, v8OBBLoss) if getattr(self, "end2end", False) else v8OBBLoss(self)


 class SegmentationModel(DetectionModel):
@@ -528,11 +538,11 @@ class SegmentationModel(DetectionModel):

     Examples:
         Initialize a segmentation model
-        >>> model = SegmentationModel("
+        >>> model = SegmentationModel("yolo26n-seg.yaml", ch=3, nc=80)
         >>> results = model.predict(image_tensor)
     """

-    def __init__(self, cfg="
+    def __init__(self, cfg="yolo26n-seg.yaml", ch=3, nc=None, verbose=True):
         """Initialize Ultralytics YOLO segmentation model with given config and parameters.

         Args:
@@ -545,7 +555,7 @@ class SegmentationModel(DetectionModel):

     def init_criterion(self):
         """Initialize the loss criterion for the SegmentationModel."""
-        return v8SegmentationLoss(self)
+        return E2ELoss(self, v8SegmentationLoss) if getattr(self, "end2end", False) else v8SegmentationLoss(self)


 class PoseModel(DetectionModel):
@@ -563,11 +573,11 @@ class PoseModel(DetectionModel):

     Examples:
         Initialize a pose model
-        >>> model = PoseModel("
+        >>> model = PoseModel("yolo26n-pose.yaml", ch=3, nc=1, data_kpt_shape=(17, 3))
         >>> results = model.predict(image_tensor)
     """

-    def __init__(self, cfg="
+    def __init__(self, cfg="yolo26n-pose.yaml", ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
         """Initialize Ultralytics YOLO Pose model.

         Args:
@@ -586,7 +596,7 @@ class PoseModel(DetectionModel):

     def init_criterion(self):
         """Initialize the loss criterion for the PoseModel."""
-        return v8PoseLoss(self)
+        return E2ELoss(self, PoseLoss26) if getattr(self, "end2end", False) else v8PoseLoss(self)


 class ClassificationModel(BaseModel):
@@ -609,11 +619,11 @@ class ClassificationModel(BaseModel):

     Examples:
         Initialize a classification model
-        >>> model = ClassificationModel("
+        >>> model = ClassificationModel("yolo26n-cls.yaml", ch=3, nc=1000)
         >>> results = model.predict(image_tensor)
     """

-    def __init__(self, cfg="
+    def __init__(self, cfg="yolo26n-cls.yaml", ch=3, nc=None, verbose=True):
         """Initialize ClassificationModel with YAML, channels, number of classes, verbose flag.

         Args:
@@ -984,6 +994,7 @@ class YOLOEModel(DetectionModel):
             verbose (bool): Whether to display model information.
         """
         super().__init__(cfg=cfg, ch=ch, nc=nc, verbose=verbose)
+        self.text_model = self.yaml.get("text_model", "mobileclip:blt")

     @smart_inference_mode()
     def get_text_pe(self, text, batch=80, cache_clip_model=False, without_reprta=False):
@@ -1003,9 +1014,13 @@ class YOLOEModel(DetectionModel):
         device = next(self.model.parameters()).device
         if not getattr(self, "clip_model", None) and cache_clip_model:
             # For backwards compatibility of models lacking clip_model attribute
-            self.clip_model = build_text_model("mobileclip:blt", device=device)
+            self.clip_model = build_text_model(getattr(self, "text_model", "mobileclip:blt"), device=device)

-        model =
+        model = (
+            self.clip_model
+            if cache_clip_model
+            else build_text_model(getattr(self, "text_model", "mobileclip:blt"), device=device)
+        )
         text_token = model.tokenize(text)
         txt_feats = [model.encode_text(token).detach() for token in text_token.split(batch)]
         txt_feats = txt_feats[0] if len(txt_feats) == 1 else torch.cat(txt_feats, dim=0)
@@ -1045,10 +1060,12 @@ class YOLOEModel(DetectionModel):
         device = next(self.parameters()).device
         self(torch.empty(1, 3, self.args["imgsz"], self.args["imgsz"]).to(device))  # warmup

+        cv3 = getattr(head, "one2one_cv3", head.cv3)
+        cv2 = getattr(head, "one2one_cv2", head.cv2)
+
         # re-parameterization for prompt-free model
         self.model[-1].lrpc = nn.ModuleList(
-            LRPCHead(cls, pf[-1], loc[-1], enabled=i != 2)
-            for i, (cls, pf, loc) in enumerate(zip(vocab, head.cv3, head.cv2))
+            LRPCHead(cls, pf[-1], loc[-1], enabled=i != 2) for i, (cls, pf, loc) in enumerate(zip(vocab, cv3, cv2))
         )
         for loc_head, cls_head in zip(head.cv2, head.cv3):
             assert isinstance(loc_head, nn.Sequential)
@@ -1077,8 +1094,9 @@ class YOLOEModel(DetectionModel):
         device = next(self.model.parameters()).device
         head.fuse(self.pe.to(device))  # fuse prompt embeddings to classify head

+        cv3 = getattr(head, "one2one_cv3", head.cv3)
         vocab = nn.ModuleList()
-        for cls_head in
+        for cls_head in cv3:
             assert isinstance(cls_head, nn.Sequential)
             vocab.append(cls_head[-1])
         return vocab
@@ -1155,9 +1173,8 @@ class YOLOEModel(DetectionModel):
                 cls_pe = self.get_cls_pe(m.get_tpe(tpe), vpe).to(device=x[0].device, dtype=x[0].dtype)
                 if cls_pe.shape[0] != b or m.export:
                     cls_pe = cls_pe.expand(b, -1, -1)
-                x
-
-                x = m(x)  # run
+                x.append(cls_pe)  # adding cls embedding
+            x = m(x)  # run

             y.append(x if m.i in self.save else None)  # save output
             if visualize:
@@ -1179,10 +1196,17 @@ class YOLOEModel(DetectionModel):
         from ultralytics.utils.loss import TVPDetectLoss

         visual_prompt = batch.get("visuals", None) is not None  # TODO
-        self.criterion =
-
+        self.criterion = (
+            (E2ELoss(self, TVPDetectLoss) if getattr(self, "end2end", False) else TVPDetectLoss(self))
+            if visual_prompt
+            else self.init_criterion()
+        )
         if preds is None:
-            preds = self.forward(
+            preds = self.forward(
+                batch["img"],
+                tpe=None if "visuals" in batch else batch.get("txt_feats", None),
+                vpe=batch.get("visuals", None),
+            )
         return self.criterion(preds, batch)


@@ -1224,7 +1248,11 @@ class YOLOESegModel(YOLOEModel, SegmentationModel):
         from ultralytics.utils.loss import TVPSegmentLoss

         visual_prompt = batch.get("visuals", None) is not None  # TODO
-        self.criterion =
+        self.criterion = (
+            (E2ELoss(self, TVPSegmentLoss) if getattr(self, "end2end", False) else TVPSegmentLoss(self))
+            if visual_prompt
+            else self.init_criterion()
+        )

         if preds is None:
             preds = self.forward(batch["img"], tpe=batch.get("txt_feats", None), vpe=batch.get("visuals", None))
@@ -1269,7 +1297,7 @@ class Ensemble(torch.nn.ModuleList):
         y = [module(x, augment, profile, visualize)[0] for module in self]
         # y = torch.stack(y).max(0)[0]  # max ensemble
         # y = torch.stack(y).mean(0)  # mean ensemble
-        y = torch.cat(y, 2)  # nms ensemble, y shape(B, HW, C)
+        y = torch.cat(y, 2)  # nms ensemble, y shape(B, HW, C*num_models)
         return y, None  # inference, train output


@@ -1416,7 +1444,7 @@ def torch_safe_load(weight, safe_only=False):
                     f"with https://github.com/ultralytics/yolov5.\nThis model is NOT forwards compatible with "
                     f"YOLOv8 at https://github.com/ultralytics/ultralytics."
                     f"\nRecommend fixes are to train a new model using the latest 'ultralytics' package or to "
-                    f"run a command with an official Ultralytics model, i.e. 'yolo predict model=
+                    f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolo26n.pt'"
                 )
             ) from e
         elif e.name == "numpy._core":
@@ -1429,7 +1457,7 @@ def torch_safe_load(weight, safe_only=False):
             f"{weight} appears to require '{e.name}', which is not in Ultralytics requirements."
             f"\nAutoInstall will run now for '{e.name}' but this feature will be removed in the future."
             f"\nRecommend fixes are to train a new model using the latest 'ultralytics' package or to "
-            f"run a command with an official Ultralytics model, i.e. 'yolo predict model=
+            f"run a command with an official Ultralytics model, i.e. 'yolo predict model=yolo26n.pt'"
         )
         check_requirements(e.name)  # install missing module
         ckpt = torch_load(file, map_location="cpu")
@@ -1499,7 +1527,8 @@ def parse_model(d, ch, verbose=True):
     # Args
     legacy = True  # backward compatibility for v3/v5/v8/v9 models
     max_channels = float("inf")
-    nc, act, scales = (d.get(x) for x in ("nc", "activation", "scales"))
+    nc, act, scales, end2end = (d.get(x) for x in ("nc", "activation", "scales", "end2end"))
+    reg_max = d.get("reg_max", 16)
     depth, width, kpt_shape = (d.get(x, 1.0) for x in ("depth_multiple", "width_multiple", "kpt_shape"))
     scale = d.get("scale")
     if scales:
@@ -1624,13 +1653,29 @@ def parse_model(d, ch, verbose=True):
         elif m is Concat:
             c2 = sum(ch[x] for x in f)
         elif m in frozenset(
-            {
+            {
+                Detect,
+                WorldDetect,
+                YOLOEDetect,
+                Segment,
+                Segment26,
+                YOLOESegment,
+                YOLOESegment26,
+                Pose,
+                Pose26,
+                OBB,
+                OBB26,
+            }
         ):
-            args.
-            if m is Segment or m is YOLOESegment:
+            args.extend([reg_max, end2end, [ch[x] for x in f]])
+            if m is Segment or m is YOLOESegment or m is Segment26 or m is YOLOESegment26:
                 args[2] = make_divisible(min(args[2], max_channels) * width, 8)
-            if m in {Detect, YOLOEDetect, Segment, YOLOESegment, Pose, OBB}:
+            if m in {Detect, YOLOEDetect, Segment, Segment26, YOLOESegment, YOLOESegment26, Pose, Pose26, OBB, OBB26}:
                 m.legacy = legacy
+        elif m is v10Detect:
+            args.append([ch[x] for x in f])
+        elif m is ImagePoolingAttn:
+            args.insert(1, [ch[x] for x in f])  # channels as second arg
         elif m is RTDETRDecoder:  # special case, channels arg must be passed in index 1
             args.insert(1, [ch[x] for x in f])
         elif m is CBLinear:
@@ -1717,9 +1762,9 @@ def guess_model_task(model):
             return "detect"
         if "segment" in m:
             return "segment"
-        if
+        if "pose" in m:
             return "pose"
-        if
+        if "obb" in m:
             return "obb"

         # Guess from model cfg
ultralytics/nn/text_model.py
CHANGED
@@ -275,7 +275,7 @@ class MobileCLIPTS(TextModel):
         >>> features = text_encoder.encode_text(tokens)
         """

-    def __init__(self, device: torch.device):
+    def __init__(self, device: torch.device, weight: str = "mobileclip_blt.ts"):
         """Initialize the MobileCLIP TorchScript text encoder.

         This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format for efficient
@@ -283,11 +283,12 @@ class MobileCLIPTS(TextModel):

         Args:
             device (torch.device): Device to load the model on.
+            weight (str): Path to the TorchScript model weights.
         """
         super().__init__()
         from ultralytics.utils.downloads import attempt_download_asset

-        self.encoder = torch.jit.load(attempt_download_asset(
+        self.encoder = torch.jit.load(attempt_download_asset(weight), map_location=device)
         self.tokenizer = clip.clip.tokenize
         self.device = device

@@ -352,5 +353,7 @@ def build_text_model(variant: str, device: torch.device = None) -> TextModel:
         return CLIP(size, device)
     elif base == "mobileclip":
         return MobileCLIPTS(device)
+    elif base == "mobileclip2":
+        return MobileCLIPTS(device, weight="mobileclip2_b.ts")
     else:
         raise ValueError(f"Unrecognized base model: '{base}'. Supported base models: 'clip', 'mobileclip'.")
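The new `mobileclip2` branch reuses the existing `MobileCLIPTS` wrapper and only changes which TorchScript asset is loaded. A hedged usage sketch follows, assuming the existing `"base:size"` variant convention (as in `"mobileclip:blt"`) and a CPU device; the `"mobileclip2:b"` string and the prompt texts are illustrative, not taken from the diff.

import torch

from ultralytics.nn.text_model import build_text_model

# "mobileclip2:b" is an assumed variant string; per the diff, this branch always
# loads the mobileclip2_b.ts TorchScript weights regardless of the size suffix.
encoder = build_text_model("mobileclip2:b", device=torch.device("cpu"))
tokens = encoder.tokenize(["a photo of a bus", "a photo of a dog"])
features = encoder.encode_text(tokens)  # text embeddings, one row per prompt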