dgenerate-ultralytics-headless 8.3.134 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dgenerate_ultralytics_headless-8.3.134.dist-info/METADATA +400 -0
- dgenerate_ultralytics_headless-8.3.134.dist-info/RECORD +272 -0
- dgenerate_ultralytics_headless-8.3.134.dist-info/WHEEL +5 -0
- dgenerate_ultralytics_headless-8.3.134.dist-info/entry_points.txt +3 -0
- dgenerate_ultralytics_headless-8.3.134.dist-info/licenses/LICENSE +661 -0
- dgenerate_ultralytics_headless-8.3.134.dist-info/top_level.txt +1 -0
- tests/__init__.py +22 -0
- tests/conftest.py +83 -0
- tests/test_cli.py +138 -0
- tests/test_cuda.py +215 -0
- tests/test_engine.py +131 -0
- tests/test_exports.py +236 -0
- tests/test_integrations.py +154 -0
- tests/test_python.py +694 -0
- tests/test_solutions.py +187 -0
- ultralytics/__init__.py +30 -0
- ultralytics/assets/bus.jpg +0 -0
- ultralytics/assets/zidane.jpg +0 -0
- ultralytics/cfg/__init__.py +1023 -0
- ultralytics/cfg/datasets/Argoverse.yaml +77 -0
- ultralytics/cfg/datasets/DOTAv1.5.yaml +37 -0
- ultralytics/cfg/datasets/DOTAv1.yaml +36 -0
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +68 -0
- ultralytics/cfg/datasets/HomeObjects-3K.yaml +33 -0
- ultralytics/cfg/datasets/ImageNet.yaml +2025 -0
- ultralytics/cfg/datasets/Objects365.yaml +443 -0
- ultralytics/cfg/datasets/SKU-110K.yaml +58 -0
- ultralytics/cfg/datasets/VOC.yaml +106 -0
- ultralytics/cfg/datasets/VisDrone.yaml +77 -0
- ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
- ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
- ultralytics/cfg/datasets/carparts-seg.yaml +44 -0
- ultralytics/cfg/datasets/coco-pose.yaml +42 -0
- ultralytics/cfg/datasets/coco.yaml +118 -0
- ultralytics/cfg/datasets/coco128-seg.yaml +101 -0
- ultralytics/cfg/datasets/coco128.yaml +101 -0
- ultralytics/cfg/datasets/coco8-multispectral.yaml +104 -0
- ultralytics/cfg/datasets/coco8-pose.yaml +26 -0
- ultralytics/cfg/datasets/coco8-seg.yaml +101 -0
- ultralytics/cfg/datasets/coco8.yaml +101 -0
- ultralytics/cfg/datasets/crack-seg.yaml +22 -0
- ultralytics/cfg/datasets/dog-pose.yaml +24 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +38 -0
- ultralytics/cfg/datasets/dota8.yaml +35 -0
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
- ultralytics/cfg/datasets/lvis.yaml +1240 -0
- ultralytics/cfg/datasets/medical-pills.yaml +22 -0
- ultralytics/cfg/datasets/open-images-v7.yaml +666 -0
- ultralytics/cfg/datasets/package-seg.yaml +22 -0
- ultralytics/cfg/datasets/signature.yaml +21 -0
- ultralytics/cfg/datasets/tiger-pose.yaml +25 -0
- ultralytics/cfg/datasets/xView.yaml +155 -0
- ultralytics/cfg/default.yaml +127 -0
- ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +17 -0
- ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
- ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
- ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
- ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
- ultralytics/cfg/models/11/yolo11.yaml +50 -0
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +48 -0
- ultralytics/cfg/models/11/yoloe-11.yaml +48 -0
- ultralytics/cfg/models/12/yolo12-cls.yaml +32 -0
- ultralytics/cfg/models/12/yolo12-obb.yaml +48 -0
- ultralytics/cfg/models/12/yolo12-pose.yaml +49 -0
- ultralytics/cfg/models/12/yolo12-seg.yaml +48 -0
- ultralytics/cfg/models/12/yolo12.yaml +48 -0
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +53 -0
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +45 -0
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +45 -0
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +57 -0
- ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
- ultralytics/cfg/models/v3/yolov3-spp.yaml +49 -0
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +40 -0
- ultralytics/cfg/models/v3/yolov3.yaml +49 -0
- ultralytics/cfg/models/v5/yolov5-p6.yaml +62 -0
- ultralytics/cfg/models/v5/yolov5.yaml +51 -0
- ultralytics/cfg/models/v6/yolov6.yaml +56 -0
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +45 -0
- ultralytics/cfg/models/v8/yoloe-v8.yaml +45 -0
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +28 -0
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +28 -0
- ultralytics/cfg/models/v8/yolov8-cls.yaml +32 -0
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +58 -0
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +60 -0
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +50 -0
- ultralytics/cfg/models/v8/yolov8-obb.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8-p2.yaml +57 -0
- ultralytics/cfg/models/v8/yolov8-p6.yaml +59 -0
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +60 -0
- ultralytics/cfg/models/v8/yolov8-pose.yaml +50 -0
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +59 -0
- ultralytics/cfg/models/v8/yolov8-seg.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8-world.yaml +51 -0
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +49 -0
- ultralytics/cfg/models/v8/yolov8.yaml +49 -0
- ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9c.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
- ultralytics/cfg/models/v9/yolov9e.yaml +64 -0
- ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
- ultralytics/cfg/trackers/botsort.yaml +22 -0
- ultralytics/cfg/trackers/bytetrack.yaml +14 -0
- ultralytics/data/__init__.py +26 -0
- ultralytics/data/annotator.py +66 -0
- ultralytics/data/augment.py +2945 -0
- ultralytics/data/base.py +438 -0
- ultralytics/data/build.py +258 -0
- ultralytics/data/converter.py +754 -0
- ultralytics/data/dataset.py +834 -0
- ultralytics/data/loaders.py +676 -0
- ultralytics/data/scripts/download_weights.sh +18 -0
- ultralytics/data/scripts/get_coco.sh +61 -0
- ultralytics/data/scripts/get_coco128.sh +18 -0
- ultralytics/data/scripts/get_imagenet.sh +52 -0
- ultralytics/data/split.py +125 -0
- ultralytics/data/split_dota.py +325 -0
- ultralytics/data/utils.py +777 -0
- ultralytics/engine/__init__.py +1 -0
- ultralytics/engine/exporter.py +1519 -0
- ultralytics/engine/model.py +1156 -0
- ultralytics/engine/predictor.py +502 -0
- ultralytics/engine/results.py +1840 -0
- ultralytics/engine/trainer.py +853 -0
- ultralytics/engine/tuner.py +243 -0
- ultralytics/engine/validator.py +377 -0
- ultralytics/hub/__init__.py +168 -0
- ultralytics/hub/auth.py +137 -0
- ultralytics/hub/google/__init__.py +176 -0
- ultralytics/hub/session.py +446 -0
- ultralytics/hub/utils.py +248 -0
- ultralytics/models/__init__.py +9 -0
- ultralytics/models/fastsam/__init__.py +7 -0
- ultralytics/models/fastsam/model.py +61 -0
- ultralytics/models/fastsam/predict.py +181 -0
- ultralytics/models/fastsam/utils.py +24 -0
- ultralytics/models/fastsam/val.py +40 -0
- ultralytics/models/nas/__init__.py +7 -0
- ultralytics/models/nas/model.py +102 -0
- ultralytics/models/nas/predict.py +58 -0
- ultralytics/models/nas/val.py +39 -0
- ultralytics/models/rtdetr/__init__.py +7 -0
- ultralytics/models/rtdetr/model.py +63 -0
- ultralytics/models/rtdetr/predict.py +84 -0
- ultralytics/models/rtdetr/train.py +85 -0
- ultralytics/models/rtdetr/val.py +191 -0
- ultralytics/models/sam/__init__.py +6 -0
- ultralytics/models/sam/amg.py +260 -0
- ultralytics/models/sam/build.py +358 -0
- ultralytics/models/sam/model.py +170 -0
- ultralytics/models/sam/modules/__init__.py +1 -0
- ultralytics/models/sam/modules/blocks.py +1129 -0
- ultralytics/models/sam/modules/decoders.py +515 -0
- ultralytics/models/sam/modules/encoders.py +854 -0
- ultralytics/models/sam/modules/memory_attention.py +299 -0
- ultralytics/models/sam/modules/sam.py +1006 -0
- ultralytics/models/sam/modules/tiny_encoder.py +1002 -0
- ultralytics/models/sam/modules/transformer.py +351 -0
- ultralytics/models/sam/modules/utils.py +394 -0
- ultralytics/models/sam/predict.py +1605 -0
- ultralytics/models/utils/__init__.py +1 -0
- ultralytics/models/utils/loss.py +455 -0
- ultralytics/models/utils/ops.py +268 -0
- ultralytics/models/yolo/__init__.py +7 -0
- ultralytics/models/yolo/classify/__init__.py +7 -0
- ultralytics/models/yolo/classify/predict.py +88 -0
- ultralytics/models/yolo/classify/train.py +233 -0
- ultralytics/models/yolo/classify/val.py +215 -0
- ultralytics/models/yolo/detect/__init__.py +7 -0
- ultralytics/models/yolo/detect/predict.py +124 -0
- ultralytics/models/yolo/detect/train.py +217 -0
- ultralytics/models/yolo/detect/val.py +451 -0
- ultralytics/models/yolo/model.py +354 -0
- ultralytics/models/yolo/obb/__init__.py +7 -0
- ultralytics/models/yolo/obb/predict.py +66 -0
- ultralytics/models/yolo/obb/train.py +81 -0
- ultralytics/models/yolo/obb/val.py +283 -0
- ultralytics/models/yolo/pose/__init__.py +7 -0
- ultralytics/models/yolo/pose/predict.py +79 -0
- ultralytics/models/yolo/pose/train.py +154 -0
- ultralytics/models/yolo/pose/val.py +394 -0
- ultralytics/models/yolo/segment/__init__.py +7 -0
- ultralytics/models/yolo/segment/predict.py +113 -0
- ultralytics/models/yolo/segment/train.py +123 -0
- ultralytics/models/yolo/segment/val.py +428 -0
- ultralytics/models/yolo/world/__init__.py +5 -0
- ultralytics/models/yolo/world/train.py +119 -0
- ultralytics/models/yolo/world/train_world.py +176 -0
- ultralytics/models/yolo/yoloe/__init__.py +22 -0
- ultralytics/models/yolo/yoloe/predict.py +169 -0
- ultralytics/models/yolo/yoloe/train.py +298 -0
- ultralytics/models/yolo/yoloe/train_seg.py +124 -0
- ultralytics/models/yolo/yoloe/val.py +191 -0
- ultralytics/nn/__init__.py +29 -0
- ultralytics/nn/autobackend.py +842 -0
- ultralytics/nn/modules/__init__.py +182 -0
- ultralytics/nn/modules/activation.py +53 -0
- ultralytics/nn/modules/block.py +1966 -0
- ultralytics/nn/modules/conv.py +712 -0
- ultralytics/nn/modules/head.py +880 -0
- ultralytics/nn/modules/transformer.py +713 -0
- ultralytics/nn/modules/utils.py +164 -0
- ultralytics/nn/tasks.py +1627 -0
- ultralytics/nn/text_model.py +351 -0
- ultralytics/solutions/__init__.py +41 -0
- ultralytics/solutions/ai_gym.py +116 -0
- ultralytics/solutions/analytics.py +252 -0
- ultralytics/solutions/config.py +106 -0
- ultralytics/solutions/distance_calculation.py +124 -0
- ultralytics/solutions/heatmap.py +127 -0
- ultralytics/solutions/instance_segmentation.py +84 -0
- ultralytics/solutions/object_blurrer.py +90 -0
- ultralytics/solutions/object_counter.py +195 -0
- ultralytics/solutions/object_cropper.py +84 -0
- ultralytics/solutions/parking_management.py +273 -0
- ultralytics/solutions/queue_management.py +93 -0
- ultralytics/solutions/region_counter.py +120 -0
- ultralytics/solutions/security_alarm.py +154 -0
- ultralytics/solutions/similarity_search.py +172 -0
- ultralytics/solutions/solutions.py +724 -0
- ultralytics/solutions/speed_estimation.py +110 -0
- ultralytics/solutions/streamlit_inference.py +196 -0
- ultralytics/solutions/templates/similarity-search.html +160 -0
- ultralytics/solutions/trackzone.py +88 -0
- ultralytics/solutions/vision_eye.py +68 -0
- ultralytics/trackers/__init__.py +7 -0
- ultralytics/trackers/basetrack.py +124 -0
- ultralytics/trackers/bot_sort.py +260 -0
- ultralytics/trackers/byte_tracker.py +480 -0
- ultralytics/trackers/track.py +125 -0
- ultralytics/trackers/utils/__init__.py +1 -0
- ultralytics/trackers/utils/gmc.py +376 -0
- ultralytics/trackers/utils/kalman_filter.py +493 -0
- ultralytics/trackers/utils/matching.py +157 -0
- ultralytics/utils/__init__.py +1435 -0
- ultralytics/utils/autobatch.py +106 -0
- ultralytics/utils/autodevice.py +174 -0
- ultralytics/utils/benchmarks.py +695 -0
- ultralytics/utils/callbacks/__init__.py +5 -0
- ultralytics/utils/callbacks/base.py +234 -0
- ultralytics/utils/callbacks/clearml.py +153 -0
- ultralytics/utils/callbacks/comet.py +552 -0
- ultralytics/utils/callbacks/dvc.py +205 -0
- ultralytics/utils/callbacks/hub.py +108 -0
- ultralytics/utils/callbacks/mlflow.py +138 -0
- ultralytics/utils/callbacks/neptune.py +140 -0
- ultralytics/utils/callbacks/raytune.py +43 -0
- ultralytics/utils/callbacks/tensorboard.py +132 -0
- ultralytics/utils/callbacks/wb.py +185 -0
- ultralytics/utils/checks.py +897 -0
- ultralytics/utils/dist.py +119 -0
- ultralytics/utils/downloads.py +499 -0
- ultralytics/utils/errors.py +43 -0
- ultralytics/utils/export.py +219 -0
- ultralytics/utils/files.py +221 -0
- ultralytics/utils/instance.py +499 -0
- ultralytics/utils/loss.py +813 -0
- ultralytics/utils/metrics.py +1356 -0
- ultralytics/utils/ops.py +885 -0
- ultralytics/utils/patches.py +143 -0
- ultralytics/utils/plotting.py +1011 -0
- ultralytics/utils/tal.py +416 -0
- ultralytics/utils/torch_utils.py +990 -0
- ultralytics/utils/triton.py +116 -0
- ultralytics/utils/tuner.py +159 -0
ultralytics/nn/text_model.py
@@ -0,0 +1,351 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from abc import abstractmethod
from pathlib import Path

import torch
import torch.nn as nn

from ultralytics.utils import checks
from ultralytics.utils.torch_utils import smart_inference_mode

try:
    import clip
except ImportError:
    checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
    import clip


class TextModel(nn.Module):
    """
    Abstract base class for text encoding models.

    This class defines the interface for text encoding models used in vision-language tasks. Subclasses must implement
    the tokenize and encode_text methods.

    Methods:
        tokenize: Convert input texts to tokens.
        encode_text: Encode tokenized texts into feature vectors.
    """

    def __init__(self):
        """Initialize the TextModel base class."""
        super().__init__()

    @abstractmethod
    def tokenize(texts):
        """Convert input texts to tokens for model processing."""
        pass

    @abstractmethod
    def encode_text(texts, dtype):
        """Encode tokenized texts into normalized feature vectors."""
        pass


class CLIP(TextModel):
    """
    Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.

    This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors
    that are aligned with corresponding image features in a shared embedding space.

    Attributes:
        model (clip.model.CLIP): The loaded CLIP model.
        device (torch.device): Device where the model is loaded.

    Methods:
        tokenize: Convert input texts to CLIP tokens.
        encode_text: Encode tokenized texts into normalized feature vectors.

    Examples:
        >>> from ultralytics.models.sam import CLIP
        >>> import torch
        >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        >>> clip_model = CLIP(size="ViT-B/32", device=device)
        >>> tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
        >>> text_features = clip_model.encode_text(tokens)
        >>> print(text_features.shape)
    """

    def __init__(self, size, device):
        """
        Initialize the CLIP text encoder.

        This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads
        a pre-trained CLIP model of the specified size and prepares it for text encoding tasks.

        Args:
            size (str): Model size identifier (e.g., 'ViT-B/32').
            device (torch.device): Device to load the model on.

        Examples:
            >>> import torch
            >>> from ultralytics.models.sam.modules.clip import CLIP
            >>> clip_model = CLIP("ViT-B/32", device=torch.device("cuda:0"))
            >>> text_features = clip_model.encode_text(["a photo of a cat", "a photo of a dog"])
        """
        super().__init__()
        self.model = clip.load(size, device=device)[0]
        self.to(device)
        self.device = device
        self.eval()

    def tokenize(self, texts):
        """
        Convert input texts to CLIP tokens.

        Args:
            texts (str | List[str]): Input text or list of texts to tokenize.

        Returns:
            (torch.Tensor): Tokenized text tensor with shape (batch_size, context_length) ready for model processing.

        Examples:
            >>> model = CLIP("ViT-B/32", device="cpu")
            >>> tokens = model.tokenize("a photo of a cat")
            >>> print(tokens.shape)  # torch.Size([1, 77])
        """
        return clip.tokenize(texts).to(self.device)

    @smart_inference_mode()
    def encode_text(self, texts, dtype=torch.float32):
        """
        Encode tokenized texts into normalized feature vectors.

        This method processes tokenized text inputs through the CLIP model to generate feature vectors, which are then
        normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.

        Args:
            texts (torch.Tensor): Tokenized text inputs, typically created using the tokenize() method.
            dtype (torch.dtype, optional): Data type for output features. Default is torch.float32.

        Returns:
            (torch.Tensor): Normalized text feature vectors with unit length (L2 norm = 1).

        Examples:
            >>> clip_model = CLIP("ViT-B/32", device="cuda")
            >>> tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
            >>> features = clip_model.encode_text(tokens)
            >>> features.shape
            torch.Size([2, 512])
        """
        txt_feats = self.model.encode_text(texts).to(dtype)
        txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
        return txt_feats


class MobileCLIP(TextModel):
    """
    Implement Apple's MobileCLIP text encoder for efficient text encoding.

    This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
    capabilities for vision-language tasks.

    Attributes:
        model (mobileclip.model.MobileCLIP): The loaded MobileCLIP model.
        tokenizer (callable): Tokenizer function for processing text inputs.
        device (torch.device): Device where the model is loaded.
        config_size_map (dict): Mapping from size identifiers to model configuration names.

    Methods:
        tokenize: Convert input texts to MobileCLIP tokens.
        encode_text: Encode tokenized texts into normalized feature vectors.

    Examples:
        >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        >>> text_encoder = MobileCLIP(size="s0", device=device)
        >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
        >>> features = text_encoder.encode_text(tokens)
    """

    config_size_map = {"s0": "s0", "s1": "s1", "s2": "s2", "b": "b", "blt": "b"}

    def __init__(self, size, device):
        """
        Initialize the MobileCLIP text encoder.

        This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.

        Args:
            size (str): Model size identifier (e.g., 's0', 's1', 's2', 'b', 'blt').
            device (torch.device): Device to load the model on.

        Examples:
            >>> from ultralytics.nn.modules import MobileCLIP
            >>> import torch
            >>> model = MobileCLIP("s0", device=torch.device("cpu"))
            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
            >>> features = model.encode_text(tokens)
        """
        try:
            import warnings

            # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=FutureWarning)
                import mobileclip
        except ImportError:
            # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
            checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
            import mobileclip

        super().__init__()
        config = self.config_size_map[size]
        file = f"mobileclip_{size}.pt"
        if not Path(file).is_file():
            from ultralytics import download

            download(f"https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/{file}")
        self.model = mobileclip.create_model_and_transforms(f"mobileclip_{config}", pretrained=file, device=device)[0]
        self.tokenizer = mobileclip.get_tokenizer(f"mobileclip_{config}")
        self.to(device)
        self.device = device
        self.eval()

    def tokenize(self, texts):
        """
        Convert input texts to MobileCLIP tokens.

        Args:
            texts (list[str]): List of text strings to tokenize.

        Returns:
            (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).

        Examples:
            >>> model = MobileCLIP("s0", "cpu")
            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
        """
        return self.tokenizer(texts).to(self.device)

    @smart_inference_mode()
    def encode_text(self, texts, dtype=torch.float32):
        """
        Encode tokenized texts into normalized feature vectors.

        Args:
            texts (torch.Tensor): Tokenized text inputs.
            dtype (torch.dtype, optional): Data type for output features.

        Returns:
            (torch.Tensor): Normalized text feature vectors with L2 normalization applied.

        Examples:
            >>> model = MobileCLIP("s0", device="cpu")
            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
            >>> features = model.encode_text(tokens)
            >>> features.shape
            torch.Size([2, 512])  # Actual dimension depends on model size
        """
        text_features = self.model.encode_text(texts).to(dtype)
        text_features /= text_features.norm(p=2, dim=-1, keepdim=True)
        return text_features


class MobileCLIPTS(TextModel):
    """
    Load a TorchScript traced version of MobileCLIP.

    This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
    capabilities for vision-language tasks.

    Attributes:
        encoder (mobileclip.model.MobileCLIP): The loaded MobileCLIP text encoder.
        tokenizer (callable): Tokenizer function for processing text inputs.
        device (torch.device): Device where the model is loaded.

    Methods:
        tokenize: Convert input texts to MobileCLIP tokens.
        encode_text: Encode tokenized texts into normalized feature vectors.

    Examples:
        >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        >>> text_encoder = MobileCLIP(device=device)
        >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
        >>> features = text_encoder.encode_text(tokens)
    """

    def __init__(self, device):
        """
        Initialize the MobileCLIP text encoder.

        This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.

        Args:
            device (torch.device): Device to load the model on.

        Examples:
            >>> from ultralytics.nn.modules import MobileCLIP
            >>> import torch
            >>> model = MobileCLIP(device=torch.device("cpu"))
            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
            >>> features = model.encode_text(tokens)
        """
        super().__init__()
        from ultralytics.utils.downloads import attempt_download_asset

        self.encoder = torch.jit.load(attempt_download_asset("mobileclip_blt.ts"), map_location=device)
        self.tokenizer = clip.clip.tokenize
        self.device = device

    def tokenize(self, texts):
        """
        Convert input texts to MobileCLIP tokens.

        Args:
            texts (list[str]): List of text strings to tokenize.

        Returns:
            (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).

        Examples:
            >>> model = MobileCLIP("cpu")
            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
        """
        return self.tokenizer(texts).to(self.device)

    @smart_inference_mode()
    def encode_text(self, texts, dtype=torch.float32):
        """
        Encode tokenized texts into normalized feature vectors.

        Args:
            texts (torch.Tensor): Tokenized text inputs.
            dtype (torch.dtype, optional): Data type for output features.

        Returns:
            (torch.Tensor): Normalized text feature vectors with L2 normalization applied.

        Examples:
            >>> model = MobileCLIP(device="cpu")
            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
            >>> features = model.encode_text(tokens)
            >>> features.shape
            torch.Size([2, 512])  # Actual dimension depends on model size
        """
        return self.encoder(texts)


def build_text_model(variant, device=None):
    """
    Build a text encoding model based on the specified variant.

    Args:
        variant (str): Model variant in format "base:size" (e.g., "clip:ViT-B/32" or "mobileclip:s0").
        device (torch.device, optional): Device to load the model on.

    Returns:
        (TextModel): Instantiated text encoding model.

    Examples:
        >>> model = build_text_model("clip:ViT-B/32", device=torch.device("cuda"))
        >>> model = build_text_model("mobileclip:s0", device=torch.device("cpu"))
    """
    base, size = variant.split(":")
    if base == "clip":
        return CLIP(size, device)
    elif base == "mobileclip":
        return MobileCLIPTS(device)
    else:
        raise ValueError(f"Unrecognized base model: '{base}'. Supported base models: 'clip', 'mobileclip'.")
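
The hunk above adds ultralytics/nn/text_model.py, which wraps CLIP and MobileCLIP text encoders behind a common TextModel interface and exposes a build_text_model factory. The following is a minimal usage sketch based on the docstring examples, not part of the package diff; it assumes this headless wheel installs the standard ultralytics namespace and that the encoder weights can be downloaded on first use.

import torch

from ultralytics.nn.text_model import build_text_model

# Pick a device and build an encoder; "clip:ViT-B/32" follows the docstring example.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = build_text_model("clip:ViT-B/32", device=device)  # or "mobileclip:blt"

# Tokenize prompts and encode them into unit-length feature vectors.
tokens = encoder.tokenize(["a photo of a cat", "a photo of a dog"])
features = encoder.encode_text(tokens)  # one row per prompt
print(features.shape)  # e.g. torch.Size([2, 512]) for ViT-B/32

Note that any "mobileclip:*" variant routes to MobileCLIPTS above, which loads the TorchScript mobileclip_blt.ts asset regardless of the requested size.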
ultralytics/solutions/__init__.py
@@ -0,0 +1,41 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from .ai_gym import AIGym
from .analytics import Analytics
from .distance_calculation import DistanceCalculation
from .heatmap import Heatmap
from .instance_segmentation import InstanceSegmentation
from .object_blurrer import ObjectBlurrer
from .object_counter import ObjectCounter
from .object_cropper import ObjectCropper
from .parking_management import ParkingManagement, ParkingPtsSelection
from .queue_management import QueueManager
from .region_counter import RegionCounter
from .security_alarm import SecurityAlarm
from .similarity_search import SearchApp, VisualAISearch
from .speed_estimation import SpeedEstimator
from .streamlit_inference import Inference
from .trackzone import TrackZone
from .vision_eye import VisionEye

__all__ = (
    "ObjectCounter",
    "ObjectCropper",
    "ObjectBlurrer",
    "AIGym",
    "RegionCounter",
    "SecurityAlarm",
    "Heatmap",
    "InstanceSegmentation",
    "VisionEye",
    "SpeedEstimator",
    "DistanceCalculation",
    "QueueManager",
    "ParkingManagement",
    "ParkingPtsSelection",
    "Analytics",
    "Inference",
    "TrackZone",
    "SearchApp",
    "VisualAISearch",
)
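
The hunk above adds ultralytics/solutions/__init__.py, which only re-exports the solution classes listed in __all__. A trivial illustrative sketch (not part of the diff, assuming the standard ultralytics namespace):

# With the __init__ above, every solution class is importable from one place.
from ultralytics import solutions

print(solutions.__all__)  # the tuple defined above
print(solutions.AIGym, solutions.ObjectCounter)  # re-exported classes; defaults live in ultralytics/solutions/config.py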
ultralytics/solutions/ai_gym.py
@@ -0,0 +1,116 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from collections import defaultdict

from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults


class AIGym(BaseSolution):
    """
    A class to manage gym steps of people in a real-time video stream based on their poses.

    This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts
    repetitions of exercises based on predefined angle thresholds for up and down positions.

    Attributes:
        states (Dict[float, int, str]): Stores per-track angle, count, and stage for workout monitoring.
        up_angle (float): Angle threshold for considering the 'up' position of an exercise.
        down_angle (float): Angle threshold for considering the 'down' position of an exercise.
        kpts (List[int]): Indices of keypoints used for angle calculation.

    Methods:
        process: Processes a frame to detect poses, calculate angles, and count repetitions.

    Examples:
        >>> gym = AIGym(model="yolo11n-pose.pt")
        >>> image = cv2.imread("gym_scene.jpg")
        >>> results = gym.process(image)
        >>> processed_image = results.plot_im
        >>> cv2.imshow("Processed Image", processed_image)
        >>> cv2.waitKey(0)
    """

    def __init__(self, **kwargs):
        """
        Initialize AIGym for workout monitoring using pose estimation and predefined angles.

        Args:
            **kwargs (Any): Keyword arguments passed to the parent class constructor.
                model (str): Model name or path, defaults to "yolo11n-pose.pt".
        """
        kwargs["model"] = kwargs.get("model", "yolo11n-pose.pt")
        super().__init__(**kwargs)
        self.states = defaultdict(lambda: {"angle": 0, "count": 0, "stage": "-"})  # Dict for count, angle and stage

        # Extract details from CFG single time for usage later
        self.up_angle = float(self.CFG["up_angle"])  # Pose up predefined angle to consider up pose
        self.down_angle = float(self.CFG["down_angle"])  # Pose down predefined angle to consider down pose
        self.kpts = self.CFG["kpts"]  # User selected kpts of workouts storage for further usage

    def process(self, im0):
        """
        Monitor workouts using Ultralytics YOLO Pose Model.

        This function processes an input image to track and analyze human poses for workout monitoring. It uses
        the YOLO Pose model to detect keypoints, estimate angles, and count repetitions based on predefined
        angle thresholds.

        Args:
            im0 (np.ndarray): Input image for processing.

        Returns:
            (SolutionResults): Contains processed image `plot_im`,
                'workout_count' (list of completed reps),
                'workout_stage' (list of current stages),
                'workout_angle' (list of angles), and
                'total_tracks' (total number of tracked individuals).

        Examples:
            >>> gym = AIGym()
            >>> image = cv2.imread("workout.jpg")
            >>> results = gym.process(image)
            >>> processed_image = results.plot_im
        """
        annotator = SolutionAnnotator(im0, line_width=self.line_width)  # Initialize annotator

        self.extract_tracks(im0)  # Extract tracks (bounding boxes, classes, and masks)
        tracks = self.tracks[0]

        if tracks.boxes.id is not None:
            track_ids = tracks.boxes.id.cpu().tolist()
            kpt_data = tracks.keypoints.data.cpu()  # Avoid repeated .cpu() calls

            for i, k in enumerate(kpt_data):
                track_id = int(track_ids[i])  # get track id
                state = self.states[track_id]  # get state details
                # Get keypoints and estimate the angle
                state["angle"] = annotator.estimate_pose_angle(*[k[int(idx)] for idx in self.kpts])
                annotator.draw_specific_kpts(k, self.kpts, radius=self.line_width * 3)

                # Determine stage and count logic based on angle thresholds
                if state["angle"] < self.down_angle:
                    if state["stage"] == "up":
                        state["count"] += 1
                    state["stage"] = "down"
                elif state["angle"] > self.up_angle:
                    state["stage"] = "up"

                # Display angle, count, and stage text
                if self.show_labels:
                    annotator.plot_angle_and_count_and_stage(
                        angle_text=state["angle"],  # angle text for display
                        count_text=state["count"],  # count text for workouts
                        stage_text=state["stage"],  # stage position text
                        center_kpt=k[int(self.kpts[1])],  # center keypoint for display
                    )
        plot_im = annotator.result()
        self.display_output(plot_im)  # Display output image, if environment support display

        # Return SolutionResults
        return SolutionResults(
            plot_im=plot_im,
            workout_count=[v["count"] for v in self.states.values()],
            workout_stage=[v["stage"] for v in self.states.values()],
            workout_angle=[v["angle"] for v in self.states.values()],
            total_tracks=len(self.track_ids),
        )
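
The hunk above adds ultralytics/solutions/ai_gym.py. To show the per-frame loop the class is built for, here is a hedged sketch that drives AIGym over a video with OpenCV in the style of its docstring example; the video path and keypoint indices are illustrative, and passing kpts as a keyword argument assumes BaseSolution merges such settings into the CFG dict read above.

import cv2

from ultralytics import solutions

# COCO right-arm keypoints (shoulder, elbow, wrist) chosen for illustration.
gym = solutions.AIGym(model="yolo11n-pose.pt", kpts=[6, 8, 10])
cap = cv2.VideoCapture("workout.mp4")  # hypothetical input video

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = gym.process(frame)          # SolutionResults with plot_im, workout_count, ...
    cv2.imshow("AIGym", results.plot_im)  # annotated frame with angle/count/stage overlays
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()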