dgenerate_ultralytics_headless-8.3.134-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (272)
  1. dgenerate_ultralytics_headless-8.3.134.dist-info/METADATA +400 -0
  2. dgenerate_ultralytics_headless-8.3.134.dist-info/RECORD +272 -0
  3. dgenerate_ultralytics_headless-8.3.134.dist-info/WHEEL +5 -0
  4. dgenerate_ultralytics_headless-8.3.134.dist-info/entry_points.txt +3 -0
  5. dgenerate_ultralytics_headless-8.3.134.dist-info/licenses/LICENSE +661 -0
  6. dgenerate_ultralytics_headless-8.3.134.dist-info/top_level.txt +1 -0
  7. tests/__init__.py +22 -0
  8. tests/conftest.py +83 -0
  9. tests/test_cli.py +138 -0
  10. tests/test_cuda.py +215 -0
  11. tests/test_engine.py +131 -0
  12. tests/test_exports.py +236 -0
  13. tests/test_integrations.py +154 -0
  14. tests/test_python.py +694 -0
  15. tests/test_solutions.py +187 -0
  16. ultralytics/__init__.py +30 -0
  17. ultralytics/assets/bus.jpg +0 -0
  18. ultralytics/assets/zidane.jpg +0 -0
  19. ultralytics/cfg/__init__.py +1023 -0
  20. ultralytics/cfg/datasets/Argoverse.yaml +77 -0
  21. ultralytics/cfg/datasets/DOTAv1.5.yaml +37 -0
  22. ultralytics/cfg/datasets/DOTAv1.yaml +36 -0
  23. ultralytics/cfg/datasets/GlobalWheat2020.yaml +68 -0
  24. ultralytics/cfg/datasets/HomeObjects-3K.yaml +33 -0
  25. ultralytics/cfg/datasets/ImageNet.yaml +2025 -0
  26. ultralytics/cfg/datasets/Objects365.yaml +443 -0
  27. ultralytics/cfg/datasets/SKU-110K.yaml +58 -0
  28. ultralytics/cfg/datasets/VOC.yaml +106 -0
  29. ultralytics/cfg/datasets/VisDrone.yaml +77 -0
  30. ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
  31. ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
  32. ultralytics/cfg/datasets/carparts-seg.yaml +44 -0
  33. ultralytics/cfg/datasets/coco-pose.yaml +42 -0
  34. ultralytics/cfg/datasets/coco.yaml +118 -0
  35. ultralytics/cfg/datasets/coco128-seg.yaml +101 -0
  36. ultralytics/cfg/datasets/coco128.yaml +101 -0
  37. ultralytics/cfg/datasets/coco8-multispectral.yaml +104 -0
  38. ultralytics/cfg/datasets/coco8-pose.yaml +26 -0
  39. ultralytics/cfg/datasets/coco8-seg.yaml +101 -0
  40. ultralytics/cfg/datasets/coco8.yaml +101 -0
  41. ultralytics/cfg/datasets/crack-seg.yaml +22 -0
  42. ultralytics/cfg/datasets/dog-pose.yaml +24 -0
  43. ultralytics/cfg/datasets/dota8-multispectral.yaml +38 -0
  44. ultralytics/cfg/datasets/dota8.yaml +35 -0
  45. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
  46. ultralytics/cfg/datasets/lvis.yaml +1240 -0
  47. ultralytics/cfg/datasets/medical-pills.yaml +22 -0
  48. ultralytics/cfg/datasets/open-images-v7.yaml +666 -0
  49. ultralytics/cfg/datasets/package-seg.yaml +22 -0
  50. ultralytics/cfg/datasets/signature.yaml +21 -0
  51. ultralytics/cfg/datasets/tiger-pose.yaml +25 -0
  52. ultralytics/cfg/datasets/xView.yaml +155 -0
  53. ultralytics/cfg/default.yaml +127 -0
  54. ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +17 -0
  55. ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
  56. ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
  57. ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
  58. ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
  59. ultralytics/cfg/models/11/yolo11.yaml +50 -0
  60. ultralytics/cfg/models/11/yoloe-11-seg.yaml +48 -0
  61. ultralytics/cfg/models/11/yoloe-11.yaml +48 -0
  62. ultralytics/cfg/models/12/yolo12-cls.yaml +32 -0
  63. ultralytics/cfg/models/12/yolo12-obb.yaml +48 -0
  64. ultralytics/cfg/models/12/yolo12-pose.yaml +49 -0
  65. ultralytics/cfg/models/12/yolo12-seg.yaml +48 -0
  66. ultralytics/cfg/models/12/yolo12.yaml +48 -0
  67. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +53 -0
  68. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +45 -0
  69. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +45 -0
  70. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +57 -0
  71. ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
  72. ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
  73. ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
  74. ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
  75. ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
  76. ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
  77. ultralytics/cfg/models/v3/yolov3-spp.yaml +49 -0
  78. ultralytics/cfg/models/v3/yolov3-tiny.yaml +40 -0
  79. ultralytics/cfg/models/v3/yolov3.yaml +49 -0
  80. ultralytics/cfg/models/v5/yolov5-p6.yaml +62 -0
  81. ultralytics/cfg/models/v5/yolov5.yaml +51 -0
  82. ultralytics/cfg/models/v6/yolov6.yaml +56 -0
  83. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +45 -0
  84. ultralytics/cfg/models/v8/yoloe-v8.yaml +45 -0
  85. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +28 -0
  86. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +28 -0
  87. ultralytics/cfg/models/v8/yolov8-cls.yaml +32 -0
  88. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +58 -0
  89. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +60 -0
  90. ultralytics/cfg/models/v8/yolov8-ghost.yaml +50 -0
  91. ultralytics/cfg/models/v8/yolov8-obb.yaml +49 -0
  92. ultralytics/cfg/models/v8/yolov8-p2.yaml +57 -0
  93. ultralytics/cfg/models/v8/yolov8-p6.yaml +59 -0
  94. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +60 -0
  95. ultralytics/cfg/models/v8/yolov8-pose.yaml +50 -0
  96. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +49 -0
  97. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +59 -0
  98. ultralytics/cfg/models/v8/yolov8-seg.yaml +49 -0
  99. ultralytics/cfg/models/v8/yolov8-world.yaml +51 -0
  100. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +49 -0
  101. ultralytics/cfg/models/v8/yolov8.yaml +49 -0
  102. ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
  103. ultralytics/cfg/models/v9/yolov9c.yaml +41 -0
  104. ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
  105. ultralytics/cfg/models/v9/yolov9e.yaml +64 -0
  106. ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
  107. ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
  108. ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
  109. ultralytics/cfg/trackers/botsort.yaml +22 -0
  110. ultralytics/cfg/trackers/bytetrack.yaml +14 -0
  111. ultralytics/data/__init__.py +26 -0
  112. ultralytics/data/annotator.py +66 -0
  113. ultralytics/data/augment.py +2945 -0
  114. ultralytics/data/base.py +438 -0
  115. ultralytics/data/build.py +258 -0
  116. ultralytics/data/converter.py +754 -0
  117. ultralytics/data/dataset.py +834 -0
  118. ultralytics/data/loaders.py +676 -0
  119. ultralytics/data/scripts/download_weights.sh +18 -0
  120. ultralytics/data/scripts/get_coco.sh +61 -0
  121. ultralytics/data/scripts/get_coco128.sh +18 -0
  122. ultralytics/data/scripts/get_imagenet.sh +52 -0
  123. ultralytics/data/split.py +125 -0
  124. ultralytics/data/split_dota.py +325 -0
  125. ultralytics/data/utils.py +777 -0
  126. ultralytics/engine/__init__.py +1 -0
  127. ultralytics/engine/exporter.py +1519 -0
  128. ultralytics/engine/model.py +1156 -0
  129. ultralytics/engine/predictor.py +502 -0
  130. ultralytics/engine/results.py +1840 -0
  131. ultralytics/engine/trainer.py +853 -0
  132. ultralytics/engine/tuner.py +243 -0
  133. ultralytics/engine/validator.py +377 -0
  134. ultralytics/hub/__init__.py +168 -0
  135. ultralytics/hub/auth.py +137 -0
  136. ultralytics/hub/google/__init__.py +176 -0
  137. ultralytics/hub/session.py +446 -0
  138. ultralytics/hub/utils.py +248 -0
  139. ultralytics/models/__init__.py +9 -0
  140. ultralytics/models/fastsam/__init__.py +7 -0
  141. ultralytics/models/fastsam/model.py +61 -0
  142. ultralytics/models/fastsam/predict.py +181 -0
  143. ultralytics/models/fastsam/utils.py +24 -0
  144. ultralytics/models/fastsam/val.py +40 -0
  145. ultralytics/models/nas/__init__.py +7 -0
  146. ultralytics/models/nas/model.py +102 -0
  147. ultralytics/models/nas/predict.py +58 -0
  148. ultralytics/models/nas/val.py +39 -0
  149. ultralytics/models/rtdetr/__init__.py +7 -0
  150. ultralytics/models/rtdetr/model.py +63 -0
  151. ultralytics/models/rtdetr/predict.py +84 -0
  152. ultralytics/models/rtdetr/train.py +85 -0
  153. ultralytics/models/rtdetr/val.py +191 -0
  154. ultralytics/models/sam/__init__.py +6 -0
  155. ultralytics/models/sam/amg.py +260 -0
  156. ultralytics/models/sam/build.py +358 -0
  157. ultralytics/models/sam/model.py +170 -0
  158. ultralytics/models/sam/modules/__init__.py +1 -0
  159. ultralytics/models/sam/modules/blocks.py +1129 -0
  160. ultralytics/models/sam/modules/decoders.py +515 -0
  161. ultralytics/models/sam/modules/encoders.py +854 -0
  162. ultralytics/models/sam/modules/memory_attention.py +299 -0
  163. ultralytics/models/sam/modules/sam.py +1006 -0
  164. ultralytics/models/sam/modules/tiny_encoder.py +1002 -0
  165. ultralytics/models/sam/modules/transformer.py +351 -0
  166. ultralytics/models/sam/modules/utils.py +394 -0
  167. ultralytics/models/sam/predict.py +1605 -0
  168. ultralytics/models/utils/__init__.py +1 -0
  169. ultralytics/models/utils/loss.py +455 -0
  170. ultralytics/models/utils/ops.py +268 -0
  171. ultralytics/models/yolo/__init__.py +7 -0
  172. ultralytics/models/yolo/classify/__init__.py +7 -0
  173. ultralytics/models/yolo/classify/predict.py +88 -0
  174. ultralytics/models/yolo/classify/train.py +233 -0
  175. ultralytics/models/yolo/classify/val.py +215 -0
  176. ultralytics/models/yolo/detect/__init__.py +7 -0
  177. ultralytics/models/yolo/detect/predict.py +124 -0
  178. ultralytics/models/yolo/detect/train.py +217 -0
  179. ultralytics/models/yolo/detect/val.py +451 -0
  180. ultralytics/models/yolo/model.py +354 -0
  181. ultralytics/models/yolo/obb/__init__.py +7 -0
  182. ultralytics/models/yolo/obb/predict.py +66 -0
  183. ultralytics/models/yolo/obb/train.py +81 -0
  184. ultralytics/models/yolo/obb/val.py +283 -0
  185. ultralytics/models/yolo/pose/__init__.py +7 -0
  186. ultralytics/models/yolo/pose/predict.py +79 -0
  187. ultralytics/models/yolo/pose/train.py +154 -0
  188. ultralytics/models/yolo/pose/val.py +394 -0
  189. ultralytics/models/yolo/segment/__init__.py +7 -0
  190. ultralytics/models/yolo/segment/predict.py +113 -0
  191. ultralytics/models/yolo/segment/train.py +123 -0
  192. ultralytics/models/yolo/segment/val.py +428 -0
  193. ultralytics/models/yolo/world/__init__.py +5 -0
  194. ultralytics/models/yolo/world/train.py +119 -0
  195. ultralytics/models/yolo/world/train_world.py +176 -0
  196. ultralytics/models/yolo/yoloe/__init__.py +22 -0
  197. ultralytics/models/yolo/yoloe/predict.py +169 -0
  198. ultralytics/models/yolo/yoloe/train.py +298 -0
  199. ultralytics/models/yolo/yoloe/train_seg.py +124 -0
  200. ultralytics/models/yolo/yoloe/val.py +191 -0
  201. ultralytics/nn/__init__.py +29 -0
  202. ultralytics/nn/autobackend.py +842 -0
  203. ultralytics/nn/modules/__init__.py +182 -0
  204. ultralytics/nn/modules/activation.py +53 -0
  205. ultralytics/nn/modules/block.py +1966 -0
  206. ultralytics/nn/modules/conv.py +712 -0
  207. ultralytics/nn/modules/head.py +880 -0
  208. ultralytics/nn/modules/transformer.py +713 -0
  209. ultralytics/nn/modules/utils.py +164 -0
  210. ultralytics/nn/tasks.py +1627 -0
  211. ultralytics/nn/text_model.py +351 -0
  212. ultralytics/solutions/__init__.py +41 -0
  213. ultralytics/solutions/ai_gym.py +116 -0
  214. ultralytics/solutions/analytics.py +252 -0
  215. ultralytics/solutions/config.py +106 -0
  216. ultralytics/solutions/distance_calculation.py +124 -0
  217. ultralytics/solutions/heatmap.py +127 -0
  218. ultralytics/solutions/instance_segmentation.py +84 -0
  219. ultralytics/solutions/object_blurrer.py +90 -0
  220. ultralytics/solutions/object_counter.py +195 -0
  221. ultralytics/solutions/object_cropper.py +84 -0
  222. ultralytics/solutions/parking_management.py +273 -0
  223. ultralytics/solutions/queue_management.py +93 -0
  224. ultralytics/solutions/region_counter.py +120 -0
  225. ultralytics/solutions/security_alarm.py +154 -0
  226. ultralytics/solutions/similarity_search.py +172 -0
  227. ultralytics/solutions/solutions.py +724 -0
  228. ultralytics/solutions/speed_estimation.py +110 -0
  229. ultralytics/solutions/streamlit_inference.py +196 -0
  230. ultralytics/solutions/templates/similarity-search.html +160 -0
  231. ultralytics/solutions/trackzone.py +88 -0
  232. ultralytics/solutions/vision_eye.py +68 -0
  233. ultralytics/trackers/__init__.py +7 -0
  234. ultralytics/trackers/basetrack.py +124 -0
  235. ultralytics/trackers/bot_sort.py +260 -0
  236. ultralytics/trackers/byte_tracker.py +480 -0
  237. ultralytics/trackers/track.py +125 -0
  238. ultralytics/trackers/utils/__init__.py +1 -0
  239. ultralytics/trackers/utils/gmc.py +376 -0
  240. ultralytics/trackers/utils/kalman_filter.py +493 -0
  241. ultralytics/trackers/utils/matching.py +157 -0
  242. ultralytics/utils/__init__.py +1435 -0
  243. ultralytics/utils/autobatch.py +106 -0
  244. ultralytics/utils/autodevice.py +174 -0
  245. ultralytics/utils/benchmarks.py +695 -0
  246. ultralytics/utils/callbacks/__init__.py +5 -0
  247. ultralytics/utils/callbacks/base.py +234 -0
  248. ultralytics/utils/callbacks/clearml.py +153 -0
  249. ultralytics/utils/callbacks/comet.py +552 -0
  250. ultralytics/utils/callbacks/dvc.py +205 -0
  251. ultralytics/utils/callbacks/hub.py +108 -0
  252. ultralytics/utils/callbacks/mlflow.py +138 -0
  253. ultralytics/utils/callbacks/neptune.py +140 -0
  254. ultralytics/utils/callbacks/raytune.py +43 -0
  255. ultralytics/utils/callbacks/tensorboard.py +132 -0
  256. ultralytics/utils/callbacks/wb.py +185 -0
  257. ultralytics/utils/checks.py +897 -0
  258. ultralytics/utils/dist.py +119 -0
  259. ultralytics/utils/downloads.py +499 -0
  260. ultralytics/utils/errors.py +43 -0
  261. ultralytics/utils/export.py +219 -0
  262. ultralytics/utils/files.py +221 -0
  263. ultralytics/utils/instance.py +499 -0
  264. ultralytics/utils/loss.py +813 -0
  265. ultralytics/utils/metrics.py +1356 -0
  266. ultralytics/utils/ops.py +885 -0
  267. ultralytics/utils/patches.py +143 -0
  268. ultralytics/utils/plotting.py +1011 -0
  269. ultralytics/utils/tal.py +416 -0
  270. ultralytics/utils/torch_utils.py +990 -0
  271. ultralytics/utils/triton.py +116 -0
  272. ultralytics/utils/tuner.py +159 -0
ultralytics/nn/text_model.py
@@ -0,0 +1,351 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ from abc import abstractmethod
+ from pathlib import Path
+
+ import torch
+ import torch.nn as nn
+
+ from ultralytics.utils import checks
+ from ultralytics.utils.torch_utils import smart_inference_mode
+
+ try:
+     import clip
+ except ImportError:
+     checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
+     import clip
+
+
+ class TextModel(nn.Module):
+     """
+     Abstract base class for text encoding models.
+
+     This class defines the interface for text encoding models used in vision-language tasks. Subclasses must implement
+     the tokenize and encode_text methods.
+
+     Methods:
+         tokenize: Convert input texts to tokens.
+         encode_text: Encode tokenized texts into feature vectors.
+     """
+
+     def __init__(self):
+         """Initialize the TextModel base class."""
+         super().__init__()
+
+     @abstractmethod
+     def tokenize(texts):
+         """Convert input texts to tokens for model processing."""
+         pass
+
+     @abstractmethod
+     def encode_text(texts, dtype):
+         """Encode tokenized texts into normalized feature vectors."""
+         pass
+
+
+ class CLIP(TextModel):
+     """
+     Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.
+
+     This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors
+     that are aligned with corresponding image features in a shared embedding space.
+
+     Attributes:
+         model (clip.model.CLIP): The loaded CLIP model.
+         device (torch.device): Device where the model is loaded.
+
+     Methods:
+         tokenize: Convert input texts to CLIP tokens.
+         encode_text: Encode tokenized texts into normalized feature vectors.
+
+     Examples:
+         >>> from ultralytics.models.sam import CLIP
+         >>> import torch
+         >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         >>> clip_model = CLIP(size="ViT-B/32", device=device)
+         >>> tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
+         >>> text_features = clip_model.encode_text(tokens)
+         >>> print(text_features.shape)
+     """
+
+     def __init__(self, size, device):
+         """
+         Initialize the CLIP text encoder.
+
+         This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads
+         a pre-trained CLIP model of the specified size and prepares it for text encoding tasks.
+
+         Args:
+             size (str): Model size identifier (e.g., 'ViT-B/32').
+             device (torch.device): Device to load the model on.
+
+         Examples:
+             >>> import torch
+             >>> from ultralytics.models.sam.modules.clip import CLIP
+             >>> clip_model = CLIP("ViT-B/32", device=torch.device("cuda:0"))
+             >>> text_features = clip_model.encode_text(["a photo of a cat", "a photo of a dog"])
+         """
+         super().__init__()
+         self.model = clip.load(size, device=device)[0]
+         self.to(device)
+         self.device = device
+         self.eval()
+
+     def tokenize(self, texts):
+         """
+         Convert input texts to CLIP tokens.
+
+         Args:
+             texts (str | List[str]): Input text or list of texts to tokenize.
+
+         Returns:
+             (torch.Tensor): Tokenized text tensor with shape (batch_size, context_length) ready for model processing.
+
+         Examples:
+             >>> model = CLIP("ViT-B/32", device="cpu")
+             >>> tokens = model.tokenize("a photo of a cat")
+             >>> print(tokens.shape)  # torch.Size([1, 77])
+         """
+         return clip.tokenize(texts).to(self.device)
+
+     @smart_inference_mode()
+     def encode_text(self, texts, dtype=torch.float32):
+         """
+         Encode tokenized texts into normalized feature vectors.
+
+         This method processes tokenized text inputs through the CLIP model to generate feature vectors, which are then
+         normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
+
+         Args:
+             texts (torch.Tensor): Tokenized text inputs, typically created using the tokenize() method.
+             dtype (torch.dtype, optional): Data type for output features. Default is torch.float32.
+
+         Returns:
+             (torch.Tensor): Normalized text feature vectors with unit length (L2 norm = 1).
+
+         Examples:
+             >>> clip_model = CLIP("ViT-B/32", device="cuda")
+             >>> tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> features = clip_model.encode_text(tokens)
+             >>> features.shape
+             torch.Size([2, 512])
+         """
+         txt_feats = self.model.encode_text(texts).to(dtype)
+         txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
+         return txt_feats
+
+
+ class MobileCLIP(TextModel):
+     """
+     Implement Apple's MobileCLIP text encoder for efficient text encoding.
+
+     This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
+     capabilities for vision-language tasks.
+
+     Attributes:
+         model (mobileclip.model.MobileCLIP): The loaded MobileCLIP model.
+         tokenizer (callable): Tokenizer function for processing text inputs.
+         device (torch.device): Device where the model is loaded.
+         config_size_map (dict): Mapping from size identifiers to model configuration names.
+
+     Methods:
+         tokenize: Convert input texts to MobileCLIP tokens.
+         encode_text: Encode tokenized texts into normalized feature vectors.
+
+     Examples:
+         >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         >>> text_encoder = MobileCLIP(size="s0", device=device)
+         >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
+         >>> features = text_encoder.encode_text(tokens)
+     """
+
+     config_size_map = {"s0": "s0", "s1": "s1", "s2": "s2", "b": "b", "blt": "b"}
+
+     def __init__(self, size, device):
+         """
+         Initialize the MobileCLIP text encoder.
+
+         This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
+
+         Args:
+             size (str): Model size identifier (e.g., 's0', 's1', 's2', 'b', 'blt').
+             device (torch.device): Device to load the model on.
+
+         Examples:
+             >>> from ultralytics.nn.modules import MobileCLIP
+             >>> import torch
+             >>> model = MobileCLIP("s0", device=torch.device("cpu"))
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> features = model.encode_text(tokens)
+         """
+         try:
+             import warnings
+
+             # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
+             with warnings.catch_warnings():
+                 warnings.filterwarnings("ignore", category=FutureWarning)
+                 import mobileclip
+         except ImportError:
+             # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
+             checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
+             import mobileclip
+
+         super().__init__()
+         config = self.config_size_map[size]
+         file = f"mobileclip_{size}.pt"
+         if not Path(file).is_file():
+             from ultralytics import download
+
+             download(f"https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/{file}")
+         self.model = mobileclip.create_model_and_transforms(f"mobileclip_{config}", pretrained=file, device=device)[0]
+         self.tokenizer = mobileclip.get_tokenizer(f"mobileclip_{config}")
+         self.to(device)
+         self.device = device
+         self.eval()
+
+     def tokenize(self, texts):
+         """
+         Convert input texts to MobileCLIP tokens.
+
+         Args:
+             texts (list[str]): List of text strings to tokenize.
+
+         Returns:
+             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
+
+         Examples:
+             >>> model = MobileCLIP("s0", "cpu")
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+         """
+         return self.tokenizer(texts).to(self.device)
+
+     @smart_inference_mode()
+     def encode_text(self, texts, dtype=torch.float32):
+         """
+         Encode tokenized texts into normalized feature vectors.
+
+         Args:
+             texts (torch.Tensor): Tokenized text inputs.
+             dtype (torch.dtype, optional): Data type for output features.
+
+         Returns:
+             (torch.Tensor): Normalized text feature vectors with L2 normalization applied.
+
+         Examples:
+             >>> model = MobileCLIP("s0", device="cpu")
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> features = model.encode_text(tokens)
+             >>> features.shape
+             torch.Size([2, 512])  # Actual dimension depends on model size
+         """
+         text_features = self.model.encode_text(texts).to(dtype)
+         text_features /= text_features.norm(p=2, dim=-1, keepdim=True)
+         return text_features
+
+
+ class MobileCLIPTS(TextModel):
+     """
+     Load a TorchScript traced version of MobileCLIP.
+
+     This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
+     capabilities for vision-language tasks.
+
+     Attributes:
+         encoder (mobileclip.model.MobileCLIP): The loaded MobileCLIP text encoder.
+         tokenizer (callable): Tokenizer function for processing text inputs.
+         device (torch.device): Device where the model is loaded.
+
+     Methods:
+         tokenize: Convert input texts to MobileCLIP tokens.
+         encode_text: Encode tokenized texts into normalized feature vectors.
+
+     Examples:
+         >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         >>> text_encoder = MobileCLIP(device=device)
+         >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
+         >>> features = text_encoder.encode_text(tokens)
+     """
+
+     def __init__(self, device):
+         """
+         Initialize the MobileCLIP text encoder.
+
+         This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
+
+         Args:
+             device (torch.device): Device to load the model on.
+
+         Examples:
+             >>> from ultralytics.nn.modules import MobileCLIP
+             >>> import torch
+             >>> model = MobileCLIP(device=torch.device("cpu"))
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> features = model.encode_text(tokens)
+         """
+         super().__init__()
+         from ultralytics.utils.downloads import attempt_download_asset
+
+         self.encoder = torch.jit.load(attempt_download_asset("mobileclip_blt.ts"), map_location=device)
+         self.tokenizer = clip.clip.tokenize
+         self.device = device
+
+     def tokenize(self, texts):
+         """
+         Convert input texts to MobileCLIP tokens.
+
+         Args:
+             texts (list[str]): List of text strings to tokenize.
+
+         Returns:
+             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
+
+         Examples:
+             >>> model = MobileCLIP("cpu")
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+         """
+         return self.tokenizer(texts).to(self.device)
+
+     @smart_inference_mode()
+     def encode_text(self, texts, dtype=torch.float32):
+         """
+         Encode tokenized texts into normalized feature vectors.
+
+         Args:
+             texts (torch.Tensor): Tokenized text inputs.
+             dtype (torch.dtype, optional): Data type for output features.
+
+         Returns:
+             (torch.Tensor): Normalized text feature vectors with L2 normalization applied.
+
+         Examples:
+             >>> model = MobileCLIP(device="cpu")
+             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+             >>> features = model.encode_text(tokens)
+             >>> features.shape
+             torch.Size([2, 512])  # Actual dimension depends on model size
+         """
+         return self.encoder(texts)
+
+
+ def build_text_model(variant, device=None):
+     """
+     Build a text encoding model based on the specified variant.
+
+     Args:
+         variant (str): Model variant in format "base:size" (e.g., "clip:ViT-B/32" or "mobileclip:s0").
+         device (torch.device, optional): Device to load the model on.
+
+     Returns:
+         (TextModel): Instantiated text encoding model.
+
+     Examples:
+         >>> model = build_text_model("clip:ViT-B/32", device=torch.device("cuda"))
+         >>> model = build_text_model("mobileclip:s0", device=torch.device("cpu"))
+     """
+     base, size = variant.split(":")
+     if base == "clip":
+         return CLIP(size, device)
+     elif base == "mobileclip":
+         return MobileCLIPTS(device)
+     else:
+         raise ValueError(f"Unrecognized base model: '{base}'. Supported base models: 'clip', 'mobileclip'.")
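A minimal usage sketch of the text-encoder API added above, assuming the wheel installs the `ultralytics` package laid out in the file list (so the module resolves as `ultralytics.nn.text_model`); the "base:size" variant format and the 512-dimensional ViT-B/32 output follow the docstrings in the diff.

import torch

from ultralytics.nn.text_model import build_text_model

# Build a text encoder from a "base:size" variant string: "clip:ViT-B/32" loads
# OpenAI CLIP, while any "mobileclip:*" variant returns the TorchScript MobileCLIPTS.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = build_text_model("clip:ViT-B/32", device=device)

# Tokenize prompts and encode them into L2-normalized feature vectors.
tokens = encoder.tokenize(["a photo of a cat", "a photo of a dog"])
features = encoder.encode_text(tokens)
print(features.shape)  # torch.Size([2, 512]) for ViT-B/32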
ultralytics/solutions/__init__.py
@@ -0,0 +1,41 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ from .ai_gym import AIGym
+ from .analytics import Analytics
+ from .distance_calculation import DistanceCalculation
+ from .heatmap import Heatmap
+ from .instance_segmentation import InstanceSegmentation
+ from .object_blurrer import ObjectBlurrer
+ from .object_counter import ObjectCounter
+ from .object_cropper import ObjectCropper
+ from .parking_management import ParkingManagement, ParkingPtsSelection
+ from .queue_management import QueueManager
+ from .region_counter import RegionCounter
+ from .security_alarm import SecurityAlarm
+ from .similarity_search import SearchApp, VisualAISearch
+ from .speed_estimation import SpeedEstimator
+ from .streamlit_inference import Inference
+ from .trackzone import TrackZone
+ from .vision_eye import VisionEye
+
+ __all__ = (
+     "ObjectCounter",
+     "ObjectCropper",
+     "ObjectBlurrer",
+     "AIGym",
+     "RegionCounter",
+     "SecurityAlarm",
+     "Heatmap",
+     "InstanceSegmentation",
+     "VisionEye",
+     "SpeedEstimator",
+     "DistanceCalculation",
+     "QueueManager",
+     "ParkingManagement",
+     "ParkingPtsSelection",
+     "Analytics",
+     "Inference",
+     "TrackZone",
+     "SearchApp",
+     "VisualAISearch",
+ )
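A quick sanity-check sketch of the public surface this `__init__.py` re-exports, again assuming the wheel is installed so that `ultralytics.solutions` is importable:

import ultralytics.solutions as solutions

# Every name re-exported in __all__ should resolve to an attribute of the package.
for name in solutions.__all__:
    assert hasattr(solutions, name), f"missing export: {name}"
print(f"{len(solutions.__all__)} solution entry points exported")  # 19 in this file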
ultralytics/solutions/ai_gym.py
@@ -0,0 +1,116 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ from collections import defaultdict
+
+ from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
+
+
+ class AIGym(BaseSolution):
+     """
+     A class to manage gym steps of people in a real-time video stream based on their poses.
+
+     This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts
+     repetitions of exercises based on predefined angle thresholds for up and down positions.
+
+     Attributes:
+         states (Dict[float, int, str]): Stores per-track angle, count, and stage for workout monitoring.
+         up_angle (float): Angle threshold for considering the 'up' position of an exercise.
+         down_angle (float): Angle threshold for considering the 'down' position of an exercise.
+         kpts (List[int]): Indices of keypoints used for angle calculation.
+
+     Methods:
+         process: Processes a frame to detect poses, calculate angles, and count repetitions.
+
+     Examples:
+         >>> gym = AIGym(model="yolo11n-pose.pt")
+         >>> image = cv2.imread("gym_scene.jpg")
+         >>> results = gym.process(image)
+         >>> processed_image = results.plot_im
+         >>> cv2.imshow("Processed Image", processed_image)
+         >>> cv2.waitKey(0)
+     """
+
+     def __init__(self, **kwargs):
+         """
+         Initialize AIGym for workout monitoring using pose estimation and predefined angles.
+
+         Args:
+             **kwargs (Any): Keyword arguments passed to the parent class constructor.
+                 model (str): Model name or path, defaults to "yolo11n-pose.pt".
+         """
+         kwargs["model"] = kwargs.get("model", "yolo11n-pose.pt")
+         super().__init__(**kwargs)
+         self.states = defaultdict(lambda: {"angle": 0, "count": 0, "stage": "-"})  # Dict for count, angle and stage
+
+         # Extract details from CFG single time for usage later
+         self.up_angle = float(self.CFG["up_angle"])  # Pose up predefined angle to consider up pose
+         self.down_angle = float(self.CFG["down_angle"])  # Pose down predefined angle to consider down pose
+         self.kpts = self.CFG["kpts"]  # User selected kpts of workouts storage for further usage
+
+     def process(self, im0):
+         """
+         Monitor workouts using Ultralytics YOLO Pose Model.
+
+         This function processes an input image to track and analyze human poses for workout monitoring. It uses
+         the YOLO Pose model to detect keypoints, estimate angles, and count repetitions based on predefined
+         angle thresholds.
+
+         Args:
+             im0 (np.ndarray): Input image for processing.
+
+         Returns:
+             (SolutionResults): Contains processed image `plot_im`,
+                 'workout_count' (list of completed reps),
+                 'workout_stage' (list of current stages),
+                 'workout_angle' (list of angles), and
+                 'total_tracks' (total number of tracked individuals).
+
+         Examples:
+             >>> gym = AIGym()
+             >>> image = cv2.imread("workout.jpg")
+             >>> results = gym.process(image)
+             >>> processed_image = results.plot_im
+         """
+         annotator = SolutionAnnotator(im0, line_width=self.line_width)  # Initialize annotator
+
+         self.extract_tracks(im0)  # Extract tracks (bounding boxes, classes, and masks)
+         tracks = self.tracks[0]
+
+         if tracks.boxes.id is not None:
+             track_ids = tracks.boxes.id.cpu().tolist()
+             kpt_data = tracks.keypoints.data.cpu()  # Avoid repeated .cpu() calls
+
+             for i, k in enumerate(kpt_data):
+                 track_id = int(track_ids[i])  # get track id
+                 state = self.states[track_id]  # get state details
+                 # Get keypoints and estimate the angle
+                 state["angle"] = annotator.estimate_pose_angle(*[k[int(idx)] for idx in self.kpts])
+                 annotator.draw_specific_kpts(k, self.kpts, radius=self.line_width * 3)
+
+                 # Determine stage and count logic based on angle thresholds
+                 if state["angle"] < self.down_angle:
+                     if state["stage"] == "up":
+                         state["count"] += 1
+                     state["stage"] = "down"
+                 elif state["angle"] > self.up_angle:
+                     state["stage"] = "up"
+
+                 # Display angle, count, and stage text
+                 if self.show_labels:
+                     annotator.plot_angle_and_count_and_stage(
+                         angle_text=state["angle"],  # angle text for display
+                         count_text=state["count"],  # count text for workouts
+                         stage_text=state["stage"],  # stage position text
+                         center_kpt=k[int(self.kpts[1])],  # center keypoint for display
+                     )
+         plot_im = annotator.result()
+         self.display_output(plot_im)  # Display output image, if environment support display
+
+         # Return SolutionResults
+         return SolutionResults(
+             plot_im=plot_im,
+             workout_count=[v["count"] for v in self.states.values()],
+             workout_stage=[v["stage"] for v in self.states.values()],
+             workout_angle=[v["angle"] for v in self.states.values()],
+             total_tracks=len(self.track_ids),
+         )
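A hedged end-to-end sketch of running AIGym on a video, based on the docstring examples above. The `kpts` and `show` keyword arguments are assumed to flow into the BaseSolution config that `self.CFG` reads, the keypoint triplet [6, 8, 10] (right shoulder, elbow, wrist in COCO pose order) and the input path are illustrative, and since this is a headless build, the annotated frame in `results.plot_im` would normally be written to disk rather than shown in a window.

import cv2

from ultralytics.solutions import AIGym

# Count repetitions from a tracked joint angle; kwargs are forwarded to BaseSolution.
gym = AIGym(model="yolo11n-pose.pt", kpts=[6, 8, 10], show=False)

cap = cv2.VideoCapture("workout.mp4")  # hypothetical input video
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = gym.process(frame)  # SolutionResults with plot_im, workout_count, workout_stage, ...
    print(results.workout_count, results.workout_stage)  # assumes SolutionResults exposes these as attributes
cap.release()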