dgenerate-ultralytics-headless 8.3.214__py3-none-any.whl → 8.3.248__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/METADATA +13 -14
- dgenerate_ultralytics_headless-8.3.248.dist-info/RECORD +298 -0
- tests/__init__.py +5 -7
- tests/conftest.py +8 -15
- tests/test_cli.py +1 -1
- tests/test_cuda.py +5 -8
- tests/test_engine.py +1 -1
- tests/test_exports.py +57 -12
- tests/test_integrations.py +4 -4
- tests/test_python.py +84 -53
- tests/test_solutions.py +160 -151
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +56 -62
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +15 -16
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco-pose.yaml +21 -0
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/coco8-pose.yaml +21 -0
- ultralytics/cfg/datasets/dog-pose.yaml +28 -0
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -2
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +5 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/cfg/datasets/tiger-pose.yaml +16 -0
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/default.yaml +1 -1
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +1 -1
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +1 -1
- ultralytics/cfg/models/v10/yolov10b.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10l.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10m.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10n.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10s.yaml +2 -2
- ultralytics/cfg/models/v10/yolov10x.yaml +2 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +1 -1
- ultralytics/cfg/models/v6/yolov6.yaml +1 -1
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/annotator.py +3 -4
- ultralytics/data/augment.py +285 -475
- ultralytics/data/base.py +18 -26
- ultralytics/data/build.py +147 -25
- ultralytics/data/converter.py +36 -46
- ultralytics/data/dataset.py +46 -74
- ultralytics/data/loaders.py +42 -49
- ultralytics/data/split.py +5 -6
- ultralytics/data/split_dota.py +8 -15
- ultralytics/data/utils.py +34 -43
- ultralytics/engine/exporter.py +319 -237
- ultralytics/engine/model.py +148 -188
- ultralytics/engine/predictor.py +29 -38
- ultralytics/engine/results.py +177 -311
- ultralytics/engine/trainer.py +83 -59
- ultralytics/engine/tuner.py +23 -34
- ultralytics/engine/validator.py +39 -22
- ultralytics/hub/__init__.py +16 -19
- ultralytics/hub/auth.py +6 -12
- ultralytics/hub/google/__init__.py +7 -10
- ultralytics/hub/session.py +15 -25
- ultralytics/hub/utils.py +5 -8
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/model.py +8 -10
- ultralytics/models/fastsam/predict.py +17 -29
- ultralytics/models/fastsam/utils.py +1 -2
- ultralytics/models/fastsam/val.py +5 -7
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +5 -8
- ultralytics/models/nas/predict.py +7 -9
- ultralytics/models/nas/val.py +1 -2
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +5 -8
- ultralytics/models/rtdetr/predict.py +15 -19
- ultralytics/models/rtdetr/train.py +10 -13
- ultralytics/models/rtdetr/val.py +21 -23
- ultralytics/models/sam/__init__.py +15 -2
- ultralytics/models/sam/amg.py +14 -20
- ultralytics/models/sam/build.py +26 -19
- ultralytics/models/sam/build_sam3.py +377 -0
- ultralytics/models/sam/model.py +29 -32
- ultralytics/models/sam/modules/blocks.py +83 -144
- ultralytics/models/sam/modules/decoders.py +19 -37
- ultralytics/models/sam/modules/encoders.py +44 -101
- ultralytics/models/sam/modules/memory_attention.py +16 -30
- ultralytics/models/sam/modules/sam.py +200 -73
- ultralytics/models/sam/modules/tiny_encoder.py +64 -83
- ultralytics/models/sam/modules/transformer.py +18 -28
- ultralytics/models/sam/modules/utils.py +174 -50
- ultralytics/models/sam/predict.py +2248 -350
- ultralytics/models/sam/sam3/__init__.py +3 -0
- ultralytics/models/sam/sam3/decoder.py +546 -0
- ultralytics/models/sam/sam3/encoder.py +529 -0
- ultralytics/models/sam/sam3/geometry_encoders.py +415 -0
- ultralytics/models/sam/sam3/maskformer_segmentation.py +286 -0
- ultralytics/models/sam/sam3/model_misc.py +199 -0
- ultralytics/models/sam/sam3/necks.py +129 -0
- ultralytics/models/sam/sam3/sam3_image.py +339 -0
- ultralytics/models/sam/sam3/text_encoder_ve.py +307 -0
- ultralytics/models/sam/sam3/vitdet.py +547 -0
- ultralytics/models/sam/sam3/vl_combiner.py +160 -0
- ultralytics/models/utils/loss.py +14 -26
- ultralytics/models/utils/ops.py +13 -17
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +9 -12
- ultralytics/models/yolo/classify/train.py +11 -32
- ultralytics/models/yolo/classify/val.py +29 -28
- ultralytics/models/yolo/detect/predict.py +7 -10
- ultralytics/models/yolo/detect/train.py +11 -20
- ultralytics/models/yolo/detect/val.py +70 -58
- ultralytics/models/yolo/model.py +36 -53
- ultralytics/models/yolo/obb/predict.py +5 -14
- ultralytics/models/yolo/obb/train.py +11 -14
- ultralytics/models/yolo/obb/val.py +39 -36
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +6 -21
- ultralytics/models/yolo/pose/train.py +10 -15
- ultralytics/models/yolo/pose/val.py +38 -57
- ultralytics/models/yolo/segment/predict.py +14 -18
- ultralytics/models/yolo/segment/train.py +3 -6
- ultralytics/models/yolo/segment/val.py +93 -45
- ultralytics/models/yolo/world/train.py +8 -14
- ultralytics/models/yolo/world/train_world.py +11 -34
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/models/yolo/yoloe/predict.py +16 -23
- ultralytics/models/yolo/yoloe/train.py +30 -43
- ultralytics/models/yolo/yoloe/train_seg.py +5 -10
- ultralytics/models/yolo/yoloe/val.py +15 -20
- ultralytics/nn/__init__.py +7 -7
- ultralytics/nn/autobackend.py +145 -77
- ultralytics/nn/modules/__init__.py +60 -60
- ultralytics/nn/modules/activation.py +4 -6
- ultralytics/nn/modules/block.py +132 -216
- ultralytics/nn/modules/conv.py +52 -97
- ultralytics/nn/modules/head.py +50 -103
- ultralytics/nn/modules/transformer.py +76 -88
- ultralytics/nn/modules/utils.py +16 -21
- ultralytics/nn/tasks.py +94 -154
- ultralytics/nn/text_model.py +40 -67
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/ai_gym.py +11 -17
- ultralytics/solutions/analytics.py +15 -16
- ultralytics/solutions/config.py +5 -6
- ultralytics/solutions/distance_calculation.py +10 -13
- ultralytics/solutions/heatmap.py +7 -13
- ultralytics/solutions/instance_segmentation.py +5 -8
- ultralytics/solutions/object_blurrer.py +7 -10
- ultralytics/solutions/object_counter.py +12 -19
- ultralytics/solutions/object_cropper.py +8 -14
- ultralytics/solutions/parking_management.py +33 -31
- ultralytics/solutions/queue_management.py +10 -12
- ultralytics/solutions/region_counter.py +9 -12
- ultralytics/solutions/security_alarm.py +15 -20
- ultralytics/solutions/similarity_search.py +10 -15
- ultralytics/solutions/solutions.py +75 -74
- ultralytics/solutions/speed_estimation.py +7 -10
- ultralytics/solutions/streamlit_inference.py +2 -4
- ultralytics/solutions/templates/similarity-search.html +7 -18
- ultralytics/solutions/trackzone.py +7 -10
- ultralytics/solutions/vision_eye.py +5 -8
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +3 -5
- ultralytics/trackers/bot_sort.py +10 -27
- ultralytics/trackers/byte_tracker.py +14 -30
- ultralytics/trackers/track.py +3 -6
- ultralytics/trackers/utils/gmc.py +11 -22
- ultralytics/trackers/utils/kalman_filter.py +37 -48
- ultralytics/trackers/utils/matching.py +12 -15
- ultralytics/utils/__init__.py +116 -116
- ultralytics/utils/autobatch.py +2 -4
- ultralytics/utils/autodevice.py +17 -18
- ultralytics/utils/benchmarks.py +32 -46
- ultralytics/utils/callbacks/base.py +8 -10
- ultralytics/utils/callbacks/clearml.py +5 -13
- ultralytics/utils/callbacks/comet.py +32 -46
- ultralytics/utils/callbacks/dvc.py +13 -18
- ultralytics/utils/callbacks/mlflow.py +4 -5
- ultralytics/utils/callbacks/neptune.py +7 -15
- ultralytics/utils/callbacks/platform.py +314 -38
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +23 -31
- ultralytics/utils/callbacks/wb.py +10 -13
- ultralytics/utils/checks.py +99 -76
- ultralytics/utils/cpu.py +3 -8
- ultralytics/utils/dist.py +8 -12
- ultralytics/utils/downloads.py +20 -30
- ultralytics/utils/errors.py +6 -14
- ultralytics/utils/events.py +2 -4
- ultralytics/utils/export/__init__.py +4 -236
- ultralytics/utils/export/engine.py +237 -0
- ultralytics/utils/export/imx.py +91 -55
- ultralytics/utils/export/tensorflow.py +231 -0
- ultralytics/utils/files.py +24 -28
- ultralytics/utils/git.py +9 -11
- ultralytics/utils/instance.py +30 -51
- ultralytics/utils/logger.py +212 -114
- ultralytics/utils/loss.py +14 -22
- ultralytics/utils/metrics.py +126 -155
- ultralytics/utils/nms.py +13 -16
- ultralytics/utils/ops.py +107 -165
- ultralytics/utils/patches.py +33 -21
- ultralytics/utils/plotting.py +72 -80
- ultralytics/utils/tal.py +25 -39
- ultralytics/utils/torch_utils.py +52 -78
- ultralytics/utils/tqdm.py +20 -20
- ultralytics/utils/triton.py +13 -19
- ultralytics/utils/tuner.py +17 -5
- dgenerate_ultralytics_headless-8.3.214.dist-info/RECORD +0 -283
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.214.dist-info → dgenerate_ultralytics_headless-8.3.248.dist-info}/top_level.txt +0 -0
ultralytics/nn/text_model.py
CHANGED
@@ -20,8 +20,7 @@ except ImportError:
 
 
 class TextModel(nn.Module):
-    """
-    Abstract base class for text encoding models.
+    """Abstract base class for text encoding models.
 
     This class defines the interface for text encoding models used in vision-language tasks. Subclasses must implement
     the tokenize and encode_text methods to provide text tokenization and encoding functionality.
@@ -47,11 +46,10 @@ class TextModel(nn.Module):
 
 
 class CLIP(TextModel):
-    """
-    Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.
+    """Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.
 
-    This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors
-    that are aligned with corresponding image features in a shared embedding space.
+    This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors that
+    are aligned with corresponding image features in a shared embedding space.
 
     Attributes:
         model (clip.model.CLIP): The loaded CLIP model.
@@ -71,20 +69,14 @@ class CLIP(TextModel):
     """
 
     def __init__(self, size: str, device: torch.device) -> None:
-        """
-        Initialize the CLIP text encoder.
+        """Initialize the CLIP text encoder.
 
-        This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads
-        a pre-trained CLIP model of the specified size and prepares it for text encoding tasks.
+        This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads a
+        pre-trained CLIP model of the specified size and prepares it for text encoding tasks.
 
         Args:
             size (str): Model size identifier (e.g., 'ViT-B/32').
            device (torch.device): Device to load the model on.
-
-        Examples:
-            >>> import torch
-            >>> clip_model = CLIP("ViT-B/32", device=torch.device("cuda:0"))
-            >>> text_features = clip_model.encode_text(["a photo of a cat", "a photo of a dog"])
         """
         super().__init__()
         self.model, self.image_preprocess = clip.load(size, device=device)
@@ -92,12 +84,13 @@ class CLIP(TextModel):
         self.device = device
         self.eval()
 
-    def tokenize(self, texts: str | list[str]) -> torch.Tensor:
-        """
-        Convert input texts to CLIP tokens.
+    def tokenize(self, texts: str | list[str], truncate: bool = True) -> torch.Tensor:
+        """Convert input texts to CLIP tokens.
 
         Args:
             texts (str | list[str]): Input text or list of texts to tokenize.
+            truncate (bool, optional): Whether to trim texts that exceed CLIP's context length. Defaults to True to
+                avoid RuntimeError from overly long inputs while still allowing explicit opt-out.
 
         Returns:
             (torch.Tensor): Tokenized text tensor with shape (batch_size, context_length) ready for model processing.
@@ -106,13 +99,14 @@ class CLIP(TextModel):
             >>> model = CLIP("ViT-B/32", device="cpu")
             >>> tokens = model.tokenize("a photo of a cat")
             >>> print(tokens.shape)  # torch.Size([1, 77])
+            >>> strict_tokens = model.tokenize("a photo of a cat", truncate=False)  # Enforce strict length checks
+            >>> print(strict_tokens.shape)  # Same shape/content as tokens since prompt less than 77 tokens
         """
-        return clip.tokenize(texts).to(self.device)
+        return clip.tokenize(texts, truncate=truncate).to(self.device)
 
     @smart_inference_mode()
     def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
-        """
-        Encode tokenized texts into normalized feature vectors.
+        """Encode tokenized texts into normalized feature vectors.
 
         This method processes tokenized text inputs through the CLIP model to generate feature vectors, which are then
         normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
@@ -137,15 +131,14 @@ class CLIP(TextModel):
 
     @smart_inference_mode()
     def encode_image(self, image: Image.Image | torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
-        """
-        Encode preprocessed images into normalized feature vectors.
+        """Encode preprocessed images into normalized feature vectors.
 
-        This method processes preprocessed image inputs through the CLIP model to generate feature vectors, which are
-        normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
+        This method processes preprocessed image inputs through the CLIP model to generate feature vectors, which are
+        then normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
 
         Args:
-            image (PIL.Image | torch.Tensor): Preprocessed image input. If a PIL Image is provided, it will be
-                converted to a tensor using the model's image preprocessing function.
+            image (PIL.Image | torch.Tensor): Preprocessed image input. If a PIL Image is provided, it will be converted
+                to a tensor using the model's image preprocessing function.
             dtype (torch.dtype, optional): Data type for output features.
 
         Returns:
@@ -169,8 +162,7 @@ class CLIP(TextModel):
 
 
 class MobileCLIP(TextModel):
-    """
-    Implement Apple's MobileCLIP text encoder for efficient text encoding.
+    """Implement Apple's MobileCLIP text encoder for efficient text encoding.
 
     This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
     capabilities for vision-language tasks with reduced computational requirements compared to standard CLIP models.
@@ -195,28 +187,16 @@ class MobileCLIP(TextModel):
     config_size_map = {"s0": "s0", "s1": "s1", "s2": "s2", "b": "b", "blt": "b"}
 
     def __init__(self, size: str, device: torch.device) -> None:
-        """
-        Initialize the MobileCLIP text encoder.
+        """Initialize the MobileCLIP text encoder.
 
         This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
 
         Args:
             size (str): Model size identifier (e.g., 's0', 's1', 's2', 'b', 'blt').
             device (torch.device): Device to load the model on.
-
-        Examples:
-            >>> import torch
-            >>> model = MobileCLIP("s0", device=torch.device("cpu"))
-            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
-            >>> features = model.encode_text(tokens)
         """
         try:
-            import warnings
-
-            # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
-            with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", category=FutureWarning)
-                import mobileclip
+            import mobileclip
         except ImportError:
             # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
             checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")
@@ -236,8 +216,7 @@ class MobileCLIP(TextModel):
         self.eval()
 
     def tokenize(self, texts: list[str]) -> torch.Tensor:
-        """
-        Convert input texts to MobileCLIP tokens.
+        """Convert input texts to MobileCLIP tokens.
 
         Args:
             texts (list[str]): List of text strings to tokenize.
@@ -253,8 +232,7 @@ class MobileCLIP(TextModel):
 
     @smart_inference_mode()
     def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
-        """
-        Encode tokenized texts into normalized feature vectors.
+        """Encode tokenized texts into normalized feature vectors.
 
         Args:
             texts (torch.Tensor): Tokenized text inputs.
@@ -276,8 +254,7 @@ class MobileCLIP(TextModel):
 
 
 class MobileCLIPTS(TextModel):
-    """
-    Load a TorchScript traced version of MobileCLIP.
+    """Load a TorchScript traced version of MobileCLIP.
 
     This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format, providing
     efficient text encoding capabilities for vision-language tasks with optimized inference performance.
@@ -299,19 +276,13 @@ class MobileCLIPTS(TextModel):
     """
 
     def __init__(self, device: torch.device):
-        """
-        Initialize the MobileCLIP TorchScript text encoder.
+        """Initialize the MobileCLIP TorchScript text encoder.
 
-        This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format for
-        efficient text encoding with optimized inference performance.
+        This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format for efficient
+        text encoding with optimized inference performance.
 
         Args:
             device (torch.device): Device to load the model on.
-
-        Examples:
-            >>> model = MobileCLIPTS(device=torch.device("cpu"))
-            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
-            >>> features = model.encode_text(tokens)
         """
         super().__init__()
         from ultralytics.utils.downloads import attempt_download_asset
@@ -320,26 +291,29 @@ class MobileCLIPTS(TextModel):
         self.tokenizer = clip.clip.tokenize
         self.device = device
 
-    def tokenize(self, texts: list[str]) -> torch.Tensor:
-        """
-        Convert input texts to MobileCLIP tokens.
+    def tokenize(self, texts: list[str], truncate: bool = True) -> torch.Tensor:
+        """Convert input texts to MobileCLIP tokens.
 
         Args:
             texts (list[str]): List of text strings to tokenize.
+            truncate (bool, optional): Whether to trim texts that exceed the tokenizer context length. Defaults to True,
+                matching CLIP's behavior to prevent runtime failures on long captions.
 
         Returns:
             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
 
         Examples:
-            >>> model = MobileCLIPTS("cpu")
+            >>> model = MobileCLIPTS(device=torch.device("cpu"))
             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+            >>> strict_tokens = model.tokenize(
+            ...     ["a very long caption"], truncate=False
+            ... )  # RuntimeError if exceeds 77-token
         """
-        return self.tokenizer(texts).to(self.device)
+        return self.tokenizer(texts, truncate=truncate).to(self.device)
 
     @smart_inference_mode()
     def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
-        """
-        Encode tokenized texts into normalized feature vectors.
+        """Encode tokenized texts into normalized feature vectors.
 
         Args:
             texts (torch.Tensor): Tokenized text inputs.
@@ -360,8 +334,7 @@ class MobileCLIPTS(TextModel):
 
 
 def build_text_model(variant: str, device: torch.device = None) -> TextModel:
-    """
-    Build a text encoding model based on the specified variant.
+    """Build a text encoding model based on the specified variant.
 
     Args:
         variant (str): Model variant in format "base:size" (e.g., "clip:ViT-B/32" or "mobileclip:s0").
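The functional change in this file is the new truncate argument threaded through CLIP.tokenize and MobileCLIPTS.tokenize down to clip.tokenize. A minimal sketch of the new behavior, assuming the optional clip dependency is installed, the ViT-B/32 weights can be downloaded, and the sample caption is illustrative:

import torch

from ultralytics.nn.text_model import build_text_model

# Variant format "base:size" comes from the build_text_model docstring above.
model = build_text_model("clip:ViT-B/32", device=torch.device("cpu"))

long_caption = "a photo of " + "a very crowded street " * 40  # far beyond CLIP's 77-token context

tokens = model.tokenize(long_caption)  # truncate=True by default: caption is trimmed to 77 tokens
print(tokens.shape)  # torch.Size([1, 77])

try:
    model.tokenize(long_caption, truncate=False)  # explicit opt-out restores strict length checking
except RuntimeError as err:
    print(f"Caption too long without truncation: {err}")

features = model.encode_text(tokens)  # unit-normalized text embeddings for similarity comparisons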
ultralytics/solutions/__init__.py
CHANGED
@@ -19,23 +19,23 @@ from .trackzone import TrackZone
 from .vision_eye import VisionEye
 
 __all__ = (
-    "ObjectCounter",
-    "ObjectCropper",
-    "ObjectBlurrer",
     "AIGym",
-    "…",
-    "…",
+    "Analytics",
+    "DistanceCalculation",
     "Heatmap",
+    "Inference",
     "InstanceSegmentation",
-    "…",
-    "…",
-    "…",
-    "QueueManager",
+    "ObjectBlurrer",
+    "ObjectCounter",
+    "ObjectCropper",
     "ParkingManagement",
     "ParkingPtsSelection",
-    "…",
-    "…",
-    "TrackZone",
+    "QueueManager",
+    "RegionCounter",
     "SearchApp",
+    "SecurityAlarm",
+    "SpeedEstimator",
+    "TrackZone",
+    "VisionEye",
     "VisualAISearch",
 )
ultralytics/solutions/ai_gym.py
CHANGED
@@ -7,14 +7,13 @@ from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, Sol
 
 
 class AIGym(BaseSolution):
-    """
-    A class to manage gym steps of people in a real-time video stream based on their poses.
+    """A class to manage gym steps of people in a real-time video stream based on their poses.
 
     This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts
     repetitions of exercises based on predefined angle thresholds for up and down positions.
 
     Attributes:
-        states (dict[…
+        states (dict[int, dict[str, float | int | str]]): Per-track angle, rep count, and stage for workout monitoring.
         up_angle (float): Angle threshold for considering the 'up' position of an exercise.
         down_angle (float): Angle threshold for considering the 'down' position of an exercise.
         kpts (list[int]): Indices of keypoints used for angle calculation.
@@ -32,12 +31,11 @@ class AIGym(BaseSolution):
     """
 
     def __init__(self, **kwargs: Any) -> None:
-        """
-        Initialize AIGym for workout monitoring using pose estimation and predefined angles.
+        """Initialize AIGym for workout monitoring using pose estimation and predefined angles.
 
         Args:
-            **kwargs (Any): Keyword arguments passed to the parent class constructor
-                model (str): Model name or path, defaults to "yolo11n-pose.pt".
+            **kwargs (Any): Keyword arguments passed to the parent class constructor including:
+                - model (str): Model name or path, defaults to "yolo11n-pose.pt".
         """
         kwargs["model"] = kwargs.get("model", "yolo11n-pose.pt")
         super().__init__(**kwargs)
@@ -49,22 +47,18 @@ class AIGym(BaseSolution):
         self.kpts = self.CFG["kpts"]  # User selected kpts of workouts storage for further usage
 
     def process(self, im0) -> SolutionResults:
-        """
-        Monitor workouts using Ultralytics YOLO Pose Model.
+        """Monitor workouts using Ultralytics YOLO Pose Model.
 
-        This function processes an input image to track and analyze human poses for workout monitoring. It uses
-        the YOLO Pose model to detect keypoints, estimate angles, and count repetitions based on predefined
-        angle thresholds.
+        This function processes an input image to track and analyze human poses for workout monitoring. It uses the YOLO
+        Pose model to detect keypoints, estimate angles, and count repetitions based on predefined angle thresholds.
 
         Args:
             im0 (np.ndarray): Input image for processing.
 
         Returns:
-            (SolutionResults): Contains processed image `plot_im`,
-                'workout_count' (list of completed reps),
-                'workout_stage' (list of current stages),
-                'workout_angle' (list of angles), and
-                'total_tracks' (total number of tracked individuals).
+            (SolutionResults): Contains processed image `plot_im`, 'workout_count' (list of completed reps),
+                'workout_stage' (list of current stages), 'workout_angle' (list of angles), and 'total_tracks' (total
+                number of tracked individuals).
 
         Examples:
             >>> gym = AIGym()
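The AIGym edits above are documentation-only, but for context, a rough sketch of how AIGym.process is typically driven, assuming a local video file, automatic download of the default "yolo11n-pose.pt" weights, and that SolutionResults exposes the listed fields as attributes (the input path and keypoint indices are illustrative):

import cv2

from ultralytics import solutions

# kpts selects the three keypoints used for the angle calculation (e.g. shoulder-elbow-wrist).
gym = solutions.AIGym(model="yolo11n-pose.pt", kpts=[6, 8, 10], show=False)

cap = cv2.VideoCapture("workout.mp4")  # illustrative input path
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = gym.process(frame)  # SolutionResults with plot_im, workout_count, workout_stage, workout_angle
    print(results.workout_count, results.workout_stage, results.total_tracks)
cap.release()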
ultralytics/solutions/analytics.py
CHANGED
@@ -9,14 +9,14 @@ import cv2
 import numpy as np
 
 from ultralytics.solutions.solutions import BaseSolution, SolutionResults  # Import a parent class
+from ultralytics.utils import plt_settings
 
 
 class Analytics(BaseSolution):
-    """
-    A class for creating and updating various types of charts for visual analytics.
+    """A class for creating and updating various types of charts for visual analytics.
 
-    This class extends BaseSolution to provide functionality for generating line, bar, pie, and area charts
-    based on object detection and tracking data.
+    This class extends BaseSolution to provide functionality for generating line, bar, pie, and area charts based on
+    object detection and tracking data.
 
     Attributes:
         type (str): The type of analytics chart to generate ('line', 'bar', 'pie', or 'area').
@@ -47,6 +47,7 @@ class Analytics(BaseSolution):
         >>> cv2.imshow("Analytics", results.plot_im)
     """
 
+    @plt_settings()
     def __init__(self, **kwargs: Any) -> None:
         """Initialize Analytics class with various chart types for visual data representation."""
         super().__init__(**kwargs)
@@ -55,7 +56,7 @@ class Analytics(BaseSolution):
         from matplotlib.backends.backend_agg import FigureCanvasAgg
         from matplotlib.figure import Figure
 
-        self.type = self.CFG["analytics_type"]  # type
+        self.type = self.CFG["analytics_type"]  # Chart type: "line", "pie", "bar", or "area".
         self.x_label = "Classes" if self.type in {"bar", "pie"} else "Frame#"
         self.y_label = "Total Counts"
 
@@ -65,10 +66,10 @@ class Analytics(BaseSolution):
         self.title = "Ultralytics Solutions"  # window name
         self.max_points = 45  # maximum points to be drawn on window
         self.fontsize = 25  # text font size for display
-        figsize = self.CFG["figsize"]  # …
+        figsize = self.CFG["figsize"]  # Output size, e.g. (12.8, 7.2) -> 1280x720.
         self.color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])
 
-        self.total_counts = 0  # …
+        self.total_counts = 0  # Stores total counts for line charts.
         self.clswise_count = {}  # dictionary for class-wise counts
         self.update_every = kwargs.get("update_every", 30)  # Only update graph every 30 frames by default
         self.last_plot_im = None  # Cache of the last rendered chart
@@ -92,8 +93,7 @@ class Analytics(BaseSolution):
         self.ax.axis("equal")
 
     def process(self, im0: np.ndarray, frame_number: int) -> SolutionResults:
-        """
-        Process image data and run object tracking to update analytics charts.
+        """Process image data and run object tracking to update analytics charts.
 
         Args:
             im0 (np.ndarray): Input image for processing.
@@ -104,7 +104,7 @@ class Analytics(BaseSolution):
                 and 'classwise_count' (dict, per-class object count).
 
         Raises:
-            …
+            ValueError: If an unsupported chart type is specified.
 
         Examples:
             >>> analytics = Analytics(analytics_type="line")
@@ -131,21 +131,20 @@ class Analytics(BaseSolution):
             )
             plot_im = self.last_plot_im
         else:
-            raise …
+            raise ValueError(f"Unsupported analytics_type='{self.type}'. Supported types: line, bar, pie, area.")
 
-        # …
+        # Return results for downstream use.
         return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)
 
     def update_graph(
         self, frame_number: int, count_dict: dict[str, int] | None = None, plot: str = "line"
     ) -> np.ndarray:
-        """
-        Update the graph with new data for single or multiple classes.
+        """Update the graph with new data for single or multiple classes.
 
         Args:
             frame_number (int): The current frame number.
-            count_dict (dict[str, int], optional): Dictionary with class names as keys and counts as values for
-                multiple classes. If None, updates a single line graph.
+            count_dict (dict[str, int], optional): Dictionary with class names as keys and counts as values for multiple
+                classes. If None, updates a single line graph.
             plot (str): Type of the plot. Options are 'line', 'bar', 'pie', or 'area'.
 
         Returns:
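Two functional changes sit among the Analytics docstring edits: __init__ is now decorated with @plt_settings(), and an unsupported chart type now raises a descriptive ValueError from process(). A rough usage sketch, assuming a local video file and automatic download of the default detection weights (the input path is illustrative):

import cv2

from ultralytics import solutions

analytics = solutions.Analytics(analytics_type="line", show=False)  # 'line', 'bar', 'pie', or 'area'

cap = cv2.VideoCapture("traffic.mp4")  # illustrative input path
frame_number = 0
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame_number += 1
    results = analytics.process(frame, frame_number)  # SolutionResults with plot_im, total_tracks, classwise_count
cap.release()

# With 8.3.248, an unsupported analytics_type fails loudly inside process():
#   solutions.Analytics(analytics_type="scatter").process(frame, 1)
#   ValueError: Unsupported analytics_type='scatter'. Supported types: line, bar, pie, area.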
ultralytics/solutions/config.py
CHANGED
@@ -10,12 +10,11 @@ import cv2
 
 @dataclass
 class SolutionConfig:
-    """
-    Manages configuration parameters for Ultralytics Vision AI solutions.
+    """Manages configuration parameters for Ultralytics Vision AI solutions.
 
-    The SolutionConfig class serves as a centralized configuration container for all the
-    Ultralytics solution modules: https://docs.ultralytics.com/solutions/#solutions. It leverages Python `dataclass`
-    for clear, type-safe, and maintainable parameter definitions.
+    The SolutionConfig class serves as a centralized configuration container for all the Ultralytics solution modules:
+    https://docs.ultralytics.com/solutions/#solutions. It leverages Python `dataclass` for clear, type-safe, and
+    maintainable parameter definitions.
 
     Attributes:
         source (str, optional): Path to the input source (video, RTSP, etc.). Only usable with Solutions CLI.
@@ -36,7 +35,7 @@ class SolutionConfig:
         vision_point (tuple[int, int]): Reference point for directional tracking or perspective drawing.
         crop_dir (str): Directory path to save cropped detection images.
         json_file (str): Path to a JSON file containing data for parking areas.
-        line_width (int): Width for visual display
+        line_width (int): Width for visual display, e.g. bounding boxes, keypoints, and counts.
         records (int): Number of detection records to send email alerts.
         fps (float): Frame rate (Frames Per Second) for speed estimation calculation.
         max_hist (int): Maximum number of historical points or states stored per tracked object for speed estimation.
ultralytics/solutions/distance_calculation.py
CHANGED
@@ -10,15 +10,14 @@ from ultralytics.utils.plotting import colors
 
 
 class DistanceCalculation(BaseSolution):
-    """
-    A class to calculate distance between two objects in a real-time video stream based on their tracks.
+    """A class to calculate distance between two objects in a real-time video stream based on their tracks.
 
-    This class extends BaseSolution to provide functionality for selecting objects and calculating the distance
-    between them in a video stream using YOLO object detection and tracking.
+    This class extends BaseSolution to provide functionality for selecting objects and calculating the distance between
+    them in a video stream using YOLO object detection and tracking.
 
     Attributes:
         left_mouse_count (int): Counter for left mouse button clicks.
-        selected_boxes (dict[int, …
+        selected_boxes (dict[int, Any]): Dictionary to store selected bounding boxes keyed by track ID.
         centroids (list[list[int]]): List to store centroids of selected bounding boxes.
 
     Methods:
@@ -43,8 +42,7 @@ class DistanceCalculation(BaseSolution):
         self.centroids: list[list[int]] = []  # Store centroids of selected objects
 
     def mouse_event_for_distance(self, event: int, x: int, y: int, flags: int, param: Any) -> None:
-        """
-        Handle mouse events to select regions in a real-time video stream for distance calculation.
+        """Handle mouse events to select regions in a real-time video stream for distance calculation.
 
         Args:
             event (int): Type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN).
@@ -69,18 +67,17 @@ class DistanceCalculation(BaseSolution):
                 self.left_mouse_count = 0
 
     def process(self, im0) -> SolutionResults:
-        """
-        Process a video frame and calculate the distance between two selected bounding boxes.
+        """Process a video frame and calculate the distance between two selected bounding boxes.
 
-        This method extracts tracks from the input frame, annotates bounding boxes, and calculates the distance
-        between two user-selected objects if they have been chosen.
+        This method extracts tracks from the input frame, annotates bounding boxes, and calculates the distance between
+        two user-selected objects if they have been chosen.
 
         Args:
             im0 (np.ndarray): The input image frame to process.
 
         Returns:
-            (SolutionResults): Contains processed image `plot_im`, `total_tracks` (int) representing the total number
-                of tracked objects, and `pixels_distance` (float) representing the distance between selected objects
+            (SolutionResults): Contains processed image `plot_im`, `total_tracks` (int) representing the total number of
+                tracked objects, and `pixels_distance` (float) representing the distance between selected objects
                 in pixels.
 
         Examples:
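For reference, a brief sketch of the interactive flow the DistanceCalculation docstrings describe, assuming a display is available for the OpenCV window in which the two objects are left-clicked and that SolutionResults exposes the listed fields as attributes (the input path and weights are illustrative):

import cv2

from ultralytics import solutions

calculator = solutions.DistanceCalculation(model="yolo11n.pt", show=True)  # window needed to click-select two boxes
frame = cv2.imread("street.jpg")  # illustrative single frame; normally fed from a video loop
results = calculator.process(frame)
print(results.total_tracks, results.pixels_distance)  # pixels_distance populated once two objects are selected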
ultralytics/solutions/heatmap.py
CHANGED
@@ -12,8 +12,7 @@ from ultralytics.solutions.solutions import SolutionAnnotator, SolutionResults
 
 
 class Heatmap(ObjectCounter):
-    """
-    A class to draw heatmaps in real-time video streams based on object tracks.
+    """A class to draw heatmaps in real-time video streams based on object tracks.
 
     This class extends the ObjectCounter class to generate and visualize heatmaps of object movements in video
     streams. It uses tracked object positions to create a cumulative heatmap effect over time.
@@ -36,8 +35,7 @@ class Heatmap(ObjectCounter):
     """
 
    def __init__(self, **kwargs: Any) -> None:
-        """
-        Initialize the Heatmap class for real-time video stream heatmap generation based on object tracks.
+        """Initialize the Heatmap class for real-time video stream heatmap generation based on object tracks.
 
         Args:
             **kwargs (Any): Keyword arguments passed to the parent ObjectCounter class.
@@ -53,8 +51,7 @@ class Heatmap(ObjectCounter):
         self.heatmap = None
 
     def heatmap_effect(self, box: list[float]) -> None:
-        """
-        Efficiently calculate heatmap area and effect location for applying colormap.
+        """Efficiently calculate heatmap area and effect location for applying colormap.
 
         Args:
             box (list[float]): Bounding box coordinates [x0, y0, x1, y1].
@@ -75,18 +72,15 @@ class Heatmap(ObjectCounter):
         self.heatmap[y0:y1, x0:x1][within_radius] += 2
 
     def process(self, im0: np.ndarray) -> SolutionResults:
-        """
-        Generate heatmap for each frame using Ultralytics tracking.
+        """Generate heatmap for each frame using Ultralytics tracking.
 
         Args:
             im0 (np.ndarray): Input image array for processing.
 
         Returns:
-            (SolutionResults): Contains processed image `plot_im`,
-                'in_count' (int, count of objects entering the region),
-                'out_count' (int, count of objects exiting the region),
-                'classwise_count' (dict, per-class object count), and
-                'total_tracks' (int, total number of tracked objects).
+            (SolutionResults): Contains processed image `plot_im`, 'in_count' (int, count of objects entering the
+                region), 'out_count' (int, count of objects exiting the region), 'classwise_count' (dict, per-class
+                object count), and 'total_tracks' (int, total number of tracked objects).
         """
         if not self.initialized:
             self.heatmap = np.zeros_like(im0, dtype=np.float32) * 0.99
ultralytics/solutions/instance_segmentation.py
CHANGED
@@ -7,8 +7,7 @@ from ultralytics.solutions.solutions import BaseSolution, SolutionResults
 
 
 class InstanceSegmentation(BaseSolution):
-    """
-    A class to manage instance segmentation in images or video streams.
+    """A class to manage instance segmentation in images or video streams.
 
     This class extends the BaseSolution class and provides functionality for performing instance segmentation, including
     drawing segmented masks with bounding boxes and labels.
@@ -36,12 +35,11 @@ class InstanceSegmentation(BaseSolution):
     """
 
     def __init__(self, **kwargs: Any) -> None:
-        """
-        Initialize the InstanceSegmentation class for detecting and annotating segmented instances.
+        """Initialize the InstanceSegmentation class for detecting and annotating segmented instances.
 
         Args:
-            **kwargs (Any): Keyword arguments passed to the BaseSolution parent class
-                model (str): Model name or path, defaults to "yolo11n-seg.pt".
+            **kwargs (Any): Keyword arguments passed to the BaseSolution parent class including:
+                - model (str): Model name or path, defaults to "yolo11n-seg.pt".
         """
         kwargs["model"] = kwargs.get("model", "yolo11n-seg.pt")
         super().__init__(**kwargs)
@@ -51,8 +49,7 @@ class InstanceSegmentation(BaseSolution):
         self.show_boxes = self.CFG.get("show_boxes", True)
 
     def process(self, im0) -> SolutionResults:
-        """
-        Perform instance segmentation on the input image and annotate the results.
+        """Perform instance segmentation on the input image and annotate the results.
 
         Args:
             im0 (np.ndarray): The input image for segmentation.
ultralytics/solutions/object_blurrer.py
CHANGED
@@ -10,8 +10,7 @@ from ultralytics.utils.plotting import colors
 
 
 class ObjectBlurrer(BaseSolution):
-    """
-    A class to manage the blurring of detected objects in a real-time video stream.
+    """A class to manage the blurring of detected objects in a real-time video stream.
 
     This class extends the BaseSolution class and provides functionality for blurring objects based on detected bounding
     boxes. The blurred areas are updated directly in the input image, allowing for privacy preservation or other effects.
@@ -34,12 +33,11 @@ class ObjectBlurrer(BaseSolution):
     """
 
     def __init__(self, **kwargs: Any) -> None:
-        """
-        Initialize the ObjectBlurrer class for applying a blur effect to objects detected in video streams or images.
+        """Initialize the ObjectBlurrer class for applying a blur effect to objects detected in video streams or images.
 
         Args:
-            **kwargs (Any): Keyword arguments passed to the parent class and for configuration
-                blur_ratio (float): Intensity of the blur effect (0.1-1.0, default=0.5).
+            **kwargs (Any): Keyword arguments passed to the parent class and for configuration including:
+                - blur_ratio (float): Intensity of the blur effect (0.1-1.0, default=0.5).
         """
         super().__init__(**kwargs)
         blur_ratio = self.CFG["blur_ratio"]
@@ -49,11 +47,10 @@ class ObjectBlurrer(BaseSolution):
         self.blur_ratio = int(blur_ratio * 100)
 
     def process(self, im0) -> SolutionResults:
-        """
-        Apply a blurring effect to detected objects in the input image.
+        """Apply a blurring effect to detected objects in the input image.
 
-        This method extracts tracking information, applies blur to regions corresponding to detected objects,
-        and annotates the image with bounding boxes.
+        This method extracts tracking information, applies blur to regions corresponding to detected objects, and
+        annotates the image with bounding boxes.
 
         Args:
             im0 (np.ndarray): The input image containing detected objects.