dgenerate-ultralytics-headless 8.3.222__py3-none-any.whl → 8.3.225__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/METADATA +2 -2
- dgenerate_ultralytics_headless-8.3.225.dist-info/RECORD +286 -0
- tests/conftest.py +5 -8
- tests/test_cli.py +1 -8
- tests/test_python.py +1 -2
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +34 -49
- ultralytics/cfg/datasets/ImageNet.yaml +1 -1
- ultralytics/cfg/datasets/kitti.yaml +27 -0
- ultralytics/cfg/datasets/lvis.yaml +5 -5
- ultralytics/cfg/datasets/open-images-v7.yaml +1 -1
- ultralytics/data/annotator.py +3 -4
- ultralytics/data/augment.py +244 -323
- ultralytics/data/base.py +12 -22
- ultralytics/data/build.py +47 -40
- ultralytics/data/converter.py +32 -42
- ultralytics/data/dataset.py +43 -71
- ultralytics/data/loaders.py +22 -34
- ultralytics/data/split.py +5 -6
- ultralytics/data/split_dota.py +8 -15
- ultralytics/data/utils.py +27 -36
- ultralytics/engine/exporter.py +49 -116
- ultralytics/engine/model.py +144 -180
- ultralytics/engine/predictor.py +18 -29
- ultralytics/engine/results.py +165 -231
- ultralytics/engine/trainer.py +11 -19
- ultralytics/engine/tuner.py +13 -23
- ultralytics/engine/validator.py +6 -10
- ultralytics/hub/__init__.py +7 -12
- ultralytics/hub/auth.py +6 -12
- ultralytics/hub/google/__init__.py +7 -10
- ultralytics/hub/session.py +15 -25
- ultralytics/hub/utils.py +3 -6
- ultralytics/models/fastsam/model.py +6 -8
- ultralytics/models/fastsam/predict.py +5 -10
- ultralytics/models/fastsam/utils.py +1 -2
- ultralytics/models/fastsam/val.py +2 -4
- ultralytics/models/nas/model.py +5 -8
- ultralytics/models/nas/predict.py +7 -9
- ultralytics/models/nas/val.py +1 -2
- ultralytics/models/rtdetr/model.py +5 -8
- ultralytics/models/rtdetr/predict.py +15 -18
- ultralytics/models/rtdetr/train.py +10 -13
- ultralytics/models/rtdetr/val.py +13 -20
- ultralytics/models/sam/amg.py +12 -18
- ultralytics/models/sam/build.py +6 -9
- ultralytics/models/sam/model.py +16 -23
- ultralytics/models/sam/modules/blocks.py +62 -84
- ultralytics/models/sam/modules/decoders.py +17 -24
- ultralytics/models/sam/modules/encoders.py +40 -56
- ultralytics/models/sam/modules/memory_attention.py +10 -16
- ultralytics/models/sam/modules/sam.py +41 -47
- ultralytics/models/sam/modules/tiny_encoder.py +64 -83
- ultralytics/models/sam/modules/transformer.py +17 -27
- ultralytics/models/sam/modules/utils.py +31 -42
- ultralytics/models/sam/predict.py +172 -209
- ultralytics/models/utils/loss.py +14 -26
- ultralytics/models/utils/ops.py +13 -17
- ultralytics/models/yolo/classify/predict.py +8 -11
- ultralytics/models/yolo/classify/train.py +8 -16
- ultralytics/models/yolo/classify/val.py +13 -20
- ultralytics/models/yolo/detect/predict.py +4 -8
- ultralytics/models/yolo/detect/train.py +11 -20
- ultralytics/models/yolo/detect/val.py +38 -48
- ultralytics/models/yolo/model.py +35 -47
- ultralytics/models/yolo/obb/predict.py +5 -8
- ultralytics/models/yolo/obb/train.py +11 -14
- ultralytics/models/yolo/obb/val.py +20 -28
- ultralytics/models/yolo/pose/predict.py +5 -8
- ultralytics/models/yolo/pose/train.py +4 -8
- ultralytics/models/yolo/pose/val.py +31 -39
- ultralytics/models/yolo/segment/predict.py +9 -14
- ultralytics/models/yolo/segment/train.py +3 -6
- ultralytics/models/yolo/segment/val.py +16 -26
- ultralytics/models/yolo/world/train.py +8 -14
- ultralytics/models/yolo/world/train_world.py +11 -16
- ultralytics/models/yolo/yoloe/predict.py +16 -23
- ultralytics/models/yolo/yoloe/train.py +30 -43
- ultralytics/models/yolo/yoloe/train_seg.py +5 -10
- ultralytics/models/yolo/yoloe/val.py +15 -20
- ultralytics/nn/autobackend.py +10 -18
- ultralytics/nn/modules/activation.py +4 -6
- ultralytics/nn/modules/block.py +99 -185
- ultralytics/nn/modules/conv.py +45 -90
- ultralytics/nn/modules/head.py +44 -98
- ultralytics/nn/modules/transformer.py +44 -76
- ultralytics/nn/modules/utils.py +14 -19
- ultralytics/nn/tasks.py +86 -146
- ultralytics/nn/text_model.py +25 -40
- ultralytics/solutions/ai_gym.py +10 -16
- ultralytics/solutions/analytics.py +7 -10
- ultralytics/solutions/config.py +4 -5
- ultralytics/solutions/distance_calculation.py +9 -12
- ultralytics/solutions/heatmap.py +7 -13
- ultralytics/solutions/instance_segmentation.py +5 -8
- ultralytics/solutions/object_blurrer.py +7 -10
- ultralytics/solutions/object_counter.py +8 -12
- ultralytics/solutions/object_cropper.py +5 -8
- ultralytics/solutions/parking_management.py +12 -14
- ultralytics/solutions/queue_management.py +4 -6
- ultralytics/solutions/region_counter.py +7 -10
- ultralytics/solutions/security_alarm.py +14 -19
- ultralytics/solutions/similarity_search.py +7 -12
- ultralytics/solutions/solutions.py +31 -53
- ultralytics/solutions/speed_estimation.py +6 -9
- ultralytics/solutions/streamlit_inference.py +2 -4
- ultralytics/solutions/trackzone.py +7 -10
- ultralytics/solutions/vision_eye.py +5 -8
- ultralytics/trackers/basetrack.py +2 -4
- ultralytics/trackers/bot_sort.py +6 -11
- ultralytics/trackers/byte_tracker.py +10 -15
- ultralytics/trackers/track.py +3 -6
- ultralytics/trackers/utils/gmc.py +6 -12
- ultralytics/trackers/utils/kalman_filter.py +35 -43
- ultralytics/trackers/utils/matching.py +6 -10
- ultralytics/utils/__init__.py +61 -100
- ultralytics/utils/autobatch.py +2 -4
- ultralytics/utils/autodevice.py +11 -13
- ultralytics/utils/benchmarks.py +25 -35
- ultralytics/utils/callbacks/base.py +8 -10
- ultralytics/utils/callbacks/clearml.py +2 -4
- ultralytics/utils/callbacks/comet.py +30 -44
- ultralytics/utils/callbacks/dvc.py +13 -18
- ultralytics/utils/callbacks/mlflow.py +4 -5
- ultralytics/utils/callbacks/neptune.py +4 -6
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +4 -6
- ultralytics/utils/callbacks/wb.py +10 -13
- ultralytics/utils/checks.py +29 -56
- ultralytics/utils/cpu.py +1 -2
- ultralytics/utils/dist.py +8 -12
- ultralytics/utils/downloads.py +17 -27
- ultralytics/utils/errors.py +6 -8
- ultralytics/utils/events.py +2 -4
- ultralytics/utils/export/__init__.py +4 -239
- ultralytics/utils/export/engine.py +237 -0
- ultralytics/utils/export/imx.py +11 -17
- ultralytics/utils/export/tensorflow.py +217 -0
- ultralytics/utils/files.py +10 -15
- ultralytics/utils/git.py +5 -7
- ultralytics/utils/instance.py +30 -51
- ultralytics/utils/logger.py +11 -15
- ultralytics/utils/loss.py +8 -14
- ultralytics/utils/metrics.py +98 -138
- ultralytics/utils/nms.py +13 -16
- ultralytics/utils/ops.py +47 -74
- ultralytics/utils/patches.py +11 -18
- ultralytics/utils/plotting.py +29 -42
- ultralytics/utils/tal.py +25 -39
- ultralytics/utils/torch_utils.py +45 -73
- ultralytics/utils/tqdm.py +6 -8
- ultralytics/utils/triton.py +9 -12
- ultralytics/utils/tuner.py +1 -2
- dgenerate_ultralytics_headless-8.3.222.dist-info/RECORD +0 -283
- {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.222.dist-info → dgenerate_ultralytics_headless-8.3.225.dist-info}/top_level.txt +0 -0
ultralytics/nn/text_model.py
CHANGED
@@ -20,8 +20,7 @@ except ImportError:
 
 
 class TextModel(nn.Module):
-    """
-    Abstract base class for text encoding models.
+    """Abstract base class for text encoding models.
 
     This class defines the interface for text encoding models used in vision-language tasks. Subclasses must implement
     the tokenize and encode_text methods to provide text tokenization and encoding functionality.
@@ -47,11 +46,10 @@ class TextModel(nn.Module):
 
 
 class CLIP(TextModel):
-    """
-    Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.
+    """Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.
 
-    This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors
-    that are aligned with corresponding image features in a shared embedding space.
+    This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors that
+    are aligned with corresponding image features in a shared embedding space.
 
     Attributes:
         model (clip.model.CLIP): The loaded CLIP model.
@@ -71,11 +69,10 @@ class CLIP(TextModel):
     """
 
     def __init__(self, size: str, device: torch.device) -> None:
-        """
-        Initialize the CLIP text encoder.
+        """Initialize the CLIP text encoder.
 
-        This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads
-        a pre-trained CLIP model of the specified size and prepares it for text encoding tasks.
+        This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads a
+        pre-trained CLIP model of the specified size and prepares it for text encoding tasks.
 
         Args:
            size (str): Model size identifier (e.g., 'ViT-B/32').
@@ -93,8 +90,7 @@ class CLIP(TextModel):
         self.eval()
 
     def tokenize(self, texts: str | list[str]) -> torch.Tensor:
-        """
-        Convert input texts to CLIP tokens.
+        """Convert input texts to CLIP tokens.
 
         Args:
            texts (str | list[str]): Input text or list of texts to tokenize.
@@ -111,8 +107,7 @@ class CLIP(TextModel):
 
     @smart_inference_mode()
     def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
-        """
-        Encode tokenized texts into normalized feature vectors.
+        """Encode tokenized texts into normalized feature vectors.
 
         This method processes tokenized text inputs through the CLIP model to generate feature vectors, which are then
         normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
@@ -137,15 +132,14 @@ class CLIP(TextModel):
 
     @smart_inference_mode()
     def encode_image(self, image: Image.Image | torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
-        """
-        Encode preprocessed images into normalized feature vectors.
+        """Encode preprocessed images into normalized feature vectors.
 
-        This method processes preprocessed image inputs through the CLIP model to generate feature vectors, which are
-        normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
+        This method processes preprocessed image inputs through the CLIP model to generate feature vectors, which are
+        then normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
 
         Args:
-            image (PIL.Image | torch.Tensor): Preprocessed image input. If a PIL Image is provided, it will be
-                converted to a tensor using the model's image preprocessing function.
+            image (PIL.Image | torch.Tensor): Preprocessed image input. If a PIL Image is provided, it will be converted
+                to a tensor using the model's image preprocessing function.
            dtype (torch.dtype, optional): Data type for output features.
 
         Returns:
@@ -169,8 +163,7 @@ class CLIP(TextModel):
 
 
 class MobileCLIP(TextModel):
-    """
-    Implement Apple's MobileCLIP text encoder for efficient text encoding.
+    """Implement Apple's MobileCLIP text encoder for efficient text encoding.
 
     This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
     capabilities for vision-language tasks with reduced computational requirements compared to standard CLIP models.
@@ -195,8 +188,7 @@ class MobileCLIP(TextModel):
     config_size_map = {"s0": "s0", "s1": "s1", "s2": "s2", "b": "b", "blt": "b"}
 
     def __init__(self, size: str, device: torch.device) -> None:
-        """
-        Initialize the MobileCLIP text encoder.
+        """Initialize the MobileCLIP text encoder.
 
         This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
 
@@ -236,8 +228,7 @@ class MobileCLIP(TextModel):
         self.eval()
 
     def tokenize(self, texts: list[str]) -> torch.Tensor:
-        """
-        Convert input texts to MobileCLIP tokens.
+        """Convert input texts to MobileCLIP tokens.
 
         Args:
            texts (list[str]): List of text strings to tokenize.
@@ -253,8 +244,7 @@ class MobileCLIP(TextModel):
 
     @smart_inference_mode()
     def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
-        """
-        Encode tokenized texts into normalized feature vectors.
+        """Encode tokenized texts into normalized feature vectors.
 
         Args:
            texts (torch.Tensor): Tokenized text inputs.
@@ -276,8 +266,7 @@ class MobileCLIP(TextModel):
 
 
 class MobileCLIPTS(TextModel):
-    """
-    Load a TorchScript traced version of MobileCLIP.
+    """Load a TorchScript traced version of MobileCLIP.
 
     This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format, providing
     efficient text encoding capabilities for vision-language tasks with optimized inference performance.
@@ -299,11 +288,10 @@ class MobileCLIPTS(TextModel):
     """
 
     def __init__(self, device: torch.device):
-        """
-        Initialize the MobileCLIP TorchScript text encoder.
+        """Initialize the MobileCLIP TorchScript text encoder.
 
-        This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format for
-        efficient text encoding with optimized inference performance.
+        This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format for efficient
+        text encoding with optimized inference performance.
 
         Args:
            device (torch.device): Device to load the model on.
@@ -321,8 +309,7 @@ class MobileCLIPTS(TextModel):
         self.device = device
 
     def tokenize(self, texts: list[str]) -> torch.Tensor:
-        """
-        Convert input texts to MobileCLIP tokens.
+        """Convert input texts to MobileCLIP tokens.
 
         Args:
            texts (list[str]): List of text strings to tokenize.
@@ -338,8 +325,7 @@ class MobileCLIPTS(TextModel):
 
     @smart_inference_mode()
    def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
-        """
-        Encode tokenized texts into normalized feature vectors.
+        """Encode tokenized texts into normalized feature vectors.
 
         Args:
            texts (torch.Tensor): Tokenized text inputs.
@@ -360,8 +346,7 @@ class MobileCLIPTS(TextModel):
 
 
 def build_text_model(variant: str, device: torch.device = None) -> TextModel:
-    """
-    Build a text encoding model based on the specified variant.
+    """Build a text encoding model based on the specified variant.
 
     Args:
        variant (str): Model variant in format "base:size" (e.g., "clip:ViT-B/32" or "mobileclip:s0").
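The hunks above are docstring-only, but they also spell out this module's public surface: `build_text_model(variant, device)` accepts a "base:size" string such as "clip:ViT-B/32" or "mobileclip:s0", and every `TextModel` exposes `tokenize()` and `encode_text()`. A minimal usage sketch, assuming the optional CLIP/MobileCLIP dependencies and pretrained weights are available (none of this code is part of the diff):

```python
import torch

from ultralytics.nn.text_model import build_text_model

# Build a text encoder from a "base:size" variant string, as documented in build_text_model().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = build_text_model("mobileclip:s0", device=device)

# tokenize() and encode_text() are the two methods every TextModel subclass implements.
tokens = encoder.tokenize(["a photo of a person", "a photo of a bicycle"])
features = encoder.encode_text(tokens)  # unit-length text embeddings, per the encode_text() docstring
print(features.shape)
```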
ultralytics/solutions/ai_gym.py
CHANGED
@@ -7,8 +7,7 @@ from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, Sol
 
 
 class AIGym(BaseSolution):
-    """
-    A class to manage gym steps of people in a real-time video stream based on their poses.
+    """A class to manage gym steps of people in a real-time video stream based on their poses.
 
     This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts
     repetitions of exercises based on predefined angle thresholds for up and down positions.
@@ -32,12 +31,11 @@ class AIGym(BaseSolution):
     """
 
     def __init__(self, **kwargs: Any) -> None:
-        """
-        Initialize AIGym for workout monitoring using pose estimation and predefined angles.
+        """Initialize AIGym for workout monitoring using pose estimation and predefined angles.
 
         Args:
-            **kwargs (Any): Keyword arguments passed to the parent class constructor
-                model (str): Model name or path, defaults to "yolo11n-pose.pt".
+            **kwargs (Any): Keyword arguments passed to the parent class constructor including:
+                - model (str): Model name or path, defaults to "yolo11n-pose.pt".
         """
         kwargs["model"] = kwargs.get("model", "yolo11n-pose.pt")
         super().__init__(**kwargs)
@@ -49,22 +47,18 @@ class AIGym(BaseSolution):
         self.kpts = self.CFG["kpts"]  # User selected kpts of workouts storage for further usage
 
     def process(self, im0) -> SolutionResults:
-        """
-        Monitor workouts using Ultralytics YOLO Pose Model.
+        """Monitor workouts using Ultralytics YOLO Pose Model.
 
-        This function processes an input image to track and analyze human poses for workout monitoring. It uses
-        the YOLO Pose model to detect keypoints, estimate angles, and count repetitions based on predefined
-        angle thresholds.
+        This function processes an input image to track and analyze human poses for workout monitoring. It uses the YOLO
+        Pose model to detect keypoints, estimate angles, and count repetitions based on predefined angle thresholds.
 
         Args:
            im0 (np.ndarray): Input image for processing.
 
         Returns:
-            (SolutionResults): Contains processed image `plot_im`,
-                'workout_count' (list of completed reps),
-                'workout_stage' (list of current stages),
-                'workout_angle' (list of angles), and
-                'total_tracks' (total number of tracked individuals).
+            (SolutionResults): Contains processed image `plot_im`, 'workout_count' (list of completed reps),
+                'workout_stage' (list of current stages), 'workout_angle' (list of angles), and 'total_tracks' (total
+                number of tracked individuals).
 
         Examples:
            >>> gym = AIGym()
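For orientation, a minimal sketch of the `AIGym.process()` loop described above; the keypoint indices and video path are illustrative assumptions, not part of this diff:

```python
import cv2

from ultralytics.solutions.ai_gym import AIGym

gym = AIGym(model="yolo11n-pose.pt", kpts=[6, 8, 10])  # assumed keypoints for elbow-angle rep counting
cap = cv2.VideoCapture("workout.mp4")  # hypothetical input video

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = gym.process(frame)  # SolutionResults with workout_count, workout_stage, workout_angle
    cv2.imshow("AIGym", results.plot_im)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()
```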
ultralytics/solutions/analytics.py
CHANGED

@@ -12,11 +12,10 @@ from ultralytics.solutions.solutions import BaseSolution, SolutionResults  # Imp
 
 
 class Analytics(BaseSolution):
-    """
-    A class for creating and updating various types of charts for visual analytics.
+    """A class for creating and updating various types of charts for visual analytics.
 
-    This class extends BaseSolution to provide functionality for generating line, bar, pie, and area charts
-    based on object detection and tracking data.
+    This class extends BaseSolution to provide functionality for generating line, bar, pie, and area charts based on
+    object detection and tracking data.
 
     Attributes:
         type (str): The type of analytics chart to generate ('line', 'bar', 'pie', or 'area').
@@ -92,8 +91,7 @@ class Analytics(BaseSolution):
             self.ax.axis("equal")
 
     def process(self, im0: np.ndarray, frame_number: int) -> SolutionResults:
-        """
-        Process image data and run object tracking to update analytics charts.
+        """Process image data and run object tracking to update analytics charts.
 
         Args:
            im0 (np.ndarray): Input image for processing.
@@ -139,13 +137,12 @@ class Analytics(BaseSolution):
     def update_graph(
         self, frame_number: int, count_dict: dict[str, int] | None = None, plot: str = "line"
     ) -> np.ndarray:
-        """
-        Update the graph with new data for single or multiple classes.
+        """Update the graph with new data for single or multiple classes.
 
         Args:
            frame_number (int): The current frame number.
-            count_dict (dict[str, int], optional): Dictionary with class names as keys and counts as values for
-                multiple classes. If None, updates a single line graph.
+            count_dict (dict[str, int], optional): Dictionary with class names as keys and counts as values for multiple
+                classes. If None, updates a single line graph.
            plot (str): Type of the plot. Options are 'line', 'bar', 'pie', or 'area'.
 
         Returns:
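A short sketch of how the `process(im0, frame_number)` signature above is typically driven frame by frame; the `analytics_type` keyword and source path are assumptions rather than something this diff documents:

```python
import cv2

from ultralytics.solutions.analytics import Analytics

analytics = Analytics(analytics_type="line", model="yolo11n.pt")  # chart type maps to the 'type' attribute above
cap = cv2.VideoCapture("traffic.mp4")  # hypothetical source
frame_number = 0

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame_number += 1
    results = analytics.process(frame, frame_number)  # updates the chart and returns SolutionResults

cap.release()
```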
ultralytics/solutions/config.py
CHANGED
@@ -10,12 +10,11 @@ import cv2
 
 @dataclass
 class SolutionConfig:
-    """
-    Manages configuration parameters for Ultralytics Vision AI solutions.
+    """Manages configuration parameters for Ultralytics Vision AI solutions.
 
-    The SolutionConfig class serves as a centralized configuration container for all the
-    Ultralytics solution modules: https://docs.ultralytics.com/solutions/#solutions. It leverages Python `dataclass`
-    for clear, type-safe, and maintainable parameter definitions.
+    The SolutionConfig class serves as a centralized configuration container for all the Ultralytics solution modules:
+    https://docs.ultralytics.com/solutions/#solutions. It leverages Python `dataclass` for clear, type-safe, and
+    maintainable parameter definitions.
 
     Attributes:
         source (str, optional): Path to the input source (video, RTSP, etc.). Only usable with Solutions CLI.
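Because `SolutionConfig` is a plain dataclass, its fields can be inspected directly; a tiny sketch (only the `source` field is documented in the hunk above, the rest are simply enumerated):

```python
from dataclasses import asdict

from ultralytics.solutions.config import SolutionConfig

cfg = SolutionConfig()      # default values for every supported solution parameter
print(cfg.source)           # documented above; unset unless provided via the Solutions CLI
print(sorted(asdict(cfg)))  # list all parameter names accepted by the solution classes
```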
ultralytics/solutions/distance_calculation.py
CHANGED

@@ -10,11 +10,10 @@ from ultralytics.utils.plotting import colors
 
 
 class DistanceCalculation(BaseSolution):
-    """
-    A class to calculate distance between two objects in a real-time video stream based on their tracks.
+    """A class to calculate distance between two objects in a real-time video stream based on their tracks.
 
-    This class extends BaseSolution to provide functionality for selecting objects and calculating the distance
-    between them in a video stream using YOLO object detection and tracking.
+    This class extends BaseSolution to provide functionality for selecting objects and calculating the distance between
+    them in a video stream using YOLO object detection and tracking.
 
     Attributes:
         left_mouse_count (int): Counter for left mouse button clicks.
@@ -43,8 +42,7 @@ class DistanceCalculation(BaseSolution):
         self.centroids: list[list[int]] = []  # Store centroids of selected objects
 
     def mouse_event_for_distance(self, event: int, x: int, y: int, flags: int, param: Any) -> None:
-        """
-        Handle mouse events to select regions in a real-time video stream for distance calculation.
+        """Handle mouse events to select regions in a real-time video stream for distance calculation.
 
         Args:
            event (int): Type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN).
@@ -69,18 +67,17 @@ class DistanceCalculation(BaseSolution):
                 self.left_mouse_count = 0
 
     def process(self, im0) -> SolutionResults:
-        """
-        Process a video frame and calculate the distance between two selected bounding boxes.
+        """Process a video frame and calculate the distance between two selected bounding boxes.
 
-        This method extracts tracks from the input frame, annotates bounding boxes, and calculates the distance
-        between two user-selected objects if they have been chosen.
+        This method extracts tracks from the input frame, annotates bounding boxes, and calculates the distance between
+        two user-selected objects if they have been chosen.
 
         Args:
            im0 (np.ndarray): The input image frame to process.
 
         Returns:
-            (SolutionResults): Contains processed image `plot_im`, `total_tracks` (int) representing the total number
-                of tracked objects, and `pixels_distance` (float) representing the distance between selected objects
+            (SolutionResults): Contains processed image `plot_im`, `total_tracks` (int) representing the total number of
+                tracked objects, and `pixels_distance` (float) representing the distance between selected objects
                 in pixels.
 
         Examples:
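A sketch of the interactive flow implied by `mouse_event_for_distance()` and `process()` above; window handling via `show=True` and the video path are assumptions:

```python
import cv2

from ultralytics.solutions.distance_calculation import DistanceCalculation

dist = DistanceCalculation(model="yolo11n.pt", show=True)  # a display window is needed for the mouse callback
cap = cv2.VideoCapture("street.mp4")  # hypothetical source

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = dist.process(frame)  # left-click two tracked objects in the window to select them
    print(results.pixels_distance, results.total_tracks)

cap.release()
cv2.destroyAllWindows()
```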
ultralytics/solutions/heatmap.py
CHANGED
@@ -12,8 +12,7 @@ from ultralytics.solutions.solutions import SolutionAnnotator, SolutionResults
 
 
 class Heatmap(ObjectCounter):
-    """
-    A class to draw heatmaps in real-time video streams based on object tracks.
+    """A class to draw heatmaps in real-time video streams based on object tracks.
 
     This class extends the ObjectCounter class to generate and visualize heatmaps of object movements in video
     streams. It uses tracked object positions to create a cumulative heatmap effect over time.
@@ -36,8 +35,7 @@ class Heatmap(ObjectCounter):
     """
 
     def __init__(self, **kwargs: Any) -> None:
-        """
-        Initialize the Heatmap class for real-time video stream heatmap generation based on object tracks.
+        """Initialize the Heatmap class for real-time video stream heatmap generation based on object tracks.
 
         Args:
            **kwargs (Any): Keyword arguments passed to the parent ObjectCounter class.
@@ -53,8 +51,7 @@ class Heatmap(ObjectCounter):
         self.heatmap = None
 
     def heatmap_effect(self, box: list[float]) -> None:
-        """
-        Efficiently calculate heatmap area and effect location for applying colormap.
+        """Efficiently calculate heatmap area and effect location for applying colormap.
 
         Args:
            box (list[float]): Bounding box coordinates [x0, y0, x1, y1].
@@ -75,18 +72,15 @@ class Heatmap(ObjectCounter):
             self.heatmap[y0:y1, x0:x1][within_radius] += 2
 
     def process(self, im0: np.ndarray) -> SolutionResults:
-        """
-        Generate heatmap for each frame using Ultralytics tracking.
+        """Generate heatmap for each frame using Ultralytics tracking.
 
         Args:
            im0 (np.ndarray): Input image array for processing.
 
         Returns:
-            (SolutionResults): Contains processed image `plot_im`,
-                'in_count' (int, count of objects entering the region),
-                'out_count' (int, count of objects exiting the region),
-                'classwise_count' (dict, per-class object count), and
-                'total_tracks' (int, total number of tracked objects).
+            (SolutionResults): Contains processed image `plot_im`, 'in_count' (int, count of objects entering the
+                region), 'out_count' (int, count of objects exiting the region), 'classwise_count' (dict, per-class
+                object count), and 'total_tracks' (int, total number of tracked objects).
         """
         if not self.initialized:
             self.heatmap = np.zeros_like(im0, dtype=np.float32) * 0.99
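A minimal sketch of the cumulative-heatmap loop documented above; the `colormap` keyword and source path are assumptions:

```python
import cv2

from ultralytics.solutions.heatmap import Heatmap

heatmap = Heatmap(model="yolo11n.pt", colormap=cv2.COLORMAP_PARULA)  # colormap choice is an assumption
cap = cv2.VideoCapture("mall.mp4")  # hypothetical source

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = heatmap.process(frame)  # SolutionResults with in_count, out_count, classwise_count
    cv2.imshow("heatmap", results.plot_im)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()
```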
ultralytics/solutions/instance_segmentation.py
CHANGED

@@ -7,8 +7,7 @@ from ultralytics.solutions.solutions import BaseSolution, SolutionResults
 
 
 class InstanceSegmentation(BaseSolution):
-    """
-    A class to manage instance segmentation in images or video streams.
+    """A class to manage instance segmentation in images or video streams.
 
     This class extends the BaseSolution class and provides functionality for performing instance segmentation, including
     drawing segmented masks with bounding boxes and labels.
@@ -36,12 +35,11 @@ class InstanceSegmentation(BaseSolution):
     """
 
     def __init__(self, **kwargs: Any) -> None:
-        """
-        Initialize the InstanceSegmentation class for detecting and annotating segmented instances.
+        """Initialize the InstanceSegmentation class for detecting and annotating segmented instances.
 
         Args:
-            **kwargs (Any): Keyword arguments passed to the BaseSolution parent class
-                model (str): Model name or path, defaults to "yolo11n-seg.pt".
+            **kwargs (Any): Keyword arguments passed to the BaseSolution parent class including:
+                - model (str): Model name or path, defaults to "yolo11n-seg.pt".
         """
         kwargs["model"] = kwargs.get("model", "yolo11n-seg.pt")
         super().__init__(**kwargs)
@@ -51,8 +49,7 @@ class InstanceSegmentation(BaseSolution):
         self.show_boxes = self.CFG.get("show_boxes", True)
 
     def process(self, im0) -> SolutionResults:
-        """
-        Perform instance segmentation on the input image and annotate the results.
+        """Perform instance segmentation on the input image and annotate the results.
 
         Args:
            im0 (np.ndarray): The input image for segmentation.
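A single-image sketch of the `process()` call documented above; the image path is an assumption:

```python
import cv2

from ultralytics.solutions.instance_segmentation import InstanceSegmentation

segmenter = InstanceSegmentation(model="yolo11n-seg.pt")  # default model noted in the hunk above
frame = cv2.imread("image.jpg")  # hypothetical input image
results = segmenter.process(frame)
cv2.imwrite("segmented.jpg", results.plot_im)  # annotated masks, boxes, and labels
```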
ultralytics/solutions/object_blurrer.py
CHANGED

@@ -10,8 +10,7 @@ from ultralytics.utils.plotting import colors
 
 
 class ObjectBlurrer(BaseSolution):
-    """
-    A class to manage the blurring of detected objects in a real-time video stream.
+    """A class to manage the blurring of detected objects in a real-time video stream.
 
     This class extends the BaseSolution class and provides functionality for blurring objects based on detected bounding
     boxes. The blurred areas are updated directly in the input image, allowing for privacy preservation or other effects.
@@ -34,12 +33,11 @@ class ObjectBlurrer(BaseSolution):
     """
 
     def __init__(self, **kwargs: Any) -> None:
-        """
-        Initialize the ObjectBlurrer class for applying a blur effect to objects detected in video streams or images.
+        """Initialize the ObjectBlurrer class for applying a blur effect to objects detected in video streams or images.
 
         Args:
-            **kwargs (Any): Keyword arguments passed to the parent class and for configuration
-                blur_ratio (float): Intensity of the blur effect (0.1-1.0, default=0.5).
+            **kwargs (Any): Keyword arguments passed to the parent class and for configuration including:
+                - blur_ratio (float): Intensity of the blur effect (0.1-1.0, default=0.5).
         """
         super().__init__(**kwargs)
         blur_ratio = self.CFG["blur_ratio"]
@@ -49,11 +47,10 @@ class ObjectBlurrer(BaseSolution):
         self.blur_ratio = int(blur_ratio * 100)
 
     def process(self, im0) -> SolutionResults:
-        """
-        Apply a blurring effect to detected objects in the input image.
+        """Apply a blurring effect to detected objects in the input image.
 
-        This method extracts tracking information, applies blur to regions corresponding to detected objects,
-        and annotates the image with bounding boxes.
+        This method extracts tracking information, applies blur to regions corresponding to detected objects, and
+        annotates the image with bounding boxes.
 
         Args:
            im0 (np.ndarray): The input image containing detected objects.
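A short sketch of the blurring flow described above; `blur_ratio` follows the 0.1-1.0 range stated in the hunk, while the image path is an assumption:

```python
import cv2

from ultralytics.solutions.object_blurrer import ObjectBlurrer

blurrer = ObjectBlurrer(model="yolo11n.pt", blur_ratio=0.5)  # blur intensity, 0.1-1.0 per the docstring above
frame = cv2.imread("people.jpg")  # hypothetical input image
results = blurrer.process(frame)
cv2.imwrite("blurred.jpg", results.plot_im)  # detected objects blurred in place, boxes annotated
```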
ultralytics/solutions/object_counter.py
CHANGED

@@ -10,8 +10,7 @@ from ultralytics.utils.plotting import colors
 
 
 class ObjectCounter(BaseSolution):
-    """
-    A class to manage the counting of objects in a real-time video stream based on their tracks.
+    """A class to manage the counting of objects in a real-time video stream based on their tracks.
 
     This class extends the BaseSolution class and provides functionality for counting objects moving in and out of a
     specified region in a video stream. It supports both polygonal and linear regions for counting.
@@ -59,8 +58,7 @@ class ObjectCounter(BaseSolution):
         prev_position: tuple[float, float] | None,
         cls: int,
     ) -> None:
-        """
-        Count objects within a polygonal or linear region based on their tracks.
+        """Count objects within a polygonal or linear region based on their tracks.
 
         Args:
            current_centroid (tuple[float, float]): Current centroid coordinates (x, y) in the current frame.
@@ -117,8 +115,7 @@ class ObjectCounter(BaseSolution):
                     self.counted_ids.append(track_id)
 
     def display_counts(self, plot_im) -> None:
-        """
-        Display object counts on the input image or frame.
+        """Display object counts on the input image or frame.
 
         Args:
            plot_im (np.ndarray): The image or frame to display counts on.
@@ -138,19 +135,18 @@ class ObjectCounter(BaseSolution):
             self.annotator.display_analytics(plot_im, labels_dict, (104, 31, 17), (255, 255, 255), self.margin)
 
     def process(self, im0) -> SolutionResults:
-        """
-        Process input data (frames or object tracks) and update object counts.
+        """Process input data (frames or object tracks) and update object counts.
 
-        This method initializes the counting region, extracts tracks, draws bounding boxes and regions, updates
-        object counts, and displays the results on the input image.
+        This method initializes the counting region, extracts tracks, draws bounding boxes and regions, updates object
+        counts, and displays the results on the input image.
 
         Args:
            im0 (np.ndarray): The input image or frame to be processed.
 
         Returns:
            (SolutionResults): Contains processed image `im0`, 'in_count' (int, count of objects entering the region),
-                'out_count' (int, count of objects exiting the region), 'classwise_count' (dict, per-class object count),
-                and 'total_tracks' (int, total number of tracked objects).
+                'out_count' (int, count of objects exiting the region), 'classwise_count' (dict, per-class object
+                count), and 'total_tracks' (int, total number of tracked objects).
 
         Examples:
            >>> counter = ObjectCounter()
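A sketch of line-crossing counting with the `process()` interface above; the region coordinates and video path are assumptions:

```python
import cv2

from ultralytics.solutions.object_counter import ObjectCounter

# Two points define a counting line; three or more define a polygonal counting region.
counter = ObjectCounter(model="yolo11n.pt", region=[(20, 400), (1260, 400)])
cap = cv2.VideoCapture("highway.mp4")  # hypothetical source

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = counter.process(frame)  # SolutionResults with in_count, out_count, classwise_count
    print(results.in_count, results.out_count)

cap.release()
```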
ultralytics/solutions/object_cropper.py
CHANGED

@@ -9,8 +9,7 @@ from ultralytics.utils.plotting import save_one_box
 
 
 class ObjectCropper(BaseSolution):
-    """
-    A class to manage the cropping of detected objects in a real-time video stream or images.
+    """A class to manage the cropping of detected objects in a real-time video stream or images.
 
     This class extends the BaseSolution class and provides functionality for cropping objects based on detected bounding
     boxes. The cropped images are saved to a specified directory for further analysis or usage.
@@ -32,12 +31,11 @@ class ObjectCropper(BaseSolution):
     """
 
     def __init__(self, **kwargs: Any) -> None:
-        """
-        Initialize the ObjectCropper class for cropping objects from detected bounding boxes.
+        """Initialize the ObjectCropper class for cropping objects from detected bounding boxes.
 
         Args:
-            **kwargs (Any): Keyword arguments passed to the parent class and used for configuration
-                crop_dir (str): Path to the directory for saving cropped object images.
+            **kwargs (Any): Keyword arguments passed to the parent class and used for configuration including:
+                - crop_dir (str): Path to the directory for saving cropped object images.
         """
         super().__init__(**kwargs)
 
@@ -53,8 +51,7 @@ class ObjectCropper(BaseSolution):
         self.conf = self.CFG["conf"]
 
     def process(self, im0) -> SolutionResults:
-        """
-        Crop detected objects from the input image and save them as separate images.
+        """Crop detected objects from the input image and save them as separate images.
 
         Args:
            im0 (np.ndarray): The input image containing detected objects.
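A minimal sketch of the cropping flow; `crop_dir` matches the keyword documented above, the image path is an assumption:

```python
import cv2

from ultralytics.solutions.object_cropper import ObjectCropper

cropper = ObjectCropper(model="yolo11n.pt", crop_dir="cropped-detections")  # output directory per the docstring above
frame = cv2.imread("scene.jpg")  # hypothetical input image
results = cropper.process(frame)  # saves one image per detected object into crop_dir
```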
ultralytics/solutions/parking_management.py
CHANGED

@@ -14,11 +14,10 @@ from ultralytics.utils.checks import check_imshow
 
 
 class ParkingPtsSelection:
-    """
-    A class for selecting and managing parking zone points on images using a Tkinter-based UI.
+    """A class for selecting and managing parking zone points on images using a Tkinter-based UI.
 
-    This class provides functionality to upload an image, select points to define parking zones, and save the
-    selected points to a JSON file. It uses Tkinter for the graphical user interface.
+    This class provides functionality to upload an image, select points to define parking zones, and save the selected
+    points to a JSON file. It uses Tkinter for the graphical user interface.
 
     Attributes:
         tk (module): The Tkinter module for GUI operations.
@@ -178,11 +177,10 @@ class ParkingPtsSelection:
 
 
 class ParkingManagement(BaseSolution):
-    """
-    Manages parking occupancy and availability using YOLO model for real-time monitoring and visualization.
+    """Manages parking occupancy and availability using YOLO model for real-time monitoring and visualization.
 
-    This class extends BaseSolution to provide functionality for parking lot management, including detection of
-    occupied spaces, visualization of parking regions, and display of occupancy statistics.
+    This class extends BaseSolution to provide functionality for parking lot management, including detection of occupied
+    spaces, visualization of parking regions, and display of occupancy statistics.
 
     Attributes:
         json_file (str): Path to the JSON file containing parking region details.
@@ -221,19 +219,19 @@ class ParkingManagement(BaseSolution):
         self.dc = (255, 0, 189)  # Centroid color for each box
 
     def process(self, im0: np.ndarray) -> SolutionResults:
-        """
-        Process the input image for parking lot management and visualization.
+        """Process the input image for parking lot management and visualization.
 
-        This function analyzes the input image, extracts tracks, and determines the occupancy status of parking
-        regions defined in the JSON file. It annotates the image with occupied and available parking spots, and
-        updates the parking information.
+        This function analyzes the input image, extracts tracks, and determines the occupancy status of parking regions
+        defined in the JSON file. It annotates the image with occupied and available parking spots, and updates the
+        parking information.
 
         Args:
            im0 (np.ndarray): The input inference image.
 
         Returns:
            (SolutionResults): Contains processed image `plot_im`, 'filled_slots' (number of occupied parking slots),
-                'available_slots' (number of available parking slots), and 'total_tracks' (total number of tracked objects).
+                'available_slots' (number of available parking slots), and 'total_tracks' (total number of
+                tracked objects).
 
         Examples:
            >>> parking_manager = ParkingManagement(json_file="parking_regions.json")
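A sketch of the two-step parking workflow implied above: draw zones once with `ParkingPtsSelection`, then monitor with `ParkingManagement`; file paths are assumptions:

```python
import cv2

from ultralytics.solutions.parking_management import ParkingManagement

# ParkingPtsSelection() can be run once beforehand to draw zones and save them to a JSON file (Tkinter UI).
parking = ParkingManagement(model="yolo11n.pt", json_file="parking_regions.json")
cap = cv2.VideoCapture("parking_lot.mp4")  # hypothetical source

while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = parking.process(frame)  # SolutionResults with filled_slots and available_slots
    print(results.filled_slots, results.available_slots)

cap.release()
```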
ultralytics/solutions/queue_management.py
CHANGED

@@ -7,11 +7,10 @@ from ultralytics.utils.plotting import colors
 
 
 class QueueManager(BaseSolution):
-    """
-    Manages queue counting in real-time video streams based on object tracks.
+    """Manages queue counting in real-time video streams based on object tracks.
 
-    This class extends BaseSolution to provide functionality for tracking and counting objects within a specified
-    region in video frames.
+    This class extends BaseSolution to provide functionality for tracking and counting objects within a specified region
+    in video frames.
 
     Attributes:
         counts (int): The current count of objects in the queue.
@@ -46,8 +45,7 @@ class QueueManager(BaseSolution):
         self.region_length = len(self.region)  # Store region length for further usage
 
     def process(self, im0) -> SolutionResults:
-        """
-        Process queue management for a single frame of video.
+        """Process queue management for a single frame of video.
 
         Args:
            im0 (np.ndarray): Input image for processing, typically a frame from a video stream.