dgenerate-ultralytics-headless 8.3.193__py3-none-any.whl → 8.3.195__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/METADATA +1 -2
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/RECORD +104 -102
- tests/test_exports.py +8 -5
- tests/test_python.py +2 -2
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +8 -8
- ultralytics/data/annotator.py +1 -1
- ultralytics/data/augment.py +75 -75
- ultralytics/data/base.py +12 -12
- ultralytics/data/converter.py +4 -4
- ultralytics/data/dataset.py +7 -7
- ultralytics/data/loaders.py +15 -15
- ultralytics/data/split_dota.py +10 -10
- ultralytics/data/utils.py +12 -12
- ultralytics/engine/exporter.py +4 -4
- ultralytics/engine/model.py +13 -13
- ultralytics/engine/predictor.py +13 -13
- ultralytics/engine/results.py +21 -21
- ultralytics/hub/__init__.py +1 -2
- ultralytics/hub/google/__init__.py +2 -2
- ultralytics/hub/session.py +7 -7
- ultralytics/hub/utils.py +0 -101
- ultralytics/models/fastsam/model.py +5 -5
- ultralytics/models/fastsam/predict.py +11 -11
- ultralytics/models/nas/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +2 -2
- ultralytics/models/rtdetr/val.py +4 -4
- ultralytics/models/sam/amg.py +6 -6
- ultralytics/models/sam/build.py +9 -9
- ultralytics/models/sam/model.py +7 -7
- ultralytics/models/sam/modules/blocks.py +6 -6
- ultralytics/models/sam/modules/decoders.py +1 -1
- ultralytics/models/sam/modules/encoders.py +27 -27
- ultralytics/models/sam/modules/sam.py +4 -4
- ultralytics/models/sam/modules/tiny_encoder.py +18 -18
- ultralytics/models/sam/modules/utils.py +8 -8
- ultralytics/models/sam/predict.py +66 -66
- ultralytics/models/utils/loss.py +22 -22
- ultralytics/models/utils/ops.py +8 -8
- ultralytics/models/yolo/classify/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +8 -8
- ultralytics/models/yolo/classify/val.py +4 -4
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +6 -6
- ultralytics/models/yolo/detect/val.py +32 -32
- ultralytics/models/yolo/model.py +6 -6
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/obb/val.py +13 -13
- ultralytics/models/yolo/pose/val.py +11 -11
- ultralytics/models/yolo/segment/predict.py +4 -4
- ultralytics/models/yolo/segment/train.py +1 -1
- ultralytics/models/yolo/segment/val.py +14 -14
- ultralytics/models/yolo/world/train.py +9 -9
- ultralytics/models/yolo/world/train_world.py +1 -1
- ultralytics/models/yolo/yoloe/predict.py +4 -4
- ultralytics/models/yolo/yoloe/train.py +4 -4
- ultralytics/nn/autobackend.py +10 -13
- ultralytics/nn/modules/block.py +6 -6
- ultralytics/nn/modules/conv.py +2 -2
- ultralytics/nn/modules/head.py +4 -4
- ultralytics/nn/tasks.py +13 -13
- ultralytics/nn/text_model.py +3 -3
- ultralytics/solutions/ai_gym.py +2 -2
- ultralytics/solutions/analytics.py +3 -3
- ultralytics/solutions/config.py +5 -5
- ultralytics/solutions/distance_calculation.py +2 -2
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +4 -4
- ultralytics/solutions/object_counter.py +4 -4
- ultralytics/solutions/parking_management.py +7 -7
- ultralytics/solutions/queue_management.py +3 -3
- ultralytics/solutions/region_counter.py +4 -4
- ultralytics/solutions/similarity_search.py +2 -2
- ultralytics/solutions/solutions.py +48 -48
- ultralytics/solutions/streamlit_inference.py +1 -1
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/solutions/vision_eye.py +1 -1
- ultralytics/trackers/byte_tracker.py +11 -11
- ultralytics/trackers/utils/gmc.py +3 -3
- ultralytics/trackers/utils/matching.py +5 -5
- ultralytics/utils/__init__.py +1 -2
- ultralytics/utils/autodevice.py +2 -2
- ultralytics/utils/benchmarks.py +10 -10
- ultralytics/utils/callbacks/clearml.py +1 -1
- ultralytics/utils/callbacks/comet.py +5 -5
- ultralytics/utils/callbacks/hub.py +2 -1
- ultralytics/utils/checks.py +5 -5
- ultralytics/utils/cpu.py +90 -0
- ultralytics/utils/dist.py +1 -1
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/events.py +115 -0
- ultralytics/utils/export.py +5 -5
- ultralytics/utils/instance.py +2 -2
- ultralytics/utils/metrics.py +35 -35
- ultralytics/utils/nms.py +4 -4
- ultralytics/utils/ops.py +4 -2
- ultralytics/utils/patches.py +2 -2
- ultralytics/utils/plotting.py +9 -9
- ultralytics/utils/torch_utils.py +2 -6
- ultralytics/utils/triton.py +5 -5
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.193.dist-info → dgenerate_ultralytics_headless-8.3.195.dist-info}/top_level.txt +0 -0
@@ -75,12 +75,12 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
         else:
             # NOTE: only supports bboxes as prompts for now
             assert bboxes is not None, f"Expected bboxes, but got {bboxes}!"
-            # NOTE: needs
+            # NOTE: needs list[np.ndarray]
             assert isinstance(bboxes, list) and all(isinstance(b, np.ndarray) for b in bboxes), (
-                f"Expected
+                f"Expected list[np.ndarray], but got {bboxes}!"
             )
             assert isinstance(category, list) and all(isinstance(b, np.ndarray) for b in category), (
-                f"Expected
+                f"Expected list[np.ndarray], but got {category}!"
             )
             assert len(im) == len(category) == len(bboxes), (
                 f"Expected same length for all inputs, but got {len(im)}vs{len(category)}vs{len(bboxes)}!"
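For context, the tightened checks above require one NumPy array of boxes and one array of class indices per input image, with all three lists the same length. A minimal sketch of prompt data that would satisfy these assertions (the file names, box values, and two-image setup are illustrative only, not taken from the diff):

```python
import numpy as np

# One entry per image: an (N, 4) array of boxes and an (N,) array of class indices.
bboxes = [
    np.array([[10, 20, 200, 240], [50, 60, 120, 160]], dtype=np.float32),  # image 0: two boxes
    np.array([[5, 5, 90, 90]], dtype=np.float32),                          # image 1: one box
]
category = [
    np.array([0, 1]),  # class indices for image 0
    np.array([0]),     # class index for image 1
]
im = ["img0.jpg", "img1.jpg"]  # placeholder sources; must match the prompt lists in length

# Mirror of the assertions added in the hunk above.
assert isinstance(bboxes, list) and all(isinstance(b, np.ndarray) for b in bboxes)
assert isinstance(category, list) and all(isinstance(c, np.ndarray) for c in category)
assert len(im) == len(category) == len(bboxes)
```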
@@ -149,7 +149,7 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
         Process the source to get the visual prompt embeddings (VPE).
 
         Args:
-            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor |
+            source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | list | tuple): The source
                 of the image to make predictions on. Accepts various types including file paths, URLs, PIL
                 images, numpy arrays, and torch tensors.
 
ultralytics/models/yolo/yoloe/train.py
CHANGED
@@ -183,7 +183,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         standard YOLO datasets and grounding datasets with different formats.
 
         Args:
-            img_path (
+            img_path (list[str] | str): Path to the folder containing images or list of paths.
             mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
             batch (int, optional): Size of batches, used for rectangular training/validation.
 
@@ -207,7 +207,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         Generate text embeddings for a list of text samples.
 
         Args:
-            texts (
+            texts (list[str]): List of text samples to encode.
             batch (int): Batch size for processing.
             cache_dir (Path): Directory to save/load cached embeddings.
 
@@ -262,7 +262,7 @@ class YOLOEPEFreeTrainer(YOLOEPETrainer, YOLOETrainerFromScratch):
         in the parent directory of the first dataset's image path.
 
         Args:
-            datasets (
+            datasets (list[Dataset]): List of datasets containing category names to process.
             batch (int): Batch size for processing text embeddings.
 
         Notes:
@@ -290,7 +290,7 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch):
         Build YOLO Dataset for training or validation with visual prompts.
 
         Args:
-            img_path (
+            img_path (list[str] | str): Path to the folder containing images or list of paths.
             mode (str): 'train' mode or 'val' mode, allowing customized augmentations for each mode.
             batch (int, optional): Size of batches, used for rectangular training/validation.
 
ultralytics/nn/autobackend.py
CHANGED
@@ -624,7 +624,7 @@ class AutoBackend(nn.Module):
             **kwargs (Any): Additional keyword arguments for model configuration.
 
         Returns:
-            (torch.Tensor |
+            (torch.Tensor | list[torch.Tensor]): The raw output tensor(s) from the model.
         """
         b, ch, h, w = im.shape  # batch, channel, height, width
         if self.fp16 and im.dtype != torch.float16:
@@ -724,17 +724,14 @@ class AutoBackend(nn.Module):
             im_pil = Image.fromarray((im * 255).astype("uint8"))
             # im = im.resize((192, 320), Image.BILINEAR)
             y = self.model.predict({"image": im_pil})  # coordinates are xywh normalized
-            if "confidence" in y:
-
-
-
-                )
-
-
-
-                # conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
-                # y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
-            y = list(y.values())
+            if "confidence" in y:  # NMS included
+                from ultralytics.utils.ops import xywh2xyxy
+
+                box = xywh2xyxy(y["coordinates"] * [[w, h, w, h]])  # xyxy pixels
+                cls = y["confidence"].argmax(1, keepdims=True)
+                y = np.concatenate((box, np.take_along_axis(y["confidence"], cls, axis=1), cls), 1)[None]
+            else:
+                y = list(y.values())
             if len(y) == 2 and len(y[1].shape) != 4:  # segmentation model
                 y = list(reversed(y))  # reversed for segmentation models (pred, proto)
 
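The new branch above handles CoreML models exported with NMS baked in: normalized xywh coordinates are scaled to pixels and converted to xyxy, then joined with the top class score and index into a single (1, N, 6) detection array. A standalone sketch of that same transformation, with made-up sample values and a local xywh2xyxy helper standing in for the one imported from ultralytics.utils.ops:

```python
import numpy as np


def xywh2xyxy(x: np.ndarray) -> np.ndarray:
    """Convert (center-x, center-y, width, height) boxes to (x1, y1, x2, y2)."""
    y = x.copy()
    y[..., 0] = x[..., 0] - x[..., 2] / 2
    y[..., 1] = x[..., 1] - x[..., 3] / 2
    y[..., 2] = x[..., 0] + x[..., 2] / 2
    y[..., 3] = x[..., 1] + x[..., 3] / 2
    return y


h, w = 640, 640                                  # network input size (illustrative)
coordinates = np.array([[0.5, 0.5, 0.25, 0.5]])  # one box, xywh normalized to [0, 1]
confidence = np.array([[0.05, 0.90, 0.05]])      # per-class scores for that box

box = xywh2xyxy(coordinates * [[w, h, w, h]])        # xyxy in pixels
cls = confidence.argmax(1, keepdims=True)            # best class index per box
conf = np.take_along_axis(confidence, cls, axis=1)   # score of the best class
dets = np.concatenate((box, conf, cls), 1)[None]     # (1, N, 6): x1, y1, x2, y2, conf, cls
print(dets.shape)  # (1, 1, 6)
```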
@@ -863,7 +860,7 @@ class AutoBackend(nn.Module):
             p (str): Path to the model file.
 
         Returns:
-            (
+            (list[bool]): List of booleans indicating the model type.
 
         Examples:
             >>> model = AutoBackend(model="path/to/model.onnx")
ultralytics/nn/modules/block.py
CHANGED
@@ -745,7 +745,7 @@ class ImagePoolingAttn(nn.Module):
         Forward pass of ImagePoolingAttn.
 
         Args:
-            x (
+            x (list[torch.Tensor]): List of input feature maps.
             text (torch.Tensor): Text embeddings.
 
         Returns:
@@ -1032,7 +1032,7 @@ class CBLinear(nn.Module):
 
         Args:
             c1 (int): Input channels.
-            c2s (
+            c2s (list[int]): List of output channel sizes.
             k (int): Kernel size.
             s (int): Stride.
             p (int | None): Padding.
@@ -1055,7 +1055,7 @@ class CBFuse(nn.Module):
         Initialize CBFuse module.
 
         Args:
-            idx (
+            idx (list[int]): Indices for feature selection.
         """
         super().__init__()
         self.idx = idx
@@ -1065,7 +1065,7 @@ class CBFuse(nn.Module):
         Forward pass through CBFuse layer.
 
         Args:
-            xs (
+            xs (list[torch.Tensor]): List of input tensors.
 
         Returns:
             (torch.Tensor): Fused output tensor.
@@ -1676,7 +1676,7 @@ class TorchVision(nn.Module):
             x (torch.Tensor): Input tensor.
 
         Returns:
-            (torch.Tensor |
+            (torch.Tensor | list[torch.Tensor]): Output tensor or list of tensors.
         """
         if self.split:
             y = [x]
@@ -1979,7 +1979,7 @@ class SAVPE(nn.Module):
         Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
 
         Args:
-            ch (
+            ch (list[int]): List of input channel dimensions.
             c3 (int): Intermediate channels.
             embed (int): Embedding dimension.
         """
ultralytics/nn/modules/conv.py
CHANGED
@@ -675,7 +675,7 @@ class Concat(nn.Module):
         Concatenate input tensors along specified dimension.
 
         Args:
-            x (
+            x (list[torch.Tensor]): List of input tensors.
 
         Returns:
             (torch.Tensor): Concatenated tensor.
@@ -706,7 +706,7 @@ class Index(nn.Module):
         Select and return a particular index from input.
 
         Args:
-            x (
+            x (list[torch.Tensor]): List of input tensors.
 
         Returns:
             (torch.Tensor): Selected tensor.
ultralytics/nn/modules/head.py
CHANGED
@@ -130,7 +130,7 @@ class Detect(nn.Module):
         Perform forward pass of the v10Detect module.
 
         Args:
-            x (
+            x (list[torch.Tensor]): Input feature maps from different levels.
 
         Returns:
             outputs (dict | tuple): Training mode returns dict with one2many and one2one outputs.
@@ -154,7 +154,7 @@ class Detect(nn.Module):
         Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.
 
         Args:
-            x (
+            x (list[torch.Tensor]): List of feature maps from different detection layers.
 
         Returns:
             (torch.Tensor): Concatenated tensor of decoded bounding boxes and class probabilities.
@@ -985,7 +985,7 @@ class RTDETRDecoder(nn.Module):
         Run the forward pass of the module, returning bounding box and classification scores for the input.
 
         Args:
-            x (
+            x (list[torch.Tensor]): List of feature maps from the backbone.
             batch (dict, optional): Batch information for training.
 
         Returns:
|
|
1075
1075
|
Process and return encoder inputs by getting projection features from input and concatenating them.
|
1076
1076
|
|
1077
1077
|
Args:
|
1078
|
-
x (
|
1078
|
+
x (list[torch.Tensor]): List of feature maps from the backbone.
|
1079
1079
|
|
1080
1080
|
Returns:
|
1081
1081
|
feats (torch.Tensor): Processed features.
|
ultralytics/nn/tasks.py
CHANGED
@@ -69,7 +69,7 @@ from ultralytics.nn.modules import (
     YOLOESegment,
     v10Detect,
 )
-from ultralytics.utils import DEFAULT_CFG_DICT,
+from ultralytics.utils import DEFAULT_CFG_DICT, LOGGER, YAML, colorstr, emojis
 from ultralytics.utils.checks import check_requirements, check_suffix, check_yaml
 from ultralytics.utils.loss import (
     E2EDetectLoss,
@@ -329,7 +329,7 @@ class BaseModel(torch.nn.Module):
 
         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor |
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if getattr(self, "criterion", None) is None:
             self.criterion = self.init_criterion()
@@ -480,10 +480,10 @@ class DetectionModel(BaseModel):
         Clip YOLO augmented inference tails.
 
         Args:
-            y (
+            y (list[torch.Tensor]): List of detection tensors.
 
         Returns:
-            (
+            (list[torch.Tensor]): Clipped detection tensors.
         """
         nl = self.model[-1].nl  # number of detection layers (P3-P5)
         g = sum(4**x for x in range(nl))  # grid points
@@ -874,7 +874,7 @@ class WorldModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.
 
         Args:
-            text (
+            text (list[str]): List of class names.
             batch (int): Batch size for processing text tokens.
             cache_clip_model (bool): Whether to cache the CLIP model.
         """
@@ -886,7 +886,7 @@ class WorldModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.
 
         Args:
-            text (
+            text (list[str]): List of class names.
             batch (int): Batch size for processing text tokens.
             cache_clip_model (bool): Whether to cache the CLIP model.
 
@@ -956,7 +956,7 @@ class WorldModel(DetectionModel):
 
         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor |
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if not hasattr(self, "criterion"):
             self.criterion = self.init_criterion()
@@ -1012,7 +1012,7 @@ class YOLOEModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.
 
         Args:
-            text (
+            text (list[str]): List of class names.
             batch (int): Batch size for processing text tokens.
             cache_clip_model (bool): Whether to cache the CLIP model.
             without_reprta (bool): Whether to return text embeddings cooperated with reprta module.
@@ -1060,7 +1060,7 @@ class YOLOEModel(DetectionModel):
 
         Args:
             vocab (nn.ModuleList): List of vocabulary items.
-            names (
+            names (list[str]): List of class names.
         """
         assert not self.training
         head = self.model[-1]
@@ -1114,7 +1114,7 @@ class YOLOEModel(DetectionModel):
         Set classes in advance so that model could do offline-inference without clip model.
 
         Args:
-            names (
+            names (list[str]): List of class names.
             embeddings (torch.Tensor): Embeddings tensor.
         """
         assert not hasattr(self.model[-1], "lrpc"), (
@@ -1203,7 +1203,7 @@ class YOLOEModel(DetectionModel):
 
         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor |
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if not hasattr(self, "criterion"):
             from ultralytics.utils.loss import TVPDetectLoss
@@ -1251,7 +1251,7 @@ class YOLOESegModel(YOLOEModel, SegmentationModel):
 
         Args:
             batch (dict): Batch to compute loss on.
-            preds (torch.Tensor |
+            preds (torch.Tensor | list[torch.Tensor], optional): Predictions.
         """
         if not hasattr(self, "criterion"):
             from ultralytics.utils.loss import TVPSegmentLoss
@@ -1502,7 +1502,7 @@ def load_checkpoint(weight, device=None, inplace=True, fuse=False):
     model = (ckpt.get("ema") or ckpt["model"]).float()  # FP32 model
 
     # Model compatibility updates
-    model.args =
+    model.args = args  # attach args to model
     model.pt_path = weight  # attach *.pt file path to model
     model.task = getattr(model, "task", guess_model_task(model))
     if not hasattr(model, "stride"):
ultralytics/nn/text_model.py
CHANGED
@@ -97,7 +97,7 @@ class CLIP(TextModel):
         Convert input texts to CLIP tokens.
 
         Args:
-            texts (str |
+            texts (str | list[str]): Input text or list of texts to tokenize.
 
         Returns:
             (torch.Tensor): Tokenized text tensor with shape (batch_size, context_length) ready for model processing.
@@ -240,7 +240,7 @@ class MobileCLIP(TextModel):
         Convert input texts to MobileCLIP tokens.
 
         Args:
-            texts (
+            texts (list[str]): List of text strings to tokenize.
 
         Returns:
             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
@@ -325,7 +325,7 @@ class MobileCLIPTS(TextModel):
         Convert input texts to MobileCLIP tokens.
 
         Args:
-            texts (
+            texts (list[str]): List of text strings to tokenize.
 
         Returns:
             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
ultralytics/solutions/ai_gym.py
CHANGED
@@ -14,10 +14,10 @@ class AIGym(BaseSolution):
     repetitions of exercises based on predefined angle thresholds for up and down positions.
 
     Attributes:
-        states (
+        states (dict[float, int, str]): Stores per-track angle, count, and stage for workout monitoring.
         up_angle (float): Angle threshold for considering the 'up' position of an exercise.
         down_angle (float): Angle threshold for considering the 'down' position of an exercise.
-        kpts (
+        kpts (list[int]): Indices of keypoints used for angle calculation.
 
     Methods:
         process: Process a frame to detect poses, calculate angles, and count repetitions.
ultralytics/solutions/analytics.py
CHANGED
@@ -29,12 +29,12 @@ class Analytics(BaseSolution):
         fontsize (int): Font size for text display.
         color_cycle (cycle): Cyclic iterator for chart colors.
         total_counts (int): Total count of detected objects (used for line charts).
-        clswise_count (
+        clswise_count (dict[str, int]): Dictionary for class-wise object counts.
         fig (Figure): Matplotlib figure object for the chart.
         ax (Axes): Matplotlib axes object for the chart.
         canvas (FigureCanvasAgg): Canvas for rendering the chart.
         lines (dict): Dictionary to store line objects for area charts.
-        color_mapping (
+        color_mapping (dict[str, str]): Dictionary mapping class labels to colors for consistent visualization.
 
     Methods:
         process: Process image data and update the chart.
@@ -144,7 +144,7 @@ class Analytics(BaseSolution):
 
         Args:
             frame_number (int): The current frame number.
-            count_dict (
+            count_dict (dict[str, int], optional): Dictionary with class names as keys and counts as values for
                 multiple classes. If None, updates a single line graph.
             plot (str): Type of the plot. Options are 'line', 'bar', 'pie', or 'area'.
 
ultralytics/solutions/config.py
CHANGED
@@ -20,20 +20,20 @@ class SolutionConfig:
     Attributes:
         source (str, optional): Path to the input source (video, RTSP, etc.). Only usable with Solutions CLI.
         model (str, optional): Path to the Ultralytics YOLO model to be used for inference.
-        classes (
+        classes (list[int], optional): List of class indices to filter detections.
         show_conf (bool): Whether to show confidence scores on the visual output.
         show_labels (bool): Whether to display class labels on visual output.
-        region (
+        region (list[tuple[int, int]], optional): Polygonal region or line for object counting.
         colormap (int, optional): OpenCV colormap constant for visual overlays (e.g., cv2.COLORMAP_JET).
         show_in (bool): Whether to display count number for objects entering the region.
         show_out (bool): Whether to display count number for objects leaving the region.
         up_angle (float): Upper angle threshold used in pose-based workouts monitoring.
         down_angle (int): Lower angle threshold used in pose-based workouts monitoring.
-        kpts (
+        kpts (list[int]): Keypoint indices to monitor, e.g., for pose analytics.
         analytics_type (str): Type of analytics to perform ("line", "area", "bar", "pie", etc.).
-        figsize (
+        figsize (tuple[int, int], optional): Size of the matplotlib figure used for analytical plots (width, height).
         blur_ratio (float): Ratio used to blur objects in the video frames (0.0 to 1.0).
-        vision_point (
+        vision_point (tuple[int, int]): Reference point for directional tracking or perspective drawing.
         crop_dir (str): Directory path to save cropped detection images.
         json_file (str): Path to a JSON file containing data for parking areas.
         line_width (int): Width for visual display i.e. bounding boxes, keypoints, counts.
ultralytics/solutions/distance_calculation.py
CHANGED
@@ -18,8 +18,8 @@ class DistanceCalculation(BaseSolution):
 
     Attributes:
         left_mouse_count (int): Counter for left mouse button clicks.
-        selected_boxes (
-        centroids (
+        selected_boxes (dict[int, list[float]]): Dictionary to store selected bounding boxes and their track IDs.
+        centroids (list[list[int]]): List to store centroids of selected bounding boxes.
 
     Methods:
         mouse_event_for_distance: Handle mouse events for selecting objects in the video stream.
ultralytics/solutions/heatmap.py
CHANGED
@@ -57,7 +57,7 @@ class Heatmap(ObjectCounter):
         Efficiently calculate heatmap area and effect location for applying colormap.
 
         Args:
-            box (
+            box (list[float]): Bounding box coordinates [x0, y0, x1, y1].
         """
         x0, y0, x1, y1 = map(int, box)
         radius_squared = (min(x1 - x0, y1 - y0) // 2) ** 2
ultralytics/solutions/instance_segmentation.py
CHANGED
@@ -16,10 +16,10 @@ class InstanceSegmentation(BaseSolution):
     Attributes:
         model (str): The segmentation model to use for inference.
         line_width (int): Width of the bounding box and text lines.
-        names (
-        clss (
-        track_ids (
-        masks (
+        names (dict[int, str]): Dictionary mapping class indices to class names.
+        clss (list[int]): List of detected class indices.
+        track_ids (list[int]): List of track IDs for detected instances.
+        masks (list[np.ndarray]): List of segmentation masks for detected instances.
         show_conf (bool): Whether to display confidence scores.
         show_labels (bool): Whether to display class labels.
         show_boxes (bool): Whether to display bounding boxes.
ultralytics/solutions/object_counter.py
CHANGED
@@ -19,8 +19,8 @@ class ObjectCounter(BaseSolution):
     Attributes:
         in_count (int): Counter for objects moving inward.
         out_count (int): Counter for objects moving outward.
-        counted_ids (
-        classwise_counts (
+        counted_ids (list[int]): List of IDs of objects that have been counted.
+        classwise_counts (dict[str, dict[str, int]]): Dictionary for counts, categorized by object class.
         region_initialized (bool): Flag indicating whether the counting region has been initialized.
         show_in (bool): Flag to control display of inward count.
         show_out (bool): Flag to control display of outward count.
@@ -63,9 +63,9 @@ class ObjectCounter(BaseSolution):
         Count objects within a polygonal or linear region based on their tracks.
 
         Args:
-            current_centroid (
+            current_centroid (tuple[float, float]): Current centroid coordinates (x, y) in the current frame.
             track_id (int): Unique identifier for the tracked object.
-            prev_position (
+            prev_position (tuple[float, float], optional): Last frame position coordinates (x, y) of the track.
             cls (int): Class index for classwise count updates.
 
         Examples:
ultralytics/solutions/parking_management.py
CHANGED
@@ -28,8 +28,8 @@ class ParkingPtsSelection:
         canvas (tk.Canvas): The canvas widget for displaying the image and drawing bounding boxes.
         image (PIL.Image.Image): The uploaded image.
         canvas_image (ImageTk.PhotoImage): The image displayed on the canvas.
-        rg_data (
-        current_box (
+        rg_data (list[list[tuple[int, int]]]): List of bounding boxes, each defined by 4 points.
+        current_box (list[tuple[int, int]]): Temporary storage for the points of the current bounding box.
         imgw (int): Original width of the uploaded image.
         imgh (int): Original height of the uploaded image.
         canvas_max_width (int): Maximum width of the canvas.
@@ -186,11 +186,11 @@ class ParkingManagement(BaseSolution):
 
     Attributes:
         json_file (str): Path to the JSON file containing parking region details.
-        json (
-        pr_info (
-        arc (
-        occ (
-        dc (
+        json (list[dict]): Loaded JSON data containing parking region information.
+        pr_info (dict[str, int]): Dictionary storing parking information (Occupancy and Available spaces).
+        arc (tuple[int, int, int]): RGB color tuple for available region visualization.
+        occ (tuple[int, int, int]): RGB color tuple for occupied region visualization.
+        dc (tuple[int, int, int]): RGB color tuple for centroid visualization of detected objects.
 
     Methods:
         process: Process the input image for parking lot management and visualization.
ultralytics/solutions/queue_management.py
CHANGED
@@ -15,10 +15,10 @@ class QueueManager(BaseSolution):
 
     Attributes:
         counts (int): The current count of objects in the queue.
-        rect_color (
+        rect_color (tuple[int, int, int]): RGB color tuple for drawing the queue region rectangle.
         region_length (int): The number of points defining the queue region.
-        track_line (
-        track_history (
+        track_line (list[tuple[int, int]]): List of track line coordinates.
+        track_history (dict[int, list[tuple[int, int]]]): Dictionary storing tracking history for each object.
 
     Methods:
         initialize_region: Initialize the queue region.
ultralytics/solutions/region_counter.py
CHANGED
@@ -64,12 +64,12 @@ class RegionCounter(BaseSolution):
 
         Args:
             name (str): Name assigned to the new region.
-            polygon_points (
-            region_color (
-            text_color (
+            polygon_points (list[tuple]): List of (x, y) coordinates defining the region's polygon.
+            region_color (tuple[int, int, int]): BGR color for region visualization.
+            text_color (tuple[int, int, int]): BGR color for the text within the region.
 
         Returns:
-            (
+            (dict[str, any]): Returns a dictionary including the region information i.e. name, region_color etc.
         """
         region = self.region_template.copy()
         region.update(
ultralytics/solutions/similarity_search.py
CHANGED
@@ -33,7 +33,7 @@ class VisualAISearch:
         data_dir (Path): Path object for the data directory.
         model: Loaded CLIP model.
         index: FAISS index for similarity search.
-        image_paths (
+        image_paths (list[str]): List of image file paths.
 
     Methods:
         extract_image_feature: Extract CLIP embedding from an image.
@@ -138,7 +138,7 @@ class VisualAISearch:
             similarity_thresh (float, optional): Minimum similarity threshold for filtering results.
 
         Returns:
-            (
+            (list[str]): List of image filenames ranked by similarity score.
 
         Examples:
             Search for images matching a query