dgenerate-ultralytics-headless 8.3.230__py3-none-any.whl → 8.3.232__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.232.dist-info}/METADATA +4 -6
- {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.232.dist-info}/RECORD +36 -36
- ultralytics/__init__.py +1 -1
- ultralytics/data/augment.py +3 -131
- ultralytics/engine/exporter.py +1 -1
- ultralytics/engine/model.py +0 -5
- ultralytics/engine/results.py +0 -67
- ultralytics/models/sam/model.py +0 -4
- ultralytics/models/sam/modules/blocks.py +0 -51
- ultralytics/models/sam/modules/decoders.py +0 -10
- ultralytics/models/sam/modules/encoders.py +0 -44
- ultralytics/models/sam/modules/memory_attention.py +0 -12
- ultralytics/models/sam/modules/sam.py +0 -16
- ultralytics/models/sam/predict.py +0 -17
- ultralytics/models/yolo/classify/val.py +0 -6
- ultralytics/models/yolo/model.py +0 -5
- ultralytics/models/yolo/obb/predict.py +0 -6
- ultralytics/models/yolo/pose/predict.py +1 -13
- ultralytics/models/yolo/pose/train.py +1 -7
- ultralytics/models/yolo/pose/val.py +6 -17
- ultralytics/models/yolo/world/train_world.py +0 -18
- ultralytics/nn/text_model.py +0 -16
- ultralytics/trackers/bot_sort.py +0 -13
- ultralytics/trackers/byte_tracker.py +0 -11
- ultralytics/trackers/utils/gmc.py +0 -4
- ultralytics/trackers/utils/kalman_filter.py +0 -4
- ultralytics/utils/__init__.py +2 -1
- ultralytics/utils/benchmarks.py +0 -6
- ultralytics/utils/errors.py +0 -6
- ultralytics/utils/metrics.py +4 -1
- ultralytics/utils/tqdm.py +0 -5
- ultralytics/utils/triton.py +0 -4
- {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.232.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.232.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.232.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.232.dist-info}/top_level.txt +0 -0
ultralytics/models/sam/modules/blocks.py CHANGED
@@ -156,13 +156,6 @@ class CXBlock(nn.Module):
             drop_path (float): Stochastic depth rate.
             layer_scale_init_value (float): Initial value for Layer Scale.
             use_dwconv (bool): Whether to use depthwise convolution.
-
-        Examples:
-            >>> block = CXBlock(dim=64, kernel_size=7, padding=3)
-            >>> x = torch.randn(1, 64, 32, 32)
-            >>> output = block(x)
-            >>> print(output.shape)
-            torch.Size([1, 64, 32, 32])
         """
         super().__init__()
         self.dwconv = nn.Conv2d(
@@ -231,12 +224,6 @@ class Fuser(nn.Module):
             num_layers (int): The number of times to replicate the layer.
             dim (int | None): The dimension for input projection, if used.
             input_projection (bool): Whether to use input projection.
-
-        Examples:
-            >>> layer = nn.Linear(64, 64)
-            >>> fuser = Fuser(layer, num_layers=3, dim=64, input_projection=True)
-            >>> input_tensor = torch.randn(1, 64)
-            >>> output = fuser(input_tensor)
         """
         super().__init__()
         self.proj = nn.Identity()
@@ -304,12 +291,6 @@ class SAM2TwoWayAttentionBlock(TwoWayAttentionBlock):
             activation (Type[nn.Module]): The activation function of the MLP block.
             attention_downsample_rate (int): The downsample rate for attention computations.
             skip_first_layer_pe (bool): Whether to skip the positional encoding in the first layer.
-
-        Examples:
-            >>> block = SAM2TwoWayAttentionBlock(embedding_dim=256, num_heads=8, mlp_dim=2048)
-            >>> sparse_inputs = torch.randn(1, 100, 256)
-            >>> dense_inputs = torch.randn(1, 256, 32, 32)
-            >>> sparse_outputs, dense_outputs = block(sparse_inputs, dense_inputs)
         """
         super().__init__(embedding_dim, num_heads, mlp_dim, activation, attention_downsample_rate, skip_first_layer_pe)
         self.mlp = MLP(embedding_dim, mlp_dim, embedding_dim, num_layers=2, act=activation)
@@ -364,17 +345,6 @@ class SAM2TwoWayTransformer(TwoWayTransformer):
             mlp_dim (int): Channel dimension internal to the MLP block.
             activation (Type[nn.Module]): Activation function to use in the MLP block.
             attention_downsample_rate (int): Downsampling rate for attention computations.
-
-        Examples:
-            >>> transformer = SAM2TwoWayTransformer(depth=5, embedding_dim=256, num_heads=8, mlp_dim=2048)
-            >>> transformer
-            SAM2TwoWayTransformer(
-              (layers): ModuleList(
-                (0-4): 5 x SAM2TwoWayAttentionBlock(...)
-              )
-              (final_attn_token_to_image): Attention(...)
-              (norm_final_attn): LayerNorm(...)
-            )
         """
         super().__init__(depth, embedding_dim, num_heads, mlp_dim, activation, attention_downsample_rate)
         self.layers = nn.ModuleList()
@@ -917,13 +887,6 @@ class Block(nn.Module):
             rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
             window_size (int): Size of attention window. If 0, uses global attention.
             input_size (tuple[int, int] | None): Input resolution for calculating relative positional parameter size.
-
-        Examples:
-            >>> block = Block(dim=256, num_heads=8, window_size=7)
-            >>> x = torch.randn(1, 56, 56, 256)
-            >>> output = block(x)
-            >>> print(output.shape)
-            torch.Size([1, 56, 56, 256])
         """
         super().__init__()
         self.norm1 = norm_layer(dim)
@@ -1008,13 +971,6 @@ class REAttention(nn.Module):
             rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
             input_size (tuple[int, int] | None): Input resolution for calculating relative positional parameter size.
                 Required if use_rel_pos is True.
-
-        Examples:
-            >>> attention = REAttention(dim=256, num_heads=8, input_size=(32, 32))
-            >>> x = torch.randn(1, 32, 32, 256)
-            >>> output = attention(x)
-            >>> print(output.shape)
-            torch.Size([1, 32, 32, 256])
         """
         super().__init__()
         self.num_heads = num_heads
@@ -1089,13 +1045,6 @@ class PatchEmbed(nn.Module):
             padding (tuple[int, int]): Padding applied to the input before convolution.
             in_chans (int): Number of input image channels.
             embed_dim (int): Dimensionality of the output patch embeddings.
-
-        Examples:
-            >>> patch_embed = PatchEmbed(kernel_size=(16, 16), stride=(16, 16), in_chans=3, embed_dim=768)
-            >>> x = torch.randn(1, 3, 224, 224)
-            >>> output = patch_embed(x)
-            >>> print(output.shape)
-            torch.Size([1, 768, 14, 14])
         """
         super().__init__()

ultralytics/models/sam/modules/decoders.py CHANGED
@@ -55,11 +55,6 @@ class MaskDecoder(nn.Module):
             activation (Type[nn.Module]): Type of activation to use when upscaling masks.
             iou_head_depth (int): Depth of the MLP used to predict mask quality.
             iou_head_hidden_dim (int): Hidden dimension of the MLP used to predict mask quality.
-
-        Examples:
-            >>> transformer = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=256, nhead=8), num_layers=6)
-            >>> decoder = MaskDecoder(transformer_dim=256, transformer=transformer)
-            >>> print(decoder)
         """
         super().__init__()
         self.transformer_dim = transformer_dim
@@ -249,11 +244,6 @@ class SAM2MaskDecoder(nn.Module):
             pred_obj_scores (bool): Whether to predict object scores.
             pred_obj_scores_mlp (bool): Whether to use MLP for object score prediction.
             use_multimask_token_for_obj_ptr (bool): Whether to use multimask token for object pointer.
-
-        Examples:
-            >>> transformer = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=256, nhead=8), num_layers=6)
-            >>> decoder = SAM2MaskDecoder(transformer_dim=256, transformer=transformer)
-            >>> print(decoder)
         """
         super().__init__()
         self.transformer_dim = transformer_dim
ultralytics/models/sam/modules/encoders.py CHANGED
@@ -82,12 +82,6 @@ class ImageEncoderViT(nn.Module):
             rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
             window_size (int): Size of attention window for windowed attention blocks.
             global_attn_indexes (tuple[int, ...]): Indices of blocks that use global attention.
-
-        Examples:
-            >>> encoder = ImageEncoderViT(img_size=224, patch_size=16, embed_dim=768, depth=12, num_heads=12)
-            >>> input_image = torch.randn(1, 3, 224, 224)
-            >>> output = encoder(input_image)
-            >>> print(output.shape)
         """
         super().__init__()
         self.img_size = img_size
@@ -198,15 +192,6 @@ class PromptEncoder(nn.Module):
             input_image_size (tuple[int, int]): The padded size of the input image as (H, W).
             mask_in_chans (int): The number of hidden channels used for encoding input masks.
             activation (Type[nn.Module]): The activation function to use when encoding input masks.
-
-        Examples:
-            >>> prompt_encoder = PromptEncoder(256, (64, 64), (1024, 1024), 16)
-            >>> points = (torch.rand(1, 5, 2), torch.randint(0, 4, (1, 5)))
-            >>> boxes = torch.rand(1, 2, 2)
-            >>> masks = torch.rand(1, 1, 256, 256)
-            >>> sparse_embeddings, dense_embeddings = prompt_encoder(points, boxes, masks)
-            >>> print(sparse_embeddings.shape, dense_embeddings.shape)
-            torch.Size([1, 7, 256]) torch.Size([1, 256, 64, 64])
         """
         super().__init__()
         self.embed_dim = embed_dim
@@ -385,14 +370,6 @@ class MemoryEncoder(nn.Module):
         Args:
             out_dim (int): Output dimension of the encoded features.
             in_dim (int): Input dimension of the pixel features.
-
-        Examples:
-            >>> encoder = MemoryEncoder(out_dim=256, in_dim=256)
-            >>> pix_feat = torch.randn(1, 256, 64, 64)
-            >>> masks = torch.randn(1, 1, 64, 64)
-            >>> encoded_feat, pos = encoder(pix_feat, masks)
-            >>> print(encoded_feat.shape, pos.shape)
-            torch.Size([1, 256, 64, 64]) torch.Size([1, 128, 64, 64])
         """
         super().__init__()

@@ -468,15 +445,6 @@ class ImageEncoder(nn.Module):
             trunk (nn.Module): The trunk network for initial feature extraction.
             neck (nn.Module): The neck network for feature refinement and positional encoding generation.
             scalp (int): Number of lowest resolution feature levels to discard.
-
-        Examples:
-            >>> trunk = SomeTrunkNetwork()
-            >>> neck = SomeNeckNetwork()
-            >>> encoder = ImageEncoder(trunk, neck, scalp=1)
-            >>> image = torch.randn(1, 3, 224, 224)
-            >>> output = encoder(image)
-            >>> print(output.keys())
-            dict_keys(['vision_features', 'vision_pos_enc', 'backbone_fpn'])
         """
         super().__init__()
         self.trunk = trunk
@@ -552,11 +520,6 @@ class FpnNeck(nn.Module):
             fpn_interp_model (str): Interpolation mode for FPN feature resizing.
             fuse_type (str): Type of feature fusion, either 'sum' or 'avg'.
             fpn_top_down_levels (Optional[list[int]]): Levels to have top-down features in outputs.
-
-        Examples:
-            >>> backbone_channels = [64, 128, 256, 512]
-            >>> fpn_neck = FpnNeck(256, backbone_channels)
-            >>> print(fpn_neck)
         """
         super().__init__()
         self.position_encoding = PositionEmbeddingSine(num_pos_feats=256)
@@ -720,13 +683,6 @@ class Hiera(nn.Module):
             window_spec (tuple[int, ...]): Window sizes for each stage when not using global attention.
             global_att_blocks (tuple[int, ...]): Indices of blocks that use global attention.
             return_interm_layers (bool): Whether to return intermediate layer outputs.
-
-        Examples:
-            >>> model = Hiera(embed_dim=96, num_heads=1, stages=(2, 3, 16, 3))
-            >>> input_tensor = torch.randn(1, 3, 224, 224)
-            >>> output_features = model(input_tensor)
-            >>> for feat in output_features:
-            ...     print(feat.shape)
         """
         super().__init__()

ultralytics/models/sam/modules/memory_attention.py CHANGED
@@ -213,18 +213,6 @@ class MemoryAttention(nn.Module):
             layer (nn.Module): The attention layer to be used in the module.
             num_layers (int): The number of attention layers.
             batch_first (bool): Whether the input tensors are in batch-first format.
-
-        Examples:
-            >>> d_model = 256
-            >>> layer = MemoryAttentionLayer(d_model)
-            >>> attention = MemoryAttention(d_model, pos_enc_at_input=True, layer=layer, num_layers=3)
-            >>> curr = torch.randn(10, 32, d_model)  # (seq_len, batch_size, d_model)
-            >>> memory = torch.randn(20, 32, d_model)  # (mem_len, batch_size, d_model)
-            >>> curr_pos = torch.randn(10, 32, d_model)
-            >>> memory_pos = torch.randn(20, 32, d_model)
-            >>> output = attention(curr, memory, curr_pos, memory_pos)
-            >>> print(output.shape)
-            torch.Size([10, 32, 256])
         """
         super().__init__()
         self.d_model = d_model
ultralytics/models/sam/modules/sam.py CHANGED
@@ -69,13 +69,6 @@ class SAMModel(nn.Module):
             pixel_mean (list[float]): Mean values for normalizing pixels in the input image.
             pixel_std (list[float]): Standard deviation values for normalizing pixels in the input image.

-        Examples:
-            >>> image_encoder = ImageEncoderViT(...)
-            >>> prompt_encoder = PromptEncoder(...)
-            >>> mask_decoder = MaskDecoder(...)
-            >>> sam_model = SAMModel(image_encoder, prompt_encoder, mask_decoder)
-            >>> # Further usage depends on SAMPredictor class
-
         Notes:
             All forward() operations moved to SAMPredictor.
         """
@@ -253,15 +246,6 @@ class SAM2Model(torch.nn.Module):
             no_obj_embed_spatial (bool): Whether add no obj embedding to spatial frames.
             sam_mask_decoder_extra_args (dict | None): Extra arguments for constructing the SAM mask decoder.
             compile_image_encoder (bool): Whether to compile the image encoder for faster inference.
-
-        Examples:
-            >>> image_encoder = ImageEncoderViT(...)
-            >>> memory_attention = SAM2TwoWayTransformer(...)
-            >>> memory_encoder = nn.Sequential(...)
-            >>> model = SAM2Model(image_encoder, memory_attention, memory_encoder)
-            >>> image_batch = torch.rand(1, 3, 512, 512)
-            >>> features = model.forward_image(image_batch)
-            >>> track_results = model.track_step(0, True, features, None, None, None, {})
         """
         super().__init__()

ultralytics/models/sam/predict.py CHANGED
@@ -90,11 +90,6 @@ class Predictor(BasePredictor):
             cfg (dict): Configuration dictionary containing default settings.
             overrides (dict | None): Dictionary of values to override default configuration.
             _callbacks (dict | None): Dictionary of callback functions to customize behavior.
-
-        Examples:
-            >>> predictor_example = Predictor(cfg=DEFAULT_CFG)
-            >>> predictor_example_with_imgsz = Predictor(overrides={"imgsz": 640})
-            >>> predictor_example_with_callback = Predictor(_callbacks={"on_predict_start": custom_callback})
         """
         if overrides is None:
             overrides = {}
@@ -918,11 +913,6 @@ class SAM2VideoPredictor(SAM2Predictor):
             cfg (dict): Configuration dictionary containing default settings.
             overrides (dict | None): Dictionary of values to override default configuration.
             _callbacks (dict | None): Dictionary of callback functions to customize behavior.
-
-        Examples:
-            >>> predictor = SAM2VideoPredictor(cfg=DEFAULT_CFG)
-            >>> predictor_example_with_imgsz = SAM2VideoPredictor(overrides={"imgsz": 640})
-            >>> predictor_example_with_callback = SAM2VideoPredictor(_callbacks={"on_predict_start": custom_callback})
         """
         super().__init__(cfg, overrides, _callbacks)
         self.inference_state = {}
@@ -1710,13 +1700,6 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
             max_obj_num (int): Maximum number of objects to track. Default is 3. this is set to keep fix feature size
                 for the model.
             _callbacks (dict[str, Any] | None): Dictionary of callback functions to customize behavior.
-
-        Examples:
-            >>> predictor = SAM2DynamicInteractivePredictor(cfg=DEFAULT_CFG)
-            >>> predictor_example_with_imgsz = SAM2DynamicInteractivePredictor(overrides={"imgsz": 640})
-            >>> predictor_example_with_callback = SAM2DynamicInteractivePredictor(
-            ...     _callbacks={"on_predict_start": custom_callback}
-            ... )
         """
         super().__init__(cfg, overrides, _callbacks)
         self.non_overlap_masks = True
ultralytics/models/yolo/classify/val.py CHANGED
@@ -61,12 +61,6 @@ class ClassificationValidator(BaseValidator):
             save_dir (str | Path, optional): Directory to save results.
             args (dict, optional): Arguments containing model and validation configuration.
             _callbacks (list, optional): List of callback functions to be called during validation.
-
-        Examples:
-            >>> from ultralytics.models.yolo.classify import ClassificationValidator
-            >>> args = dict(model="yolo11n-cls.pt", data="imagenet10")
-            >>> validator = ClassificationValidator(args=args)
-            >>> validator()
         """
         super().__init__(dataloader, save_dir, args, _callbacks)
         self.targets = None
ultralytics/models/yolo/model.py CHANGED
@@ -61,11 +61,6 @@ class YOLO(Model):
             task (str, optional): YOLO task specification, i.e. 'detect', 'segment', 'classify', 'pose', 'obb'. Defaults
                 to auto-detection based on model.
             verbose (bool): Display model info on load.
-
-        Examples:
-            >>> from ultralytics import YOLO
-            >>> model = YOLO("yolo11n.pt")  # load a pretrained YOLO11n detection model
-            >>> model = YOLO("yolo11n-seg.pt")  # load a pretrained YOLO11n segmentation model
         """
         path = Path(model if isinstance(model, (str, Path)) else "")
         if "-world" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}:  # if YOLOWorld PyTorch model
ultralytics/models/yolo/obb/predict.py CHANGED
@@ -32,12 +32,6 @@ class OBBPredictor(DetectionPredictor):
             cfg (dict, optional): Default configuration for the predictor.
             overrides (dict, optional): Configuration overrides that take precedence over the default config.
             _callbacks (list, optional): List of callback functions to be invoked during prediction.
-
-        Examples:
-            >>> from ultralytics.utils import ASSETS
-            >>> from ultralytics.models.yolo.obb import OBBPredictor
-            >>> args = dict(model="yolo11n-obb.pt", source=ASSETS)
-            >>> predictor = OBBPredictor(overrides=args)
         """
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = "obb"
ultralytics/models/yolo/pose/predict.py CHANGED
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 from ultralytics.models.yolo.detect.predict import DetectionPredictor
-from ultralytics.utils import DEFAULT_CFG, LOGGER, ops
+from ultralytics.utils import DEFAULT_CFG, ops


 class PosePredictor(DetectionPredictor):
@@ -35,21 +35,9 @@ class PosePredictor(DetectionPredictor):
             cfg (Any): Configuration for the predictor.
             overrides (dict, optional): Configuration overrides that take precedence over cfg.
             _callbacks (list, optional): List of callback functions to be invoked during prediction.
-
-        Examples:
-            >>> from ultralytics.utils import ASSETS
-            >>> from ultralytics.models.yolo.pose import PosePredictor
-            >>> args = dict(model="yolo11n-pose.pt", source=ASSETS)
-            >>> predictor = PosePredictor(overrides=args)
-            >>> predictor.predict_cli()
         """
         super().__init__(cfg, overrides, _callbacks)
         self.args.task = "pose"
-        if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
-            LOGGER.warning(
-                "Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. "
-                "See https://github.com/ultralytics/ultralytics/issues/4031."
-            )

     def construct_result(self, pred, img, orig_img, img_path):
         """Construct the result object from the prediction, including keypoints.
ultralytics/models/yolo/pose/train.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any

 from ultralytics.models import yolo
 from ultralytics.nn.tasks import PoseModel
-from ultralytics.utils import DEFAULT_CFG, LOGGER
+from ultralytics.utils import DEFAULT_CFG


 class PoseTrainer(yolo.detect.DetectionTrainer):
@@ -54,12 +54,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
         overrides["task"] = "pose"
         super().__init__(cfg, overrides, _callbacks)

-        if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
-            LOGGER.warning(
-                "Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. "
-                "See https://github.com/ultralytics/ultralytics/issues/4031."
-            )
-
     def get_model(
         self,
         cfg: str | Path | dict[str, Any] | None = None,
ultralytics/models/yolo/pose/val.py CHANGED
@@ -9,7 +9,7 @@ import numpy as np
 import torch

 from ultralytics.models.yolo.detect import DetectionValidator
-from ultralytics.utils import LOGGER, ops
+from ultralytics.utils import ops
 from ultralytics.utils.metrics import OKS_SIGMA, PoseMetrics, kpt_iou


@@ -45,6 +45,11 @@ class PoseValidator(DetectionValidator):
         >>> args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml")
         >>> validator = PoseValidator(args=args)
         >>> validator()
+
+    Notes:
+        This class extends DetectionValidator with pose-specific functionality. It initializes with sigma values
+        for OKS calculation and sets up PoseMetrics for evaluation. A warning is displayed when using Apple MPS
+        due to a known bug with pose models.
     """

     def __init__(self, dataloader=None, save_dir=None, args=None, _callbacks=None) -> None:
@@ -58,28 +63,12 @@ class PoseValidator(DetectionValidator):
             save_dir (Path | str, optional): Directory to save results.
             args (dict, optional): Arguments for the validator including task set to "pose".
             _callbacks (list, optional): List of callback functions to be executed during validation.
-
-        Examples:
-            >>> from ultralytics.models.yolo.pose import PoseValidator
-            >>> args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml")
-            >>> validator = PoseValidator(args=args)
-            >>> validator()
-
-        Notes:
-            This class extends DetectionValidator with pose-specific functionality. It initializes with sigma values
-            for OKS calculation and sets up PoseMetrics for evaluation. A warning is displayed when using Apple MPS
-            due to a known bug with pose models.
         """
         super().__init__(dataloader, save_dir, args, _callbacks)
         self.sigma = None
         self.kpt_shape = None
         self.args.task = "pose"
         self.metrics = PoseMetrics()
-        if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
-            LOGGER.warning(
-                "Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. "
-                "See https://github.com/ultralytics/ultralytics/issues/4031."
-            )

     def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
         """Preprocess batch by converting keypoints data to float and moving it to the device."""
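The Notes block kept in the class docstring summarizes behaviour that the removed `__init__` Examples previously showed. For reference, the usage reproduced below mirrors that removed docstring example verbatim; the model and dataset names are the ones given there.

```python
from ultralytics.models.yolo.pose import PoseValidator

# Usage mirrored from the docstring example removed in this release
args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml")
validator = PoseValidator(args=args)
validator()  # runs pose validation, scoring keypoints with the OKS sigmas via PoseMetrics
```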
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -61,24 +61,6 @@ class WorldTrainerFromScratch(WorldTrainer):
             cfg (dict): Configuration dictionary with default parameters for model training.
             overrides (dict, optional): Dictionary of parameter overrides to customize the configuration.
             _callbacks (list, optional): List of callback functions to be executed during different stages of training.
-
-        Examples:
-            >>> from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch
-            >>> from ultralytics import YOLOWorld
-            >>> data = dict(
-            ...     train=dict(
-            ...         yolo_data=["Objects365.yaml"],
-            ...         grounding_data=[
-            ...             dict(
-            ...                 img_path="flickr30k/images",
-            ...                 json_file="flickr30k/final_flickr_separateGT_train.json",
-            ...             ),
-            ...         ],
-            ...     ),
-            ...     val=dict(yolo_data=["lvis.yaml"]),
-            ... )
-            >>> model = YOLOWorld("yolov8s-worldv2.yaml")
-            >>> model.train(data=data, trainer=WorldTrainerFromScratch)
         """
         if overrides is None:
             overrides = {}
ultralytics/nn/text_model.py CHANGED
@@ -77,11 +77,6 @@ class CLIP(TextModel):
         Args:
             size (str): Model size identifier (e.g., 'ViT-B/32').
             device (torch.device): Device to load the model on.
-
-        Examples:
-            >>> import torch
-            >>> clip_model = CLIP("ViT-B/32", device=torch.device("cuda:0"))
-            >>> text_features = clip_model.encode_text(["a photo of a cat", "a photo of a dog"])
         """
         super().__init__()
         self.model, self.image_preprocess = clip.load(size, device=device)
@@ -199,12 +194,6 @@ class MobileCLIP(TextModel):
         Args:
             size (str): Model size identifier (e.g., 's0', 's1', 's2', 'b', 'blt').
             device (torch.device): Device to load the model on.
-
-        Examples:
-            >>> import torch
-            >>> model = MobileCLIP("s0", device=torch.device("cpu"))
-            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
-            >>> features = model.encode_text(tokens)
         """
         try:
             import warnings
@@ -299,11 +288,6 @@ class MobileCLIPTS(TextModel):

         Args:
             device (torch.device): Device to load the model on.
-
-        Examples:
-            >>> model = MobileCLIPTS(device=torch.device("cpu"))
-            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
-            >>> features = model.encode_text(tokens)
         """
         super().__init__()
         from ultralytics.utils.downloads import attempt_download_asset
ultralytics/trackers/bot_sort.py CHANGED
@@ -64,14 +64,6 @@ class BOTrack(STrack):
             cls (int): Class ID of the detected object.
             feat (np.ndarray, optional): Feature vector associated with the detection.
             feat_history (int): Maximum length of the feature history deque.
-
-        Examples:
-            Initialize a BOTrack object with bounding box, score, class ID, and feature vector
-            >>> xywh = np.array([100, 150, 60, 50])
-            >>> score = 0.9
-            >>> cls = 1
-            >>> feat = np.random.rand(128)
-            >>> bo_track = BOTrack(xywh, score, cls, feat)
         """
         super().__init__(xywh, score, cls)

@@ -184,11 +176,6 @@ class BOTSORT(BYTETracker):
         Args:
             args (Any): Parsed command-line arguments containing tracking parameters.
             frame_rate (int): Frame rate of the video being processed.
-
-        Examples:
-            Initialize BOTSORT with command-line arguments and a specified frame rate:
-            >>> args = parse_args()
-            >>> bot_sort = BOTSORT(args, frame_rate=30)
         """
         super().__init__(args, frame_rate)
         self.gmc = GMC(method=args.gmc_method)
ultralytics/trackers/byte_tracker.py CHANGED
@@ -60,12 +60,6 @@ class STrack(BaseTrack):
                 y) is the center, (w, h) are width and height, [a] is optional aspect ratio, and idx is the id.
             score (float): Confidence score of the detection.
             cls (Any): Class label for the detected object.
-
-        Examples:
-            >>> xywh = [100.0, 150.0, 50.0, 75.0, 1]
-            >>> score = 0.9
-            >>> cls = "person"
-            >>> track = STrack(xywh, score, cls)
         """
         super().__init__()
         # xywh+idx or xywha+idx
@@ -275,11 +269,6 @@ class BYTETracker:
         Args:
             args (Namespace): Command-line arguments containing tracking parameters.
             frame_rate (int): Frame rate of the video sequence.
-
-        Examples:
-            Initialize BYTETracker with command-line arguments and a frame rate of 30
-            >>> args = Namespace(track_buffer=30)
-            >>> tracker = BYTETracker(args, frame_rate=30)
         """
         self.tracked_stracks = []  # type: list[STrack]
         self.lost_stracks = []  # type: list[STrack]
ultralytics/trackers/utils/gmc.py CHANGED
@@ -47,10 +47,6 @@ class GMC:
         Args:
             method (str): The tracking method to use. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
             downscale (int): Downscale factor for processing frames.
-
-        Examples:
-            Initialize a GMC object with the 'sparseOptFlow' method and a downscale factor of 2
-            >>> gmc = GMC(method="sparseOptFlow", downscale=2)
         """
         super().__init__()

ultralytics/trackers/utils/kalman_filter.py CHANGED
@@ -42,10 +42,6 @@ class KalmanFilterXYAH:
         represents the bounding box center position, 'a' is the aspect ratio, 'h' is the height, and their respective
         velocities are (vx, vy, va, vh). The filter uses a constant velocity model for object motion and a linear
         observation model for bounding box location.
-
-        Examples:
-            Initialize a Kalman filter for tracking:
-            >>> kf = KalmanFilterXYAH()
         """
         ndim, dt = 4, 1.0

ultralytics/utils/__init__.py CHANGED
@@ -1181,7 +1181,8 @@ class JSONDict(dict):
         try:
             if self.file_path.exists():
                 with open(self.file_path) as f:
-                    self.update(json.load(f))
+                    # Use the base dict update to avoid persisting during reads
+                    super().update(json.load(f))
         except json.JSONDecodeError:
             LOGGER.warning(f"Error decoding JSON from {self.file_path}. Starting with an empty dictionary.")
         except Exception as e:
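The new comment states the intent: loading the file back into the dict should not immediately re-write it. A minimal sketch of that pattern, using a simplified stand-in class rather than the full ultralytics JSONDict, shows why the base-class `dict.update` is used during loads while normal writes still persist:

```python
import json
from pathlib import Path


class PersistentDict(dict):
    """Simplified JSON-backed dict sketch (illustrative, not the ultralytics JSONDict implementation)."""

    def __init__(self, file_path="settings.json"):
        super().__init__()
        self.file_path = Path(file_path)
        self._load()

    def _load(self):
        if self.file_path.exists():
            with open(self.file_path) as f:
                # Base-class update populates keys without triggering a save during reads
                super().update(json.load(f))

    def _save(self):
        with open(self.file_path, "w") as f:
            json.dump(dict(self), f, indent=2)

    def __setitem__(self, key, value):
        super().__setitem__(key, value)
        self._save()  # writes go through the overridden path and persist immediately

    def update(self, *args, **kwargs):
        super().update(*args, **kwargs)
        self._save()  # calling self.update() inside _load() would redundantly rewrite the file
```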
ultralytics/utils/benchmarks.py CHANGED
@@ -423,12 +423,6 @@ class ProfileModels:
             trt (bool): Flag to indicate whether to profile using TensorRT.
             device (torch.device | str | None): Device used for profiling. If None, it is determined automatically.

-        Examples:
-            Initialize and profile models
-            >>> from ultralytics.utils.benchmarks import ProfileModels
-            >>> profiler = ProfileModels(["yolo11n.yaml", "yolov8s.yaml"], imgsz=640)
-            >>> profiler.run()
-
         Notes:
             FP16 'half' argument option removed for ONNX as slower on CPU than FP32.
         """
ultralytics/utils/errors.py CHANGED
@@ -31,11 +31,5 @@ class HUBModelError(Exception):

         Args:
             message (str, optional): The error message to display when the exception is raised.
-
-        Examples:
-            >>> try:
-            ...     raise HUBModelError("Custom model error message")
-            ... except HUBModelError as e:
-            ...     print(e)
         """
         super().__init__(emojis(message))
ultralytics/utils/metrics.py CHANGED
@@ -15,7 +15,10 @@ import torch
 from ultralytics.utils import LOGGER, DataExportMixin, SimpleClass, TryExcept, checks, plt_settings

 OKS_SIGMA = (
-    np.array([0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89], dtype=np.float32)
+    np.array(
+        [0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89],
+        dtype=np.float32,
+    )
     / 10.0
 )

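For context, `OKS_SIGMA` holds the 17 per-keypoint COCO sigmas divided by 10. A rough sketch of how such sigmas enter an object keypoint similarity (OKS) score follows, written in the generic COCO formulation rather than as the library's own `kpt_iou`; the helper name and the `pred`, `gt`, `visible`, and `area` inputs are assumptions for illustration.

```python
import numpy as np

# Per-keypoint COCO sigmas, matching the constant defined in ultralytics/utils/metrics.py
OKS_SIGMA = np.array(
    [0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89],
    dtype=np.float32,
) / 10.0


def object_keypoint_similarity(pred, gt, visible, area, eps=1e-7):
    """Generic COCO-style OKS between two (17, 2) keypoint arrays (illustrative helper, not the library API)."""
    d2 = ((pred - gt) ** 2).sum(-1)   # squared distance per keypoint
    k2 = (2 * OKS_SIGMA) ** 2         # per-keypoint tolerance derived from the sigmas
    e = d2 / (2 * area * k2 + eps)    # larger sigma or larger object area -> more forgiving
    return float(np.exp(-e)[visible].mean()) if visible.any() else 0.0
```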
ultralytics/utils/tqdm.py CHANGED
@@ -109,11 +109,6 @@ class TQDM:
             bar_format (str, optional): Custom bar format string.
             initial (int, optional): Initial counter value.
             **kwargs (Any): Additional keyword arguments for compatibility (ignored).
-
-        Examples:
-            >>> pbar = TQDM(range(100), desc="Processing")
-            >>> with TQDM(total=1000, unit="B", unit_scale=True) as pbar:
-            ...     pbar.update(1024)  # Updates by 1KB
         """
         # Disable if not verbose
         if disable is None: