dgenerate-ultralytics-headless 8.3.230-py3-none-any.whl → 8.3.231-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.231.dist-info}/METADATA +4 -6
  2. {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.231.dist-info}/RECORD +35 -35
  3. ultralytics/__init__.py +1 -1
  4. ultralytics/data/augment.py +3 -131
  5. ultralytics/engine/model.py +0 -5
  6. ultralytics/engine/results.py +0 -67
  7. ultralytics/models/sam/model.py +0 -4
  8. ultralytics/models/sam/modules/blocks.py +0 -51
  9. ultralytics/models/sam/modules/decoders.py +0 -10
  10. ultralytics/models/sam/modules/encoders.py +0 -44
  11. ultralytics/models/sam/modules/memory_attention.py +0 -12
  12. ultralytics/models/sam/modules/sam.py +0 -16
  13. ultralytics/models/sam/predict.py +0 -17
  14. ultralytics/models/yolo/classify/val.py +0 -6
  15. ultralytics/models/yolo/model.py +0 -5
  16. ultralytics/models/yolo/obb/predict.py +0 -6
  17. ultralytics/models/yolo/pose/predict.py +1 -13
  18. ultralytics/models/yolo/pose/train.py +1 -7
  19. ultralytics/models/yolo/pose/val.py +6 -17
  20. ultralytics/models/yolo/world/train_world.py +0 -18
  21. ultralytics/nn/text_model.py +0 -16
  22. ultralytics/trackers/bot_sort.py +0 -13
  23. ultralytics/trackers/byte_tracker.py +0 -11
  24. ultralytics/trackers/utils/gmc.py +0 -4
  25. ultralytics/trackers/utils/kalman_filter.py +0 -4
  26. ultralytics/utils/__init__.py +2 -1
  27. ultralytics/utils/benchmarks.py +0 -6
  28. ultralytics/utils/errors.py +0 -6
  29. ultralytics/utils/metrics.py +4 -1
  30. ultralytics/utils/tqdm.py +0 -5
  31. ultralytics/utils/triton.py +0 -4
  32. {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.231.dist-info}/WHEEL +0 -0
  33. {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.231.dist-info}/entry_points.txt +0 -0
  34. {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.231.dist-info}/licenses/LICENSE +0 -0
  35. {dgenerate_ultralytics_headless-8.3.230.dist-info → dgenerate_ultralytics_headless-8.3.231.dist-info}/top_level.txt +0 -0
ultralytics/models/sam/modules/blocks.py CHANGED
@@ -156,13 +156,6 @@ class CXBlock(nn.Module):
  drop_path (float): Stochastic depth rate.
  layer_scale_init_value (float): Initial value for Layer Scale.
  use_dwconv (bool): Whether to use depthwise convolution.
-
- Examples:
- >>> block = CXBlock(dim=64, kernel_size=7, padding=3)
- >>> x = torch.randn(1, 64, 32, 32)
- >>> output = block(x)
- >>> print(output.shape)
- torch.Size([1, 64, 32, 32])
  """
  super().__init__()
  self.dwconv = nn.Conv2d(
@@ -231,12 +224,6 @@ class Fuser(nn.Module):
  num_layers (int): The number of times to replicate the layer.
  dim (int | None): The dimension for input projection, if used.
  input_projection (bool): Whether to use input projection.
-
- Examples:
- >>> layer = nn.Linear(64, 64)
- >>> fuser = Fuser(layer, num_layers=3, dim=64, input_projection=True)
- >>> input_tensor = torch.randn(1, 64)
- >>> output = fuser(input_tensor)
  """
  super().__init__()
  self.proj = nn.Identity()
@@ -304,12 +291,6 @@ class SAM2TwoWayAttentionBlock(TwoWayAttentionBlock):
  activation (Type[nn.Module]): The activation function of the MLP block.
  attention_downsample_rate (int): The downsample rate for attention computations.
  skip_first_layer_pe (bool): Whether to skip the positional encoding in the first layer.
-
- Examples:
- >>> block = SAM2TwoWayAttentionBlock(embedding_dim=256, num_heads=8, mlp_dim=2048)
- >>> sparse_inputs = torch.randn(1, 100, 256)
- >>> dense_inputs = torch.randn(1, 256, 32, 32)
- >>> sparse_outputs, dense_outputs = block(sparse_inputs, dense_inputs)
  """
  super().__init__(embedding_dim, num_heads, mlp_dim, activation, attention_downsample_rate, skip_first_layer_pe)
  self.mlp = MLP(embedding_dim, mlp_dim, embedding_dim, num_layers=2, act=activation)
@@ -364,17 +345,6 @@ class SAM2TwoWayTransformer(TwoWayTransformer):
  mlp_dim (int): Channel dimension internal to the MLP block.
  activation (Type[nn.Module]): Activation function to use in the MLP block.
  attention_downsample_rate (int): Downsampling rate for attention computations.
-
- Examples:
- >>> transformer = SAM2TwoWayTransformer(depth=5, embedding_dim=256, num_heads=8, mlp_dim=2048)
- >>> transformer
- SAM2TwoWayTransformer(
- (layers): ModuleList(
- (0-4): 5 x SAM2TwoWayAttentionBlock(...)
- )
- (final_attn_token_to_image): Attention(...)
- (norm_final_attn): LayerNorm(...)
- )
  """
  super().__init__(depth, embedding_dim, num_heads, mlp_dim, activation, attention_downsample_rate)
  self.layers = nn.ModuleList()
@@ -917,13 +887,6 @@ class Block(nn.Module):
  rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
  window_size (int): Size of attention window. If 0, uses global attention.
  input_size (tuple[int, int] | None): Input resolution for calculating relative positional parameter size.
-
- Examples:
- >>> block = Block(dim=256, num_heads=8, window_size=7)
- >>> x = torch.randn(1, 56, 56, 256)
- >>> output = block(x)
- >>> print(output.shape)
- torch.Size([1, 56, 56, 256])
  """
  super().__init__()
  self.norm1 = norm_layer(dim)
@@ -1008,13 +971,6 @@ class REAttention(nn.Module):
  rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
  input_size (tuple[int, int] | None): Input resolution for calculating relative positional parameter size.
  Required if use_rel_pos is True.
-
- Examples:
- >>> attention = REAttention(dim=256, num_heads=8, input_size=(32, 32))
- >>> x = torch.randn(1, 32, 32, 256)
- >>> output = attention(x)
- >>> print(output.shape)
- torch.Size([1, 32, 32, 256])
  """
  super().__init__()
  self.num_heads = num_heads
@@ -1089,13 +1045,6 @@ class PatchEmbed(nn.Module):
  padding (tuple[int, int]): Padding applied to the input before convolution.
  in_chans (int): Number of input image channels.
  embed_dim (int): Dimensionality of the output patch embeddings.
-
- Examples:
- >>> patch_embed = PatchEmbed(kernel_size=(16, 16), stride=(16, 16), in_chans=3, embed_dim=768)
- >>> x = torch.randn(1, 3, 224, 224)
- >>> output = patch_embed(x)
- >>> print(output.shape)
- torch.Size([1, 768, 14, 14])
  """
  super().__init__()

ultralytics/models/sam/modules/decoders.py CHANGED
@@ -55,11 +55,6 @@ class MaskDecoder(nn.Module):
  activation (Type[nn.Module]): Type of activation to use when upscaling masks.
  iou_head_depth (int): Depth of the MLP used to predict mask quality.
  iou_head_hidden_dim (int): Hidden dimension of the MLP used to predict mask quality.
-
- Examples:
- >>> transformer = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=256, nhead=8), num_layers=6)
- >>> decoder = MaskDecoder(transformer_dim=256, transformer=transformer)
- >>> print(decoder)
  """
  super().__init__()
  self.transformer_dim = transformer_dim
@@ -249,11 +244,6 @@ class SAM2MaskDecoder(nn.Module):
  pred_obj_scores (bool): Whether to predict object scores.
  pred_obj_scores_mlp (bool): Whether to use MLP for object score prediction.
  use_multimask_token_for_obj_ptr (bool): Whether to use multimask token for object pointer.
-
- Examples:
- >>> transformer = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=256, nhead=8), num_layers=6)
- >>> decoder = SAM2MaskDecoder(transformer_dim=256, transformer=transformer)
- >>> print(decoder)
  """
  super().__init__()
  self.transformer_dim = transformer_dim
ultralytics/models/sam/modules/encoders.py CHANGED
@@ -82,12 +82,6 @@ class ImageEncoderViT(nn.Module):
  rel_pos_zero_init (bool): If True, initializes relative positional parameters to zero.
  window_size (int): Size of attention window for windowed attention blocks.
  global_attn_indexes (tuple[int, ...]): Indices of blocks that use global attention.
-
- Examples:
- >>> encoder = ImageEncoderViT(img_size=224, patch_size=16, embed_dim=768, depth=12, num_heads=12)
- >>> input_image = torch.randn(1, 3, 224, 224)
- >>> output = encoder(input_image)
- >>> print(output.shape)
  """
  super().__init__()
  self.img_size = img_size
@@ -198,15 +192,6 @@ class PromptEncoder(nn.Module):
  input_image_size (tuple[int, int]): The padded size of the input image as (H, W).
  mask_in_chans (int): The number of hidden channels used for encoding input masks.
  activation (Type[nn.Module]): The activation function to use when encoding input masks.
-
- Examples:
- >>> prompt_encoder = PromptEncoder(256, (64, 64), (1024, 1024), 16)
- >>> points = (torch.rand(1, 5, 2), torch.randint(0, 4, (1, 5)))
- >>> boxes = torch.rand(1, 2, 2)
- >>> masks = torch.rand(1, 1, 256, 256)
- >>> sparse_embeddings, dense_embeddings = prompt_encoder(points, boxes, masks)
- >>> print(sparse_embeddings.shape, dense_embeddings.shape)
- torch.Size([1, 7, 256]) torch.Size([1, 256, 64, 64])
  """
  super().__init__()
  self.embed_dim = embed_dim
@@ -385,14 +370,6 @@ class MemoryEncoder(nn.Module):
  Args:
  out_dim (int): Output dimension of the encoded features.
  in_dim (int): Input dimension of the pixel features.
-
- Examples:
- >>> encoder = MemoryEncoder(out_dim=256, in_dim=256)
- >>> pix_feat = torch.randn(1, 256, 64, 64)
- >>> masks = torch.randn(1, 1, 64, 64)
- >>> encoded_feat, pos = encoder(pix_feat, masks)
- >>> print(encoded_feat.shape, pos.shape)
- torch.Size([1, 256, 64, 64]) torch.Size([1, 128, 64, 64])
  """
  super().__init__()

@@ -468,15 +445,6 @@ class ImageEncoder(nn.Module):
  trunk (nn.Module): The trunk network for initial feature extraction.
  neck (nn.Module): The neck network for feature refinement and positional encoding generation.
  scalp (int): Number of lowest resolution feature levels to discard.
-
- Examples:
- >>> trunk = SomeTrunkNetwork()
- >>> neck = SomeNeckNetwork()
- >>> encoder = ImageEncoder(trunk, neck, scalp=1)
- >>> image = torch.randn(1, 3, 224, 224)
- >>> output = encoder(image)
- >>> print(output.keys())
- dict_keys(['vision_features', 'vision_pos_enc', 'backbone_fpn'])
  """
  super().__init__()
  self.trunk = trunk
@@ -552,11 +520,6 @@ class FpnNeck(nn.Module):
  fpn_interp_model (str): Interpolation mode for FPN feature resizing.
  fuse_type (str): Type of feature fusion, either 'sum' or 'avg'.
  fpn_top_down_levels (Optional[list[int]]): Levels to have top-down features in outputs.
-
- Examples:
- >>> backbone_channels = [64, 128, 256, 512]
- >>> fpn_neck = FpnNeck(256, backbone_channels)
- >>> print(fpn_neck)
  """
  super().__init__()
  self.position_encoding = PositionEmbeddingSine(num_pos_feats=256)
@@ -720,13 +683,6 @@ class Hiera(nn.Module):
  window_spec (tuple[int, ...]): Window sizes for each stage when not using global attention.
  global_att_blocks (tuple[int, ...]): Indices of blocks that use global attention.
  return_interm_layers (bool): Whether to return intermediate layer outputs.
-
- Examples:
- >>> model = Hiera(embed_dim=96, num_heads=1, stages=(2, 3, 16, 3))
- >>> input_tensor = torch.randn(1, 3, 224, 224)
- >>> output_features = model(input_tensor)
- >>> for feat in output_features:
- ... print(feat.shape)
  """
  super().__init__()

ultralytics/models/sam/modules/memory_attention.py CHANGED
@@ -213,18 +213,6 @@ class MemoryAttention(nn.Module):
  layer (nn.Module): The attention layer to be used in the module.
  num_layers (int): The number of attention layers.
  batch_first (bool): Whether the input tensors are in batch-first format.
-
- Examples:
- >>> d_model = 256
- >>> layer = MemoryAttentionLayer(d_model)
- >>> attention = MemoryAttention(d_model, pos_enc_at_input=True, layer=layer, num_layers=3)
- >>> curr = torch.randn(10, 32, d_model) # (seq_len, batch_size, d_model)
- >>> memory = torch.randn(20, 32, d_model) # (mem_len, batch_size, d_model)
- >>> curr_pos = torch.randn(10, 32, d_model)
- >>> memory_pos = torch.randn(20, 32, d_model)
- >>> output = attention(curr, memory, curr_pos, memory_pos)
- >>> print(output.shape)
- torch.Size([10, 32, 256])
  """
  super().__init__()
  self.d_model = d_model
ultralytics/models/sam/modules/sam.py CHANGED
@@ -69,13 +69,6 @@ class SAMModel(nn.Module):
  pixel_mean (list[float]): Mean values for normalizing pixels in the input image.
  pixel_std (list[float]): Standard deviation values for normalizing pixels in the input image.

- Examples:
- >>> image_encoder = ImageEncoderViT(...)
- >>> prompt_encoder = PromptEncoder(...)
- >>> mask_decoder = MaskDecoder(...)
- >>> sam_model = SAMModel(image_encoder, prompt_encoder, mask_decoder)
- >>> # Further usage depends on SAMPredictor class
-
  Notes:
  All forward() operations moved to SAMPredictor.
  """
@@ -253,15 +246,6 @@ class SAM2Model(torch.nn.Module):
  no_obj_embed_spatial (bool): Whether add no obj embedding to spatial frames.
  sam_mask_decoder_extra_args (dict | None): Extra arguments for constructing the SAM mask decoder.
  compile_image_encoder (bool): Whether to compile the image encoder for faster inference.
-
- Examples:
- >>> image_encoder = ImageEncoderViT(...)
- >>> memory_attention = SAM2TwoWayTransformer(...)
- >>> memory_encoder = nn.Sequential(...)
- >>> model = SAM2Model(image_encoder, memory_attention, memory_encoder)
- >>> image_batch = torch.rand(1, 3, 512, 512)
- >>> features = model.forward_image(image_batch)
- >>> track_results = model.track_step(0, True, features, None, None, None, {})
  """
  super().__init__()

ultralytics/models/sam/predict.py CHANGED
@@ -90,11 +90,6 @@ class Predictor(BasePredictor):
  cfg (dict): Configuration dictionary containing default settings.
  overrides (dict | None): Dictionary of values to override default configuration.
  _callbacks (dict | None): Dictionary of callback functions to customize behavior.
-
- Examples:
- >>> predictor_example = Predictor(cfg=DEFAULT_CFG)
- >>> predictor_example_with_imgsz = Predictor(overrides={"imgsz": 640})
- >>> predictor_example_with_callback = Predictor(_callbacks={"on_predict_start": custom_callback})
  """
  if overrides is None:
  overrides = {}
@@ -918,11 +913,6 @@ class SAM2VideoPredictor(SAM2Predictor):
  cfg (dict): Configuration dictionary containing default settings.
  overrides (dict | None): Dictionary of values to override default configuration.
  _callbacks (dict | None): Dictionary of callback functions to customize behavior.
-
- Examples:
- >>> predictor = SAM2VideoPredictor(cfg=DEFAULT_CFG)
- >>> predictor_example_with_imgsz = SAM2VideoPredictor(overrides={"imgsz": 640})
- >>> predictor_example_with_callback = SAM2VideoPredictor(_callbacks={"on_predict_start": custom_callback})
  """
  super().__init__(cfg, overrides, _callbacks)
  self.inference_state = {}
@@ -1710,13 +1700,6 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  max_obj_num (int): Maximum number of objects to track. Default is 3. this is set to keep fix feature size
  for the model.
  _callbacks (dict[str, Any] | None): Dictionary of callback functions to customize behavior.
-
- Examples:
- >>> predictor = SAM2DynamicInteractivePredictor(cfg=DEFAULT_CFG)
- >>> predictor_example_with_imgsz = SAM2DynamicInteractivePredictor(overrides={"imgsz": 640})
- >>> predictor_example_with_callback = SAM2DynamicInteractivePredictor(
- ... _callbacks={"on_predict_start": custom_callback}
- ... )
  """
  super().__init__(cfg, overrides, _callbacks)
  self.non_overlap_masks = True
ultralytics/models/yolo/classify/val.py CHANGED
@@ -61,12 +61,6 @@ class ClassificationValidator(BaseValidator):
  save_dir (str | Path, optional): Directory to save results.
  args (dict, optional): Arguments containing model and validation configuration.
  _callbacks (list, optional): List of callback functions to be called during validation.
-
- Examples:
- >>> from ultralytics.models.yolo.classify import ClassificationValidator
- >>> args = dict(model="yolo11n-cls.pt", data="imagenet10")
- >>> validator = ClassificationValidator(args=args)
- >>> validator()
  """
  super().__init__(dataloader, save_dir, args, _callbacks)
  self.targets = None
ultralytics/models/yolo/model.py CHANGED
@@ -61,11 +61,6 @@ class YOLO(Model):
  task (str, optional): YOLO task specification, i.e. 'detect', 'segment', 'classify', 'pose', 'obb'. Defaults
  to auto-detection based on model.
  verbose (bool): Display model info on load.
-
- Examples:
- >>> from ultralytics import YOLO
- >>> model = YOLO("yolo11n.pt") # load a pretrained YOLO11n detection model
- >>> model = YOLO("yolo11n-seg.pt") # load a pretrained YOLO11n segmentation model
  """
  path = Path(model if isinstance(model, (str, Path)) else "")
  if "-world" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}: # if YOLOWorld PyTorch model
ultralytics/models/yolo/obb/predict.py CHANGED
@@ -32,12 +32,6 @@ class OBBPredictor(DetectionPredictor):
  cfg (dict, optional): Default configuration for the predictor.
  overrides (dict, optional): Configuration overrides that take precedence over the default config.
  _callbacks (list, optional): List of callback functions to be invoked during prediction.
-
- Examples:
- >>> from ultralytics.utils import ASSETS
- >>> from ultralytics.models.yolo.obb import OBBPredictor
- >>> args = dict(model="yolo11n-obb.pt", source=ASSETS)
- >>> predictor = OBBPredictor(overrides=args)
  """
  super().__init__(cfg, overrides, _callbacks)
  self.args.task = "obb"
ultralytics/models/yolo/pose/predict.py CHANGED
@@ -1,7 +1,7 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

  from ultralytics.models.yolo.detect.predict import DetectionPredictor
- from ultralytics.utils import DEFAULT_CFG, LOGGER, ops
+ from ultralytics.utils import DEFAULT_CFG, ops


  class PosePredictor(DetectionPredictor):
@@ -35,21 +35,9 @@ class PosePredictor(DetectionPredictor):
  cfg (Any): Configuration for the predictor.
  overrides (dict, optional): Configuration overrides that take precedence over cfg.
  _callbacks (list, optional): List of callback functions to be invoked during prediction.
-
- Examples:
- >>> from ultralytics.utils import ASSETS
- >>> from ultralytics.models.yolo.pose import PosePredictor
- >>> args = dict(model="yolo11n-pose.pt", source=ASSETS)
- >>> predictor = PosePredictor(overrides=args)
- >>> predictor.predict_cli()
  """
  super().__init__(cfg, overrides, _callbacks)
  self.args.task = "pose"
- if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
- LOGGER.warning(
- "Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. "
- "See https://github.com/ultralytics/ultralytics/issues/4031."
- )

  def construct_result(self, pred, img, orig_img, img_path):
  """Construct the result object from the prediction, including keypoints.
ultralytics/models/yolo/pose/train.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any

  from ultralytics.models import yolo
  from ultralytics.nn.tasks import PoseModel
- from ultralytics.utils import DEFAULT_CFG, LOGGER
+ from ultralytics.utils import DEFAULT_CFG


  class PoseTrainer(yolo.detect.DetectionTrainer):
@@ -54,12 +54,6 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
  overrides["task"] = "pose"
  super().__init__(cfg, overrides, _callbacks)

- if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
- LOGGER.warning(
- "Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. "
- "See https://github.com/ultralytics/ultralytics/issues/4031."
- )
-
  def get_model(
  self,
  cfg: str | Path | dict[str, Any] | None = None,
ultralytics/models/yolo/pose/val.py CHANGED
@@ -9,7 +9,7 @@ import numpy as np
  import torch

  from ultralytics.models.yolo.detect import DetectionValidator
- from ultralytics.utils import LOGGER, ops
+ from ultralytics.utils import ops
  from ultralytics.utils.metrics import OKS_SIGMA, PoseMetrics, kpt_iou


@@ -45,6 +45,11 @@ class PoseValidator(DetectionValidator):
  >>> args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml")
  >>> validator = PoseValidator(args=args)
  >>> validator()
+
+ Notes:
+ This class extends DetectionValidator with pose-specific functionality. It initializes with sigma values
+ for OKS calculation and sets up PoseMetrics for evaluation. A warning is displayed when using Apple MPS
+ due to a known bug with pose models.
  """

  def __init__(self, dataloader=None, save_dir=None, args=None, _callbacks=None) -> None:
@@ -58,28 +63,12 @@ class PoseValidator(DetectionValidator):
  save_dir (Path | str, optional): Directory to save results.
  args (dict, optional): Arguments for the validator including task set to "pose".
  _callbacks (list, optional): List of callback functions to be executed during validation.
-
- Examples:
- >>> from ultralytics.models.yolo.pose import PoseValidator
- >>> args = dict(model="yolo11n-pose.pt", data="coco8-pose.yaml")
- >>> validator = PoseValidator(args=args)
- >>> validator()
-
- Notes:
- This class extends DetectionValidator with pose-specific functionality. It initializes with sigma values
- for OKS calculation and sets up PoseMetrics for evaluation. A warning is displayed when using Apple MPS
- due to a known bug with pose models.
  """
  super().__init__(dataloader, save_dir, args, _callbacks)
  self.sigma = None
  self.kpt_shape = None
  self.args.task = "pose"
  self.metrics = PoseMetrics()
- if isinstance(self.args.device, str) and self.args.device.lower() == "mps":
- LOGGER.warning(
- "Apple MPS known Pose bug. Recommend 'device=cpu' for Pose models. "
- "See https://github.com/ultralytics/ultralytics/issues/4031."
- )

  def preprocess(self, batch: dict[str, Any]) -> dict[str, Any]:
  """Preprocess batch by converting keypoints data to float and moving it to the device."""
ultralytics/models/yolo/world/train_world.py CHANGED
@@ -61,24 +61,6 @@ class WorldTrainerFromScratch(WorldTrainer):
  cfg (dict): Configuration dictionary with default parameters for model training.
  overrides (dict, optional): Dictionary of parameter overrides to customize the configuration.
  _callbacks (list, optional): List of callback functions to be executed during different stages of training.
-
- Examples:
- >>> from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch
- >>> from ultralytics import YOLOWorld
- >>> data = dict(
- ... train=dict(
- ... yolo_data=["Objects365.yaml"],
- ... grounding_data=[
- ... dict(
- ... img_path="flickr30k/images",
- ... json_file="flickr30k/final_flickr_separateGT_train.json",
- ... ),
- ... ],
- ... ),
- ... val=dict(yolo_data=["lvis.yaml"]),
- ... )
- >>> model = YOLOWorld("yolov8s-worldv2.yaml")
- >>> model.train(data=data, trainer=WorldTrainerFromScratch)
  """
  if overrides is None:
  overrides = {}
ultralytics/nn/text_model.py CHANGED
@@ -77,11 +77,6 @@ class CLIP(TextModel):
  Args:
  size (str): Model size identifier (e.g., 'ViT-B/32').
  device (torch.device): Device to load the model on.
-
- Examples:
- >>> import torch
- >>> clip_model = CLIP("ViT-B/32", device=torch.device("cuda:0"))
- >>> text_features = clip_model.encode_text(["a photo of a cat", "a photo of a dog"])
  """
  super().__init__()
  self.model, self.image_preprocess = clip.load(size, device=device)
@@ -199,12 +194,6 @@ class MobileCLIP(TextModel):
  Args:
  size (str): Model size identifier (e.g., 's0', 's1', 's2', 'b', 'blt').
  device (torch.device): Device to load the model on.
-
- Examples:
- >>> import torch
- >>> model = MobileCLIP("s0", device=torch.device("cpu"))
- >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
- >>> features = model.encode_text(tokens)
  """
  try:
  import warnings
@@ -299,11 +288,6 @@ class MobileCLIPTS(TextModel):

  Args:
  device (torch.device): Device to load the model on.
-
- Examples:
- >>> model = MobileCLIPTS(device=torch.device("cpu"))
- >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
- >>> features = model.encode_text(tokens)
  """
  super().__init__()
  from ultralytics.utils.downloads import attempt_download_asset
ultralytics/trackers/bot_sort.py CHANGED
@@ -64,14 +64,6 @@ class BOTrack(STrack):
  cls (int): Class ID of the detected object.
  feat (np.ndarray, optional): Feature vector associated with the detection.
  feat_history (int): Maximum length of the feature history deque.
-
- Examples:
- Initialize a BOTrack object with bounding box, score, class ID, and feature vector
- >>> xywh = np.array([100, 150, 60, 50])
- >>> score = 0.9
- >>> cls = 1
- >>> feat = np.random.rand(128)
- >>> bo_track = BOTrack(xywh, score, cls, feat)
  """
  super().__init__(xywh, score, cls)

@@ -184,11 +176,6 @@ class BOTSORT(BYTETracker):
  Args:
  args (Any): Parsed command-line arguments containing tracking parameters.
  frame_rate (int): Frame rate of the video being processed.
-
- Examples:
- Initialize BOTSORT with command-line arguments and a specified frame rate:
- >>> args = parse_args()
- >>> bot_sort = BOTSORT(args, frame_rate=30)
  """
  super().__init__(args, frame_rate)
  self.gmc = GMC(method=args.gmc_method)
ultralytics/trackers/byte_tracker.py CHANGED
@@ -60,12 +60,6 @@ class STrack(BaseTrack):
  y) is the center, (w, h) are width and height, [a] is optional aspect ratio, and idx is the id.
  score (float): Confidence score of the detection.
  cls (Any): Class label for the detected object.
-
- Examples:
- >>> xywh = [100.0, 150.0, 50.0, 75.0, 1]
- >>> score = 0.9
- >>> cls = "person"
- >>> track = STrack(xywh, score, cls)
  """
  super().__init__()
  # xywh+idx or xywha+idx
@@ -275,11 +269,6 @@ class BYTETracker:
  Args:
  args (Namespace): Command-line arguments containing tracking parameters.
  frame_rate (int): Frame rate of the video sequence.
-
- Examples:
- Initialize BYTETracker with command-line arguments and a frame rate of 30
- >>> args = Namespace(track_buffer=30)
- >>> tracker = BYTETracker(args, frame_rate=30)
  """
  self.tracked_stracks = [] # type: list[STrack]
  self.lost_stracks = [] # type: list[STrack]
ultralytics/trackers/utils/gmc.py CHANGED
@@ -47,10 +47,6 @@ class GMC:
  Args:
  method (str): The tracking method to use. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
  downscale (int): Downscale factor for processing frames.
-
- Examples:
- Initialize a GMC object with the 'sparseOptFlow' method and a downscale factor of 2
- >>> gmc = GMC(method="sparseOptFlow", downscale=2)
  """
  super().__init__()

ultralytics/trackers/utils/kalman_filter.py CHANGED
@@ -42,10 +42,6 @@ class KalmanFilterXYAH:
  represents the bounding box center position, 'a' is the aspect ratio, 'h' is the height, and their respective
  velocities are (vx, vy, va, vh). The filter uses a constant velocity model for object motion and a linear
  observation model for bounding box location.
-
- Examples:
- Initialize a Kalman filter for tracking:
- >>> kf = KalmanFilterXYAH()
  """
  ndim, dt = 4, 1.0

ultralytics/utils/__init__.py CHANGED
@@ -1181,7 +1181,8 @@ class JSONDict(dict):
  try:
  if self.file_path.exists():
  with open(self.file_path) as f:
- self.update(json.load(f))
+ # Use the base dict update to avoid persisting during reads
+ super().update(json.load(f))
  except json.JSONDecodeError:
  LOGGER.warning(f"Error decoding JSON from {self.file_path}. Starting with an empty dictionary.")
  except Exception as e:
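The inline comment in the JSONDict hunk above states the intent: JSONDict's own update() persists every mutation back to the JSON file, so calling self.update() while loading would re-save the file in the middle of a read, whereas super().update() only fills the in-memory dict. A minimal sketch of that pattern, using hypothetical names (PersistentDict, _load, _save) rather than the actual Ultralytics implementation:

import json
from pathlib import Path


class PersistentDict(dict):
    """A dict that mirrors its contents to a JSON file on every mutation."""

    def __init__(self, file_path="data.json"):
        super().__init__()
        self.file_path = Path(file_path)
        self._load()

    def _load(self):
        """Read existing data without triggering a save."""
        if self.file_path.exists():
            with open(self.file_path) as f:
                # super().update() populates the in-memory dict only;
                # self.update() here would call _save() during a read.
                super().update(json.load(f))

    def _save(self):
        """Write the current contents back to disk."""
        self.file_path.write_text(json.dumps(dict(self), indent=2))

    def update(self, *args, **kwargs):
        """User-facing update that persists the change."""
        super().update(*args, **kwargs)
        self._save()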
ultralytics/utils/benchmarks.py CHANGED
@@ -423,12 +423,6 @@ class ProfileModels:
  trt (bool): Flag to indicate whether to profile using TensorRT.
  device (torch.device | str | None): Device used for profiling. If None, it is determined automatically.

- Examples:
- Initialize and profile models
- >>> from ultralytics.utils.benchmarks import ProfileModels
- >>> profiler = ProfileModels(["yolo11n.yaml", "yolov8s.yaml"], imgsz=640)
- >>> profiler.run()
-
  Notes:
  FP16 'half' argument option removed for ONNX as slower on CPU than FP32.
  """
ultralytics/utils/errors.py CHANGED
@@ -31,11 +31,5 @@ class HUBModelError(Exception):

  Args:
  message (str, optional): The error message to display when the exception is raised.
-
- Examples:
- >>> try:
- ... raise HUBModelError("Custom model error message")
- ... except HUBModelError as e:
- ... print(e)
  """
  super().__init__(emojis(message))
ultralytics/utils/metrics.py CHANGED
@@ -15,7 +15,10 @@ import torch
  from ultralytics.utils import LOGGER, DataExportMixin, SimpleClass, TryExcept, checks, plt_settings

  OKS_SIGMA = (
- np.array([0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89])
+ np.array(
+ [0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72, 0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89],
+ dtype=np.float32,
+ )
  / 10.0
  )

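On the OKS_SIGMA change above: np.array() infers float64 for a list of Python floats, so the rewritten constant pins dtype=np.float32, presumably to keep the keypoint-similarity sigmas, and arithmetic built on them such as kpt_iou, in single precision. A small standalone check of that NumPy behavior, not code from the package:

import numpy as np

# Without an explicit dtype, a list of Python floats becomes a float64 array.
default = np.array([0.26, 0.25, 0.25]) / 10.0
print(default.dtype)  # float64

# Pinning float32 keeps the constant, and values derived from it, in single precision.
pinned = np.array([0.26, 0.25, 0.25], dtype=np.float32) / 10.0
print(pinned.dtype)  # float32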
ultralytics/utils/tqdm.py CHANGED
@@ -109,11 +109,6 @@ class TQDM:
  bar_format (str, optional): Custom bar format string.
  initial (int, optional): Initial counter value.
  **kwargs (Any): Additional keyword arguments for compatibility (ignored).
-
- Examples:
- >>> pbar = TQDM(range(100), desc="Processing")
- >>> with TQDM(total=1000, unit="B", unit_scale=True) as pbar:
- ... pbar.update(1024) # Updates by 1KB
  """
  # Disable if not verbose
  if disable is None: