dgenerate-ultralytics-headless 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/METADATA +1 -1
  2. dgenerate_ultralytics_headless-8.3.144.dist-info/RECORD +272 -0
  3. tests/conftest.py +7 -24
  4. tests/test_cli.py +1 -1
  5. tests/test_cuda.py +7 -2
  6. tests/test_engine.py +7 -8
  7. tests/test_exports.py +16 -16
  8. tests/test_integrations.py +1 -1
  9. tests/test_solutions.py +11 -11
  10. ultralytics/__init__.py +1 -1
  11. ultralytics/cfg/__init__.py +16 -13
  12. ultralytics/data/annotator.py +6 -5
  13. ultralytics/data/augment.py +127 -126
  14. ultralytics/data/base.py +54 -51
  15. ultralytics/data/build.py +47 -23
  16. ultralytics/data/converter.py +47 -43
  17. ultralytics/data/dataset.py +51 -50
  18. ultralytics/data/loaders.py +77 -44
  19. ultralytics/data/split.py +22 -9
  20. ultralytics/data/split_dota.py +63 -39
  21. ultralytics/data/utils.py +59 -39
  22. ultralytics/engine/exporter.py +79 -27
  23. ultralytics/engine/model.py +39 -39
  24. ultralytics/engine/predictor.py +37 -28
  25. ultralytics/engine/results.py +187 -157
  26. ultralytics/engine/trainer.py +36 -19
  27. ultralytics/engine/tuner.py +12 -9
  28. ultralytics/engine/validator.py +7 -9
  29. ultralytics/hub/__init__.py +11 -13
  30. ultralytics/hub/auth.py +22 -2
  31. ultralytics/hub/google/__init__.py +19 -19
  32. ultralytics/hub/session.py +37 -51
  33. ultralytics/hub/utils.py +19 -5
  34. ultralytics/models/fastsam/model.py +30 -12
  35. ultralytics/models/fastsam/predict.py +5 -6
  36. ultralytics/models/fastsam/utils.py +3 -3
  37. ultralytics/models/fastsam/val.py +10 -6
  38. ultralytics/models/nas/model.py +9 -5
  39. ultralytics/models/nas/predict.py +6 -6
  40. ultralytics/models/nas/val.py +3 -3
  41. ultralytics/models/rtdetr/model.py +7 -6
  42. ultralytics/models/rtdetr/predict.py +14 -7
  43. ultralytics/models/rtdetr/train.py +10 -4
  44. ultralytics/models/rtdetr/val.py +36 -9
  45. ultralytics/models/sam/amg.py +30 -12
  46. ultralytics/models/sam/build.py +22 -22
  47. ultralytics/models/sam/model.py +10 -9
  48. ultralytics/models/sam/modules/blocks.py +76 -80
  49. ultralytics/models/sam/modules/decoders.py +6 -8
  50. ultralytics/models/sam/modules/encoders.py +23 -26
  51. ultralytics/models/sam/modules/memory_attention.py +13 -1
  52. ultralytics/models/sam/modules/sam.py +57 -26
  53. ultralytics/models/sam/modules/tiny_encoder.py +232 -237
  54. ultralytics/models/sam/modules/transformer.py +13 -13
  55. ultralytics/models/sam/modules/utils.py +11 -19
  56. ultralytics/models/sam/predict.py +114 -101
  57. ultralytics/models/utils/loss.py +98 -77
  58. ultralytics/models/utils/ops.py +116 -67
  59. ultralytics/models/yolo/classify/predict.py +5 -5
  60. ultralytics/models/yolo/classify/train.py +32 -28
  61. ultralytics/models/yolo/classify/val.py +7 -8
  62. ultralytics/models/yolo/detect/predict.py +1 -0
  63. ultralytics/models/yolo/detect/train.py +15 -14
  64. ultralytics/models/yolo/detect/val.py +37 -36
  65. ultralytics/models/yolo/model.py +106 -23
  66. ultralytics/models/yolo/obb/predict.py +3 -4
  67. ultralytics/models/yolo/obb/train.py +14 -6
  68. ultralytics/models/yolo/obb/val.py +29 -23
  69. ultralytics/models/yolo/pose/predict.py +9 -8
  70. ultralytics/models/yolo/pose/train.py +24 -16
  71. ultralytics/models/yolo/pose/val.py +44 -26
  72. ultralytics/models/yolo/segment/predict.py +5 -5
  73. ultralytics/models/yolo/segment/train.py +11 -7
  74. ultralytics/models/yolo/segment/val.py +2 -2
  75. ultralytics/models/yolo/world/train.py +33 -23
  76. ultralytics/models/yolo/world/train_world.py +11 -3
  77. ultralytics/models/yolo/yoloe/predict.py +11 -11
  78. ultralytics/models/yolo/yoloe/train.py +73 -21
  79. ultralytics/models/yolo/yoloe/train_seg.py +10 -7
  80. ultralytics/models/yolo/yoloe/val.py +42 -18
  81. ultralytics/nn/autobackend.py +59 -15
  82. ultralytics/nn/modules/__init__.py +4 -4
  83. ultralytics/nn/modules/activation.py +4 -1
  84. ultralytics/nn/modules/block.py +178 -111
  85. ultralytics/nn/modules/conv.py +6 -5
  86. ultralytics/nn/modules/head.py +469 -121
  87. ultralytics/nn/modules/transformer.py +147 -58
  88. ultralytics/nn/tasks.py +227 -20
  89. ultralytics/nn/text_model.py +30 -33
  90. ultralytics/solutions/ai_gym.py +1 -1
  91. ultralytics/solutions/analytics.py +7 -4
  92. ultralytics/solutions/config.py +10 -10
  93. ultralytics/solutions/distance_calculation.py +11 -10
  94. ultralytics/solutions/heatmap.py +1 -1
  95. ultralytics/solutions/instance_segmentation.py +6 -3
  96. ultralytics/solutions/object_blurrer.py +3 -3
  97. ultralytics/solutions/object_counter.py +15 -7
  98. ultralytics/solutions/object_cropper.py +3 -2
  99. ultralytics/solutions/parking_management.py +29 -28
  100. ultralytics/solutions/queue_management.py +6 -6
  101. ultralytics/solutions/region_counter.py +10 -3
  102. ultralytics/solutions/security_alarm.py +3 -3
  103. ultralytics/solutions/similarity_search.py +85 -24
  104. ultralytics/solutions/solutions.py +184 -75
  105. ultralytics/solutions/speed_estimation.py +28 -22
  106. ultralytics/solutions/streamlit_inference.py +17 -12
  107. ultralytics/solutions/trackzone.py +4 -4
  108. ultralytics/trackers/basetrack.py +16 -23
  109. ultralytics/trackers/bot_sort.py +30 -20
  110. ultralytics/trackers/byte_tracker.py +70 -64
  111. ultralytics/trackers/track.py +4 -8
  112. ultralytics/trackers/utils/gmc.py +31 -58
  113. ultralytics/trackers/utils/kalman_filter.py +37 -37
  114. ultralytics/trackers/utils/matching.py +1 -1
  115. ultralytics/utils/__init__.py +105 -89
  116. ultralytics/utils/autobatch.py +16 -3
  117. ultralytics/utils/autodevice.py +54 -24
  118. ultralytics/utils/benchmarks.py +42 -28
  119. ultralytics/utils/callbacks/base.py +3 -3
  120. ultralytics/utils/callbacks/clearml.py +9 -9
  121. ultralytics/utils/callbacks/comet.py +67 -25
  122. ultralytics/utils/callbacks/dvc.py +7 -10
  123. ultralytics/utils/callbacks/mlflow.py +2 -5
  124. ultralytics/utils/callbacks/neptune.py +7 -13
  125. ultralytics/utils/callbacks/raytune.py +1 -1
  126. ultralytics/utils/callbacks/tensorboard.py +5 -6
  127. ultralytics/utils/callbacks/wb.py +14 -14
  128. ultralytics/utils/checks.py +14 -13
  129. ultralytics/utils/dist.py +5 -5
  130. ultralytics/utils/downloads.py +94 -67
  131. ultralytics/utils/errors.py +5 -5
  132. ultralytics/utils/export.py +61 -47
  133. ultralytics/utils/files.py +23 -22
  134. ultralytics/utils/instance.py +48 -52
  135. ultralytics/utils/loss.py +78 -40
  136. ultralytics/utils/metrics.py +186 -130
  137. ultralytics/utils/ops.py +186 -190
  138. ultralytics/utils/patches.py +15 -17
  139. ultralytics/utils/plotting.py +71 -27
  140. ultralytics/utils/tal.py +21 -15
  141. ultralytics/utils/torch_utils.py +53 -50
  142. ultralytics/utils/triton.py +5 -4
  143. ultralytics/utils/tuner.py +5 -5
  144. dgenerate_ultralytics_headless-8.3.143.dist-info/RECORD +0 -272
  145. {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/WHEEL +0 -0
  146. {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/entry_points.txt +0 -0
  147. {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/licenses/LICENSE +0 -0
  148. {dgenerate_ultralytics_headless-8.3.143.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/top_level.txt +0 -0
@@ -37,9 +37,11 @@ class SAMModel(nn.Module):
37
37
  image_encoder (ImageEncoderViT): Backbone for encoding images into embeddings.
38
38
  prompt_encoder (PromptEncoder): Encoder for various types of input prompts.
39
39
  mask_decoder (MaskDecoder): Predicts object masks from image and prompt embeddings.
40
+ pixel_mean (torch.Tensor): Mean values for normalizing pixels in the input image.
41
+ pixel_std (torch.Tensor): Standard deviation values for normalizing pixels in the input image.
40
42
 
41
43
  Methods:
42
- __init__: Initializes the SAMModel with encoders, decoder, and normalization parameters.
44
+ set_imgsz: Set image size to make model compatible with different image sizes.
43
45
 
44
46
  Examples:
45
47
  >>> image_encoder = ImageEncoderViT(...)
@@ -70,7 +72,7 @@ class SAMModel(nn.Module):
70
72
  prompt_encoder (PromptEncoder): Encodes various types of input prompts.
71
73
  mask_decoder (MaskDecoder): Predicts masks from the image embeddings and encoded prompts.
72
74
  pixel_mean (List[float]): Mean values for normalizing pixels in the input image.
73
- pixel_std (List[float]): Std values for normalizing pixels in the input image.
75
+ pixel_std (List[float]): Standard deviation values for normalizing pixels in the input image.
74
76
 
75
77
  Examples:
76
78
  >>> image_encoder = ImageEncoderViT(...)
@@ -90,12 +92,7 @@ class SAMModel(nn.Module):
90
92
  self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False)
91
93
 
92
94
  def set_imgsz(self, imgsz):
93
- """
94
- Set image size to make model compatible with different image sizes.
95
-
96
- Args:
97
- imgsz (Tuple[int, int]): The size of the input image.
98
- """
95
+ """Set image size to make model compatible with different image sizes."""
99
96
  if hasattr(self.image_encoder, "set_imgsz"):
100
97
  self.image_encoder.set_imgsz(imgsz)
101
98
  self.prompt_encoder.input_image_size = imgsz
@@ -124,10 +121,48 @@ class SAM2Model(torch.nn.Module):
124
121
  sam_mask_decoder (SAM2MaskDecoder): Decoder for generating object masks.
125
122
  obj_ptr_proj (nn.Module): Projection layer for object pointers.
126
123
  obj_ptr_tpos_proj (nn.Module): Projection for temporal positional encoding in object pointers.
124
+ hidden_dim (int): Hidden dimension of the model.
125
+ mem_dim (int): Memory dimension for encoding features.
126
+ use_high_res_features_in_sam (bool): Whether to use high-resolution feature maps in the SAM mask decoder.
127
+ use_obj_ptrs_in_encoder (bool): Whether to cross-attend to object pointers from other frames in the encoder.
128
+ max_obj_ptrs_in_encoder (int): Maximum number of object pointers from other frames in encoder cross-attention.
129
+ add_tpos_enc_to_obj_ptrs (bool): Whether to add temporal positional encoding to object pointers.
130
+ proj_tpos_enc_in_obj_ptrs (bool): Whether to add an extra linear projection layer for temporal positional
131
+ encoding in object pointers.
132
+ use_signed_tpos_enc_to_obj_ptrs (bool): Whether to use signed distance in temporal positional encoding.
133
+ only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past during
134
+ evaluation.
135
+ pred_obj_scores (bool): Whether to predict if there is an object in the frame.
136
+ pred_obj_scores_mlp (bool): Whether to use an MLP to predict object scores.
137
+ fixed_no_obj_ptr (bool): Whether to have a fixed no-object pointer when there is no object present.
138
+ soft_no_obj_ptr (bool): Whether to mix in no-object pointer softly for easier recovery and error mitigation.
139
+ use_mlp_for_obj_ptr_proj (bool): Whether to use MLP for object pointer projection.
140
+ no_obj_embed_spatial (torch.Tensor | None): No-object embedding for spatial frames.
141
+ max_cond_frames_in_attn (int): Maximum number of conditioning frames to participate in memory attention.
142
+ directly_add_no_mem_embed (bool): Whether to directly add no-memory embedding to image feature on the
143
+ first frame.
144
+ multimask_output_in_sam (bool): Whether to output multiple masks for the first click on initial
145
+ conditioning frames.
146
+ multimask_min_pt_num (int): Minimum number of clicks to use multimask output in SAM.
147
+ multimask_max_pt_num (int): Maximum number of clicks to use multimask output in SAM.
148
+ multimask_output_for_tracking (bool): Whether to use multimask output for tracking.
149
+ use_multimask_token_for_obj_ptr (bool): Whether to use multimask tokens for object pointers.
150
+ iou_prediction_use_sigmoid (bool): Whether to use sigmoid to restrict IoU prediction to [0-1].
151
+ memory_temporal_stride_for_eval (int): Memory bank's temporal stride during evaluation.
152
+ non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in
153
+ memory encoder during evaluation.
154
+ sigmoid_scale_for_mem_enc (float): Scale factor for mask sigmoid probability.
155
+ sigmoid_bias_for_mem_enc (float): Bias factor for mask sigmoid probability.
156
+ binarize_mask_from_pts_for_mem_enc (bool): Whether to binarize sigmoid mask logits on interacted frames
157
+ with clicks during evaluation.
158
+ use_mask_input_as_output_without_sam (bool): Whether to directly output the input mask without using SAM
159
+ prompt encoder and mask decoder on frames with mask input.
127
160
 
128
161
  Methods:
129
- forward_image: Processes image batch through encoder to extract multi-level features.
130
- track_step: Performs a single tracking step, updating object masks and memory features.
162
+ forward_image: Process image batch through encoder to extract multi-level features.
163
+ track_step: Perform a single tracking step, updating object masks and memory features.
164
+ set_binarize: Set binarize for VideoPredictor.
165
+ set_imgsz: Set image size to make model compatible with different image sizes.
131
166
 
132
167
  Examples:
133
168
  >>> model = SAM2Model(image_encoder, memory_attention, memory_encoder)
@@ -183,7 +218,7 @@ class SAM2Model(torch.nn.Module):
183
218
  image_encoder (nn.Module): Visual encoder for extracting image features.
184
219
  memory_attention (nn.Module): Module for attending to memory features.
185
220
  memory_encoder (nn.Module): Encoder for generating memory representations.
186
- num_maskmem (int): Number of accessible memory frames. Default is 7 (1 input frame + 6 previous frames).
221
+ num_maskmem (int): Number of accessible memory frames.
187
222
  image_size (int): Size of input images.
188
223
  backbone_stride (int): Stride of the image backbone output.
189
224
  sigmoid_scale_for_mem_enc (float): Scale factor for mask sigmoid probability.
@@ -193,11 +228,10 @@ class SAM2Model(torch.nn.Module):
193
228
  use_mask_input_as_output_without_sam (bool): Whether to directly output the input mask without using SAM
194
229
  prompt encoder and mask decoder on frames with mask input.
195
230
  max_cond_frames_in_attn (int): Maximum number of conditioning frames to participate in memory attention.
196
- -1 means no limit.
197
231
  directly_add_no_mem_embed (bool): Whether to directly add no-memory embedding to image feature on the
198
232
  first frame.
199
233
  use_high_res_features_in_sam (bool): Whether to use high-resolution feature maps in the SAM mask decoder.
200
- multimask_output_in_sam (bool): Whether to output multiple (3) masks for the first click on initial
234
+ multimask_output_in_sam (bool): Whether to output multiple masks for the first click on initial
201
235
  conditioning frames.
202
236
  multimask_min_pt_num (int): Minimum number of clicks to use multimask output in SAM.
203
237
  multimask_max_pt_num (int): Maximum number of clicks to use multimask output in SAM.
@@ -214,9 +248,8 @@ class SAM2Model(torch.nn.Module):
214
248
  the encoder.
215
249
  proj_tpos_enc_in_obj_ptrs (bool): Whether to add an extra linear projection layer for temporal positional
216
250
  encoding in object pointers.
217
- use_signed_tpos_enc_to_obj_ptrs (bool): Whether to use signed distance (instead of unsigned absolute distance)
218
- in the temporal positional encoding in the object pointers, only relevant when both
219
- `use_obj_ptrs_in_encoder=True` and `add_tpos_enc_to_obj_ptrs=True`.
251
+ use_signed_tpos_enc_to_obj_ptrs (bool): Whether to use signed distance in the temporal positional encoding
252
+ in the object pointers.
220
253
  only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past
221
254
  during evaluation.
222
255
  pred_obj_scores (bool): Whether to predict if there is an object in the frame.
@@ -225,7 +258,7 @@ class SAM2Model(torch.nn.Module):
225
258
  soft_no_obj_ptr (bool): Whether to mix in no-object pointer softly for easier recovery and error mitigation.
226
259
  use_mlp_for_obj_ptr_proj (bool): Whether to use MLP for object pointer projection.
227
260
  no_obj_embed_spatial (bool): Whether to add a no-object embedding to spatial frames.
228
- sam_mask_decoder_extra_args (Dict | None): Extra arguments for constructing the SAM mask decoder.
261
+ sam_mask_decoder_extra_args (dict | None): Extra arguments for constructing the SAM mask decoder.
229
262
  compile_image_encoder (bool): Whether to compile the image encoder for faster inference.
230
263
 
231
264
  Examples:
@@ -419,15 +452,13 @@ class SAM2Model(torch.nn.Module):
419
452
  output only 1 mask and its IoU estimate.
420
453
 
421
454
  Returns:
422
- (Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]):
423
- low_res_multimasks: Tensor of shape (B, M, H*4, W*4) with SAM output mask logits.
424
- high_res_multimasks: Tensor of shape (B, M, H*16, W*16) with upsampled mask logits.
425
- ious: Tensor of shape (B, M) with estimated IoU for each output mask.
426
- low_res_masks: Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask.
427
- high_res_masks: Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask.
428
- obj_ptr: Tensor of shape (B, C) with object pointer vector for the output mask.
429
- object_score_logits: Tensor of shape (B) with object score logits.
430
- Where M is 3 if multimask_output=True, and 1 if multimask_output=False.
455
+ low_res_multimasks (torch.Tensor): Tensor of shape (B, M, H*4, W*4) with SAM output mask logits.
456
+ high_res_multimasks (torch.Tensor): Tensor of shape (B, M, H*16, W*16) with upsampled mask logits.
457
+ ious (torch.Tensor): Tensor of shape (B, M) with estimated IoU for each output mask.
458
+ low_res_masks (torch.Tensor): Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask.
459
+ high_res_masks (torch.Tensor): Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask.
460
+ obj_ptr (torch.Tensor): Tensor of shape (B, C) with object pointer vector for the output mask.
461
+ object_score_logits (torch.Tensor): Tensor of shape (B) with object score logits.
431
462
 
432
463
  Examples:
433
464
  >>> backbone_features = torch.rand(1, 256, 32, 32)