dgenerate-ultralytics-headless 8.3.141__py3-none-any.whl → 8.3.144__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/METADATA +1 -1
- dgenerate_ultralytics_headless-8.3.144.dist-info/RECORD +272 -0
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +12 -12
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +22 -19
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +39 -39
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +187 -158
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +13 -11
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +6 -3
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +18 -12
- ultralytics/solutions/object_cropper.py +12 -5
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +215 -85
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +42 -28
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +84 -42
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- dgenerate_ultralytics_headless-8.3.141.dist-info/RECORD +0 -272
- {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.141.dist-info → dgenerate_ultralytics_headless-8.3.144.dist-info}/top_level.txt +0 -0
@@ -37,9 +37,11 @@ class SAMModel(nn.Module):
|
|
37
37
|
image_encoder (ImageEncoderViT): Backbone for encoding images into embeddings.
|
38
38
|
prompt_encoder (PromptEncoder): Encoder for various types of input prompts.
|
39
39
|
mask_decoder (MaskDecoder): Predicts object masks from image and prompt embeddings.
|
40
|
+
pixel_mean (torch.Tensor): Mean values for normalizing pixels in the input image.
|
41
|
+
pixel_std (torch.Tensor): Standard deviation values for normalizing pixels in the input image.
|
40
42
|
|
41
43
|
Methods:
|
42
|
-
|
44
|
+
set_imgsz: Set image size to make model compatible with different image sizes.
|
43
45
|
|
44
46
|
Examples:
|
45
47
|
>>> image_encoder = ImageEncoderViT(...)
|
@@ -70,7 +72,7 @@ class SAMModel(nn.Module):
|
|
70
72
|
prompt_encoder (PromptEncoder): Encodes various types of input prompts.
|
71
73
|
mask_decoder (MaskDecoder): Predicts masks from the image embeddings and encoded prompts.
|
72
74
|
pixel_mean (List[float]): Mean values for normalizing pixels in the input image.
|
73
|
-
pixel_std (List[float]):
|
75
|
+
pixel_std (List[float]): Standard deviation values for normalizing pixels in the input image.
|
74
76
|
|
75
77
|
Examples:
|
76
78
|
>>> image_encoder = ImageEncoderViT(...)
|
@@ -90,12 +92,7 @@ class SAMModel(nn.Module):
|
|
90
92
|
self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False)
|
91
93
|
|
92
94
|
def set_imgsz(self, imgsz):
|
93
|
-
"""
|
94
|
-
Set image size to make model compatible with different image sizes.
|
95
|
-
|
96
|
-
Args:
|
97
|
-
imgsz (Tuple[int, int]): The size of the input image.
|
98
|
-
"""
|
95
|
+
"""Set image size to make model compatible with different image sizes."""
|
99
96
|
if hasattr(self.image_encoder, "set_imgsz"):
|
100
97
|
self.image_encoder.set_imgsz(imgsz)
|
101
98
|
self.prompt_encoder.input_image_size = imgsz
|
@@ -124,10 +121,48 @@ class SAM2Model(torch.nn.Module):
|
|
124
121
|
sam_mask_decoder (SAM2MaskDecoder): Decoder for generating object masks.
|
125
122
|
obj_ptr_proj (nn.Module): Projection layer for object pointers.
|
126
123
|
obj_ptr_tpos_proj (nn.Module): Projection for temporal positional encoding in object pointers.
|
124
|
+
hidden_dim (int): Hidden dimension of the model.
|
125
|
+
mem_dim (int): Memory dimension for encoding features.
|
126
|
+
use_high_res_features_in_sam (bool): Whether to use high-resolution feature maps in the SAM mask decoder.
|
127
|
+
use_obj_ptrs_in_encoder (bool): Whether to cross-attend to object pointers from other frames in the encoder.
|
128
|
+
max_obj_ptrs_in_encoder (int): Maximum number of object pointers from other frames in encoder cross-attention.
|
129
|
+
add_tpos_enc_to_obj_ptrs (bool): Whether to add temporal positional encoding to object pointers.
|
130
|
+
proj_tpos_enc_in_obj_ptrs (bool): Whether to add an extra linear projection layer for temporal positional
|
131
|
+
encoding in object pointers.
|
132
|
+
use_signed_tpos_enc_to_obj_ptrs (bool): Whether to use signed distance in temporal positional encoding.
|
133
|
+
only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past during
|
134
|
+
evaluation.
|
135
|
+
pred_obj_scores (bool): Whether to predict if there is an object in the frame.
|
136
|
+
pred_obj_scores_mlp (bool): Whether to use an MLP to predict object scores.
|
137
|
+
fixed_no_obj_ptr (bool): Whether to have a fixed no-object pointer when there is no object present.
|
138
|
+
soft_no_obj_ptr (bool): Whether to mix in no-object pointer softly for easier recovery and error mitigation.
|
139
|
+
use_mlp_for_obj_ptr_proj (bool): Whether to use MLP for object pointer projection.
|
140
|
+
no_obj_embed_spatial (torch.Tensor | None): No-object embedding for spatial frames.
|
141
|
+
max_cond_frames_in_attn (int): Maximum number of conditioning frames to participate in memory attention.
|
142
|
+
directly_add_no_mem_embed (bool): Whether to directly add no-memory embedding to image feature on the
|
143
|
+
first frame.
|
144
|
+
multimask_output_in_sam (bool): Whether to output multiple masks for the first click on initial
|
145
|
+
conditioning frames.
|
146
|
+
multimask_min_pt_num (int): Minimum number of clicks to use multimask output in SAM.
|
147
|
+
multimask_max_pt_num (int): Maximum number of clicks to use multimask output in SAM.
|
148
|
+
multimask_output_for_tracking (bool): Whether to use multimask output for tracking.
|
149
|
+
use_multimask_token_for_obj_ptr (bool): Whether to use multimask tokens for object pointers.
|
150
|
+
iou_prediction_use_sigmoid (bool): Whether to use sigmoid to restrict IoU prediction to [0-1].
|
151
|
+
memory_temporal_stride_for_eval (int): Memory bank's temporal stride during evaluation.
|
152
|
+
non_overlap_masks_for_mem_enc (bool): Whether to apply non-overlapping constraints on object masks in
|
153
|
+
memory encoder during evaluation.
|
154
|
+
sigmoid_scale_for_mem_enc (float): Scale factor for mask sigmoid probability.
|
155
|
+
sigmoid_bias_for_mem_enc (float): Bias factor for mask sigmoid probability.
|
156
|
+
binarize_mask_from_pts_for_mem_enc (bool): Whether to binarize sigmoid mask logits on interacted frames
|
157
|
+
with clicks during evaluation.
|
158
|
+
use_mask_input_as_output_without_sam (bool): Whether to directly output the input mask without using SAM
|
159
|
+
prompt encoder and mask decoder on frames with mask input.
|
127
160
|
|
128
161
|
Methods:
|
129
|
-
forward_image:
|
130
|
-
track_step:
|
162
|
+
forward_image: Process image batch through encoder to extract multi-level features.
|
163
|
+
track_step: Perform a single tracking step, updating object masks and memory features.
|
164
|
+
set_binarize: Set binarize for VideoPredictor.
|
165
|
+
set_imgsz: Set image size to make model compatible with different image sizes.
|
131
166
|
|
132
167
|
Examples:
|
133
168
|
>>> model = SAM2Model(image_encoder, memory_attention, memory_encoder)
|
@@ -183,7 +218,7 @@ class SAM2Model(torch.nn.Module):
|
|
183
218
|
image_encoder (nn.Module): Visual encoder for extracting image features.
|
184
219
|
memory_attention (nn.Module): Module for attending to memory features.
|
185
220
|
memory_encoder (nn.Module): Encoder for generating memory representations.
|
186
|
-
num_maskmem (int): Number of accessible memory frames.
|
221
|
+
num_maskmem (int): Number of accessible memory frames.
|
187
222
|
image_size (int): Size of input images.
|
188
223
|
backbone_stride (int): Stride of the image backbone output.
|
189
224
|
sigmoid_scale_for_mem_enc (float): Scale factor for mask sigmoid probability.
|
@@ -193,11 +228,10 @@ class SAM2Model(torch.nn.Module):
|
|
193
228
|
use_mask_input_as_output_without_sam (bool): Whether to directly output the input mask without using SAM
|
194
229
|
prompt encoder and mask decoder on frames with mask input.
|
195
230
|
max_cond_frames_in_attn (int): Maximum number of conditioning frames to participate in memory attention.
|
196
|
-
-1 means no limit.
|
197
231
|
directly_add_no_mem_embed (bool): Whether to directly add no-memory embedding to image feature on the
|
198
232
|
first frame.
|
199
233
|
use_high_res_features_in_sam (bool): Whether to use high-resolution feature maps in the SAM mask decoder.
|
200
|
-
multimask_output_in_sam (bool): Whether to output multiple
|
234
|
+
multimask_output_in_sam (bool): Whether to output multiple masks for the first click on initial
|
201
235
|
conditioning frames.
|
202
236
|
multimask_min_pt_num (int): Minimum number of clicks to use multimask output in SAM.
|
203
237
|
multimask_max_pt_num (int): Maximum number of clicks to use multimask output in SAM.
|
@@ -214,9 +248,8 @@ class SAM2Model(torch.nn.Module):
|
|
214
248
|
the encoder.
|
215
249
|
proj_tpos_enc_in_obj_ptrs (bool): Whether to add an extra linear projection layer for temporal positional
|
216
250
|
encoding in object pointers.
|
217
|
-
use_signed_tpos_enc_to_obj_ptrs (bool): Whether to use signed distance
|
218
|
-
in the
|
219
|
-
`use_obj_ptrs_in_encoder=True` and `add_tpos_enc_to_obj_ptrs=True`.
|
251
|
+
use_signed_tpos_enc_to_obj_ptrs (bool): Whether to use signed distance in the temporal positional encoding
|
252
|
+
in the object pointers.
|
220
253
|
only_obj_ptrs_in_the_past_for_eval (bool): Whether to only attend to object pointers in the past
|
221
254
|
during evaluation.
|
222
255
|
pred_obj_scores (bool): Whether to predict if there is an object in the frame.
|
@@ -225,7 +258,7 @@ class SAM2Model(torch.nn.Module):
|
|
225
258
|
soft_no_obj_ptr (bool): Whether to mix in no-object pointer softly for easier recovery and error mitigation.
|
226
259
|
use_mlp_for_obj_ptr_proj (bool): Whether to use MLP for object pointer projection.
|
227
260
|
no_obj_embed_spatial (bool): Whether to add the no-object embedding to spatial frames.
|
228
|
-
sam_mask_decoder_extra_args (
|
261
|
+
sam_mask_decoder_extra_args (dict | None): Extra arguments for constructing the SAM mask decoder.
|
229
262
|
compile_image_encoder (bool): Whether to compile the image encoder for faster inference.
|
230
263
|
|
231
264
|
Examples:
|
@@ -419,15 +452,13 @@ class SAM2Model(torch.nn.Module):
|
|
419
452
|
output only 1 mask and its IoU estimate.
|
420
453
|
|
421
454
|
Returns:
|
422
|
-
(
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
object_score_logits: Tensor of shape (B) with object score logits.
|
430
|
-
Where M is 3 if multimask_output=True, and 1 if multimask_output=False.
|
455
|
+
low_res_multimasks (torch.Tensor): Tensor of shape (B, M, H*4, W*4) with SAM output mask logits.
|
456
|
+
high_res_multimasks (torch.Tensor): Tensor of shape (B, M, H*16, W*16) with upsampled mask logits.
|
457
|
+
ious (torch.Tensor): Tensor of shape (B, M) with estimated IoU for each output mask.
|
458
|
+
low_res_masks (torch.Tensor): Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask.
|
459
|
+
high_res_masks (torch.Tensor): Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask.
|
460
|
+
obj_ptr (torch.Tensor): Tensor of shape (B, C) with object pointer vector for the output mask.
|
461
|
+
object_score_logits (torch.Tensor): Tensor of shape (B) with object score logits.
|
431
462
|
|
432
463
|
Examples:
|
433
464
|
>>> backbone_features = torch.rand(1, 256, 32, 32)
|