dgenerate-ultralytics-headless 8.3.237-py3-none-any.whl → 8.3.239-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/RECORD +104 -105
- tests/test_exports.py +3 -1
- tests/test_python.py +2 -2
- tests/test_solutions.py +6 -6
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +4 -4
- ultralytics/cfg/datasets/Argoverse.yaml +7 -6
- ultralytics/cfg/datasets/DOTAv1.5.yaml +1 -1
- ultralytics/cfg/datasets/DOTAv1.yaml +1 -1
- ultralytics/cfg/datasets/VOC.yaml +15 -16
- ultralytics/cfg/datasets/african-wildlife.yaml +1 -1
- ultralytics/cfg/datasets/coco128-seg.yaml +1 -1
- ultralytics/cfg/datasets/dota8-multispectral.yaml +1 -1
- ultralytics/cfg/datasets/dota8.yaml +2 -2
- ultralytics/cfg/datasets/kitti.yaml +1 -1
- ultralytics/cfg/datasets/xView.yaml +16 -16
- ultralytics/cfg/models/11/yolo11-pose.yaml +1 -1
- ultralytics/cfg/models/11/yoloe-11-seg.yaml +2 -2
- ultralytics/cfg/models/11/yoloe-11.yaml +2 -2
- ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +9 -6
- ultralytics/cfg/models/v8/yoloe-v8.yaml +9 -6
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +2 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-p2.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-world.yaml +1 -1
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +6 -6
- ultralytics/data/augment.py +1 -1
- ultralytics/data/base.py +4 -2
- ultralytics/data/build.py +4 -4
- ultralytics/data/loaders.py +17 -12
- ultralytics/data/utils.py +4 -4
- ultralytics/engine/exporter.py +24 -16
- ultralytics/engine/predictor.py +5 -4
- ultralytics/engine/results.py +12 -13
- ultralytics/engine/trainer.py +2 -2
- ultralytics/engine/tuner.py +2 -3
- ultralytics/engine/validator.py +2 -2
- ultralytics/models/fastsam/model.py +2 -2
- ultralytics/models/fastsam/predict.py +2 -3
- ultralytics/models/fastsam/val.py +4 -4
- ultralytics/models/rtdetr/predict.py +2 -3
- ultralytics/models/rtdetr/val.py +5 -4
- ultralytics/models/sam/build.py +5 -5
- ultralytics/models/sam/build_sam3.py +9 -6
- ultralytics/models/sam/model.py +1 -1
- ultralytics/models/sam/modules/sam.py +10 -5
- ultralytics/models/sam/predict.py +24 -48
- ultralytics/models/sam/sam3/encoder.py +4 -4
- ultralytics/models/sam/sam3/geometry_encoders.py +3 -3
- ultralytics/models/sam/sam3/necks.py +17 -17
- ultralytics/models/sam/sam3/sam3_image.py +3 -21
- ultralytics/models/sam/sam3/vl_combiner.py +1 -6
- ultralytics/models/yolo/classify/val.py +1 -1
- ultralytics/models/yolo/detect/train.py +1 -1
- ultralytics/models/yolo/detect/val.py +7 -7
- ultralytics/models/yolo/obb/val.py +1 -1
- ultralytics/models/yolo/pose/val.py +1 -1
- ultralytics/models/yolo/segment/val.py +1 -1
- ultralytics/nn/autobackend.py +9 -9
- ultralytics/nn/modules/block.py +1 -1
- ultralytics/nn/tasks.py +3 -3
- ultralytics/nn/text_model.py +2 -7
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +6 -6
- ultralytics/solutions/config.py +1 -1
- ultralytics/solutions/distance_calculation.py +1 -1
- ultralytics/solutions/object_counter.py +1 -1
- ultralytics/solutions/object_cropper.py +3 -6
- ultralytics/solutions/parking_management.py +21 -17
- ultralytics/solutions/queue_management.py +5 -5
- ultralytics/solutions/region_counter.py +2 -2
- ultralytics/solutions/security_alarm.py +1 -1
- ultralytics/solutions/solutions.py +45 -22
- ultralytics/solutions/speed_estimation.py +1 -1
- ultralytics/trackers/basetrack.py +1 -1
- ultralytics/trackers/bot_sort.py +4 -3
- ultralytics/trackers/byte_tracker.py +4 -4
- ultralytics/trackers/utils/gmc.py +6 -7
- ultralytics/trackers/utils/kalman_filter.py +2 -1
- ultralytics/trackers/utils/matching.py +4 -3
- ultralytics/utils/__init__.py +12 -3
- ultralytics/utils/benchmarks.py +2 -2
- ultralytics/utils/callbacks/tensorboard.py +19 -25
- ultralytics/utils/checks.py +2 -1
- ultralytics/utils/downloads.py +1 -1
- ultralytics/utils/export/tensorflow.py +16 -2
- ultralytics/utils/files.py +13 -12
- ultralytics/utils/logger.py +62 -27
- ultralytics/utils/metrics.py +1 -1
- ultralytics/utils/ops.py +6 -6
- ultralytics/utils/patches.py +3 -3
- ultralytics/utils/plotting.py +7 -12
- ultralytics/utils/tuner.py +1 -1
- ultralytics/models/sam/sam3/tokenizer_ve.py +0 -242
- {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.237.dist-info → dgenerate_ultralytics_headless-8.3.239.dist-info}/top_level.txt +0 -0
@@ -110,10 +110,12 @@ class Predictor(BasePredictor):
 """Preprocess the input image for model inference.

 This method prepares the input image by applying transformations and normalization. It supports both
-torch.Tensor and list of np.ndarray as input formats.
+torch.Tensor and list of np.ndarray as input formats. For OpenCV-loaded images, the input is typically BGR and
+is converted to RGB during preprocessing.

 Args:
-    im (torch.Tensor | list[np.ndarray]): Input image(s) in BCHW tensor format or list of HWC
+    im (torch.Tensor | list[np.ndarray]): Input image(s) in BCHW tensor format or a list of HWC NumPy arrays.
+        NumPy arrays are expected to be in BGR order (as returned by OpenCV) and will be converted to RGB.

 Returns:
     (torch.Tensor): The preprocessed image tensor, normalized and converted to the appropriate dtype.
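The BGR-to-RGB note above follows the usual OpenCV convention. As a minimal illustrative sketch (not taken from the package; the file path is a placeholder), converting an OpenCV-loaded frame into a normalized BCHW tensor looks roughly like this:

    import cv2
    import torch

    bgr = cv2.imread("frame.jpg")                   # HWC, BGR, uint8 (placeholder path)
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)      # HWC, RGB
    im = torch.from_numpy(rgb).permute(2, 0, 1)     # CHW
    im = im.unsqueeze(0).float() / 255.0            # BCHW, normalized to [0, 1]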
@@ -534,7 +536,7 @@ class Predictor(BasePredictor):

 Args:
     image (str | np.ndarray): Path to the image file as a string, or a numpy array representing an image read by
-        cv2.
+        cv2 (BGR channel order).

 Raises:
     AssertionError: If more than one image is attempted to be set.

@@ -1244,14 +1246,11 @@ class SAM2VideoPredictor(SAM2Predictor):
 - If `batch` is greater than 1, the features are expanded to fit the batch size.
 - The method leverages the model's `_prepare_backbone_features` method to prepare the backbone features.
 """
-
-
-
-
-
-pos = pos.expand(batch, -1, -1, -1)
-backbone_out["vision_pos_enc"][i] = pos
-_, vis_feats, vis_pos_embed, feat_sizes = self.model._prepare_backbone_features(backbone_out)
+# check if there's precomputed backbone output
+backbone_out = getattr(self, "backbone_out", None)
+if backbone_out is None:
+    backbone_out = self.model.forward_image(im)
+_, vis_feats, vis_pos_embed, feat_sizes = self.model._prepare_backbone_features(backbone_out, batch=batch)
 return vis_feats, vis_pos_embed, feat_sizes

 def _obj_id_to_idx(self, obj_id, inference_state: dict[str, Any] | None = None):
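The change above reuses a precomputed backbone output when one has been attached to the predictor, and only runs the image encoder as a fallback. A minimal sketch of that caching pattern (class and attribute names here are illustrative, not the package API):

    import torch

    class FeatureCache:
        """Illustrative only: reuse a cached encoder output when present, else compute it."""

        def __init__(self, encoder):
            self.encoder = encoder
            self.backbone_out = None  # may be populated by a caller ahead of time

        def get_features(self, im: torch.Tensor) -> torch.Tensor:
            out = getattr(self, "backbone_out", None)  # check for a precomputed result
            if out is None:
                out = self.encoder(im)  # fall back to running the encoder
            return out

    cache = FeatureCache(encoder=lambda x: x * 2)
    print(cache.get_features(torch.ones(1)))  # tensor([2.])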
@@ -2055,11 +2054,12 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
 self.memory_bank.append(consolidated_out)

 def _prepare_memory_conditioned_features(self, obj_idx: int | None) -> torch.Tensor:
-    """Prepare
-
-
-
-
+    """Prepare memory-conditioned features for the current image state.
+
+    If ``obj_idx`` is provided, features are prepared for a specific prompted object in the image. If ``obj_idx`` is
+    None, features are prepared for all objects. If no memory is available, a no-memory embedding is added to the
+    current vision features. Otherwise, memory from previous frames is used to condition the current vision features
+    via a transformer attention mechanism.

     Args:
         obj_idx (int | None): The index of the object for which to prepare the features.

@@ -2068,8 +2068,8 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
     pix_feat_with_mem (torch.Tensor): The memory-conditioned pixel features.
 """
 if len(self.memory_bank) == 0 or isinstance(obj_idx, int):
-    #
-    #
+    # For initial conditioning frames, encode without using any previous memory.
+    # Directly add the no-memory embedding (instead of using the transformer encoder).
     pix_feat_with_mem = self.vision_feats[-1] + self.model.no_mem_embed
 else:
     # for inference frames, use the memory features from previous frames

@@ -2081,7 +2081,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
     memory_pos=memory_pos_embed,
     num_obj_ptr_tokens=0,  # num_obj_ptr_tokens
 )
-#
+# Reshape output (HW)BC => BCHW
 return pix_feat_with_mem.permute(1, 2, 0).view(
     self._max_obj_num,
     self.model.memory_attention.d_model,

@@ -2145,9 +2145,9 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
     pix_feat = pix_feat.view(-1, self.model.memory_attention.d_model, *self.feat_sizes[-1])
     _, _, _, low_res_masks, high_res_masks, obj_ptr, object_score_logits = self.model._use_mask_as_output(mask)
 else:
-    #
+    # Fuse visual features with previous memory features in the memory bank.
     pix_feat_with_mem = self._prepare_memory_conditioned_features(obj_idx)
-    #
+    # If ``obj_idx`` is provided (i.e., prompts are being added), keep only the first feature map.
     pix_feat_with_mem = pix_feat_with_mem[:1] if obj_idx is not None else pix_feat_with_mem
     _, _, _, low_res_masks, high_res_masks, obj_ptr, object_score_logits = self.model._forward_sam_heads(
         backbone_features=pix_feat_with_mem,

@@ -2182,7 +2182,7 @@ class SAM3Predictor(SAM2Predictor):
 self.std = torch.tensor([127.5, 127.5, 127.5]).view(-1, 1, 1).to(self.device)

 def get_model(self):
-    """Retrieve and initialize the Segment Anything Model
+    """Retrieve and initialize the Segment Anything Model 3 (SAM3) for image segmentation tasks."""
     from .build_sam3 import build_interactive_sam3  # slow import

     return build_interactive_sam3(self.args.model, compile=self.args.compile)

@@ -2191,16 +2191,11 @@ class SAM3Predictor(SAM2Predictor):
 class SAM3SemanticPredictor(SAM3Predictor):
     """Segment Anything Model 3 (SAM3) Predictor for image segmentation tasks."""

-    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None, bpe_path=None):
-        """Initialize the SAM3SemanticPredictor with configuration and optional overrides."""
-        super().__init__(cfg, overrides, _callbacks)
-        self.bpe_path = bpe_path
-
     def get_model(self):
         """Retrieve and initialize the Segment Anything Model 3 (SAM3) for image segmentation tasks."""
         from .build_sam3 import build_sam3_image_model  # slow import

-        return build_sam3_image_model(self.args.model,
+        return build_sam3_image_model(self.args.model, compile=self.args.compile)

     @smart_inference_mode()
     def get_im_features(self, im):

@@ -2437,24 +2432,6 @@ class SAM3VideoPredictor(SAM2VideoPredictor, SAM3Predictor):

     return obj_ids, pred_masks, obj_scores

-def get_im_features(self, im, batch=1):
-    """A wrapper to get image features, supporting pre-extracted backbone outputs."""
-    if getattr(self, "backbone_out", None):
-        backbone_out = self.backbone_out
-        if batch > 1:  # expand features if there's more than one prompt
-            backbone_out = {
-                "backbone_fpn": backbone_out["backbone_fpn"].copy(),
-                "vision_pos_enc": backbone_out["vision_pos_enc"].copy(),
-            }
-            for i, feat in enumerate(backbone_out["backbone_fpn"]):
-                backbone_out["backbone_fpn"][i] = feat.expand(batch, -1, -1, -1)
-            for i, pos in enumerate(backbone_out["vision_pos_enc"]):
-                pos = pos.expand(batch, -1, -1, -1)
-                backbone_out["vision_pos_enc"][i] = pos
-        _, vis_feats, vis_pos_embed, feat_sizes = self.model._prepare_backbone_features(backbone_out)
-        return vis_feats, vis_pos_embed, feat_sizes
-    return super().get_im_features(im, batch)
-

 class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
     """Segment Anything Model 3 (SAM3) Video Semantic Predictor."""

@@ -2479,7 +2456,6 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
     cfg=DEFAULT_CFG,
     overrides=None,
     _callbacks=None,
-    bpe_path="bpe_simple_vocab_16e6.txt.gz",
     # prob threshold for detection outputs -- only keep detections above this threshold
     # enters NMS and det-to-track matching
     score_threshold_detection=0.5,

@@ -2523,7 +2499,7 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
     reconstruction_bbox_det_score=0.0,
 ):
     """Initialize the SAM3VideoSemanticPredictor with configuration and optional overrides."""
-    super().__init__(cfg, overrides, _callbacks
+    super().__init__(cfg, overrides, _callbacks)
     self.score_threshold_detection = score_threshold_detection
     self.det_nms_thresh = det_nms_thresh
     self.assoc_iou_thresh = assoc_iou_thresh

@@ -171,7 +171,7 @@ class TransformerEncoderLayer(nn.Module):
 assert tgt.shape[0] % 2 == 0
 other_tgt = tgt[tgt.shape[0] // 2 :]
 tgt = tgt[: tgt.shape[0] // 2]
-tgt2 = self.norm1(tgt)
+tgt2 = self.norm1(tgt).contiguous()
 q = k = tgt2 + query_pos if self.pos_enc_at_attn else tgt2
 tgt2 = self.self_attn(q, k, value=tgt2, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask)[0]
 tgt = tgt + self.dropout1(tgt2)

@@ -179,13 +179,13 @@ class TransformerEncoderLayer(nn.Module):
 # Recombine
 tgt = torch.cat((tgt, other_tgt), dim=0)
 tgt2 = self.norm2(tgt)
+memory = memory.to(tgt2.dtype).contiguous()
 tgt2 = self.cross_attn_image(
     query=tgt2 + query_pos if self.pos_enc_at_cross_attn_queries else tgt2,
-    key=memory
-    value=memory
+    key=memory + pos if self.pos_enc_at_cross_attn_keys else memory,
+    value=memory,
     attn_mask=memory_mask,
     key_padding_mask=memory_key_padding_mask,
-    # attn_bias=attn_bias,
 )[0]
 tgt = tgt + self.dropout2(tgt2)
 tgt2 = self.norm3(tgt)
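The `.to(tgt2.dtype).contiguous()` call above aligns the cross-attention key/value tensor with the query's dtype and memory layout before attention. A minimal sketch of that pattern (shapes and the attention module here are illustrative, not the package's classes):

    import torch
    import torch.nn as nn

    attn = nn.MultiheadAttention(embed_dim=256, num_heads=8)
    q = torch.randn(10, 2, 256)                            # (seq, batch, dim), float32
    memory = torch.randn(20, 2, 256, dtype=torch.float16)  # e.g. produced under autocast
    memory = memory.to(q.dtype).contiguous()               # match query dtype, ensure contiguity
    out, _ = attn(q, memory, memory)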
@@ -42,8 +42,8 @@ def concat_padded_sequences(seq1, mask1, seq2, mask2, return_index: bool = False
 assert seq1_length == mask1.size(1)
 assert seq2_length == mask2.size(1)

-torch.
-torch.
+torch._assert(is_right_padded(mask1), "Mask is not right padded")
+torch._assert(is_right_padded(mask2), "Mask is not right padded")

 actual_seq1_lengths = (~mask1).sum(dim=-1)
 actual_seq2_lengths = (~mask2).sum(dim=-1)
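Unlike a bare Python `assert`, `torch._assert` stays traceable under FX/torch.compile. A small hedged sketch of the check above; the `is_right_padded` helper here is a stand-in written for illustration (the package's own helper may differ), and the mask convention assumes True marks padding:

    import torch

    def is_right_padded(m: torch.Tensor) -> bool:
        # once padding (True) starts along the sequence dimension, it must never stop
        return bool((m.int().diff(dim=1) >= 0).all())

    mask = torch.tensor([[False, False, True, True]])  # right-padded example
    torch._assert(is_right_padded(mask), "Mask is not right padded")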
@@ -288,7 +288,7 @@ class SequenceGeometryEncoder(nn.Module):
 # Convert boxes to xyxy format and denormalize
 boxes_xyxy = xywh2xyxy(boxes.to(img_feats.dtype))
 scale = torch.tensor([W, H, W, H], dtype=boxes_xyxy.dtype)
-scale = scale.
+scale = scale.to(device=boxes_xyxy.device, non_blocking=True)
 scale = scale.view(1, 1, 4)
 boxes_xyxy = boxes_xyxy * scale

@@ -103,27 +103,27 @@ class Sam3DualViTDetNeck(nn.Module):

 def forward(
     self, tensor_list: list[torch.Tensor]
-) -> tuple[list[torch.Tensor], list[torch.Tensor], list[torch.Tensor], list[torch.Tensor]]:
-    """Get
+) -> tuple[list[torch.Tensor], list[torch.Tensor], list[torch.Tensor] | None, list[torch.Tensor] | None]:
+    """Get feature maps and positional encodings from the neck."""
     xs = self.trunk(tensor_list)
-    sam3_out, sam3_pos = [], []
-    sam2_out, sam2_pos = None, None
-    if self.sam2_convs is not None:
-        sam2_out, sam2_pos = [], []
     x = xs[-1]  # simpleFPN
-
-
-
-
-        sam3_pos.append(sam3_pos_out)
-
-    if self.sam2_convs is not None:
-        sam2_x_out = self.sam2_convs[i](x)
-        sam2_pos_out = self.position_encoding(sam2_x_out).to(sam2_x_out.dtype)
-        sam2_out.append(sam2_x_out)
-        sam2_pos.append(sam2_pos_out)
+    sam3_out, sam3_pos = self.sam_forward_feature_levels(x, self.convs)
+    if self.sam2_convs is None:
+        return sam3_out, sam3_pos, None, None
+    sam2_out, sam2_pos = self.sam_forward_feature_levels(x, self.sam2_convs)
     return sam3_out, sam3_pos, sam2_out, sam2_pos

+def sam_forward_feature_levels(
+    self, x: torch.Tensor, convs: nn.ModuleList
+) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
+    """Run neck convolutions and compute positional encodings for each feature level."""
+    outs, poss = [], []
+    for conv in convs:
+        feat = conv(x)
+        outs.append(feat)
+        poss.append(self.position_encoding(feat).to(feat.dtype))
+    return outs, poss
+
 def set_imgsz(self, imgsz: list[int] = [1008, 1008]):
     """Set the image size for the trunk backbone."""
     self.trunk.set_imgsz(imgsz)
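The refactor above replaces two near-identical conv-plus-positional-encoding loops with one shared helper. A self-contained sketch of that pattern, with toy modules standing in for the real trunk, convs, and positional encoding (none of these names come from the package):

    import torch
    import torch.nn as nn

    class TinyNeck(nn.Module):
        def __init__(self, channels: int = 32, levels: int = 3):
            super().__init__()
            self.convs = nn.ModuleList([nn.Conv2d(channels, channels, 1) for _ in range(levels)])

        def position_encoding(self, feat: torch.Tensor) -> torch.Tensor:
            return torch.zeros_like(feat)  # stand-in for a real positional encoding

        def forward_feature_levels(self, x: torch.Tensor, convs: nn.ModuleList):
            outs, poss = [], []
            for conv in convs:
                feat = conv(x)
                outs.append(feat)
                poss.append(self.position_encoding(feat).to(feat.dtype))
            return outs, poss

    neck = TinyNeck()
    feats, pos = neck.forward_feature_levels(torch.randn(1, 32, 16, 16), neck.convs)
    print(len(feats), feats[0].shape)  # 3 torch.Size([1, 32, 16, 16])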
@@ -11,6 +11,7 @@ import torch
 from ultralytics.nn.modules.utils import inverse_sigmoid
 from ultralytics.utils.ops import xywh2xyxy

+from ..modules.sam import SAM2Model
 from .geometry_encoders import Prompt
 from .vl_combiner import SAM3VLBackbone

@@ -93,25 +94,6 @@ class SAM3SemanticModel(torch.nn.Module):
     self.text_embeddings = {}
     self.names = []

-def _prepare_backbone_features(self, backbone_out, num_prompts=1):
-    """Prepare and flatten visual features from the image backbone output for further processing."""
-    if num_prompts > 1:  # expand features if there's more than one prompt
-        for i, feat in enumerate(backbone_out["backbone_fpn"]):
-            backbone_out["backbone_fpn"][i] = feat.expand(num_prompts, -1, -1, -1)
-        for i, pos in enumerate(backbone_out["vision_pos_enc"]):
-            pos = pos.expand(num_prompts, -1, -1, -1)
-            backbone_out["vision_pos_enc"][i] = pos
-    assert len(backbone_out["backbone_fpn"]) == len(backbone_out["vision_pos_enc"])
-    assert len(backbone_out["backbone_fpn"]) >= self.num_feature_levels
-
-    feature_maps = backbone_out["backbone_fpn"][-self.num_feature_levels :]
-    vision_pos_embeds = backbone_out["vision_pos_enc"][-self.num_feature_levels :]
-    feat_sizes = [(x.shape[-2], x.shape[-1]) for x in vision_pos_embeds]
-    # flatten NxCxHxW to HWxNxC
-    vision_feats = [x.flatten(2).permute(2, 0, 1) for x in feature_maps]
-    vision_pos_embeds = [x.flatten(2).permute(2, 0, 1) for x in vision_pos_embeds]
-    return backbone_out, vision_feats, vision_pos_embeds, feat_sizes
-
 def _encode_prompt(
     self,
     img_feats,

@@ -304,8 +286,8 @@ class SAM3SemanticModel(torch.nn.Module):
     self, backbone_out: dict[str, torch.Tensor], text_ids: torch.Tensor, geometric_prompt: Prompt = None
 ):
     """Forward pass for grounding (detection + segmentation) given input images and text."""
-    backbone_out, img_feats, img_pos_embeds, vis_feat_sizes =
-        backbone_out,
+    backbone_out, img_feats, img_pos_embeds, vis_feat_sizes = SAM2Model._prepare_backbone_features(
+        self, backbone_out, batch=len(text_ids)
     )
     backbone_out.update({k: v for k, v in self.text_embeddings.items()})
     with torch.profiler.record_function("SAM3Image._encode_prompt"):

@@ -110,15 +110,10 @@ class SAM3VLBackbone(nn.Module):
 def forward_image_sam2(self, samples: torch.Tensor):
     """Forward pass of the vision backbone to get SAM2 features only."""
     xs = self.vision_backbone.trunk(samples)
-    sam2_features, sam2_pos = [], []
     x = xs[-1]  # simpleFPN

     assert self.vision_backbone.sam2_convs is not None, "SAM2 neck is not available."
-
-    sam2_x_out = self.vision_backbone.sam2_convs[i](x)
-    sam2_pos_out = self.vision_backbone.position_encoding(sam2_x_out).to(sam2_x_out.dtype)
-    sam2_features.append(sam2_x_out)
-    sam2_pos.append(sam2_pos_out)
+    sam2_features, sam2_pos = self.vision_backbone.sam_forward_feature_levels(x, self.vision_backbone.sam2_convs)

     if self.scalp > 0:
         # Discard the lowest resolution features

@@ -57,7 +57,7 @@ class ClassificationValidator(BaseValidator):
 """Initialize ClassificationValidator with dataloader, save directory, and other parameters.

 Args:
-    dataloader (torch.utils.data.DataLoader, optional):
+    dataloader (torch.utils.data.DataLoader, optional): DataLoader to use for validation.
     save_dir (str | Path, optional): Directory to save results.
     args (dict, optional): Arguments containing model and validation configuration.
     _callbacks (list, optional): List of callback functions to be called during validation.

@@ -53,7 +53,7 @@ class DetectionTrainer(BaseTrainer):
 """

 def __init__(self, cfg=DEFAULT_CFG, overrides: dict[str, Any] | None = None, _callbacks=None):
-    """Initialize a DetectionTrainer object for training YOLO object detection
+    """Initialize a DetectionTrainer object for training YOLO object detection models.

     Args:
         cfg (dict, optional): Default configuration dictionary containing training parameters.

@@ -46,7 +46,7 @@ class DetectionValidator(BaseValidator):
 """Initialize detection validator with necessary variables and settings.

 Args:
-    dataloader (torch.utils.data.DataLoader, optional):
+    dataloader (torch.utils.data.DataLoader, optional): DataLoader to use for validation.
     save_dir (Path, optional): Directory to save results.
     args (dict[str, Any], optional): Arguments for the validator.
     _callbacks (list[Any], optional): List of callback functions.

@@ -256,7 +256,7 @@ class DetectionValidator(BaseValidator):
 pf = "%22s" + "%11i" * 2 + "%11.3g" * len(self.metrics.keys)  # print format
 LOGGER.info(pf % ("all", self.seen, self.metrics.nt_per_class.sum(), *self.metrics.mean_results()))
 if self.metrics.nt_per_class.sum() == 0:
-    LOGGER.warning(f"no labels found in {self.args.task} set,
+    LOGGER.warning(f"no labels found in {self.args.task} set, cannot compute metrics without labels")

 # Print results per class
 if self.args.verbose and not self.training and self.nc > 1 and len(self.metrics.stats):

@@ -308,7 +308,7 @@ class DetectionValidator(BaseValidator):
     batch_size (int): Size of each batch.

 Returns:
-    (torch.utils.data.DataLoader):
+    (torch.utils.data.DataLoader): DataLoader for validation.
 """
 dataset = self.build_dataset(dataset_path, batch=batch_size, mode="val")
 return build_dataloader(

@@ -460,11 +460,11 @@ class DetectionValidator(BaseValidator):

 Args:
     stats (dict[str, Any]): Dictionary to store computed metrics and statistics.
-    pred_json (str | Path
-    anno_json (str | Path
-    iou_types (str | list[str]
+    pred_json (str | Path): Path to JSON file containing predictions in COCO format.
+    anno_json (str | Path): Path to JSON file containing ground truth annotations in COCO format.
+    iou_types (str | list[str]): IoU type(s) for evaluation. Can be single string or list of strings. Common
         values include "bbox", "segm", "keypoints". Defaults to "bbox".
-    suffix (str | list[str]
+    suffix (str | list[str]): Suffix to append to metric names in stats dictionary. Should correspond to
         iou_types if multiple types provided. Defaults to "Box".

 Returns:

@@ -50,7 +50,7 @@ class OBBValidator(DetectionValidator):
 extends the DetectionValidator class and configures it specifically for the OBB task.

 Args:
-    dataloader (torch.utils.data.DataLoader, optional):
+    dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
     save_dir (str | Path, optional): Directory to save results.
     args (dict | SimpleNamespace, optional): Arguments containing validation parameters.
     _callbacks (list, optional): List of callback functions to be called during validation.

@@ -59,7 +59,7 @@ class PoseValidator(DetectionValidator):
 specialized metrics for pose evaluation.

 Args:
-    dataloader (torch.utils.data.DataLoader, optional):
+    dataloader (torch.utils.data.DataLoader, optional): DataLoader to be used for validation.
     save_dir (Path | str, optional): Directory to save results.
     args (dict, optional): Arguments for the validator including task set to "pose".
     _callbacks (list, optional): List of callback functions to be executed during validation.

@@ -39,7 +39,7 @@ class SegmentationValidator(DetectionValidator):
 """Initialize SegmentationValidator and set task to 'segment', metrics to SegmentMetrics.

 Args:
-    dataloader (torch.utils.data.DataLoader, optional):
+    dataloader (torch.utils.data.DataLoader, optional): DataLoader to use for validation.
     save_dir (Path, optional): Directory to save results.
     args (namespace, optional): Arguments for the validator.
     _callbacks (list, optional): List of callback functions.

ultralytics/nn/autobackend.py CHANGED

@@ -127,7 +127,7 @@ class AutoBackend(nn.Module):

 Methods:
     forward: Run inference on an input image.
-    from_numpy: Convert
+    from_numpy: Convert NumPy arrays to tensors on the model device.
     warmup: Warm up the model with a dummy input.
     _model_type: Determine the model type from file path.

@@ -182,7 +182,7 @@ class AutoBackend(nn.Module):
     triton,
 ) = self._model_type("" if nn_module else model)
 fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
-nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn  # BHWC formats (vs torch
+nhwc = coreml or saved_model or pb or tflite or edgetpu or rknn  # BHWC formats (vs torch BCHW)
 stride, ch = 32, 3  # default stride and channels
 end2end, dynamic = False, False
 metadata, task = None, None

@@ -894,14 +894,14 @@ class AutoBackend(nn.Module):
 else:
     return self.from_numpy(y)

-def from_numpy(self, x: np.ndarray) -> torch.Tensor:
-    """Convert a
+def from_numpy(self, x: np.ndarray | torch.Tensor) -> torch.Tensor:
+    """Convert a NumPy array to a torch tensor on the model device.

     Args:
-        x (np.ndarray):
+        x (np.ndarray | torch.Tensor): Input array or tensor.

     Returns:
-        (torch.Tensor):
+        (torch.Tensor): Tensor on `self.device`.
     """
     return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x

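The from_numpy contract documented above is simple: NumPy arrays are copied into a tensor on the model's device, while tensors pass through unchanged. A minimal standalone sketch (the free function and `device` argument are illustrative; in the package this is a method using `self.device`):

    import numpy as np
    import torch

    def from_numpy(x, device=torch.device("cpu")):
        return torch.tensor(x).to(device) if isinstance(x, np.ndarray) else x

    print(from_numpy(np.zeros((1, 3, 640, 640), dtype=np.float32)).shape)  # torch.Size([1, 3, 640, 640])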
@@ -909,7 +909,7 @@ class AutoBackend(nn.Module):
 """Warm up the model by running one forward pass with a dummy input.

 Args:
-    imgsz (tuple
+    imgsz (tuple[int, int, int, int]): Dummy input shape in (batch, channels, height, width) format.
 """
 warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module
 if any(warmup_types) and (self.device.type != "cpu" or self.triton):

@@ -931,8 +931,8 @@ class AutoBackend(nn.Module):
     (list[bool]): List of booleans indicating the model type.

 Examples:
-    >>>
-    >>>
+    >>> types = AutoBackend._model_type("path/to/model.onnx")
+    >>> assert types[2]  # onnx
 """
 from ultralytics.engine.exporter import export_formats

ultralytics/nn/modules/block.py CHANGED

@@ -1812,7 +1812,7 @@ class A2C2f(nn.Module):
 """
 super().__init__()
 c_ = int(c2 * e)  # hidden channels
-assert c_ % 32 == 0, "Dimension of ABlock be a multiple of 32."
+assert c_ % 32 == 0, "Dimension of ABlock must be a multiple of 32."

 self.cv1 = Conv(c1, c_, 1, 1)
 self.cv2 = Conv((1 + n) * c_, c2, 1)

ultralytics/nn/tasks.py CHANGED

@@ -866,7 +866,7 @@ class WorldModel(DetectionModel):
 self.model[-1].nc = len(text)

 def get_text_pe(self, text, batch=80, cache_clip_model=True):
-    """
+    """Get text positional embeddings for offline inference without CLIP model.

     Args:
         text (list[str]): List of class names.

@@ -987,13 +987,13 @@ class YOLOEModel(DetectionModel):

 @smart_inference_mode()
 def get_text_pe(self, text, batch=80, cache_clip_model=False, without_reprta=False):
-    """
+    """Get text positional embeddings for offline inference without CLIP model.

     Args:
         text (list[str]): List of class names.
         batch (int): Batch size for processing text tokens.
         cache_clip_model (bool): Whether to cache the CLIP model.
-        without_reprta (bool): Whether to return text embeddings
+        without_reprta (bool): Whether to return text embeddings without reprta module processing.

     Returns:
         (torch.Tensor): Text positional embeddings.

ultralytics/nn/text_model.py CHANGED

@@ -196,12 +196,7 @@ class MobileCLIP(TextModel):
     device (torch.device): Device to load the model on.
 """
 try:
-    import
-
-    # Suppress 'timm.models.layers is deprecated, please import via timm.layers' warning from mobileclip usage
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", category=FutureWarning)
-        import mobileclip
+    import mobileclip
 except ImportError:
     # Ultralytics fork preferred since Apple MobileCLIP repo has incorrect version of torchvision
     checks.check_requirements("git+https://github.com/ultralytics/mobileclip.git")

@@ -308,7 +303,7 @@ class MobileCLIPTS(TextModel):
     (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).

 Examples:
-    >>> model = MobileCLIPTS("cpu")
+    >>> model = MobileCLIPTS(device=torch.device("cpu"))
     >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
     >>> strict_tokens = model.tokenize(
     ...     ["a very long caption"], truncate=False

ultralytics/solutions/ai_gym.py CHANGED

@@ -13,7 +13,7 @@ class AIGym(BaseSolution):
 repetitions of exercises based on predefined angle thresholds for up and down positions.

 Attributes:
-    states (dict[
+    states (dict[int, dict[str, float | int | str]]): Per-track angle, rep count, and stage for workout monitoring.
     up_angle (float): Angle threshold for considering the 'up' position of an exercise.
     down_angle (float): Angle threshold for considering the 'down' position of an exercise.
     kpts (list[int]): Indices of keypoints used for angle calculation.

@@ -56,7 +56,7 @@ class Analytics(BaseSolution):
 from matplotlib.backends.backend_agg import FigureCanvasAgg
 from matplotlib.figure import Figure

-self.type = self.CFG["analytics_type"]  # type
+self.type = self.CFG["analytics_type"]  # Chart type: "line", "pie", "bar", or "area".
 self.x_label = "Classes" if self.type in {"bar", "pie"} else "Frame#"
 self.y_label = "Total Counts"

@@ -66,10 +66,10 @@ class Analytics(BaseSolution):
 self.title = "Ultralytics Solutions"  # window name
 self.max_points = 45  # maximum points to be drawn on window
 self.fontsize = 25  # text font size for display
-figsize = self.CFG["figsize"]  #
+figsize = self.CFG["figsize"]  # Output size, e.g. (12.8, 7.2) -> 1280x720.
 self.color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])

-self.total_counts = 0  #
+self.total_counts = 0  # Stores total counts for line charts.
 self.clswise_count = {}  # dictionary for class-wise counts
 self.update_every = kwargs.get("update_every", 30)  # Only update graph every 30 frames by default
 self.last_plot_im = None  # Cache of the last rendered chart

@@ -104,7 +104,7 @@ class Analytics(BaseSolution):
 and 'classwise_count' (dict, per-class object count).

 Raises:
-
+    ValueError: If an unsupported chart type is specified.

 Examples:
     >>> analytics = Analytics(analytics_type="line")

@@ -131,9 +131,9 @@ class Analytics(BaseSolution):
     )
     plot_im = self.last_plot_im
 else:
-    raise
+    raise ValueError(f"Unsupported analytics_type='{self.type}'. Supported types: line, bar, pie, area.")

-#
+# Return results for downstream use.
 return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)

 def update_graph(
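The Analytics change above now fails fast with a descriptive ValueError when an unsupported chart type is configured. A hedged, self-contained sketch of that validation pattern (names are illustrative, not the package API):

    SUPPORTED_TYPES = {"line", "bar", "pie", "area"}

    def check_analytics_type(analytics_type: str) -> str:
        if analytics_type not in SUPPORTED_TYPES:
            raise ValueError(
                f"Unsupported analytics_type='{analytics_type}'. Supported types: {sorted(SUPPORTED_TYPES)}"
            )
        return analytics_type

    check_analytics_type("line")  # passes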
ultralytics/solutions/config.py CHANGED

@@ -35,7 +35,7 @@ class SolutionConfig:
 vision_point (tuple[int, int]): Reference point for directional tracking or perspective drawing.
 crop_dir (str): Directory path to save cropped detection images.
 json_file (str): Path to a JSON file containing data for parking areas.
-line_width (int): Width for visual display
+line_width (int): Width for visual display, e.g. bounding boxes, keypoints, and counts.
 records (int): Number of detection records to send email alerts.
 fps (float): Frame rate (Frames Per Second) for speed estimation calculation.
 max_hist (int): Maximum number of historical points or states stored per tracked object for speed estimation.

@@ -17,7 +17,7 @@ class DistanceCalculation(BaseSolution):

 Attributes:
     left_mouse_count (int): Counter for left mouse button clicks.
-    selected_boxes (dict[int,
+    selected_boxes (dict[int, Any]): Dictionary to store selected bounding boxes keyed by track ID.
     centroids (list[list[int]]): List to store centroids of selected bounding boxes.

 Methods:

@@ -19,7 +19,7 @@ class ObjectCounter(BaseSolution):
 in_count (int): Counter for objects moving inward.
 out_count (int): Counter for objects moving outward.
 counted_ids (list[int]): List of IDs of objects that have been counted.
-
+classwise_count (dict[str, dict[str, int]]): Dictionary for counts, categorized by object class.
 region_initialized (bool): Flag indicating whether the counting region has been initialized.
 show_in (bool): Flag to control display of inward count.
 show_out (bool): Flag to control display of outward count.