dgenerate-ultralytics-headless 8.3.218__py3-none-any.whl → 8.3.221__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.218.dist-info → dgenerate_ultralytics_headless-8.3.221.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.218.dist-info → dgenerate_ultralytics_headless-8.3.221.dist-info}/RECORD +77 -77
- tests/__init__.py +5 -7
- tests/conftest.py +3 -7
- tests/test_cli.py +9 -2
- tests/test_engine.py +1 -1
- tests/test_exports.py +37 -9
- tests/test_integrations.py +4 -4
- tests/test_python.py +37 -44
- tests/test_solutions.py +154 -145
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +7 -5
- ultralytics/cfg/default.yaml +1 -1
- ultralytics/data/__init__.py +4 -4
- ultralytics/data/augment.py +10 -10
- ultralytics/data/base.py +1 -1
- ultralytics/data/build.py +1 -1
- ultralytics/data/converter.py +3 -3
- ultralytics/data/dataset.py +3 -3
- ultralytics/data/loaders.py +2 -2
- ultralytics/data/utils.py +2 -2
- ultralytics/engine/exporter.py +73 -20
- ultralytics/engine/model.py +1 -1
- ultralytics/engine/predictor.py +1 -0
- ultralytics/engine/trainer.py +5 -3
- ultralytics/engine/tuner.py +4 -4
- ultralytics/hub/__init__.py +9 -7
- ultralytics/hub/utils.py +2 -2
- ultralytics/models/__init__.py +1 -1
- ultralytics/models/fastsam/__init__.py +1 -1
- ultralytics/models/fastsam/predict.py +10 -16
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/sam/__init__.py +1 -1
- ultralytics/models/sam/amg.py +2 -2
- ultralytics/models/sam/modules/blocks.py +1 -1
- ultralytics/models/sam/modules/transformer.py +1 -1
- ultralytics/models/sam/predict.py +1 -1
- ultralytics/models/yolo/__init__.py +1 -1
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/segment/val.py +1 -1
- ultralytics/models/yolo/yoloe/__init__.py +7 -7
- ultralytics/nn/__init__.py +7 -7
- ultralytics/nn/autobackend.py +32 -5
- ultralytics/nn/modules/__init__.py +60 -60
- ultralytics/nn/modules/block.py +26 -26
- ultralytics/nn/modules/conv.py +7 -7
- ultralytics/nn/modules/head.py +1 -1
- ultralytics/nn/modules/transformer.py +7 -7
- ultralytics/nn/modules/utils.py +1 -1
- ultralytics/nn/tasks.py +3 -3
- ultralytics/solutions/__init__.py +12 -12
- ultralytics/solutions/object_counter.py +3 -6
- ultralytics/solutions/queue_management.py +1 -1
- ultralytics/solutions/similarity_search.py +3 -3
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/byte_tracker.py +2 -2
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +2 -2
- ultralytics/utils/benchmarks.py +4 -4
- ultralytics/utils/callbacks/comet.py +2 -2
- ultralytics/utils/checks.py +2 -2
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export/__init__.py +1 -1
- ultralytics/utils/files.py +1 -1
- ultralytics/utils/git.py +1 -1
- ultralytics/utils/logger.py +1 -1
- ultralytics/utils/metrics.py +13 -9
- ultralytics/utils/ops.py +8 -8
- ultralytics/utils/plotting.py +2 -1
- ultralytics/utils/torch_utils.py +5 -4
- ultralytics/utils/triton.py +2 -2
- ultralytics/utils/tuner.py +4 -2
- {dgenerate_ultralytics_headless-8.3.218.dist-info → dgenerate_ultralytics_headless-8.3.221.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.218.dist-info → dgenerate_ultralytics_headless-8.3.221.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.218.dist-info → dgenerate_ultralytics_headless-8.3.221.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.218.dist-info → dgenerate_ultralytics_headless-8.3.221.dist-info}/top_level.txt +0 -0
ultralytics/models/sam/amg.py
CHANGED
@@ -145,7 +145,7 @@ def generate_crop_boxes(
 
     def crop_len(orig_len, n_crops, overlap):
         """Calculate the length of each crop given the original length, number of crops, and overlap."""
-        return int(math.ceil((overlap * (n_crops - 1) + orig_len) / n_crops))
+        return math.ceil((overlap * (n_crops - 1) + orig_len) / n_crops)
 
     for i_layer in range(n_layers):
         n_crops_per_side = 2 ** (i_layer + 1)
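The rewritten helper is the usual overlapping-crop length formula; a quick sanity check with illustrative numbers:

import math

def crop_len(orig_len, n_crops, overlap):
    """Length of each crop so that n_crops crops with the given overlap span orig_len."""
    return math.ceil((overlap * (n_crops - 1) + orig_len) / n_crops)

print(crop_len(1024, 2, 256))  # 640: two 640 px crops overlapping by 256 px cover 1024 px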
@@ -227,7 +227,7 @@ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> tup
     small_regions = [i + 1 for i, s in enumerate(sizes) if s < area_thresh]
     if not small_regions:
         return mask, False
-    fill_labels = [0] + small_regions
+    fill_labels = [0, *small_regions]
     if not correct_holes:
         # If every region is below threshold, keep largest
         fill_labels = [i for i in range(n_labels) if i not in fill_labels] or [int(np.argmax(sizes)) + 1]
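The only change is how the label list is built: the small-region labels are now unpacked after the background label 0 instead of concatenated. An illustrative check:

small_regions = [2, 5]  # hypothetical connected-component labels below the area threshold
assert [0, *small_regions] == [0] + small_regions == [0, 2, 5]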
ultralytics/models/sam/modules/blocks.py
CHANGED

@@ -619,7 +619,7 @@ class MultiScaleBlock(nn.Module):
         mlp_ratio: float = 4.0,
         drop_path: float = 0.0,
         norm_layer: nn.Module | str = "LayerNorm",
-        q_stride: tuple[int, int] = None,
+        q_stride: tuple[int, int] | None = None,
         act_layer: type[nn.Module] = nn.GELU,
         window_size: int = 0,
     ):
ultralytics/models/sam/modules/transformer.py
CHANGED

@@ -280,7 +280,7 @@ class Attention(nn.Module):
         embedding_dim: int,
         num_heads: int,
         downsample_rate: int = 1,
-        kv_in_dim: int = None,
+        kv_in_dim: int | None = None,
     ) -> None:
         """
         Initialize the Attention module with specified dimensions and settings.
ultralytics/models/sam/predict.py
CHANGED

@@ -1850,7 +1850,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
     @smart_inference_mode()
     def update_memory(
         self,
-        obj_ids: list[int] = None,
+        obj_ids: list[int] | None = None,
         points: torch.Tensor | None = None,
         labels: torch.Tensor | None = None,
         masks: torch.Tensor | None = None,
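The three signature changes above follow the same pattern: a parameter that defaults to None now says so in its annotation via the PEP 604 union syntax. A generic illustration (function names here are made up, not from the package):

def make_block(q_stride: tuple[int, int] = None): ...               # implicit Optional; flagged by strict type checkers
def make_block_fixed(q_stride: tuple[int, int] | None = None): ...  # annotation now matches the None default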
ultralytics/models/yolo/__init__.py
CHANGED

@@ -4,4 +4,4 @@ from ultralytics.models.yolo import classify, detect, obb, pose, segment, world,
 
 from .model import YOLO, YOLOE, YOLOWorld
 
-__all__ = "
+__all__ = "YOLO", "YOLOE", "YOLOWorld", "classify", "detect", "obb", "pose", "segment", "world", "yoloe"
ultralytics/models/yolo/segment/val.py
CHANGED

@@ -222,7 +222,7 @@ class SegmentationValidator(DetectionValidator):
             predn (dict[str, torch.Tensor]): Predictions containing bboxes, masks, confidence scores, and classes.
             pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
         """
-        from faster_coco_eval.core.mask import encode
+        from faster_coco_eval.core.mask import encode
 
         def single_encode(x):
             """Encode predicted masks as RLE and append results to jdict."""
ultralytics/models/yolo/yoloe/__init__.py
CHANGED

@@ -6,17 +6,17 @@ from .train_seg import YOLOEPESegTrainer, YOLOESegTrainer, YOLOESegTrainerFromSc
This hunk rewrites the __all__ declaration; the resulting list is:

__all__ = [
    "YOLOEDetectValidator",
    "YOLOEPEFreeTrainer",
    "YOLOEPESegTrainer",
    "YOLOEPETrainer",
    "YOLOESegTrainer",
    "YOLOESegTrainerFromScratch",
    "YOLOESegVPTrainer",
    "YOLOESegValidator",
    "YOLOETrainer",
    "YOLOETrainerFromScratch",
    "YOLOEVPDetectPredictor",
    "YOLOEVPSegPredictor",
    "YOLOEVPTrainer",
]
ultralytics/nn/__init__.py
CHANGED
@@ -14,14 +14,14 @@ from .tasks import (
This hunk rewrites the __all__ declaration; the resulting tuple is:

__all__ = (
    "BaseModel",
    "ClassificationModel",
    "DetectionModel",
    "SegmentationModel",
    "guess_model_scale",
    "guess_model_task",
    "load_checkpoint",
    "parse_model",
    "torch_safe_load",
    "yaml_model_load",
)
ultralytics/nn/autobackend.py
CHANGED
@@ -96,6 +96,8 @@ class AutoBackend(nn.Module):
             | NCNN                  | *_ncnn_model/     |
             | IMX                   | *_imx_model/      |
             | RKNN                  | *_rknn_model/     |
+            | Triton Inference      | triton://model    |
+            | ExecuTorch            | *.pte             |
 
     Attributes:
         model (torch.nn.Module): The loaded YOLO model.
@@ -122,6 +124,7 @@ class AutoBackend(nn.Module):
         imx (bool): Whether the model is an IMX model.
         rknn (bool): Whether the model is an RKNN model.
         triton (bool): Whether the model is a Triton Inference Server model.
+        pte (bool): Whether the model is a PyTorch ExecuTorch model.
 
     Methods:
         forward: Run inference on an input image.
@@ -176,6 +179,7 @@ class AutoBackend(nn.Module):
             ncnn,
             imx,
             rknn,
+            pte,
             triton,
         ) = self._model_type("" if nn_module else model)
         fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
@@ -333,11 +337,11 @@ class AutoBackend(nn.Module):
             check_requirements("numpy==1.23.5")
 
             try:  # https://developer.nvidia.com/nvidia-tensorrt-download
-                import tensorrt as trt
+                import tensorrt as trt
             except ImportError:
                 if LINUX:
                     check_requirements("tensorrt>7.0.0,!=10.1.0")
-                import tensorrt as trt
+                import tensorrt as trt
             check_version(trt.__version__, ">=7.0.0", hard=True)
             check_version(trt.__version__, "!=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
             if device.type == "cpu":
@@ -492,7 +496,7 @@ class AutoBackend(nn.Module):
                 if ARM64
                 else "paddlepaddle>=3.0.0"
             )
-            import paddle.inference as pdi
+            import paddle.inference as pdi
 
             w = Path(w)
             model_file, params_file = None, None
@@ -570,6 +574,25 @@ class AutoBackend(nn.Module):
             rknn_model.init_runtime()
             metadata = w.parent / "metadata.yaml"
 
+        # ExecuTorch
+        elif pte:
+            LOGGER.info(f"Loading {w} for ExecuTorch inference...")
+            # TorchAO release compatibility table bug https://github.com/pytorch/ao/issues/2919
+            check_requirements("setuptools<71.0.0")  # Setuptools bug: https://github.com/pypa/setuptools/issues/4483
+            check_requirements(("executorch==1.0.0", "flatbuffers"))
+            from executorch.runtime import Runtime
+
+            w = Path(w)
+            if w.is_dir():
+                model_file = next(w.rglob("*.pte"))
+                metadata = w / "metadata.yaml"
+            else:
+                model_file = w
+                metadata = w.parent / "metadata.yaml"
+
+            program = Runtime.get().load_program(str(model_file))
+            model = program.load_method("forward")
+
         # Any other format (unsupported)
         else:
             from ultralytics.engine.exporter import export_formats
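A minimal sketch of the load-and-run path this new branch enables, using the same ExecuTorch runtime calls shown in the diff (the file name and input shape are illustrative):

import torch
from executorch.runtime import Runtime

program = Runtime.get().load_program("yolo11n.pte")        # load the exported ExecuTorch program
forward = program.load_method("forward")                   # bind the "forward" entry point
outputs = forward.execute([torch.zeros(1, 3, 640, 640)])   # run inference on a dummy BCHW tensor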
@@ -629,7 +652,7 @@ class AutoBackend(nn.Module):
         Returns:
             (torch.Tensor | list[torch.Tensor]): The raw output tensor(s) from the model.
         """
-        b, ch, h, w = im.shape  # batch, channel, height, width
+        _b, _ch, h, w = im.shape  # batch, channel, height, width
         if self.fp16 and im.dtype != torch.float16:
             im = im.half()  # to FP16
         if self.nhwc:
@@ -773,6 +796,10 @@ class AutoBackend(nn.Module):
             im = im if isinstance(im, (list, tuple)) else [im]
             y = self.rknn_model.inference(inputs=im)
 
+        # ExecuTorch
+        elif self.pte:
+            y = self.model.execute([im])
+
         # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
         else:
             im = im.cpu().numpy()
@@ -892,4 +919,4 @@ class AutoBackend(nn.Module):
         url = urlsplit(p)
         triton = bool(url.netloc) and bool(url.path) and url.scheme in {"http", "grpc"}
 
-        return types + [triton]
+        return [*types, triton]
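For context, the triton flag appended to the returned type list comes from a simple URL check; for example (the URL is illustrative):

from urllib.parse import urlsplit

url = urlsplit("grpc://localhost:8001/yolo11n")
print(bool(url.netloc) and bool(url.path) and url.scheme in {"http", "grpc"})  # True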
ultralytics/nn/modules/__init__.py
CHANGED

@@ -103,80 +103,80 @@ from .transformer import (
This hunk rewrites the __all__ declaration; the resulting tuple is:

__all__ = (
    "AIFI",
    "C1",
    "C2",
    "C2PSA",
    "C3",
    "C3TR",
    "CBAM",
    "CIB",
    "DFL",
    "ELAN1",
    "MLP",
    "OBB",
    "PSA",
    "SPP",
    "SPPELAN",
    "SPPF",
    "A2C2f",
    "AConv",
    "ADown",
    "Attention",
    "BNContrastiveHead",
    "Bottleneck",
    "BottleneckCSP",
    "C2f",
    "C2fAttn",
    "C2fCIB",
    "C2fPSA",
    "C3Ghost",
    "C3k2",
    "C3x",
    "CBFuse",
    "CBLinear",
    "ChannelAttention",
    "Classify",
    "Concat",
    "ContrastiveHead",
    "Conv",
    "Conv2",
    "ConvTranspose",
    "DWConv",
    "DWConvTranspose2d",
    "DeformableTransformerDecoder",
    "DeformableTransformerDecoderLayer",
    "Detect",
    "Focus",
    "GhostBottleneck",
    "GhostConv",
    "HGBlock",
    "HGStem",
    "ImagePoolingAttn",
    "Index",
    "LRPCHead",
    "LayerNorm2d",
    "LightConv",
    "MLPBlock",
    "MSDeformAttn",
    "MaxSigmoidAttnBlock",
    "Pose",
    "Proto",
    "RTDETRDecoder",
    "RepC3",
    "RepConv",
    "RepNCSPELAN4",
    "RepVGGDW",
    "ResNetLayer",
    "SCDown",
    "Segment",
    "SpatialAttention",
    "TorchVision",
    "TransformerBlock",
    "TransformerEncoderLayer",
    "TransformerLayer",
    "WorldDetect",
    "YOLOEDetect",
    "YOLOESegment",
    "v10Detect",
)
ultralytics/nn/modules/block.py
CHANGED
@@ -13,43 +13,43 @@ from .conv import Conv, DWConv, GhostConv, LightConv, RepConv, autopad
This hunk rewrites the __all__ declaration; the resulting tuple is:

__all__ = (
    "C1",
    "C2",
    "C2PSA",
    "C3",
    "C3TR",
    "CIB",
    "DFL",
    "ELAN1",
    "PSA",
    "SPP",
    "SPPELAN",
    "SPPF",
    "AConv",
    "ADown",
    "Attention",
    "BNContrastiveHead",
    "Bottleneck",
    "BottleneckCSP",
    "C2f",
    "C2fAttn",
    "C2fCIB",
    "C2fPSA",
    "C3Ghost",
    "C3k2",
    "C3x",
    "CBFuse",
    "CBLinear",
    "ContrastiveHead",
    "GhostBottleneck",
    "HGBlock",
    "HGStem",
    "ImagePoolingAttn",
    "Proto",
    "RepC3",
    "RepNCSPELAN4",
    "RepVGGDW",
    "ResNetLayer",
    "SCDown",
    "TorchVision",
)
ultralytics/nn/modules/conv.py
CHANGED
@@ -10,20 +10,20 @@ import torch
This hunk rewrites the __all__ declaration; the resulting tuple is:

__all__ = (
    "CBAM",
    "ChannelAttention",
    "Concat",
    "Conv",
    "Conv2",
    "ConvTranspose",
    "DWConv",
    "DWConvTranspose2d",
    "Focus",
    "GhostConv",
    "Index",
    "LightConv",
    "RepConv",
    "SpatialAttention",
)
ultralytics/nn/modules/head.py
CHANGED
@@ -20,7 +20,7 @@ from .conv import Conv, DWConv
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init
 
-__all__ = "
+__all__ = "OBB", "Classify", "Detect", "Pose", "RTDETRDecoder", "Segment", "YOLOEDetect", "YOLOESegment", "v10Detect"
 
 
 class Detect(nn.Module):
ultralytics/nn/modules/transformer.py
CHANGED

@@ -16,16 +16,16 @@ from .conv import Conv
This hunk rewrites the __all__ declaration; the resulting tuple is:

__all__ = (
    "AIFI",
    "MLP",
    "DeformableTransformerDecoder",
    "DeformableTransformerDecoderLayer",
    "LayerNorm2d",
    "MLPBlock",
    "MSDeformAttn",
    "TransformerBlock",
    "TransformerEncoderLayer",
    "TransformerLayer",
)
@@ -392,7 +392,7 @@ class MLP(nn.Module):
         super().__init__()
         self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)
-        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
+        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim, *h], [*h, output_dim]))
         self.sigmoid = sigmoid
         self.act = act()
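The rewritten zip pairs consecutive layer widths; with illustrative sizes input_dim=256, hidden_dim=128, output_dim=4 and num_layers=3:

h = [128] * (3 - 1)                   # [128, 128]
print(list(zip([256, *h], [*h, 4])))  # [(256, 128), (128, 128), (128, 4)]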
ultralytics/nn/modules/utils.py
CHANGED
@@ -9,7 +9,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn.init import uniform_
 
-__all__ = "
+__all__ = "inverse_sigmoid", "multi_scale_deformable_attn_pytorch"
 
 
 def _get_clones(module, n):
ultralytics/nn/tasks.py
CHANGED
@@ -1561,7 +1561,7 @@ def parse_model(d, ch, verbose=True):
     scale = d.get("scale")
     if scales:
         if not scale:
-            scale =
+            scale = next(iter(scales.keys()))
             LOGGER.warning(f"no model scale passed. Assuming scale='{scale}'.")
         depth, width, max_channels = scales[scale]
 
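next(iter(...)) simply picks the first key of the scales dict when no scale was passed; with an illustrative scales table:

scales = {"n": (0.50, 0.25, 1024), "s": (0.50, 0.50, 1024)}  # hypothetical depth/width/max_channels entries
print(next(iter(scales.keys())))  # 'n'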
@@ -1708,7 +1708,7 @@ def parse_model(d, ch, verbose=True):
         m_.np = sum(x.numel() for x in m_.parameters())  # number params
         m_.i, m_.f, m_.type = i, f, t  # attach index, 'from' index, type
         if verbose:
-            LOGGER.info(f"{i:>3}{
+            LOGGER.info(f"{i:>3}{f!s:>20}{n_:>3}{m_.np:10.0f} {t:<45}{args!s:<30}")  # print
         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
         layers.append(m_)
         if i == 0:
@@ -1752,7 +1752,7 @@ def guess_model_scale(model_path):
         (str): The size character of the model's scale (n, s, m, l, or x).
     """
     try:
-        return re.search(r"yolo(e-)?[v]?\d+([nslmx])", Path(model_path).stem).group(2)
+        return re.search(r"yolo(e-)?[v]?\d+([nslmx])", Path(model_path).stem).group(2)
     except AttributeError:
         return ""
 
ultralytics/solutions/__init__.py
CHANGED

@@ -19,23 +19,23 @@ from .trackzone import TrackZone
This hunk rewrites the __all__ declaration; the resulting tuple is:

__all__ = (
    "AIGym",
    "Analytics",
    "DistanceCalculation",
    "Heatmap",
    "Inference",
    "InstanceSegmentation",
    "ObjectBlurrer",
    "ObjectCounter",
    "ObjectCropper",
    "ParkingManagement",
    "ParkingPtsSelection",
    "QueueManager",
    "RegionCounter",
    "SearchApp",
    "SecurityAlarm",
    "SpeedEstimator",
    "TrackZone",
    "VisionEye",
    "VisualAISearch",
)
ultralytics/solutions/object_counter.py
CHANGED

@@ -106,11 +106,8 @@ class ObjectCounter(BaseSolution):
         region_width = max(p[0] for p in self.region) - min(p[0] for p in self.region)
         region_height = max(p[1] for p in self.region) - min(p[1] for p in self.region)
 
-        if (
-            region_width < region_height
-            and current_centroid[0] > prev_position[0]
-            or region_width >= region_height
-            and current_centroid[1] > prev_position[1]
+        if (region_width < region_height and current_centroid[0] > prev_position[0]) or (
+            region_width >= region_height and current_centroid[1] > prev_position[1]
         ):  # Moving right or downward
             self.in_count += 1
             self.classwise_count[self.names[cls]]["IN"] += 1
@@ -135,7 +132,7 @@ class ObjectCounter(BaseSolution):
             str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} "
             f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip()
             for key, value in self.classwise_count.items()
-            if value["IN"] != 0 or value["OUT"] != 0 and (self.show_in or self.show_out)
+            if value["IN"] != 0 or (value["OUT"] != 0 and (self.show_in or self.show_out))
         }
         if labels_dict:
             self.annotator.display_analytics(plot_im, labels_dict, (104, 31, 17), (255, 255, 255), self.margin)
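For reference, Python's and operator binds tighter than or, so explicit parentheses like the ones added in these ObjectCounter hunks make the intended grouping visible; an expression of the form a and b or c and d already evaluates with that grouping:

a, b, c, d = True, False, True, True  # arbitrary example booleans
assert (a and b or c and d) == ((a and b) or (c and d))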
ultralytics/solutions/similarity_search.py
CHANGED

@@ -78,11 +78,11 @@ class VisualAISearch:
 
     def extract_image_feature(self, path: Path) -> np.ndarray:
         """Extract CLIP image embedding from the given image path."""
-        return self.model.encode_image(Image.open(path)).cpu().numpy()
+        return self.model.encode_image(Image.open(path)).detach().cpu().numpy()
 
     def extract_text_feature(self, text: str) -> np.ndarray:
         """Extract CLIP text embedding from the given text query."""
-        return self.model.encode_text(self.model.tokenize([text])).cpu().numpy()
+        return self.model.encode_text(self.model.tokenize([text])).detach().cpu().numpy()
 
     def load_or_build_index(self) -> None:
         """
@@ -189,7 +189,7 @@ class SearchApp:
         >>> app.run(debug=True)
         """
 
-    def __init__(self, data: str = "images", device: str = None) -> None:
+    def __init__(self, data: str = "images", device: str | None = None) -> None:
         """
         Initialize the SearchApp with VisualAISearch backend.
 
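The added .detach() matters when the CLIP encoders return tensors that still require gradients: converting such a tensor straight to NumPy raises an error. A small illustration with a plain tensor (not the actual model output):

import torch

t = torch.ones(3, requires_grad=True)
# t.cpu().numpy()               # RuntimeError: Can't call numpy() on Tensor that requires grad
print(t.detach().cpu().numpy())  # array([1., 1., 1.], dtype=float32)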
ultralytics/trackers/byte_tracker.py
CHANGED
@@ -230,7 +230,7 @@ class STrack(BaseTrack):
     def result(self) -> list[float]:
         """Get the current tracking results in the appropriate bounding box format."""
         coords = self.xyxy if self.angle is None else self.xywha
-        return coords.tolist() + [self.track_id, self.score, self.cls, self.idx]
+        return [*coords.tolist(), self.track_id, self.score, self.cls, self.idx]
 
     def __repr__(self) -> str:
         """Return a string representation of the STrack object including start frame, end frame, and track ID."""
@@ -356,7 +356,7 @@ class BYTETracker:
         r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
         # TODO
         dists = matching.iou_distance(r_tracked_stracks, detections_second)
-        matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5)
+        matches, u_track, _u_detection_second = matching.linear_assignment(dists, thresh=0.5)
         for itracked, idet in matches:
             track = r_tracked_stracks[itracked]
             det = detections_second[idet]