dgenerate-ultralytics-headless 8.3.197__py3-none-any.whl → 8.3.198__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.3.197.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/METADATA +1 -1
- {dgenerate_ultralytics_headless-8.3.197.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/RECORD +42 -42
- tests/test_engine.py +9 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +0 -1
- ultralytics/cfg/default.yaml +96 -94
- ultralytics/cfg/trackers/botsort.yaml +16 -17
- ultralytics/cfg/trackers/bytetrack.yaml +9 -11
- ultralytics/data/augment.py +1 -1
- ultralytics/data/dataset.py +1 -1
- ultralytics/engine/exporter.py +35 -35
- ultralytics/engine/predictor.py +1 -2
- ultralytics/engine/results.py +1 -1
- ultralytics/engine/trainer.py +5 -5
- ultralytics/engine/tuner.py +54 -32
- ultralytics/models/sam/modules/decoders.py +3 -3
- ultralytics/models/sam/modules/sam.py +5 -5
- ultralytics/models/sam/predict.py +11 -11
- ultralytics/models/yolo/classify/train.py +2 -7
- ultralytics/models/yolo/classify/val.py +2 -2
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +1 -6
- ultralytics/models/yolo/detect/val.py +4 -4
- ultralytics/models/yolo/obb/val.py +3 -3
- ultralytics/models/yolo/pose/predict.py +1 -1
- ultralytics/models/yolo/pose/train.py +0 -6
- ultralytics/models/yolo/pose/val.py +2 -2
- ultralytics/models/yolo/segment/predict.py +2 -2
- ultralytics/models/yolo/segment/train.py +0 -5
- ultralytics/models/yolo/segment/val.py +9 -7
- ultralytics/models/yolo/yoloe/val.py +1 -1
- ultralytics/nn/modules/block.py +1 -1
- ultralytics/nn/tasks.py +2 -2
- ultralytics/utils/checks.py +1 -1
- ultralytics/utils/metrics.py +6 -6
- ultralytics/utils/nms.py +5 -13
- ultralytics/utils/plotting.py +22 -36
- ultralytics/utils/torch_utils.py +9 -5
- {dgenerate_ultralytics_headless-8.3.197.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/WHEEL +0 -0
- {dgenerate_ultralytics_headless-8.3.197.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.3.197.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.3.197.dist-info → dgenerate_ultralytics_headless-8.3.198.dist-info}/top_level.txt +0 -0
ultralytics/engine/exporter.py
CHANGED
@@ -194,9 +194,9 @@ def try_export(inner_func):
         dt = 0.0
         try:
             with Profile() as dt:
-                f, model = inner_func(*args, **kwargs)
+                f = inner_func(*args, **kwargs)
             LOGGER.info(f"{prefix} export success ✅ {dt.t:.1f}s, saved as '{f}' ({file_size(f):.1f} MB)")
-            return f, model
+            return f
         except Exception as e:
             LOGGER.error(f"{prefix} export failure {dt.t:.1f}s: {e}")
             raise e
@@ -486,36 +486,36 @@ class Exporter:
         # Exports
         f = [""] * len(fmts)  # exported filenames
         if jit or ncnn:  # TorchScript
-            f[0], _ = self.export_torchscript()
+            f[0] = self.export_torchscript()
         if engine:  # TensorRT required before ONNX
-            f[1], _ = self.export_engine(dla=dla)
+            f[1] = self.export_engine(dla=dla)
         if onnx:  # ONNX
-            f[2], _ = self.export_onnx()
+            f[2] = self.export_onnx()
         if xml:  # OpenVINO
-            f[3], _ = self.export_openvino()
+            f[3] = self.export_openvino()
         if coreml:  # CoreML
-            f[4], _ = self.export_coreml()
+            f[4] = self.export_coreml()
         if is_tf_format:  # TensorFlow formats
             self.args.int8 |= edgetpu
             f[5], keras_model = self.export_saved_model()
             if pb or tfjs:  # pb prerequisite to tfjs
-                f[6], _ = self.export_pb(keras_model=keras_model)
+                f[6] = self.export_pb(keras_model=keras_model)
             if tflite:
-                f[7], _ = self.export_tflite()
+                f[7] = self.export_tflite()
             if edgetpu:
-                f[8], _ = self.export_edgetpu(tflite_model=Path(f[5]) / f"{self.file.stem}_full_integer_quant.tflite")
+                f[8] = self.export_edgetpu(tflite_model=Path(f[5]) / f"{self.file.stem}_full_integer_quant.tflite")
             if tfjs:
-                f[9], _ = self.export_tfjs()
+                f[9] = self.export_tfjs()
         if paddle:  # PaddlePaddle
-            f[10], _ = self.export_paddle()
+            f[10] = self.export_paddle()
         if mnn:  # MNN
-            f[11], _ = self.export_mnn()
+            f[11] = self.export_mnn()
         if ncnn:  # NCNN
-            f[12], _ = self.export_ncnn()
+            f[12] = self.export_ncnn()
         if imx:
-            f[13], _ = self.export_imx()
+            f[13] = self.export_imx()
         if rknn:
-            f[14], _ = self.export_rknn()
+            f[14] = self.export_rknn()
 
         # Finish
         f = [str(x) for x in f if x]  # filter out '' and None
@@ -580,7 +580,7 @@ class Exporter:
             optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
         else:
             ts.save(str(f), _extra_files=extra_files)
-        return f, None
+        return f
 
     @try_export
     def export_onnx(self, prefix=colorstr("ONNX:")):
@@ -639,7 +639,7 @@ class Exporter:
             meta.key, meta.value = k, str(v)
 
         onnx.save(model_onnx, f)
-        return f, None
+        return f
 
     @try_export
     def export_openvino(self, prefix=colorstr("OpenVINO:")):
@@ -708,13 +708,13 @@
                 ignored_scope=ignored_scope,
             )
             serialize(quantized_ov_model, fq_ov)
-            return fq, None
+            return fq
 
         f = str(self.file).replace(self.file.suffix, f"_openvino_model{os.sep}")
         f_ov = str(Path(f) / self.file.with_suffix(".xml").name)
 
         serialize(ov_model, f_ov)
-        return f, None
+        return f
 
     @try_export
     def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
@@ -738,12 +738,12 @@
 
         pytorch2paddle(module=self.model, save_dir=f, jit_type="trace", input_examples=[self.im])  # export
         YAML.save(Path(f) / "metadata.yaml", self.metadata)  # add metadata.yaml
-        return f, None
+        return f
 
     @try_export
     def export_mnn(self, prefix=colorstr("MNN:")):
         """Export YOLO model to MNN format using MNN https://github.com/alibaba/MNN."""
-        f_onnx, _ = self.export_onnx()  # get onnx model first
+        f_onnx = self.export_onnx()  # get onnx model first
 
         check_requirements("MNN>=2.9.6")
         import MNN  # noqa
@@ -763,7 +763,7 @@
         convert_scratch = Path(self.file.parent / ".__convert_external_data.bin")
         if convert_scratch.exists():
             convert_scratch.unlink()
-        return f, None
+        return f
 
     @try_export
     def export_ncnn(self, prefix=colorstr("NCNN:")):
@@ -831,7 +831,7 @@
         Path(f_debug).unlink(missing_ok=True)
 
         YAML.save(f / "metadata.yaml", self.metadata)  # add metadata.yaml
-        return str(f), None
+        return str(f)
 
     @try_export
     def export_coreml(self, prefix=colorstr("CoreML:")):
@@ -910,13 +910,13 @@
             )
             f = f.with_suffix(".mlmodel")
         ct_model.save(str(f))
-        return f, None
+        return f
 
     @try_export
     def export_engine(self, dla=None, prefix=colorstr("TensorRT:")):
         """Export YOLO model to TensorRT format https://developer.nvidia.com/tensorrt."""
         assert self.im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. use 'device=0'"
-        f_onnx, _ = self.export_onnx()  # run before TRT import https://github.com/ultralytics/ultralytics/issues/7016
+        f_onnx = self.export_onnx()  # run before TRT import https://github.com/ultralytics/ultralytics/issues/7016
 
         try:
             import tensorrt as trt  # noqa
@@ -946,7 +946,7 @@
             prefix=prefix,
         )
 
-        return f, None
+        return f
 
     @try_export
     def export_saved_model(self, prefix=colorstr("TensorFlow SavedModel:")):
@@ -991,7 +991,7 @@
 
         # Export to ONNX
         self.args.simplify = True
-        f_onnx, _ = self.export_onnx()
+        f_onnx = self.export_onnx()
 
         # Export to TF
         np_data = None
@@ -1051,7 +1051,7 @@
         frozen_func = convert_variables_to_constants_v2(m)
         frozen_func.graph.as_graph_def()
         tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False)
-        return f, None
+        return f
 
     @try_export
     def export_tflite(self, prefix=colorstr("TensorFlow Lite:")):
@@ -1067,7 +1067,7 @@
             f = saved_model / f"{self.file.stem}_float16.tflite"  # fp32 in/out
         else:
             f = saved_model / f"{self.file.stem}_float32.tflite"
-        return str(f), None
+        return str(f)
 
     @try_export
     def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
@@ -1102,7 +1102,7 @@
         LOGGER.info(f"{prefix} running '{cmd}'")
         subprocess.run(cmd, shell=True)
         self._add_tflite_metadata(f)
-        return f, None
+        return f
 
     @try_export
     def export_tfjs(self, prefix=colorstr("TensorFlow.js:")):
@@ -1135,7 +1135,7 @@
 
         # Add metadata
         YAML.save(Path(f) / "metadata.yaml", self.metadata)  # add metadata.yaml
-        return f, None
+        return f
 
     @try_export
     def export_rknn(self, prefix=colorstr("RKNN:")):
@@ -1151,7 +1151,7 @@
 
         from rknn.api import RKNN
 
-        f, _ = self.export_onnx()
+        f = self.export_onnx()
         export_path = Path(f"{Path(f).stem}_rknn_model")
         export_path.mkdir(exist_ok=True)
 
@@ -1162,7 +1162,7 @@
         f = f.replace(".onnx", f"-{self.args.name}.rknn")
         rknn.export_rknn(f"{export_path / f}")
         YAML.save(export_path / "metadata.yaml", self.metadata)
-        return export_path, None
+        return export_path
 
     @try_export
     def export_imx(self, prefix=colorstr("IMX:")):
@@ -1339,7 +1339,7 @@
         with open(f / "labels.txt", "w", encoding="utf-8") as file:
             file.writelines([f"{name}\n" for _, name in self.model.names.items()])
 
-        return f, None
+        return f
 
     def _add_tflite_metadata(self, file):
         """Add metadata to *.tflite models per https://ai.google.dev/edge/litert/models/metadata."""
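The pattern behind the exporter change: every @try_export method previously returned a (path, model) tuple whose second element was almost always None, and 8.3.198 collapses this to a bare path. A minimal runnable sketch of the idea, with a hypothetical export_dummy function and simplified timing/logging standing in for the package's Profile and LOGGER:

import time
from functools import wraps


def try_export(inner_func):
    """Sketch: time the wrapped export and pass through its single return value."""

    @wraps(inner_func)
    def outer_func(*args, **kwargs):
        t0 = time.perf_counter()
        try:
            f = inner_func(*args, **kwargs)  # previously: f, model = inner_func(...)
            print(f"export success {time.perf_counter() - t0:.1f}s, saved as '{f}'")
            return f  # previously: return f, model
        except Exception as e:
            print(f"export failure {time.perf_counter() - t0:.1f}s: {e}")
            raise

    return outer_func


@try_export
def export_dummy(path="model.onnx"):
    return path  # previously: return path, None


f = export_dummy()  # callers no longer unpack a tuple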
ultralytics/engine/predictor.py
CHANGED
@@ -409,8 +409,7 @@ class BasePredictor:
         if hasattr(self.model, "imgsz") and not getattr(self.model, "dynamic", False):
             self.args.imgsz = self.model.imgsz  # reuse imgsz from export metadata
         self.model.eval()
-
-        self.model = attempt_compile(self.model, device=self.device)
+        self.model = attempt_compile(self.model, device=self.device, mode=self.args.compile)
 
     def write_results(self, i: int, p: Path, im: torch.Tensor, s: list[str]) -> str:
         """
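The only behavioral change here is threading self.args.compile through to the compile helper, so a configured compile mode can reach torch.compile. attempt_compile itself is not part of this diff; a guarded wrapper of roughly this shape (an assumption reconstructed from the call site, not the packaged implementation) would forward the mode:

import torch


def attempt_compile(model: torch.nn.Module, device: torch.device, mode=False):
    """Sketch: optionally wrap model in torch.compile, falling back to eager on failure."""
    # device is kept only for signature parity with the call site in this sketch
    if not mode or not hasattr(torch, "compile"):
        return model  # compiling disabled, or torch < 2.0
    try:
        # True selects torch.compile's default mode; strings such as
        # "reduce-overhead" or "max-autotune" are forwarded as-is
        return torch.compile(model, mode=None if mode is True else mode)
    except Exception:
        return model  # keep the eager model if the backend rejects the graph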
ultralytics/engine/results.py
CHANGED
@@ -900,7 +900,7 @@ class Boxes(BaseTensor):
     Args:
         boxes (torch.Tensor | np.ndarray): A tensor or numpy array with detection boxes of shape
             (num_boxes, 6) or (num_boxes, 7). Columns should contain
-            [x1, y1, x2, y2, confidence, class, (optional) track_id].
+            [x1, y1, x2, y2, (optional) track_id, confidence, class].
         orig_shape (tuple[int, int]): The original image shape as (height, width). Used for normalization.
 
     Attributes:
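The corrected docstring reflects the column order Boxes actually parses: in the 7-column tracking case the track id sits at index 4, between the coordinates and the confidence, and conf/cls stay in the last two columns. A quick illustration with toy values:

import torch

from ultralytics.engine.results import Boxes

# columns: x1, y1, x2, y2, track_id, conf, cls
data = torch.tensor([[10.0, 20.0, 110.0, 220.0, 7.0, 0.91, 0.0]])
b = Boxes(data, orig_shape=(480, 640))
print(b.id, b.conf, b.cls)  # tensor([7.]) tensor([0.9100]) tensor([0.])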
ultralytics/engine/trainer.py
CHANGED
@@ -42,6 +42,7 @@ from ultralytics.utils.autobatch import check_train_batch_size
 from ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_model_file_from_stem, print_args
 from ultralytics.utils.dist import ddp_cleanup, generate_ddp_command
 from ultralytics.utils.files import get_latest_run
+from ultralytics.utils.plotting import plot_results
 from ultralytics.utils.torch_utils import (
     TORCH_2_4,
     EarlyStopping,
@@ -261,8 +262,7 @@ class BaseTrainer:
             self.model.criterion = self.model.init_criterion()
 
         # Compile model
-
-        self.model = attempt_compile(self.model, device=self.device)
+        self.model = attempt_compile(self.model, device=self.device, mode=self.args.compile)
 
         # Freeze layers
         freeze_list = (
@@ -414,7 +414,7 @@
                 batch = self.preprocess_batch(batch)
                 # decouple inference and loss calculations for torch.compile convenience
                 preds = self.model(batch["img"])
-                loss, self.loss_items = self.model.loss(batch, preds)
+                loss, self.loss_items = unwrap_model(self.model).loss(batch, preds)
                 self.loss = loss.sum()
                 if RANK != -1:
                     self.loss *= world_size
@@ -743,8 +743,8 @@
             f.write(s + ("%.6g," * n % tuple([self.epoch + 1, t] + vals)).rstrip(",") + "\n")
 
     def plot_metrics(self):
-        """Plot and display metrics visually."""
-        pass
+        """Plot metrics from a CSV file."""
+        plot_results(file=self.csv, on_plot=self.on_plot)  # save results.png
 
     def on_plot(self, name, data=None):
         """Register plots (e.g. to be consumed in callbacks)."""
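Two notes on these hunks. The new plot_results import supports plot_metrics, which moves up from the task-specific trainers (see the classify/train.py diff below). And routing the loss call through unwrap_model matters because self.model may now be a DDP and/or torch.compile wrapper, which does not expose the task model's .loss() method; a minimal sketch of such a helper (the packaged one lives in ultralytics.utils.torch_utils):

import torch.nn as nn


def unwrap_model(m: nn.Module) -> nn.Module:
    """Sketch: peel torch.compile (._orig_mod) and (D)DP (.module) wrappers."""
    while True:
        if hasattr(m, "_orig_mod"):  # torch.compile OptimizedModule
            m = m._orig_mod
        elif isinstance(m, (nn.DataParallel, nn.parallel.DistributedDataParallel)):
            m = m.module
        else:
            return m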
ultralytics/engine/tuner.py
CHANGED
@@ -16,6 +16,7 @@ Examples:
 
 from __future__ import annotations
 
+import gc
 import random
 import shutil
 import subprocess
@@ -23,6 +24,7 @@ import time
 from datetime import datetime
 
 import numpy as np
+import torch
 
 from ultralytics.cfg import get_cfg, get_save_dir
 from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
@@ -97,7 +99,7 @@
             "warmup_epochs": (0.0, 5.0),  # warmup epochs (fractions ok)
             "warmup_momentum": (0.0, 0.95),  # warmup initial momentum
             "box": (1.0, 20.0),  # box loss gain
-            "cls": (0.2, 4.0),  # cls loss gain (scale with pixels)
+            "cls": (0.1, 4.0),  # cls loss gain (scale with pixels)
             "dfl": (0.4, 6.0),  # dfl loss gain
             "hsv_h": (0.0, 0.1),  # image HSV-Hue augmentation (fraction)
             "hsv_s": (0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
@@ -114,6 +116,7 @@
             "mixup": (0.0, 1.0),  # image mixup (probability)
             "cutmix": (0.0, 1.0),  # image cutmix (probability)
             "copy_paste": (0.0, 1.0),  # segment copy-paste (probability)
+            "close_mosaic": (0.0, 10.0),  # close dataloader mosaic (epochs)
         }
         mongodb_uri = args.pop("mongodb_uri", None)
         mongodb_db = args.pop("mongodb_db", "ultralytics")
@@ -266,19 +269,31 @@
         except Exception as e:
             LOGGER.warning(f"{self.prefix}MongoDB to CSV sync failed: {e}")
 
+    def _crossover(self, x: np.ndarray, alpha: float = 0.2, k: int = 9) -> np.ndarray:
+        """BLX-α crossover from up to top-k parents (x[:,0]=fitness, rest=genes)."""
+        k = min(k, len(x))
+        # fitness weights (shifted to >0); fallback to uniform if degenerate
+        weights = x[:, 0] - x[:, 0].min() + 1e-6
+        if not np.isfinite(weights).all() or weights.sum() == 0:
+            weights = np.ones_like(weights)
+        idxs = random.choices(range(len(x)), weights=weights, k=k)
+        parents_mat = np.stack([x[i][1:] for i in idxs], 0)  # (k, ng) strip fitness
+        lo, hi = parents_mat.min(0), parents_mat.max(0)
+        span = hi - lo
+        return np.random.uniform(lo - alpha * span, hi + alpha * span)
+
     def _mutate(
         self,
-        parent: str = "single",
-        n: int = 5,
-        mutation: float = 0.8,
+        n: int = 9,
+        mutation: float = 0.5,
         sigma: float = 0.2,
     ) -> dict[str, float]:
         """
         Mutate hyperparameters based on bounds and scaling factors specified in `self.space`.
 
         Args:
-            parent (str): Parent selection method: 'single' or 'weighted'.
-            n (int): Number of parents to consider.
+            parent (str): Parent selection method (kept for API compatibility, unused in BLX mode).
+            n (int): Number of top parents to consider.
             mutation (float): Probability of a parameter mutation in any given iteration.
             sigma (float): Standard deviation for Gaussian random number generator.
 
@@ -293,41 +308,40 @@
         if results:
             # MongoDB already sorted by fitness DESC, so results[0] is best
             x = np.array([[r["fitness"]] + [r["hyperparameters"][k] for k in self.space.keys()] for r in results])
-            n = min(n, len(x))
 
         # Fall back to CSV if MongoDB unavailable or empty
         if x is None and self.tune_csv.exists():
             csv_data = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
             if len(csv_data) > 0:
                 fitness = csv_data[:, 0]  # first column
-                n = min(n, len(csv_data))
-                x = csv_data[np.argsort(-fitness)][:n]  # top n sorted by fitness
+                order = np.argsort(-fitness)
+                x = csv_data[order][:n]  # top-n sorted by fitness DESC
 
         # Mutate if we have data, otherwise use defaults
         if x is not None:
-            w = x[:, 0] - x[:, 0].min() + 1e-8  # weights (sum > 0)
-            if parent == "single" or len(x) <= 1:
-                x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
-            elif parent == "weighted":
-                x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination
-
-            # Mutate
-            r = np.random
-            r.seed(int(time.time()))
-            g = np.array([v[2] if len(v) == 3 else 1.0 for v in self.space.values()])  # gains 0-1
+            np.random.seed(int(time.time()))
             ng = len(self.space)
-            v = np.ones(ng)
-            while all(v == 1):  # mutate until a change occurs (prevent duplicates)
-                v = (g * (r.random(ng) < mutation) * r.randn(ng) * r.random() * sigma + 1).clip(0.3, 3.0)
-            hyp = {k: float(x[i + 1] * v[i]) for i, k in enumerate(self.space.keys())}
+
+            # Crossover
+            genes = self._crossover(x)
+
+            # Mutation
+            gains = np.array([v[2] if len(v) == 3 else 1.0 for v in self.space.values()])  # gains 0-1
+            factors = np.ones(ng)
+            while np.all(factors == 1):  # mutate until a change occurs (prevent duplicates)
+                mask = np.random.random(ng) < mutation
+                step = np.random.randn(ng) * (sigma * gains)
+                factors = np.where(mask, np.exp(step), 1.0).clip(0.25, 4.0)
+            hyp = {k: float(genes[i] * factors[i]) for i, k in enumerate(self.space.keys())}
         else:
             hyp = {k: getattr(self.args, k) for k in self.space.keys()}
 
         # Constrain to limits
         for k, bounds in self.space.items():
-            hyp[k] = max(hyp[k], bounds[0])  # lower limit
-            hyp[k] = min(hyp[k], bounds[1])  # upper limit
-            hyp[k] = round(hyp[k], 5)  # significant digits
+            hyp[k] = round(min(max(hyp[k], bounds[0]), bounds[1]), 5)
+
+        # Update types
+        hyp["close_mosaic"] = int(round(hyp["close_mosaic"]))
 
         return hyp
 
@@ -361,8 +375,12 @@
             start = x.shape[0]
             LOGGER.info(f"{self.prefix}Resuming tuning run {self.tune_dir} from iteration {start + 1}...")
         for i in range(start, iterations):
+            # Linearly decay sigma from 0.2 → 0.1 over first 300 iterations
+            frac = min(i / 300.0, 1.0)
+            sigma_i = 0.2 - 0.1 * frac
+
             # Mutate hyperparameters
-            mutated_hyp = self._mutate()
+            mutated_hyp = self._mutate(sigma=sigma_i)
             LOGGER.info(f"{self.prefix}Starting iteration {i + 1}/{iterations} with hyperparameters: {mutated_hyp}")
 
             metrics = {}
@@ -378,6 +396,11 @@
                 metrics = torch_load(ckpt_file)["train_metrics"]
                 assert return_code == 0, "training failed"
 
+                # Cleanup
+                time.sleep(1)
+                gc.collect()
+                torch.cuda.empty_cache()
+
             except Exception as e:
                 LOGGER.error(f"training failure for hyperparameter tuning iteration {i + 1}\n{e}")
 
@@ -403,14 +426,14 @@
             x = np.loadtxt(self.tune_csv, ndmin=2, delimiter=",", skiprows=1)
             fitness = x[:, 0]  # first column
             best_idx = fitness.argmax()
-            best_is_current = best_idx == i
+            best_is_current = best_idx == (i - start)
             if best_is_current:
-                best_save_dir = save_dir
+                best_save_dir = str(save_dir)
                 best_metrics = {k: round(v, 5) for k, v in metrics.items()}
                 for ckpt in weights_dir.glob("*.pt"):
                     shutil.copy2(ckpt, self.tune_dir / "weights")
             elif cleanup:
-                shutil.rmtree(weights_dir, ignore_errors=True)  # remove iteration weights/ dir to reduce storage space
+                shutil.rmtree(best_save_dir, ignore_errors=True)  # remove iteration dirs to reduce storage space
 
             # Plot tune results
             plot_tune_results(str(self.tune_csv))
@@ -421,8 +444,7 @@
                 f"{self.prefix}Results saved to {colorstr('bold', self.tune_dir)}\n"
                 f"{self.prefix}Best fitness={fitness[best_idx]} observed at iteration {best_idx + 1}\n"
                 f"{self.prefix}Best fitness metrics are {best_metrics}\n"
-                f"{self.prefix}Best fitness model is {best_save_dir}\n"
-                f"{self.prefix}Best fitness hyperparameters are printed below.\n"
+                f"{self.prefix}Best fitness model is {best_save_dir}"
             )
             LOGGER.info("\n" + header)
             data = {k: float(x[best_idx, i + 1]) for i, k in enumerate(self.space.keys())}
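The rewritten _mutate replaces single/weighted parent selection with a BLX-α crossover followed by multiplicative log-normal mutation. A self-contained numpy sketch of the same mechanics on a toy population (toy bounds and fitness values, not the Tuner's real state):

import random

import numpy as np

space = {"lr0": (1e-5, 0.1), "momentum": (0.7, 0.98), "box": (1.0, 20.0)}
# column 0 = fitness, remaining columns = genes in `space` order
x = np.array(
    [
        [0.62, 0.010, 0.937, 7.5],
        [0.58, 0.020, 0.900, 5.0],
        [0.41, 0.005, 0.850, 9.0],
    ]
)

# BLX-α: sample each child gene uniformly from the parents' range widened by alpha on both sides
alpha, k = 0.2, 9
k = min(k, len(x))
weights = x[:, 0] - x[:, 0].min() + 1e-6  # fitness-proportional parent selection
idxs = random.choices(range(len(x)), weights=weights, k=k)
parents = np.stack([x[i][1:] for i in idxs])
lo, hi = parents.min(0), parents.max(0)
genes = np.random.uniform(lo - alpha * (hi - lo), hi + alpha * (hi - lo))

# log-normal mutation: multiply roughly half the genes by exp(N(0, sigma)), clipped to [0.25, 4]
mask = np.random.random(genes.size) < 0.5
factors = np.where(mask, np.exp(np.random.randn(genes.size) * 0.2), 1.0).clip(0.25, 4.0)
child = {name: float(g * f) for name, g, f in zip(space, genes, factors)}
print(child)  # e.g. {'lr0': 0.0131, 'momentum': 0.912, 'box': 6.84}

Multiplicative exp(step) factors keep mutations scale-relative and strictly positive, which is why the new code clips factors to [0.25, 4.0] instead of adding noise directly as the old implementation did.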
ultralytics/models/sam/modules/decoders.py
CHANGED
@@ -402,7 +402,7 @@ class SAM2MaskDecoder(nn.Module):
             s = 1
         else:
             output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
-            output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1)
+            output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.shape[0], -1, -1)
         tokens = torch.cat((output_tokens, sparse_prompt_embeddings), dim=1)
 
         # Expand per-image data in batch direction to be per-mask
@@ -412,7 +412,7 @@ class SAM2MaskDecoder(nn.Module):
         assert image_embeddings.shape[0] == tokens.shape[0]
         src = image_embeddings
         src = src + dense_prompt_embeddings
-        assert image_pe.size(0) == 1, "image_pe should have size 1 in batch dim (from `get_dense_pe()`)"
+        assert image_pe.shape[0] == 1, "image_pe should have size 1 in batch dim (from `get_dense_pe()`)"
         pos_src = torch.repeat_interleave(image_pe, tokens.shape[0], dim=0)
         b, c, h, w = src.shape
 
@@ -487,7 +487,7 @@ class SAM2MaskDecoder(nn.Module):
         multimask_logits = all_mask_logits[:, 1:, :, :]
         multimask_iou_scores = all_iou_scores[:, 1:]
         best_scores_inds = torch.argmax(multimask_iou_scores, dim=-1)
-        batch_inds = torch.arange(multimask_iou_scores.size(0), device=all_iou_scores.device)
+        batch_inds = torch.arange(multimask_iou_scores.shape[0], device=all_iou_scores.device)
         best_multimask_logits = multimask_logits[batch_inds, best_scores_inds]
         best_multimask_logits = best_multimask_logits.unsqueeze(1)
         best_multimask_iou_scores = multimask_iou_scores[batch_inds, best_scores_inds]
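All of the SAM edits in this release (here and in sam.py and predict.py below) are the same mechanical substitution: Tensor.size(0) becomes Tensor.shape[0]. The two are equivalent in PyTorch, so this is style normalization with no behavioral change:

import torch

t = torch.zeros(4, 3, 2)
assert t.size(0) == t.shape[0] == 4  # same batch dimension
assert t.size() == t.shape == torch.Size([4, 3, 2])  # .shape is the indexable property form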
ultralytics/models/sam/modules/sam.py
CHANGED
@@ -472,7 +472,7 @@ class SAM2Model(torch.nn.Module):
            ...     object_score_logits,
            ... ) = results
        """
-        B = backbone_features.size(0)
+        B = backbone_features.shape[0]
         device = backbone_features.device
         assert backbone_features.size(1) == self.sam_prompt_embed_dim
         assert backbone_features.size(2) == self.sam_image_embedding_size
@@ -482,7 +482,7 @@ class SAM2Model(torch.nn.Module):
         if point_inputs is not None:
             sam_point_coords = point_inputs["point_coords"]
             sam_point_labels = point_inputs["point_labels"]
-            assert sam_point_coords.size(0) == B and sam_point_labels.size(0) == B
+            assert sam_point_coords.shape[0] == B and sam_point_labels.shape[0] == B
         else:
             # If no points are provide, pad with an empty point (with label -1)
             sam_point_coords = torch.zeros(B, 1, 2, device=device, dtype=backbone_features.dtype)
@@ -585,10 +585,10 @@ class SAM2Model(torch.nn.Module):
             antialias=True,  # use antialias for downsampling
         )
         # a dummy IoU prediction of all 1's under mask input
-        ious = mask_inputs.new_ones(mask_inputs.size(0), 1).float()
+        ious = mask_inputs.new_ones(mask_inputs.shape[0], 1).float()
         if not self.use_obj_ptrs_in_encoder or backbone_features is None or high_res_features is None:
             # all zeros as a dummy object pointer (of shape [B, C])
-            obj_ptr = torch.zeros(mask_inputs.size(0), self.hidden_dim, device=mask_inputs.device)
+            obj_ptr = torch.zeros(mask_inputs.shape[0], self.hidden_dim, device=mask_inputs.device)
         else:
             # produce an object pointer using the SAM decoder from the mask input
             _, _, _, _, _, obj_ptr, _ = self._forward_sam_heads(
@@ -1006,7 +1006,7 @@ class SAM2Model(torch.nn.Module):
     @staticmethod
     def _apply_non_overlapping_constraints(pred_masks):
         """Apply non-overlapping constraints to masks, keeping the highest scoring object per location."""
-        batch_size = pred_masks.size(0)
+        batch_size = pred_masks.shape[0]
         if batch_size == 1:
             return pred_masks
 
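One detail kept as context in the sam.py hunks: mask_inputs.new_ones(mask_inputs.shape[0], 1) builds the dummy IoU tensor on the same device and with the same dtype as mask_inputs before the explicit upcast, e.g.:

import torch

mask_inputs = torch.zeros(5, 1, 64, 64, dtype=torch.float16)
ious = mask_inputs.new_ones(mask_inputs.shape[0], 1).float()  # inherits device/dtype, then upcasts
assert ious.shape == (5, 1) and ious.dtype == torch.float32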
ultralytics/models/sam/predict.py
CHANGED
@@ -423,7 +423,7 @@ class Predictor(BasePredictor):
             pred_masks.append(crop_masks)
             pred_bboxes.append(crop_bboxes)
             pred_scores.append(crop_scores)
-            region_areas.append(area.expand(len(crop_masks)))
+            region_areas.append(area.expand(crop_masks.shape[0]))
 
         pred_masks = torch.cat(pred_masks)
         pred_bboxes = torch.cat(pred_bboxes)
@@ -504,14 +504,14 @@
         # (N, 1, H, W), (N, 1)
         pred_masks, pred_scores = preds[:2]
         pred_bboxes = preds[2] if self.segment_all else None
-        names = dict(enumerate(str(i) for i in range(len(pred_masks))))
+        names = dict(enumerate(str(i) for i in range(pred_masks.shape[0])))
 
         if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
             orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
 
         results = []
         for masks, orig_img, img_path in zip([pred_masks], orig_imgs, self.batch[0]):
-            if len(masks) == 0:
+            if masks.shape[0] == 0:
                 masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
             else:
                 masks = ops.scale_masks(masks[None].float(), orig_img.shape[:2], padding=False)[0]
@@ -521,7 +521,7 @@
                 else:
                     pred_bboxes = batched_mask_to_box(masks)
                 # NOTE: SAM models do not return cls info. This `cls` here is just a placeholder for consistency.
-                cls = torch.arange(len(pred_masks), dtype=torch.int32, device=pred_masks.device)
+                cls = torch.arange(pred_masks.shape[0], dtype=torch.int32, device=pred_masks.device)
                 idx = pred_scores > self.args.conf
                 pred_bboxes = torch.cat([pred_bboxes, pred_scores[:, None], cls[:, None]], dim=-1)[idx]
                 masks = masks[idx]
@@ -633,7 +633,7 @@
         """
         import torchvision  # scope for faster 'import ultralytics'
 
-        if len(masks) == 0:
+        if masks.shape[0] == 0:
             return masks
 
         # Filter small disconnected regions and holes
@@ -693,14 +693,14 @@
         dst_shape = dst_shape or (self.args.imgsz, self.args.imgsz)
         prompts = self._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
         pred_masks, pred_scores = self._inference_features(features, *prompts, multimask_output)
-        if len(pred_masks) == 0:
+        if pred_masks.shape[0] == 0:
             pred_masks, pred_bboxes = None, torch.zeros((0, 6), device=pred_masks.device)
         else:
             pred_masks = ops.scale_masks(pred_masks[None].float(), src_shape, padding=False)[0]
             pred_masks = pred_masks > self.model.mask_threshold  # to bool
             pred_bboxes = batched_mask_to_box(pred_masks)
             # NOTE: SAM models do not return cls info. This `cls` here is just a placeholder for consistency.
-            cls = torch.arange(len(pred_masks), dtype=torch.int32, device=pred_masks.device)
+            cls = torch.arange(pred_masks.shape[0], dtype=torch.int32, device=pred_masks.device)
             pred_bboxes = torch.cat([pred_bboxes, pred_scores[:, None], cls[:, None]], dim=-1)
         return pred_masks, pred_bboxes
 
@@ -770,7 +770,7 @@ class SAM2Predictor(Predictor):
         bboxes, points, labels, masks = super()._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
         if bboxes is not None:
             bboxes = bboxes.view(-1, 2, 2)
-            bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(len(bboxes), -1)
+            bbox_labels = torch.tensor([[2, 3]], dtype=torch.int32, device=bboxes.device).expand(bboxes.shape[0], -1)
             # NOTE: merge "boxes" and "points" into a single "points" input
             # (where boxes are added at the beginning) to model.sam_prompt_encoder
             if points is not None:
@@ -1025,7 +1025,7 @@ class SAM2VideoPredictor(SAM2Predictor):
         pred_masks = current_out["pred_masks"].flatten(0, 1)
         pred_masks = pred_masks[(pred_masks > self.model.mask_threshold).sum((1, 2)) > 0]  # filter blank masks
 
-        return pred_masks, torch.ones(len(pred_masks), dtype=pred_masks.dtype, device=pred_masks.device)
+        return pred_masks, torch.ones(pred_masks.shape[0], dtype=pred_masks.dtype, device=pred_masks.device)
 
     def postprocess(self, preds, img, orig_imgs):
         """
@@ -1465,7 +1465,7 @@ class SAM2VideoPredictor(SAM2Predictor):
         else:
             maskmem_pos_enc = model_constants["maskmem_pos_enc"]
             # expand the cached maskmem_pos_enc to the actual batch size
-            batch_size = out_maskmem_pos_enc[0].size(0)
+            batch_size = out_maskmem_pos_enc[0].shape[0]
             if batch_size > 1:
                 out_maskmem_pos_enc = [x.expand(batch_size, -1, -1, -1) for x in maskmem_pos_enc]
         return out_maskmem_pos_enc
@@ -2028,7 +2028,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
             point_inputs={"point_coords": point, "point_labels": label} if obj_idx is not None else None,
             mask_inputs=mask,
             multimask_output=False,
-            high_res_features=[feat[: pix_feat_with_mem.size(0)] for feat in self.high_res_features],
+            high_res_features=[feat[: pix_feat_with_mem.shape[0]] for feat in self.high_res_features],
         )
         return {
             "pred_masks": low_res_masks,
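predict.py additionally converts len(tensor) checks to tensor.shape[0]; for these batched (N, ...) tensors the two agree, including on empty batches, so the behavior is again unchanged:

import torch

masks = torch.zeros(0, 64, 64)  # empty prediction batch
assert len(masks) == masks.shape[0] == 0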
ultralytics/models/yolo/classify/train.py
CHANGED
@@ -12,7 +12,7 @@ from ultralytics.engine.trainer import BaseTrainer
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import ClassificationModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
-from ultralytics.utils.plotting import plot_images, plot_results
+from ultralytics.utils.plotting import plot_images
 from ultralytics.utils.torch_utils import is_parallel, strip_optimizer, torch_distributed_zero_first
 
 
@@ -39,7 +39,6 @@ class ClassificationTrainer(BaseTrainer):
         progress_string: Return a formatted string showing training progress.
         get_validator: Return an instance of ClassificationValidator.
         label_loss_items: Return a loss dict with labelled training loss items.
-        plot_metrics: Plot metrics from a CSV file.
         final_eval: Evaluate trained model and save validation results.
         plot_training_samples: Plot training samples with their annotations.
 
@@ -195,10 +194,6 @@ class ClassificationTrainer(BaseTrainer):
         loss_items = [round(float(loss_items), 5)]
         return dict(zip(keys, loss_items))
 
-    def plot_metrics(self):
-        """Plot metrics from a CSV file."""
-        plot_results(file=self.csv, classify=True, on_plot=self.on_plot)  # save results.png
-
     def final_eval(self):
         """Evaluate trained model and save validation results."""
         for f in self.last, self.best:
@@ -220,7 +215,7 @@ class ClassificationTrainer(BaseTrainer):
             batch (dict[str, torch.Tensor]): Batch containing images and class labels.
             ni (int): Number of iterations.
         """
-        batch["batch_idx"] = torch.arange(len(batch["img"]))  # add batch index for plotting
+        batch["batch_idx"] = torch.arange(batch["img"].shape[0])  # add batch index for plotting
         plot_images(
             labels=batch,
            fname=self.save_dir / f"train_batch{ni}.jpg",