dgenerate-ultralytics-headless 8.4.6__py3-none-any.whl → 8.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/METADATA +3 -3
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/RECORD +37 -36
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/WHEEL +1 -1
- tests/test_cli.py +10 -3
- tests/test_exports.py +64 -43
- tests/test_python.py +40 -11
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +5 -4
- ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
- ultralytics/cfg/default.yaml +2 -1
- ultralytics/data/augment.py +8 -0
- ultralytics/data/converter.py +32 -9
- ultralytics/data/utils.py +2 -2
- ultralytics/engine/exporter.py +10 -6
- ultralytics/engine/predictor.py +5 -0
- ultralytics/engine/trainer.py +6 -4
- ultralytics/engine/tuner.py +2 -2
- ultralytics/engine/validator.py +5 -0
- ultralytics/models/sam/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +14 -1
- ultralytics/models/yolo/detect/train.py +8 -4
- ultralytics/models/yolo/pose/train.py +2 -1
- ultralytics/models/yolo/world/train_world.py +21 -1
- ultralytics/models/yolo/yoloe/train.py +1 -2
- ultralytics/nn/autobackend.py +1 -1
- ultralytics/nn/modules/head.py +13 -2
- ultralytics/nn/tasks.py +18 -0
- ultralytics/solutions/security_alarm.py +1 -1
- ultralytics/utils/benchmarks.py +3 -9
- ultralytics/utils/callbacks/wb.py +6 -1
- ultralytics/utils/loss.py +18 -9
- ultralytics/utils/patches.py +42 -0
- ultralytics/utils/tal.py +15 -5
- ultralytics/utils/torch_utils.py +1 -1
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/entry_points.txt +0 -0
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/licenses/LICENSE +0 -0
- {dgenerate_ultralytics_headless-8.4.6.dist-info → dgenerate_ultralytics_headless-8.4.8.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
2
|
+
|
|
3
|
+
# COCO12-Formats dataset (12 images testing all supported image formats) by Ultralytics
|
|
4
|
+
# Documentation: https://docs.ultralytics.com/datasets/detect/coco12-formats/
|
|
5
|
+
# Example usage: yolo train data=coco12-formats.yaml
|
|
6
|
+
# parent
|
|
7
|
+
# ├── ultralytics
|
|
8
|
+
# └── datasets
|
|
9
|
+
# └── coco12-formats ← downloads here (1 MB)
|
|
10
|
+
|
|
11
|
+
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
|
12
|
+
path: coco12-formats # dataset root dir
|
|
13
|
+
train: images/train # train images (relative to 'path') 6 images
|
|
14
|
+
val: images/val # val images (relative to 'path') 6 images
|
|
15
|
+
test: # test images (optional)
|
|
16
|
+
|
|
17
|
+
# Classes
|
|
18
|
+
names:
|
|
19
|
+
0: person
|
|
20
|
+
1: bicycle
|
|
21
|
+
2: car
|
|
22
|
+
3: motorcycle
|
|
23
|
+
4: airplane
|
|
24
|
+
5: bus
|
|
25
|
+
6: train
|
|
26
|
+
7: truck
|
|
27
|
+
8: boat
|
|
28
|
+
9: traffic light
|
|
29
|
+
10: fire hydrant
|
|
30
|
+
11: stop sign
|
|
31
|
+
12: parking meter
|
|
32
|
+
13: bench
|
|
33
|
+
14: bird
|
|
34
|
+
15: cat
|
|
35
|
+
16: dog
|
|
36
|
+
17: horse
|
|
37
|
+
18: sheep
|
|
38
|
+
19: cow
|
|
39
|
+
20: elephant
|
|
40
|
+
21: bear
|
|
41
|
+
22: zebra
|
|
42
|
+
23: giraffe
|
|
43
|
+
24: backpack
|
|
44
|
+
25: umbrella
|
|
45
|
+
26: handbag
|
|
46
|
+
27: tie
|
|
47
|
+
28: suitcase
|
|
48
|
+
29: frisbee
|
|
49
|
+
30: skis
|
|
50
|
+
31: snowboard
|
|
51
|
+
32: sports ball
|
|
52
|
+
33: kite
|
|
53
|
+
34: baseball bat
|
|
54
|
+
35: baseball glove
|
|
55
|
+
36: skateboard
|
|
56
|
+
37: surfboard
|
|
57
|
+
38: tennis racket
|
|
58
|
+
39: bottle
|
|
59
|
+
40: wine glass
|
|
60
|
+
41: cup
|
|
61
|
+
42: fork
|
|
62
|
+
43: knife
|
|
63
|
+
44: spoon
|
|
64
|
+
45: bowl
|
|
65
|
+
46: banana
|
|
66
|
+
47: apple
|
|
67
|
+
48: sandwich
|
|
68
|
+
49: orange
|
|
69
|
+
50: broccoli
|
|
70
|
+
51: carrot
|
|
71
|
+
52: hot dog
|
|
72
|
+
53: pizza
|
|
73
|
+
54: donut
|
|
74
|
+
55: cake
|
|
75
|
+
56: chair
|
|
76
|
+
57: couch
|
|
77
|
+
58: potted plant
|
|
78
|
+
59: bed
|
|
79
|
+
60: dining table
|
|
80
|
+
61: toilet
|
|
81
|
+
62: tv
|
|
82
|
+
63: laptop
|
|
83
|
+
64: mouse
|
|
84
|
+
65: remote
|
|
85
|
+
66: keyboard
|
|
86
|
+
67: cell phone
|
|
87
|
+
68: microwave
|
|
88
|
+
69: oven
|
|
89
|
+
70: toaster
|
|
90
|
+
71: sink
|
|
91
|
+
72: refrigerator
|
|
92
|
+
73: book
|
|
93
|
+
74: clock
|
|
94
|
+
75: vase
|
|
95
|
+
76: scissors
|
|
96
|
+
77: teddy bear
|
|
97
|
+
78: hair drier
|
|
98
|
+
79: toothbrush
|
|
99
|
+
|
|
100
|
+
# Download script/URL (optional)
|
|
101
|
+
download: https://github.com/ultralytics/assets/releases/download/v0.0.0/coco12-formats.zip
|
ultralytics/cfg/default.yaml
CHANGED
|
@@ -36,7 +36,7 @@ amp: True # (bool) Automatic Mixed Precision (AMP) training; True runs AMP capab
|
|
|
36
36
|
fraction: 1.0 # (float) fraction of training dataset to use (1.0 = all)
|
|
37
37
|
profile: False # (bool) profile ONNX/TensorRT speeds during training for loggers
|
|
38
38
|
freeze: # (int | list, optional) freeze first N layers (int) or specific layer indices (list)
|
|
39
|
-
multi_scale: 0.0 # (float)
|
|
39
|
+
multi_scale: 0.0 # (float) multi-scale range as a fraction of imgsz; sizes are rounded to stride multiples
|
|
40
40
|
compile: False # (bool | str) enable torch.compile() backend='inductor'; True="default", False=off, or "default|reduce-overhead|max-autotune-no-cudagraphs"
|
|
41
41
|
|
|
42
42
|
# Segmentation
|
|
@@ -56,6 +56,7 @@ max_det: 300 # (int) maximum number of detections per image
|
|
|
56
56
|
half: False # (bool) use half precision (FP16) if supported
|
|
57
57
|
dnn: False # (bool) use OpenCV DNN for ONNX inference
|
|
58
58
|
plots: True # (bool) save plots and images during train/val
|
|
59
|
+
end2end: # (bool, optional) whether to use the end2end head (YOLO26, YOLOv10) for predict/val/export; leave unset to keep the model's own setting
|
|
59
60
|
|
|
60
61
|
# Predict settings -----------------------------------------------------------------------------------------------------
|
|
61
62
|
source: # (str, optional) path/dir/URL/stream for images or videos; e.g. 'ultralytics/assets' or '0' for webcam
|
ultralytics/data/augment.py
CHANGED
|
@@ -2066,7 +2066,15 @@ class Format:
|
|
|
2066
2066
|
if self.mask_overlap:
|
|
2067
2067
|
sem_masks = cls_tensor[masks[0].long() - 1] # (H, W) from (1, H, W) instance indices
|
|
2068
2068
|
else:
|
|
2069
|
+
# Create sem_masks consistent with mask_overlap=True
|
|
2069
2070
|
sem_masks = (masks * cls_tensor[:, None, None]).max(0).values # (H, W) from (N, H, W) binary
|
|
2071
|
+
overlap = masks.sum(dim=0) > 1 # (H, W)
|
|
2072
|
+
if overlap.any():
|
|
2073
|
+
weights = masks.sum(axis=(1, 2))
|
|
2074
|
+
weighted_masks = masks * weights[:, None, None] # (N, H, W)
|
|
2075
|
+
weighted_masks[masks == 0] = weights.max() + 1 # handle background
|
|
2076
|
+
smallest_idx = weighted_masks.argmin(dim=0) # (H, W)
|
|
2077
|
+
sem_masks[overlap] = cls_tensor[smallest_idx[overlap]]
|
|
2070
2078
|
else:
|
|
2071
2079
|
masks = torch.zeros(
|
|
2072
2080
|
1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio
|
ultralytics/data/converter.py
CHANGED
|
@@ -796,6 +796,17 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
|
|
|
796
796
|
# Check if this is a classification dataset
|
|
797
797
|
is_classification = dataset_record.get("task") == "classify"
|
|
798
798
|
class_names = {int(k): v for k, v in dataset_record.get("class_names", {}).items()}
|
|
799
|
+
len(class_names)
|
|
800
|
+
|
|
801
|
+
# Validate required fields before downloading images
|
|
802
|
+
task = dataset_record.get("task", "detect")
|
|
803
|
+
if not is_classification:
|
|
804
|
+
if "train" not in splits:
|
|
805
|
+
raise ValueError(f"Dataset missing required 'train' split. Found splits: {sorted(splits)}")
|
|
806
|
+
if "val" not in splits and "test" not in splits:
|
|
807
|
+
raise ValueError(f"Dataset missing required 'val' split. Found splits: {sorted(splits)}")
|
|
808
|
+
if task == "pose" and "kpt_shape" not in dataset_record:
|
|
809
|
+
raise ValueError("Pose dataset missing required 'kpt_shape'. See https://docs.ultralytics.com/datasets/pose/")
|
|
799
810
|
|
|
800
811
|
# Create base directories
|
|
801
812
|
dataset_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -838,14 +849,19 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
|
|
|
838
849
|
if http_url := record.get("url"):
|
|
839
850
|
if not image_path.exists():
|
|
840
851
|
image_path.parent.mkdir(parents=True, exist_ok=True)
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
852
|
+
# Retry with exponential backoff (3 attempts; 1s then 2s delay between attempts)
|
|
853
|
+
for attempt in range(3):
|
|
854
|
+
try:
|
|
855
|
+
async with session.get(http_url, timeout=aiohttp.ClientTimeout(total=30)) as response:
|
|
856
|
+
response.raise_for_status()
|
|
857
|
+
image_path.write_bytes(await response.read())
|
|
858
|
+
return True
|
|
859
|
+
except Exception as e:
|
|
860
|
+
if attempt < 2: # Don't sleep after last attempt
|
|
861
|
+
await asyncio.sleep(2**attempt) # 1s, 2s backoff
|
|
862
|
+
else:
|
|
863
|
+
LOGGER.warning(f"Failed to download {http_url} after 3 attempts: {e}")
|
|
864
|
+
return False
|
|
849
865
|
return True
|
|
850
866
|
|
|
851
867
|
# Process all images with async downloads (limit connections for small datasets)
|
|
@@ -861,9 +877,16 @@ async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Pat
|
|
|
861
877
|
pbar.update(1)
|
|
862
878
|
return result
|
|
863
879
|
|
|
864
|
-
await asyncio.gather(*[tracked_process(record) for record in image_records])
|
|
880
|
+
results = await asyncio.gather(*[tracked_process(record) for record in image_records])
|
|
865
881
|
pbar.close()
|
|
866
882
|
|
|
883
|
+
# Validate images were downloaded successfully
|
|
884
|
+
success_count = sum(1 for r in results if r)
|
|
885
|
+
if success_count == 0:
|
|
886
|
+
raise RuntimeError(f"Failed to download any images from {ndjson_path}. Check network connection and URLs.")
|
|
887
|
+
if success_count < len(image_records):
|
|
888
|
+
LOGGER.warning(f"Downloaded {success_count}/{len(image_records)} images from {ndjson_path}")
|
|
889
|
+
|
|
867
890
|
if is_classification:
|
|
868
891
|
# Classification: return dataset directory (check_cls_dataset expects a directory path)
|
|
869
892
|
return dataset_dir
|
ultralytics/data/utils.py
CHANGED
|
@@ -37,8 +37,8 @@ from ultralytics.utils.downloads import download, safe_download, unzip_file
|
|
|
37
37
|
from ultralytics.utils.ops import segments2boxes
|
|
38
38
|
|
|
39
39
|
HELP_URL = "See https://docs.ultralytics.com/datasets for dataset formatting guidance."
|
|
40
|
-
IMG_FORMATS = {"bmp", "dng", "
|
|
41
|
-
VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"} #
|
|
40
|
+
IMG_FORMATS = {"avif", "bmp", "dng", "heic", "jp2", "jpeg", "jpeg2000", "jpg", "mpo", "png", "tif", "tiff", "webp"}
|
|
41
|
+
VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"} # videos
|
|
42
42
|
FORMATS_HELP_MSG = f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
|
|
43
43
|
|
|
44
44
|
|
ultralytics/engine/exporter.py
CHANGED
|
@@ -404,6 +404,13 @@ class Exporter:
|
|
|
404
404
|
if not hasattr(model, "names"):
|
|
405
405
|
model.names = default_class_names()
|
|
406
406
|
model.names = check_class_names(model.names)
|
|
407
|
+
if hasattr(model, "end2end"):
|
|
408
|
+
if self.args.end2end is not None:
|
|
409
|
+
model.end2end = self.args.end2end
|
|
410
|
+
if rknn or ncnn or executorch or paddle or imx:
|
|
411
|
+
# Disable end2end branch for certain export formats as they do not support topk
|
|
412
|
+
model.end2end = False
|
|
413
|
+
LOGGER.warning(f"{fmt.upper()} export does not support end2end models, disabling end2end branch.")
|
|
407
414
|
if self.args.half and self.args.int8:
|
|
408
415
|
LOGGER.warning("half=True and int8=True are mutually exclusive, setting half=False.")
|
|
409
416
|
self.args.half = False
|
|
@@ -463,9 +470,6 @@ class Exporter:
|
|
|
463
470
|
)
|
|
464
471
|
if tfjs and (ARM64 and LINUX):
|
|
465
472
|
raise SystemError("TF.js exports are not currently supported on ARM64 Linux")
|
|
466
|
-
if ncnn and hasattr(model.model[-1], "one2one_cv2"):
|
|
467
|
-
del model.model[-1].one2one_cv2 # Disable end2end branch for NCNN export as it does not support topk
|
|
468
|
-
LOGGER.warning("NCNN export does not support end2end models, disabling end2end branch.")
|
|
469
473
|
# Recommend OpenVINO if export and Intel CPU
|
|
470
474
|
if SETTINGS.get("openvino_msg"):
|
|
471
475
|
if is_intel():
|
|
@@ -509,6 +513,7 @@ class Exporter:
|
|
|
509
513
|
# Clamp max_det to anchor count for small image sizes (required for TensorRT compatibility)
|
|
510
514
|
anchors = sum(int(self.imgsz[0] / s) * int(self.imgsz[1] / s) for s in model.stride.tolist())
|
|
511
515
|
m.max_det = min(self.args.max_det, anchors)
|
|
516
|
+
m.agnostic_nms = self.args.agnostic_nms
|
|
512
517
|
m.xyxy = self.args.nms and not coreml
|
|
513
518
|
m.shape = None # reset cached shape for new export input size
|
|
514
519
|
if hasattr(model, "pe") and hasattr(m, "fuse"): # for YOLOE models
|
|
@@ -549,6 +554,7 @@ class Exporter:
|
|
|
549
554
|
"names": model.names,
|
|
550
555
|
"args": {k: v for k, v in self.args if k in fmt_keys},
|
|
551
556
|
"channels": model.yaml.get("channels", 3),
|
|
557
|
+
"end2end": getattr(model, "end2end", False),
|
|
552
558
|
} # model metadata
|
|
553
559
|
if dla is not None:
|
|
554
560
|
self.metadata["dla"] = dla # make sure `AutoBackend` uses correct dla device if it has one
|
|
@@ -556,8 +562,6 @@ class Exporter:
|
|
|
556
562
|
self.metadata["kpt_shape"] = model.model[-1].kpt_shape
|
|
557
563
|
if hasattr(model, "kpt_names"):
|
|
558
564
|
self.metadata["kpt_names"] = model.kpt_names
|
|
559
|
-
if getattr(model.model[-1], "end2end", False):
|
|
560
|
-
self.metadata["end2end"] = True
|
|
561
565
|
|
|
562
566
|
LOGGER.info(
|
|
563
567
|
f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and "
|
|
@@ -1045,7 +1049,7 @@ class Exporter:
|
|
|
1045
1049
|
"onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package
|
|
1046
1050
|
"ai-edge-litert>=1.2.0" + (",<1.4.0" if MACOS else ""), # required by 'onnx2tf' package
|
|
1047
1051
|
"onnx>=1.12.0,<2.0.0",
|
|
1048
|
-
"onnx2tf>=1.26.3",
|
|
1052
|
+
"onnx2tf>=1.26.3,<1.29.0", # pin to avoid h5py build issues on aarch64
|
|
1049
1053
|
"onnxslim>=0.1.71",
|
|
1050
1054
|
"onnxruntime-gpu" if cuda else "onnxruntime",
|
|
1051
1055
|
"protobuf>=5",
|
ultralytics/engine/predictor.py
CHANGED
|
@@ -387,6 +387,11 @@ class BasePredictor:
|
|
|
387
387
|
model (str | Path | torch.nn.Module, optional): Model to load or use.
|
|
388
388
|
verbose (bool): Whether to print verbose output.
|
|
389
389
|
"""
|
|
390
|
+
if hasattr(model, "end2end"):
|
|
391
|
+
if self.args.end2end is not None:
|
|
392
|
+
model.end2end = self.args.end2end
|
|
393
|
+
if model.end2end:
|
|
394
|
+
model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
|
|
390
395
|
self.model = AutoBackend(
|
|
391
396
|
model=model or self.args.model,
|
|
392
397
|
device=select_device(self.args.device, verbose=verbose),
|
ultralytics/engine/trainer.py
CHANGED
|
@@ -948,7 +948,7 @@ class BaseTrainer:
|
|
|
948
948
|
)
|
|
949
949
|
nc = self.data.get("nc", 10) # number of classes
|
|
950
950
|
lr_fit = round(0.002 * 5 / (4 + nc), 6) # lr0 fit equation to 6 decimal places
|
|
951
|
-
name, lr, momentum = ("MuSGD", 0.01 if iterations > 10000 else lr_fit, 0.9)
|
|
951
|
+
name, lr, momentum = ("MuSGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
|
|
952
952
|
self.args.warmup_bias_lr = 0.0 # no higher than 0.01 for Adam
|
|
953
953
|
|
|
954
954
|
use_muon = name == "MuSGD"
|
|
@@ -981,16 +981,18 @@ class BaseTrainer:
|
|
|
981
981
|
"Request support for addition optimizers at https://github.com/ultralytics/ultralytics."
|
|
982
982
|
)
|
|
983
983
|
|
|
984
|
+
num_params = [len(g[0]), len(g[1]), len(g[2])] # number of param groups
|
|
984
985
|
g[2] = {"params": g[2], **optim_args, "param_group": "bias"}
|
|
985
986
|
g[0] = {"params": g[0], **optim_args, "weight_decay": decay, "param_group": "weight"}
|
|
986
987
|
g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0, "param_group": "bn"}
|
|
987
|
-
muon, sgd = (0.
|
|
988
|
+
muon, sgd = (0.2, 1.0)
|
|
988
989
|
if use_muon:
|
|
990
|
+
num_params[0] = len(g[3]) # update number of params
|
|
989
991
|
g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True, "param_group": "muon"}
|
|
990
992
|
import re
|
|
991
993
|
|
|
992
994
|
# higher lr for certain parameters in MuSGD when fine-tuning
|
|
993
|
-
pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg
|
|
995
|
+
pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg")
|
|
994
996
|
g_ = [] # new param groups
|
|
995
997
|
for x in g:
|
|
996
998
|
p = x.pop("params")
|
|
@@ -1002,6 +1004,6 @@ class BaseTrainer:
|
|
|
1002
1004
|
|
|
1003
1005
|
LOGGER.info(
|
|
1004
1006
|
f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
|
|
1005
|
-
f"{
|
|
1007
|
+
f"{num_params[1]} weight(decay=0.0), {num_params[0]} weight(decay={decay}), {num_params[2]} bias(decay=0.0)"
|
|
1006
1008
|
)
|
|
1007
1009
|
return optimizer
|
ultralytics/engine/tuner.py
CHANGED
|
@@ -26,7 +26,7 @@ from datetime import datetime
|
|
|
26
26
|
import numpy as np
|
|
27
27
|
import torch
|
|
28
28
|
|
|
29
|
-
from ultralytics.cfg import get_cfg, get_save_dir
|
|
29
|
+
from ultralytics.cfg import CFG_INT_KEYS, get_cfg, get_save_dir
|
|
30
30
|
from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
|
|
31
31
|
from ultralytics.utils.checks import check_requirements
|
|
32
32
|
from ultralytics.utils.patches import torch_load
|
|
@@ -448,7 +448,7 @@ class Tuner:
|
|
|
448
448
|
f"{self.prefix}Best fitness model is {best_save_dir}"
|
|
449
449
|
)
|
|
450
450
|
LOGGER.info("\n" + header)
|
|
451
|
-
data = {k:
|
|
451
|
+
data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(self.space.keys(), x[best_idx, 1:])}
|
|
452
452
|
YAML.save(
|
|
453
453
|
self.tune_dir / "best_hyperparameters.yaml",
|
|
454
454
|
data=data,
|
ultralytics/engine/validator.py
CHANGED
|
@@ -156,6 +156,11 @@ class BaseValidator:
|
|
|
156
156
|
if str(self.args.model).endswith(".yaml") and model is None:
|
|
157
157
|
LOGGER.warning("validating an untrained model YAML will result in 0 mAP.")
|
|
158
158
|
callbacks.add_integration_callbacks(self)
|
|
159
|
+
if hasattr(model, "end2end"):
|
|
160
|
+
if self.args.end2end is not None:
|
|
161
|
+
model.end2end = self.args.end2end
|
|
162
|
+
if model.end2end:
|
|
163
|
+
model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
|
|
159
164
|
model = AutoBackend(
|
|
160
165
|
model=model or self.args.model,
|
|
161
166
|
device=select_device(self.args.device) if RANK == -1 else torch.device("cuda", RANK),
|
|
@@ -2619,6 +2619,7 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
|
|
|
2619
2619
|
if not isinstance(orig_imgs, list): # input images are a torch.Tensor, not a list
|
|
2620
2620
|
orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
|
|
2621
2621
|
|
|
2622
|
+
names = []
|
|
2622
2623
|
if len(curr_obj_ids) == 0:
|
|
2623
2624
|
pred_masks, pred_boxes = None, torch.zeros((0, 7), device=self.device)
|
|
2624
2625
|
else:
|
|
@@ -2656,9 +2657,8 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
|
|
|
2656
2657
|
background_value=0,
|
|
2657
2658
|
).squeeze(1)
|
|
2658
2659
|
) > 0
|
|
2660
|
+
names = self.model.names or dict(enumerate(str(i) for i in range(pred_boxes[:, 6].int().max())))
|
|
2659
2661
|
|
|
2660
|
-
# names = getattr(self.model, "names", [str(i) for i in range(pred_scores.shape[0])])
|
|
2661
|
-
names = dict(enumerate(str(i) for i in range(pred_boxes.shape[0])))
|
|
2662
2662
|
results = []
|
|
2663
2663
|
for masks, boxes, orig_img, img_path in zip([pred_masks], [pred_boxes], orig_imgs, self.batch[0]):
|
|
2664
2664
|
results.append(Results(orig_img, path=img_path, names=names, masks=masks, boxes=boxes))
|
|
@@ -11,7 +11,7 @@ from ultralytics.data import ClassificationDataset, build_dataloader
|
|
|
11
11
|
from ultralytics.engine.trainer import BaseTrainer
|
|
12
12
|
from ultralytics.models import yolo
|
|
13
13
|
from ultralytics.nn.tasks import ClassificationModel
|
|
14
|
-
from ultralytics.utils import DEFAULT_CFG, RANK
|
|
14
|
+
from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
|
|
15
15
|
from ultralytics.utils.plotting import plot_images
|
|
16
16
|
from ultralytics.utils.torch_utils import is_parallel, torch_distributed_zero_first
|
|
17
17
|
|
|
@@ -138,6 +138,19 @@ class ClassificationTrainer(BaseTrainer):
|
|
|
138
138
|
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
|
|
139
139
|
dataset = self.build_dataset(dataset_path, mode)
|
|
140
140
|
|
|
141
|
+
# Filter out samples with class indices >= nc (prevents CUDA assertion errors)
|
|
142
|
+
nc = self.data.get("nc", 0)
|
|
143
|
+
dataset_nc = len(dataset.base.classes)
|
|
144
|
+
if nc and dataset_nc > nc:
|
|
145
|
+
extra_classes = dataset.base.classes[nc:]
|
|
146
|
+
original_count = len(dataset.samples)
|
|
147
|
+
dataset.samples = [s for s in dataset.samples if s[1] < nc]
|
|
148
|
+
skipped = original_count - len(dataset.samples)
|
|
149
|
+
LOGGER.warning(
|
|
150
|
+
f"{mode} split has {dataset_nc} classes but model expects {nc}. "
|
|
151
|
+
f"Skipping {skipped} samples from extra classes: {extra_classes}"
|
|
152
|
+
)
|
|
153
|
+
|
|
141
154
|
loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank, drop_last=self.args.compile)
|
|
142
155
|
# Attach inference transforms
|
|
143
156
|
if mode != "train":
|
|
@@ -92,7 +92,7 @@ class DetectionTrainer(BaseTrainer):
|
|
|
92
92
|
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
|
|
93
93
|
dataset = self.build_dataset(dataset_path, mode, batch_size)
|
|
94
94
|
shuffle = mode == "train"
|
|
95
|
-
if getattr(dataset, "rect", False) and shuffle:
|
|
95
|
+
if getattr(dataset, "rect", False) and shuffle and not np.all(dataset.batch_shapes == dataset.batch_shapes[0]):
|
|
96
96
|
LOGGER.warning("'rect=True' is incompatible with DataLoader shuffle, setting shuffle=False")
|
|
97
97
|
shuffle = False
|
|
98
98
|
return build_dataloader(
|
|
@@ -117,11 +117,13 @@ class DetectionTrainer(BaseTrainer):
|
|
|
117
117
|
if isinstance(v, torch.Tensor):
|
|
118
118
|
batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
|
|
119
119
|
batch["img"] = batch["img"].float() / 255
|
|
120
|
-
|
|
121
|
-
if random.random() < multi_scale:
|
|
120
|
+
if self.args.multi_scale > 0.0:
|
|
122
121
|
imgs = batch["img"]
|
|
123
122
|
sz = (
|
|
124
|
-
random.randrange(
|
|
123
|
+
random.randrange(
|
|
124
|
+
int(self.args.imgsz * (1.0 - self.args.multi_scale)),
|
|
125
|
+
int(self.args.imgsz * (1.0 + self.args.multi_scale) + self.stride),
|
|
126
|
+
)
|
|
125
127
|
// self.stride
|
|
126
128
|
* self.stride
|
|
127
129
|
) # size
|
|
@@ -143,6 +145,8 @@ class DetectionTrainer(BaseTrainer):
|
|
|
143
145
|
self.model.nc = self.data["nc"] # attach number of classes to model
|
|
144
146
|
self.model.names = self.data["names"] # attach class names to model
|
|
145
147
|
self.model.args = self.args # attach hyperparameters to model
|
|
148
|
+
if getattr(self.model, "end2end"):
|
|
149
|
+
self.model.set_head_attr(max_det=self.args.max_det)
|
|
146
150
|
# TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc
|
|
147
151
|
|
|
148
152
|
def get_model(self, cfg: str | None = None, weights: str | None = None, verbose: bool = True):
|
|
@@ -9,6 +9,7 @@ from typing import Any
|
|
|
9
9
|
from ultralytics.models import yolo
|
|
10
10
|
from ultralytics.nn.tasks import PoseModel
|
|
11
11
|
from ultralytics.utils import DEFAULT_CFG
|
|
12
|
+
from ultralytics.utils.torch_utils import unwrap_model
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class PoseTrainer(yolo.detect.DetectionTrainer):
|
|
@@ -91,7 +92,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
|
|
|
91
92
|
def get_validator(self):
|
|
92
93
|
"""Return an instance of the PoseValidator class for validation."""
|
|
93
94
|
self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
|
|
94
|
-
if getattr(self.model.model[-1], "flow_model", None) is not None:
|
|
95
|
+
if getattr(unwrap_model(self.model).model[-1], "flow_model", None) is not None:
|
|
95
96
|
self.loss_names += ("rle_loss",)
|
|
96
97
|
return yolo.pose.PoseValidator(
|
|
97
98
|
self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
from pathlib import Path
|
|
4
6
|
|
|
5
7
|
from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
|
|
6
8
|
from ultralytics.data.utils import check_det_dataset
|
|
7
9
|
from ultralytics.models.yolo.world import WorldTrainer
|
|
8
10
|
from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
|
|
11
|
+
from ultralytics.utils.checks import check_file
|
|
9
12
|
from ultralytics.utils.torch_utils import unwrap_model
|
|
10
13
|
|
|
11
14
|
|
|
@@ -100,6 +103,23 @@ class WorldTrainerFromScratch(WorldTrainer):
|
|
|
100
103
|
self.set_text_embeddings(datasets, batch) # cache text embeddings to accelerate training
|
|
101
104
|
return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]
|
|
102
105
|
|
|
106
|
+
@staticmethod
|
|
107
|
+
def check_data_config(data: dict | str | Path) -> dict:
|
|
108
|
+
"""Check and load the data configuration from a YAML file or dictionary.
|
|
109
|
+
|
|
110
|
+
Args:
|
|
111
|
+
data (dict | str | Path): Data configuration as a dictionary or path to a YAML file.
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
114
|
+
(dict): Data configuration dictionary loaded from YAML file or passed directly.
|
|
115
|
+
"""
|
|
116
|
+
# If string, load from YAML file
|
|
117
|
+
if not isinstance(data, dict):
|
|
118
|
+
from ultralytics.utils import YAML
|
|
119
|
+
|
|
120
|
+
return YAML.load(check_file(data))
|
|
121
|
+
return data
|
|
122
|
+
|
|
103
123
|
def get_dataset(self):
|
|
104
124
|
"""Get train and validation paths from data dictionary.
|
|
105
125
|
|
|
@@ -114,7 +134,7 @@ class WorldTrainerFromScratch(WorldTrainer):
|
|
|
114
134
|
AssertionError: If train or validation datasets are not found, or if validation has multiple datasets.
|
|
115
135
|
"""
|
|
116
136
|
final_data = {}
|
|
117
|
-
data_yaml = self.args.data
|
|
137
|
+
self.args.data = data_yaml = self.check_data_config(self.args.data)
|
|
118
138
|
assert data_yaml.get("train", False), "train dataset not found" # object365.yaml
|
|
119
139
|
assert data_yaml.get("val", False), "validation dataset not found" # lvis.yaml
|
|
120
140
|
data = {k: [check_det_dataset(d) for d in v.get("yolo_data", [])] for k, v in data_yaml.items()}
|
|
@@ -196,7 +196,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
|
|
|
196
196
|
Returns:
|
|
197
197
|
(dict): Dictionary mapping text samples to their embeddings.
|
|
198
198
|
"""
|
|
199
|
-
model =
|
|
199
|
+
model = unwrap_model(self.model).text_model
|
|
200
200
|
cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
|
|
201
201
|
if cache_path.exists():
|
|
202
202
|
LOGGER.info(f"Reading existed cache from '{cache_path}'")
|
|
@@ -204,7 +204,6 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
|
|
|
204
204
|
if sorted(txt_map.keys()) == sorted(texts):
|
|
205
205
|
return txt_map
|
|
206
206
|
LOGGER.info(f"Caching text embeddings to '{cache_path}'")
|
|
207
|
-
assert self.model is not None
|
|
208
207
|
txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
|
|
209
208
|
txt_map = dict(zip(texts, txt_feats.squeeze(0)))
|
|
210
209
|
torch.save(txt_map, cache_path)
|
ultralytics/nn/autobackend.py
CHANGED
|
@@ -648,7 +648,7 @@ class AutoBackend(nn.Module):
|
|
|
648
648
|
for k, v in metadata.items():
|
|
649
649
|
if k in {"stride", "batch", "channels"}:
|
|
650
650
|
metadata[k] = int(v)
|
|
651
|
-
elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args"} and isinstance(v, str):
|
|
651
|
+
elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args", "end2end"} and isinstance(v, str):
|
|
652
652
|
metadata[k] = ast.literal_eval(v)
|
|
653
653
|
stride = metadata["stride"]
|
|
654
654
|
task = metadata["task"]
|
ultralytics/nn/modules/head.py
CHANGED
|
@@ -69,6 +69,7 @@ class Detect(nn.Module):
|
|
|
69
69
|
export = False # export mode
|
|
70
70
|
format = None # export format
|
|
71
71
|
max_det = 300 # max_det
|
|
72
|
+
agnostic_nms = False
|
|
72
73
|
shape = None
|
|
73
74
|
anchors = torch.empty(0) # init
|
|
74
75
|
strides = torch.empty(0) # init
|
|
@@ -125,7 +126,12 @@ class Detect(nn.Module):
|
|
|
125
126
|
@property
def end2end(self):
    """Return whether the end-to-end (one2one) detection branch is active.

    The branch counts as active only when the head owns a `one2one` attribute and the mode has not been switched
    off through the setter. Heads without `one2one` always report False, whatever override was stored.

    Returns:
        (bool): True if end-to-end mode is both available and enabled, False otherwise.
    """
    # Coerce the stored override to bool so a falsy value such as 0 or None is never returned as-is
    return bool(getattr(self, "_end2end", True)) and hasattr(self, "one2one")

@end2end.setter
def end2end(self, value):
    """Override the end-to-end detection mode.

    Args:
        value (bool): Desired state; a falsy value disables the end-to-end branch even if `one2one` exists.
    """
    self._end2end = value
|
|
129
135
|
|
|
130
136
|
def forward_head(
|
|
131
137
|
self, x: list[torch.Tensor], box_head: torch.nn.Module = None, cls_head: torch.nn.Module = None
|
|
@@ -230,6 +236,11 @@ class Detect(nn.Module):
|
|
|
230
236
|
# Use max_det directly during export for TensorRT compatibility (requires k to be constant),
|
|
231
237
|
# otherwise use min(max_det, anchors) for safety with small inputs during Python inference
|
|
232
238
|
k = max_det if self.export else min(max_det, anchors)
|
|
239
|
+
if self.agnostic_nms:
|
|
240
|
+
scores, labels = scores.max(dim=-1, keepdim=True)
|
|
241
|
+
scores, indices = scores.topk(k, dim=1)
|
|
242
|
+
labels = labels.gather(1, indices)
|
|
243
|
+
return scores, labels, indices
|
|
233
244
|
ori_index = scores.max(dim=-1)[0].topk(k)[1].unsqueeze(-1)
|
|
234
245
|
scores = scores.gather(dim=1, index=ori_index.repeat(1, 1, nc))
|
|
235
246
|
scores, index = scores.flatten(1).topk(k)
|
|
@@ -1098,7 +1109,7 @@ class YOLOEDetect(Detect):
|
|
|
1098
1109
|
boxes, scores, index = [], [], []
|
|
1099
1110
|
bs = x[0].shape[0]
|
|
1100
1111
|
cv2 = self.cv2 if not self.end2end else self.one2one_cv2
|
|
1101
|
-
cv3 = self.cv3 if not self.end2end else self.
|
|
1112
|
+
cv3 = self.cv3 if not self.end2end else self.one2one_cv3
|
|
1102
1113
|
for i in range(self.nl):
|
|
1103
1114
|
cls_feat = cv3[i](x[i])
|
|
1104
1115
|
loc_feat = cv2[i](x[i])
|
ultralytics/nn/tasks.py
CHANGED
|
@@ -425,6 +425,24 @@ class DetectionModel(BaseModel):
|
|
|
425
425
|
"""Return whether the model uses end-to-end NMS-free detection."""
|
|
426
426
|
return getattr(self.model[-1], "end2end", False)
|
|
427
427
|
|
|
428
|
+
@end2end.setter
|
|
429
|
+
def end2end(self, value):
|
|
430
|
+
"""Override the end-to-end detection mode."""
|
|
431
|
+
self.set_head_attr(end2end=value)
|
|
432
|
+
|
|
433
|
+
def set_head_attr(self, **kwargs):
    """Assign attributes on the model head, i.e. the last entry of `self.model`.

    Only attributes the head already exposes are written; any other name is reported with a warning and left
    untouched.

    Args:
        **kwargs: Attribute names mapped to the values they should take on the head.
    """
    last_layer = self.model[-1]
    for attr_name, attr_value in kwargs.items():
        if hasattr(last_layer, attr_name):
            setattr(last_layer, attr_name, attr_value)
        else:
            # Unknown names are skipped rather than created, so a typo cannot silently add a new attribute
            LOGGER.warning(f"Head has no attribute '{attr_name}'.")
|
|
445
|
+
|
|
428
446
|
def _predict_augment(self, x):
|
|
429
447
|
"""Perform augmentations on input image x and return augmented inference and train outputs.
|
|
430
448
|
|
|
@@ -62,7 +62,7 @@ class SecurityAlarm(BaseSolution):
|
|
|
62
62
|
"""
|
|
63
63
|
import smtplib
|
|
64
64
|
|
|
65
|
-
self.server = smtplib.SMTP("smtp.gmail.com
|
|
65
|
+
self.server = smtplib.SMTP("smtp.gmail.com", 587)
|
|
66
66
|
self.server.starttls()
|
|
67
67
|
self.server.login(from_email, password)
|
|
68
68
|
self.to_email = to_email
|