dgenerate_ultralytics_headless-8.3.134-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (272)
  1. dgenerate_ultralytics_headless-8.3.134.dist-info/METADATA +400 -0
  2. dgenerate_ultralytics_headless-8.3.134.dist-info/RECORD +272 -0
  3. dgenerate_ultralytics_headless-8.3.134.dist-info/WHEEL +5 -0
  4. dgenerate_ultralytics_headless-8.3.134.dist-info/entry_points.txt +3 -0
  5. dgenerate_ultralytics_headless-8.3.134.dist-info/licenses/LICENSE +661 -0
  6. dgenerate_ultralytics_headless-8.3.134.dist-info/top_level.txt +1 -0
  7. tests/__init__.py +22 -0
  8. tests/conftest.py +83 -0
  9. tests/test_cli.py +138 -0
  10. tests/test_cuda.py +215 -0
  11. tests/test_engine.py +131 -0
  12. tests/test_exports.py +236 -0
  13. tests/test_integrations.py +154 -0
  14. tests/test_python.py +694 -0
  15. tests/test_solutions.py +187 -0
  16. ultralytics/__init__.py +30 -0
  17. ultralytics/assets/bus.jpg +0 -0
  18. ultralytics/assets/zidane.jpg +0 -0
  19. ultralytics/cfg/__init__.py +1023 -0
  20. ultralytics/cfg/datasets/Argoverse.yaml +77 -0
  21. ultralytics/cfg/datasets/DOTAv1.5.yaml +37 -0
  22. ultralytics/cfg/datasets/DOTAv1.yaml +36 -0
  23. ultralytics/cfg/datasets/GlobalWheat2020.yaml +68 -0
  24. ultralytics/cfg/datasets/HomeObjects-3K.yaml +33 -0
  25. ultralytics/cfg/datasets/ImageNet.yaml +2025 -0
  26. ultralytics/cfg/datasets/Objects365.yaml +443 -0
  27. ultralytics/cfg/datasets/SKU-110K.yaml +58 -0
  28. ultralytics/cfg/datasets/VOC.yaml +106 -0
  29. ultralytics/cfg/datasets/VisDrone.yaml +77 -0
  30. ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
  31. ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
  32. ultralytics/cfg/datasets/carparts-seg.yaml +44 -0
  33. ultralytics/cfg/datasets/coco-pose.yaml +42 -0
  34. ultralytics/cfg/datasets/coco.yaml +118 -0
  35. ultralytics/cfg/datasets/coco128-seg.yaml +101 -0
  36. ultralytics/cfg/datasets/coco128.yaml +101 -0
  37. ultralytics/cfg/datasets/coco8-multispectral.yaml +104 -0
  38. ultralytics/cfg/datasets/coco8-pose.yaml +26 -0
  39. ultralytics/cfg/datasets/coco8-seg.yaml +101 -0
  40. ultralytics/cfg/datasets/coco8.yaml +101 -0
  41. ultralytics/cfg/datasets/crack-seg.yaml +22 -0
  42. ultralytics/cfg/datasets/dog-pose.yaml +24 -0
  43. ultralytics/cfg/datasets/dota8-multispectral.yaml +38 -0
  44. ultralytics/cfg/datasets/dota8.yaml +35 -0
  45. ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
  46. ultralytics/cfg/datasets/lvis.yaml +1240 -0
  47. ultralytics/cfg/datasets/medical-pills.yaml +22 -0
  48. ultralytics/cfg/datasets/open-images-v7.yaml +666 -0
  49. ultralytics/cfg/datasets/package-seg.yaml +22 -0
  50. ultralytics/cfg/datasets/signature.yaml +21 -0
  51. ultralytics/cfg/datasets/tiger-pose.yaml +25 -0
  52. ultralytics/cfg/datasets/xView.yaml +155 -0
  53. ultralytics/cfg/default.yaml +127 -0
  54. ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +17 -0
  55. ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
  56. ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
  57. ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
  58. ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
  59. ultralytics/cfg/models/11/yolo11.yaml +50 -0
  60. ultralytics/cfg/models/11/yoloe-11-seg.yaml +48 -0
  61. ultralytics/cfg/models/11/yoloe-11.yaml +48 -0
  62. ultralytics/cfg/models/12/yolo12-cls.yaml +32 -0
  63. ultralytics/cfg/models/12/yolo12-obb.yaml +48 -0
  64. ultralytics/cfg/models/12/yolo12-pose.yaml +49 -0
  65. ultralytics/cfg/models/12/yolo12-seg.yaml +48 -0
  66. ultralytics/cfg/models/12/yolo12.yaml +48 -0
  67. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +53 -0
  68. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +45 -0
  69. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +45 -0
  70. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +57 -0
  71. ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
  72. ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
  73. ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
  74. ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
  75. ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
  76. ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
  77. ultralytics/cfg/models/v3/yolov3-spp.yaml +49 -0
  78. ultralytics/cfg/models/v3/yolov3-tiny.yaml +40 -0
  79. ultralytics/cfg/models/v3/yolov3.yaml +49 -0
  80. ultralytics/cfg/models/v5/yolov5-p6.yaml +62 -0
  81. ultralytics/cfg/models/v5/yolov5.yaml +51 -0
  82. ultralytics/cfg/models/v6/yolov6.yaml +56 -0
  83. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +45 -0
  84. ultralytics/cfg/models/v8/yoloe-v8.yaml +45 -0
  85. ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +28 -0
  86. ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +28 -0
  87. ultralytics/cfg/models/v8/yolov8-cls.yaml +32 -0
  88. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +58 -0
  89. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +60 -0
  90. ultralytics/cfg/models/v8/yolov8-ghost.yaml +50 -0
  91. ultralytics/cfg/models/v8/yolov8-obb.yaml +49 -0
  92. ultralytics/cfg/models/v8/yolov8-p2.yaml +57 -0
  93. ultralytics/cfg/models/v8/yolov8-p6.yaml +59 -0
  94. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +60 -0
  95. ultralytics/cfg/models/v8/yolov8-pose.yaml +50 -0
  96. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +49 -0
  97. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +59 -0
  98. ultralytics/cfg/models/v8/yolov8-seg.yaml +49 -0
  99. ultralytics/cfg/models/v8/yolov8-world.yaml +51 -0
  100. ultralytics/cfg/models/v8/yolov8-worldv2.yaml +49 -0
  101. ultralytics/cfg/models/v8/yolov8.yaml +49 -0
  102. ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
  103. ultralytics/cfg/models/v9/yolov9c.yaml +41 -0
  104. ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
  105. ultralytics/cfg/models/v9/yolov9e.yaml +64 -0
  106. ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
  107. ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
  108. ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
  109. ultralytics/cfg/trackers/botsort.yaml +22 -0
  110. ultralytics/cfg/trackers/bytetrack.yaml +14 -0
  111. ultralytics/data/__init__.py +26 -0
  112. ultralytics/data/annotator.py +66 -0
  113. ultralytics/data/augment.py +2945 -0
  114. ultralytics/data/base.py +438 -0
  115. ultralytics/data/build.py +258 -0
  116. ultralytics/data/converter.py +754 -0
  117. ultralytics/data/dataset.py +834 -0
  118. ultralytics/data/loaders.py +676 -0
  119. ultralytics/data/scripts/download_weights.sh +18 -0
  120. ultralytics/data/scripts/get_coco.sh +61 -0
  121. ultralytics/data/scripts/get_coco128.sh +18 -0
  122. ultralytics/data/scripts/get_imagenet.sh +52 -0
  123. ultralytics/data/split.py +125 -0
  124. ultralytics/data/split_dota.py +325 -0
  125. ultralytics/data/utils.py +777 -0
  126. ultralytics/engine/__init__.py +1 -0
  127. ultralytics/engine/exporter.py +1519 -0
  128. ultralytics/engine/model.py +1156 -0
  129. ultralytics/engine/predictor.py +502 -0
  130. ultralytics/engine/results.py +1840 -0
  131. ultralytics/engine/trainer.py +853 -0
  132. ultralytics/engine/tuner.py +243 -0
  133. ultralytics/engine/validator.py +377 -0
  134. ultralytics/hub/__init__.py +168 -0
  135. ultralytics/hub/auth.py +137 -0
  136. ultralytics/hub/google/__init__.py +176 -0
  137. ultralytics/hub/session.py +446 -0
  138. ultralytics/hub/utils.py +248 -0
  139. ultralytics/models/__init__.py +9 -0
  140. ultralytics/models/fastsam/__init__.py +7 -0
  141. ultralytics/models/fastsam/model.py +61 -0
  142. ultralytics/models/fastsam/predict.py +181 -0
  143. ultralytics/models/fastsam/utils.py +24 -0
  144. ultralytics/models/fastsam/val.py +40 -0
  145. ultralytics/models/nas/__init__.py +7 -0
  146. ultralytics/models/nas/model.py +102 -0
  147. ultralytics/models/nas/predict.py +58 -0
  148. ultralytics/models/nas/val.py +39 -0
  149. ultralytics/models/rtdetr/__init__.py +7 -0
  150. ultralytics/models/rtdetr/model.py +63 -0
  151. ultralytics/models/rtdetr/predict.py +84 -0
  152. ultralytics/models/rtdetr/train.py +85 -0
  153. ultralytics/models/rtdetr/val.py +191 -0
  154. ultralytics/models/sam/__init__.py +6 -0
  155. ultralytics/models/sam/amg.py +260 -0
  156. ultralytics/models/sam/build.py +358 -0
  157. ultralytics/models/sam/model.py +170 -0
  158. ultralytics/models/sam/modules/__init__.py +1 -0
  159. ultralytics/models/sam/modules/blocks.py +1129 -0
  160. ultralytics/models/sam/modules/decoders.py +515 -0
  161. ultralytics/models/sam/modules/encoders.py +854 -0
  162. ultralytics/models/sam/modules/memory_attention.py +299 -0
  163. ultralytics/models/sam/modules/sam.py +1006 -0
  164. ultralytics/models/sam/modules/tiny_encoder.py +1002 -0
  165. ultralytics/models/sam/modules/transformer.py +351 -0
  166. ultralytics/models/sam/modules/utils.py +394 -0
  167. ultralytics/models/sam/predict.py +1605 -0
  168. ultralytics/models/utils/__init__.py +1 -0
  169. ultralytics/models/utils/loss.py +455 -0
  170. ultralytics/models/utils/ops.py +268 -0
  171. ultralytics/models/yolo/__init__.py +7 -0
  172. ultralytics/models/yolo/classify/__init__.py +7 -0
  173. ultralytics/models/yolo/classify/predict.py +88 -0
  174. ultralytics/models/yolo/classify/train.py +233 -0
  175. ultralytics/models/yolo/classify/val.py +215 -0
  176. ultralytics/models/yolo/detect/__init__.py +7 -0
  177. ultralytics/models/yolo/detect/predict.py +124 -0
  178. ultralytics/models/yolo/detect/train.py +217 -0
  179. ultralytics/models/yolo/detect/val.py +451 -0
  180. ultralytics/models/yolo/model.py +354 -0
  181. ultralytics/models/yolo/obb/__init__.py +7 -0
  182. ultralytics/models/yolo/obb/predict.py +66 -0
  183. ultralytics/models/yolo/obb/train.py +81 -0
  184. ultralytics/models/yolo/obb/val.py +283 -0
  185. ultralytics/models/yolo/pose/__init__.py +7 -0
  186. ultralytics/models/yolo/pose/predict.py +79 -0
  187. ultralytics/models/yolo/pose/train.py +154 -0
  188. ultralytics/models/yolo/pose/val.py +394 -0
  189. ultralytics/models/yolo/segment/__init__.py +7 -0
  190. ultralytics/models/yolo/segment/predict.py +113 -0
  191. ultralytics/models/yolo/segment/train.py +123 -0
  192. ultralytics/models/yolo/segment/val.py +428 -0
  193. ultralytics/models/yolo/world/__init__.py +5 -0
  194. ultralytics/models/yolo/world/train.py +119 -0
  195. ultralytics/models/yolo/world/train_world.py +176 -0
  196. ultralytics/models/yolo/yoloe/__init__.py +22 -0
  197. ultralytics/models/yolo/yoloe/predict.py +169 -0
  198. ultralytics/models/yolo/yoloe/train.py +298 -0
  199. ultralytics/models/yolo/yoloe/train_seg.py +124 -0
  200. ultralytics/models/yolo/yoloe/val.py +191 -0
  201. ultralytics/nn/__init__.py +29 -0
  202. ultralytics/nn/autobackend.py +842 -0
  203. ultralytics/nn/modules/__init__.py +182 -0
  204. ultralytics/nn/modules/activation.py +53 -0
  205. ultralytics/nn/modules/block.py +1966 -0
  206. ultralytics/nn/modules/conv.py +712 -0
  207. ultralytics/nn/modules/head.py +880 -0
  208. ultralytics/nn/modules/transformer.py +713 -0
  209. ultralytics/nn/modules/utils.py +164 -0
  210. ultralytics/nn/tasks.py +1627 -0
  211. ultralytics/nn/text_model.py +351 -0
  212. ultralytics/solutions/__init__.py +41 -0
  213. ultralytics/solutions/ai_gym.py +116 -0
  214. ultralytics/solutions/analytics.py +252 -0
  215. ultralytics/solutions/config.py +106 -0
  216. ultralytics/solutions/distance_calculation.py +124 -0
  217. ultralytics/solutions/heatmap.py +127 -0
  218. ultralytics/solutions/instance_segmentation.py +84 -0
  219. ultralytics/solutions/object_blurrer.py +90 -0
  220. ultralytics/solutions/object_counter.py +195 -0
  221. ultralytics/solutions/object_cropper.py +84 -0
  222. ultralytics/solutions/parking_management.py +273 -0
  223. ultralytics/solutions/queue_management.py +93 -0
  224. ultralytics/solutions/region_counter.py +120 -0
  225. ultralytics/solutions/security_alarm.py +154 -0
  226. ultralytics/solutions/similarity_search.py +172 -0
  227. ultralytics/solutions/solutions.py +724 -0
  228. ultralytics/solutions/speed_estimation.py +110 -0
  229. ultralytics/solutions/streamlit_inference.py +196 -0
  230. ultralytics/solutions/templates/similarity-search.html +160 -0
  231. ultralytics/solutions/trackzone.py +88 -0
  232. ultralytics/solutions/vision_eye.py +68 -0
  233. ultralytics/trackers/__init__.py +7 -0
  234. ultralytics/trackers/basetrack.py +124 -0
  235. ultralytics/trackers/bot_sort.py +260 -0
  236. ultralytics/trackers/byte_tracker.py +480 -0
  237. ultralytics/trackers/track.py +125 -0
  238. ultralytics/trackers/utils/__init__.py +1 -0
  239. ultralytics/trackers/utils/gmc.py +376 -0
  240. ultralytics/trackers/utils/kalman_filter.py +493 -0
  241. ultralytics/trackers/utils/matching.py +157 -0
  242. ultralytics/utils/__init__.py +1435 -0
  243. ultralytics/utils/autobatch.py +106 -0
  244. ultralytics/utils/autodevice.py +174 -0
  245. ultralytics/utils/benchmarks.py +695 -0
  246. ultralytics/utils/callbacks/__init__.py +5 -0
  247. ultralytics/utils/callbacks/base.py +234 -0
  248. ultralytics/utils/callbacks/clearml.py +153 -0
  249. ultralytics/utils/callbacks/comet.py +552 -0
  250. ultralytics/utils/callbacks/dvc.py +205 -0
  251. ultralytics/utils/callbacks/hub.py +108 -0
  252. ultralytics/utils/callbacks/mlflow.py +138 -0
  253. ultralytics/utils/callbacks/neptune.py +140 -0
  254. ultralytics/utils/callbacks/raytune.py +43 -0
  255. ultralytics/utils/callbacks/tensorboard.py +132 -0
  256. ultralytics/utils/callbacks/wb.py +185 -0
  257. ultralytics/utils/checks.py +897 -0
  258. ultralytics/utils/dist.py +119 -0
  259. ultralytics/utils/downloads.py +499 -0
  260. ultralytics/utils/errors.py +43 -0
  261. ultralytics/utils/export.py +219 -0
  262. ultralytics/utils/files.py +221 -0
  263. ultralytics/utils/instance.py +499 -0
  264. ultralytics/utils/loss.py +813 -0
  265. ultralytics/utils/metrics.py +1356 -0
  266. ultralytics/utils/ops.py +885 -0
  267. ultralytics/utils/patches.py +143 -0
  268. ultralytics/utils/plotting.py +1011 -0
  269. ultralytics/utils/tal.py +416 -0
  270. ultralytics/utils/torch_utils.py +990 -0
  271. ultralytics/utils/triton.py +116 -0
  272. ultralytics/utils/tuner.py +159 -0
ultralytics/engine/trainer.py
@@ -0,0 +1,853 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+ """
+ Train a model on a dataset.
+
+ Usage:
+     $ yolo mode=train model=yolo11n.pt data=coco8.yaml imgsz=640 epochs=100 batch=16
+ """
+
+ import gc
+ import math
+ import os
+ import subprocess
+ import time
+ import warnings
+ from copy import copy, deepcopy
+ from datetime import datetime, timedelta
+ from pathlib import Path
+
+ import numpy as np
+ import torch
+ from torch import distributed as dist
+ from torch import nn, optim
+
+ from ultralytics import __version__
+ from ultralytics.cfg import get_cfg, get_save_dir
+ from ultralytics.data.utils import check_cls_dataset, check_det_dataset
+ from ultralytics.nn.tasks import attempt_load_one_weight, attempt_load_weights
+ from ultralytics.utils import (
+     DEFAULT_CFG,
+     LOCAL_RANK,
+     LOGGER,
+     RANK,
+     TQDM,
+     YAML,
+     callbacks,
+     clean_url,
+     colorstr,
+     emojis,
+ )
+ from ultralytics.utils.autobatch import check_train_batch_size
+ from ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_model_file_from_stem, print_args
+ from ultralytics.utils.dist import ddp_cleanup, generate_ddp_command
+ from ultralytics.utils.files import get_latest_run
+ from ultralytics.utils.torch_utils import (
+     TORCH_2_4,
+     EarlyStopping,
+     ModelEMA,
+     autocast,
+     convert_optimizer_state_dict_to_fp16,
+     init_seeds,
+     one_cycle,
+     select_device,
+     strip_optimizer,
+     torch_distributed_zero_first,
+     unset_deterministic,
+ )
+
+
+ class BaseTrainer:
+     """
+     A base class for creating trainers.
+
+     Attributes:
+         args (SimpleNamespace): Configuration for the trainer.
+         validator (BaseValidator): Validator instance.
+         model (nn.Module): Model instance.
+         callbacks (defaultdict): Dictionary of callbacks.
+         save_dir (Path): Directory to save results.
+         wdir (Path): Directory to save weights.
+         last (Path): Path to the last checkpoint.
+         best (Path): Path to the best checkpoint.
+         save_period (int): Save checkpoint every x epochs (disabled if < 1).
+         batch_size (int): Batch size for training.
+         epochs (int): Number of epochs to train for.
+         start_epoch (int): Starting epoch for training.
+         device (torch.device): Device to use for training.
+         amp (bool): Flag to enable AMP (Automatic Mixed Precision).
+         scaler (amp.GradScaler): Gradient scaler for AMP.
+         data (dict): Dataset dictionary returned by get_dataset().
+         ema (nn.Module): EMA (Exponential Moving Average) of the model.
+         resume (bool): Resume training from a checkpoint.
+         lf (callable): Learning rate lambda consumed by the scheduler.
+         scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler.
+         best_fitness (float): The best fitness value achieved.
+         fitness (float): Current fitness value.
+         loss (float): Current loss value.
+         tloss (float): Total loss value.
+         loss_names (list): List of loss names.
+         csv (Path): Path to results CSV file.
+         metrics (dict): Dictionary of metrics.
+         plots (dict): Dictionary of plots.
+     """
+
+     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+         """
+         Initialize the BaseTrainer class.
+
+         Args:
+             cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
+             overrides (dict, optional): Configuration overrides. Defaults to None.
+             _callbacks (list, optional): List of callback functions. Defaults to None.
+         """
+         self.args = get_cfg(cfg, overrides)
+         self.check_resume(overrides)
+         self.device = select_device(self.args.device, self.args.batch)
+         # update "-1" devices so post-training val does not repeat search
+         self.args.device = os.getenv("CUDA_VISIBLE_DEVICES") if "cuda" in str(self.device) else str(self.device)
+         self.validator = None
+         self.metrics = None
+         self.plots = {}
+         init_seeds(self.args.seed + 1 + RANK, deterministic=self.args.deterministic)
+
+         # Dirs
+         self.save_dir = get_save_dir(self.args)
+         self.args.name = self.save_dir.name  # update name for loggers
+         self.wdir = self.save_dir / "weights"  # weights dir
+         if RANK in {-1, 0}:
+             self.wdir.mkdir(parents=True, exist_ok=True)  # make dir
+             self.args.save_dir = str(self.save_dir)
+             YAML.save(self.save_dir / "args.yaml", vars(self.args))  # save run args
+         self.last, self.best = self.wdir / "last.pt", self.wdir / "best.pt"  # checkpoint paths
+         self.save_period = self.args.save_period
+
+         self.batch_size = self.args.batch
+         self.epochs = self.args.epochs or 100  # in case users accidentally pass epochs=None with timed training
+         self.start_epoch = 0
+         if RANK == -1:
+             print_args(vars(self.args))
+
+         # Device
+         if self.device.type in {"cpu", "mps"}:
+             self.args.workers = 0  # faster CPU training as time dominated by inference, not dataloading
+
+         # Model and Dataset
+         self.model = check_model_file_from_stem(self.args.model)  # add suffix, i.e. yolo11n -> yolo11n.pt
+         with torch_distributed_zero_first(LOCAL_RANK):  # avoid auto-downloading dataset multiple times
+             self.data = self.get_dataset()
+
+         self.ema = None
+
+         # Optimization utils init
+         self.lf = None
+         self.scheduler = None
+
+         # Epoch level metrics
+         self.best_fitness = None
+         self.fitness = None
+         self.loss = None
+         self.tloss = None
+         self.loss_names = ["Loss"]
+         self.csv = self.save_dir / "results.csv"
+         self.plot_idx = [0, 1, 2]
+
+         # HUB
+         self.hub_session = None
+
+         # Callbacks
+         self.callbacks = _callbacks or callbacks.get_default_callbacks()
+         if RANK in {-1, 0}:
+             callbacks.add_integration_callbacks(self)
+
+     def add_callback(self, event: str, callback):
+         """Append the given callback to the event's callback list."""
+         self.callbacks[event].append(callback)
+
+     def set_callback(self, event: str, callback):
+         """Override the existing callbacks with the given callback for the specified event."""
+         self.callbacks[event] = [callback]
+
+     def run_callbacks(self, event: str):
+         """Run all existing callbacks associated with a particular event."""
+         for callback in self.callbacks.get(event, []):
+             callback(self)
+
+     def train(self):
+         """Run training, allowing device='' or device=None on Multi-GPU systems to default to device 0."""
+         if isinstance(self.args.device, str) and len(self.args.device):  # i.e. device='0' or device='0,1,2,3'
+             world_size = len(self.args.device.split(","))
+         elif isinstance(self.args.device, (tuple, list)):  # i.e. device=[0, 1, 2, 3] (multi-GPU from CLI is list)
+             world_size = len(self.args.device)
+         elif self.args.device in {"cpu", "mps"}:  # i.e. device='cpu' or 'mps'
+             world_size = 0
+         elif torch.cuda.is_available():  # i.e. device=None or device='' or device=number
+             world_size = 1  # default to device 0
+         else:  # i.e. device=None or device=''
+             world_size = 0
+
+         # Run subprocess if DDP training, else train normally
+         if world_size > 1 and "LOCAL_RANK" not in os.environ:
+             # Argument checks
+             if self.args.rect:
+                 LOGGER.warning("'rect=True' is incompatible with Multi-GPU training, setting 'rect=False'")
+                 self.args.rect = False
+             if self.args.batch < 1.0:
+                 LOGGER.warning(
+                     "'batch<1' for AutoBatch is incompatible with Multi-GPU training, setting default 'batch=16'"
+                 )
+                 self.args.batch = 16
+
+             # Command
+             cmd, file = generate_ddp_command(world_size, self)
+             try:
+                 LOGGER.info(f"{colorstr('DDP:')} debug command {' '.join(cmd)}")
+                 subprocess.run(cmd, check=True)
+             except Exception as e:
+                 raise e
+             finally:
+                 ddp_cleanup(self, str(file))
+
+         else:
+             self._do_train(world_size)
+
+     def _setup_scheduler(self):
+         """Initialize training learning rate scheduler."""
+         if self.args.cos_lr:
+             self.lf = one_cycle(1, self.args.lrf, self.epochs)  # cosine 1->hyp['lrf']
+         else:
+             self.lf = lambda x: max(1 - x / self.epochs, 0) * (1.0 - self.args.lrf) + self.args.lrf  # linear
+         self.scheduler = optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda=self.lf)
+
+     def _setup_ddp(self, world_size):
+         """Initialize and set the DistributedDataParallel parameters for training."""
+         torch.cuda.set_device(RANK)
+         self.device = torch.device("cuda", RANK)
+         # LOGGER.info(f'DDP info: RANK {RANK}, WORLD_SIZE {world_size}, DEVICE {self.device}')
+         os.environ["TORCH_NCCL_BLOCKING_WAIT"] = "1"  # set to enforce timeout
+         dist.init_process_group(
+             backend="nccl" if dist.is_nccl_available() else "gloo",
+             timeout=timedelta(seconds=10800),  # 3 hours
+             rank=RANK,
+             world_size=world_size,
+         )
+
+     def _setup_train(self, world_size):
+         """Build dataloaders and optimizer on correct rank process."""
+         # Model
+         self.run_callbacks("on_pretrain_routine_start")
+         ckpt = self.setup_model()
+         self.model = self.model.to(self.device)
+         self.set_model_attributes()
+
+         # Freeze layers
+         freeze_list = (
+             self.args.freeze
+             if isinstance(self.args.freeze, list)
+             else range(self.args.freeze)
+             if isinstance(self.args.freeze, int)
+             else []
+         )
+         always_freeze_names = [".dfl"]  # always freeze these layers
+         freeze_layer_names = [f"model.{x}." for x in freeze_list] + always_freeze_names
+         self.freeze_layer_names = freeze_layer_names
+         for k, v in self.model.named_parameters():
+             # v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0 (commented for erratic training results)
+             if any(x in k for x in freeze_layer_names):
+                 LOGGER.info(f"Freezing layer '{k}'")
+                 v.requires_grad = False
+             elif not v.requires_grad and v.dtype.is_floating_point:  # only floating point Tensor can require gradients
+                 LOGGER.warning(
+                     f"setting 'requires_grad=True' for frozen layer '{k}'. "
+                     "See ultralytics.engine.trainer for customization of frozen layers."
+                 )
+                 v.requires_grad = True
+
+         # Check AMP
+         self.amp = torch.tensor(self.args.amp).to(self.device)  # True or False
+         if self.amp and RANK in {-1, 0}:  # Single-GPU and DDP
+             callbacks_backup = callbacks.default_callbacks.copy()  # backup callbacks as check_amp() resets them
+             self.amp = torch.tensor(check_amp(self.model), device=self.device)
+             callbacks.default_callbacks = callbacks_backup  # restore callbacks
+         if RANK > -1 and world_size > 1:  # DDP
+             dist.broadcast(self.amp.int(), src=0)  # broadcast from rank 0 to all other ranks; gloo errors with boolean
+         self.amp = bool(self.amp)  # as boolean
+         self.scaler = (
+             torch.amp.GradScaler("cuda", enabled=self.amp) if TORCH_2_4 else torch.cuda.amp.GradScaler(enabled=self.amp)
+         )
+         if world_size > 1:
+             self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK], find_unused_parameters=True)
+
+         # Check imgsz
+         gs = max(int(self.model.stride.max() if hasattr(self.model, "stride") else 32), 32)  # grid size (max stride)
+         self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1)
+         self.stride = gs  # for multiscale training
+
+         # Batch size
+         if self.batch_size < 1 and RANK == -1:  # single-GPU only, estimate best batch size
+             self.args.batch = self.batch_size = self.auto_batch()
+
+         # Dataloaders
+         batch_size = self.batch_size // max(world_size, 1)
+         self.train_loader = self.get_dataloader(
+             self.data["train"], batch_size=batch_size, rank=LOCAL_RANK, mode="train"
+         )
+         if RANK in {-1, 0}:
+             # Note: When training DOTA dataset, double batch size could get OOM on images with >2000 objects.
+             self.test_loader = self.get_dataloader(
+                 self.data.get("val") or self.data.get("test"),
+                 batch_size=batch_size if self.args.task == "obb" else batch_size * 2,
+                 rank=-1,
+                 mode="val",
+             )
+             self.validator = self.get_validator()
+             metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix="val")
+             self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
+             self.ema = ModelEMA(self.model)
+             if self.args.plots:
+                 self.plot_training_labels()
+
+         # Optimizer
+         self.accumulate = max(round(self.args.nbs / self.batch_size), 1)  # accumulate loss before optimizing
+         weight_decay = self.args.weight_decay * self.batch_size * self.accumulate / self.args.nbs  # scale weight_decay
+         iterations = math.ceil(len(self.train_loader.dataset) / max(self.batch_size, self.args.nbs)) * self.epochs
+         self.optimizer = self.build_optimizer(
+             model=self.model,
+             name=self.args.optimizer,
+             lr=self.args.lr0,
+             momentum=self.args.momentum,
+             decay=weight_decay,
+             iterations=iterations,
+         )
+         # Scheduler
+         self._setup_scheduler()
+         self.stopper, self.stop = EarlyStopping(patience=self.args.patience), False
+         self.resume_training(ckpt)
+         self.scheduler.last_epoch = self.start_epoch - 1  # do not move
+         self.run_callbacks("on_pretrain_routine_end")
+
+     def _do_train(self, world_size=1):
+         """Train the model with the specified world size."""
+         if world_size > 1:
+             self._setup_ddp(world_size)
+         self._setup_train(world_size)
+
+         nb = len(self.train_loader)  # number of batches
+         nw = max(round(self.args.warmup_epochs * nb), 100) if self.args.warmup_epochs > 0 else -1  # warmup iterations
+         last_opt_step = -1
+         self.epoch_time = None
+         self.epoch_time_start = time.time()
+         self.train_time_start = time.time()
+         self.run_callbacks("on_train_start")
+         LOGGER.info(
+             f"Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n"
+             f"Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n"
+             f"Logging results to {colorstr('bold', self.save_dir)}\n"
+             f"Starting training for " + (f"{self.args.time} hours..." if self.args.time else f"{self.epochs} epochs...")
+         )
+         if self.args.close_mosaic:
+             base_idx = (self.epochs - self.args.close_mosaic) * nb
+             self.plot_idx.extend([base_idx, base_idx + 1, base_idx + 2])
+         epoch = self.start_epoch
+         self.optimizer.zero_grad()  # zero any resumed gradients to ensure stability on train start
+         while True:
+             self.epoch = epoch
+             self.run_callbacks("on_train_epoch_start")
+             with warnings.catch_warnings():
+                 warnings.simplefilter("ignore")  # suppress 'Detected lr_scheduler.step() before optimizer.step()'
+                 self.scheduler.step()
+
+             self._model_train()
+             if RANK != -1:
+                 self.train_loader.sampler.set_epoch(epoch)
+             pbar = enumerate(self.train_loader)
+             # Update dataloader attributes (optional)
+             if epoch == (self.epochs - self.args.close_mosaic):
+                 self._close_dataloader_mosaic()
+                 self.train_loader.reset()
+
+             if RANK in {-1, 0}:
+                 LOGGER.info(self.progress_string())
+                 pbar = TQDM(enumerate(self.train_loader), total=nb)
+             self.tloss = None
+             for i, batch in pbar:
+                 self.run_callbacks("on_train_batch_start")
+                 # Warmup
+                 ni = i + nb * epoch
+                 if ni <= nw:
+                     xi = [0, nw]  # x interp
+                     self.accumulate = max(1, int(np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round()))
+                     for j, x in enumerate(self.optimizer.param_groups):
+                         # Bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
+                         x["lr"] = np.interp(
+                             ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x["initial_lr"] * self.lf(epoch)]
+                         )
+                         if "momentum" in x:
+                             x["momentum"] = np.interp(ni, xi, [self.args.warmup_momentum, self.args.momentum])
+
+                 # Forward
+                 with autocast(self.amp):
+                     batch = self.preprocess_batch(batch)
+                     loss, self.loss_items = self.model(batch)
+                     self.loss = loss.sum()
+                     if RANK != -1:
+                         self.loss *= world_size
+                     self.tloss = (
+                         (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None else self.loss_items
+                     )
+
+                 # Backward
+                 self.scaler.scale(self.loss).backward()
+
+                 # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
+                 if ni - last_opt_step >= self.accumulate:
+                     self.optimizer_step()
+                     last_opt_step = ni
+
+                     # Timed stopping
+                     if self.args.time:
+                         self.stop = (time.time() - self.train_time_start) > (self.args.time * 3600)
+                         if RANK != -1:  # if DDP training
+                             broadcast_list = [self.stop if RANK == 0 else None]
+                             dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
+                             self.stop = broadcast_list[0]
+                         if self.stop:  # training time exceeded
+                             break
+
+                 # Log
+                 if RANK in {-1, 0}:
+                     loss_length = self.tloss.shape[0] if len(self.tloss.shape) else 1
+                     pbar.set_description(
+                         ("%11s" * 2 + "%11.4g" * (2 + loss_length))
+                         % (
+                             f"{epoch + 1}/{self.epochs}",
+                             f"{self._get_memory():.3g}G",  # (GB) GPU memory util
+                             *(self.tloss if loss_length > 1 else torch.unsqueeze(self.tloss, 0)),  # losses
+                             batch["cls"].shape[0],  # batch size, i.e. 8
+                             batch["img"].shape[-1],  # imgsz, i.e. 640
+                         )
+                     )
+                     self.run_callbacks("on_batch_end")
+                     if self.args.plots and ni in self.plot_idx:
+                         self.plot_training_samples(batch, ni)
+
+                 self.run_callbacks("on_train_batch_end")
+
+             self.lr = {f"lr/pg{ir}": x["lr"] for ir, x in enumerate(self.optimizer.param_groups)}  # for loggers
+             self.run_callbacks("on_train_epoch_end")
+             if RANK in {-1, 0}:
+                 final_epoch = epoch + 1 >= self.epochs
+                 self.ema.update_attr(self.model, include=["yaml", "nc", "args", "names", "stride", "class_weights"])
+
+                 # Validation
+                 if self.args.val or final_epoch or self.stopper.possible_stop or self.stop:
+                     self.metrics, self.fitness = self.validate()
+                 self.save_metrics(metrics={**self.label_loss_items(self.tloss), **self.metrics, **self.lr})
+                 self.stop |= self.stopper(epoch + 1, self.fitness) or final_epoch
+                 if self.args.time:
+                     self.stop |= (time.time() - self.train_time_start) > (self.args.time * 3600)
+
+                 # Save model
+                 if self.args.save or final_epoch:
+                     self.save_model()
+                     self.run_callbacks("on_model_save")
+
+             # Scheduler
+             t = time.time()
+             self.epoch_time = t - self.epoch_time_start
+             self.epoch_time_start = t
+             if self.args.time:
+                 mean_epoch_time = (t - self.train_time_start) / (epoch - self.start_epoch + 1)
+                 self.epochs = self.args.epochs = math.ceil(self.args.time * 3600 / mean_epoch_time)
+                 self._setup_scheduler()
+                 self.scheduler.last_epoch = self.epoch  # do not move
+                 self.stop |= epoch >= self.epochs  # stop if exceeded epochs
+             self.run_callbacks("on_fit_epoch_end")
+             if self._get_memory(fraction=True) > 0.5:
+                 self._clear_memory()  # clear if memory utilization > 50%
+
+             # Early Stopping
+             if RANK != -1:  # if DDP training
+                 broadcast_list = [self.stop if RANK == 0 else None]
+                 dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
+                 self.stop = broadcast_list[0]
+             if self.stop:
+                 break  # must break all DDP ranks
+             epoch += 1
+
+         if RANK in {-1, 0}:
+             # Do final val with best.pt
+             seconds = time.time() - self.train_time_start
+             LOGGER.info(f"\n{epoch - self.start_epoch + 1} epochs completed in {seconds / 3600:.3f} hours.")
+             self.final_eval()
+             if self.args.plots:
+                 self.plot_metrics()
+             self.run_callbacks("on_train_end")
+         self._clear_memory()
+         unset_deterministic()
+         self.run_callbacks("teardown")
+
+     def auto_batch(self, max_num_obj=0):
+         """Calculate optimal batch size based on model and device memory constraints."""
+         return check_train_batch_size(
+             model=self.model,
+             imgsz=self.args.imgsz,
+             amp=self.amp,
+             batch=self.batch_size,
+             max_num_obj=max_num_obj,
+         )  # returns batch size
+
+     def _get_memory(self, fraction=False):
+         """Get accelerator memory utilization in GB or as a fraction of total memory."""
+         memory, total = 0, 0
+         if self.device.type == "mps":
+             memory = torch.mps.driver_allocated_memory()
+             if fraction:
+                 return __import__("psutil").virtual_memory().percent / 100
+         elif self.device.type != "cpu":
+             memory = torch.cuda.memory_reserved()
+             if fraction:
+                 total = torch.cuda.get_device_properties(self.device).total_memory
+         return ((memory / total) if total > 0 else 0) if fraction else (memory / 2**30)
+
+     def _clear_memory(self):
+         """Clear accelerator memory by calling garbage collector and emptying cache."""
+         gc.collect()
+         if self.device.type == "mps":
+             torch.mps.empty_cache()
+         elif self.device.type == "cpu":
+             return
+         else:
+             torch.cuda.empty_cache()
+
+     def read_results_csv(self):
+         """Read results.csv into a dictionary using pandas."""
+         import pandas as pd  # scope for faster 'import ultralytics'
+
+         return pd.read_csv(self.csv).to_dict(orient="list")
+
+     def _model_train(self):
+         """Set model in training mode."""
+         self.model.train()
+         # Freeze BN stat
+         for n, m in self.model.named_modules():
+             if any(filter(lambda f: f in n, self.freeze_layer_names)) and isinstance(m, nn.BatchNorm2d):
+                 m.eval()
+
+     def save_model(self):
+         """Save model training checkpoints with additional metadata."""
+         import io
+
+         # Serialize ckpt to a byte buffer once (faster than repeated torch.save() calls)
+         buffer = io.BytesIO()
+         torch.save(
+             {
+                 "epoch": self.epoch,
+                 "best_fitness": self.best_fitness,
+                 "model": None,  # resume and final checkpoints derive from EMA
+                 "ema": deepcopy(self.ema.ema).half(),
+                 "updates": self.ema.updates,
+                 "optimizer": convert_optimizer_state_dict_to_fp16(deepcopy(self.optimizer.state_dict())),
+                 "train_args": vars(self.args),  # save as dict
+                 "train_metrics": {**self.metrics, **{"fitness": self.fitness}},
+                 "train_results": self.read_results_csv(),
+                 "date": datetime.now().isoformat(),
+                 "version": __version__,
+                 "license": "AGPL-3.0 (https://ultralytics.com/license)",
+                 "docs": "https://docs.ultralytics.com",
+             },
+             buffer,
+         )
+         serialized_ckpt = buffer.getvalue()  # get the serialized content to save
+
+         # Save checkpoints
+         self.last.write_bytes(serialized_ckpt)  # save last.pt
+         if self.best_fitness == self.fitness:
+             self.best.write_bytes(serialized_ckpt)  # save best.pt
+         if (self.save_period > 0) and (self.epoch % self.save_period == 0):
+             (self.wdir / f"epoch{self.epoch}.pt").write_bytes(serialized_ckpt)  # save epoch, i.e. 'epoch3.pt'
+         # if self.args.close_mosaic and self.epoch == (self.epochs - self.args.close_mosaic - 1):
+         #     (self.wdir / "last_mosaic.pt").write_bytes(serialized_ckpt)  # save mosaic checkpoint
+
+     def get_dataset(self):
+         """
+         Get train and validation datasets from data dictionary.
+
+         Returns:
+             (dict): A dictionary containing the training/validation/test dataset and category names.
+         """
+         try:
+             if self.args.task == "classify":
+                 data = check_cls_dataset(self.args.data)
+             elif self.args.data.split(".")[-1] in {"yaml", "yml"} or self.args.task in {
+                 "detect",
+                 "segment",
+                 "pose",
+                 "obb",
+             }:
+                 data = check_det_dataset(self.args.data)
+                 if "yaml_file" in data:
+                     self.args.data = data["yaml_file"]  # for validating 'yolo train data=url.zip' usage
+         except Exception as e:
+             raise RuntimeError(emojis(f"Dataset '{clean_url(self.args.data)}' error ❌ {e}")) from e
+         if self.args.single_cls:
+             LOGGER.info("Overriding class names with single class.")
+             data["names"] = {0: "item"}
+             data["nc"] = 1
+         return data
+
+     def setup_model(self):
+         """
+         Load, create, or download model for any task.
+
+         Returns:
+             (dict): Optional checkpoint to resume training from.
+         """
+         if isinstance(self.model, torch.nn.Module):  # if model is loaded beforehand. No setup needed
+             return
+
+         cfg, weights = self.model, None
+         ckpt = None
+         if str(self.model).endswith(".pt"):
+             weights, ckpt = attempt_load_one_weight(self.model)
+             cfg = weights.yaml
+         elif isinstance(self.args.pretrained, (str, Path)):
+             weights, _ = attempt_load_one_weight(self.args.pretrained)
+         self.model = self.get_model(cfg=cfg, weights=weights, verbose=RANK == -1)  # calls Model(cfg, weights)
+         return ckpt
+
+     def optimizer_step(self):
+         """Perform a single step of the training optimizer with gradient clipping and EMA update."""
+         self.scaler.unscale_(self.optimizer)  # unscale gradients
+         torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=10.0)  # clip gradients
+         self.scaler.step(self.optimizer)
+         self.scaler.update()
+         self.optimizer.zero_grad()
+         if self.ema:
+             self.ema.update(self.model)
+
+     def preprocess_batch(self, batch):
+         """Allow custom preprocessing of model inputs and ground truths depending on task type."""
+         return batch
+
+     def validate(self):
+         """
+         Run validation on test set using self.validator.
+
+         Returns:
+             (tuple): A tuple containing metrics dictionary and fitness score.
+         """
+         metrics = self.validator(self)
+         fitness = metrics.pop("fitness", -self.loss.detach().cpu().numpy())  # use loss as fitness measure if not found
+         if not self.best_fitness or self.best_fitness < fitness:
+             self.best_fitness = fitness
+         return metrics, fitness
+
+     def get_model(self, cfg=None, weights=None, verbose=True):
+         """Get model and raise NotImplementedError for loading cfg files."""
+         raise NotImplementedError("This task trainer doesn't support loading cfg files")
+
+     def get_validator(self):
+         """Returns a NotImplementedError when the get_validator function is called."""
+         raise NotImplementedError("get_validator function not implemented in trainer")
+
+     def get_dataloader(self, dataset_path, batch_size=16, rank=0, mode="train"):
+         """Returns dataloader derived from torch.utils.data.DataLoader."""
+         raise NotImplementedError("get_dataloader function not implemented in trainer")
+
+     def build_dataset(self, img_path, mode="train", batch=None):
+         """Build dataset."""
+         raise NotImplementedError("build_dataset function not implemented in trainer")
+
+     def label_loss_items(self, loss_items=None, prefix="train"):
+         """
+         Returns a loss dict with labelled training loss items tensor.
+
+         Note:
+             This is not needed for classification but necessary for segmentation & detection
+         """
+         return {"loss": loss_items} if loss_items is not None else ["loss"]
+
+     def set_model_attributes(self):
+         """Set or update model parameters before training."""
+         self.model.names = self.data["names"]
+
+     def build_targets(self, preds, targets):
+         """Builds target tensors for training YOLO model."""
+         pass
+
+     def progress_string(self):
+         """Returns a string describing training progress."""
+         return ""
+
+     # TODO: may need to put these following functions into callback
+     def plot_training_samples(self, batch, ni):
+         """Plots training samples during YOLO training."""
+         pass
+
+     def plot_training_labels(self):
+         """Plots training labels for YOLO model."""
+         pass
+
+     def save_metrics(self, metrics):
+         """Save training metrics to a CSV file."""
+         keys, vals = list(metrics.keys()), list(metrics.values())
+         n = len(metrics) + 2  # number of cols
+         s = "" if self.csv.exists() else (("%s," * n % tuple(["epoch", "time"] + keys)).rstrip(",") + "\n")  # header
+         t = time.time() - self.train_time_start
+         with open(self.csv, "a", encoding="utf-8") as f:
+             f.write(s + ("%.6g," * n % tuple([self.epoch + 1, t] + vals)).rstrip(",") + "\n")
+
+     def plot_metrics(self):
+         """Plot and display metrics visually."""
+         pass
+
+     def on_plot(self, name, data=None):
+         """Registers plots (e.g. to be consumed in callbacks)."""
+         path = Path(name)
+         self.plots[path] = {"data": data, "timestamp": time.time()}
+
+     def final_eval(self):
+         """Perform final evaluation and validation for object detection YOLO model."""
+         ckpt = {}
+         for f in self.last, self.best:
+             if f.exists():
+                 if f is self.last:
+                     ckpt = strip_optimizer(f)
+                 elif f is self.best:
+                     k = "train_results"  # update best.pt train_metrics from last.pt
+                     strip_optimizer(f, updates={k: ckpt[k]} if k in ckpt else None)
+                     LOGGER.info(f"\nValidating {f}...")
+                     self.validator.args.plots = self.args.plots
+                     self.metrics = self.validator(model=f)
+                     self.metrics.pop("fitness", None)
+                     self.run_callbacks("on_fit_epoch_end")
+
+     def check_resume(self, overrides):
+         """Check if resume checkpoint exists and update arguments accordingly."""
+         resume = self.args.resume
+         if resume:
+             try:
+                 exists = isinstance(resume, (str, Path)) and Path(resume).exists()
+                 last = Path(check_file(resume) if exists else get_latest_run())
+
+                 # Check that resume data YAML exists, otherwise strip to force re-download of dataset
+                 ckpt_args = attempt_load_weights(last).args
+                 if not isinstance(ckpt_args["data"], dict) and not Path(ckpt_args["data"]).exists():
+                     ckpt_args["data"] = self.args.data
+
+                 resume = True
+                 self.args = get_cfg(ckpt_args)
+                 self.args.model = self.args.resume = str(last)  # reinstate model
+                 for k in (
+                     "imgsz",
+                     "batch",
+                     "device",
+                     "close_mosaic",
+                 ):  # allow arg updates to reduce memory or update device on resume
+                     if k in overrides:
+                         setattr(self.args, k, overrides[k])
+
+             except Exception as e:
+                 raise FileNotFoundError(
+                     "Resume checkpoint not found. Please pass a valid checkpoint to resume from, "
+                     "i.e. 'yolo train resume model=path/to/last.pt'"
+                 ) from e
+         self.resume = resume
+
+     def resume_training(self, ckpt):
+         """Resume YOLO training from given epoch and best fitness."""
+         if ckpt is None or not self.resume:
+             return
+         best_fitness = 0.0
+         start_epoch = ckpt.get("epoch", -1) + 1
+         if ckpt.get("optimizer", None) is not None:
+             self.optimizer.load_state_dict(ckpt["optimizer"])  # optimizer
+             best_fitness = ckpt["best_fitness"]
+         if self.ema and ckpt.get("ema"):
+             self.ema.ema.load_state_dict(ckpt["ema"].float().state_dict())  # EMA
+             self.ema.updates = ckpt["updates"]
+         assert start_epoch > 0, (
+             f"{self.args.model} training to {self.epochs} epochs is finished, nothing to resume.\n"
+             f"Start a new training without resuming, i.e. 'yolo train model={self.args.model}'"
+         )
+         LOGGER.info(f"Resuming training {self.args.model} from epoch {start_epoch + 1} to {self.epochs} total epochs")
+         if self.epochs < start_epoch:
+             LOGGER.info(
+                 f"{self.model} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {self.epochs} more epochs."
+             )
+             self.epochs += ckpt["epoch"]  # finetune additional epochs
+         self.best_fitness = best_fitness
+         self.start_epoch = start_epoch
+         if start_epoch > (self.epochs - self.args.close_mosaic):
+             self._close_dataloader_mosaic()
+
+     def _close_dataloader_mosaic(self):
+         """Update dataloaders to stop using mosaic augmentation."""
+         if hasattr(self.train_loader.dataset, "mosaic"):
+             self.train_loader.dataset.mosaic = False
+         if hasattr(self.train_loader.dataset, "close_mosaic"):
+             LOGGER.info("Closing dataloader mosaic")
+             self.train_loader.dataset.close_mosaic(hyp=copy(self.args))
+
+     def build_optimizer(self, model, name="auto", lr=0.001, momentum=0.9, decay=1e-5, iterations=1e5):
+         """
+         Construct an optimizer for the given model.
+
+         Args:
+             model (torch.nn.Module): The model for which to build an optimizer.
+             name (str, optional): The name of the optimizer to use. If 'auto', the optimizer is selected
+                 based on the number of iterations. Default: 'auto'.
+             lr (float, optional): The learning rate for the optimizer. Default: 0.001.
+             momentum (float, optional): The momentum factor for the optimizer. Default: 0.9.
+             decay (float, optional): The weight decay for the optimizer. Default: 1e-5.
+             iterations (float, optional): The number of iterations, which determines the optimizer if
+                 name is 'auto'. Default: 1e5.
+
+         Returns:
+             (torch.optim.Optimizer): The constructed optimizer.
+         """
+         g = [], [], []  # optimizer parameter groups
+         bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k)  # normalization layers, i.e. BatchNorm2d()
+         if name == "auto":
+             LOGGER.info(
+                 f"{colorstr('optimizer:')} 'optimizer=auto' found, "
+                 f"ignoring 'lr0={self.args.lr0}' and 'momentum={self.args.momentum}' and "
+                 f"determining best 'optimizer', 'lr0' and 'momentum' automatically... "
+             )
+             nc = self.data.get("nc", 10)  # number of classes
+             lr_fit = round(0.002 * 5 / (4 + nc), 6)  # lr0 fit equation to 6 decimal places
+             name, lr, momentum = ("SGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
+             self.args.warmup_bias_lr = 0.0  # no higher than 0.01 for Adam
+
+         for module_name, module in model.named_modules():
+             for param_name, param in module.named_parameters(recurse=False):
+                 fullname = f"{module_name}.{param_name}" if module_name else param_name
+                 if "bias" in fullname:  # bias (no decay)
+                     g[2].append(param)
+                 elif isinstance(module, bn) or "logit_scale" in fullname:  # weight (no decay)
+                     # ContrastiveHead and BNContrastiveHead included here with 'logit_scale'
+                     g[1].append(param)
+                 else:  # weight (with decay)
+                     g[0].append(param)
+
+         optimizers = {"Adam", "Adamax", "AdamW", "NAdam", "RAdam", "RMSProp", "SGD", "auto"}
+         name = {x.lower(): x for x in optimizers}.get(name.lower())
+         if name in {"Adam", "Adamax", "AdamW", "NAdam", "RAdam"}:
+             optimizer = getattr(optim, name, optim.Adam)(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
+         elif name == "RMSProp":
+             optimizer = optim.RMSprop(g[2], lr=lr, momentum=momentum)
+         elif name == "SGD":
+             optimizer = optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
+         else:
+             raise NotImplementedError(
+                 f"Optimizer '{name}' not found in list of available optimizers {optimizers}. "
+                 "Request support for additional optimizers at https://github.com/ultralytics/ultralytics."
+             )
+
+         optimizer.add_param_group({"params": g[0], "weight_decay": decay})  # add g0 with weight_decay
+         optimizer.add_param_group({"params": g[1], "weight_decay": 0.0})  # add g1 (BatchNorm2d weights)
+         LOGGER.info(
+             f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
+             f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias(decay=0.0)"
+         )
+         return optimizer
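
Usage note: BaseTrainer is not used directly; task-specific subclasses such as DetectionTrainer implement get_model(), get_validator(), and get_dataloader(). Below is a minimal sketch of driving a trainer programmatically, assuming this headless wheel preserves the upstream ultralytics module layout (the import path and override keys follow upstream ultralytics and are not taken from this diff):

    from ultralytics.models.yolo.detect import DetectionTrainer

    # overrides feed BaseTrainer.__init__ via get_cfg(cfg, overrides);
    # keys mirror ultralytics/cfg/default.yaml
    overrides = {
        "model": "yolo11n.pt",  # resolved by check_model_file_from_stem()
        "data": "coco8.yaml",   # validated by check_det_dataset()
        "epochs": 3,
        "imgsz": 640,
        "batch": 16,
        "device": "cpu",        # "0,1" would take the DDP subprocess path in train()
    }

    trainer = DetectionTrainer(overrides=overrides)
    trainer.add_callback("on_train_epoch_end", lambda t: print(f"epoch {t.epoch + 1} done"))
    trainer.train()

Checkpoints land in trainer.wdir as last.pt and best.pt, matching the save_model() logic above.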