megadetector 5.0.5-py3-none-any.whl → 5.0.7-py3-none-any.whl
This diff compares the contents of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of megadetector might be problematic.
- api/batch_processing/data_preparation/manage_local_batch.py +302 -263
- api/batch_processing/data_preparation/manage_video_batch.py +81 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/categorize_detections_by_size.py +50 -19
- api/batch_processing/postprocessing/compare_batch_results.py +110 -60
- api/batch_processing/postprocessing/load_api_results.py +56 -70
- api/batch_processing/postprocessing/md_to_coco.py +1 -1
- api/batch_processing/postprocessing/md_to_labelme.py +2 -1
- api/batch_processing/postprocessing/postprocess_batch_results.py +240 -81
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +227 -75
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +2 -2
- classification/prepare_classification_script.py +191 -191
- data_management/coco_to_yolo.py +68 -45
- data_management/databases/integrity_check_json_db.py +7 -5
- data_management/generate_crops_from_cct.py +3 -3
- data_management/get_image_sizes.py +8 -6
- data_management/importers/add_timestamps_to_icct.py +79 -0
- data_management/importers/animl_results_to_md_results.py +160 -0
- data_management/importers/auckland_doc_test_to_json.py +4 -4
- data_management/importers/auckland_doc_to_json.py +1 -1
- data_management/importers/awc_to_json.py +5 -5
- data_management/importers/bellevue_to_json.py +5 -5
- data_management/importers/carrizo_shrubfree_2018.py +5 -5
- data_management/importers/carrizo_trail_cam_2017.py +5 -5
- data_management/importers/cct_field_adjustments.py +2 -3
- data_management/importers/channel_islands_to_cct.py +4 -4
- data_management/importers/ena24_to_json.py +5 -5
- data_management/importers/helena_to_cct.py +10 -10
- data_management/importers/idaho-camera-traps.py +12 -12
- data_management/importers/idfg_iwildcam_lila_prep.py +8 -8
- data_management/importers/jb_csv_to_json.py +4 -4
- data_management/importers/missouri_to_json.py +1 -1
- data_management/importers/noaa_seals_2019.py +1 -1
- data_management/importers/pc_to_json.py +5 -5
- data_management/importers/prepare-noaa-fish-data-for-lila.py +4 -4
- data_management/importers/prepare_zsl_imerit.py +5 -5
- data_management/importers/rspb_to_json.py +4 -4
- data_management/importers/save_the_elephants_survey_A.py +5 -5
- data_management/importers/save_the_elephants_survey_B.py +6 -6
- data_management/importers/snapshot_safari_importer.py +9 -9
- data_management/importers/snapshot_serengeti_lila.py +9 -9
- data_management/importers/timelapse_csv_set_to_json.py +5 -7
- data_management/importers/ubc_to_json.py +4 -4
- data_management/importers/umn_to_json.py +4 -4
- data_management/importers/wellington_to_json.py +1 -1
- data_management/importers/wi_to_json.py +2 -2
- data_management/importers/zamba_results_to_md_results.py +181 -0
- data_management/labelme_to_coco.py +35 -7
- data_management/labelme_to_yolo.py +229 -0
- data_management/lila/add_locations_to_island_camera_traps.py +1 -1
- data_management/lila/add_locations_to_nacti.py +147 -0
- data_management/lila/create_lila_blank_set.py +474 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +106 -0
- data_management/lila/download_lila_subset.py +46 -21
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +17 -11
- data_management/lila/lila_common.py +14 -11
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/ocr_tools.py +829 -0
- data_management/resize_coco_dataset.py +13 -11
- data_management/yolo_output_to_md_output.py +84 -12
- data_management/yolo_to_coco.py +38 -20
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +23 -8
- detection/run_detector.py +76 -19
- detection/run_detector_batch.py +178 -63
- detection/run_inference_with_yolov5_val.py +326 -57
- detection/run_tiled_inference.py +153 -43
- detection/video_utils.py +34 -8
- md_utils/ct_utils.py +172 -1
- md_utils/md_tests.py +372 -51
- md_utils/path_utils.py +167 -39
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +0 -2
- md_utils/write_html_image_list.py +9 -26
- md_visualization/plot_utils.py +12 -8
- md_visualization/visualization_utils.py +106 -7
- md_visualization/visualize_db.py +16 -8
- md_visualization/visualize_detector_output.py +208 -97
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/METADATA +3 -6
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/RECORD +98 -121
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- api/synchronous/api_core/yolov5/detect.py +0 -252
- api/synchronous/api_core/yolov5/export.py +0 -607
- api/synchronous/api_core/yolov5/hubconf.py +0 -146
- api/synchronous/api_core/yolov5/models/__init__.py +0 -0
- api/synchronous/api_core/yolov5/models/common.py +0 -738
- api/synchronous/api_core/yolov5/models/experimental.py +0 -104
- api/synchronous/api_core/yolov5/models/tf.py +0 -574
- api/synchronous/api_core/yolov5/models/yolo.py +0 -338
- api/synchronous/api_core/yolov5/train.py +0 -670
- api/synchronous/api_core/yolov5/utils/__init__.py +0 -36
- api/synchronous/api_core/yolov5/utils/activations.py +0 -103
- api/synchronous/api_core/yolov5/utils/augmentations.py +0 -284
- api/synchronous/api_core/yolov5/utils/autoanchor.py +0 -170
- api/synchronous/api_core/yolov5/utils/autobatch.py +0 -66
- api/synchronous/api_core/yolov5/utils/aws/__init__.py +0 -0
- api/synchronous/api_core/yolov5/utils/aws/resume.py +0 -40
- api/synchronous/api_core/yolov5/utils/benchmarks.py +0 -148
- api/synchronous/api_core/yolov5/utils/callbacks.py +0 -71
- api/synchronous/api_core/yolov5/utils/dataloaders.py +0 -1087
- api/synchronous/api_core/yolov5/utils/downloads.py +0 -178
- api/synchronous/api_core/yolov5/utils/flask_rest_api/example_request.py +0 -19
- api/synchronous/api_core/yolov5/utils/flask_rest_api/restapi.py +0 -46
- api/synchronous/api_core/yolov5/utils/general.py +0 -1018
- api/synchronous/api_core/yolov5/utils/loggers/__init__.py +0 -187
- api/synchronous/api_core/yolov5/utils/loggers/wandb/__init__.py +0 -0
- api/synchronous/api_core/yolov5/utils/loggers/wandb/log_dataset.py +0 -27
- api/synchronous/api_core/yolov5/utils/loggers/wandb/sweep.py +0 -41
- api/synchronous/api_core/yolov5/utils/loggers/wandb/wandb_utils.py +0 -577
- api/synchronous/api_core/yolov5/utils/loss.py +0 -234
- api/synchronous/api_core/yolov5/utils/metrics.py +0 -355
- api/synchronous/api_core/yolov5/utils/plots.py +0 -489
- api/synchronous/api_core/yolov5/utils/torch_utils.py +0 -314
- api/synchronous/api_core/yolov5/val.py +0 -394
- md_utils/matlab_porting_tools.py +0 -97
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
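
The dominant change in this release is the removal of the vendored YOLOv5 code under api/synchronous/api_core/yolov5/ (every file there is listed with +0, i.e. deleted), along with md_utils/matlab_porting_tools.py. For readers who want to sanity-check a file-level comparison like the list above, here is a minimal sketch using only the Python standard library; it is illustrative only, and assumes both wheels have been downloaded locally (e.g. with pip download megadetector==5.0.5 --no-deps) under their canonical filenames.

import zipfile

def wheel_members(path):
    # A wheel is an ordinary zip archive; namelist() returns its member paths.
    with zipfile.ZipFile(path) as zf:
        return set(zf.namelist())

old = wheel_members('megadetector-5.0.5-py3-none-any.whl')
new = wheel_members('megadetector-5.0.7-py3-none-any.whl')

for name in sorted(old - new):
    print('removed:', name)   # e.g. the vendored yolov5/ tree
for name in sorted(new - old):
    print('added:  ', name)

Note that this only compares member paths; the per-file +/- line counts above come from diffing the extracted file contents themselves.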
api/synchronous/api_core/yolov5/train.py +0 -670

@@ -1,670 +0,0 @@
-# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
-"""
-Train a YOLOv5 model on a custom dataset.
-
-Models and datasets download automatically from the latest YOLOv5 release.
-Models: https://github.com/ultralytics/yolov5/tree/master/models
-Datasets: https://github.com/ultralytics/yolov5/tree/master/data
-Tutorial: https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data
-
-Usage:
-    $ python path/to/train.py --data coco128.yaml --weights yolov5s.pt --img 640  # from pretrained (RECOMMENDED)
-    $ python path/to/train.py --data coco128.yaml --weights '' --cfg yolov5s.yaml --img 640  # from scratch
-"""
-
-import argparse
-import math
-import os
-import random
-import sys
-import time
-from copy import deepcopy
-from datetime import datetime
-from pathlib import Path
-
-import numpy as np
-import torch
-import torch.distributed as dist
-import torch.nn as nn
-import yaml
-from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.optim import SGD, Adam, AdamW, lr_scheduler
-from tqdm import tqdm
-
-FILE = Path(__file__).resolve()
-ROOT = FILE.parents[0]  # YOLOv5 root directory
-if str(ROOT) not in sys.path:
-    sys.path.append(str(ROOT))  # add ROOT to PATH
-ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
-
-import val  # for end-of-epoch mAP
-from models.experimental import attempt_load
-from models.yolo import Model
-from utils.autoanchor import check_anchors
-from utils.autobatch import check_train_batch_size
-from utils.callbacks import Callbacks
-from utils.dataloaders import create_dataloader
-from utils.downloads import attempt_download
-from utils.general import (LOGGER, check_amp, check_dataset, check_file, check_git_status, check_img_size,
-                           check_requirements, check_suffix, check_version, check_yaml, colorstr, get_latest_run,
-                           increment_path, init_seeds, intersect_dicts, labels_to_class_weights,
-                           labels_to_image_weights, methods, one_cycle, print_args, print_mutation, strip_optimizer)
-from utils.loggers import Loggers
-from utils.loggers.wandb.wandb_utils import check_wandb_resume
-from utils.loss import ComputeLoss
-from utils.metrics import fitness
-from utils.plots import plot_evolve, plot_labels
-from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first
-
-LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
-RANK = int(os.getenv('RANK', -1))
-WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
-
-
-def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
-    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
-        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
-        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
-    callbacks.run('on_pretrain_routine_start')
-
-    # Directories
-    w = save_dir / 'weights'  # weights dir
-    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
-    last, best = w / 'last.pt', w / 'best.pt'
-
-    # Hyperparameters
-    if isinstance(hyp, str):
-        with open(hyp, errors='ignore') as f:
-            hyp = yaml.safe_load(f)  # load hyps dict
-    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
-
-    # Save run settings
-    if not evolve:
-        with open(save_dir / 'hyp.yaml', 'w') as f:
-            yaml.safe_dump(hyp, f, sort_keys=False)
-        with open(save_dir / 'opt.yaml', 'w') as f:
-            yaml.safe_dump(vars(opt), f, sort_keys=False)
-
-    # Loggers
-    data_dict = None
-    if RANK in {-1, 0}:
-        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance
-        if loggers.wandb:
-            data_dict = loggers.wandb.data_dict
-            if resume:
-                weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size
-
-        # Register actions
-        for k in methods(loggers):
-            callbacks.register_action(k, callback=getattr(loggers, k))
-
-    # Config
-    plots = not evolve and not opt.noplots  # create plots
-    cuda = device.type != 'cpu'
-    init_seeds(1 + RANK)
-    with torch_distributed_zero_first(LOCAL_RANK):
-        data_dict = data_dict or check_dataset(data)  # check if None
-    train_path, val_path = data_dict['train'], data_dict['val']
-    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
-    names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
-    assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check
-    is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset
-
-    # Model
-    check_suffix(weights, '.pt')  # check weights
-    pretrained = weights.endswith('.pt')
-    if pretrained:
-        with torch_distributed_zero_first(LOCAL_RANK):
-            weights = attempt_download(weights)  # download if not found locally
-        ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak
-        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
-        exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
-        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
-        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
-        model.load_state_dict(csd, strict=False)  # load
-        LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}')  # report
-    else:
-        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
-    amp = check_amp(model)  # check AMP
-
-    # Freeze
-    freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
-    for k, v in model.named_parameters():
-        v.requires_grad = True  # train all layers
-        if any(x in k for x in freeze):
-            LOGGER.info(f'freezing {k}')
-            v.requires_grad = False
-
-    # Image size
-    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
-    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple
-
-    # Batch size
-    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
-        batch_size = check_train_batch_size(model, imgsz, amp)
-        loggers.on_params_update({"batch_size": batch_size})
-
-    # Optimizer
-    nbs = 64  # nominal batch size
-    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
-    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
-    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")
-
-    g = [], [], []  # optimizer parameter groups
-    bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. BatchNorm2d()
-    for v in model.modules():
-        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
-            g[2].append(v.bias)
-        if isinstance(v, bn):  # weight (no decay)
-            g[1].append(v.weight)
-        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
-            g[0].append(v.weight)
-
-    if opt.optimizer == 'Adam':
-        optimizer = Adam(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
-    elif opt.optimizer == 'AdamW':
-        optimizer = AdamW(g[2], lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
-    else:
-        optimizer = SGD(g[2], lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
-
-    optimizer.add_param_group({'params': g[0], 'weight_decay': hyp['weight_decay']})  # add g0 with weight_decay
-    optimizer.add_param_group({'params': g[1]})  # add g1 (BatchNorm2d weights)
-    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
-                f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias")
-    del g
-
-    # Scheduler
-    if opt.cos_lr:
-        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
-    else:
-        lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
-    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)
-
-    # EMA
-    ema = ModelEMA(model) if RANK in {-1, 0} else None
-
-    # Resume
-    start_epoch, best_fitness = 0, 0.0
-    if pretrained:
-        # Optimizer
-        if ckpt['optimizer'] is not None:
-            optimizer.load_state_dict(ckpt['optimizer'])
-            best_fitness = ckpt['best_fitness']
-
-        # EMA
-        if ema and ckpt.get('ema'):
-            ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
-            ema.updates = ckpt['updates']
-
-        # Epochs
-        start_epoch = ckpt['epoch'] + 1
-        if resume:
-            assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
-        if epochs < start_epoch:
-            LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.")
-            epochs += ckpt['epoch']  # finetune additional epochs
-
-        del ckpt, csd
-
-    # DP mode
-    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
-        LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
-                       'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
-        model = torch.nn.DataParallel(model)
-
-    # SyncBatchNorm
-    if opt.sync_bn and cuda and RANK != -1:
-        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
-        LOGGER.info('Using SyncBatchNorm()')
-
-    # Trainloader
-    train_loader, dataset = create_dataloader(train_path,
-                                              imgsz,
-                                              batch_size // WORLD_SIZE,
-                                              gs,
-                                              single_cls,
-                                              hyp=hyp,
-                                              augment=True,
-                                              cache=None if opt.cache == 'val' else opt.cache,
-                                              rect=opt.rect,
-                                              rank=LOCAL_RANK,
-                                              workers=workers,
-                                              image_weights=opt.image_weights,
-                                              quad=opt.quad,
-                                              prefix=colorstr('train: '),
-                                              shuffle=True)
-    mlc = int(np.concatenate(dataset.labels, 0)[:, 0].max())  # max label class
-    nb = len(train_loader)  # number of batches
-    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'
-
-    # Process 0
-    if RANK in {-1, 0}:
-        val_loader = create_dataloader(val_path,
-                                       imgsz,
-                                       batch_size // WORLD_SIZE * 2,
-                                       gs,
-                                       single_cls,
-                                       hyp=hyp,
-                                       cache=None if noval else opt.cache,
-                                       rect=True,
-                                       rank=-1,
-                                       workers=workers * 2,
-                                       pad=0.5,
-                                       prefix=colorstr('val: '))[0]
-
-        if not resume:
-            labels = np.concatenate(dataset.labels, 0)
-            # c = torch.tensor(labels[:, 0])  # classes
-            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
-            # model._initialize_biases(cf.to(device))
-            if plots:
-                plot_labels(labels, names, save_dir)
-
-            # Anchors
-            if not opt.noautoanchor:
-                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
-            model.half().float()  # pre-reduce anchor precision
-
-        callbacks.run('on_pretrain_routine_end')
-
-    # DDP mode
-    if cuda and RANK != -1:
-        if check_version(torch.__version__, '1.11.0'):
-            model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True)
-        else:
-            model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
-
-    # Model attributes
-    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
-    hyp['box'] *= 3 / nl  # scale to layers
-    hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
-    hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
-    hyp['label_smoothing'] = opt.label_smoothing
-    model.nc = nc  # attach number of classes to model
-    model.hyp = hyp  # attach hyperparameters to model
-    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
-    model.names = names
-
-    # Start training
-    t0 = time.time()
-    nw = max(round(hyp['warmup_epochs'] * nb), 100)  # number of warmup iterations, max(3 epochs, 100 iterations)
-    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
-    last_opt_step = -1
-    maps = np.zeros(nc)  # mAP per class
-    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
-    scheduler.last_epoch = start_epoch - 1  # do not move
-    scaler = torch.cuda.amp.GradScaler(enabled=amp)
-    stopper = EarlyStopping(patience=opt.patience)
-    compute_loss = ComputeLoss(model)  # init loss class
-    callbacks.run('on_train_start')
-    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
-                f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
-                f"Logging results to {colorstr('bold', save_dir)}\n"
-                f'Starting training for {epochs} epochs...')
-    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
-        callbacks.run('on_train_epoch_start')
-        model.train()
-
-        # Update image weights (optional, single-GPU only)
-        if opt.image_weights:
-            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
-            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
-            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
-
-        # Update mosaic border (optional)
-        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
-        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
-
-        mloss = torch.zeros(3, device=device)  # mean losses
-        if RANK != -1:
-            train_loader.sampler.set_epoch(epoch)
-        pbar = enumerate(train_loader)
-        LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
-        if RANK in {-1, 0}:
-            pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
-        optimizer.zero_grad()
-        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
-            callbacks.run('on_train_batch_start')
-            ni = i + nb * epoch  # number integrated batches (since train start)
-            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0
-
-            # Warmup
-            if ni <= nw:
-                xi = [0, nw]  # x interp
-                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
-                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
-                for j, x in enumerate(optimizer.param_groups):
-                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
-                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
-                    if 'momentum' in x:
-                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
-
-            # Multi-scale
-            if opt.multi_scale:
-                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
-                sf = sz / max(imgs.shape[2:])  # scale factor
-                if sf != 1:
-                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
-                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
-
-            # Forward
-            with torch.cuda.amp.autocast(amp):
-                pred = model(imgs)  # forward
-                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
-                if RANK != -1:
-                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
-                if opt.quad:
-                    loss *= 4.
-
-            # Backward
-            scaler.scale(loss).backward()
-
-            # Optimize
-            if ni - last_opt_step >= accumulate:
-                scaler.step(optimizer)  # optimizer.step
-                scaler.update()
-                optimizer.zero_grad()
-                if ema:
-                    ema.update(model)
-                last_opt_step = ni
-
-            # Log
-            if RANK in {-1, 0}:
-                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
-                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
-                pbar.set_description(('%10s' * 2 + '%10.4g' * 5) %
-                                     (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
-                callbacks.run('on_train_batch_end', ni, model, imgs, targets, paths, plots)
-                if callbacks.stop_training:
-                    return
-            # end batch ------------------------------------------------------------------------------------------------
-
-        # Scheduler
-        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
-        scheduler.step()
-
-        if RANK in {-1, 0}:
-            # mAP
-            callbacks.run('on_train_epoch_end', epoch=epoch)
-            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
-            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
-            if not noval or final_epoch:  # Calculate mAP
-                results, maps, _ = val.run(data_dict,
-                                           batch_size=batch_size // WORLD_SIZE * 2,
-                                           imgsz=imgsz,
-                                           model=ema.ema,
-                                           single_cls=single_cls,
-                                           dataloader=val_loader,
-                                           save_dir=save_dir,
-                                           plots=False,
-                                           callbacks=callbacks,
-                                           compute_loss=compute_loss)
-
-            # Update best mAP
-            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
-            if fi > best_fitness:
-                best_fitness = fi
-            log_vals = list(mloss) + list(results) + lr
-            callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
-
-            # Save model
-            if (not nosave) or (final_epoch and not evolve):  # if save
-                ckpt = {
-                    'epoch': epoch,
-                    'best_fitness': best_fitness,
-                    'model': deepcopy(de_parallel(model)).half(),
-                    'ema': deepcopy(ema.ema).half(),
-                    'updates': ema.updates,
-                    'optimizer': optimizer.state_dict(),
-                    'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None,
-                    'date': datetime.now().isoformat()}
-
-                # Save last, best and delete
-                torch.save(ckpt, last)
-                if best_fitness == fi:
-                    torch.save(ckpt, best)
-                if opt.save_period > 0 and epoch % opt.save_period == 0:
-                    torch.save(ckpt, w / f'epoch{epoch}.pt')
-                del ckpt
-                callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
-
-            # Stop Single-GPU
-            if RANK == -1 and stopper(epoch=epoch, fitness=fi):
-                break
-
-            # Stop DDP TODO: known issues shttps://github.com/ultralytics/yolov5/pull/4576
-            # stop = stopper(epoch=epoch, fitness=fi)
-            # if RANK == 0:
-            #    dist.broadcast_object_list([stop], 0)  # broadcast 'stop' to all ranks
-
-        # Stop DPP
-        # with torch_distributed_zero_first(RANK):
-        # if stop:
-        #    break  # must break all DDP ranks
-
-        # end epoch ----------------------------------------------------------------------------------------------------
-    # end training -----------------------------------------------------------------------------------------------------
-    if RANK in {-1, 0}:
-        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
-        for f in last, best:
-            if f.exists():
-                strip_optimizer(f)  # strip optimizers
-                if f is best:
-                    LOGGER.info(f'\nValidating {f}...')
-                    results, _, _ = val.run(
-                        data_dict,
-                        batch_size=batch_size // WORLD_SIZE * 2,
-                        imgsz=imgsz,
-                        model=attempt_load(f, device).half(),
-                        iou_thres=0.65 if is_coco else 0.60,  # best pycocotools results at 0.65
-                        single_cls=single_cls,
-                        dataloader=val_loader,
-                        save_dir=save_dir,
-                        save_json=is_coco,
-                        verbose=True,
-                        plots=plots,
-                        callbacks=callbacks,
-                        compute_loss=compute_loss)  # val best model with plots
-                    if is_coco:
-                        callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
-
-        callbacks.run('on_train_end', last, best, plots, epoch, results)
-
-    torch.cuda.empty_cache()
-    return results
-
-
-def parse_opt(known=False):
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path')
-    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
-    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
-    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
-    parser.add_argument('--epochs', type=int, default=300)
-    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
-    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
-    parser.add_argument('--rect', action='store_true', help='rectangular training')
-    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
-    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
-    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
-    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
-    parser.add_argument('--noplots', action='store_true', help='save no plot files')
-    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
-    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
-    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
-    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
-    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
-    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
-    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
-    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
-    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
-    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
-    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
-    parser.add_argument('--name', default='exp', help='save to project/name')
-    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
-    parser.add_argument('--quad', action='store_true', help='quad dataloader')
-    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
-    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
-    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
-    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
-    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
-    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
-
-    # Weights & Biases arguments
-    parser.add_argument('--entity', default=None, help='W&B: Entity')
-    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
-    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
-    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')
-
-    opt = parser.parse_known_args()[0] if known else parser.parse_args()
-    return opt
-
-
-def main(opt, callbacks=Callbacks()):
-    # Checks
-    if RANK in {-1, 0}:
-        print_args(vars(opt))
-        check_git_status()
-        check_requirements(exclude=['thop'])
-
-    # Resume
-    if opt.resume and not check_wandb_resume(opt) and not opt.evolve:  # resume an interrupted run
-        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
-        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
-        with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f:
-            opt = argparse.Namespace(**yaml.safe_load(f))  # replace
-        opt.cfg, opt.weights, opt.resume = '', ckpt, True  # reinstate
-        LOGGER.info(f'Resuming training from {ckpt}')
-    else:
-        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
-            check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)  # checks
-        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
-        if opt.evolve:
-            if opt.project == str(ROOT / 'runs/train'):  # if default project name, rename to runs/evolve
-                opt.project = str(ROOT / 'runs/evolve')
-            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
-        if opt.name == 'cfg':
-            opt.name = Path(opt.cfg).stem  # use model.yaml as name
-        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
-
-    # DDP mode
-    device = select_device(opt.device, batch_size=opt.batch_size)
-    if LOCAL_RANK != -1:
-        msg = 'is not compatible with YOLOv5 Multi-GPU DDP training'
-        assert not opt.image_weights, f'--image-weights {msg}'
-        assert not opt.evolve, f'--evolve {msg}'
-        assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
-        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
-        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
-        torch.cuda.set_device(LOCAL_RANK)
-        device = torch.device('cuda', LOCAL_RANK)
-        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
-
-    # Train
-    if not opt.evolve:
-        train(opt.hyp, opt, device, callbacks)
-        if WORLD_SIZE > 1 and RANK == 0:
-            LOGGER.info('Destroying process group... ')
-            dist.destroy_process_group()
-
-    # Evolve hyperparameters (optional)
-    else:
-        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
-        meta = {
-            'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
-            'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
-            'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
-            'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
-            'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
-            'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
-            'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
-            'box': (1, 0.02, 0.2),  # box loss gain
-            'cls': (1, 0.2, 4.0),  # cls loss gain
-            'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
-            'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
-            'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
-            'iou_t': (0, 0.1, 0.7),  # IoU training threshold
-            'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
-            'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
-            'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
-            'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
-            'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
-            'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
-            'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
-            'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
-            'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
-            'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
-            'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
-            'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
-            'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
-            'mosaic': (1, 0.0, 1.0),  # image mixup (probability)
-            'mixup': (1, 0.0, 1.0),  # image mixup (probability)
-            'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)
-
-        with open(opt.hyp, errors='ignore') as f:
-            hyp = yaml.safe_load(f)  # load hyps dict
-            if 'anchors' not in hyp:  # anchors commented in hyp.yaml
-                hyp['anchors'] = 3
-        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
-        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
-        evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
-        if opt.bucket:
-            os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}')  # download evolve.csv if exists
-
-        for _ in range(opt.evolve):  # generations to evolve
-            if evolve_csv.exists():  # if evolve.csv exists: select best hyps and mutate
-                # Select parent(s)
-                parent = 'single'  # parent selection method: 'single' or 'weighted'
-                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
-                n = min(5, len(x))  # number of previous results to consider
-                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
-                w = fitness(x) - fitness(x).min() + 1E-6  # weights (sum > 0)
-                if parent == 'single' or len(x) == 1:
-                    # x = x[random.randint(0, n - 1)]  # random selection
-                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
-                elif parent == 'weighted':
-                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination
-
-                # Mutate
-                mp, s = 0.8, 0.2  # mutation probability, sigma
-                npr = np.random
-                npr.seed(int(time.time()))
-                g = np.array([meta[k][0] for k in hyp.keys()])  # gains 0-1
-                ng = len(meta)
-                v = np.ones(ng)
-                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
-                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
-                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
-                    hyp[k] = float(x[i + 7] * v[i])  # mutate
-
-            # Constrain to limits
-            for k, v in meta.items():
-                hyp[k] = max(hyp[k], v[1])  # lower limit
-                hyp[k] = min(hyp[k], v[2])  # upper limit
-                hyp[k] = round(hyp[k], 5)  # significant digits
-
-            # Train mutation
-            results = train(hyp.copy(), opt, device, callbacks)
-            callbacks = Callbacks()
-            # Write mutation results
-            print_mutation(results, hyp.copy(), save_dir, opt.bucket)
-
-        # Plot results
-        plot_evolve(evolve_csv)
-        LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n'
-                    f"Results saved to {colorstr('bold', save_dir)}\n"
-                    f'Usage example: $ python train.py --hyp {evolve_yaml}')
-
-
-def run(**kwargs):
-    # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
-    opt = parse_opt(True)
-    for k, v in kwargs.items():
-        setattr(opt, k, v)
-    main(opt)
-    return opt
-
-
-if __name__ == "__main__":
-    opt = parse_opt()
-    main(opt)