megadetector-5.0.5-py3-none-any.whl → megadetector-5.0.7-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release.
This version of megadetector might be problematic.
- api/batch_processing/data_preparation/manage_local_batch.py +302 -263
- api/batch_processing/data_preparation/manage_video_batch.py +81 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/categorize_detections_by_size.py +50 -19
- api/batch_processing/postprocessing/compare_batch_results.py +110 -60
- api/batch_processing/postprocessing/load_api_results.py +56 -70
- api/batch_processing/postprocessing/md_to_coco.py +1 -1
- api/batch_processing/postprocessing/md_to_labelme.py +2 -1
- api/batch_processing/postprocessing/postprocess_batch_results.py +240 -81
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +227 -75
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +2 -2
- classification/prepare_classification_script.py +191 -191
- data_management/coco_to_yolo.py +68 -45
- data_management/databases/integrity_check_json_db.py +7 -5
- data_management/generate_crops_from_cct.py +3 -3
- data_management/get_image_sizes.py +8 -6
- data_management/importers/add_timestamps_to_icct.py +79 -0
- data_management/importers/animl_results_to_md_results.py +160 -0
- data_management/importers/auckland_doc_test_to_json.py +4 -4
- data_management/importers/auckland_doc_to_json.py +1 -1
- data_management/importers/awc_to_json.py +5 -5
- data_management/importers/bellevue_to_json.py +5 -5
- data_management/importers/carrizo_shrubfree_2018.py +5 -5
- data_management/importers/carrizo_trail_cam_2017.py +5 -5
- data_management/importers/cct_field_adjustments.py +2 -3
- data_management/importers/channel_islands_to_cct.py +4 -4
- data_management/importers/ena24_to_json.py +5 -5
- data_management/importers/helena_to_cct.py +10 -10
- data_management/importers/idaho-camera-traps.py +12 -12
- data_management/importers/idfg_iwildcam_lila_prep.py +8 -8
- data_management/importers/jb_csv_to_json.py +4 -4
- data_management/importers/missouri_to_json.py +1 -1
- data_management/importers/noaa_seals_2019.py +1 -1
- data_management/importers/pc_to_json.py +5 -5
- data_management/importers/prepare-noaa-fish-data-for-lila.py +4 -4
- data_management/importers/prepare_zsl_imerit.py +5 -5
- data_management/importers/rspb_to_json.py +4 -4
- data_management/importers/save_the_elephants_survey_A.py +5 -5
- data_management/importers/save_the_elephants_survey_B.py +6 -6
- data_management/importers/snapshot_safari_importer.py +9 -9
- data_management/importers/snapshot_serengeti_lila.py +9 -9
- data_management/importers/timelapse_csv_set_to_json.py +5 -7
- data_management/importers/ubc_to_json.py +4 -4
- data_management/importers/umn_to_json.py +4 -4
- data_management/importers/wellington_to_json.py +1 -1
- data_management/importers/wi_to_json.py +2 -2
- data_management/importers/zamba_results_to_md_results.py +181 -0
- data_management/labelme_to_coco.py +35 -7
- data_management/labelme_to_yolo.py +229 -0
- data_management/lila/add_locations_to_island_camera_traps.py +1 -1
- data_management/lila/add_locations_to_nacti.py +147 -0
- data_management/lila/create_lila_blank_set.py +474 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +106 -0
- data_management/lila/download_lila_subset.py +46 -21
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +17 -11
- data_management/lila/lila_common.py +14 -11
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/ocr_tools.py +829 -0
- data_management/resize_coco_dataset.py +13 -11
- data_management/yolo_output_to_md_output.py +84 -12
- data_management/yolo_to_coco.py +38 -20
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +23 -8
- detection/run_detector.py +76 -19
- detection/run_detector_batch.py +178 -63
- detection/run_inference_with_yolov5_val.py +326 -57
- detection/run_tiled_inference.py +153 -43
- detection/video_utils.py +34 -8
- md_utils/ct_utils.py +172 -1
- md_utils/md_tests.py +372 -51
- md_utils/path_utils.py +167 -39
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +0 -2
- md_utils/write_html_image_list.py +9 -26
- md_visualization/plot_utils.py +12 -8
- md_visualization/visualization_utils.py +106 -7
- md_visualization/visualize_db.py +16 -8
- md_visualization/visualize_detector_output.py +208 -97
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/METADATA +3 -6
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/RECORD +98 -121
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- api/synchronous/api_core/yolov5/detect.py +0 -252
- api/synchronous/api_core/yolov5/export.py +0 -607
- api/synchronous/api_core/yolov5/hubconf.py +0 -146
- api/synchronous/api_core/yolov5/models/__init__.py +0 -0
- api/synchronous/api_core/yolov5/models/common.py +0 -738
- api/synchronous/api_core/yolov5/models/experimental.py +0 -104
- api/synchronous/api_core/yolov5/models/tf.py +0 -574
- api/synchronous/api_core/yolov5/models/yolo.py +0 -338
- api/synchronous/api_core/yolov5/train.py +0 -670
- api/synchronous/api_core/yolov5/utils/__init__.py +0 -36
- api/synchronous/api_core/yolov5/utils/activations.py +0 -103
- api/synchronous/api_core/yolov5/utils/augmentations.py +0 -284
- api/synchronous/api_core/yolov5/utils/autoanchor.py +0 -170
- api/synchronous/api_core/yolov5/utils/autobatch.py +0 -66
- api/synchronous/api_core/yolov5/utils/aws/__init__.py +0 -0
- api/synchronous/api_core/yolov5/utils/aws/resume.py +0 -40
- api/synchronous/api_core/yolov5/utils/benchmarks.py +0 -148
- api/synchronous/api_core/yolov5/utils/callbacks.py +0 -71
- api/synchronous/api_core/yolov5/utils/dataloaders.py +0 -1087
- api/synchronous/api_core/yolov5/utils/downloads.py +0 -178
- api/synchronous/api_core/yolov5/utils/flask_rest_api/example_request.py +0 -19
- api/synchronous/api_core/yolov5/utils/flask_rest_api/restapi.py +0 -46
- api/synchronous/api_core/yolov5/utils/general.py +0 -1018
- api/synchronous/api_core/yolov5/utils/loggers/__init__.py +0 -187
- api/synchronous/api_core/yolov5/utils/loggers/wandb/__init__.py +0 -0
- api/synchronous/api_core/yolov5/utils/loggers/wandb/log_dataset.py +0 -27
- api/synchronous/api_core/yolov5/utils/loggers/wandb/sweep.py +0 -41
- api/synchronous/api_core/yolov5/utils/loggers/wandb/wandb_utils.py +0 -577
- api/synchronous/api_core/yolov5/utils/loss.py +0 -234
- api/synchronous/api_core/yolov5/utils/metrics.py +0 -355
- api/synchronous/api_core/yolov5/utils/plots.py +0 -489
- api/synchronous/api_core/yolov5/utils/torch_utils.py +0 -314
- api/synchronous/api_core/yolov5/val.py +0 -394
- md_utils/matlab_porting_tools.py +0 -97
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
api/synchronous/api_core/yolov5/utils/loggers/wandb/wandb_utils.py
@@ -1,577 +0,0 @@
-"""Utilities and tools for tracking runs with Weights & Biases."""
-
-import logging
-import os
-import sys
-from contextlib import contextmanager
-from pathlib import Path
-from typing import Dict
-
-import yaml
-from tqdm import tqdm
-
-FILE = Path(__file__).resolve()
-ROOT = FILE.parents[3]  # YOLOv5 root directory
-if str(ROOT) not in sys.path:
-    sys.path.append(str(ROOT))  # add ROOT to PATH
-
-from utils.dataloaders import LoadImagesAndLabels, img2label_paths
-from utils.general import LOGGER, check_dataset, check_file
-
-try:
-    import wandb
-
-    assert hasattr(wandb, '__version__')  # verify package import not local dir
-except (ImportError, AssertionError):
-    wandb = None
-
-RANK = int(os.getenv('RANK', -1))
-WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'
-
-
-def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX):
-    return from_string[len(prefix):]
-
-
-def check_wandb_config_file(data_config_file):
-    wandb_config = '_wandb.'.join(data_config_file.rsplit('.', 1))  # updated data.yaml path
-    if Path(wandb_config).is_file():
-        return wandb_config
-    return data_config_file
-
-
-def check_wandb_dataset(data_file):
-    is_trainset_wandb_artifact = False
-    is_valset_wandb_artifact = False
-    if check_file(data_file) and data_file.endswith('.yaml'):
-        with open(data_file, errors='ignore') as f:
-            data_dict = yaml.safe_load(f)
-        is_trainset_wandb_artifact = isinstance(data_dict['train'],
-                                                str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX)
-        is_valset_wandb_artifact = isinstance(data_dict['val'],
-                                              str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX)
-    if is_trainset_wandb_artifact or is_valset_wandb_artifact:
-        return data_dict
-    else:
-        return check_dataset(data_file)
-
-
-def get_run_info(run_path):
-    run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX))
-    run_id = run_path.stem
-    project = run_path.parent.stem
-    entity = run_path.parent.parent.stem
-    model_artifact_name = 'run_' + run_id + '_model'
-    return entity, project, run_id, model_artifact_name
-
-
-def check_wandb_resume(opt):
-    process_wandb_config_ddp_mode(opt) if RANK not in [-1, 0] else None
-    if isinstance(opt.resume, str):
-        if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
-            if RANK not in [-1, 0]:  # For resuming DDP runs
-                entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
-                api = wandb.Api()
-                artifact = api.artifact(entity + '/' + project + '/' + model_artifact_name + ':latest')
-                modeldir = artifact.download()
-                opt.weights = str(Path(modeldir) / "last.pt")
-            return True
-    return None
-
-
-def process_wandb_config_ddp_mode(opt):
-    with open(check_file(opt.data), errors='ignore') as f:
-        data_dict = yaml.safe_load(f)  # data dict
-    train_dir, val_dir = None, None
-    if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
-        api = wandb.Api()
-        train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias)
-        train_dir = train_artifact.download()
-        train_path = Path(train_dir) / 'data/images/'
-        data_dict['train'] = str(train_path)
-
-    if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX):
-        api = wandb.Api()
-        val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias)
-        val_dir = val_artifact.download()
-        val_path = Path(val_dir) / 'data/images/'
-        data_dict['val'] = str(val_path)
-    if train_dir or val_dir:
-        ddp_data_path = str(Path(val_dir) / 'wandb_local_data.yaml')
-        with open(ddp_data_path, 'w') as f:
-            yaml.safe_dump(data_dict, f)
-        opt.data = ddp_data_path
-
-
-class WandbLogger():
-    """Log training runs, datasets, models, and predictions to Weights & Biases.
-
-    This logger sends information to W&B at wandb.ai. By default, this information
-    includes hyperparameters, system configuration and metrics, model metrics,
-    and basic data metrics and analyses.
-
-    By providing additional command line arguments to train.py, datasets,
-    models and predictions can also be logged.
-
-    For more on how this logger is used, see the Weights & Biases documentation:
-    https://docs.wandb.com/guides/integrations/yolov5
-    """
-
-    def __init__(self, opt, run_id=None, job_type='Training'):
-        """
-        - Initialize WandbLogger instance
-        - Upload dataset if opt.upload_dataset is True
-        - Set up training processes if job_type is 'Training'
-
-        arguments:
-        opt (namespace) -- Commandline arguments for this run
-        run_id (str) -- Run ID of W&B run to be resumed
-        job_type (str) -- To set the job_type for this run
-
-        """
-        # Pre-training routine --
-        self.job_type = job_type
-        self.wandb, self.wandb_run = wandb, None if not wandb else wandb.run
-        self.val_artifact, self.train_artifact = None, None
-        self.train_artifact_path, self.val_artifact_path = None, None
-        self.result_artifact = None
-        self.val_table, self.result_table = None, None
-        self.bbox_media_panel_images = []
-        self.val_table_path_map = None
-        self.max_imgs_to_log = 16
-        self.wandb_artifact_data_dict = None
-        self.data_dict = None
-        # It's more elegant to stick to 1 wandb.init call,
-        # but useful config data is overwritten in the WandbLogger's wandb.init call
-        if isinstance(opt.resume, str):  # checks resume from artifact
-            if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
-                entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
-                model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name
-                assert wandb, 'install wandb to resume wandb runs'
-                # Resume wandb-artifact:// runs here: workaround for not overwriting wandb.config
-                self.wandb_run = wandb.init(id=run_id,
-                                            project=project,
-                                            entity=entity,
-                                            resume='allow',
-                                            allow_val_change=True)
-                opt.resume = model_artifact_name
-        elif self.wandb:
-            self.wandb_run = wandb.init(config=opt,
-                                        resume="allow",
-                                        project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
-                                        entity=opt.entity,
-                                        name=opt.name if opt.name != 'exp' else None,
-                                        job_type=job_type,
-                                        id=run_id,
-                                        allow_val_change=True) if not wandb.run else wandb.run
-        if self.wandb_run:
-            if self.job_type == 'Training':
-                if opt.upload_dataset:
-                    if not opt.resume:
-                        self.wandb_artifact_data_dict = self.check_and_upload_dataset(opt)
-
-                if opt.resume:
-                    # resume from artifact
-                    if isinstance(opt.resume, str) and opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
-                        self.data_dict = dict(self.wandb_run.config.data_dict)
-                    else:  # local resume
-                        self.data_dict = check_wandb_dataset(opt.data)
-                else:
-                    self.data_dict = check_wandb_dataset(opt.data)
-                    self.wandb_artifact_data_dict = self.wandb_artifact_data_dict or self.data_dict
-
-                    # write data_dict to config. useful for resuming from artifacts. Do this only when not resuming.
-                    self.wandb_run.config.update({'data_dict': self.wandb_artifact_data_dict}, allow_val_change=True)
-                self.setup_training(opt)
-
-            if self.job_type == 'Dataset Creation':
-                self.wandb_run.config.update({"upload_dataset": True})
-                self.data_dict = self.check_and_upload_dataset(opt)
-
-    def check_and_upload_dataset(self, opt):
-        """
-        Check if the dataset format is compatible and upload it as W&B artifact
-
-        arguments:
-        opt (namespace) -- Commandline arguments for current run
-
-        returns:
-        Updated dataset info dictionary where local dataset paths are replaced by WANDB_ARTIFACT_PREFIX links.
-        """
-        assert wandb, 'Install wandb to upload dataset'
-        config_path = self.log_dataset_artifact(opt.data, opt.single_cls,
-                                                'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
-        with open(config_path, errors='ignore') as f:
-            wandb_data_dict = yaml.safe_load(f)
-        return wandb_data_dict
-
-    def setup_training(self, opt):
-        """
-        Setup the necessary processes for training YOLO models:
-        - Attempt to download model checkpoint and dataset artifacts if opt.resume starts with WANDB_ARTIFACT_PREFIX
-        - Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
-        - Setup log_dict, initialize bbox_interval
-
-        arguments:
-        opt (namespace) -- commandline arguments for this run
-
-        """
-        self.log_dict, self.current_epoch = {}, 0
-        self.bbox_interval = opt.bbox_interval
-        if isinstance(opt.resume, str):
-            modeldir, _ = self.download_model_artifact(opt)
-            if modeldir:
-                self.weights = Path(modeldir) / "last.pt"
-                config = self.wandb_run.config
-                opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp, opt.imgsz = str(
-                    self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs,\
-                    config.hyp, config.imgsz
-        data_dict = self.data_dict
-        if self.val_artifact is None:  # If --upload_dataset is set, use the existing artifact, don't download
-            self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(
-                data_dict.get('train'), opt.artifact_alias)
-            self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(
-                data_dict.get('val'), opt.artifact_alias)
-
-        if self.train_artifact_path is not None:
-            train_path = Path(self.train_artifact_path) / 'data/images/'
-            data_dict['train'] = str(train_path)
-        if self.val_artifact_path is not None:
-            val_path = Path(self.val_artifact_path) / 'data/images/'
-            data_dict['val'] = str(val_path)
-
-        if self.val_artifact is not None:
-            self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
-            columns = ["epoch", "id", "ground truth", "prediction"]
-            columns.extend(self.data_dict['names'])
-            self.result_table = wandb.Table(columns)
-            self.val_table = self.val_artifact.get("val")
-            if self.val_table_path_map is None:
-                self.map_val_table_path()
-        if opt.bbox_interval == -1:
-            self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1
-            if opt.evolve or opt.noplots:
-                self.bbox_interval = opt.bbox_interval = opt.epochs + 1  # disable bbox_interval
-        train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None
-        # Update the data_dict to point to local artifacts dir
-        if train_from_artifact:
-            self.data_dict = data_dict
-
-    def download_dataset_artifact(self, path, alias):
-        """
-        download the model checkpoint artifact if the path starts with WANDB_ARTIFACT_PREFIX
-
-        arguments:
-        path -- path of the dataset to be used for training
-        alias (str) -- alias of the artifact to be downloaded/used for training
-
-        returns:
-        (str, wandb.Artifact) -- path of the downloaded dataset and its corresponding artifact object if dataset
-        is found otherwise returns (None, None)
-        """
-        if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX):
-            artifact_path = Path(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
-            dataset_artifact = wandb.use_artifact(artifact_path.as_posix().replace("\\", "/"))
-            assert dataset_artifact is not None, "Error: W&B dataset artifact doesn't exist"
-            datadir = dataset_artifact.download()
-            return datadir, dataset_artifact
-        return None, None
-
-    def download_model_artifact(self, opt):
-        """
-        download the model checkpoint artifact if the resume path starts with WANDB_ARTIFACT_PREFIX
-
-        arguments:
-        opt (namespace) -- Commandline arguments for this run
-        """
-        if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
-            model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest")
-            assert model_artifact is not None, "Error: W&B model artifact doesn't exist"
-            modeldir = model_artifact.download()
-            # epochs_trained = model_artifact.metadata.get('epochs_trained')
-            total_epochs = model_artifact.metadata.get('total_epochs')
-            is_finished = total_epochs is None
-            assert not is_finished, 'training is finished, can only resume incomplete runs.'
-            return modeldir, model_artifact
-        return None, None
-
-    def log_model(self, path, opt, epoch, fitness_score, best_model=False):
-        """
-        Log the model checkpoint as W&B artifact
-
-        arguments:
-        path (Path) -- Path of directory containing the checkpoints
-        opt (namespace) -- Command line arguments for this run
-        epoch (int) -- Current epoch number
-        fitness_score (float) -- fitness score for current epoch
-        best_model (boolean) -- Boolean representing if the current checkpoint is the best yet.
-        """
-        model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model',
-                                        type='model',
-                                        metadata={
-                                            'original_url': str(path),
-                                            'epochs_trained': epoch + 1,
-                                            'save period': opt.save_period,
-                                            'project': opt.project,
-                                            'total_epochs': opt.epochs,
-                                            'fitness_score': fitness_score})
-        model_artifact.add_file(str(path / 'last.pt'), name='last.pt')
-        wandb.log_artifact(model_artifact,
-                           aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), 'best' if best_model else ''])
-        LOGGER.info(f"Saving model artifact on epoch {epoch + 1}")
-
-    def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
-        """
-        Log the dataset as W&B artifact and return the new data file with W&B links
-
-        arguments:
-        data_file (str) -- the .yaml file with information about the dataset like - path, classes etc.
-        single_cls (boolean) -- train multi-class data as single-class
-        project (str) -- project name. Used to construct the artifact path
-        overwrite_config (boolean) -- overwrites the data.yaml file if set to true otherwise creates a new
-        file with _wandb postfix. Eg -> data_wandb.yaml
-
-        returns:
-        the new .yaml file with artifact links. it can be used to start training directly from artifacts
-        """
-        upload_dataset = self.wandb_run.config.upload_dataset
-        log_val_only = isinstance(upload_dataset, str) and upload_dataset == 'val'
-        self.data_dict = check_dataset(data_file)  # parse and check
-        data = dict(self.data_dict)
-        nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names'])
-        names = {k: v for k, v in enumerate(names)}  # to index dictionary
-
-        # log train set
-        if not log_val_only:
-            self.train_artifact = self.create_dataset_table(LoadImagesAndLabels(data['train'], rect=True, batch_size=1),
-                                                            names,
-                                                            name='train') if data.get('train') else None
-            if data.get('train'):
-                data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train')
-
-        self.val_artifact = self.create_dataset_table(
-            LoadImagesAndLabels(data['val'], rect=True, batch_size=1), names, name='val') if data.get('val') else None
-        if data.get('val'):
-            data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val')
-
-        path = Path(data_file)
-        # create a _wandb.yaml file with artifacts links if both train and test set are logged
-        if not log_val_only:
-            path = (path.stem if overwrite_config else path.stem + '_wandb') + '.yaml'  # updated data.yaml path
-            path = ROOT / 'data' / path
-            data.pop('download', None)
-            data.pop('path', None)
-            with open(path, 'w') as f:
-                yaml.safe_dump(data, f)
-                LOGGER.info(f"Created dataset config file {path}")
-
-        if self.job_type == 'Training':  # builds correct artifact pipeline graph
-            if not log_val_only:
-                self.wandb_run.log_artifact(
-                    self.train_artifact)  # calling use_artifact downloads the dataset. NOT NEEDED!
-            self.wandb_run.use_artifact(self.val_artifact)
-            self.val_artifact.wait()
-            self.val_table = self.val_artifact.get('val')
-            self.map_val_table_path()
-        else:
-            self.wandb_run.log_artifact(self.train_artifact)
-            self.wandb_run.log_artifact(self.val_artifact)
-        return path
-
-    def map_val_table_path(self):
-        """
-        Map the validation dataset Table: name of file -> its id in the W&B Table.
-        Useful for - referencing artifacts for evaluation.
-        """
-        self.val_table_path_map = {}
-        LOGGER.info("Mapping dataset")
-        for i, data in enumerate(tqdm(self.val_table.data)):
-            self.val_table_path_map[data[3]] = data[0]
-
-    def create_dataset_table(self, dataset: LoadImagesAndLabels, class_to_id: Dict[int, str], name: str = 'dataset'):
-        """
-        Create and return W&B artifact containing W&B Table of the dataset.
-
-        arguments:
-        dataset -- instance of LoadImagesAndLabels class used to iterate over the data to build Table
-        class_to_id -- hash map that maps class ids to labels
-        name -- name of the artifact
-
-        returns:
-        dataset artifact to be logged or used
-        """
-        # TODO: Explore multiprocessing to split this loop in parallel; this is essential for speeding up the logging
-        artifact = wandb.Artifact(name=name, type="dataset")
-        img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None
-        img_files = tqdm(dataset.im_files) if not img_files else img_files
-        for img_file in img_files:
-            if Path(img_file).is_dir():
-                artifact.add_dir(img_file, name='data/images')
-                labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
-                artifact.add_dir(labels_path, name='data/labels')
-            else:
-                artifact.add_file(img_file, name='data/images/' + Path(img_file).name)
-                label_file = Path(img2label_paths([img_file])[0])
-                artifact.add_file(str(label_file), name='data/labels/' +
-                                  label_file.name) if label_file.exists() else None
-        table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
-        class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()])
-        for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
-            box_data, img_classes = [], {}
-            for cls, *xywh in labels[:, 1:].tolist():
-                cls = int(cls)
-                box_data.append({
-                    "position": {
-                        "middle": [xywh[0], xywh[1]],
-                        "width": xywh[2],
-                        "height": xywh[3]},
-                    "class_id": cls,
-                    "box_caption": "%s" % (class_to_id[cls])})
-                img_classes[cls] = class_to_id[cls]
-            boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}}  # inference-space
-            table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), list(img_classes.values()),
-                           Path(paths).name)
-        artifact.add(table, name)
-        return artifact
-
-    def log_training_progress(self, predn, path, names):
-        """
-        Build evaluation Table. Uses reference from validation dataset table.
-
-        arguments:
-        predn (list): list of predictions in the native space in the format - [xmin, ymin, xmax, ymax, confidence, class]
-        path (str): local path of the current evaluation image
-        names (dict(int, str)): hash map that maps class ids to labels
-        """
-        class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()])
-        box_data = []
-        avg_conf_per_class = [0] * len(self.data_dict['names'])
-        pred_class_count = {}
-        for *xyxy, conf, cls in predn.tolist():
-            if conf >= 0.25:
-                cls = int(cls)
-                box_data.append({
-                    "position": {
-                        "minX": xyxy[0],
-                        "minY": xyxy[1],
-                        "maxX": xyxy[2],
-                        "maxY": xyxy[3]},
-                    "class_id": cls,
-                    "box_caption": f"{names[cls]} {conf:.3f}",
-                    "scores": {
-                        "class_score": conf},
-                    "domain": "pixel"})
-                avg_conf_per_class[cls] += conf
-
-                if cls in pred_class_count:
-                    pred_class_count[cls] += 1
-                else:
-                    pred_class_count[cls] = 1
-
-        for pred_class in pred_class_count.keys():
-            avg_conf_per_class[pred_class] = avg_conf_per_class[pred_class] / pred_class_count[pred_class]
-
-        boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
-        id = self.val_table_path_map[Path(path).name]
-        self.result_table.add_data(self.current_epoch, id, self.val_table.data[id][1],
-                                   wandb.Image(self.val_table.data[id][1], boxes=boxes, classes=class_set),
-                                   *avg_conf_per_class)
-
-    def val_one_image(self, pred, predn, path, names, im):
-        """
-        Log validation data for one image. Updates the result Table if the validation dataset is uploaded and logs to the bbox media panel.
-
-        arguments:
-        pred (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
-        predn (list): list of predictions in the native space - [xmin, ymin, xmax, ymax, confidence, class]
-        path (str): local path of the current evaluation image
-        """
-        if self.val_table and self.result_table:  # Log Table if Val dataset is uploaded as artifact
-            self.log_training_progress(predn, path, names)
-
-        if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0:
-            if self.current_epoch % self.bbox_interval == 0:
-                box_data = [{
-                    "position": {
-                        "minX": xyxy[0],
-                        "minY": xyxy[1],
-                        "maxX": xyxy[2],
-                        "maxY": xyxy[3]},
-                    "class_id": int(cls),
-                    "box_caption": f"{names[int(cls)]} {conf:.3f}",
-                    "scores": {
-                        "class_score": conf},
-                    "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
-                boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
-                self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes, caption=path.name))
-
-    def log(self, log_dict):
-        """
-        save the metrics to the logging dictionary
-
-        arguments:
-        log_dict (Dict) -- metrics/media to be logged in current step
-        """
-        if self.wandb_run:
-            for key, value in log_dict.items():
-                self.log_dict[key] = value
-
-    def end_epoch(self, best_result=False):
-        """
-        commit the log_dict, model artifacts and Tables to W&B and flush the log_dict.
-
-        arguments:
-        best_result (boolean): Boolean representing if the result of this evaluation is best or not
-        """
-        if self.wandb_run:
-            with all_logging_disabled():
-                if self.bbox_media_panel_images:
-                    self.log_dict["BoundingBoxDebugger"] = self.bbox_media_panel_images
-                try:
-                    wandb.log(self.log_dict)
-                except BaseException as e:
-                    LOGGER.info(
-                        f"An error occurred in wandb logger. The training will proceed without interruption. More info\n{e}"
-                    )
-                    self.wandb_run.finish()
-                    self.wandb_run = None
-
-                self.log_dict = {}
-                self.bbox_media_panel_images = []
-            if self.result_artifact:
-                self.result_artifact.add(self.result_table, 'result')
-                wandb.log_artifact(self.result_artifact,
-                                   aliases=[
-                                       'latest', 'last', 'epoch ' + str(self.current_epoch),
-                                       ('best' if best_result else '')])
-
-                wandb.log({"evaluation": self.result_table})
-                columns = ["epoch", "id", "ground truth", "prediction"]
-                columns.extend(self.data_dict['names'])
-                self.result_table = wandb.Table(columns)
-                self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
-
-    def finish_run(self):
-        """
-        Log metrics if any and finish the current W&B run
-        """
-        if self.wandb_run:
-            if self.log_dict:
-                with all_logging_disabled():
-                    wandb.log(self.log_dict)
-            wandb.run.finish()
-
-
-@contextmanager
-def all_logging_disabled(highest_level=logging.CRITICAL):
-    """ source - https://gist.github.com/simon-weber/7853144
-    A context manager that will prevent any logging messages triggered during the body from being processed.
-    :param highest_level: the maximum logging level in use.
-        This would only need to be changed if a custom level greater than CRITICAL is defined.
-    """
-    previous_level = logging.root.manager.disable
-    logging.disable(highest_level)
-    try:
-        yield
-    finally:
-        logging.disable(previous_level)