megadetector 5.0.7__py3-none-any.whl → 5.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/batch_processing/data_preparation/manage_local_batch.py +28 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
- api/batch_processing/postprocessing/compare_batch_results.py +1 -1
- api/batch_processing/postprocessing/convert_output_format.py +24 -6
- api/batch_processing/postprocessing/load_api_results.py +1 -3
- api/batch_processing/postprocessing/md_to_labelme.py +118 -51
- api/batch_processing/postprocessing/merge_detections.py +30 -5
- api/batch_processing/postprocessing/postprocess_batch_results.py +24 -12
- api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +15 -12
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- data_management/cct_json_utils.py +7 -2
- data_management/coco_to_labelme.py +263 -0
- data_management/coco_to_yolo.py +7 -4
- data_management/databases/integrity_check_json_db.py +68 -59
- data_management/databases/subset_json_db.py +1 -1
- data_management/get_image_sizes.py +44 -26
- data_management/importers/animl_results_to_md_results.py +1 -3
- data_management/importers/noaa_seals_2019.py +1 -1
- data_management/labelme_to_coco.py +252 -143
- data_management/labelme_to_yolo.py +95 -52
- data_management/lila/create_lila_blank_set.py +106 -23
- data_management/lila/download_lila_subset.py +133 -65
- data_management/lila/generate_lila_per_image_labels.py +1 -1
- data_management/lila/lila_common.py +8 -38
- data_management/read_exif.py +65 -16
- data_management/remap_coco_categories.py +84 -0
- data_management/resize_coco_dataset.py +3 -22
- data_management/wi_download_csv_to_coco.py +239 -0
- data_management/yolo_to_coco.py +283 -83
- detection/run_detector_batch.py +12 -3
- detection/run_inference_with_yolov5_val.py +10 -3
- detection/run_tiled_inference.py +2 -2
- detection/tf_detector.py +2 -1
- detection/video_utils.py +1 -1
- md_utils/ct_utils.py +22 -3
- md_utils/md_tests.py +11 -2
- md_utils/path_utils.py +206 -32
- md_utils/url_utils.py +66 -1
- md_utils/write_html_image_list.py +12 -3
- md_visualization/visualization_utils.py +363 -72
- md_visualization/visualize_db.py +33 -10
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/METADATA +10 -12
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/RECORD +47 -44
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
- md_visualization/visualize_megadb.py +0 -183
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
|
@@ -27,7 +27,6 @@ import os
|
|
|
27
27
|
import sys
|
|
28
28
|
import time
|
|
29
29
|
import uuid
|
|
30
|
-
import urllib
|
|
31
30
|
import warnings
|
|
32
31
|
import random
|
|
33
32
|
|
|
@@ -53,7 +52,6 @@ from md_utils import path_utils
|
|
|
53
52
|
from data_management.cct_json_utils import (CameraTrapJsonUtils, IndexedJsonDb)
|
|
54
53
|
from api.batch_processing.postprocessing.load_api_results import load_api_results
|
|
55
54
|
from md_utils.ct_utils import args_to_object
|
|
56
|
-
from md_utils.ct_utils import invert_dictionary
|
|
57
55
|
|
|
58
56
|
from detection.run_detector import get_typical_confidence_threshold_from_results
|
|
59
57
|
|
|
@@ -484,7 +482,14 @@ def render_bounding_boxes(
|
|
|
484
482
|
|
|
485
483
|
# Optionally add links back to the original images
|
|
486
484
|
if options.link_images_to_originals and (image_full_path is not None):
|
|
487
|
-
|
|
485
|
+
|
|
486
|
+
# Handling special characters in links has been pushed down into
|
|
487
|
+
# write_html_image_list
|
|
488
|
+
#
|
|
489
|
+
# link_target = image_full_path.replace('\\','/')
|
|
490
|
+
# link_target = urllib.parse.quote(link_target)
|
|
491
|
+
link_target = image_full_path
|
|
492
|
+
info['linkTarget'] = link_target
|
|
488
493
|
|
|
489
494
|
return info
|
|
490
495
|
|
|
@@ -848,7 +853,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
848
853
|
ground_truth_indexed_db = None
|
|
849
854
|
|
|
850
855
|
if (options.ground_truth_json_file is not None):
|
|
851
|
-
assert (options.confidence_threshold is None) or (isinstance(confidence_threshold,float)), \
|
|
856
|
+
assert (options.confidence_threshold is None) or (isinstance(options.confidence_threshold,float)), \
|
|
852
857
|
'Variable confidence thresholds are not supported when supplying ground truth'
|
|
853
858
|
|
|
854
859
|
if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):
|
|
@@ -876,7 +881,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
876
881
|
# If the caller hasn't supplied results, load them
|
|
877
882
|
if options.api_detection_results is None:
|
|
878
883
|
detections_df, other_fields = load_api_results(
|
|
879
|
-
options.api_output_file,
|
|
884
|
+
options.api_output_file, force_forward_slashes=True,
|
|
880
885
|
filename_replacements=options.api_output_filename_replacements)
|
|
881
886
|
ppresults.api_detection_results = detections_df
|
|
882
887
|
ppresults.api_other_fields = other_fields
|
|
@@ -1087,7 +1092,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1087
1092
|
(precision_at_confidence_threshold + recall_at_confidence_threshold)
|
|
1088
1093
|
|
|
1089
1094
|
print('At a confidence threshold of {:.1%}, precision={:.1%}, recall={:.1%}, f1={:.1%}'.format(
|
|
1090
|
-
|
|
1095
|
+
options.confidence_threshold, precision_at_confidence_threshold,
|
|
1091
1096
|
recall_at_confidence_threshold, f1))
|
|
1092
1097
|
|
|
1093
1098
|
##%% Collect classification results, if they exist
|
|
@@ -1289,7 +1294,8 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1289
1294
|
for file_info in tqdm(files_to_render):
|
|
1290
1295
|
rendering_results.append(render_image_with_gt(
|
|
1291
1296
|
file_info,ground_truth_indexed_db,
|
|
1292
|
-
detection_categories,classification_categories
|
|
1297
|
+
detection_categories,classification_categories,
|
|
1298
|
+
options=options))
|
|
1293
1299
|
elapsed = time.time() - start_time
|
|
1294
1300
|
|
|
1295
1301
|
# Map all the rendering results in the list rendering_results into the
|
|
@@ -1319,6 +1325,12 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1319
1325
|
image_counts['tp']
|
|
1320
1326
|
)
|
|
1321
1327
|
|
|
1328
|
+
confidence_threshold_string = ''
|
|
1329
|
+
if isinstance(options.confidence_threshold,float):
|
|
1330
|
+
confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
|
|
1331
|
+
else:
|
|
1332
|
+
confidence_threshold_string = str(options.confidence_threshold)
|
|
1333
|
+
|
|
1322
1334
|
index_page = """<html>
|
|
1323
1335
|
{}
|
|
1324
1336
|
<body>
|
|
@@ -1333,7 +1345,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1333
1345
|
|
|
1334
1346
|
<h3>Sample images</h3>
|
|
1335
1347
|
<div class="contentdiv">
|
|
1336
|
-
<p>A sample of {} images, annotated with detections above {
|
|
1348
|
+
<p>A sample of {} images, annotated with detections above confidence {}.</p>
|
|
1337
1349
|
<a href="tp.html">True positives (TP)</a> ({}) ({:0.1%})<br/>
|
|
1338
1350
|
CLASSIFICATION_PLACEHOLDER_1
|
|
1339
1351
|
<a href="tn.html">True negatives (TN)</a> ({}) ({:0.1%})<br/>
|
|
@@ -1343,7 +1355,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1343
1355
|
</div>
|
|
1344
1356
|
""".format(
|
|
1345
1357
|
style_header,job_name_string,model_version_string,
|
|
1346
|
-
image_count,
|
|
1358
|
+
image_count, confidence_threshold_string,
|
|
1347
1359
|
all_tp_count, all_tp_count/total_count,
|
|
1348
1360
|
image_counts['tn'], image_counts['tn']/total_count,
|
|
1349
1361
|
image_counts['fp'], image_counts['fp']/total_count,
|
|
@@ -1353,11 +1365,11 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1353
1365
|
index_page += """
|
|
1354
1366
|
<h3>Detection results</h3>
|
|
1355
1367
|
<div class="contentdiv">
|
|
1356
|
-
<p>At a confidence threshold of {
|
|
1368
|
+
<p>At a confidence threshold of {}, precision={:0.1%}, recall={:0.1%}</p>
|
|
1357
1369
|
<p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
|
|
1358
1370
|
</div>
|
|
1359
1371
|
""".format(
|
|
1360
|
-
|
|
1372
|
+
confidence_threshold_string, precision_at_confidence_threshold, recall_at_confidence_threshold,
|
|
1361
1373
|
len(detections_df), pr_figure_relative_filename
|
|
1362
1374
|
)
|
|
1363
1375
|
|
|
@@ -1589,7 +1601,7 @@ def process_batch_results(options: PostProcessingOptions
|
|
|
1589
1601
|
|
|
1590
1602
|
confidence_threshold_string = ''
|
|
1591
1603
|
if isinstance(options.confidence_threshold,float):
|
|
1592
|
-
confidence_threshold_string = '{:.
|
|
1604
|
+
confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
|
|
1593
1605
|
else:
|
|
1594
1606
|
confidence_threshold_string = str(options.confidence_threshold)
|
|
1595
1607
|
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
########
|
|
2
|
+
#
|
|
3
|
+
# remap_detection_categories.py
|
|
4
|
+
#
|
|
5
|
+
# Given a MegaDetector results file, remap the category IDs according to a specified
|
|
6
|
+
# dictionary, writing the results to a new file.
|
|
7
|
+
#
|
|
8
|
+
# Currently only supports remapping detection categories, not classification categories.
|
|
9
|
+
#
|
|
10
|
+
########
|
|
11
|
+
|
|
12
|
+
#%% Constants and imports
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
from tqdm import tqdm
|
|
18
|
+
|
|
19
|
+
from md_utils.ct_utils import invert_dictionary
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
#%% Main function
|
|
23
|
+
|
|
24
|
+
def remap_detection_categories(input_file,
                               output_file,
                               target_category_map,
                               extra_category_handling='error',
                               overwrite=False):
    """
    Rewrite the detection category IDs in the MD results file [input_file] so that they
    follow [target_category_map], then save the modified results to [output_file].

    Categories are matched by name: every detection's category ID is replaced by the ID that
    [target_category_map] assigns to the same category name, and the output file's
    'detection_categories' dictionary is replaced by [target_category_map].

    Only detection categories are remapped; classification categories are left untouched.

    [target_category_map] is either a dict (category ID --> category name) or the path of
    a .json file whose 'detection_categories' dict should be used.

    [extra_category_handling] controls what happens when a category name exists in the
    source file but not in the target mapping:

    'error' == raise a ValueError.
    'drop_if_unused' == raise a ValueError if any detection uses that category, otherwise
                        print a message and leave the category out of the output.
    'remap' == reserved for future use; currently raises NotImplementedError.

    Any other value raises a ValueError, but only once an extra category is encountered.

    If [output_file] already exists and [overwrite] is False, nothing is read or written.

    Returns None.
    """

    if os.path.exists(output_file) and (not overwrite):
        print('File {} exists, bypassing remapping'.format(output_file))
        return

    assert os.path.isfile(input_file), \
        'File {} does not exist'.format(input_file)

    # A string is treated as the path to a results file that supplies the target categories
    if isinstance(target_category_map,str):
        with open(target_category_map,'r') as f:
            target_category_map = json.load(f)['detection_categories']
    assert isinstance(target_category_map,dict)

    with open(input_file,'r') as f:
        input_data = json.load(f)

    input_images = input_data['images']
    input_categories = input_data['detection_categories']

    def _has_detections(im):
        # Images may have no 'detections' field, or may have it set to None
        return ('detections' in im) and (im['detections'] is not None)


    ## Find the categories that are referenced by at least one detection

    used_category_ids = {
        det['category']
        for im in input_images if _has_detections(im)
        for det in im['detections']
    }
    used_category_names = [input_categories[cid] for cid in used_category_ids]


    ## Deal with source categories that have no counterpart in the target mapping

    input_names_set = set(input_categories.values())
    output_names_set = set(target_category_map.values())

    for category_name in input_names_set:

        if category_name in output_names_set:
            continue

        if extra_category_handling == 'error':
            raise ValueError('Category {} present in source but not in target'.format(category_name))

        if extra_category_handling == 'remap':
            raise NotImplementedError('Remapping of extra category IDs not yet implemented')

        if extra_category_handling != 'drop_if_unused':
            raise ValueError('Unrecognized extra category handling scheme {}'.format(
                extra_category_handling))

        # 'drop_if_unused': only tolerable if no detection refers to this category
        if category_name in used_category_names:
            raise ValueError('Category {} present (and used) in source but not in target'.format(
                category_name))

        print('Category {} is unused and not present in the target mapping, ignoring'.format(
            category_name))

    # ...for each source category name


    ## Build the source ID --> target ID lookup

    name_to_output_id = invert_dictionary(target_category_map)

    input_id_to_output_id = {}
    for input_id, category_name in input_categories.items():
        if category_name in name_to_output_id:
            input_id_to_output_id[input_id] = name_to_output_id[category_name]
        else:
            # Unmapped categories must be unused; the loop above has already enforced this
            assert category_name not in used_category_names


    ## Rewrite every detection in place

    for im in tqdm(input_images):
        if not _has_detections(im):
            continue
        for det in im['detections']:
            det['category'] = input_id_to_output_id[det['category']]

    input_data['detection_categories'] = target_category_map


    ## Write the remapped results

    with open(output_file,'w') as f:
        json.dump(input_data,f,indent=1)

    print('Saved remapped results to {}'.format(output_file))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
#%% Interactive driver
|
|
136
|
+
|
|
137
|
+
if False:

    pass

    #%% Remap one results file onto the category IDs used by another

    # Variable names deliberately mirror the parameters of remap_detection_categories()

    # The file to remap, and the results file whose detection categories define the target IDs
    input_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-goannas-lilablanks_yolov5x6-20240223.json'
    target_categories_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-only_yolov5x6.json'

    # Passing a filename makes the function load that file's 'detection_categories'
    target_category_map = target_categories_file

    # Write next to the input file, and make sure we never clobber the input
    output_file = input_file.replace('.json','_remapped.json')
    assert output_file != input_file

    extra_category_handling = 'drop_if_unused'
    overwrite = True

    remap_detection_categories(input_file=input_file,
                               output_file=output_file,
                               target_category_map=target_category_map,
                               extra_category_handling=extra_category_handling,
                               overwrite=overwrite)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
#%% Command-line driver
|
|
161
|
+
|
|
162
|
+
# TODO
|
|
163
|
+
|
|
@@ -56,13 +56,13 @@ def render_image(im,render_image_constants):
|
|
|
56
56
|
|
|
57
57
|
assert im['file'] in filename_to_ground_truth_im
|
|
58
58
|
|
|
59
|
-
input_file = os.path.join(image_folder,im['file'])
|
|
60
|
-
assert os.path.isfile(input_file)
|
|
61
|
-
|
|
62
59
|
output_file = image_to_output_file(im,preview_images_folder)
|
|
63
60
|
if os.path.isfile(output_file) and not force_render_images:
|
|
64
61
|
return output_file
|
|
65
62
|
|
|
63
|
+
input_file = os.path.join(image_folder,im['file'])
|
|
64
|
+
assert os.path.isfile(input_file)
|
|
65
|
+
|
|
66
66
|
detections_to_render = []
|
|
67
67
|
|
|
68
68
|
for det in im['detections']:
|
|
@@ -82,8 +82,12 @@ def render_image(im,render_image_constants):
|
|
|
82
82
|
|
|
83
83
|
#%% Main function
|
|
84
84
|
|
|
85
|
-
def render_detection_confusion_matrix(ground_truth_file,
|
|
86
|
-
|
|
85
|
+
def render_detection_confusion_matrix(ground_truth_file,
|
|
86
|
+
results_file,
|
|
87
|
+
image_folder,
|
|
88
|
+
preview_folder,
|
|
89
|
+
force_render_images=False,
|
|
90
|
+
confidence_thresholds=None,
|
|
87
91
|
rendering_confidence_thresholds=None,
|
|
88
92
|
target_image_size=(1280,-1),
|
|
89
93
|
parallelize_rendering=True,
|
|
@@ -223,7 +227,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
|
|
|
223
227
|
filename_to_predicted_categories = defaultdict(set)
|
|
224
228
|
predicted_category_name_to_filenames = defaultdict(set)
|
|
225
229
|
|
|
226
|
-
# im =
|
|
230
|
+
# im = md_formatted_results['images'][0]
|
|
227
231
|
for im in tqdm(md_formatted_results['images']):
|
|
228
232
|
|
|
229
233
|
assert im['file'] in filename_to_ground_truth_im
|
|
@@ -247,9 +251,6 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
|
|
|
247
251
|
|
|
248
252
|
category_name_to_image_lists = {}
|
|
249
253
|
|
|
250
|
-
# These may not be identical; currently the ground truth contains an "unknown" category
|
|
251
|
-
# results_category_names = sorted(list(results_category_id_to_name.values()))
|
|
252
|
-
|
|
253
254
|
sub_page_tokens = ['fn','tn','fp','tp']
|
|
254
255
|
|
|
255
256
|
for category_name in ground_truth_category_names:
|
|
@@ -296,7 +297,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
|
|
|
296
297
|
assignment = 'tn'
|
|
297
298
|
|
|
298
299
|
category_name_to_image_lists[category_name][assignment].append(filename)
|
|
299
|
-
|
|
300
|
+
|
|
300
301
|
# ...for each filename
|
|
301
302
|
|
|
302
303
|
|
|
@@ -333,8 +334,8 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
|
|
|
333
334
|
results_category_name_to_confidence = defaultdict(int)
|
|
334
335
|
for det in results_im['detections']:
|
|
335
336
|
category_name = results_category_id_to_name[det['category']]
|
|
336
|
-
detection_threshold =
|
|
337
|
-
if category_name in
|
|
337
|
+
detection_threshold = confidence_thresholds['default']
|
|
338
|
+
if category_name in confidence_thresholds:
|
|
338
339
|
detection_threshold = confidence_thresholds[category_name]
|
|
339
340
|
if det['conf'] > detection_threshold:
|
|
340
341
|
results_category_name_to_confidence[category_name] = max(
|
|
@@ -354,6 +355,8 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
|
|
|
354
355
|
|
|
355
356
|
confusion_matrix[ground_truth_category_index,predicted_category_index] += 1
|
|
356
357
|
|
|
358
|
+
# ...for each file
|
|
359
|
+
|
|
357
360
|
plt.ioff()
|
|
358
361
|
|
|
359
362
|
fig_h = 3 + 0.3 * n_categories
|
|
@@ -181,7 +181,7 @@ class RepeatDetectionOptions:
|
|
|
181
181
|
|
|
182
182
|
# Optionally show a grid that includes a sample image for the detection, plus
|
|
183
183
|
# the top N additional detections
|
|
184
|
-
bRenderDetectionTiles =
|
|
184
|
+
bRenderDetectionTiles = True
|
|
185
185
|
|
|
186
186
|
# If this is None, we'll render at the width of the original image
|
|
187
187
|
detectionTilesPrimaryImageWidth = None
|
|
@@ -193,7 +193,7 @@ class RepeatDetectionOptions:
|
|
|
193
193
|
# of luck.
|
|
194
194
|
detectionTilesCroppedGridWidth = 0.6
|
|
195
195
|
detectionTilesPrimaryImageLocation='right'
|
|
196
|
-
detectionTilesMaxCrops =
|
|
196
|
+
detectionTilesMaxCrops = 250
|
|
197
197
|
|
|
198
198
|
# If bRenderOtherDetections is True, what color should we use to render the
|
|
199
199
|
# (hopefully pretty subtle) non-target detections?
|
|
@@ -142,7 +142,8 @@ class IndexedJsonDb:
|
|
|
142
142
|
def __init__(self, json_filename: Union[str, JSONObject],
|
|
143
143
|
b_normalize_paths: bool = False,
|
|
144
144
|
filename_replacements: Optional[Mapping[str, str]] = None,
|
|
145
|
-
b_convert_classes_to_lower: bool = True
|
|
145
|
+
b_convert_classes_to_lower: bool = True,
|
|
146
|
+
b_force_forward_slashes: bool = True):
|
|
146
147
|
"""
|
|
147
148
|
json_filename can also be an existing json db
|
|
148
149
|
"""
|
|
@@ -162,11 +163,15 @@ class IndexedJsonDb:
|
|
|
162
163
|
for c in self.db['categories']:
|
|
163
164
|
c['name'] = c['name'].lower()
|
|
164
165
|
|
|
166
|
+
# Normalize paths to simplify comparisons later
|
|
165
167
|
if b_normalize_paths:
|
|
166
|
-
# Normalize paths to simplify comparisons later
|
|
167
168
|
for im in self.db['images']:
|
|
168
169
|
im['file_name'] = os.path.normpath(im['file_name'])
|
|
169
170
|
|
|
171
|
+
if b_force_forward_slashes:
|
|
172
|
+
for im in self.db['images']:
|
|
173
|
+
im['file_name'] = im['file_name'].replace('\\','/')
|
|
174
|
+
|
|
170
175
|
if filename_replacements is not None:
|
|
171
176
|
for s in filename_replacements:
|
|
172
177
|
# Make custom replacements in filenames, typically used to
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
########
|
|
2
|
+
#
|
|
3
|
+
# coco_to_labelme.py
|
|
4
|
+
#
|
|
5
|
+
# Converts a COCO dataset to labelme format (one .json per image file).
|
|
6
|
+
#
|
|
7
|
+
# If you want to convert YOLO data to labelme, use yolo_to_coco, then coco_to_labelme.
|
|
8
|
+
#
|
|
9
|
+
########
|
|
10
|
+
|
|
11
|
+
#%% Imports and constants
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import json
|
|
15
|
+
|
|
16
|
+
from tqdm import tqdm
|
|
17
|
+
from collections import defaultdict
|
|
18
|
+
|
|
19
|
+
from md_visualization.visualization_utils import open_image
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
#%% Functions
|
|
23
|
+
|
|
24
|
+
def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=None):
    """
    Build the labelme-format dict for a single COCO image record.

    [im] is a COCO image dict; 'file_name', 'height', and 'width' are required, and 'flags'
    is copied to the output if present.

    [annotations] is the list of COCO annotations for this image; every annotation in the
    list is assumed to belong to [im].  Annotations without a 'bbox' field are ignored.

    [categories] is the COCO category list (dicts with 'id' and 'name'); it is used to look
    up label names, and is also stored in the output as 'coco_categories'.

    [info], if not None, is stored in the output as 'custom_info'.

    Returns the labelme dict; nothing is written to disk.
    """

    # Key order matters here, since it determines the field order in the serialized .json
    output_dict = {}
    if info is not None:
        output_dict['custom_info'] = info

    shapes = []

    output_dict.update({
        'version': '5.3.0a0',
        'flags': im['flags'] if 'flags' in im else {},
        'shapes': shapes,
        'imagePath': os.path.basename(im['file_name']),
        'imageHeight': im['height'],
        'imageWidth': im['width'],
        'imageData': None,
        # Kept so the original COCO category IDs can be reconstructed later
        'coco_categories': categories,
    })

    category_id_to_name = {}
    for category in categories:
        category_id_to_name[category['id']] = category['name']

    for ann in annotations:

        # Only boxes are converted
        if 'bbox' not in ann:
            continue

        label = category_id_to_name[ann['category_id']]

        # COCO boxes are [x_min, y_min, box_width, box_height] (absolute), labelme
        # rectangles are [[x0,y0],[x1,y1]] (absolute)
        bbox = ann['bbox']
        left, top = bbox[0], bbox[1]
        right, bottom = left + bbox[2], top + bbox[3]

        shapes.append({
            'label': label,
            'shape_type': 'rectangle',
            'description': '',
            'group_id': None,
            'points': [[left,top],[right,bottom]],
        })

    # ...for each annotation

    return output_dict
|
|
81
|
+
|
|
82
|
+
# ...def get_labelme_dict_for_image()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
    """
    Write one labelme-format .json file per image in [coco_data].

    [coco_data] is either a COCO dict or the path to a COCO .json file.  Each output file is
    written next to the corresponding image, i.e. at the image's relative path within
    [image_base], with the extension replaced by '.json'.

    Unless [bypass_image_size_check] is True, every image is first verified to exist, and
    images that lack 'width' or 'height' are opened to read their size.  Images that can't
    be opened are marked as failed, and no .json file is written for them.

    Existing .json files are left alone unless [overwrite] is True.

    [verbose] enables a message for every skipped image.

    Returns None.
    """

    ## Load COCO data if necessary

    if isinstance(coco_data,str):
        with open(coco_data,'r') as f:
            coco_data = json.load(f)
    assert isinstance(coco_data,dict)


    ## Read image sizes if necessary

    if not bypass_image_size_check:

        # TODO: parallelize this loop

        print('Reading/validating image sizes...')

        for im in tqdm(coco_data['images']):

            # Every image has to exist on disk
            im_full_path = os.path.join(image_base,im['file_name'])
            assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)

            # Nothing else to do if the size is already known
            if ('height' in im) and ('width' in im):
                continue

            try:
                pil_im = open_image(im_full_path)
                im['width'] = pil_im.width
                im['height'] = pil_im.height
            except Exception:
                # Best effort: flag the image as failed so that it's skipped below
                print('Warning: cannot open image {}'.format(im_full_path))
                if 'failure' not in im:
                    im['failure'] = 'Failure image access'

        # ...for each image

    else:

        print('Bypassing size check')

    # ...if we need to load image sizes


    ## Generate labelme files

    print('Generating .json files...')

    # Group annotations by the image they belong to
    image_id_to_annotations = {}
    for ann in coco_data['annotations']:
        image_id_to_annotations.setdefault(ann['image_id'],[]).append(ann)

    n_json_files_written = 0
    n_json_files_error = 0
    n_json_files_exist = 0

    for im in tqdm(coco_data['images']):

        # Skip images that failed to load, either above or in whatever system generated
        # this COCO file; errors are represented differently depending on the source.
        if any(im.get(error_string) is not None for error_string in ('failure','error')):
            if verbose:
                print('Warning: skipping labelme file generation for failed image {}'.format(
                    im['file_name']))
            n_json_files_error += 1
            continue

        im_full_path = os.path.join(image_base,im['file_name'])
        json_path = os.path.splitext(im_full_path)[0] + '.json'

        if os.path.isfile(json_path) and (not overwrite):
            if verbose:
                print('Skipping existing file {}'.format(json_path))
            n_json_files_exist += 1
            continue

        output_dict = get_labelme_dict_for_image_from_coco_record(
            im,
            image_id_to_annotations.get(im['id'],[]),
            coco_data['categories'],
            info=None)

        n_json_files_written += 1
        with open(json_path,'w') as f:
            json.dump(output_dict,f,indent=1)

    # ...for each image

    print('\nWrote {} .json files (skipped {} for errors, {} because they exist)'.format(
        n_json_files_written,n_json_files_error,n_json_files_exist))
|
|
189
|
+
|
|
190
|
+
# ...def coco_to_labelme()
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
#%% Interactive driver
|
|
194
|
+
|
|
195
|
+
if False:

    pass

    #%% Configure options

    # Images are expected to live alongside the COCO file
    coco_file = \
        r'C:\\temp\\snapshot-exploration\\images\\training-images-good\\training-images-good_from_yolo.json'
    image_folder = os.path.dirname(coco_file)
    overwrite = True


    #%% Programmatic execution

    coco_to_labelme(coco_data=coco_file,
                    image_base=image_folder,
                    overwrite=overwrite)


    #%% Command-line execution

    # Build the equivalent command line and put it on the clipboard
    import clipboard

    s = 'python coco_to_labelme.py "{}" "{}"'.format(coco_file,image_folder) + \
        (' --overwrite' if overwrite else '')

    print(s)
    clipboard.copy(s)


    #%% Opening labelme

    # Build the command that opens the image folder in labelme
    import clipboard

    s = 'python labelme {}'.format(image_folder)
    print(s)
    clipboard.copy(s)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
#%% Command-line driver
|
|
230
|
+
|
|
231
|
+
import sys,argparse
|
|
232
|
+
|
|
233
|
+
def main():
    """
    Command-line entry point: parse arguments and call coco_to_labelme().

    Prints usage information and exits if no arguments are supplied.
    """

    parser = argparse.ArgumentParser(
        description='Convert a COCO database to labelme annotation format')

    # Required positional arguments, in order
    positional_arguments = (
        ('coco_file','Path to COCO data file (.json)'),
        ('image_base','Path to images (also the output folder)'),
    )
    for argument_name,argument_help in positional_arguments:
        parser.add_argument(argument_name,type=str,help=argument_help)

    parser.add_argument(
        '--overwrite',
        action='store_true',
        help='Overwrite existing labelme .json files')

    # With no arguments at all, show the full help text rather than a terse usage error
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    coco_to_labelme(coco_data=args.coco_file,
                    image_base=args.image_base,
                    overwrite=args.overwrite)


if __name__ == '__main__':
    main()
|