megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of megadetector has been flagged as a potentially problematic release.
- api/batch_processing/data_preparation/manage_local_batch.py +297 -202
- api/batch_processing/data_preparation/manage_video_batch.py +7 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
- api/batch_processing/postprocessing/compare_batch_results.py +111 -61
- api/batch_processing/postprocessing/convert_output_format.py +24 -6
- api/batch_processing/postprocessing/load_api_results.py +56 -72
- api/batch_processing/postprocessing/md_to_labelme.py +119 -51
- api/batch_processing/postprocessing/merge_detections.py +30 -5
- api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
- api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- classification/prepare_classification_script.py +191 -191
- data_management/cct_json_utils.py +7 -2
- data_management/coco_to_labelme.py +263 -0
- data_management/coco_to_yolo.py +72 -48
- data_management/databases/integrity_check_json_db.py +75 -64
- data_management/databases/subset_json_db.py +1 -1
- data_management/generate_crops_from_cct.py +1 -1
- data_management/get_image_sizes.py +44 -26
- data_management/importers/animl_results_to_md_results.py +3 -5
- data_management/importers/noaa_seals_2019.py +2 -2
- data_management/importers/zamba_results_to_md_results.py +2 -2
- data_management/labelme_to_coco.py +264 -127
- data_management/labelme_to_yolo.py +96 -53
- data_management/lila/create_lila_blank_set.py +557 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +1 -1
- data_management/lila/download_lila_subset.py +138 -45
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +16 -10
- data_management/lila/lila_common.py +15 -42
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/read_exif.py +65 -16
- data_management/remap_coco_categories.py +84 -0
- data_management/resize_coco_dataset.py +14 -31
- data_management/wi_download_csv_to_coco.py +239 -0
- data_management/yolo_output_to_md_output.py +40 -13
- data_management/yolo_to_coco.py +313 -100
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +1 -1
- detection/run_detector.py +73 -18
- detection/run_detector_batch.py +116 -27
- detection/run_inference_with_yolov5_val.py +135 -27
- detection/run_tiled_inference.py +153 -43
- detection/tf_detector.py +2 -1
- detection/video_utils.py +4 -2
- md_utils/ct_utils.py +101 -6
- md_utils/md_tests.py +264 -17
- md_utils/path_utils.py +326 -47
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +66 -3
- md_utils/write_html_image_list.py +12 -2
- md_visualization/visualization_utils.py +380 -74
- md_visualization/visualize_db.py +41 -10
- md_visualization/visualize_detector_output.py +185 -104
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- md_visualization/visualize_megadb.py +0 -183
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
--- a/api/batch_processing/postprocessing/load_api_results.py
+++ b/api/batch_processing/postprocessing/load_api_results.py
@@ -2,17 +2,18 @@
 #
 # load_api_results.py
 #
-#
+# DEPRECATED
 #
-#
+# As of 2023.12, this module is used in postprocessing and RDE. Not recommended
+# for new code.
 #
-#
+# Loads the output of the batch processing API (json) into a Pandas dataframe.
+#
+# Includes functions to read/write the (very very old) .csv results format.
 #
 ########
 
-#%%
-
-from collections import defaultdict
+#%% Imports
 
 import json
 import os
@@ -23,72 +24,32 @@ import pandas as pd
 
 from md_utils import ct_utils
 
-headers = ['image_path', 'max_confidence', 'detections']
-
-
-#%% Functions for grouping by sequence_id
-
-def ss_file_to_file_name(f):
-    # example
-    # input 'file': 'SER/S1/F08/F08_R3/S1_F08_R3_PICT1150.JPG'
-    # output 'id': 'S1/F08/F08_R3/S1_F08_R3_PICT1150.JPG'
-    return f.split('SER/')[1].split('.JPG')[0]
-
-
-def caltech_file_to_file_name(f):
-    return f.split('cct_images/')[1].split('.')[0]
-
-
-def api_results_groupby(api_output_path, gt_db_indexed, file_to_image_id, field='seq_id'):
-    """
-    Given the output file of the API, groupby (currently only seq_id).
-
-    Args:
-        api_output_path: path to the API output json file
-        gt_db_indexed: an instance of IndexedJsonDb so we know the seq_id to image_id mapping
-        file_to_image_id: a function that takes in the 'file' field in 'images' in the detector
-            output file and converts it to the 'id' field in the gt DB.
-        field: which field in the 'images' array to group by
-
-    Returns:
-        A dict where the keys are of the field requested, each points to an array
-        containing entries in the 'images' section of the output file
-    """
-
-    with open(api_output_path) as f:
-        detection_results = json.load(f)
 
-
-    for i in detection_results['images']:
-        image_id = file_to_image_id(i['file'])
-        field_val = gt_db_indexed.image_id_to_image[image_id][field]
-        res[field_val].append(i)
-    return res
-
-
-#%% Functions for loading the result as a Pandas DataFrame
+#%% Functions for loading .json results into a Pandas DataFrame, and writing back to .json
 
 def load_api_results(api_output_path: str, normalize_paths: bool = True,
-                     filename_replacements: Optional[Mapping[str, str]] = None
+                     filename_replacements: Optional[Mapping[str, str]] = None,
+                     force_forward_slashes: bool = True
                      ) -> Tuple[pd.DataFrame, Dict]:
     """
-    Loads
-    Pandas DataFrame, mainly useful for various postprocessing functions.
+    Loads json-formatted MegaDetector results to a Pandas DataFrame.
 
     Args:
-        api_output_path: path to the
+        api_output_path: path to the output json file
        normalize_paths: whether to apply os.path.normpath to the 'file' field
            in each image entry in the output file
        filename_replacements: replace some path tokens to match local paths to
            the original blob structure
+       force_forward_slashes: whether to convert backslashes to forward slashes
+           in filenames
 
    Returns:
        detection_results: pd.DataFrame, contains at least the columns:
-           ['file', 'detections','failure']
+           ['file', 'detections','failure']
        other_fields: a dict containing fields in the results other than 'images'
    """
 
-    print('Loading
+    print('Loading results from {}'.format(api_output_path))
 
     with open(api_output_path) as f:
        detection_results = json.load(f)
@@ -97,18 +58,20 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
    for s in ['info', 'detection_categories', 'images']:
        assert s in detection_results, 'Missing field {} in detection results'.format(s)
 
-    # Fields in the
+    # Fields in the output json other than 'images'
    other_fields = {}
    for k, v in detection_results.items():
        if k != 'images':
            other_fields[k] = v
 
-    # Normalize paths to simplify comparisons later
    if normalize_paths:
        for image in detection_results['images']:
-            image['file'] = os.path.normpath(image['file'])
-            # image['file'] = image['file'].replace('\\','/')
+            image['file'] = os.path.normpath(image['file'])
 
+    if force_forward_slashes:
+        for image in detection_results['images']:
+            image['file'] = image['file'].replace('\\','/')
+
    # Replace some path tokens to match local paths to original blob structure
    if filename_replacements is not None:
        for string_to_replace in filename_replacements.keys():
@@ -127,9 +90,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
    # Pack the json output into a Pandas DataFrame
    detection_results = pd.DataFrame(detection_results['images'])
 
-
-
-    print('Finished loading API results for {} images from {}'.format(
+    print('Finished loading MegaDetector results for {} images from {}'.format(
        len(detection_results),api_output_path))
 
    return detection_results, other_fields
@@ -137,7 +98,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
 
 def write_api_results(detection_results_table, other_fields, out_path):
    """
-    Writes a Pandas DataFrame
+    Writes a Pandas DataFrame to the MegaDetector .json format.
    """
 
    print('Writing detection results to {}'.format(out_path))
@@ -148,6 +109,27 @@ def write_api_results(detection_results_table, other_fields, out_path):
                                               double_precision=3)
    images = json.loads(images)
    fields['images'] = images
+
+    # Convert the 'version' field back to a string as per format convention
+    try:
+        version = other_fields['info']['format_version']
+        if not isinstance(version,str):
+            other_fields['info']['format_version'] = str(version)
+    except Exception:
+        print('Warning: error determining format version')
+        pass
+
+    # Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
+    try:
+        version = other_fields['info']['format_version']
+        version = float(version)
+        if version >= 1.3:
+            for im in images:
+                if 'max_detection_conf' in im:
+                    del im['max_detection_conf']
+    except Exception:
+        print('Warning: error removing max_detection_conf from output')
+        pass
 
    with open(out_path, 'w') as f:
        json.dump(fields, f, indent=1)
@@ -157,15 +139,16 @@ def write_api_results(detection_results_table, other_fields, out_path):
 
 def load_api_results_csv(filename, normalize_paths=True, filename_replacements={}, nrows=None):
    """
-    DEPRECATED
-
+    [DEPRECATED]
+
+    Loads .csv-formatted MegaDetector results to a pandas table
    """
 
-    print('Loading
+    print('Loading MegaDetector results from {}'.format(filename))
 
    detection_results = pd.read_csv(filename,nrows=nrows)
 
-    print('De-serializing
+    print('De-serializing MegaDetector results from {}'.format(filename))
 
    # Confirm that this is really a detector output file
    for s in ['image_path','max_confidence','detections']:
@@ -191,17 +174,18 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={
                fn = fn.replace(string_to_replace,replacement_string)
                detection_results.at[iRow,'image_path'] = fn
 
-    print('Finished loading and de-serializing
+    print('Finished loading and de-serializing MD results for {} images from {}'.format(
        len(detection_results),filename))
 
    return detection_results
 
 
 def write_api_results_csv(detection_results, filename):
-    """
-    DEPRECATED
-
-
+    """
+    [DEPRECATED]
+
+    Writes a Pandas table to csv in a way that's compatible with the .csv output
+    format. Currently just a wrapper around to_csv that forces output writing
    to go through a common code path.
    """
 
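For orientation, here is a minimal usage sketch based on the signatures shown in the diff above. The import path assumes this wheel's top-level `api` package layout, and the file names are hypothetical.

```python
# Minimal sketch of the updated loader; file names are hypothetical.
from api.batch_processing.postprocessing.load_api_results import (
    load_api_results, write_api_results)

# Load MegaDetector .json results into a DataFrame; 5.0.8 adds
# force_forward_slashes, which converts backslashes in the 'file' field.
results_df, other_fields = load_api_results(
    'md_results.json',
    normalize_paths=True,
    force_forward_slashes=True)

# ...filter or modify rows here...

# Write the table back out; per the new code above, write_api_results also
# coerces info.format_version to a string and drops per-image
# 'max_detection_conf' fields for format versions >= 1.3.
write_api_results(results_df, other_fields, 'md_results_filtered.json')
```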
--- a/api/batch_processing/postprocessing/md_to_labelme.py
+++ b/api/batch_processing/postprocessing/md_to_labelme.py
@@ -20,6 +20,10 @@ import json
 
 from tqdm import tqdm
 
+from multiprocessing.pool import Pool
+from multiprocessing.pool import ThreadPool
+from functools import partial
+
 from md_visualization.visualization_utils import open_image
 from md_utils.ct_utils import truncate_float
 
@@ -29,15 +33,21 @@ default_confidence_threshold = 0.15
 
 #%% Functions
 
-def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,
+def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,
+                               info=None,confidence_threshold=None):
    """
    For the given image struct in MD results format, reformat the detections into
    labelme format. Returns a dict.
+
+    'height' and 'width' are required in [im].
+
+    image_base_name is written directly to the 'imagePath' field in the output; it should generally be
+    os.path.basename(your_image_file).
    """
 
    if confidence_threshold is None:
        confidence_threshold = -1.0
-
+
    output_dict = {}
    if info is not None:
        output_dict['detector_info'] = info
@@ -48,7 +58,9 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
    output_dict['imageHeight'] = im['height']
    output_dict['imageWidth'] = im['width']
    output_dict['imageData'] = None
+    output_dict['detections'] = im['detections']
 
+    # det = im['detections'][1]
    for det in im['detections']:
 
        if det['conf'] < confidence_threshold:
@@ -78,69 +90,125 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
 # ...def get_labelme_dict_for_image()
 
 
+def _write_output_for_image(im,image_base,extension_prefix,info,
+                            confidence_threshold,category_id_to_name,overwrite,
+                            verbose=False):
+
+    if 'failure' in im and im['failure'] is not None:
+        assert 'detections' not in im or im['detections'] is None
+        if verbose:
+            print('Skipping labelme file generation for failed image {}'.format(
+                im['file']))
+        return
+
+    im_full_path = os.path.join(image_base,im['file'])
+    json_path = os.path.splitext(im_full_path)[0] + extension_prefix + '.json'
+
+    if (not overwrite) and (os.path.isfile(json_path)):
+        if verbose:
+            print('Skipping existing file {}'.format(json_path))
+        return
+
+    output_dict = get_labelme_dict_for_image(im,
+        image_base_name=os.path.basename(im_full_path),
+        category_id_to_name=category_id_to_name,
+        info=info,
+        confidence_threshold=confidence_threshold)
+
+    with open(json_path,'w') as f:
+        json.dump(output_dict,f,indent=1)
+
+# ...def write_output_for_image(...)
+
+
+
 def md_to_labelme(results_file,image_base,confidence_threshold=None,
-                  overwrite=False
+                  overwrite=False,extension_prefix='',n_workers=1,
+                  use_threads=False,bypass_image_size_read=False,
+                  verbose=False):
    """
    For all the images in [results_file], write a .json file in labelme format alongside the
    corresponding relative path within image_base.
+
+    If non-empty, "extension_prefix" will be inserted before the .json extension.
    """
 
-
-
-        md_results = json.load(f)
+    if extension_prefix is None:
+        extension_prefix = ''
 
-    #
-
-
-
-
-
+    # Load MD results if necessary
+    if isinstance(results_file,dict):
+        md_results = results_file
+    else:
+        print('Loading MD results...')
+        with open(results_file,'r') as f:
+            md_results = json.load(f)
 
-
-
-        assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
+    # Read image sizes if necessary
+    if bypass_image_size_read:
 
-
-        if 'height' not in im or 'width' not in im:
-
-            try:
-                pil_im = open_image(im_full_path)
-                im['width'] = pil_im.width
-                im['height'] = pil_im.height
-            except Exception:
-                print('Warning: cannot open image {}, treating as a failure during inference'.format(
-                    im_full_path))
-                if 'failure' not in im:
-                    im['failure'] = 'Failure image access'
-
-        # ...if we need to read w/h information
+        print('Bypassing image size read')
 
-
+    else:
 
-
-
-
-
-
-
-
-
+        # TODO: parallelize this loop
+
+        print('Reading image sizes...')
+
+        # im = md_results['images'][0]
+        for im in tqdm(md_results['images']):
+
+            # Make sure this file exists
+            im_full_path = os.path.join(image_base,im['file'])
+            assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
+
+            json_path = os.path.splitext(im_full_path)[0] + extension_prefix + '.json'
+
+            # Don't even bother reading sizes for files we're not going to generate
+            if (not overwrite) and (os.path.isfile(json_path)):
+                continue
 
-
-
+            # Load w/h information if necessary
+            if 'height' not in im or 'width' not in im:
+
+                try:
+                    pil_im = open_image(im_full_path)
+                    im['width'] = pil_im.width
+                    im['height'] = pil_im.height
+                except Exception:
+                    print('Warning: cannot open image {}, treating as a failure during inference'.format(
+                        im_full_path))
+                    if 'failure' not in im:
+                        im['failure'] = 'Failure image access'
+
+            # ...if we need to read w/h information
+
+        # ...for each image
 
-
-            print('Skipping existing file {}'.format(json_path))
-            continue
+    # ...if we're not bypassing image size read
 
-
-
-
-
-
-
-
-
+    print('\nGenerating labelme files...')
+
+    # Write output
+    if n_workers <= 1:
+        for im in tqdm(md_results['images']):
+            _write_output_for_image(im,image_base,extension_prefix,md_results['info'],confidence_threshold,
+                                    md_results['detection_categories'],overwrite,verbose)
+    else:
+        if use_threads:
+            print('Starting parallel thread pool with {} workers'.format(n_workers))
+            pool = ThreadPool(n_workers)
+        else:
+            print('Starting parallel process pool with {} workers'.format(n_workers))
+            pool = Pool(n_workers)
+        _ = list(tqdm(pool.imap(
+            partial(_write_output_for_image,
+                    image_base=image_base,extension_prefix=extension_prefix,
+                    info=md_results['info'],confidence_threshold=confidence_threshold,
+                    category_id_to_name=md_results['detection_categories'],
+                    overwrite=overwrite,verbose=verbose),
+            md_results['images']),
+            total=len(md_results['images'])))
 
    # ...for each image
 
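A minimal usage sketch of the expanded md_to_labelme entry point, using the parallelization parameters added in the diff above; the paths are hypothetical and the worker count is illustrative only.

```python
# Minimal sketch of the expanded md_to_labelme() signature shown above;
# paths are hypothetical, worker count is illustrative.
from api.batch_processing.postprocessing.md_to_labelme import md_to_labelme

md_to_labelme(results_file='md_results.json',   # or an already-loaded results dict
              image_base='/data/camera_trap_images',
              confidence_threshold=0.2,
              overwrite=False,
              extension_prefix='',               # inserted before '.json' if non-empty
              n_workers=8,                       # >1 enables a worker pool
              use_threads=False,                 # False -> process pool, True -> thread pool
              bypass_image_size_read=False,      # True skips reading image sizes
              verbose=False)
```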
--- a/api/batch_processing/postprocessing/merge_detections.py
+++ b/api/batch_processing/postprocessing/merge_detections.py
@@ -3,9 +3,12 @@
 # merge_detections.py
 #
 # Merge high-confidence detections from one or more results files into another
-# file.
+# file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
 # results file from MDv5a.
 #
+# Detection categories must be the same in both files; if you want to first remap
+# one file's category mapping to be the same as another's, see remap_detection_categories.
+#
 # If you want to literally merge two .json files, see combine_api_outputs.py.
 #
 ########
@@ -30,7 +33,7 @@ class MergeDetectionsOptions:
 
        self.max_detection_size = 1.01
        self.min_detection_size = 0
-        self.source_confidence_thresholds = [0.
+        self.source_confidence_thresholds = [0.05]
 
        # Don't bother merging into target images if there is a similar detection
        # above this threshold (or if there is *any* detection above this threshold,
@@ -38,7 +41,7 @@ class MergeDetectionsOptions:
        self.target_confidence_threshold = 0.2
 
        # If you want to merge only certain categories, specify one
-        # (but not both) of these.
+        # (but not both) of these. These are category IDs, not names.
        self.categories_to_include = None
        self.categories_to_exclude = None
 
@@ -47,11 +50,28 @@ class MergeDetectionsOptions:
        self.merge_empty_only = False
 
        self.iou_threshold = 0.65
+
+        self.overwrite = False
 
 
 #%% Main function
 
 def merge_detections(source_files,target_file,output_file,options=None):
+    """
+    Merge high-confidence detections from one or more results files into another
+    file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
+    results file from MDv5a.
+
+    [source_files] (a list of files or a single filename) specifies the set of
+    results files that will be merged into [target_file]. The difference between a
+    "source file" and the "target file" is that if no merging is necessary, either because
+    two boxes are nearly identical or because merge_only_empty is True and the target
+    file already has above-threshold detection for an image+category, the output file gets
+    the results of the "target" file. I.e., the "target" file wins all ties.
+
+    The results are written to [output_file].
+
+    """
 
    if isinstance(source_files,str):
        source_files = [source_files]
@@ -59,6 +79,10 @@ def merge_detections(source_files,target_file,output_file,options=None):
    if options is None:
        options = MergeDetectionsOptions()
 
+    if (not options.overwrite) and (os.path.isfile(output_file)):
+        print('File {} exists, bypassing merge'.format(output_file))
+        return
+
    assert not ((options.categories_to_exclude is not None) and \
                (options.categories_to_include is not None)), \
        'categories_to_include and categories_to_exclude are mutually exclusive'
@@ -133,7 +157,8 @@ def merge_detections(source_files,target_file,output_file,options=None):
        output_data['info']['detections_transferred_from'].append(os.path.basename(source_file))
        output_data['info']['detector'] = output_data['info']['detector'] + ' + ' + source_detector_name
 
-        assert source_data['detection_categories'] == output_data['detection_categories']
+        assert source_data['detection_categories'] == output_data['detection_categories'], \
+            'Cannot merge files with different detection category maps'
 
        source_confidence_threshold = options.source_confidence_thresholds[i_source_file]
 
@@ -246,7 +271,7 @@ def merge_detections(source_files,target_file,output_file,options=None):
    # ...for each source file
 
    with open(output_file,'w') as f:
-        json.dump(output_data,f,indent=
+        json.dump(output_data,f,indent=1)
 
    print('Saved merged results to {}'.format(output_file))
 
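To illustrate the merge behavior documented above, here is a hedged sketch of calling merge_detections with the new overwrite option; all file names are hypothetical, and the thresholds simply mirror the defaults shown in the diff.

```python
# Hedged sketch of merge_detections() usage based on the docstring and options
# added above; file names are hypothetical.
from api.batch_processing.postprocessing.merge_detections import (
    MergeDetectionsOptions, merge_detections)

options = MergeDetectionsOptions()
options.source_confidence_thresholds = [0.05]  # one threshold per source file
options.target_confidence_threshold = 0.2      # skip merging where the target already agrees
options.overwrite = False                      # new in 5.0.8: existing output files are not re-merged

# Detections from the MDv5b file are merged into the MDv5a ("target") file;
# the target file wins ties, and the combined results go to output_file.
merge_detections(source_files=['mdv5b_results.json'],
                 target_file='mdv5a_results.json',
                 output_file='merged_results.json',
                 options=options)
```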