megadetector 5.0.7-py3-none-any.whl → 5.0.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/batch_processing/data_preparation/manage_local_batch.py +28 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
- api/batch_processing/postprocessing/compare_batch_results.py +1 -1
- api/batch_processing/postprocessing/convert_output_format.py +24 -6
- api/batch_processing/postprocessing/load_api_results.py +1 -3
- api/batch_processing/postprocessing/md_to_labelme.py +118 -51
- api/batch_processing/postprocessing/merge_detections.py +30 -5
- api/batch_processing/postprocessing/postprocess_batch_results.py +24 -12
- api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +15 -12
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- data_management/cct_json_utils.py +7 -2
- data_management/coco_to_labelme.py +263 -0
- data_management/coco_to_yolo.py +7 -4
- data_management/databases/integrity_check_json_db.py +68 -59
- data_management/databases/subset_json_db.py +1 -1
- data_management/get_image_sizes.py +44 -26
- data_management/importers/animl_results_to_md_results.py +1 -3
- data_management/importers/noaa_seals_2019.py +1 -1
- data_management/labelme_to_coco.py +252 -143
- data_management/labelme_to_yolo.py +95 -52
- data_management/lila/create_lila_blank_set.py +106 -23
- data_management/lila/download_lila_subset.py +133 -65
- data_management/lila/generate_lila_per_image_labels.py +1 -1
- data_management/lila/lila_common.py +8 -38
- data_management/read_exif.py +65 -16
- data_management/remap_coco_categories.py +84 -0
- data_management/resize_coco_dataset.py +3 -22
- data_management/wi_download_csv_to_coco.py +239 -0
- data_management/yolo_to_coco.py +283 -83
- detection/run_detector_batch.py +12 -3
- detection/run_inference_with_yolov5_val.py +10 -3
- detection/run_tiled_inference.py +2 -2
- detection/tf_detector.py +2 -1
- detection/video_utils.py +1 -1
- md_utils/ct_utils.py +22 -3
- md_utils/md_tests.py +11 -2
- md_utils/path_utils.py +206 -32
- md_utils/url_utils.py +66 -1
- md_utils/write_html_image_list.py +12 -3
- md_visualization/visualization_utils.py +363 -72
- md_visualization/visualize_db.py +33 -10
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/METADATA +10 -12
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/RECORD +47 -44
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
- md_visualization/visualize_megadb.py +0 -183
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
--- a/api/batch_processing/data_preparation/manage_local_batch.py
+++ b/api/batch_processing/data_preparation/manage_local_batch.py
@@ -86,6 +86,8 @@ from api.batch_processing.postprocessing.postprocess_batch_results import (
 from detection.run_detector import get_detector_version_from_filename
 from md_utils.ct_utils import image_file_to_camera_folder
 
+## Inference options
+
 # To specify a non-default confidence threshold for including detections in the .json file
 json_threshold = None
 
@@ -109,6 +111,11 @@ quiet_mode = True
 # will use its default size, which is 1280 * 1.3, which is almost always what you want.
 image_size = None
 
+# Should we include image size, timestamp, and/or EXIF data in MD output?
+include_image_size = False
+include_image_timestamp = False
+include_exif_data = False
+
 # Only relevant when running on CPU
 ncores = 1
 
@@ -187,7 +194,7 @@ augment = False
 
 ## Constants related to tiled inference
 
-use_tiled_inference =
+use_tiled_inference = False
 
 # Should we delete tiles after each job? Only set this to False for debugging;
 # large jobs will take up a lot of space if you keep tiles around after each task.
@@ -234,7 +241,7 @@ checkpoint_frequency = 10000
 approx_images_per_second = estimate_md_images_per_second(model_file)
 
 # Rough estimate for the inference time cost of augmentation
-if augment:
+if augment and (approx_images_per_second is not None):
    approx_images_per_second = approx_images_per_second * 0.7
 
 base_task_name = organization_name_short + '-' + job_date + job_description_string + '-' + \
@@ -268,6 +275,10 @@ filename_base = os.path.join(base_output_folder_name, base_task_name)
 combined_api_output_folder = os.path.join(filename_base, 'combined_api_outputs')
 postprocessing_output_folder = os.path.join(filename_base, 'preview')
 
+combined_api_output_file = os.path.join(
+    combined_api_output_folder,
+    '{}_detections.json'.format(base_task_name))
+
 os.makedirs(filename_base, exist_ok=True)
 os.makedirs(combined_api_output_folder, exist_ok=True)
 os.makedirs(postprocessing_output_folder, exist_ok=True)
@@ -494,7 +505,14 @@ for i_task,task in enumerate(task_info):
 
    overwrite_handling_string = '--overwrite_handling {}'.format(overwrite_handling)
    cmd = f'{cuda_string} python run_detector_batch.py "{model_file}" "{chunk_file}" "{output_fn}" {checkpoint_frequency_string} {checkpoint_path_string} {use_image_queue_string} {ncores_string} {quiet_string} {image_size_string} {confidence_threshold_string} {overwrite_handling_string}'
-
+
+    if include_image_size:
+        cmd += ' --include_image_size'
+    if include_image_timestamp:
+        cmd += ' --include_image_timestamp'
+    if include_exif_data:
+        cmd += ' --include_exif_data'
+
    cmd_file = os.path.join(filename_base,'run_chunk_{}_gpu_{}{}'.format(str(i_task).zfill(3),
        str(gpu_number).zfill(2),script_extension))
 
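For reference, with all three options enabled, the generated command gains ' --include_image_size --include_image_timestamp --include_exif_data', and each image entry in the resulting .json can then carry the corresponding optional fields ('width', 'height', 'datetime', 'exif_metadata', the same optional fields convert_output_format.py looks for). An illustrative sketch of such an entry, with hypothetical values:

# Hypothetical image entry from a results file produced with all three options enabled
im = {
    'file': 'camera01/IMG_0001.JPG',
    'detections': [{'category': '1', 'conf': 0.92, 'bbox': [0.11, 0.24, 0.31, 0.42]}],
    'width': 4608,
    'height': 3456,
    'datetime': '2023:10:01 12:34:56',
    'exif_metadata': {'Make': 'RECONYX'}
}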
@@ -747,10 +765,6 @@ for im in combined_results['images']:
    else:
        im['file'] = im['file'].replace(input_path + '/','',1)
 
-combined_api_output_file = os.path.join(
-    combined_api_output_folder,
-    '{}_detections.json'.format(base_task_name))
-
 with open(combined_api_output_file,'w') as f:
    json.dump(combined_results,f,indent=1)
 
@@ -793,7 +807,7 @@ options.api_output_file = combined_api_output_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(html_output_file)
 
 
@@ -823,7 +837,7 @@ options.otherDetectionsThreshold = options.confidenceMin
 
 options.bRenderDetectionTiles = True
 options.maxOutputImageWidth = 2000
-options.detectionTilesMaxCrops =
+options.detectionTilesMaxCrops = 250
 
 # options.lineThickness = 5
 # options.boxExpansion = 8
@@ -930,7 +944,7 @@ options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
 
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(html_output_file)
 
 
@@ -2003,7 +2017,7 @@ print('Processing {} to {}'.format(base_task_name, output_base))
 options.api_output_file = sequence_smoothed_classification_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
-path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(ppresults.output_html_file)
 
 #%% Zip .json files
@@ -2071,7 +2085,7 @@ for i, j in itertools.combinations(list(range(0,len(filenames))),2):
 results = compare_batch_results(options)
 
 from md_utils.path_utils import open_file
-open_file(results.html_output_file,attempt_to_open_in_wsl_host=True)
+open_file(results.html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 
 
 #%% Merge in high-confidence detections from another results file
@@ -2125,7 +2139,7 @@ options.output_dir = output_base_large_boxes
 
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 
 
 #%% .json splitting
@@ -2280,7 +2294,7 @@ import nbformat as nbf
 if os.name == 'nt':
    git_base = r'c:\git'
 else:
-    git_base = os.path.
+    git_base = os.path.expanduser('~/git')
 
 input_py_file = git_base + '/MegaDetector/api/batch_processing/data_preparation/manage_local_batch.py'
 assert os.path.isfile(input_py_file)
--- a/api/batch_processing/postprocessing/combine_api_outputs.py
+++ b/api/batch_processing/postprocessing/combine_api_outputs.py
@@ -48,7 +48,7 @@ def combine_api_output_files(input_files: List[str],
        input_files: list of str, paths to JSON detection files
        output_file: optional str, path to write merged JSON
        require_uniqueness: bool, whether to require that the images in
-            each
+            each list of images be unique
    """
 
    def print_if_verbose(s):
@@ -84,7 +84,7 @@ def combine_api_output_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
        input_dicts: list of dicts, each dict is the JSON of the detections
            output file from the Batch Processing API
        require_uniqueness: bool, whether to require that the images in
-            each
+            each input dict be unique
 
    Returns: dict, represents the merged JSON
    """
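A minimal usage sketch of the function whose docstring is amended above, assuming it is imported from api.batch_processing.postprocessing.combine_api_outputs and given hypothetical file names:

from api.batch_processing.postprocessing.combine_api_outputs import combine_api_output_files

# Merge two MD results files into one, requiring that no image appears in both inputs
combine_api_output_files(
    input_files=['chunk_000_results.json', 'chunk_001_results.json'],
    output_file='combined_results.json',
    require_uniqueness=True)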
--- a/api/batch_processing/postprocessing/compare_batch_results.py
+++ b/api/batch_processing/postprocessing/compare_batch_results.py
@@ -291,7 +291,7 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
    filenames_b_set = set([im['file'] for im in images_b])
 
    if len(images_a) != len(images_b):
-        s = 'set A has {}
+        s = 'set A has {} images, set B has {}'.format(len(images_a),len(images_b))
        if options.error_on_non_matching_lists:
            raise ValueError(s)
        else:
--- a/api/batch_processing/postprocessing/convert_output_format.py
+++ b/api/batch_processing/postprocessing/convert_output_format.py
@@ -4,8 +4,8 @@
 #
 # Converts between file formats output by our batch processing API. Currently
 # supports json <--> csv conversion, but this should be the landing place for any
-# conversion - including between
-# future.
+# conversion - including between hypothetical alternative .json versions - that we support
+# in the future.
 #
 ########
 
@@ -30,10 +30,13 @@ CONF_DIGITS = 3
 #%% Conversion functions
 
 def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
-                        omit_bounding_boxes=False,output_encoding=None
+                        omit_bounding_boxes=False,output_encoding=None,
+                        overwrite=True):
    """
    Convert .json to .csv
 
+    If output_path is None, will convert x.json to x.csv.
+
    TODO: this function should obviously be using Pandas or some other sensible structured
    representation of tabular data. Even a list of dicts. This implementation is quite
    brittle and depends on adding fields to every row in exactly the right order.
@@ -42,6 +45,10 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
    if output_path is None:
        output_path = os.path.splitext(input_path)[0]+'.csv'
 
+    if os.path.isfile(output_path) and (not overwrite):
+        print('File {} exists, skipping json --> csv conversion'.format(output_path))
+        return
+
    print('Loading json results from {}...'.format(input_path))
    json_output = json.load(open(input_path))
 
@@ -73,7 +80,7 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
 
    n_classification_categories = len(classification_category_ids)
 
-    # There are several fields for which we add columns
+    # There are several .json fields for which we add .csv columns; other random bespoke fields
    # will be ignored.
    optional_fields = ['width','height','datetime','exif_metadata']
    optional_fields_present = set()
@@ -104,7 +111,7 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
        if 'failure' in im and im['failure'] is not None:
            row = [image_id, 'failure', im['failure']]
            rows.append(row)
-            print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
+            # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
            continue
 
        max_conf = ct_utils.get_max_conf(im)
@@ -193,12 +200,21 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
        writer.writerow(header)
        writer.writerows(rows)
 
+# ...def convert_json_to_csv(...)
+
 
-def convert_csv_to_json(input_path,output_path=None):
+def convert_csv_to_json(input_path,output_path=None,overwrite=True):
+    """
+    Convert .csv to .json. If output_path is None, will convert x.csv to x.json.
+    """
 
    if output_path is None:
        output_path = os.path.splitext(input_path)[0]+'.json'
 
+    if os.path.isfile(output_path) and (not overwrite):
+        print('File {} exists, skipping csv --> json conversion'.format(output_path))
+        return
+
    # Format spec:
    #
    # https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing
@@ -259,6 +275,8 @@ def convert_csv_to_json(input_path,output_path=None):
    json_out['images'] = images
 
    json.dump(json_out,open(output_path,'w'),indent=1)
+
+# ...def convert_csv_to_json(...)
 
 
 #%% Interactive driver
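A minimal usage sketch of the two converters with the new overwrite flag, using hypothetical file names; the signatures are the ones shown in the hunks above:

from api.batch_processing.postprocessing.convert_output_format import (
    convert_json_to_csv, convert_csv_to_json)

# Writes md_results.csv next to the .json; skips the conversion if the .csv already exists
convert_json_to_csv('md_results.json', overwrite=False)

# Round-trip back to .json (by default the output lands next to the .csv)
convert_csv_to_json('md_results.csv', output_path='md_results_roundtrip.json')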
--- a/api/batch_processing/postprocessing/load_api_results.py
+++ b/api/batch_processing/postprocessing/load_api_results.py
@@ -64,11 +64,9 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
        if k != 'images':
            other_fields[k] = v
 
-    # Normalize paths to simplify comparisons later
    if normalize_paths:
        for image in detection_results['images']:
-            image['file'] = os.path.normpath(image['file'])
-            # image['file'] = image['file'].replace('\\','/')
+            image['file'] = os.path.normpath(image['file'])
 
    if force_forward_slashes:
        for image in detection_results['images']:
--- a/api/batch_processing/postprocessing/md_to_labelme.py
+++ b/api/batch_processing/postprocessing/md_to_labelme.py
@@ -20,6 +20,10 @@ import json
 
 from tqdm import tqdm
 
+from multiprocessing.pool import Pool
+from multiprocessing.pool import ThreadPool
+from functools import partial
+
 from md_visualization.visualization_utils import open_image
 from md_utils.ct_utils import truncate_float
 
@@ -29,15 +33,21 @@ default_confidence_threshold = 0.15
 
 #%% Functions
 
-def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,
+def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,
+                               info=None,confidence_threshold=None):
    """
    For the given image struct in MD results format, reformat the detections into
    labelme format. Returns a dict.
+
+    'height' and 'width' are required in [im].
+
+    image_base_name is written directly to the 'imagePath' field in the output; it should generally be
+    os.path.basename(your_image_file).
    """
 
    if confidence_threshold is None:
        confidence_threshold = -1.0
-
+
    output_dict = {}
    if info is not None:
        output_dict['detector_info'] = info
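A minimal sketch of how the per-image helper above is typically called, using a hypothetical MD results file; per the new docstring, 'width' and 'height' must already be present in the image entry and image_base_name should be the image's basename:

import json, os
from api.batch_processing.postprocessing.md_to_labelme import get_labelme_dict_for_image

with open('md_results.json', 'r') as f:   # hypothetical MD results file
    md_results = json.load(f)

im = md_results['images'][0]              # must already contain 'width' and 'height'
labelme_dict = get_labelme_dict_for_image(
    im,
    image_base_name=os.path.basename(im['file']),
    category_id_to_name=md_results['detection_categories'],
    info=md_results['info'],
    confidence_threshold=0.15)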
@@ -50,6 +60,7 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
    output_dict['imageData'] = None
    output_dict['detections'] = im['detections']
 
+    # det = im['detections'][1]
    for det in im['detections']:
 
        if det['conf'] < confidence_threshold:
|
|
|
79
90
|
# ...def get_labelme_dict_for_image()
|
|
80
91
|
|
|
81
92
|
|
|
93
|
+
def _write_output_for_image(im,image_base,extension_prefix,info,
|
|
94
|
+
confidence_threshold,category_id_to_name,overwrite,
|
|
95
|
+
verbose=False):
|
|
96
|
+
|
|
97
|
+
if 'failure' in im and im['failure'] is not None:
|
|
98
|
+
assert 'detections' not in im or im['detections'] is None
|
|
99
|
+
if verbose:
|
|
100
|
+
print('Skipping labelme file generation for failed image {}'.format(
|
|
101
|
+
im['file']))
|
|
102
|
+
return
|
|
103
|
+
|
|
104
|
+
im_full_path = os.path.join(image_base,im['file'])
|
|
105
|
+
json_path = os.path.splitext(im_full_path)[0] + extension_prefix + '.json'
|
|
106
|
+
|
|
107
|
+
if (not overwrite) and (os.path.isfile(json_path)):
|
|
108
|
+
if verbose:
|
|
109
|
+
print('Skipping existing file {}'.format(json_path))
|
|
110
|
+
return
|
|
111
|
+
|
|
112
|
+
output_dict = get_labelme_dict_for_image(im,
|
|
113
|
+
image_base_name=os.path.basename(im_full_path),
|
|
114
|
+
category_id_to_name=category_id_to_name,
|
|
115
|
+
info=info,
|
|
116
|
+
confidence_threshold=confidence_threshold)
|
|
117
|
+
|
|
118
|
+
with open(json_path,'w') as f:
|
|
119
|
+
json.dump(output_dict,f,indent=1)
|
|
120
|
+
|
|
121
|
+
# ...def write_output_for_image(...)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
|
|
82
125
|
def md_to_labelme(results_file,image_base,confidence_threshold=None,
|
|
83
|
-
overwrite=False
|
|
126
|
+
overwrite=False,extension_prefix='',n_workers=1,
|
|
127
|
+
use_threads=False,bypass_image_size_read=False,
|
|
128
|
+
verbose=False):
|
|
84
129
|
"""
|
|
85
130
|
For all the images in [results_file], write a .json file in labelme format alongside the
|
|
86
131
|
corresponding relative path within image_base.
|
|
132
|
+
|
|
133
|
+
If non-empty, "extension_prefix" will be inserted before the .json extension.
|
|
87
134
|
"""
|
|
88
135
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
md_results = json.load(f)
|
|
136
|
+
if extension_prefix is None:
|
|
137
|
+
extension_prefix = ''
|
|
92
138
|
|
|
93
|
-
#
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
139
|
+
# Load MD results if necessary
|
|
140
|
+
if isinstance(results_file,dict):
|
|
141
|
+
md_results = results_file
|
|
142
|
+
else:
|
|
143
|
+
print('Loading MD results...')
|
|
144
|
+
with open(results_file,'r') as f:
|
|
145
|
+
md_results = json.load(f)
|
|
99
146
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
|
|
147
|
+
# Read image sizes if necessary
|
|
148
|
+
if bypass_image_size_read:
|
|
103
149
|
|
|
104
|
-
|
|
105
|
-
if 'height' not in im or 'width' not in im:
|
|
106
|
-
|
|
107
|
-
try:
|
|
108
|
-
pil_im = open_image(im_full_path)
|
|
109
|
-
im['width'] = pil_im.width
|
|
110
|
-
im['height'] = pil_im.height
|
|
111
|
-
except Exception:
|
|
112
|
-
print('Warning: cannot open image {}, treating as a failure during inference'.format(
|
|
113
|
-
im_full_path))
|
|
114
|
-
if 'failure' not in im:
|
|
115
|
-
im['failure'] = 'Failure image access'
|
|
116
|
-
|
|
117
|
-
# ...if we need to read w/h information
|
|
150
|
+
print('Bypassing image size read')
|
|
118
151
|
|
|
119
|
-
|
|
152
|
+
else:
|
|
120
153
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
154
|
+
# TODO: parallelize this loop
|
|
155
|
+
|
|
156
|
+
print('Reading image sizes...')
|
|
157
|
+
|
|
158
|
+
# im = md_results['images'][0]
|
|
159
|
+
for im in tqdm(md_results['images']):
|
|
160
|
+
|
|
161
|
+
# Make sure this file exists
|
|
162
|
+
im_full_path = os.path.join(image_base,im['file'])
|
|
163
|
+
assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
|
|
164
|
+
|
|
165
|
+
json_path = os.path.splitext(im_full_path)[0] + extension_prefix + '.json'
|
|
166
|
+
|
|
167
|
+
# Don't even bother reading sizes for files we're not going to generate
|
|
168
|
+
if (not overwrite) and (os.path.isfile(json_path)):
|
|
169
|
+
continue
|
|
129
170
|
|
|
130
|
-
|
|
131
|
-
|
|
171
|
+
# Load w/h information if necessary
|
|
172
|
+
if 'height' not in im or 'width' not in im:
|
|
173
|
+
|
|
174
|
+
try:
|
|
175
|
+
pil_im = open_image(im_full_path)
|
|
176
|
+
im['width'] = pil_im.width
|
|
177
|
+
im['height'] = pil_im.height
|
|
178
|
+
except Exception:
|
|
179
|
+
print('Warning: cannot open image {}, treating as a failure during inference'.format(
|
|
180
|
+
im_full_path))
|
|
181
|
+
if 'failure' not in im:
|
|
182
|
+
im['failure'] = 'Failure image access'
|
|
183
|
+
|
|
184
|
+
# ...if we need to read w/h information
|
|
185
|
+
|
|
186
|
+
# ...for each image
|
|
132
187
|
|
|
133
|
-
|
|
134
|
-
print('Skipping existing file {}'.format(json_path))
|
|
135
|
-
continue
|
|
188
|
+
# ...if we're not bypassing image size read
|
|
136
189
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
190
|
+
print('\nGenerating labelme files...')
|
|
191
|
+
|
|
192
|
+
# Write output
|
|
193
|
+
if n_workers <= 1:
|
|
194
|
+
for im in tqdm(md_results['images']):
|
|
195
|
+
_write_output_for_image(im,image_base,extension_prefix,md_results['info'],confidence_threshold,
|
|
196
|
+
md_results['detection_categories'],overwrite,verbose)
|
|
197
|
+
else:
|
|
198
|
+
if use_threads:
|
|
199
|
+
print('Starting parallel thread pool with {} workers'.format(n_workers))
|
|
200
|
+
pool = ThreadPool(n_workers)
|
|
201
|
+
else:
|
|
202
|
+
print('Starting parallel process pool with {} workers'.format(n_workers))
|
|
203
|
+
pool = Pool(n_workers)
|
|
204
|
+
_ = list(tqdm(pool.imap(
|
|
205
|
+
partial(_write_output_for_image,
|
|
206
|
+
image_base=image_base,extension_prefix=extension_prefix,
|
|
207
|
+
info=md_results['info'],confidence_threshold=confidence_threshold,
|
|
208
|
+
category_id_to_name=md_results['detection_categories'],
|
|
209
|
+
overwrite=overwrite,verbose=verbose),
|
|
210
|
+
md_results['images']),
|
|
211
|
+
total=len(md_results['images'])))
|
|
145
212
|
|
|
146
213
|
# ...for each image
|
|
147
214
|
|
|
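A minimal usage sketch of the reworked md_to_labelme entry point with hypothetical paths; the keyword arguments are the ones introduced in the hunk above:

from api.batch_processing.postprocessing.md_to_labelme import md_to_labelme

# Write one labelme .json per image, next to each image under image_base,
# using four worker threads and skipping .json files that already exist
md_to_labelme(results_file='md_results.json',
              image_base='/data/camera-trap-images',
              confidence_threshold=0.15,
              overwrite=False,
              n_workers=4,
              use_threads=True)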
--- a/api/batch_processing/postprocessing/merge_detections.py
+++ b/api/batch_processing/postprocessing/merge_detections.py
@@ -3,9 +3,12 @@
 # merge_detections.py
 #
 # Merge high-confidence detections from one or more results files into another
-# file.
+# file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
 # results file from MDv5a.
 #
+# Detection categories must be the same in both files; if you want to first remap
+# one file's category mapping to be the same as another's, see remap_detection_categories.
+#
 # If you want to literally merge two .json files, see combine_api_outputs.py.
 #
 ########
@@ -30,7 +33,7 @@ class MergeDetectionsOptions:
 
        self.max_detection_size = 1.01
        self.min_detection_size = 0
-        self.source_confidence_thresholds = [0.
+        self.source_confidence_thresholds = [0.05]
 
        # Don't bother merging into target images if there is a similar detection
        # above this threshold (or if there is *any* detection above this threshold,
@@ -38,7 +41,7 @@ class MergeDetectionsOptions:
        self.target_confidence_threshold = 0.2
 
        # If you want to merge only certain categories, specify one
-        # (but not both) of these.
+        # (but not both) of these. These are category IDs, not names.
        self.categories_to_include = None
        self.categories_to_exclude = None
 
@@ -47,11 +50,28 @@ class MergeDetectionsOptions:
        self.merge_empty_only = False
 
        self.iou_threshold = 0.65
+
+        self.overwrite = False
 
 
 #%% Main function
 
 def merge_detections(source_files,target_file,output_file,options=None):
+    """
+    Merge high-confidence detections from one or more results files into another
+    file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
+    results file from MDv5a.
+
+    [source_files] (a list of files or a single filename) specifies the set of
+    results files that will be merged into [target_file]. The difference between a
+    "source file" and the "target file" is that if no merging is necessary, either because
+    two boxes are nearly identical or because merge_only_empty is True and the target
+    file already has above-threshold detection for an image+category, the output file gets
+    the results of the "target" file. I.e., the "target" file wins all ties.
+
+    The results are written to [output_file].
+
+    """
 
    if isinstance(source_files,str):
        source_files = [source_files]
@@ -59,6 +79,10 @@ def merge_detections(source_files,target_file,output_file,options=None):
    if options is None:
        options = MergeDetectionsOptions()
 
+    if (not options.overwrite) and (os.path.isfile(output_file)):
+        print('File {} exists, bypassing merge'.format(output_file))
+        return
+
    assert not ((options.categories_to_exclude is not None) and \
        (options.categories_to_include is not None)), \
        'categories_to_include and categories_to_exclude are mutually exclusive'
@@ -133,7 +157,8 @@ def merge_detections(source_files,target_file,output_file,options=None):
        output_data['info']['detections_transferred_from'].append(os.path.basename(source_file))
        output_data['info']['detector'] = output_data['info']['detector'] + ' + ' + source_detector_name
 
-        assert source_data['detection_categories'] == output_data['detection_categories']
+        assert source_data['detection_categories'] == output_data['detection_categories'], \
+            'Cannot merge files with different detection category maps'
 
        source_confidence_threshold = options.source_confidence_thresholds[i_source_file]
 
@@ -246,7 +271,7 @@ def merge_detections(source_files,target_file,output_file,options=None):
    # ...for each source file
 
    with open(output_file,'w') as f:
-        json.dump(output_data,f,indent=
+        json.dump(output_data,f,indent=1)
 
    print('Saved merged results to {}'.format(output_file))
 
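To close out, a minimal usage sketch of the merge entry point with the new overwrite option, using hypothetical file names; the option fields are the ones defined in MergeDetectionsOptions above:

from api.batch_processing.postprocessing.merge_detections import (
    MergeDetectionsOptions, merge_detections)

options = MergeDetectionsOptions()
options.source_confidence_thresholds = [0.05]   # one threshold per source file
options.target_confidence_threshold = 0.2
options.merge_empty_only = False
options.overwrite = False                       # skip the merge if the output file already exists

# Merge high-confidence MDv5b detections into MDv5a results (hypothetical file names)
merge_detections(source_files=['mdv5b_results.json'],
                 target_file='mdv5a_results.json',
                 output_file='merged_results.json',
                 options=options)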