megadetector 5.0.7-py3-none-any.whl → 5.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic.

Files changed (48)
  1. api/batch_processing/data_preparation/manage_local_batch.py +28 -14
  2. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  3. api/batch_processing/postprocessing/compare_batch_results.py +1 -1
  4. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  5. api/batch_processing/postprocessing/load_api_results.py +1 -3
  6. api/batch_processing/postprocessing/md_to_labelme.py +118 -51
  7. api/batch_processing/postprocessing/merge_detections.py +30 -5
  8. api/batch_processing/postprocessing/postprocess_batch_results.py +24 -12
  9. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  10. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +15 -12
  11. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  12. data_management/cct_json_utils.py +7 -2
  13. data_management/coco_to_labelme.py +263 -0
  14. data_management/coco_to_yolo.py +7 -4
  15. data_management/databases/integrity_check_json_db.py +68 -59
  16. data_management/databases/subset_json_db.py +1 -1
  17. data_management/get_image_sizes.py +44 -26
  18. data_management/importers/animl_results_to_md_results.py +1 -3
  19. data_management/importers/noaa_seals_2019.py +1 -1
  20. data_management/labelme_to_coco.py +252 -143
  21. data_management/labelme_to_yolo.py +95 -52
  22. data_management/lila/create_lila_blank_set.py +106 -23
  23. data_management/lila/download_lila_subset.py +133 -65
  24. data_management/lila/generate_lila_per_image_labels.py +1 -1
  25. data_management/lila/lila_common.py +8 -38
  26. data_management/read_exif.py +65 -16
  27. data_management/remap_coco_categories.py +84 -0
  28. data_management/resize_coco_dataset.py +3 -22
  29. data_management/wi_download_csv_to_coco.py +239 -0
  30. data_management/yolo_to_coco.py +283 -83
  31. detection/run_detector_batch.py +12 -3
  32. detection/run_inference_with_yolov5_val.py +10 -3
  33. detection/run_tiled_inference.py +2 -2
  34. detection/tf_detector.py +2 -1
  35. detection/video_utils.py +1 -1
  36. md_utils/ct_utils.py +22 -3
  37. md_utils/md_tests.py +11 -2
  38. md_utils/path_utils.py +206 -32
  39. md_utils/url_utils.py +66 -1
  40. md_utils/write_html_image_list.py +12 -3
  41. md_visualization/visualization_utils.py +363 -72
  42. md_visualization/visualize_db.py +33 -10
  43. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/METADATA +10 -12
  44. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/RECORD +47 -44
  45. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  46. md_visualization/visualize_megadb.py +0 -183
  47. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  48. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
api/batch_processing/data_preparation/manage_local_batch.py

@@ -86,6 +86,8 @@ from api.batch_processing.postprocessing.postprocess_batch_results import (
 from detection.run_detector import get_detector_version_from_filename
 from md_utils.ct_utils import image_file_to_camera_folder
 
+## Inference options
+
 # To specify a non-default confidence threshold for including detections in the .json file
 json_threshold = None
 
@@ -109,6 +111,11 @@ quiet_mode = True
 # will use its default size, which is 1280 * 1.3, which is almost always what you want.
 image_size = None
 
+# Should we include image size, timestamp, and/or EXIF data in MD output?
+include_image_size = False
+include_image_timestamp = False
+include_exif_data = False
+
 # Only relevant when running on CPU
 ncores = 1
 
@@ -187,7 +194,7 @@ augment = False
 
 ## Constants related to tiled inference
 
-use_tiled_inference = True
+use_tiled_inference = False
 
 # Should we delete tiles after each job? Only set this to False for debugging;
 # large jobs will take up a lot of space if you keep tiles around after each task.
@@ -234,7 +241,7 @@ checkpoint_frequency = 10000
 approx_images_per_second = estimate_md_images_per_second(model_file)
 
 # Rough estimate for the inference time cost of augmentation
-if augment:
+if augment and (approx_images_per_second is not None):
    approx_images_per_second = approx_images_per_second * 0.7
 
 base_task_name = organization_name_short + '-' + job_date + job_description_string + '-' + \
@@ -268,6 +275,10 @@ filename_base = os.path.join(base_output_folder_name, base_task_name)
 combined_api_output_folder = os.path.join(filename_base, 'combined_api_outputs')
 postprocessing_output_folder = os.path.join(filename_base, 'preview')
 
+combined_api_output_file = os.path.join(
+    combined_api_output_folder,
+    '{}_detections.json'.format(base_task_name))
+
 os.makedirs(filename_base, exist_ok=True)
 os.makedirs(combined_api_output_folder, exist_ok=True)
 os.makedirs(postprocessing_output_folder, exist_ok=True)
@@ -494,7 +505,14 @@ for i_task,task in enumerate(task_info):
 
     overwrite_handling_string = '--overwrite_handling {}'.format(overwrite_handling)
     cmd = f'{cuda_string} python run_detector_batch.py "{model_file}" "{chunk_file}" "{output_fn}" {checkpoint_frequency_string} {checkpoint_path_string} {use_image_queue_string} {ncores_string} {quiet_string} {image_size_string} {confidence_threshold_string} {overwrite_handling_string}'
-
+
+    if include_image_size:
+        cmd += ' --include_image_size'
+    if include_image_timestamp:
+        cmd += ' --include_image_timestamp'
+    if include_exif_data:
+        cmd += ' --include_exif_data'
+
     cmd_file = os.path.join(filename_base,'run_chunk_{}_gpu_{}{}'.format(str(i_task).zfill(3),
         str(gpu_number).zfill(2),script_extension))
 
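For orientation, a minimal sketch of how the three new inference options translate into run_detector_batch.py flags, following the hunk above; the model and chunk filenames are hypothetical placeholders, and the other command-line fragments built by manage_local_batch.py are omitted for brevity.

# Illustrative sketch only; filenames below are hypothetical placeholders.
include_image_size = True
include_image_timestamp = True
include_exif_data = False

cmd = 'python run_detector_batch.py "md_v5a.0.0.pt" "chunk_000_images.json" "chunk_000_results.json"'
if include_image_size:
    cmd += ' --include_image_size'
if include_image_timestamp:
    cmd += ' --include_image_timestamp'
if include_exif_data:
    cmd += ' --include_exif_data'
print(cmd)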
@@ -747,10 +765,6 @@ for im in combined_results['images']:
    else:
        im['file'] = im['file'].replace(input_path + '/','',1)
 
-combined_api_output_file = os.path.join(
-    combined_api_output_folder,
-    '{}_detections.json'.format(base_task_name))
-
 with open(combined_api_output_file,'w') as f:
    json.dump(combined_results,f,indent=1)
 
@@ -793,7 +807,7 @@ options.api_output_file = combined_api_output_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(html_output_file)
 
 
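The only change in this hunk, and in the similar hunks below, is the new browser_name argument passed to path_utils.open_file. A minimal sketch of the updated call, with a placeholder path for the preview page:

from md_utils import path_utils

# Open the generated preview page in a specific browser; the path below is a
# hypothetical placeholder, and 'chrome' is the value used throughout
# manage_local_batch.py in 5.0.8.
html_output_file = '/path/to/preview/index.html'
path_utils.open_file(html_output_file,
                     attempt_to_open_in_wsl_host=True,
                     browser_name='chrome')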
@@ -823,7 +837,7 @@ options.otherDetectionsThreshold = options.confidenceMin
 
 options.bRenderDetectionTiles = True
 options.maxOutputImageWidth = 2000
-options.detectionTilesMaxCrops = 300
+options.detectionTilesMaxCrops = 250
 
 # options.lineThickness = 5
 # options.boxExpansion = 8
@@ -930,7 +944,7 @@ options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
 
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(html_output_file)
 
 
@@ -2003,7 +2017,7 @@ print('Processing {} to {}'.format(base_task_name, output_base))
 options.api_output_file = sequence_smoothed_classification_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
-path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(ppresults.output_html_file)
 
 #%% Zip .json files
@@ -2071,7 +2085,7 @@ for i, j in itertools.combinations(list(range(0,len(filenames))),2):
 results = compare_batch_results(options)
 
 from md_utils.path_utils import open_file
-open_file(results.html_output_file,attempt_to_open_in_wsl_host=True)
+open_file(results.html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 
 
 #%% Merge in high-confidence detections from another results file
@@ -2125,7 +2139,7 @@ options.output_dir = output_base_large_boxes
 
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 
 
 #%% .json splitting
@@ -2280,7 +2294,7 @@ import nbformat as nbf
 if os.name == 'nt':
    git_base = r'c:\git'
 else:
-    git_base = os.path.expanduer('~/git')
+    git_base = os.path.expanduser('~/git')
 
 input_py_file = git_base + '/MegaDetector/api/batch_processing/data_preparation/manage_local_batch.py'
 assert os.path.isfile(input_py_file)
api/batch_processing/postprocessing/combine_api_outputs.py

@@ -48,7 +48,7 @@ def combine_api_output_files(input_files: List[str],
    input_files: list of str, paths to JSON detection files
    output_file: optional str, path to write merged JSON
    require_uniqueness: bool, whether to require that the images in
-        each input_dict be unique
+        each list of images be unique
    """
 
    def print_if_verbose(s):
@@ -84,7 +84,7 @@ def combine_api_output_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
    input_dicts: list of dicts, each dict is the JSON of the detections
        output file from the Batch Processing API
    require_uniqueness: bool, whether to require that the images in
-        each input_dict be unique
+        each input dict be unique
 
    Returns: dict, represents the merged JSON
    """
api/batch_processing/postprocessing/compare_batch_results.py

@@ -291,7 +291,7 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
    filenames_b_set = set([im['file'] for im in images_b])
 
    if len(images_a) != len(images_b):
-        s = 'set A has {} iamges, set B has {}'.format(len(images_a),len(images_b))
+        s = 'set A has {} images, set B has {}'.format(len(images_a),len(images_b))
        if options.error_on_non_matching_lists:
            raise ValueError(s)
        else:
api/batch_processing/postprocessing/convert_output_format.py

@@ -4,8 +4,8 @@
 #
 # Converts between file formats output by our batch processing API. Currently
 # supports json <--> csv conversion, but this should be the landing place for any
-# conversion - including between future .json versions - that we support in the
-# future.
+# conversion - including between hypothetical alternative .json versions - that we support
+# in the future.
 #
 ########
 
@@ -30,10 +30,13 @@ CONF_DIGITS = 3
 #%% Conversion functions
 
 def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
-                        omit_bounding_boxes=False,output_encoding=None):
+                        omit_bounding_boxes=False,output_encoding=None,
+                        overwrite=True):
    """
    Convert .json to .csv
 
+    If output_path is None, will convert x.json to x.csv.
+
    TODO: this function should obviously be using Pandas or some other sensible structured
    representation of tabular data. Even a list of dicts. This implementation is quite
    brittle and depends on adding fields to every row in exactly the right order.
@@ -42,6 +45,10 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
    if output_path is None:
        output_path = os.path.splitext(input_path)[0]+'.csv'
 
+    if os.path.isfile(output_path) and (not overwrite):
+        print('File {} exists, skipping json --> csv conversion'.format(output_path))
+        return
+
    print('Loading json results from {}...'.format(input_path))
    json_output = json.load(open(input_path))
 
@@ -73,7 +80,7 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
 
    n_classification_categories = len(classification_category_ids)
 
-    # There are several fields for which we add columns, other random bespoke fields
+    # There are several .json fields for which we add .csv columns; other random bespoke fields
    # will be ignored.
    optional_fields = ['width','height','datetime','exif_metadata']
    optional_fields_present = set()
@@ -104,7 +111,7 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
        if 'failure' in im and im['failure'] is not None:
            row = [image_id, 'failure', im['failure']]
            rows.append(row)
-            print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
+            # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
            continue
 
        max_conf = ct_utils.get_max_conf(im)
@@ -193,12 +200,21 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
        writer.writerow(header)
        writer.writerows(rows)
 
+# ...def convert_json_to_csv(...)
+
 
-def convert_csv_to_json(input_path,output_path=None):
+def convert_csv_to_json(input_path,output_path=None,overwrite=True):
+    """
+    Convert .csv to .json. If output_path is None, will convert x.csv to x.json.
+    """
 
    if output_path is None:
        output_path = os.path.splitext(input_path)[0]+'.json'
 
+    if os.path.isfile(output_path) and (not overwrite):
+        print('File {} exists, skipping csv --> json conversion'.format(output_path))
+        return
+
    # Format spec:
    #
    # https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing
@@ -259,6 +275,8 @@ def convert_csv_to_json(input_path,output_path=None):
    json_out['images'] = images
 
    json.dump(json_out,open(output_path,'w'),indent=1)
+
+# ...def convert_csv_to_json(...)
 
 
 #%% Interactive driver
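A short usage sketch of the two conversion functions with the overwrite parameter added in this release; the module path follows the file list and the filenames are hypothetical placeholders.

from api.batch_processing.postprocessing.convert_output_format import \
    convert_json_to_csv, convert_csv_to_json

# With output_path=None, x.json is converted to x.csv (and vice versa); with
# overwrite=False, the conversion is skipped if the output file already exists.
convert_json_to_csv('md_results.json', output_path=None, overwrite=False)
convert_csv_to_json('md_results.csv', output_path=None, overwrite=False)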
api/batch_processing/postprocessing/load_api_results.py

@@ -64,11 +64,9 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
        if k != 'images':
            other_fields[k] = v
 
-    # Normalize paths to simplify comparisons later
    if normalize_paths:
        for image in detection_results['images']:
-            image['file'] = os.path.normpath(image['file'])
-            # image['file'] = image['file'].replace('\\','/')
+            image['file'] = os.path.normpath(image['file'])
 
    if force_forward_slashes:
        for image in detection_results['images']:
api/batch_processing/postprocessing/md_to_labelme.py

@@ -20,6 +20,10 @@ import json
 
 from tqdm import tqdm
 
+from multiprocessing.pool import Pool
+from multiprocessing.pool import ThreadPool
+from functools import partial
+
 from md_visualization.visualization_utils import open_image
 from md_utils.ct_utils import truncate_float
 
@@ -29,15 +33,21 @@ default_confidence_threshold = 0.15
 #%% Functions
 
 
-def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,confidence_threshold=None):
+def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,
+                               info=None,confidence_threshold=None):
    """
    For the given image struct in MD results format, reformat the detections into
    labelme format. Returns a dict.
+
+    'height' and 'width' are required in [im].
+
+    image_base_name is written directly to the 'imagePath' field in the output; it should generally be
+    os.path.basename(your_image_file).
    """
 
    if confidence_threshold is None:
        confidence_threshold = -1.0
-
+
    output_dict = {}
    if info is not None:
        output_dict['detector_info'] = info
@@ -50,6 +60,7 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
    output_dict['imageData'] = None
    output_dict['detections'] = im['detections']
 
+    # det = im['detections'][1]
    for det in im['detections']:
 
        if det['conf'] < confidence_threshold:
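A hedged sketch of calling get_labelme_dict_for_image under the requirements documented above ('width' and 'height' present, image_base_name set to the file's basename); the image record, bounding box, and category map values are illustrative only, following the standard MD results format.

from api.batch_processing.postprocessing.md_to_labelme import get_labelme_dict_for_image

# One image record in MD results format; 'width' and 'height' must be present.
im = {'file': 'camera01/IMG_0001.JPG',
      'width': 1920,
      'height': 1080,
      'detections': [{'category': '1', 'conf': 0.92,
                      'bbox': [0.1, 0.2, 0.3, 0.4]}]}
category_id_to_name = {'1': 'animal', '2': 'person', '3': 'vehicle'}

labelme_dict = get_labelme_dict_for_image(im,
                                          image_base_name='IMG_0001.JPG',
                                          category_id_to_name=category_id_to_name,
                                          confidence_threshold=0.15)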
@@ -79,69 +90,125 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
 # ...def get_labelme_dict_for_image()
 
 
+def _write_output_for_image(im,image_base,extension_prefix,info,
+                            confidence_threshold,category_id_to_name,overwrite,
+                            verbose=False):
+
+    if 'failure' in im and im['failure'] is not None:
+        assert 'detections' not in im or im['detections'] is None
+        if verbose:
+            print('Skipping labelme file generation for failed image {}'.format(
+                im['file']))
+        return
+
+    im_full_path = os.path.join(image_base,im['file'])
+    json_path = os.path.splitext(im_full_path)[0] + extension_prefix + '.json'
+
+    if (not overwrite) and (os.path.isfile(json_path)):
+        if verbose:
+            print('Skipping existing file {}'.format(json_path))
+        return
+
+    output_dict = get_labelme_dict_for_image(im,
+        image_base_name=os.path.basename(im_full_path),
+        category_id_to_name=category_id_to_name,
+        info=info,
+        confidence_threshold=confidence_threshold)
+
+    with open(json_path,'w') as f:
+        json.dump(output_dict,f,indent=1)
+
+# ...def write_output_for_image(...)
+
+
+
 def md_to_labelme(results_file,image_base,confidence_threshold=None,
-                  overwrite=False):
+                  overwrite=False,extension_prefix='',n_workers=1,
+                  use_threads=False,bypass_image_size_read=False,
+                  verbose=False):
    """
    For all the images in [results_file], write a .json file in labelme format alongside the
    corresponding relative path within image_base.
+
+    If non-empty, "extension_prefix" will be inserted before the .json extension.
    """
 
-    # Load MD results
-    with open(results_file,'r') as f:
-        md_results = json.load(f)
+    if extension_prefix is None:
+        extension_prefix = ''
 
-    # Read image sizes
-    #
-    # TODO: parallelize this loop
-    #
-    # im = md_results['images'][0]
-    for im in tqdm(md_results['images']):
+    # Load MD results if necessary
+    if isinstance(results_file,dict):
+        md_results = results_file
+    else:
+        print('Loading MD results...')
+        with open(results_file,'r') as f:
+            md_results = json.load(f)
 
-        # Make sure this file exists
-        im_full_path = os.path.join(image_base,im['file'])
-        assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
+    # Read image sizes if necessary
+    if bypass_image_size_read:
 
-        # Load w/h information if necessary
-        if 'height' not in im or 'width' not in im:
-
-            try:
-                pil_im = open_image(im_full_path)
-                im['width'] = pil_im.width
-                im['height'] = pil_im.height
-            except Exception:
-                print('Warning: cannot open image {}, treating as a failure during inference'.format(
-                    im_full_path))
-                if 'failure' not in im:
-                    im['failure'] = 'Failure image access'
-
-        # ...if we need to read w/h information
+        print('Bypassing image size read')
 
-    # ...for each image
+    else:
 
-    # Write output
-    for im in tqdm(md_results['images']):
-
-        if 'failure' in im and im['failure'] is not None:
-            assert 'detections' not in im
-            print('Warning: skipping labelme file generation for failed image {}'.format(
-                im['file']))
-            continue
+        # TODO: parallelize this loop
+
+        print('Reading image sizes...')
+
+        # im = md_results['images'][0]
+        for im in tqdm(md_results['images']):
+
+            # Make sure this file exists
+            im_full_path = os.path.join(image_base,im['file'])
+            assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
+
+            json_path = os.path.splitext(im_full_path)[0] + extension_prefix + '.json'
+
+            # Don't even bother reading sizes for files we're not going to generate
+            if (not overwrite) and (os.path.isfile(json_path)):
+                continue
 
-        im_full_path = os.path.join(image_base,im['file'])
-        json_path = os.path.splitext(im_full_path)[0] + '.json'
+            # Load w/h information if necessary
+            if 'height' not in im or 'width' not in im:
+
+                try:
+                    pil_im = open_image(im_full_path)
+                    im['width'] = pil_im.width
+                    im['height'] = pil_im.height
+                except Exception:
+                    print('Warning: cannot open image {}, treating as a failure during inference'.format(
+                        im_full_path))
+                    if 'failure' not in im:
+                        im['failure'] = 'Failure image access'
+
+            # ...if we need to read w/h information
+
+        # ...for each image
 
-        if (not overwrite) and (os.path.isfile(json_path)):
-            print('Skipping existing file {}'.format(json_path))
-            continue
+    # ...if we're not bypassing image size read
 
-        output_dict = get_labelme_dict_for_image(im,
-            image_base_name=os.path.basename(im_full_path),
-            category_id_to_name=md_results['detection_categories'],
-            info=md_results['info'],
-            confidence_threshold=confidence_threshold)
-
-        with open(json_path,'w') as f:
-            json.dump(output_dict,f,indent=1)
+    print('\nGenerating labelme files...')
+
+    # Write output
+    if n_workers <= 1:
+        for im in tqdm(md_results['images']):
+            _write_output_for_image(im,image_base,extension_prefix,md_results['info'],confidence_threshold,
+                                    md_results['detection_categories'],overwrite,verbose)
+    else:
+        if use_threads:
+            print('Starting parallel thread pool with {} workers'.format(n_workers))
+            pool = ThreadPool(n_workers)
+        else:
+            print('Starting parallel process pool with {} workers'.format(n_workers))
+            pool = Pool(n_workers)
+        _ = list(tqdm(pool.imap(
+            partial(_write_output_for_image,
+                    image_base=image_base,extension_prefix=extension_prefix,
+                    info=md_results['info'],confidence_threshold=confidence_threshold,
+                    category_id_to_name=md_results['detection_categories'],
+                    overwrite=overwrite,verbose=verbose),
+            md_results['images']),
+            total=len(md_results['images'])))
 
 # ...for each image
 
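A brief usage sketch of md_to_labelme with the parameters added in this release; paths are hypothetical placeholders, and the comments paraphrase the hunk above.

from api.batch_processing.postprocessing.md_to_labelme import md_to_labelme

# results_file may now be either a path to an MD-format .json file or an
# already-loaded results dict; image and file paths here are hypothetical.
md_to_labelme(results_file='md_results.json',
              image_base='/data/camera_traps',
              confidence_threshold=0.15,
              overwrite=False,
              extension_prefix='',            # inserted before '.json' if non-empty
              n_workers=4,                    # >1 enables parallel labelme file generation
              use_threads=True,               # thread pool rather than process pool
              bypass_image_size_read=False)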
api/batch_processing/postprocessing/merge_detections.py

@@ -3,9 +3,12 @@
 # merge_detections.py
 #
 # Merge high-confidence detections from one or more results files into another
-# file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
+# file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
 # results file from MDv5a.
 #
+# Detection categories must be the same in both files; if you want to first remap
+# one file's category mapping to be the same as another's, see remap_detection_categories.
+#
 # If you want to literally merge two .json files, see combine_api_outputs.py.
 #
 ########
@@ -30,7 +33,7 @@ class MergeDetectionsOptions:
 
        self.max_detection_size = 1.01
        self.min_detection_size = 0
-        self.source_confidence_thresholds = [0.2]
+        self.source_confidence_thresholds = [0.05]
 
        # Don't bother merging into target images if there is a similar detection
        # above this threshold (or if there is *any* detection above this threshold,
@@ -38,7 +41,7 @@
        self.target_confidence_threshold = 0.2
 
        # If you want to merge only certain categories, specify one
-        # (but not both) of these.
+        # (but not both) of these. These are category IDs, not names.
        self.categories_to_include = None
        self.categories_to_exclude = None
 
@@ -47,11 +50,28 @@
        self.merge_empty_only = False
 
        self.iou_threshold = 0.65
+
+        self.overwrite = False
 
 
 #%% Main function
 
 def merge_detections(source_files,target_file,output_file,options=None):
+    """
+    Merge high-confidence detections from one or more results files into another
+    file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
+    results file from MDv5a.
+
+    [source_files] (a list of files or a single filename) specifies the set of
+    results files that will be merged into [target_file]. The difference between a
+    "source file" and the "target file" is that if no merging is necessary, either because
+    two boxes are nearly identical or because merge_only_empty is True and the target
+    file already has above-threshold detection for an image+category, the output file gets
+    the results of the "target" file. I.e., the "target" file wins all ties.
+
+    The results are written to [output_file].
+
+    """
 
    if isinstance(source_files,str):
        source_files = [source_files]
@@ -59,6 +79,10 @@ def merge_detections(source_files,target_file,output_file,options=None):
    if options is None:
        options = MergeDetectionsOptions()
 
+    if (not options.overwrite) and (os.path.isfile(output_file)):
+        print('File {} exists, bypassing merge'.format(output_file))
+        return
+
    assert not ((options.categories_to_exclude is not None) and \
                (options.categories_to_include is not None)), \
                'categories_to_include and categories_to_exclude are mutually exclusive'
@@ -133,7 +157,8 @@
        output_data['info']['detections_transferred_from'].append(os.path.basename(source_file))
        output_data['info']['detector'] = output_data['info']['detector'] + ' + ' + source_detector_name
 
-        assert source_data['detection_categories'] == output_data['detection_categories']
+        assert source_data['detection_categories'] == output_data['detection_categories'], \
+            'Cannot merge files with different detection category maps'
 
        source_confidence_threshold = options.source_confidence_thresholds[i_source_file]
 
@@ -246,7 +271,7 @@
    # ...for each source file
 
    with open(output_file,'w') as f:
-        json.dump(output_data,f,indent=2)
+        json.dump(output_data,f,indent=1)
 
    print('Saved merged results to {}'.format(output_file))
 
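Finally, a hedged usage sketch of merge_detections with the updated MergeDetectionsOptions defaults and the new overwrite flag shown in the hunks above; the module path follows the file list and the filenames are hypothetical placeholders.

from api.batch_processing.postprocessing.merge_detections import \
    MergeDetectionsOptions, merge_detections

options = MergeDetectionsOptions()
options.source_confidence_thresholds = [0.05]   # new default in 5.0.8
options.target_confidence_threshold = 0.2
options.overwrite = False                       # new option: skip the merge if the output exists

# Merge high-confidence MDv5b detections into MDv5a results; the "target" file
# wins all ties, and both files must share the same detection category map.
merge_detections(source_files=['mdv5b_results.json'],
                 target_file='mdv5a_results.json',
                 output_file='merged_results.json',
                 options=options)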