megadetector-5.0.6-py3-none-any.whl → megadetector-5.0.8-py3-none-any.whl

This diff shows the changes between these two publicly released package versions as they appear in their public registries, and is provided for informational purposes only.


Files changed (75)
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
api/batch_processing/postprocessing/load_api_results.py

@@ -2,17 +2,18 @@
 #
 # load_api_results.py
 #
-# Loads the output of the batch processing API (json) into a pandas dataframe.
+# DEPRECATED
 #
-# Also functions to group entries by seq_id.
+# As of 2023.12, this module is used in postprocessing and RDE. Not recommended
+# for new code.
 #
-# Includes the deprecated functions that worked with the old CSV API output format.
+# Loads the output of the batch processing API (json) into a Pandas dataframe.
+#
+# Includes functions to read/write the (very very old) .csv results format.
 #
 ########
 
-#%% Constants and imports
-
-from collections import defaultdict
+#%% Imports
 
 import json
 import os
@@ -23,72 +24,32 @@ import pandas as pd
 
 from md_utils import ct_utils
 
-headers = ['image_path', 'max_confidence', 'detections']
-
-
-#%% Functions for grouping by sequence_id
-
-def ss_file_to_file_name(f):
-    # example
-    # input 'file': 'SER/S1/F08/F08_R3/S1_F08_R3_PICT1150.JPG'
-    # output 'id': 'S1/F08/F08_R3/S1_F08_R3_PICT1150.JPG'
-    return f.split('SER/')[1].split('.JPG')[0]
-
-
-def caltech_file_to_file_name(f):
-    return f.split('cct_images/')[1].split('.')[0]
-
-
-def api_results_groupby(api_output_path, gt_db_indexed, file_to_image_id, field='seq_id'):
-    """
-    Given the output file of the API, groupby (currently only seq_id).
-
-    Args:
-        api_output_path: path to the API output json file
-        gt_db_indexed: an instance of IndexedJsonDb so we know the seq_id to image_id mapping
-        file_to_image_id: a function that takes in the 'file' field in 'images' in the detector
-            output file and converts it to the 'id' field in the gt DB.
-        field: which field in the 'images' array to group by
-
-    Returns:
-        A dict where the keys are of the field requested, each points to an array
-        containing entries in the 'images' section of the output file
-    """
-
-    with open(api_output_path) as f:
-        detection_results = json.load(f)
 
-    res = defaultdict(list)
-    for i in detection_results['images']:
-        image_id = file_to_image_id(i['file'])
-        field_val = gt_db_indexed.image_id_to_image[image_id][field]
-        res[field_val].append(i)
-    return res
-
-
-#%% Functions for loading the result as a Pandas DataFrame
+#%% Functions for loading .json results into a Pandas DataFrame, and writing back to .json
 
 def load_api_results(api_output_path: str, normalize_paths: bool = True,
-                     filename_replacements: Optional[Mapping[str, str]] = None
+                     filename_replacements: Optional[Mapping[str, str]] = None,
+                     force_forward_slashes: bool = True
                      ) -> Tuple[pd.DataFrame, Dict]:
     """
-    Loads the json formatted results from the batch processing API to a
-    Pandas DataFrame, mainly useful for various postprocessing functions.
+    Loads json-formatted MegaDetector results to a Pandas DataFrame.
 
     Args:
-        api_output_path: path to the API output json file
+        api_output_path: path to the output json file
         normalize_paths: whether to apply os.path.normpath to the 'file' field
            in each image entry in the output file
        filename_replacements: replace some path tokens to match local paths to
            the original blob structure
+        force_forward_slashes: whether to convert backslashes to forward slashes
+           in filenames
 
     Returns:
        detection_results: pd.DataFrame, contains at least the columns:
-            ['file', 'detections','failure']
+           ['file', 'detections','failure']
        other_fields: a dict containing fields in the results other than 'images'
    """
 
-    print('Loading API results from {}'.format(api_output_path))
+    print('Loading results from {}'.format(api_output_path))
 
    with open(api_output_path) as f:
        detection_results = json.load(f)
@@ -97,18 +58,20 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
     for s in ['info', 'detection_categories', 'images']:
         assert s in detection_results, 'Missing field {} in detection results'.format(s)
 
-    # Fields in the API output json other than 'images'
+    # Fields in the output json other than 'images'
     other_fields = {}
     for k, v in detection_results.items():
         if k != 'images':
             other_fields[k] = v
 
-    # Normalize paths to simplify comparisons later
     if normalize_paths:
         for image in detection_results['images']:
-            image['file'] = os.path.normpath(image['file'])
-            # image['file'] = image['file'].replace('\\','/')
+            image['file'] = os.path.normpath(image['file'])
 
+    if force_forward_slashes:
+        for image in detection_results['images']:
+            image['file'] = image['file'].replace('\\','/')
+
     # Replace some path tokens to match local paths to original blob structure
     if filename_replacements is not None:
         for string_to_replace in filename_replacements.keys():
@@ -127,9 +90,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
     # Pack the json output into a Pandas DataFrame
     detection_results = pd.DataFrame(detection_results['images'])
 
-
-
-    print('Finished loading API results for {} images from {}'.format(
+    print('Finished loading MegaDetector results for {} images from {}'.format(
         len(detection_results),api_output_path))
 
     return detection_results, other_fields
@@ -137,7 +98,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
 
 def write_api_results(detection_results_table, other_fields, out_path):
     """
-    Writes a Pandas DataFrame back to a json that is compatible with the API output format.
+    Writes a Pandas DataFrame to the MegaDetector .json format.
     """
 
     print('Writing detection results to {}'.format(out_path))
@@ -148,6 +109,27 @@ def write_api_results(detection_results_table, other_fields, out_path):
                                             double_precision=3)
     images = json.loads(images)
     fields['images'] = images
+
+    # Convert the 'version' field back to a string as per format convention
+    try:
+        version = other_fields['info']['format_version']
+        if not isinstance(version,str):
+            other_fields['info']['format_version'] = str(version)
+    except Exception:
+        print('Warning: error determining format version')
+        pass
+
+    # Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
+    try:
+        version = other_fields['info']['format_version']
+        version = float(version)
+        if version >= 1.3:
+            for im in images:
+                if 'max_detection_conf' in im:
+                    del im['max_detection_conf']
+    except Exception:
+        print('Warning: error removing max_detection_conf from output')
+        pass
 
     with open(out_path, 'w') as f:
         json.dump(fields, f, indent=1)
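As a standalone illustration of the two conventions the added block enforces (the 'info' field 'format_version' is stored as a string, and 'max_detection_conf' is dropped from each image entry for format versions >= 1.3), here is a minimal sketch on a hand-built results dict; the dict and its values are hypothetical, only the field names follow the MegaDetector output format:

# Hypothetical results dict, illustrating the output-format conventions above
results = {
    'info': {'format_version': 1.3},   # accidentally stored as a number
    'detection_categories': {'1': 'animal'},
    'images': [{'file': 'img_0001.jpg',
                'max_detection_conf': 0.92,
                'detections': []}],
}

# Store the format version as a string, per the format convention
results['info']['format_version'] = str(results['info']['format_version'])

# For format versions >= 1.3, 'max_detection_conf' is no longer written
if float(results['info']['format_version']) >= 1.3:
    for im in results['images']:
        im.pop('max_detection_conf', None)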
@@ -157,15 +139,16 @@ def write_api_results(detection_results_table, other_fields, out_path):
 
 def load_api_results_csv(filename, normalize_paths=True, filename_replacements={}, nrows=None):
     """
-    DEPRECATED
-    Loads .csv-formatted results from the batch processing API to a pandas table
+    [DEPRECATED]
+
+    Loads .csv-formatted MegaDetector results to a pandas table
     """
 
-    print('Loading API results from {}'.format(filename))
+    print('Loading MegaDetector results from {}'.format(filename))
 
     detection_results = pd.read_csv(filename,nrows=nrows)
 
-    print('De-serializing API results from {}'.format(filename))
+    print('De-serializing MegaDetector results from {}'.format(filename))
 
     # Confirm that this is really a detector output file
     for s in ['image_path','max_confidence','detections']:
@@ -191,17 +174,18 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={
                 fn = fn.replace(string_to_replace,replacement_string)
             detection_results.at[iRow,'image_path'] = fn
 
-    print('Finished loading and de-serializing API results for {} images from {}'.format(
+    print('Finished loading and de-serializing MD results for {} images from {}'.format(
         len(detection_results),filename))
 
     return detection_results
 
 
 def write_api_results_csv(detection_results, filename):
-    """
-    DEPRECATED
-    Writes a pandas table to csv in a way that's compatible with the .csv API output
-    format. Currently just a wrapper around to_csv that just forces output writing
+    """
+    [DEPRECATED]
+
+    Writes a Pandas table to csv in a way that's compatible with the .csv output
+    format. Currently just a wrapper around to_csv that forces output writing
     to go through a common code path.
     """
 
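For orientation, a minimal usage sketch of the updated loader/writer pair; the import path and the filenames below are assumptions, not taken from this diff, but the parameters are the ones shown above:

# Round-trip a MegaDetector results file through load_api_results/write_api_results.
# Import path and filenames are hypothetical.
from api.batch_processing.postprocessing.load_api_results import \
    load_api_results, write_api_results

# force_forward_slashes (new in this release) converts backslashes in the
# 'file' field to forward slashes after path normalization
df, other_fields = load_api_results('md_results.json',
                                    normalize_paths=True,
                                    force_forward_slashes=True)

# df has at least the columns 'file', 'detections', and 'failure';
# other_fields holds everything outside 'images' (e.g. 'info' and
# 'detection_categories')
write_api_results(df, other_fields, 'md_results_rewritten.json')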
api/batch_processing/postprocessing/md_to_labelme.py

@@ -20,6 +20,10 @@ import json
 
 from tqdm import tqdm
 
+from multiprocessing.pool import Pool
+from multiprocessing.pool import ThreadPool
+from functools import partial
+
 from md_visualization.visualization_utils import open_image
 from md_utils.ct_utils import truncate_float
 
@@ -29,15 +33,21 @@ default_confidence_threshold = 0.15
 
 #%% Functions
 
-def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,confidence_threshold=None):
+def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,
+                               info=None,confidence_threshold=None):
     """
     For the given image struct in MD results format, reformat the detections into
     labelme format. Returns a dict.
+
+    'height' and 'width' are required in [im].
+
+    image_base_name is written directly to the 'imagePath' field in the output; it should generally be
+    os.path.basename(your_image_file).
     """
 
     if confidence_threshold is None:
         confidence_threshold = -1.0
-
+
     output_dict = {}
     if info is not None:
         output_dict['detector_info'] = info
@@ -48,7 +58,9 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
     output_dict['imageHeight'] = im['height']
     output_dict['imageWidth'] = im['width']
     output_dict['imageData'] = None
+    output_dict['detections'] = im['detections']
 
+    # det = im['detections'][1]
     for det in im['detections']:
 
         if det['conf'] < confidence_threshold:
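A minimal sketch of calling get_labelme_dict_for_image with the fields the updated docstring requires ('height' and 'width' present in the image struct, and a basename for the output 'imagePath'); the import path and the example values below are assumptions:

from api.batch_processing.postprocessing.md_to_labelme import get_labelme_dict_for_image

# Hypothetical MD-format image entry; 'height' and 'width' must be present
im = {
    'file': 'camera01/img_0001.jpg',
    'height': 1080,
    'width': 1920,
    'detections': [{'category': '1', 'conf': 0.85, 'bbox': [0.1, 0.2, 0.3, 0.4]}],
}

labelme_dict = get_labelme_dict_for_image(
    im,
    image_base_name='img_0001.jpg',        # written to the 'imagePath' field
    category_id_to_name={'1': 'animal'},
    confidence_threshold=0.2)              # detections below this are skipped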
@@ -78,69 +90,125 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
 # ...def get_labelme_dict_for_image()
 
 
+def _write_output_for_image(im,image_base,extension_prefix,info,
+                            confidence_threshold,category_id_to_name,overwrite,
+                            verbose=False):
+
+    if 'failure' in im and im['failure'] is not None:
+        assert 'detections' not in im or im['detections'] is None
+        if verbose:
+            print('Skipping labelme file generation for failed image {}'.format(
+                im['file']))
+        return
+
+    im_full_path = os.path.join(image_base,im['file'])
+    json_path = os.path.splitext(im_full_path)[0] + extension_prefix + '.json'
+
+    if (not overwrite) and (os.path.isfile(json_path)):
+        if verbose:
+            print('Skipping existing file {}'.format(json_path))
+        return
+
+    output_dict = get_labelme_dict_for_image(im,
+                                             image_base_name=os.path.basename(im_full_path),
+                                             category_id_to_name=category_id_to_name,
+                                             info=info,
+                                             confidence_threshold=confidence_threshold)
+
+    with open(json_path,'w') as f:
+        json.dump(output_dict,f,indent=1)
+
+# ...def write_output_for_image(...)
+
+
+
 def md_to_labelme(results_file,image_base,confidence_threshold=None,
-                  overwrite=False):
+                  overwrite=False,extension_prefix='',n_workers=1,
+                  use_threads=False,bypass_image_size_read=False,
+                  verbose=False):
     """
     For all the images in [results_file], write a .json file in labelme format alongside the
     corresponding relative path within image_base.
+
+    If non-empty, "extension_prefix" will be inserted before the .json extension.
     """
 
-    # Load MD results
-    with open(results_file,'r') as f:
-        md_results = json.load(f)
+    if extension_prefix is None:
+        extension_prefix = ''
 
-    # Read image sizes
-    #
-    # TODO: parallelize this loop
-    #
-    # im = md_results['images'][0]
-    for im in tqdm(md_results['images']):
+    # Load MD results if necessary
+    if isinstance(results_file,dict):
+        md_results = results_file
+    else:
+        print('Loading MD results...')
+        with open(results_file,'r') as f:
+            md_results = json.load(f)
 
-        # Make sure this file exists
-        im_full_path = os.path.join(image_base,im['file'])
-        assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
+    # Read image sizes if necessary
+    if bypass_image_size_read:
 
-        # Load w/h information if necessary
-        if 'height' not in im or 'width' not in im:
-
-            try:
-                pil_im = open_image(im_full_path)
-                im['width'] = pil_im.width
-                im['height'] = pil_im.height
-            except Exception:
-                print('Warning: cannot open image {}, treating as a failure during inference'.format(
-                    im_full_path))
-                if 'failure' not in im:
-                    im['failure'] = 'Failure image access'
-
-        # ...if we need to read w/h information
+        print('Bypassing image size read')
 
-    # ...for each image
+    else:
 
-    # Write output
-    for im in tqdm(md_results['images']):
-
-        if 'failure' in im and im['failure'] is not None:
-            assert 'detections' not in im
-            print('Warning: skipping labelme file generation for failed image {}'.format(
-                im['file']))
-            continue
+        # TODO: parallelize this loop
+
+        print('Reading image sizes...')
+
+        # im = md_results['images'][0]
+        for im in tqdm(md_results['images']):
+
+            # Make sure this file exists
+            im_full_path = os.path.join(image_base,im['file'])
+            assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
+
+            json_path = os.path.splitext(im_full_path)[0] + extension_prefix + '.json'
+
+            # Don't even bother reading sizes for files we're not going to generate
+            if (not overwrite) and (os.path.isfile(json_path)):
+                continue
 
-        im_full_path = os.path.join(image_base,im['file'])
-        json_path = os.path.splitext(im_full_path)[0] + '.json'
+            # Load w/h information if necessary
+            if 'height' not in im or 'width' not in im:
+
+                try:
+                    pil_im = open_image(im_full_path)
+                    im['width'] = pil_im.width
+                    im['height'] = pil_im.height
+                except Exception:
+                    print('Warning: cannot open image {}, treating as a failure during inference'.format(
+                        im_full_path))
+                    if 'failure' not in im:
+                        im['failure'] = 'Failure image access'
+
+            # ...if we need to read w/h information
+
+        # ...for each image
 
-        if (not overwrite) and (os.path.isfile(json_path)):
-            print('Skipping existing file {}'.format(json_path))
-            continue
+    # ...if we're not bypassing image size read
 
-        output_dict = get_labelme_dict_for_image(im,
-                                                 image_base_name=os.path.basename(im_full_path),
-                                                 category_id_to_name=md_results['detection_categories'],
-                                                 info=md_results['info'],
-                                                 confidence_threshold=confidence_threshold)
-
-        with open(json_path,'w') as f:
-            json.dump(output_dict,f,indent=1)
+    print('\nGenerating labelme files...')
+
+    # Write output
+    if n_workers <= 1:
+        for im in tqdm(md_results['images']):
+            _write_output_for_image(im,image_base,extension_prefix,md_results['info'],confidence_threshold,
+                                    md_results['detection_categories'],overwrite,verbose)
+    else:
+        if use_threads:
+            print('Starting parallel thread pool with {} workers'.format(n_workers))
+            pool = ThreadPool(n_workers)
+        else:
+            print('Starting parallel process pool with {} workers'.format(n_workers))
+            pool = Pool(n_workers)
+        _ = list(tqdm(pool.imap(
+            partial(_write_output_for_image,
+                    image_base=image_base,extension_prefix=extension_prefix,
+                    info=md_results['info'],confidence_threshold=confidence_threshold,
+                    category_id_to_name=md_results['detection_categories'],
+                    overwrite=overwrite,verbose=verbose),
+            md_results['images']),
+            total=len(md_results['images'])))
 
 # ...for each image
 
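A hedged usage sketch of the reworked md_to_labelme; the import path and the paths below are assumptions, while the keyword arguments are the ones added in this release:

from api.batch_processing.postprocessing.md_to_labelme import md_to_labelme

# Write one labelme .json next to each image, parallelizing the per-image
# writes over a thread pool. results_file can now also be an already-loaded
# results dict rather than a filename.
md_to_labelme(results_file='md_results.json',         # hypothetical path
              image_base='/data/camera_trap_images',  # hypothetical path
              confidence_threshold=0.2,
              extension_prefix='',            # inserted before '.json' if non-empty
              n_workers=8,                    # >1 enables the worker pool
              use_threads=True,               # False would use a process pool
              bypass_image_size_read=False,
              overwrite=False)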
api/batch_processing/postprocessing/merge_detections.py

@@ -3,9 +3,12 @@
 # merge_detections.py
 #
 # Merge high-confidence detections from one or more results files into another
-# file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
+# file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
 # results file from MDv5a.
 #
+# Detection categories must be the same in both files; if you want to first remap
+# one file's category mapping to be the same as another's, see remap_detection_categories.
+#
 # If you want to literally merge two .json files, see combine_api_outputs.py.
 #
 ########
@@ -30,7 +33,7 @@ class MergeDetectionsOptions:
 
         self.max_detection_size = 1.01
         self.min_detection_size = 0
-        self.source_confidence_thresholds = [0.2]
+        self.source_confidence_thresholds = [0.05]
 
         # Don't bother merging into target images if there is a similar detection
         # above this threshold (or if there is *any* detection above this threshold,
@@ -38,7 +41,7 @@
         self.target_confidence_threshold = 0.2
 
         # If you want to merge only certain categories, specify one
-        # (but not both) of these.
+        # (but not both) of these. These are category IDs, not names.
         self.categories_to_include = None
         self.categories_to_exclude = None
 
@@ -47,11 +50,28 @@
         self.merge_empty_only = False
 
         self.iou_threshold = 0.65
+
+        self.overwrite = False
 
 
 #%% Main function
 
 def merge_detections(source_files,target_file,output_file,options=None):
+    """
+    Merge high-confidence detections from one or more results files into another
+    file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
+    results file from MDv5a.
+
+    [source_files] (a list of files or a single filename) specifies the set of
+    results files that will be merged into [target_file]. The difference between a
+    "source file" and the "target file" is that if no merging is necessary, either because
+    two boxes are nearly identical or because merge_only_empty is True and the target
+    file already has above-threshold detection for an image+category, the output file gets
+    the results of the "target" file. I.e., the "target" file wins all ties.
+
+    The results are written to [output_file].
+
+    """
 
     if isinstance(source_files,str):
         source_files = [source_files]
@@ -59,6 +79,10 @@ def merge_detections(source_files,target_file,output_file,options=None):
     if options is None:
         options = MergeDetectionsOptions()
 
+    if (not options.overwrite) and (os.path.isfile(output_file)):
+        print('File {} exists, bypassing merge'.format(output_file))
+        return
+
     assert not ((options.categories_to_exclude is not None) and \
                 (options.categories_to_include is not None)), \
                 'categories_to_include and categories_to_exclude are mutually exclusive'
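A hedged usage sketch of merge_detections with the options touched in this release (the lower default source confidence threshold and the new overwrite flag); the import path and filenames are assumptions:

from api.batch_processing.postprocessing.merge_detections import \
    merge_detections, MergeDetectionsOptions

options = MergeDetectionsOptions()
options.source_confidence_thresholds = [0.05]  # one threshold per source file
options.target_confidence_threshold = 0.2
options.overwrite = False   # new in this release: skip the merge if output_file exists

# High-confidence MDv5b detections are merged into the MDv5a results; on ties,
# the target file wins. Detection category maps must match across files.
merge_detections(source_files=['mdv5b_results.json'],
                 target_file='mdv5a_results.json',
                 output_file='merged_results.json',
                 options=options)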
@@ -133,7 +157,8 @@
         output_data['info']['detections_transferred_from'].append(os.path.basename(source_file))
         output_data['info']['detector'] = output_data['info']['detector'] + ' + ' + source_detector_name
 
-        assert source_data['detection_categories'] == output_data['detection_categories']
+        assert source_data['detection_categories'] == output_data['detection_categories'], \
+            'Cannot merge files with different detection category maps'
 
         source_confidence_threshold = options.source_confidence_thresholds[i_source_file]
 
@@ -246,7 +271,7 @@
     # ...for each source file
 
     with open(output_file,'w') as f:
-        json.dump(output_data,f,indent=2)
+        json.dump(output_data,f,indent=1)
 
     print('Saved merged results to {}'.format(output_file))
 