megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,17 +1,17 @@
1
- ########
2
- #
3
- # md_to_labelme.py
4
- #
5
- # "Converts" a MegaDetector output .json file to labelme format (one .json per image
6
- # file). "Convert" is in quotes because this is an opinionated transformation that
7
- # requires a confidence threshold.
8
- #
9
- # TODO:
10
- #
11
- # * support variable confidence thresholds across classes
12
- # * support classification data
13
- #
14
- ########
1
+ """
2
+
3
+ md_to_labelme.py
4
+
5
+ "Converts" a MegaDetector output .json file to labelme format (one .json per image
6
+ file). "Convert" is in quotes because this is an opinionated transformation that
7
+ requires a confidence threshold.
8
+
9
+ TODO:
10
+
11
+ * support variable confidence thresholds across classes
12
+ * support classification data
13
+
14
+ """
15
15
 
16
16
  #%% Imports and constants
17
17
 
@@ -20,8 +20,13 @@ import json
20
20
 
21
21
  from tqdm import tqdm
22
22
 
23
+ from multiprocessing.pool import Pool
24
+ from multiprocessing.pool import ThreadPool
25
+ from functools import partial
26
+
23
27
  from md_visualization.visualization_utils import open_image
24
28
  from md_utils.ct_utils import truncate_float
29
+ from detection.run_detector import DEFAULT_DETECTOR_LABEL_MAP
25
30
 
26
31
  output_precision = 3
27
32
  default_confidence_threshold = 0.15
@@ -29,15 +34,36 @@ default_confidence_threshold = 0.15
29
34
 
30
35
  #%% Functions
31
36
 
32
- def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,confidence_threshold=None):
37
+ def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
38
+ info=None,confidence_threshold=None):
33
39
  """
34
40
  For the given image struct in MD results format, reformat the detections into
35
- labelme format. Returns a dict.
41
+ labelme format.
42
+
43
+ Args:
44
+ im (dict): MegaDetector-formatted results dict, must include 'height' and 'width' fields
45
+ image_base_name (str, optional): written directly to the 'imagePath' field in the output;
46
+ defaults to os.path.basename(im['file']).
47
+ category_id_to_name (dict, optional): maps string-int category IDs to category names, defaults
48
+ to the standard MD categories
49
+ info (dict, optional): arbitrary metadata to write to the "detector_info" field in the output
50
+ dict
51
+ confidence_threshold (float, optional): only detections at or above this confidence threshold
52
+ will be included in the output dict
53
+
54
+ Returns:
55
+ dict: labelme-formatted dictionary, suitable for writing directly to a labelme-formatted .json file
36
56
  """
37
57
 
58
+ if image_base_name is None:
59
+ image_base_name = os.path.basename(im['file'])
60
+
61
+ if category_id_to_name is None:
62
+ category_id_to_name = DEFAULT_DETECTOR_LABEL_MAP
63
+
38
64
  if confidence_threshold is None:
39
65
  confidence_threshold = -1.0
40
-
66
+
41
67
  output_dict = {}
42
68
  if info is not None:
43
69
  output_dict['detector_info'] = info
@@ -50,6 +76,7 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
50
76
  output_dict['imageData'] = None
51
77
  output_dict['detections'] = im['detections']
52
78
 
79
+ # det = im['detections'][1]
53
80
  for det in im['detections']:
54
81
 
55
82
  if det['conf'] < confidence_threshold:
@@ -79,69 +106,140 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
79
106
  # ...def get_labelme_dict_for_image()
80
107
 
81
108
 
109
+ def _write_output_for_image(im,image_base,extension_prefix,info,
110
+ confidence_threshold,category_id_to_name,overwrite,
111
+ verbose=False):
112
+
113
+ if 'failure' in im and im['failure'] is not None:
114
+ assert 'detections' not in im or im['detections'] is None
115
+ if verbose:
116
+ print('Skipping labelme file generation for failed image {}'.format(
117
+ im['file']))
118
+ return
119
+
120
+ im_full_path = os.path.join(image_base,im['file'])
121
+ json_path = os.path.splitext(im_full_path)[0] + extension_prefix + '.json'
122
+
123
+ if (not overwrite) and (os.path.isfile(json_path)):
124
+ if verbose:
125
+ print('Skipping existing file {}'.format(json_path))
126
+ return
127
+
128
+ output_dict = get_labelme_dict_for_image(im,
129
+ image_base_name=os.path.basename(im_full_path),
130
+ category_id_to_name=category_id_to_name,
131
+ info=info,
132
+ confidence_threshold=confidence_threshold)
133
+
134
+ with open(json_path,'w') as f:
135
+ json.dump(output_dict,f,indent=1)
136
+
137
+ # ...def write_output_for_image(...)
138
+
139
+
140
+
82
141
  def md_to_labelme(results_file,image_base,confidence_threshold=None,
83
- overwrite=False):
142
+ overwrite=False,extension_prefix='',n_workers=1,
143
+ use_threads=False,bypass_image_size_read=False,
144
+ verbose=False):
84
145
  """
85
146
  For all the images in [results_file], write a .json file in labelme format alongside the
86
147
  corresponding relative path within image_base.
148
+
149
+ Args:
150
+ results_file (str): MD results .json file to convert to Labelme format
151
+ image_base (str): folder of images; filenames in [results_file] should be relative to
152
+ this folder
153
+ confidence_threshold (float, optional): only detections at or above this confidence threshold
154
+ will be included in the output dict
155
+ overwrite (bool, optional): whether to overwrite existing output files; if this is False
156
+ and the output file for an image exists, we'll skip that image
157
+ extension_prefix (str, optional): if non-empty, "extension_prefix" will be inserted before the .json
158
+ extension
159
+ n_workers (int, optional): enables multiprocessing if > 1
160
+ use_threads (bool, optional): if [n_workers] > 1, determines whether we parallelize via threads (True)
161
+ or processes (False)
162
+ bypass_image_size_read (bool, optional): if True, skips reading image sizes and trusts whatever is in
163
+ the MD results file (don't set this to "True" if your MD results file doesn't contain image sizes)
164
+ verbose (bool, optional): enables additional debug output
87
165
  """
88
166
 
89
- # Load MD results
90
- with open(results_file,'r') as f:
91
- md_results = json.load(f)
167
+ if extension_prefix is None:
168
+ extension_prefix = ''
92
169
 
93
- # Read image sizes
94
- #
95
- # TODO: parallelize this loop
96
- #
97
- # im = md_results['images'][0]
98
- for im in tqdm(md_results['images']):
170
+ # Load MD results if necessary
171
+ if isinstance(results_file,dict):
172
+ md_results = results_file
173
+ else:
174
+ print('Loading MD results...')
175
+ with open(results_file,'r') as f:
176
+ md_results = json.load(f)
99
177
 
100
- # Make sure this file exists
101
- im_full_path = os.path.join(image_base,im['file'])
102
- assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
178
+ # Read image sizes if necessary
179
+ if bypass_image_size_read:
103
180
 
104
- # Load w/h information if necessary
105
- if 'height' not in im or 'width' not in im:
106
-
107
- try:
108
- pil_im = open_image(im_full_path)
109
- im['width'] = pil_im.width
110
- im['height'] = pil_im.height
111
- except Exception:
112
- print('Warning: cannot open image {}, treating as a failure during inference'.format(
113
- im_full_path))
114
- if 'failure' not in im:
115
- im['failure'] = 'Failure image access'
116
-
117
- # ...if we need to read w/h information
181
+ print('Bypassing image size read')
118
182
 
119
- # ...for each image
183
+ else:
120
184
 
121
- # Write output
122
- for im in tqdm(md_results['images']):
123
-
124
- if 'failure' in im and im['failure'] is not None:
125
- assert 'detections' not in im
126
- print('Warning: skipping labelme file generation for failed image {}'.format(
127
- im['file']))
128
- continue
185
+ # TODO: parallelize this loop
186
+
187
+ print('Reading image sizes...')
188
+
189
+ # im = md_results['images'][0]
190
+ for im in tqdm(md_results['images']):
191
+
192
+ # Make sure this file exists
193
+ im_full_path = os.path.join(image_base,im['file'])
194
+ assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
195
+
196
+ json_path = os.path.splitext(im_full_path)[0] + extension_prefix + '.json'
197
+
198
+ # Don't even bother reading sizes for files we're not going to generate
199
+ if (not overwrite) and (os.path.isfile(json_path)):
200
+ continue
129
201
 
130
- im_full_path = os.path.join(image_base,im['file'])
131
- json_path = os.path.splitext(im_full_path)[0] + '.json'
202
+ # Load w/h information if necessary
203
+ if 'height' not in im or 'width' not in im:
204
+
205
+ try:
206
+ pil_im = open_image(im_full_path)
207
+ im['width'] = pil_im.width
208
+ im['height'] = pil_im.height
209
+ except Exception:
210
+ print('Warning: cannot open image {}, treating as a failure during inference'.format(
211
+ im_full_path))
212
+ if 'failure' not in im:
213
+ im['failure'] = 'Failure image access'
214
+
215
+ # ...if we need to read w/h information
216
+
217
+ # ...for each image
132
218
 
133
- if (not overwrite) and (os.path.isfile(json_path)):
134
- print('Skipping existing file {}'.format(json_path))
135
- continue
219
+ # ...if we're not bypassing image size read
136
220
 
137
- output_dict = get_labelme_dict_for_image(im,
138
- image_base_name=os.path.basename(im_full_path),
139
- category_id_to_name=md_results['detection_categories'],
140
- info=md_results['info'],
141
- confidence_threshold=confidence_threshold)
142
-
143
- with open(json_path,'w') as f:
144
- json.dump(output_dict,f,indent=1)
221
+ print('\nGenerating labelme files...')
222
+
223
+ # Write output
224
+ if n_workers <= 1:
225
+ for im in tqdm(md_results['images']):
226
+ _write_output_for_image(im,image_base,extension_prefix,md_results['info'],confidence_threshold,
227
+ md_results['detection_categories'],overwrite,verbose)
228
+ else:
229
+ if use_threads:
230
+ print('Starting parallel thread pool with {} workers'.format(n_workers))
231
+ pool = ThreadPool(n_workers)
232
+ else:
233
+ print('Starting parallel process pool with {} workers'.format(n_workers))
234
+ pool = Pool(n_workers)
235
+ _ = list(tqdm(pool.imap(
236
+ partial(_write_output_for_image,
237
+ image_base=image_base,extension_prefix=extension_prefix,
238
+ info=md_results['info'],confidence_threshold=confidence_threshold,
239
+ category_id_to_name=md_results['detection_categories'],
240
+ overwrite=overwrite,verbose=verbose),
241
+ md_results['images']),
242
+ total=len(md_results['images'])))
145
243
 
146
244
  # ...for each image
147
245
 
@@ -227,7 +325,6 @@ def main():
227
325
  args = parser.parse_args()
228
326
 
229
327
  md_to_labelme(args.results_file,args.image_base,args.confidence_threshold,args.overwrite)
230
-
231
-
328
+
232
329
  if __name__ == '__main__':
233
330
  main()
@@ -1,14 +1,17 @@
1
- ########
2
- #
3
- # merge_detections.py
4
- #
5
- # Merge high-confidence detections from one or more results files into another
6
- # file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
7
- # results file from MDv5a.
8
- #
9
- # If you want to literally merge two .json files, see combine_api_outputs.py.
10
- #
11
- ########
1
+ """
2
+
3
+ merge_detections.py
4
+
5
+ Merge high-confidence detections from one or more results files into another
6
+ file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
7
+ results file from MDv5a.
8
+
9
+ Detection categories must be the same in both files; if you want to first remap
10
+ one file's category mapping to be the same as another's, see remap_detection_categories.
11
+
12
+ If you want to literally merge two .json files, see combine_api_outputs.py.
13
+
14
+ """
12
15
 
13
16
  #%% Constants and imports
14
17
 
@@ -30,7 +33,7 @@ class MergeDetectionsOptions:
30
33
 
31
34
  self.max_detection_size = 1.01
32
35
  self.min_detection_size = 0
33
- self.source_confidence_thresholds = [0.2]
36
+ self.source_confidence_thresholds = [0.05]
34
37
 
35
38
  # Don't bother merging into target images if there is a similar detection
36
39
  # above this threshold (or if there is *any* detection above this threshold,
@@ -38,7 +41,7 @@ class MergeDetectionsOptions:
38
41
  self.target_confidence_threshold = 0.2
39
42
 
40
43
  # If you want to merge only certain categories, specify one
41
- # (but not both) of these.
44
+ # (but not both) of these. These are category IDs, not names.
42
45
  self.categories_to_include = None
43
46
  self.categories_to_exclude = None
44
47
 
@@ -47,11 +50,28 @@ class MergeDetectionsOptions:
47
50
  self.merge_empty_only = False
48
51
 
49
52
  self.iou_threshold = 0.65
53
+
54
+ self.overwrite = False
50
55
 
51
56
 
52
57
  #%% Main function
53
58
 
54
59
  def merge_detections(source_files,target_file,output_file,options=None):
60
+ """
61
+ Merge high-confidence detections from one or more results files into another
62
+ file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
63
+ results file from MDv5a.
64
+
65
+ [source_files] (a list of files or a single filename) specifies the set of
66
+ results files that will be merged into [target_file]. The difference between a
67
+ "source file" and the "target file" is that if no merging is necessary, either because
68
+ two boxes are nearly identical or because merge_empty_only is True and the target
69
+ file already has an above-threshold detection for an image+category, the output file gets
70
+ the results of the "target" file. I.e., the "target" file wins all ties.
71
+
72
+ The results are written to [output_file].
73
+
74
+ """
55
75
 
56
76
  if isinstance(source_files,str):
57
77
  source_files = [source_files]
@@ -59,6 +79,10 @@ def merge_detections(source_files,target_file,output_file,options=None):
59
79
  if options is None:
60
80
  options = MergeDetectionsOptions()
61
81
 
82
+ if (not options.overwrite) and (os.path.isfile(output_file)):
83
+ print('File {} exists, bypassing merge'.format(output_file))
84
+ return
85
+
62
86
  assert not ((options.categories_to_exclude is not None) and \
63
87
  (options.categories_to_include is not None)), \
64
88
  'categories_to_include and categories_to_exclude are mutually exclusive'
@@ -133,7 +157,8 @@ def merge_detections(source_files,target_file,output_file,options=None):
133
157
  output_data['info']['detections_transferred_from'].append(os.path.basename(source_file))
134
158
  output_data['info']['detector'] = output_data['info']['detector'] + ' + ' + source_detector_name
135
159
 
136
- assert source_data['detection_categories'] == output_data['detection_categories']
160
+ assert source_data['detection_categories'] == output_data['detection_categories'], \
161
+ 'Cannot merge files with different detection category maps'
137
162
 
138
163
  source_confidence_threshold = options.source_confidence_thresholds[i_source_file]
139
164
 
@@ -246,7 +271,7 @@ def merge_detections(source_files,target_file,output_file,options=None):
246
271
  # ...for each source file
247
272
 
248
273
  with open(output_file,'w') as f:
249
- json.dump(output_data,f,indent=2)
274
+ json.dump(output_data,f,indent=1)
250
275
 
251
276
  print('Saved merged results to {}'.format(output_file))
252
277