megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -0,0 +1,170 @@
1
+ """
2
+
3
+ remap_detection_categories.py
4
+
5
+ Given a MegaDetector results file, remap the category IDs according to a specified
6
+ dictionary, writing the results to a new file.
7
+
8
+ Currently only supports remapping detection categories, not classification categories.
9
+
10
+ """
11
+
12
+ #%% Constants and imports
13
+
14
+ import json
15
+ import os
16
+
17
+ from tqdm import tqdm
18
+
19
+ from md_utils.ct_utils import invert_dictionary
20
+
21
+
22
+ #%% Main function
23
+
24
def remap_detection_categories(input_file,
                               output_file,
                               target_category_map,
                               extra_category_handling='error',
                               overwrite=False):
    """
    Given a MegaDetector results file [input_file], remap the category IDs according to the
    dictionary [target_category_map], writing the results to [output_file].  The remapped
    dictionary needs to have the same category names as the input file's
    detection_categories dictionary (modulo [extra_category_handling]).

    Typically used to map, e.g., a variety of species to the class "mammal" or the class
    "animal".

    Currently only supports remapping detection categories, not classification categories.

    Args:
        input_file (str): the MD .json results file to remap
        output_file (str): the remapped .json file to write
        target_category_map (dict): the category mapping (ID --> name) that should be used
            in the output file.  This can also be a MD results file, in which case we'll
            use that file's detection_categories dictionary.
        extra_category_handling (str, optional): specifies what we should do if categories
            are present in the source file that are not present in the target mapping:

            * 'error' == Error in this case.
            * 'drop_if_unused' == Don't include these in the output file's category
              mappings if they are unused, error if they are used.
            * 'remap' == Remap to unused category IDs.  This is reserved for future use,
              not currently implemented.
        overwrite (bool, optional): whether to overwrite [output_file] if it exists; if
            this is False and [output_file] exists, this function is a no-op

    Raises:
        ValueError: if a category name is present in the source but not in the target and
            [extra_category_handling] doesn't permit dropping it, or if
            [extra_category_handling] is not a recognized scheme
        NotImplementedError: if [extra_category_handling] is 'remap'
    """

    if os.path.exists(output_file) and (not overwrite):
        print('File {} exists, bypassing remapping'.format(output_file))
        return

    assert os.path.isfile(input_file), \
        'File {} does not exist'.format(input_file)

    # If "target_category_map" is passed as a filename, load that file's
    # "detection_categories" dict.
    if isinstance(target_category_map,str):
        target_categories_file = target_category_map
        with open(target_categories_file,'r') as f:
            d = json.load(f)
        target_category_map = d['detection_categories']
    assert isinstance(target_category_map,dict)

    with open(input_file,'r') as f:
        input_data = json.load(f)

    input_images = input_data['images']
    input_categories = input_data['detection_categories']

    # Figure out which category IDs appear in at least one detection; images with
    # missing/None 'detections' (e.g. failed images) are skipped.
    used_category_ids = set()
    for im in input_images:
        if 'detections' not in im or im['detections'] is None:
            continue
        for det in im['detections']:
            used_category_ids.add(det['category'])

    # A set rather than a list: we do repeated membership tests against this below
    used_category_names = set(input_categories[cid] for cid in used_category_ids)

    input_names_set = set(input_categories.values())
    output_names_set = set(target_category_map.values())

    # Verify that every source category name has a home in the target mapping, or
    # resolve the discrepancy according to [extra_category_handling]
    for category_name in input_names_set:
        if category_name in output_names_set:
            continue
        if extra_category_handling == 'error':
            raise ValueError('Category {} present in source but not in target'.format(category_name))
        elif extra_category_handling == 'drop_if_unused':
            if category_name in used_category_names:
                raise ValueError('Category {} present (and used) in source but not in target'.format(
                    category_name))
            else:
                print('Category {} is unused and not present in the target mapping, ignoring'.format(
                    category_name))
                continue
        elif extra_category_handling == 'remap':
            raise NotImplementedError('Remapping of extra category IDs not yet implemented')
        else:
            raise ValueError('Unrecognized extra category handling scheme {}'.format(
                extra_category_handling))

    # Map output category names (e.g. "animal") back to output category IDs (e.g. "1")
    output_category_name_to_output_category_id = invert_dictionary(target_category_map)

    # Build the ID translation table: input category ID --> output category ID
    input_category_id_to_output_category_id = {}
    for input_category_id in input_categories.keys():
        category_name = input_categories[input_category_id]
        if category_name not in output_category_name_to_output_category_id:
            # This name was dropped above; that's only legal if it's unused
            assert category_name not in used_category_names
        else:
            output_category_id = output_category_name_to_output_category_id[category_name]
            input_category_id_to_output_category_id[input_category_id] = output_category_id

    # Rewrite every detection's category ID in place
    for im in tqdm(input_images):

        if 'detections' not in im or im['detections'] is None:
            continue

        for det in im['detections']:
            det['category'] = input_category_id_to_output_category_id[det['category']]

    input_data['detection_categories'] = target_category_map

    with open(output_file,'w') as f:
        json.dump(input_data,f,indent=1)

    print('Saved remapped results to {}'.format(output_file))
141
+
142
+
143
+ #%% Interactive driver
144
+
145
if False:

    pass

    #%%

    # Never-executed scratch cell for interactive (notebook-style) testing;
    # run cells below manually after editing the paths.

    target_categories_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-only_yolov5x6.json'
    input_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-goannas-lilablanks_yolov5x6-20240223.json'

    # Pull the target mapping straight from another results file
    target_category_map = target_categories_file

    output_file = input_file.replace('.json','_remapped.json')
    assert output_file != input_file

    extra_category_handling = 'drop_if_unused'
    overwrite = True

    remap_detection_categories(input_file=input_file,
                               output_file=output_file,
                               target_category_map=target_category_map,
                               extra_category_handling=extra_category_handling,
                               overwrite=overwrite)
166
+
167
+
168
+ #%% Command-line driver
169
+
170
+ # TODO
@@ -1,12 +1,12 @@
1
- ########
2
- #
3
- # render_detection_confusion_matrix.py
4
- #
5
- # Given a CCT-formatted ground truth file and a MegaDetector-formatted results file,
6
- # render an HTML confusion matrix. Typically used for multi-class detectors. Currently
7
- # assumes a single class per image.
8
- #
9
- ########
1
+ """
2
+
3
+ render_detection_confusion_matrix.py
4
+
5
+ Given a CCT-formatted ground truth file and a MegaDetector-formatted results file,
6
+ render an HTML confusion matrix. Typically used for multi-class detectors. Currently
7
+ assumes a single class per image.
8
+
9
+ """
10
10
 
11
11
  #%% Imports and constants
12
12
 
@@ -33,7 +33,10 @@ from multiprocessing.pool import Pool
33
33
 
34
34
  #%% Support functions
35
35
 
36
- def image_to_output_file(im,preview_images_folder):
36
+ def _image_to_output_file(im,preview_images_folder):
37
+ """
38
+ Produces a clean filename from im (if [im] is a str) or im['file'] (if [im] is a dict).
39
+ """
37
40
 
38
41
  if isinstance(im,str):
39
42
  filename_relative = im
@@ -44,7 +47,10 @@ def image_to_output_file(im,preview_images_folder):
44
47
  return os.path.join(preview_images_folder,fn_clean)
45
48
 
46
49
 
47
- def render_image(im,render_image_constants):
50
+ def _render_image(im,render_image_constants):
51
+ """
52
+ Internal function for rendering a single image to the confusion matrix preview folder.
53
+ """
48
54
 
49
55
  filename_to_ground_truth_im = render_image_constants['filename_to_ground_truth_im']
50
56
  image_folder = render_image_constants['image_folder']
@@ -56,13 +62,13 @@ def render_image(im,render_image_constants):
56
62
 
57
63
  assert im['file'] in filename_to_ground_truth_im
58
64
 
59
- input_file = os.path.join(image_folder,im['file'])
60
- assert os.path.isfile(input_file)
61
-
62
- output_file = image_to_output_file(im,preview_images_folder)
65
+ output_file = _image_to_output_file(im,preview_images_folder)
63
66
  if os.path.isfile(output_file) and not force_render_images:
64
67
  return output_file
65
68
 
69
+ input_file = os.path.join(image_folder,im['file'])
70
+ assert os.path.isfile(input_file)
71
+
66
72
  detections_to_render = []
67
73
 
68
74
  for det in im['detections']:
@@ -82,8 +88,12 @@ def render_image(im,render_image_constants):
82
88
 
83
89
  #%% Main function
84
90
 
85
- def render_detection_confusion_matrix(ground_truth_file,results_file,image_folder,preview_folder,
86
- force_render_images=False, confidence_thresholds=None,
91
+ def render_detection_confusion_matrix(ground_truth_file,
92
+ results_file,
93
+ image_folder,
94
+ preview_folder,
95
+ force_render_images=False,
96
+ confidence_thresholds=None,
87
97
  rendering_confidence_thresholds=None,
88
98
  target_image_size=(1280,-1),
89
99
  parallelize_rendering=True,
@@ -101,9 +111,36 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
101
111
  confidence_thresholds and rendering_confidence_thresholds are dictionaries mapping
102
112
  class names to thresholds. "default" is a special token that will be used for all
103
113
  classes not otherwise assigned thresholds.
114
+
115
+ Args:
116
+ ground_truth_file (str): the CCT-formatted .json file with ground truth information
117
+ results_file (str): the MegaDetector results .json file
118
+ image_folder (str): the folder where images live; filenames in [ground_truth_file] and
119
+ [results_file] should be relative to this folder.
120
+ preview_folder (str): the output folder, i.e. the folder in which we'll create our nifty
121
+ HTML stuff.
122
+ force_rendering_images (bool, optional): if False, skips images that already exist
123
+ confidence_thresholds (dict, optional): a dictionary mapping class names to thresholds;
124
+ all classes not explicitly named here will use the threshold for the "default" category.
125
+ rendering_thresholds (dict, optional): a dictionary mapping class names to thresholds;
126
+ all classes not explicitly named here will use the threshold for the "default" category.
127
+ target_image_size (tuple, optional): output image size, as a pair of ints (width,height). If one
128
+ value is -1 and the other is not, aspect ratio is preserved. If both are -1, the original image
129
+ sizes are preserved.
130
+ parallelize_rendering (bool, optional): enable (default) or disable parallelization when rendering
131
+ parallelize_rendering_n_core (int, optional): number of threads or processes to use for rendering, only
132
+ used if parallelize_rendering is True
133
+ parallelize_rendering_with_threads: whether to use threads (True) or processes (False) when rendering,
134
+ only used if parallelize_rendering is True
135
+ job_name (str, optional): job name to include in big letters in the output file
136
+ model_file (str, optional) model filename to include in HTML output
137
+ empty_category_name (str, optional): special category name that we should treat as empty, typically
138
+ "empty"
139
+ html_image_list_options (dict, optional): options listed passed along to write_html_image_list;
140
+ see write_html_image_list for documentation.
104
141
  """
105
142
 
106
- #%% Argument and path handling
143
+ ##%% Argument and path handling
107
144
 
108
145
  preview_images_folder = os.path.join(preview_folder,'images')
109
146
  os.makedirs(preview_images_folder,exist_ok=True)
@@ -114,7 +151,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
114
151
  rendering_confidence_thresholds = {'default':0.4}
115
152
 
116
153
 
117
- #%% Load ground truth
154
+ ##%% Load ground truth
118
155
 
119
156
  with open(ground_truth_file,'r') as f:
120
157
  ground_truth_data_cct = json.load(f)
@@ -125,14 +162,14 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
125
162
  filename_to_ground_truth_im[im['file_name']] = im
126
163
 
127
164
 
128
- #%% Confirm that the ground truth images are present in the image folder
165
+ ##%% Confirm that the ground truth images are present in the image folder
129
166
 
130
167
  ground_truth_images = find_images(image_folder,return_relative_paths=True,recursive=True)
131
168
  assert len(ground_truth_images) == len(ground_truth_data_cct['images'])
132
169
  del ground_truth_images
133
170
 
134
171
 
135
- #%% Map images to categories
172
+ ##%% Map images to categories
136
173
 
137
174
  # gt_image_id_to_image = {im['id']:im for im in ground_truth_data_cct['images']}
138
175
  gt_image_id_to_annotations = defaultdict(list)
@@ -171,7 +208,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
171
208
  'No ground truth category assigned to {}'.format(filename)
172
209
 
173
210
 
174
- #%% Load results
211
+ ##%% Load results
175
212
 
176
213
  with open(results_file,'r') as f:
177
214
  md_formatted_results = json.load(f)
@@ -179,7 +216,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
179
216
  results_category_id_to_name = md_formatted_results['detection_categories']
180
217
 
181
218
 
182
- #%% Render images with detections
219
+ ##%% Render images with detections
183
220
 
184
221
  render_image_constants = {}
185
222
  render_image_constants['filename_to_ground_truth_im'] = filename_to_ground_truth_im
@@ -207,7 +244,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
207
244
  print('Rendering images with {} {}'.format(parallelize_rendering_n_cores,
208
245
  worker_string))
209
246
 
210
- _ = list(tqdm(pool.imap(partial(render_image,render_image_constants=render_image_constants),
247
+ _ = list(tqdm(pool.imap(partial(_render_image,render_image_constants=render_image_constants),
211
248
  md_formatted_results['images']),
212
249
  total=len(md_formatted_results['images'])))
213
250
 
@@ -215,15 +252,15 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
215
252
 
216
253
  # im = md_formatted_results['images'][0]
217
254
  for im in tqdm(md_formatted_results['images']):
218
- render_image(im,render_image_constants)
255
+ _render_image(im,render_image_constants)
219
256
 
220
257
 
221
- #%% Map images to predicted categories, and vice-versa
258
+ ##%% Map images to predicted categories, and vice-versa
222
259
 
223
260
  filename_to_predicted_categories = defaultdict(set)
224
261
  predicted_category_name_to_filenames = defaultdict(set)
225
262
 
226
- # im = md_results['images'][0]
263
+ # im = md_formatted_results['images'][0]
227
264
  for im in tqdm(md_formatted_results['images']):
228
265
 
229
266
  assert im['file'] in filename_to_ground_truth_im
@@ -243,13 +280,10 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
243
280
  # ...for each image
244
281
 
245
282
 
246
- #%% Create TP/TN/FP/FN lists
283
+ ##%% Create TP/TN/FP/FN lists
247
284
 
248
285
  category_name_to_image_lists = {}
249
286
 
250
- # These may not be identical; currently the ground truth contains an "unknown" category
251
- # results_category_names = sorted(list(results_category_id_to_name.values()))
252
-
253
287
  sub_page_tokens = ['fn','tn','fp','tp']
254
288
 
255
289
  for category_name in ground_truth_category_names:
@@ -296,11 +330,11 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
296
330
  assignment = 'tn'
297
331
 
298
332
  category_name_to_image_lists[category_name][assignment].append(filename)
299
-
333
+
300
334
  # ...for each filename
301
335
 
302
336
 
303
- #%% Create confusion matrix
337
+ ##%% Create confusion matrix
304
338
 
305
339
  gt_category_name_to_category_index = {}
306
340
 
@@ -333,8 +367,8 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
333
367
  results_category_name_to_confidence = defaultdict(int)
334
368
  for det in results_im['detections']:
335
369
  category_name = results_category_id_to_name[det['category']]
336
- detection_threshold = rendering_confidence_thresholds['default']
337
- if category_name in rendering_confidence_thresholds:
370
+ detection_threshold = confidence_thresholds['default']
371
+ if category_name in confidence_thresholds:
338
372
  detection_threshold = confidence_thresholds[category_name]
339
373
  if det['conf'] > detection_threshold:
340
374
  results_category_name_to_confidence[category_name] = max(
@@ -354,6 +388,8 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
354
388
 
355
389
  confusion_matrix[ground_truth_category_index,predicted_category_index] += 1
356
390
 
391
+ # ...for each file
392
+
357
393
  plt.ioff()
358
394
 
359
395
  fig_h = 3 + 0.3 * n_categories
@@ -380,7 +416,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
380
416
  # open_file(cm_figure_fn_abs)
381
417
 
382
418
 
383
- #%% Create HTML confusion matrix
419
+ ##%% Create HTML confusion matrix
384
420
 
385
421
  html_confusion_matrix = '<table class="result-table">\n'
386
422
  html_confusion_matrix += '<tr>\n'
@@ -420,7 +456,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
420
456
  title = '<b>Image</b>: {}, <b>Max conf</b>: {:0.3f}'.format(
421
457
  image_filename_relative, max_conf)
422
458
  image_link = 'images/' + os.path.basename(
423
- image_to_output_file(image_filename_relative,preview_images_folder))
459
+ _image_to_output_file(image_filename_relative,preview_images_folder))
424
460
  html_image_info = {
425
461
  'filename': image_link,
426
462
  'title': title,
@@ -524,7 +560,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
524
560
  title = '<b>Image</b>: {}, <b>Max conf</b>: {:0.3f}'.format(
525
561
  image_filename_relative, max_conf)
526
562
  image_link = 'images/' + os.path.basename(
527
- image_to_output_file(image_filename_relative,preview_images_folder))
563
+ _image_to_output_file(image_filename_relative,preview_images_folder))
528
564
  html_image_info = {
529
565
  'filename': image_link,
530
566
  'title': title,
@@ -615,7 +651,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
615
651
  f.write(html)
616
652
 
617
653
 
618
- #%% Prepare return data
654
+ ##%% Prepare return data
619
655
 
620
656
  confusion_matrix_info = {}
621
657
  confusion_matrix_info['html_file'] = target_html_file
@@ -1,24 +1,26 @@
1
- ########
2
- #
3
- # find_repeat_detections.py
4
- #
5
- # If you want to use this script, we recommend that you read the user's guide:
6
- #
7
- # https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing/postprocessing/repeat_detection_elimination
8
- #
9
- # Really, don't try to run this script without reading the user's guide, you'll think
10
- # it's more magical than it is.
11
- #
12
- # This script looks through a sequence of detections in the API output json file, and finds
13
- # candidates that might be "repeated false positives", i.e. that random branch that the
14
- # detector thinks is an animal/person/vehicle.
15
- #
16
- # Typically after running this script, you would do a manual step to remove
17
- # true positives, then run remove_repeat_detections to produce a final output file.
18
- #
19
- # There's no way that statement was self-explanatory; see the user's guide.
20
- #
21
- ########
1
+ r"""
2
+
3
+ find_repeat_detections.py
4
+
5
+ If you want to use this script, we recommend that you read the RDE user's guide:
6
+
7
+ https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing/postprocessing/repeat_detection_elimination
8
+
9
+ Really, don't try to run this script without reading the user's guide, you'll think
10
+ it's more magical than it is.
11
+
12
+ This script looks through a sequence of detections in the API output json file, and finds
13
+ candidates that might be "repeated false positives", i.e. that random branch that the
14
+ detector thinks is an animal/person/vehicle.
15
+
16
+ Typically after running this script, you would do a manual step to remove
17
+ true positives, then run remove_repeat_detections to produce a final output file.
18
+
19
+ There's no way that statement was self-explanatory; see the user's guide.
20
+
21
+ This script is just a command-line driver for repeat_detections_core.py.
22
+
23
+ """
22
24
 
23
25
  #%% Constants and imports
24
26
 
@@ -51,7 +53,7 @@ if False:
51
53
  options.maxSuspiciousDetectionSize = 0.2
52
54
 
53
55
  options.filterFileToLoad = ''
54
- options.filterFileToLoad = os.path.join(baseDir,'...\detectionIndex.json')
56
+ options.filterFileToLoad = os.path.join(baseDir,r'..\detectionIndex.json')
55
57
 
56
58
  options.debugMaxDir = -1
57
59
  options.debugMaxRenderDir = -1
@@ -75,15 +77,16 @@ def main():
75
77
  defaultOptions = repeat_detections_core.RepeatDetectionOptions()
76
78
 
77
79
  parser = argparse.ArgumentParser()
78
- parser.add_argument('inputFile')
80
+
81
+ parser.add_argument('inputFile', type=str, help='MD results .json file to process')
79
82
 
80
83
  parser.add_argument('--outputFile', action='store', type=str, default=None,
81
- help=".json file to write filtered results to... don't use this " + \
82
- "if you're going to do manual review of the repeat detection images")
84
+ help='.json file to write filtered results to... do not use this if you are going to ' + \
85
+ 'do manual review of the repeat detection images (which you should)')
83
86
 
84
87
  parser.add_argument('--imageBase', action='store', type=str, default='',
85
- help='Image base dir, relevant if renderHtml is True or if " + \
86
- "omitFilteringFolder is not set')
88
+ help='Image base dir, relevant if renderHtml is True or if ' + \
89
+ '"omitFilteringFolder" is not set')
87
90
 
88
91
  parser.add_argument('--outputBase', action='store', type=str, default='',
89
92
  help='HTML or filtering folder output dir')
@@ -99,22 +102,22 @@ def main():
99
102
  parser.add_argument('--iouThreshold', action='store', type=float,
100
103
  default=defaultOptions.iouThreshold,
101
104
  help='Detections with IOUs greater than this are considered ' + \
102
- '"the same detection"')
105
+ '"the same detection"')
103
106
 
104
107
  parser.add_argument('--occurrenceThreshold', action='store', type=int,
105
108
  default=defaultOptions.occurrenceThreshold,
106
109
  help='More than this many near-identical detections in a group ' + \
107
- '(e.g. a folder) is considered suspicious')
110
+ '(e.g. a folder) is considered suspicious')
108
111
 
109
112
  parser.add_argument('--minSuspiciousDetectionSize', action='store', type=float,
110
113
  default=defaultOptions.minSuspiciousDetectionSize,
111
114
  help='Detections smaller than this fraction of image area are not ' + \
112
- 'considered suspicious')
115
+ 'considered suspicious')
113
116
 
114
117
  parser.add_argument('--maxSuspiciousDetectionSize', action='store', type=float,
115
118
  default=defaultOptions.maxSuspiciousDetectionSize,
116
119
  help='Detections larger than this fraction of image area are not ' + \
117
- 'considered suspicious')
120
+ 'considered suspicious')
118
121
 
119
122
  parser.add_argument('--maxImagesPerFolder', action='store', type=int,
120
123
  default=defaultOptions.maxImagesPerFolder,
@@ -138,26 +141,32 @@ def main():
138
141
 
139
142
  parser.add_argument('--filterFileToLoad', action='store', type=str, default='',
140
143
  help='Path to detectionIndex.json, which should be inside a ' + \
141
- 'folder of images that are manually verified to _not_ ' + \
142
- 'contain valid animals')
144
+ 'folder of images that are manually verified to _not_ ' + \
145
+ 'contain valid animals')
143
146
 
144
147
  parser.add_argument('--omitFilteringFolder', action='store_false',
145
148
  dest='bWriteFilteringFolder',
146
149
  help='Should we create a folder of rendered detections for post-filtering?')
147
150
 
148
- parser.add_argument('--debugMaxDir', action='store', type=int, default=-1)
149
- parser.add_argument('--debugMaxRenderDir', action='store', type=int, default=-1)
150
- parser.add_argument('--debugMaxRenderDetection', action='store', type=int, default=-1)
151
- parser.add_argument('--debugMaxRenderInstance', action='store', type=int, default=-1)
151
+ parser.add_argument('--debugMaxDir', action='store', type=int, default=-1,
152
+ help='For debugging only, limit the number of directories we process')
153
+ parser.add_argument('--debugMaxRenderDir', action='store', type=int, default=-1,
154
+ help='For debugging only, limit the number of directories we render')
155
+ parser.add_argument('--debugMaxRenderDetection', action='store', type=int, default=-1,
156
+ help='For debugging only, limit the number of detections we process per folder')
157
+ parser.add_argument('--debugMaxRenderInstance', action='store', type=int, default=-1,
158
+ help='For debugging only, limit the number of instances we process per detection')
152
159
 
153
160
  parser.add_argument('--forceSerialComparisons', action='store_false',
154
- dest='bParallelizeComparisons')
161
+ dest='bParallelizeComparisons',
162
+ help='Disable parallelization during the comparison stage')
155
163
  parser.add_argument('--forceSerialRendering', action='store_false',
156
- dest='bParallelizeRendering')
164
+ dest='bParallelizeRendering',
165
+ help='Disable parallelization during the rendering stage')
157
166
 
158
167
  parser.add_argument('--maxOutputImageWidth', action='store', type=int,
159
168
  default=defaultOptions.maxOutputImageWidth,
160
- help='Maximum output size for thumbnail images')
169
+ help='Maximum output size for thumbnail images')
161
170
 
162
171
  parser.add_argument('--lineThickness', action='store', type=int,
163
172
  default=defaultOptions.lineThickness,
@@ -170,16 +179,17 @@ def main():
170
179
  parser.add_argument('--nDirLevelsFromLeaf', type=int,
171
180
  default=defaultOptions.nDirLevelsFromLeaf,
172
181
  help='Number of levels from the leaf folders to use for repeat ' + \
173
- 'detection (0 == leaves)')
182
+ 'detection (0 == leaves)')
174
183
 
175
184
  parser.add_argument('--bRenderOtherDetections', action='store_true',
176
185
  help='Show non-target detections in light gray on each image')
177
186
 
178
187
  parser.add_argument('--bRenderDetectionTiles', action='store_true',
179
- help='Should we render a grid showing every instance for each detection?')
188
+ help='Should we render a grid showing every instance (up to a limit) for each detection?')
180
189
 
181
190
  parser.add_argument('--detectionTilesPrimaryImageWidth', type=int,
182
- default=defaultOptions.detectionTilesPrimaryImageWidth)
191
+ default=defaultOptions.detectionTilesPrimaryImageWidth,
192
+ help='The width of the main image when rendering images with detection tiles')
183
193
 
184
194
  parser.add_argument('--renderHtml', action='store_true',
185
195
  dest='bRenderHtml', help='Should we render HTML output?')
@@ -197,6 +207,5 @@ def main():
197
207
 
198
208
  repeat_detections_core.find_repeat_detections(args.inputFile, args.outputFile, options)
199
209
 
200
-
201
210
  if __name__ == '__main__':
202
211
  main()
@@ -1,15 +1,15 @@
1
- ########
2
- #
3
- # remove_repeat_detections.py
4
- #
5
- # Used after running find_repeat_detections, then manually filtering the results,
6
- # to create a final filtered output file.
7
- #
8
- # If you want to use this script, we recommend that you read the user's guide:
9
- #
10
- # https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing/postprocessing/repeat_detection_elimination
11
- #
12
- ########
1
+ """
2
+
3
+ remove_repeat_detections.py
4
+
5
+ Used after running find_repeat_detections, then manually filtering the results,
6
+ to create a final filtered output file.
7
+
8
+ If you want to use this script, we recommend that you read the RDE user's guide:
9
+
10
+ https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing/postprocessing/repeat_detection_elimination
11
+
12
+ """
13
13
 
14
14
  #%% Constants and imports
15
15
 
@@ -21,7 +21,19 @@ from api.batch_processing.postprocessing.repeat_detection_elimination import rep
21
21
  #%% Main function
22
22
 
23
23
  def remove_repeat_detections(inputFile,outputFile,filteringDir):
24
-
24
+ """
25
+ Given an index file that was produced in a first pass through find_repeat_detections,
26
+ and a folder of images (from which the user has deleted images they don't want removed),
27
+ remove the identified repeat detections from a set of MD results and write to a new file.
28
+
29
+ Args:
30
+ inputFile (str): .json file of MD results, from which we should remove repeat detections
31
+ outputFile (str): output .json file to which we should write MD results (with repeat
32
+ detections removed)
33
+ filteringDir (str): the folder produced by find_repeat_detections, containing a
34
+ detectionIndex.json file
35
+ """
36
+
25
37
  assert os.path.isfile(inputFile), "Can't find file {}".format(inputFile)
26
38
  assert os.path.isdir(filteringDir), "Can't find folder {}".format(filteringDir)
27
39
  options = repeat_detections_core.RepeatDetectionOptions()
@@ -66,6 +78,5 @@ def main():
66
78
  args = parser.parse_args()
67
79
  remove_repeat_detections(args.inputFile, args.outputFile, args.filteringDir)
68
80
 
69
-
70
81
  if __name__ == '__main__':
71
82
  main()