megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (201) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,228 @@
1
"""

prepare_classification_script_mc.py

Notebook-y script used to prepare a series of shell commands to run MegaClassifier
on a MegaDetector result set.

Differs from prepare_classification_script.py only in the final class mapping step.

"""

#%% Job options

import os

organization_name = 'idfg'
job_name = 'idfg-2022-01-27-EOE2021S_Group6'
input_filename = 'idfg-2022-01-27-EOE2021S_Group6_detections.filtered_rde_0.60_0.85_30_0.20.json'
image_base = '/datadrive/idfg/EOE2021S_Group6'
crop_path = os.path.join(os.path.expanduser('~/crops'), job_name + '_crops')
device_id = 0

working_dir_base = os.path.join(os.path.expanduser('~/postprocessing'),
                                organization_name,
                                job_name)

output_base = os.path.join(working_dir_base, 'combined_api_outputs')

assert os.path.isdir(working_dir_base)
assert os.path.isdir(output_base)

output_file = os.path.join(working_dir_base, 'run_megaclassifier_' + job_name + '.sh')

input_files = [
    os.path.join(os.path.expanduser('~/postprocessing'),
                 organization_name,
                 job_name,
                 'combined_api_outputs',
                 input_filename)
]

for fn in input_files:
    assert os.path.isfile(fn)


#%% Constants

classifier_base = os.path.expanduser('~/models/camera_traps/megaclassifier/v0.1/')
assert os.path.isdir(classifier_base)

checkpoint_path = os.path.join(classifier_base, 'v0.1_efficientnet-b3_compiled.pt')
assert os.path.isfile(checkpoint_path)

classifier_categories_path = os.path.join(classifier_base, 'v0.1_index_to_name.json')
assert os.path.isfile(classifier_categories_path)

target_mapping_path = os.path.join(classifier_base, 'idfg_to_megaclassifier_labels.json')
assert os.path.isfile(target_mapping_path)

classifier_output_suffix = '_megaclassifier_output.csv.gz'
final_output_suffix = '_megaclassifier.json'

threshold_str = '0.65'
n_threads_str = '50'
image_size_str = '300'
batch_size_str = '64'
num_workers_str = '8'
logdir = working_dir_base

classification_threshold_str = '0.05'

# This is just passed along to the metadata in the output file, it has no impact
# on how the classification scripts run.
typical_classification_threshold_str = '0.75'

classifier_name = 'megaclassifier_v0.1_efficientnet-b3'


#%% Set up environment

commands = []
# commands.append('cd MegaDetector/classification\n')
# commands.append('conda activate cameratraps-classifier\n')


#%% Crop images

commands.append('\n### Cropping ###\n')

# One crop_detections.py invocation per input file
for fn in input_files:

    input_file_path = fn

    crop_lines = [
        'python crop_detections.py',
        input_file_path,
        crop_path,
        f'--images-dir "{image_base}"',
        f'--threshold "{threshold_str}"',
        '--square-crops ',  # trailing space preserved from the original command
        f'--threads "{n_threads_str}"',
        f'--logdir "{logdir}"',
    ]
    # Each argument goes on its own shell-continuation line
    crop_cmd = f'\n# Cropping {fn}\n' + ' \\\n'.join(crop_lines) + ' \\\n\n'
    commands.append(crop_cmd)


#%% Run classifier

commands.append('\n### Classifying ###\n')

# One run_classifier.py invocation per input file
for fn in input_files:

    input_file_path = fn
    classifier_output_path = crop_path + classifier_output_suffix

    classify_lines = [
        'python run_classifier.py',
        checkpoint_path,
        crop_path,
        classifier_output_path,
        f'--detections-json "{input_file_path}"',
        f'--classifier-categories "{classifier_categories_path}"',
        f'--image-size "{image_size_str}"',
        f'--batch-size "{batch_size_str}"',
        f'--num-workers "{num_workers_str}"',
    ]
    classify_cmd = f'\n# Classifying {fn}\n' + ' \\\n'.join(classify_lines) + ' \\\n'

    if device_id is not None:
        classify_cmd += '--device {}'.format(device_id)

    classify_cmd += '\n\n'
    commands.append(classify_cmd)


#%% Remap classifier outputs

commands.append('\n### Remapping ###\n')

# Map MegaClassifier's output categories onto the target label set
for fn in input_files:

    input_file_path = fn
    classifier_output_path = crop_path + classifier_output_suffix
    classifier_output_path_remapped = \
        classifier_output_path.replace(".csv.gz", "_remapped.csv.gz")
    assert classifier_output_path != classifier_output_path_remapped

    output_label_index = classifier_output_path_remapped.replace(
        "_remapped.csv.gz", "_label_index_remapped.json")

    remap_lines = [
        'python aggregate_classifier_probs.py',
        classifier_output_path,
        f'--target-mapping "{target_mapping_path}"',
        f'--output-csv "{classifier_output_path_remapped}"',
        f'--output-label-index "{output_label_index}"',
    ]
    remap_cmd = f'\n# Remapping {fn}\n' + ' \\\n'.join(remap_lines) + ' \\\n\n'
    commands.append(remap_cmd)


#%% Merge classification and detection outputs

commands.append('\n### Merging ###\n')

# Merge remapped classification results back into the detection .json files
for fn in input_files:

    input_file_path = fn
    classifier_output_path = crop_path + classifier_output_suffix

    classifier_output_path_remapped = \
        classifier_output_path.replace(".csv.gz", "_remapped.csv.gz")

    output_label_index = classifier_output_path_remapped.replace(
        "_remapped.csv.gz", "_label_index_remapped.json")

    final_output_path = os.path.join(
        output_base,
        os.path.basename(classifier_output_path)).replace(classifier_output_suffix,
                                                          final_output_suffix)
    final_output_path = final_output_path.replace('_detections', '')
    final_output_path = final_output_path.replace('_crops', '')

    merge_lines = [
        'python merge_classification_detection_output.py',
        classifier_output_path_remapped,
        output_label_index,
        f'--output-json "{final_output_path}"',
        f'--detection-json "{input_file_path}"',
        f'--classifier-name "{classifier_name}"',
        f'--threshold "{classification_threshold_str}"',
        f'--typical-confidence-threshold "{typical_classification_threshold_str}"',
    ]
    merge_cmd = f'\n# Merging {fn}\n' + ' \\\n'.join(merge_lines) + ' \\\n\n'
    commands.append(merge_cmd)


#%% Write everything out

with open(output_file, 'w') as f:
    for s in commands:
        f.write(s)

# Make the generated script executable for the owner
import stat
st = os.stat(output_file)
os.chmod(output_file, st.st_mode | stat.S_IEXEC)
@@ -0,0 +1,287 @@
1
+ """
2
+
3
+ run_classifier.py
4
+
5
+ Run a species classifier.
6
+
7
+ This script is the classifier counterpart to detection/run_tf_detector_batch.py.
8
+ This script takes as input:
9
+ 1) a detections JSON file, usually the output of run_tf_detector_batch.py or the
10
+ output of the Batch API in the "Batch processing API output format"
11
+ 2) a path to a directory containing crops of bounding boxes from the detections
12
+ JSON file
13
+ 3) a path to a PyTorch TorchScript compiled model file
14
+ 4) (if the model is EfficientNet) an image size
15
+
16
+ By default, this script overwrites the detections JSON file, adding in
17
+ classification results. To output a new JSON file, use the --output argument.
18
+
19
+ """
20
+
21
+ #%% Imports
22
+
23
+ from __future__ import annotations
24
+
25
+ import argparse
26
+ import json
27
+ import os
28
+
29
+ from tqdm import tqdm
30
+ from typing import Any
31
+ from collections.abc import Callable, Sequence
32
+
33
+ import pandas as pd
34
+ import PIL
35
+ import torch
36
+ import torch.utils
37
+ import torchvision as tv
38
+ from torchvision.datasets.folder import default_loader
39
+
40
+ from megadetector.classification import train_classifier
41
+
42
+
43
+ #%% Example usage
44
+
45
+ """
46
+ python run_classifier.py \
47
+ detections.json \
48
+ /path/to/crops \
49
+ /path/to/model.pt \
50
+ --image-size 224
51
+ """
52
+
53
+
54
+ #%% Classes
55
+
56
class SimpleDataset(torch.utils.data.Dataset):
    """
    Minimal dataset over a flat list of image files.

    Each item is loaded with torchvision's default_loader and, when a
    transform is supplied, passed through it; items are returned as
    (image, file name) tuples.
    """

    def __init__(self, img_files: Sequence[str],
                 images_dir: str | None = None,
                 transform: Callable[[PIL.Image.Image], Any] | None = None):
        """Creates a SimpleDataset."""
        self.img_files = img_files
        self.images_dir = images_dir
        self.transform = transform

    def __getitem__(self, index: int) -> tuple[Any, str]:
        """
        Returns: tuple, (img, img_file)
        """
        img_file = self.img_files[index]
        # File names are interpreted relative to images_dir when one was given
        img_path = img_file if self.images_dir is None \
            else os.path.join(self.images_dir, img_file)
        img = default_loader(img_path)
        if self.transform is not None:
            img = self.transform(img)
        return img, img_file

    def __len__(self) -> int:
        return len(self.img_files)
85
+
86
+
87
+ #%% Support functions
88
+
89
def create_loader(cropped_images_dir: str,
                  detections_json_path: str | None,
                  img_size: int,
                  batch_size: int,
                  num_workers: int
                  ) -> torch.utils.data.DataLoader:
    """
    Creates a DataLoader over image crops.

    Args:
        cropped_images_dir: str, path to image crops
        detections_json_path: optional str, path to detections JSON; when
            given, only crops corresponding to detections in that file are
            included
        img_size: int, resizes smallest side of image to img_size,
            then center-crops to (img_size, img_size)
        batch_size: int, batch size in dataloader
        num_workers: int, # of workers in dataloader
    """

    crop_files = []

    if detections_json_path is None:
        # No detections file: recursively enumerate every file under
        # cropped_images_dir, stored relative to that directory
        for subdir, _, files in os.walk(cropped_images_dir):
            rel_dir = os.path.relpath(subdir, cropped_images_dir)
            crop_files.extend(os.path.join(rel_dir, file_name)
                              for file_name in files)
    else:
        # Only find crops of images from the detections JSON
        print('Loading detections JSON')
        with open(detections_json_path, 'r') as f:
            js = json.load(f)
        detections = {img['file']: img for img in js['images']}
        detector_version = js['info']['detector']

        for img_file, info_dict in tqdm(detections.items()):
            dets = info_dict.get('detections', None)
            if dets is None:
                continue
            for i in range(len(dets)):
                # Crop file names follow crop_detections.py's naming scheme
                crop_filename = img_file + f'___crop{i:02d}_{detector_version}.jpg'
                if os.path.exists(os.path.join(cropped_images_dir, crop_filename)):
                    crop_files.append(crop_filename)

    transform = tv.transforms.Compose([
        # Resizes the smaller edge to img_size
        tv.transforms.Resize(img_size, interpolation=PIL.Image.BICUBIC),
        tv.transforms.CenterCrop(img_size),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=train_classifier.MEANS,
                                std=train_classifier.STDS, inplace=True)
    ])

    dataset = SimpleDataset(img_files=crop_files, images_dir=cropped_images_dir,
                            transform=transform)
    assert len(dataset) > 0
    return torch.utils.data.DataLoader(dataset,
                                       batch_size=batch_size,
                                       num_workers=num_workers,
                                       pin_memory=True)
150
+
151
+
152
+ #%% Main function
153
+
154
def main(model_path: str,
         cropped_images_dir: str,
         output_csv_path: str,
         detections_json_path: str | None,
         classifier_categories_json_path: str | None,
         img_size: int,
         batch_size: int,
         num_workers: int,
         device_id: int | None = None) -> None:
    """
    Runs a TorchScript-compiled classifier over a directory of crops and
    writes per-crop class probabilities to a CSV file.

    Args:
        model_path: str, path to TorchScript compiled model file
        cropped_images_dir: str, path to directory containing cropped images
        output_csv_path: str, path to the output CSV
        detections_json_path: optional str, detections JSON used to filter
            which crops are classified
        classifier_categories_json_path: optional str, JSON file mapping
            stringified category indices ("0", "1", ...) to class names
        img_size: int, input image size for the model
        batch_size: int, batch size for evaluating the model
        num_workers: int, # of dataloader workers
        device_id: optional int, preferred CUDA device
    """

    # Evaluating with accimage is much faster than Pillow or Pillow-SIMD, but accimage
    # is Linux-only.
    try:
        import accimage  # noqa
        tv.set_image_backend('accimage')
    # Deliberately best-effort: fall back to the default image backend on any
    # failure.  Was a bare "except:", which would also have swallowed
    # KeyboardInterrupt/SystemExit; "except Exception" keeps the best-effort
    # behavior without doing that.
    except Exception:
        print('Warning: could not start accimage backend (ignore this if you\'re not using Linux)')

    # create dataset
    print('Creating data loader')
    loader = create_loader(
        cropped_images_dir, detections_json_path=detections_json_path,
        img_size=img_size, batch_size=batch_size, num_workers=num_workers)

    label_names = None
    if classifier_categories_json_path is not None:
        with open(classifier_categories_json_path, 'r') as f:
            categories = json.load(f)
        # Categories are keyed by consecutive stringified integer indices
        label_names = [categories[str(i)] for i in range(len(categories))]

    # create model
    print('Loading saved model')
    model = torch.jit.load(model_path)
    model, device = train_classifier.prep_device(model, device_id=device_id)

    test_epoch(model, loader, device=device, label_names=label_names,
               output_csv_path=output_csv_path)
191
+
192
+
193
def test_epoch(model: torch.nn.Module,
               loader: torch.utils.data.DataLoader,
               device: torch.device,
               label_names: Sequence[str] | None,
               output_csv_path: str) -> None:
    """
    Runs the model over every batch in [loader] for one epoch.

    Results are written to the output CSV one batch at a time: the first
    batch creates the file and writes the header, each later batch appends.

    Args:
        model: torch.nn.Module
        loader: torch.utils.data.DataLoader
        device: torch.device
        label_names: optional list of str, label names
        output_csv_path: str
    """

    # Set dropout and BN layers to eval mode
    model.eval()

    first_batch = True

    with torch.no_grad():
        for inputs, img_files in tqdm(loader):
            inputs = inputs.to(device, non_blocking=True)
            probs = torch.nn.functional.softmax(model(inputs), dim=1).cpu().numpy()

            if label_names is None:
                # No category mapping supplied: number the columns "0", "1", ...
                label_names = [str(i) for i in range(probs.shape[1])]

            batch_df = pd.DataFrame(data=probs, columns=label_names,
                                    index=pd.Index(img_files, name='path'))
            batch_df.to_csv(output_csv_path, index=True,
                            header=first_batch,
                            mode='w' if first_batch else 'a')

            first_batch = False
233
+
234
+
235
+ #%% Command-line driver
236
+
237
+ def _parse_args() -> argparse.Namespace:
238
+
239
+ parser = argparse.ArgumentParser(
240
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
241
+ description='Run classifier.')
242
+ parser.add_argument(
243
+ 'model',
244
+ help='path to TorchScript compiled model')
245
+ parser.add_argument(
246
+ 'crops_dir',
247
+ help='path to directory containing cropped images')
248
+ parser.add_argument(
249
+ 'output',
250
+ help='path to save CSV file with classifier results (can use .csv.gz '
251
+ 'extension for compression)')
252
+ parser.add_argument(
253
+ '-d', '--detections-json',
254
+ help='path to detections JSON file, used to filter paths within '
255
+ 'crops_dir')
256
+ parser.add_argument(
257
+ '-c', '--classifier-categories',
258
+ help='path to JSON file for classifier categories. If not given, '
259
+ 'classes are numbered "0", "1", "2", ...')
260
+ parser.add_argument(
261
+ '--image-size', type=int, default=224,
262
+ help='size of input image to model, usually 224px, but may be larger '
263
+ 'especially for EfficientNet models')
264
+ parser.add_argument(
265
+ '--batch-size', type=int, default=1,
266
+ help='batch size for evaluating model')
267
+ parser.add_argument(
268
+ '--device', type=int, default=None,
269
+ help='preferred CUDA device')
270
+ parser.add_argument(
271
+ '--num-workers', type=int, default=8,
272
+ help='# of workers for data loading')
273
+ return parser.parse_args()
274
+
275
+
276
if __name__ == '__main__':

    # Command-line entry point: parse arguments and run the classifier
    args = _parse_args()
    main(model_path=args.model,
         cropped_images_dir=args.crops_dir,
         output_csv_path=args.output,
         detections_json_path=args.detections_json,
         classifier_categories_json_path=args.classifier_categories,
         img_size=args.image_size,
         batch_size=args.batch_size,
         num_workers=args.num_workers,
         device_id=args.device)
@@ -0,0 +1,110 @@
1
+ """
2
+
3
+ save_mislabeled.py
4
+
5
+ Update the list of known mislabeled images in MegaDB.
6
+
7
+ List of known mislabeled images is stored in Azure Blob Storage.
8
+ * storage account: cameratrapsc
9
+ * container: classifier-training
10
+ * blob: megadb_mislabeled/{dataset}.csv, one file per dataset
11
+
12
+ Each file megadb_mislabeled/{dataset}.csv has two columns:
13
+
14
+ * 'file': str, blob name
15
+
16
+ * 'correct_class': optional str, correct dataset class
17
+
18
+ if empty, indicates that the existing class in MegaDB is inaccurate, but
19
+ the correct class is unknown.
20
+
21
+ This script assumes that the classifier-training container is mounted locally.
22
+
23
+ Takes as input a CSV file (output from Timelapse) with the following columns:
24
+
25
+ * 'File': str, <blob_basename>
26
+ * 'RelativePath': str, <dataset>\<blob_dirname>
27
+ * 'mislabeled': str, values in ['true', 'false']
28
+ * 'correct_class': either empty or str
29
+
30
+ """
31
+
32
+ #%% Imports
33
+
34
+ import argparse
35
+ import os
36
+ import pathlib
37
+
38
+ import pandas as pd
39
+
40
+
41
+ #%% Main function
42
+
43
def update_mislabeled_images(container_path: str, input_csv_path: str) -> None:
    """
    Merges newly-flagged mislabeled images from a Timelapse output CSV into
    the per-dataset megadb_mislabeled/{dataset}.csv files under
    [container_path].

    Args:
        container_path: str, path to locally-mounted classifier-training
            container
        input_csv_path: str, path to CSV output by Timelapse, with columns
            'File', 'RelativePath' (<dataset>\\<blob_dirname>), 'mislabeled'
            (bool), and 'correct_class' (optional str)

    Raises:
        AssertionError: if 'mislabeled' is not boolean, if a row has a
            'correct_class' but is not marked mislabeled, or if a file is
            re-submitted with a different correct class than already stored
    """

    df = pd.read_csv(input_csv_path, index_col=False)

    # error checking
    assert df['mislabeled'].dtype == bool

    # any row with 'correct_class' should be marked 'mislabeled'
    tmp = (df['correct_class'].notna() & df['mislabeled']).sum()
    assert df['correct_class'].notna().sum() == tmp

    # filter to only the mislabeled rows
    df = df[df['mislabeled']].copy()

    # convert '\' to '/'
    df['RelativePath'] = df['RelativePath'].map(
        lambda p: pathlib.PureWindowsPath(p).as_posix())

    # RelativePath is <dataset>/<blob_dirname>; the blob name within the
    # dataset is <blob_dirname>/<File>
    df[['dataset', 'blob_dirname']] = df['RelativePath'].str.split(
        '/', n=1, expand=True)
    df['file'] = df['blob_dirname'] + '/' + df['File']

    for ds, ds_df in df.groupby('dataset'):

        sr_path = os.path.join(container_path, 'megadb_mislabeled', f'{ds}.csv')
        if os.path.exists(sr_path):
            # The 'squeeze' keyword of read_csv was deprecated in pandas 1.4
            # and removed in pandas 2.0; DataFrame.squeeze('columns') is the
            # supported equivalent, and (unlike squeeze with no axis) always
            # yields a Series for this single-column file.
            old_sr = pd.read_csv(sr_path, index_col='file').squeeze('columns')
        else:
            old_sr = pd.Series(index=pd.Index([], name='file'),
                               dtype='str', name='correct_class')

        ds_sr = ds_df.set_index('file', verify_integrity=True)['correct_class']

        # verify that overlapping indices are the same
        overlap_index = ds_sr.index.intersection(old_sr.index)
        assert ds_sr.loc[overlap_index].equals(old_sr.loc[overlap_index])

        # "add" any new mislabelings
        new_indices = ds_sr.index.difference(old_sr.index)
        new_sr = pd.concat([old_sr, ds_sr.loc[new_indices]],
                           verify_integrity=True)
        new_sr.sort_index(inplace=True)

        # write out results
        new_sr.to_csv(sr_path, index=True)
87
+
88
+
89
+ #%% Command-line driver
90
+
91
+ def _parse_args() -> argparse.Namespace:
92
+
93
+ parser = argparse.ArgumentParser(
94
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
95
+ description='Merges classification results with Batch Detection API '
96
+ 'outputs.')
97
+ parser.add_argument(
98
+ 'container_path',
99
+ help='path to locally-mounted classifier-training container')
100
+ parser.add_argument(
101
+ 'input_csv',
102
+ help='path to CSV file output by Timelapse')
103
+ return parser.parse_args()
104
+
105
+
106
if __name__ == '__main__':

    # Command-line entry point: parse arguments and merge the Timelapse CSV
    # into the per-dataset mislabeled-image lists
    args = _parse_args()
    update_mislabeled_images(container_path=args.container_path,
                             input_csv_path=args.input_csv)