megadetector 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
|
@@ -1,228 +1,228 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
#%% Job options
|
|
13
|
-
|
|
14
|
-
import os
|
|
15
|
-
|
|
16
|
-
organization_name = 'idfg'
|
|
17
|
-
job_name = 'idfg-2022-01-27-EOE2021S_Group6'
|
|
18
|
-
input_filename = 'idfg-2022-01-27-EOE2021S_Group6_detections.filtered_rde_0.60_0.85_30_0.20.json'
|
|
19
|
-
image_base = '/datadrive/idfg/EOE2021S_Group6'
|
|
20
|
-
crop_path = os.path.join(os.path.expanduser('~/crops'),job_name + '_crops')
|
|
21
|
-
device_id = 0
|
|
22
|
-
|
|
23
|
-
working_dir_base = os.path.join(os.path.expanduser('~/postprocessing'),
|
|
24
|
-
organization_name,
|
|
25
|
-
job_name)
|
|
26
|
-
|
|
27
|
-
output_base = os.path.join(working_dir_base,'combined_api_outputs')
|
|
28
|
-
|
|
29
|
-
assert os.path.isdir(working_dir_base)
|
|
30
|
-
assert os.path.isdir(output_base)
|
|
31
|
-
|
|
32
|
-
output_file = os.path.join(working_dir_base,'run_megaclassifier_' + job_name + '.sh')
|
|
33
|
-
|
|
34
|
-
input_files = [
|
|
35
|
-
os.path.join(
|
|
36
|
-
os.path.expanduser('~/postprocessing'),
|
|
37
|
-
organization_name,
|
|
38
|
-
job_name,
|
|
39
|
-
'combined_api_outputs',
|
|
40
|
-
input_filename
|
|
41
|
-
)
|
|
42
|
-
]
|
|
43
|
-
|
|
44
|
-
for fn in input_files:
|
|
45
|
-
assert os.path.isfile(fn)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
#%% Constants
|
|
49
|
-
|
|
50
|
-
classifier_base = os.path.expanduser('~/models/camera_traps/megaclassifier/v0.1/')
|
|
51
|
-
assert os.path.isdir(classifier_base)
|
|
52
|
-
|
|
53
|
-
checkpoint_path = os.path.join(classifier_base,'v0.1_efficientnet-b3_compiled.pt')
|
|
54
|
-
assert os.path.isfile(checkpoint_path)
|
|
55
|
-
|
|
56
|
-
classifier_categories_path = os.path.join(classifier_base,'v0.1_index_to_name.json')
|
|
57
|
-
assert os.path.isfile(classifier_categories_path)
|
|
58
|
-
|
|
59
|
-
target_mapping_path = os.path.join(classifier_base,'idfg_to_megaclassifier_labels.json')
|
|
60
|
-
assert os.path.isfile(target_mapping_path)
|
|
61
|
-
|
|
62
|
-
classifier_output_suffix = '_megaclassifier_output.csv.gz'
|
|
63
|
-
final_output_suffix = '_megaclassifier.json'
|
|
64
|
-
|
|
65
|
-
threshold_str = '0.65'
|
|
66
|
-
n_threads_str = '50'
|
|
67
|
-
image_size_str = '300'
|
|
68
|
-
batch_size_str = '64'
|
|
69
|
-
num_workers_str = '8'
|
|
70
|
-
logdir = working_dir_base
|
|
71
|
-
|
|
72
|
-
classification_threshold_str = '0.05'
|
|
73
|
-
|
|
74
|
-
# This is just passed along to the metadata in the output file, it has no impact
|
|
75
|
-
# on how the classification scripts run.
|
|
76
|
-
typical_classification_threshold_str = '0.75'
|
|
77
|
-
|
|
78
|
-
classifier_name = 'megaclassifier_v0.1_efficientnet-b3'
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
#%% Set up environment
|
|
82
|
-
|
|
83
|
-
commands = []
|
|
84
|
-
# commands.append('cd MegaDetector/classification\n')
|
|
85
|
-
# commands.append('conda activate cameratraps-classifier\n')
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
#%% Crop images
|
|
89
|
-
|
|
90
|
-
commands.append('\n### Cropping ###\n')
|
|
91
|
-
|
|
92
|
-
# fn = input_files[0]
|
|
93
|
-
for fn in input_files:
|
|
94
|
-
|
|
95
|
-
input_file_path = fn
|
|
96
|
-
crop_cmd = ''
|
|
97
|
-
|
|
98
|
-
crop_comment = '\n# Cropping {}\n'.format(fn)
|
|
99
|
-
crop_cmd += crop_comment
|
|
100
|
-
|
|
101
|
-
crop_cmd += "python crop_detections.py \\\n" + \
|
|
102
|
-
input_file_path + ' \\\n' + \
|
|
103
|
-
crop_path + ' \\\n' + \
|
|
104
|
-
'--images-dir "' + image_base + '"' + ' \\\n' + \
|
|
105
|
-
'--threshold "' + threshold_str + '"' + ' \\\n' + \
|
|
106
|
-
'--square-crops ' + ' \\\n' + \
|
|
107
|
-
'--threads "' + n_threads_str + '"' + ' \\\n' + \
|
|
108
|
-
'--logdir "' + logdir + '"' + ' \\\n' + \
|
|
109
|
-
'\n'
|
|
110
|
-
crop_cmd = '{}'.format(crop_cmd)
|
|
111
|
-
commands.append(crop_cmd)
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
#%% Run classifier
|
|
115
|
-
|
|
116
|
-
commands.append('\n### Classifying ###\n')
|
|
117
|
-
|
|
118
|
-
# fn = input_files[0]
|
|
119
|
-
for fn in input_files:
|
|
120
|
-
|
|
121
|
-
input_file_path = fn
|
|
122
|
-
classifier_output_path = crop_path + classifier_output_suffix
|
|
123
|
-
|
|
124
|
-
classify_cmd = ''
|
|
125
|
-
|
|
126
|
-
classify_comment = '\n# Classifying {}\n'.format(fn)
|
|
127
|
-
classify_cmd += classify_comment
|
|
128
|
-
|
|
129
|
-
classify_cmd += "python run_classifier.py \\\n" + \
|
|
130
|
-
checkpoint_path + ' \\\n' + \
|
|
131
|
-
crop_path + ' \\\n' + \
|
|
132
|
-
classifier_output_path + ' \\\n' + \
|
|
133
|
-
'--detections-json "' + input_file_path + '"' + ' \\\n' + \
|
|
134
|
-
'--classifier-categories "' + classifier_categories_path + '"' + ' \\\n' + \
|
|
135
|
-
'--image-size "' + image_size_str + '"' + ' \\\n' + \
|
|
136
|
-
'--batch-size "' + batch_size_str + '"' + ' \\\n' + \
|
|
137
|
-
'--num-workers "' + num_workers_str + '"' + ' \\\n'
|
|
138
|
-
|
|
139
|
-
if device_id is not None:
|
|
140
|
-
classify_cmd += '--device {}'.format(device_id)
|
|
141
|
-
|
|
142
|
-
classify_cmd += '\n\n'
|
|
143
|
-
classify_cmd = '{}'.format(classify_cmd)
|
|
144
|
-
commands.append(classify_cmd)
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
#%% Remap classifier outputs
|
|
148
|
-
|
|
149
|
-
commands.append('\n### Remapping ###\n')
|
|
150
|
-
|
|
151
|
-
# fn = input_files[0]
|
|
152
|
-
for fn in input_files:
|
|
153
|
-
|
|
154
|
-
input_file_path = fn
|
|
155
|
-
classifier_output_path = crop_path + classifier_output_suffix
|
|
156
|
-
classifier_output_path_remapped = \
|
|
157
|
-
classifier_output_path.replace(".csv.gz","_remapped.csv.gz")
|
|
158
|
-
assert not (classifier_output_path == classifier_output_path_remapped)
|
|
159
|
-
|
|
160
|
-
output_label_index = classifier_output_path_remapped.replace(
|
|
161
|
-
"_remapped.csv.gz","_label_index_remapped.json")
|
|
162
|
-
|
|
163
|
-
remap_cmd = ''
|
|
164
|
-
|
|
165
|
-
remap_comment = '\n# Remapping {}\n'.format(fn)
|
|
166
|
-
remap_cmd += remap_comment
|
|
167
|
-
|
|
168
|
-
remap_cmd += "python aggregate_classifier_probs.py \\\n" + \
|
|
169
|
-
classifier_output_path + ' \\\n' + \
|
|
170
|
-
'--target-mapping "' + target_mapping_path + '"' + ' \\\n' + \
|
|
171
|
-
'--output-csv "' + classifier_output_path_remapped + '"' + ' \\\n' + \
|
|
172
|
-
'--output-label-index "' + output_label_index + '"' + ' \\\n' + \
|
|
173
|
-
'\n'
|
|
174
|
-
|
|
175
|
-
remap_cmd = '{}'.format(remap_cmd)
|
|
176
|
-
commands.append(remap_cmd)
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
#%% Merge classification and detection outputs
|
|
180
|
-
|
|
181
|
-
commands.append('\n### Merging ###\n')
|
|
182
|
-
|
|
183
|
-
# fn = input_files[0]
|
|
184
|
-
for fn in input_files:
|
|
185
|
-
|
|
186
|
-
input_file_path = fn
|
|
187
|
-
classifier_output_path = crop_path + classifier_output_suffix
|
|
188
|
-
|
|
189
|
-
classifier_output_path_remapped = \
|
|
190
|
-
classifier_output_path.replace(".csv.gz","_remapped.csv.gz")
|
|
191
|
-
|
|
192
|
-
output_label_index = classifier_output_path_remapped.replace(
|
|
193
|
-
"_remapped.csv.gz","_label_index_remapped.json")
|
|
194
|
-
|
|
195
|
-
final_output_path = os.path.join(output_base,
|
|
196
|
-
os.path.basename(classifier_output_path)).\
|
|
197
|
-
replace(classifier_output_suffix,
|
|
198
|
-
final_output_suffix)
|
|
199
|
-
final_output_path = final_output_path.replace('_detections','')
|
|
200
|
-
final_output_path = final_output_path.replace('_crops','')
|
|
201
|
-
|
|
202
|
-
merge_cmd = ''
|
|
203
|
-
|
|
204
|
-
merge_comment = '\n# Merging {}\n'.format(fn)
|
|
205
|
-
merge_cmd += merge_comment
|
|
206
|
-
|
|
207
|
-
merge_cmd += "python merge_classification_detection_output.py \\\n" + \
|
|
208
|
-
classifier_output_path_remapped + ' \\\n' + \
|
|
209
|
-
output_label_index + ' \\\n' + \
|
|
210
|
-
'--output-json "' + final_output_path + '"' + ' \\\n' + \
|
|
211
|
-
'--detection-json "' + input_file_path + '"' + ' \\\n' + \
|
|
212
|
-
'--classifier-name "' + classifier_name + '"' + ' \\\n' + \
|
|
213
|
-
'--threshold "' + classification_threshold_str + '"' + ' \\\n' + \
|
|
214
|
-
'--typical-confidence-threshold "' + typical_classification_threshold_str + '"' + ' \\\n' + \
|
|
215
|
-
'\n'
|
|
216
|
-
merge_cmd = '{}'.format(merge_cmd)
|
|
217
|
-
commands.append(merge_cmd)
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
#%% Write everything out
|
|
221
|
-
|
|
222
|
-
with open(output_file,'w') as f:
|
|
223
|
-
for s in commands:
|
|
224
|
-
f.write('{}'.format(s))
|
|
225
|
-
|
|
226
|
-
import stat
|
|
227
|
-
st = os.stat(output_file)
|
|
228
|
-
os.chmod(output_file, st.st_mode | stat.S_IEXEC)
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
prepare_classification_script_mc.py
|
|
4
|
+
|
|
5
|
+
Notebook-y script used to prepare a series of shell commands to run MegaClassifier
|
|
6
|
+
on a MegaDetector result set.
|
|
7
|
+
|
|
8
|
+
Differs from prepare_classification_script.py only in the final class mapping step.
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
#%% Job options
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
|
|
16
|
+
organization_name = 'idfg'
|
|
17
|
+
job_name = 'idfg-2022-01-27-EOE2021S_Group6'
|
|
18
|
+
input_filename = 'idfg-2022-01-27-EOE2021S_Group6_detections.filtered_rde_0.60_0.85_30_0.20.json'
|
|
19
|
+
image_base = '/datadrive/idfg/EOE2021S_Group6'
|
|
20
|
+
crop_path = os.path.join(os.path.expanduser('~/crops'),job_name + '_crops')
|
|
21
|
+
device_id = 0
|
|
22
|
+
|
|
23
|
+
working_dir_base = os.path.join(os.path.expanduser('~/postprocessing'),
|
|
24
|
+
organization_name,
|
|
25
|
+
job_name)
|
|
26
|
+
|
|
27
|
+
output_base = os.path.join(working_dir_base,'combined_api_outputs')
|
|
28
|
+
|
|
29
|
+
assert os.path.isdir(working_dir_base)
|
|
30
|
+
assert os.path.isdir(output_base)
|
|
31
|
+
|
|
32
|
+
output_file = os.path.join(working_dir_base,'run_megaclassifier_' + job_name + '.sh')
|
|
33
|
+
|
|
34
|
+
input_files = [
|
|
35
|
+
os.path.join(
|
|
36
|
+
os.path.expanduser('~/postprocessing'),
|
|
37
|
+
organization_name,
|
|
38
|
+
job_name,
|
|
39
|
+
'combined_api_outputs',
|
|
40
|
+
input_filename
|
|
41
|
+
)
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
for fn in input_files:
|
|
45
|
+
assert os.path.isfile(fn)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
#%% Constants
|
|
49
|
+
|
|
50
|
+
classifier_base = os.path.expanduser('~/models/camera_traps/megaclassifier/v0.1/')
|
|
51
|
+
assert os.path.isdir(classifier_base)
|
|
52
|
+
|
|
53
|
+
checkpoint_path = os.path.join(classifier_base,'v0.1_efficientnet-b3_compiled.pt')
|
|
54
|
+
assert os.path.isfile(checkpoint_path)
|
|
55
|
+
|
|
56
|
+
classifier_categories_path = os.path.join(classifier_base,'v0.1_index_to_name.json')
|
|
57
|
+
assert os.path.isfile(classifier_categories_path)
|
|
58
|
+
|
|
59
|
+
target_mapping_path = os.path.join(classifier_base,'idfg_to_megaclassifier_labels.json')
|
|
60
|
+
assert os.path.isfile(target_mapping_path)
|
|
61
|
+
|
|
62
|
+
classifier_output_suffix = '_megaclassifier_output.csv.gz'
|
|
63
|
+
final_output_suffix = '_megaclassifier.json'
|
|
64
|
+
|
|
65
|
+
threshold_str = '0.65'
|
|
66
|
+
n_threads_str = '50'
|
|
67
|
+
image_size_str = '300'
|
|
68
|
+
batch_size_str = '64'
|
|
69
|
+
num_workers_str = '8'
|
|
70
|
+
logdir = working_dir_base
|
|
71
|
+
|
|
72
|
+
classification_threshold_str = '0.05'
|
|
73
|
+
|
|
74
|
+
# This is just passed along to the metadata in the output file, it has no impact
|
|
75
|
+
# on how the classification scripts run.
|
|
76
|
+
typical_classification_threshold_str = '0.75'
|
|
77
|
+
|
|
78
|
+
classifier_name = 'megaclassifier_v0.1_efficientnet-b3'
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
#%% Set up environment
|
|
82
|
+
|
|
83
|
+
commands = []
|
|
84
|
+
# commands.append('cd MegaDetector/classification\n')
|
|
85
|
+
# commands.append('conda activate cameratraps-classifier\n')
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
#%% Crop images
|
|
89
|
+
|
|
90
|
+
commands.append('\n### Cropping ###\n')
|
|
91
|
+
|
|
92
|
+
# fn = input_files[0]
|
|
93
|
+
for fn in input_files:
|
|
94
|
+
|
|
95
|
+
input_file_path = fn
|
|
96
|
+
crop_cmd = ''
|
|
97
|
+
|
|
98
|
+
crop_comment = '\n# Cropping {}\n'.format(fn)
|
|
99
|
+
crop_cmd += crop_comment
|
|
100
|
+
|
|
101
|
+
crop_cmd += "python crop_detections.py \\\n" + \
|
|
102
|
+
input_file_path + ' \\\n' + \
|
|
103
|
+
crop_path + ' \\\n' + \
|
|
104
|
+
'--images-dir "' + image_base + '"' + ' \\\n' + \
|
|
105
|
+
'--threshold "' + threshold_str + '"' + ' \\\n' + \
|
|
106
|
+
'--square-crops ' + ' \\\n' + \
|
|
107
|
+
'--threads "' + n_threads_str + '"' + ' \\\n' + \
|
|
108
|
+
'--logdir "' + logdir + '"' + ' \\\n' + \
|
|
109
|
+
'\n'
|
|
110
|
+
crop_cmd = '{}'.format(crop_cmd)
|
|
111
|
+
commands.append(crop_cmd)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
#%% Run classifier
|
|
115
|
+
|
|
116
|
+
commands.append('\n### Classifying ###\n')
|
|
117
|
+
|
|
118
|
+
# fn = input_files[0]
|
|
119
|
+
for fn in input_files:
|
|
120
|
+
|
|
121
|
+
input_file_path = fn
|
|
122
|
+
classifier_output_path = crop_path + classifier_output_suffix
|
|
123
|
+
|
|
124
|
+
classify_cmd = ''
|
|
125
|
+
|
|
126
|
+
classify_comment = '\n# Classifying {}\n'.format(fn)
|
|
127
|
+
classify_cmd += classify_comment
|
|
128
|
+
|
|
129
|
+
classify_cmd += "python run_classifier.py \\\n" + \
|
|
130
|
+
checkpoint_path + ' \\\n' + \
|
|
131
|
+
crop_path + ' \\\n' + \
|
|
132
|
+
classifier_output_path + ' \\\n' + \
|
|
133
|
+
'--detections-json "' + input_file_path + '"' + ' \\\n' + \
|
|
134
|
+
'--classifier-categories "' + classifier_categories_path + '"' + ' \\\n' + \
|
|
135
|
+
'--image-size "' + image_size_str + '"' + ' \\\n' + \
|
|
136
|
+
'--batch-size "' + batch_size_str + '"' + ' \\\n' + \
|
|
137
|
+
'--num-workers "' + num_workers_str + '"' + ' \\\n'
|
|
138
|
+
|
|
139
|
+
if device_id is not None:
|
|
140
|
+
classify_cmd += '--device {}'.format(device_id)
|
|
141
|
+
|
|
142
|
+
classify_cmd += '\n\n'
|
|
143
|
+
classify_cmd = '{}'.format(classify_cmd)
|
|
144
|
+
commands.append(classify_cmd)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
#%% Remap classifier outputs
|
|
148
|
+
|
|
149
|
+
commands.append('\n### Remapping ###\n')
|
|
150
|
+
|
|
151
|
+
# fn = input_files[0]
|
|
152
|
+
for fn in input_files:
|
|
153
|
+
|
|
154
|
+
input_file_path = fn
|
|
155
|
+
classifier_output_path = crop_path + classifier_output_suffix
|
|
156
|
+
classifier_output_path_remapped = \
|
|
157
|
+
classifier_output_path.replace(".csv.gz","_remapped.csv.gz")
|
|
158
|
+
assert not (classifier_output_path == classifier_output_path_remapped)
|
|
159
|
+
|
|
160
|
+
output_label_index = classifier_output_path_remapped.replace(
|
|
161
|
+
"_remapped.csv.gz","_label_index_remapped.json")
|
|
162
|
+
|
|
163
|
+
remap_cmd = ''
|
|
164
|
+
|
|
165
|
+
remap_comment = '\n# Remapping {}\n'.format(fn)
|
|
166
|
+
remap_cmd += remap_comment
|
|
167
|
+
|
|
168
|
+
remap_cmd += "python aggregate_classifier_probs.py \\\n" + \
|
|
169
|
+
classifier_output_path + ' \\\n' + \
|
|
170
|
+
'--target-mapping "' + target_mapping_path + '"' + ' \\\n' + \
|
|
171
|
+
'--output-csv "' + classifier_output_path_remapped + '"' + ' \\\n' + \
|
|
172
|
+
'--output-label-index "' + output_label_index + '"' + ' \\\n' + \
|
|
173
|
+
'\n'
|
|
174
|
+
|
|
175
|
+
remap_cmd = '{}'.format(remap_cmd)
|
|
176
|
+
commands.append(remap_cmd)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
#%% Merge classification and detection outputs
|
|
180
|
+
|
|
181
|
+
commands.append('\n### Merging ###\n')
|
|
182
|
+
|
|
183
|
+
# fn = input_files[0]
|
|
184
|
+
for fn in input_files:
|
|
185
|
+
|
|
186
|
+
input_file_path = fn
|
|
187
|
+
classifier_output_path = crop_path + classifier_output_suffix
|
|
188
|
+
|
|
189
|
+
classifier_output_path_remapped = \
|
|
190
|
+
classifier_output_path.replace(".csv.gz","_remapped.csv.gz")
|
|
191
|
+
|
|
192
|
+
output_label_index = classifier_output_path_remapped.replace(
|
|
193
|
+
"_remapped.csv.gz","_label_index_remapped.json")
|
|
194
|
+
|
|
195
|
+
final_output_path = os.path.join(output_base,
|
|
196
|
+
os.path.basename(classifier_output_path)).\
|
|
197
|
+
replace(classifier_output_suffix,
|
|
198
|
+
final_output_suffix)
|
|
199
|
+
final_output_path = final_output_path.replace('_detections','')
|
|
200
|
+
final_output_path = final_output_path.replace('_crops','')
|
|
201
|
+
|
|
202
|
+
merge_cmd = ''
|
|
203
|
+
|
|
204
|
+
merge_comment = '\n# Merging {}\n'.format(fn)
|
|
205
|
+
merge_cmd += merge_comment
|
|
206
|
+
|
|
207
|
+
merge_cmd += "python merge_classification_detection_output.py \\\n" + \
|
|
208
|
+
classifier_output_path_remapped + ' \\\n' + \
|
|
209
|
+
output_label_index + ' \\\n' + \
|
|
210
|
+
'--output-json "' + final_output_path + '"' + ' \\\n' + \
|
|
211
|
+
'--detection-json "' + input_file_path + '"' + ' \\\n' + \
|
|
212
|
+
'--classifier-name "' + classifier_name + '"' + ' \\\n' + \
|
|
213
|
+
'--threshold "' + classification_threshold_str + '"' + ' \\\n' + \
|
|
214
|
+
'--typical-confidence-threshold "' + typical_classification_threshold_str + '"' + ' \\\n' + \
|
|
215
|
+
'\n'
|
|
216
|
+
merge_cmd = '{}'.format(merge_cmd)
|
|
217
|
+
commands.append(merge_cmd)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
#%% Write everything out
|
|
221
|
+
|
|
222
|
+
with open(output_file,'w') as f:
|
|
223
|
+
for s in commands:
|
|
224
|
+
f.write('{}'.format(s))
|
|
225
|
+
|
|
226
|
+
import stat
|
|
227
|
+
st = os.stat(output_file)
|
|
228
|
+
os.chmod(output_file, st.st_mode | stat.S_IEXEC)
|
classification/run_classifier.py
CHANGED
|
@@ -1,31 +1,21 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
# run_classifier.py
|
|
4
|
-
#
|
|
5
|
-
# Run a species classifier.
|
|
6
|
-
#
|
|
7
|
-
# This script is the classifier counterpart to detection/run_tf_detector_batch.py.
|
|
8
|
-
# This script takes as input:
|
|
9
|
-
# 1) a detections JSON file, usually the output of run_tf_detector_batch.py or the
|
|
10
|
-
# output of the Batch API in the "Batch processing API output format"
|
|
11
|
-
# 2) a path to a directory containing crops of bounding boxes from the detections
|
|
12
|
-
# JSON file
|
|
13
|
-
# 3) a path to a PyTorch TorchScript compiled model file
|
|
14
|
-
# 4) (if the model is EfficientNet) an image size
|
|
15
|
-
#
|
|
16
|
-
# By default, this script overwrites the detections JSON file, adding in
|
|
17
|
-
# classification results. To output a new JSON file, use the --output argument.
|
|
18
|
-
#
|
|
19
|
-
########
|
|
1
|
+
"""
|
|
20
2
|
|
|
21
|
-
|
|
3
|
+
run_classifier.py
|
|
4
|
+
|
|
5
|
+
Run a species classifier.
|
|
6
|
+
|
|
7
|
+
This script is the classifier counterpart to detection/run_tf_detector_batch.py.
|
|
8
|
+
This script takes as input:
|
|
9
|
+
1) a detections JSON file, usually the output of run_tf_detector_batch.py or the
|
|
10
|
+
output of the Batch API in the "Batch processing API output format"
|
|
11
|
+
2) a path to a directory containing crops of bounding boxes from the detections
|
|
12
|
+
JSON file
|
|
13
|
+
3) a path to a PyTorch TorchScript compiled model file
|
|
14
|
+
4) (if the model is EfficientNet) an image size
|
|
15
|
+
|
|
16
|
+
By default, this script overwrites the detections JSON file, adding in
|
|
17
|
+
classification results. To output a new JSON file, use the --output argument.
|
|
22
18
|
|
|
23
|
-
"""
|
|
24
|
-
python run_classifier.py \
|
|
25
|
-
detections.json \
|
|
26
|
-
/path/to/crops \
|
|
27
|
-
/path/to/model.pt \
|
|
28
|
-
--image-size 224
|
|
29
19
|
"""
|
|
30
20
|
|
|
31
21
|
#%% Imports
|
|
@@ -49,6 +39,17 @@ from torchvision.datasets.folder import default_loader
|
|
|
49
39
|
from classification import train_classifier
|
|
50
40
|
|
|
51
41
|
|
|
42
|
+
#%% Example usage
|
|
43
|
+
|
|
44
|
+
"""
|
|
45
|
+
python run_classifier.py \
|
|
46
|
+
detections.json \
|
|
47
|
+
/path/to/crops \
|
|
48
|
+
/path/to/model.pt \
|
|
49
|
+
--image-size 224
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
|
|
52
53
|
#%% Classes
|
|
53
54
|
|
|
54
55
|
class SimpleDataset(torch.utils.data.Dataset):
|
|
@@ -1,33 +1,33 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
save_mislabeled.py
|
|
4
|
+
|
|
5
|
+
Update the list of known mislabeled images in MegaDB.
|
|
6
|
+
|
|
7
|
+
List of known mislabeled images is stored in Azure Blob Storage.
|
|
8
|
+
* storage account: cameratrapsc
|
|
9
|
+
* container: classifier-training
|
|
10
|
+
* blob: megadb_mislabeled/{dataset}.csv, one file per dataset
|
|
11
|
+
|
|
12
|
+
Each file megadb_mislabeled/{dataset}.csv has two columns:
|
|
13
|
+
|
|
14
|
+
* 'file': str, blob name
|
|
15
|
+
|
|
16
|
+
* 'correct_class': optional str, correct dataset class
|
|
17
|
+
|
|
18
|
+
if empty, indicates that the existing class in MegaDB is inaccurate, but
|
|
19
|
+
the correct class is unknown.
|
|
20
|
+
|
|
21
|
+
This script assumes that the classifier-training container is mounted locally.
|
|
22
|
+
|
|
23
|
+
Takes as input a CSV file (output from Timelapse) with the following columns:
|
|
24
|
+
|
|
25
|
+
* 'File': str, <blob_basename>
|
|
26
|
+
* 'RelativePath': str, <dataset>\<blob_dirname>
|
|
27
|
+
* 'mislabeled': str, values in ['true', 'false']
|
|
28
|
+
* 'correct_class': either empty or str
|
|
29
|
+
|
|
30
|
+
"""
|
|
31
31
|
|
|
32
32
|
#%% Imports
|
|
33
33
|
|
|
@@ -1,25 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
# train_classifier.py
|
|
4
|
-
#
|
|
5
|
-
# Train a EfficientNet or ResNet classifier.
|
|
6
|
-
#
|
|
7
|
-
# Currently the implementation of multi-label multi-class classification is
|
|
8
|
-
# non-functional.
|
|
9
|
-
#
|
|
10
|
-
# During training, start tensorboard from within the classification/ directory:
|
|
11
|
-
# tensorboard --logdir run --bind_all --samples_per_plugin scalars=0,images=0
|
|
12
|
-
#
|
|
13
|
-
########
|
|
1
|
+
"""
|
|
14
2
|
|
|
15
|
-
|
|
3
|
+
train_classifier.py
|
|
4
|
+
|
|
5
|
+
Train a EfficientNet or ResNet classifier.
|
|
6
|
+
|
|
7
|
+
Currently the implementation of multi-label multi-class classification is
|
|
8
|
+
non-functional.
|
|
9
|
+
|
|
10
|
+
During training, start tensorboard from within the classification/ directory:
|
|
11
|
+
tensorboard --logdir run --bind_all --samples_per_plugin scalars=0,images=0
|
|
16
12
|
|
|
17
|
-
"""
|
|
18
|
-
python train_classifier.py run_idfg /ssd/crops_sq \
|
|
19
|
-
-m "efficientnet-b0" --pretrained --finetune --label-weighted \
|
|
20
|
-
--epochs 50 --batch-size 512 --lr 1e-4 \
|
|
21
|
-
--num-workers 12 --seed 123 \
|
|
22
|
-
--logdir run_idfg
|
|
23
13
|
"""
|
|
24
14
|
|
|
25
15
|
#%% Imports and constants
|
|
@@ -50,6 +40,16 @@ from classification.train_utils import (
|
|
|
50
40
|
from md_visualization import plot_utils
|
|
51
41
|
|
|
52
42
|
|
|
43
|
+
#%% Example usage
|
|
44
|
+
|
|
45
|
+
"""
|
|
46
|
+
python train_classifier.py run_idfg /ssd/crops_sq \
|
|
47
|
+
-m "efficientnet-b0" --pretrained --finetune --label-weighted \
|
|
48
|
+
--epochs 50 --batch-size 512 --lr 1e-4 \
|
|
49
|
+
--num-workers 12 --seed 123 \
|
|
50
|
+
--logdir run_idfg
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
53
|
# mean/std values from https://pytorch.org/docs/stable/torchvision/models.html
|
|
54
54
|
MEANS = np.asarray([0.485, 0.456, 0.406])
|
|
55
55
|
STDS = np.asarray([0.229, 0.224, 0.225])
|