megadetector 10.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/__init__.py +0 -0
- megadetector/api/__init__.py +0 -0
- megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
- megadetector/classification/__init__.py +0 -0
- megadetector/classification/aggregate_classifier_probs.py +108 -0
- megadetector/classification/analyze_failed_images.py +227 -0
- megadetector/classification/cache_batchapi_outputs.py +198 -0
- megadetector/classification/create_classification_dataset.py +626 -0
- megadetector/classification/crop_detections.py +516 -0
- megadetector/classification/csv_to_json.py +226 -0
- megadetector/classification/detect_and_crop.py +853 -0
- megadetector/classification/efficientnet/__init__.py +9 -0
- megadetector/classification/efficientnet/model.py +415 -0
- megadetector/classification/efficientnet/utils.py +608 -0
- megadetector/classification/evaluate_model.py +520 -0
- megadetector/classification/identify_mislabeled_candidates.py +152 -0
- megadetector/classification/json_to_azcopy_list.py +63 -0
- megadetector/classification/json_validator.py +696 -0
- megadetector/classification/map_classification_categories.py +276 -0
- megadetector/classification/merge_classification_detection_output.py +509 -0
- megadetector/classification/prepare_classification_script.py +194 -0
- megadetector/classification/prepare_classification_script_mc.py +228 -0
- megadetector/classification/run_classifier.py +287 -0
- megadetector/classification/save_mislabeled.py +110 -0
- megadetector/classification/train_classifier.py +827 -0
- megadetector/classification/train_classifier_tf.py +725 -0
- megadetector/classification/train_utils.py +323 -0
- megadetector/data_management/__init__.py +0 -0
- megadetector/data_management/animl_to_md.py +161 -0
- megadetector/data_management/annotations/__init__.py +0 -0
- megadetector/data_management/annotations/annotation_constants.py +33 -0
- megadetector/data_management/camtrap_dp_to_coco.py +270 -0
- megadetector/data_management/cct_json_utils.py +566 -0
- megadetector/data_management/cct_to_md.py +184 -0
- megadetector/data_management/cct_to_wi.py +293 -0
- megadetector/data_management/coco_to_labelme.py +284 -0
- megadetector/data_management/coco_to_yolo.py +701 -0
- megadetector/data_management/databases/__init__.py +0 -0
- megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
- megadetector/data_management/databases/integrity_check_json_db.py +563 -0
- megadetector/data_management/databases/subset_json_db.py +195 -0
- megadetector/data_management/generate_crops_from_cct.py +200 -0
- megadetector/data_management/get_image_sizes.py +164 -0
- megadetector/data_management/labelme_to_coco.py +559 -0
- megadetector/data_management/labelme_to_yolo.py +349 -0
- megadetector/data_management/lila/__init__.py +0 -0
- megadetector/data_management/lila/create_lila_blank_set.py +556 -0
- megadetector/data_management/lila/create_lila_test_set.py +192 -0
- megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
- megadetector/data_management/lila/download_lila_subset.py +182 -0
- megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
- megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
- megadetector/data_management/lila/get_lila_image_counts.py +112 -0
- megadetector/data_management/lila/lila_common.py +319 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
- megadetector/data_management/mewc_to_md.py +344 -0
- megadetector/data_management/ocr_tools.py +873 -0
- megadetector/data_management/read_exif.py +964 -0
- megadetector/data_management/remap_coco_categories.py +195 -0
- megadetector/data_management/remove_exif.py +156 -0
- megadetector/data_management/rename_images.py +194 -0
- megadetector/data_management/resize_coco_dataset.py +665 -0
- megadetector/data_management/speciesnet_to_md.py +41 -0
- megadetector/data_management/wi_download_csv_to_coco.py +247 -0
- megadetector/data_management/yolo_output_to_md_output.py +594 -0
- megadetector/data_management/yolo_to_coco.py +984 -0
- megadetector/data_management/zamba_to_md.py +188 -0
- megadetector/detection/__init__.py +0 -0
- megadetector/detection/change_detection.py +840 -0
- megadetector/detection/process_video.py +479 -0
- megadetector/detection/pytorch_detector.py +1451 -0
- megadetector/detection/run_detector.py +1267 -0
- megadetector/detection/run_detector_batch.py +2172 -0
- megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
- megadetector/detection/run_md_and_speciesnet.py +1604 -0
- megadetector/detection/run_tiled_inference.py +1044 -0
- megadetector/detection/tf_detector.py +209 -0
- megadetector/detection/video_utils.py +1379 -0
- megadetector/postprocessing/__init__.py +0 -0
- megadetector/postprocessing/add_max_conf.py +72 -0
- megadetector/postprocessing/categorize_detections_by_size.py +166 -0
- megadetector/postprocessing/classification_postprocessing.py +1943 -0
- megadetector/postprocessing/combine_batch_outputs.py +249 -0
- megadetector/postprocessing/compare_batch_results.py +2110 -0
- megadetector/postprocessing/convert_output_format.py +403 -0
- megadetector/postprocessing/create_crop_folder.py +629 -0
- megadetector/postprocessing/detector_calibration.py +570 -0
- megadetector/postprocessing/generate_csv_report.py +522 -0
- megadetector/postprocessing/load_api_results.py +223 -0
- megadetector/postprocessing/md_to_coco.py +428 -0
- megadetector/postprocessing/md_to_labelme.py +351 -0
- megadetector/postprocessing/md_to_wi.py +41 -0
- megadetector/postprocessing/merge_detections.py +392 -0
- megadetector/postprocessing/postprocess_batch_results.py +2140 -0
- megadetector/postprocessing/remap_detection_categories.py +226 -0
- megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
- megadetector/postprocessing/separate_detections_into_folders.py +795 -0
- megadetector/postprocessing/subset_json_detector_output.py +964 -0
- megadetector/postprocessing/top_folders_to_bottom.py +238 -0
- megadetector/postprocessing/validate_batch_results.py +332 -0
- megadetector/taxonomy_mapping/__init__.py +0 -0
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
- megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
- megadetector/taxonomy_mapping/simple_image_download.py +231 -0
- megadetector/taxonomy_mapping/species_lookup.py +1008 -0
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
- megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
- megadetector/tests/__init__.py +0 -0
- megadetector/tests/test_nms_synthetic.py +335 -0
- megadetector/utils/__init__.py +0 -0
- megadetector/utils/ct_utils.py +1857 -0
- megadetector/utils/directory_listing.py +199 -0
- megadetector/utils/extract_frames_from_video.py +307 -0
- megadetector/utils/gpu_test.py +125 -0
- megadetector/utils/md_tests.py +2072 -0
- megadetector/utils/path_utils.py +2872 -0
- megadetector/utils/process_utils.py +172 -0
- megadetector/utils/split_locations_into_train_val.py +237 -0
- megadetector/utils/string_utils.py +234 -0
- megadetector/utils/url_utils.py +825 -0
- megadetector/utils/wi_platform_utils.py +968 -0
- megadetector/utils/wi_taxonomy_utils.py +1766 -0
- megadetector/utils/write_html_image_list.py +239 -0
- megadetector/visualization/__init__.py +0 -0
- megadetector/visualization/plot_utils.py +309 -0
- megadetector/visualization/render_images_with_thumbnails.py +243 -0
- megadetector/visualization/visualization_utils.py +1973 -0
- megadetector/visualization/visualize_db.py +630 -0
- megadetector/visualization/visualize_detector_output.py +498 -0
- megadetector/visualization/visualize_video_output.py +705 -0
- megadetector-10.0.15.dist-info/METADATA +115 -0
- megadetector-10.0.15.dist-info/RECORD +147 -0
- megadetector-10.0.15.dist-info/WHEEL +5 -0
- megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
- megadetector-10.0.15.dist-info/top_level.txt +1 -0

@@ -0,0 +1,194 @@ megadetector/classification/prepare_classification_script.py

"""

prepare_classification_script.py

Notebook-y script used to prepare a series of shell commands to run a classifier
(other than MegaClassifier) on a MegaDetector result set.

Differs from prepare_classification_script_mc.py only in the final class mapping step.

"""

#%% Job options

import os

def main(): # noqa

    organization_name = 'idfg'
    job_name = 'idfg-2022-01-27-EOE2021S_Group6'
    input_filename = 'idfg-2022-01-27-EOE2021S_Group6_detections.filtered_rde_0.60_0.85_30_0.20.json'
    image_base = '/datadrive/idfg/EOE2021S_Group6'
    crop_path = os.path.join(os.path.expanduser('~/crops'),job_name + '_crops')
    device_id = 1

    working_dir_base = os.path.join(os.path.expanduser('~/postprocessing'),
                                    organization_name,
                                    job_name)

    output_base = os.path.join(working_dir_base,'combined_api_outputs')

    assert os.path.isdir(working_dir_base)
    assert os.path.isdir(output_base)

    output_file = os.path.join(working_dir_base,'run_idfgclassifier_' + job_name + '.sh')

    input_files = [
        os.path.join(
            os.path.expanduser('~/postprocessing'),
            organization_name,
            job_name,
            'combined_api_outputs',
            input_filename
        )
    ]

    for fn in input_files:
        assert os.path.isfile(fn)


    #%% Constants

    include_cropping = False

    classifier_base = os.path.expanduser('~/models/camera_traps/idfg_classifier/idfg_classifier_20200905_042558')
    assert os.path.isdir(classifier_base)

    checkpoint_path = os.path.join(classifier_base,'idfg_classifier_ckpt_14_compiled.pt')
    assert os.path.isfile(checkpoint_path)

    classifier_categories_path = os.path.join(classifier_base,'label_index.json')
    assert os.path.isfile(classifier_categories_path)

    classifier_output_suffix = '_idfg_classifier_output.csv.gz'
    final_output_suffix = '_idfgclassifier.json'

    threshold_str = '0.65'
    n_threads_str = '50'
    image_size_str = '300'
    batch_size_str = '64'
    num_workers_str = '8'
    logdir = working_dir_base

    classification_threshold_str = '0.05'

    # This is just passed along to the metadata in the output file, it has no impact
    # on how the classification scripts run.
    typical_classification_threshold_str = '0.75'

    classifier_name = 'idfg4'


    #%% Set up environment

    commands = []
    # commands.append('cd MegaDetector/classification\n')
    # commands.append('conda activate cameratraps-classifier\n')


    #%% Crop images

    if include_cropping:

        commands.append('\n### Cropping ###\n')

        # fn = input_files[0]
        for fn in input_files:

            input_file_path = fn
            crop_cmd = ''

            crop_comment = '\n# Cropping {}\n'.format(fn)
            crop_cmd += crop_comment

            crop_cmd += "python crop_detections.py \\\n" + \
                input_file_path + ' \\\n' + \
                crop_path + ' \\\n' + \
                '--images-dir "' + image_base + '"' + ' \\\n' + \
                '--threshold "' + threshold_str + '"' + ' \\\n' + \
                '--square-crops ' + ' \\\n' + \
                '--threads "' + n_threads_str + '"' + ' \\\n' + \
                '--logdir "' + logdir + '"' + ' \\\n' + \
                '\n'
            crop_cmd = '{}'.format(crop_cmd)
            commands.append(crop_cmd)


    #%% Run classifier

    commands.append('\n### Classifying ###\n')

    # fn = input_files[0]
    for fn in input_files:

        input_file_path = fn
        classifier_output_path = crop_path + classifier_output_suffix

        classify_cmd = ''

        classify_comment = '\n# Classifying {}\n'.format(fn)
        classify_cmd += classify_comment

        classify_cmd += "python run_classifier.py \\\n" + \
            checkpoint_path + ' \\\n' + \
            crop_path + ' \\\n' + \
            classifier_output_path + ' \\\n' + \
            '--detections-json "' + input_file_path + '"' + ' \\\n' + \
            '--classifier-categories "' + classifier_categories_path + '"' + ' \\\n' + \
            '--image-size "' + image_size_str + '"' + ' \\\n' + \
            '--batch-size "' + batch_size_str + '"' + ' \\\n' + \
            '--num-workers "' + num_workers_str + '"' + ' \\\n'

        if device_id is not None:
            classify_cmd += '--device {}'.format(device_id)

        classify_cmd += '\n\n'
        classify_cmd = '{}'.format(classify_cmd)
        commands.append(classify_cmd)


    #%% Merge classification and detection outputs

    commands.append('\n### Merging ###\n')

    # fn = input_files[0]
    for fn in input_files:

        input_file_path = fn
        classifier_output_path = crop_path + classifier_output_suffix
        final_output_path = os.path.join(output_base,
                                         os.path.basename(classifier_output_path)).\
            replace(classifier_output_suffix,
                    final_output_suffix)
        final_output_path = final_output_path.replace('_detections','')
        final_output_path = final_output_path.replace('_crops','')

        merge_cmd = ''

        merge_comment = '\n# Merging {}\n'.format(fn)
        merge_cmd += merge_comment

        merge_cmd += "python merge_classification_detection_output.py \\\n" + \
            classifier_output_path + ' \\\n' + \
            classifier_categories_path + ' \\\n' + \
            '--output-json "' + final_output_path + '"' + ' \\\n' + \
            '--detection-json "' + input_file_path + '"' + ' \\\n' + \
            '--classifier-name "' + classifier_name + '"' + ' \\\n' + \
            '--threshold "' + classification_threshold_str + '"' + ' \\\n' + \
            '--typical-confidence-threshold "' + typical_classification_threshold_str + '"' + ' \\\n' + \
            '\n'
        merge_cmd = '{}'.format(merge_cmd)
        commands.append(merge_cmd)


    #%% Write everything out

    with open(output_file,'w') as f:
        for s in commands:
            f.write('{}'.format(s))

    import stat
    st = os.stat(output_file)
    os.chmod(output_file, st.st_mode | stat.S_IEXEC)

if __name__ == '__main__':
    main()
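
Note that the script above only writes run_idfgclassifier_<job_name>.sh and marks it executable; it does not run it. As an illustrative sketch (not part of the package), the generated script could be launched from Python after main() completes, assuming the same job paths defined above:

# Illustrative only (not part of the package): execute the shell script that
# main() wrote and chmod'ed. 'output_file' mirrors the path built inside main().
import os
import subprocess

output_file = os.path.expanduser(
    '~/postprocessing/idfg/idfg-2022-01-27-EOE2021S_Group6/'
    'run_idfgclassifier_idfg-2022-01-27-EOE2021S_Group6.sh')
subprocess.run(['bash', output_file], check=True)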

@@ -0,0 +1,228 @@ megadetector/classification/prepare_classification_script_mc.py

"""

prepare_classification_script_mc.py

Notebook-y script used to prepare a series of shell commands to run MegaClassifier
on a MegaDetector result set.

Differs from prepare_classification_script.py only in the final class mapping step.

"""

#%% Job options

import os

organization_name = 'idfg'
job_name = 'idfg-2022-01-27-EOE2021S_Group6'
input_filename = 'idfg-2022-01-27-EOE2021S_Group6_detections.filtered_rde_0.60_0.85_30_0.20.json'
image_base = '/datadrive/idfg/EOE2021S_Group6'
crop_path = os.path.join(os.path.expanduser('~/crops'),job_name + '_crops')
device_id = 0

working_dir_base = os.path.join(os.path.expanduser('~/postprocessing'),
                                organization_name,
                                job_name)

output_base = os.path.join(working_dir_base,'combined_api_outputs')

assert os.path.isdir(working_dir_base)
assert os.path.isdir(output_base)

output_file = os.path.join(working_dir_base,'run_megaclassifier_' + job_name + '.sh')

input_files = [
    os.path.join(
        os.path.expanduser('~/postprocessing'),
        organization_name,
        job_name,
        'combined_api_outputs',
        input_filename
    )
]

for fn in input_files:
    assert os.path.isfile(fn)


#%% Constants

classifier_base = os.path.expanduser('~/models/camera_traps/megaclassifier/v0.1/')
assert os.path.isdir(classifier_base)

checkpoint_path = os.path.join(classifier_base,'v0.1_efficientnet-b3_compiled.pt')
assert os.path.isfile(checkpoint_path)

classifier_categories_path = os.path.join(classifier_base,'v0.1_index_to_name.json')
assert os.path.isfile(classifier_categories_path)

target_mapping_path = os.path.join(classifier_base,'idfg_to_megaclassifier_labels.json')
assert os.path.isfile(target_mapping_path)

classifier_output_suffix = '_megaclassifier_output.csv.gz'
final_output_suffix = '_megaclassifier.json'

threshold_str = '0.65'
n_threads_str = '50'
image_size_str = '300'
batch_size_str = '64'
num_workers_str = '8'
logdir = working_dir_base

classification_threshold_str = '0.05'

# This is just passed along to the metadata in the output file, it has no impact
# on how the classification scripts run.
typical_classification_threshold_str = '0.75'

classifier_name = 'megaclassifier_v0.1_efficientnet-b3'


#%% Set up environment

commands = []
# commands.append('cd MegaDetector/classification\n')
# commands.append('conda activate cameratraps-classifier\n')


#%% Crop images

commands.append('\n### Cropping ###\n')

# fn = input_files[0]
for fn in input_files:

    input_file_path = fn
    crop_cmd = ''

    crop_comment = '\n# Cropping {}\n'.format(fn)
    crop_cmd += crop_comment

    crop_cmd += "python crop_detections.py \\\n" + \
        input_file_path + ' \\\n' + \
        crop_path + ' \\\n' + \
        '--images-dir "' + image_base + '"' + ' \\\n' + \
        '--threshold "' + threshold_str + '"' + ' \\\n' + \
        '--square-crops ' + ' \\\n' + \
        '--threads "' + n_threads_str + '"' + ' \\\n' + \
        '--logdir "' + logdir + '"' + ' \\\n' + \
        '\n'
    crop_cmd = '{}'.format(crop_cmd)
    commands.append(crop_cmd)


#%% Run classifier

commands.append('\n### Classifying ###\n')

# fn = input_files[0]
for fn in input_files:

    input_file_path = fn
    classifier_output_path = crop_path + classifier_output_suffix

    classify_cmd = ''

    classify_comment = '\n# Classifying {}\n'.format(fn)
    classify_cmd += classify_comment

    classify_cmd += "python run_classifier.py \\\n" + \
        checkpoint_path + ' \\\n' + \
        crop_path + ' \\\n' + \
        classifier_output_path + ' \\\n' + \
        '--detections-json "' + input_file_path + '"' + ' \\\n' + \
        '--classifier-categories "' + classifier_categories_path + '"' + ' \\\n' + \
        '--image-size "' + image_size_str + '"' + ' \\\n' + \
        '--batch-size "' + batch_size_str + '"' + ' \\\n' + \
        '--num-workers "' + num_workers_str + '"' + ' \\\n'

    if device_id is not None:
        classify_cmd += '--device {}'.format(device_id)

    classify_cmd += '\n\n'
    classify_cmd = '{}'.format(classify_cmd)
    commands.append(classify_cmd)


#%% Remap classifier outputs

commands.append('\n### Remapping ###\n')

# fn = input_files[0]
for fn in input_files:

    input_file_path = fn
    classifier_output_path = crop_path + classifier_output_suffix
    classifier_output_path_remapped = \
        classifier_output_path.replace(".csv.gz","_remapped.csv.gz")
    assert not (classifier_output_path == classifier_output_path_remapped)

    output_label_index = classifier_output_path_remapped.replace(
        "_remapped.csv.gz","_label_index_remapped.json")

    remap_cmd = ''

    remap_comment = '\n# Remapping {}\n'.format(fn)
    remap_cmd += remap_comment

    remap_cmd += "python aggregate_classifier_probs.py \\\n" + \
        classifier_output_path + ' \\\n' + \
        '--target-mapping "' + target_mapping_path + '"' + ' \\\n' + \
        '--output-csv "' + classifier_output_path_remapped + '"' + ' \\\n' + \
        '--output-label-index "' + output_label_index + '"' + ' \\\n' + \
        '\n'

    remap_cmd = '{}'.format(remap_cmd)
    commands.append(remap_cmd)


#%% Merge classification and detection outputs

commands.append('\n### Merging ###\n')

# fn = input_files[0]
for fn in input_files:

    input_file_path = fn
    classifier_output_path = crop_path + classifier_output_suffix

    classifier_output_path_remapped = \
        classifier_output_path.replace(".csv.gz","_remapped.csv.gz")

    output_label_index = classifier_output_path_remapped.replace(
        "_remapped.csv.gz","_label_index_remapped.json")

    final_output_path = os.path.join(output_base,
                                     os.path.basename(classifier_output_path)).\
        replace(classifier_output_suffix,
                final_output_suffix)
    final_output_path = final_output_path.replace('_detections','')
    final_output_path = final_output_path.replace('_crops','')

    merge_cmd = ''

    merge_comment = '\n# Merging {}\n'.format(fn)
    merge_cmd += merge_comment

    merge_cmd += "python merge_classification_detection_output.py \\\n" + \
        classifier_output_path_remapped + ' \\\n' + \
        output_label_index + ' \\\n' + \
        '--output-json "' + final_output_path + '"' + ' \\\n' + \
        '--detection-json "' + input_file_path + '"' + ' \\\n' + \
        '--classifier-name "' + classifier_name + '"' + ' \\\n' + \
        '--threshold "' + classification_threshold_str + '"' + ' \\\n' + \
        '--typical-confidence-threshold "' + typical_classification_threshold_str + '"' + ' \\\n' + \
        '\n'
    merge_cmd = '{}'.format(merge_cmd)
    commands.append(merge_cmd)


#%% Write everything out

with open(output_file,'w') as f:
    for s in commands:
        f.write('{}'.format(s))

import stat
st = os.stat(output_file)
os.chmod(output_file, st.st_mode | stat.S_IEXEC)
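
Relative to prepare_classification_script.py, the MegaClassifier variant above adds a remapping pass: aggregate_classifier_probs.py collapses MegaClassifier's fine-grained probability columns into a target label set (driven by the --target-mapping JSON) before the merge step. The following is only a conceptual sketch of that aggregation idea, not the package's implementation; 'mapping' is a hypothetical {target_label: [classifier_labels, ...]} dictionary of the kind such a mapping file would describe:

# Conceptual sketch of probability remapping, not the package's
# aggregate_classifier_probs.py.
import pandas as pd

def aggregate_probs(probs: pd.DataFrame, mapping: dict) -> pd.DataFrame:
    # probs: one row per crop, one column per fine-grained classifier label
    remapped = pd.DataFrame(index=probs.index)
    for target_label, source_labels in mapping.items():
        # sum the probabilities of all source classes mapped to this target class
        remapped[target_label] = probs[source_labels].sum(axis=1)
    return remapped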

@@ -0,0 +1,287 @@ megadetector/classification/run_classifier.py

"""

run_classifier.py

Run a species classifier.

This script is the classifier counterpart to detection/run_tf_detector_batch.py.
This script takes as input:
1) a detections JSON file, usually the output of run_tf_detector_batch.py or the
   output of the Batch API in the "Batch processing API output format"
2) a path to a directory containing crops of bounding boxes from the detections
   JSON file
3) a path to a PyTorch TorchScript compiled model file
4) (if the model is EfficientNet) an image size

By default, this script overwrites the detections JSON file, adding in
classification results. To output a new JSON file, use the --output argument.

"""

#%% Imports

from __future__ import annotations

import argparse
import json
import os

from tqdm import tqdm
from typing import Any
from collections.abc import Callable, Sequence

import pandas as pd
import PIL
import torch
import torch.utils
import torchvision as tv
from torchvision.datasets.folder import default_loader

from megadetector.classification import train_classifier


#%% Example usage

"""
python run_classifier.py \
    detections.json \
    /path/to/crops \
    /path/to/model.pt \
    --image-size 224
"""


#%% Classes

class SimpleDataset(torch.utils.data.Dataset):
    """
    Very simple dataset.
    """

    def __init__(self, img_files: Sequence[str],
                 images_dir: str | None = None,
                 transform: Callable[[PIL.Image.Image], Any] | None = None):
        """Creates a SimpleDataset."""
        self.img_files = img_files
        self.images_dir = images_dir
        self.transform = transform

    def __getitem__(self, index: int) -> tuple[Any, str]:
        """
        Returns: tuple, (img, img_file)
        """
        img_file = self.img_files[index]
        if self.images_dir is not None:
            img_path = os.path.join(self.images_dir, img_file)
        else:
            img_path = img_file
        img = default_loader(img_path)
        if self.transform is not None:
            img = self.transform(img)
        return img, img_file

    def __len__(self) -> int:
        return len(self.img_files)


#%% Support functions

def create_loader(cropped_images_dir: str,
                  detections_json_path: str | None,
                  img_size: int,
                  batch_size: int,
                  num_workers: int
                  ) -> torch.utils.data.DataLoader:
    """
    Creates a DataLoader.

    Args:
        cropped_images_dir: str, path to image crops
        detections_json_path: optional str, path to detections JSON
        img_size: int, resizes smallest side of image to img_size,
            then center-crops to (img_size, img_size)
        batch_size: int, batch size in dataloader
        num_workers: int, # of workers in dataloader
    """

    crop_files = []

    if detections_json_path is None:
        # recursively find all files in cropped_images_dir
        for subdir, _, files in os.walk(cropped_images_dir):
            for file_name in files:
                rel_dir = os.path.relpath(subdir, cropped_images_dir)
                rel_file = os.path.join(rel_dir, file_name)
                crop_files.append(rel_file)

    else:
        # only find crops of images from detections JSON
        print('Loading detections JSON')
        with open(detections_json_path, 'r') as f:
            js = json.load(f)
        detections = {img['file']: img for img in js['images']}
        detector_version = js['info']['detector']

        for img_file, info_dict in tqdm(detections.items()):
            if 'detections' not in info_dict or info_dict['detections'] is None:
                continue
            for i in range(len(info_dict['detections'])):
                crop_filename = img_file + f'___crop{i:02d}_{detector_version}.jpg'
                crop_path = os.path.join(cropped_images_dir, crop_filename)
                if os.path.exists(crop_path):
                    crop_files.append(crop_filename)

    transform = tv.transforms.Compose([
        # resizes smaller edge to img_size
        tv.transforms.Resize(img_size, interpolation=PIL.Image.BICUBIC),
        tv.transforms.CenterCrop(img_size),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=train_classifier.MEANS,
                                std=train_classifier.STDS, inplace=True)
    ])

    dataset = SimpleDataset(img_files=crop_files, images_dir=cropped_images_dir,
                            transform=transform)
    assert len(dataset) > 0
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, num_workers=num_workers,
        pin_memory=True)
    return loader


#%% Main function

def main(model_path: str,
         cropped_images_dir: str,
         output_csv_path: str,
         detections_json_path: str | None,
         classifier_categories_json_path: str | None,
         img_size: int,
         batch_size: int,
         num_workers: int,
         device_id: int | None = None) -> None:

    # Evaluating with accimage is much faster than Pillow or Pillow-SIMD, but accimage
    # is Linux-only.
    try:
        import accimage  # noqa
        tv.set_image_backend('accimage')
    except:
        print('Warning: could not start accimage backend (ignore this if you\'re not using Linux)')

    # create dataset
    print('Creating data loader')
    loader = create_loader(
        cropped_images_dir, detections_json_path=detections_json_path,
        img_size=img_size, batch_size=batch_size, num_workers=num_workers)

    label_names = None
    if classifier_categories_json_path is not None:
        with open(classifier_categories_json_path, 'r') as f:
            categories = json.load(f)
        label_names = [categories[str(i)] for i in range(len(categories))]

    # create model
    print('Loading saved model')
    model = torch.jit.load(model_path)
    model, device = train_classifier.prep_device(model, device_id=device_id)

    test_epoch(model, loader, device=device, label_names=label_names,
               output_csv_path=output_csv_path)


def test_epoch(model: torch.nn.Module,
               loader: torch.utils.data.DataLoader,
               device: torch.device,
               label_names: Sequence[str] | None,
               output_csv_path: str) -> None:
    """
    Runs for 1 epoch.

    Writes results to the output CSV in batches.

    Args:
        model: torch.nn.Module
        loader: torch.utils.data.DataLoader
        device: torch.device
        label_names: optional list of str, label names
        output_csv_path: str
    """

    # set dropout and BN layers to eval mode
    model.eval()

    header = True
    mode = 'w'  # new file on first write

    with torch.no_grad():
        for inputs, img_files in tqdm(loader):
            inputs = inputs.to(device, non_blocking=True)
            outputs = model(inputs)
            probs = torch.nn.functional.softmax(outputs, dim=1).cpu().numpy()

            if label_names is None:
                label_names = [str(i) for i in range(probs.shape[1])]

            df = pd.DataFrame(data=probs, columns=label_names,
                              index=pd.Index(img_files, name='path'))
            df.to_csv(output_csv_path, index=True, header=header, mode=mode)

            if header:
                header = False
                mode = 'a'


#%% Command-line driver

def _parse_args() -> argparse.Namespace:

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Run classifier.')
    parser.add_argument(
        'model',
        help='path to TorchScript compiled model')
    parser.add_argument(
        'crops_dir',
        help='path to directory containing cropped images')
    parser.add_argument(
        'output',
        help='path to save CSV file with classifier results (can use .csv.gz '
             'extension for compression)')
    parser.add_argument(
        '-d', '--detections-json',
        help='path to detections JSON file, used to filter paths within '
             'crops_dir')
    parser.add_argument(
        '-c', '--classifier-categories',
        help='path to JSON file for classifier categories. If not given, '
             'classes are numbered "0", "1", "2", ...')
    parser.add_argument(
        '--image-size', type=int, default=224,
        help='size of input image to model, usually 224px, but may be larger '
             'especially for EfficientNet models')
    parser.add_argument(
        '--batch-size', type=int, default=1,
        help='batch size for evaluating model')
    parser.add_argument(
        '--device', type=int, default=None,
        help='preferred CUDA device')
    parser.add_argument(
        '--num-workers', type=int, default=8,
        help='# of workers for data loading')
    return parser.parse_args()


if __name__ == '__main__':

    args = _parse_args()
    main(model_path=args.model,
         cropped_images_dir=args.crops_dir,
         output_csv_path=args.output,
         detections_json_path=args.detections_json,
         classifier_categories_json_path=args.classifier_categories,
         img_size=args.image_size,
         batch_size=args.batch_size,
         num_workers=args.num_workers,
         device_id=args.device)
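
test_epoch() above writes one CSV row per crop, indexed by 'path', with one softmax-probability column per class. A short downstream sketch (not part of the package) for pulling the top-1 prediction per crop from that file; 'classifier_output.csv.gz' is a placeholder for whatever path was passed as the positional 'output' argument:

# Illustrative only: read the CSV written by test_epoch() and take the
# highest-probability class per crop.
import pandas as pd

df = pd.read_csv('classifier_output.csv.gz', index_col='path')
top1_label = df.idxmax(axis=1)   # class name with the highest probability per crop
top1_prob = df.max(axis=1)       # that probability
print(pd.DataFrame({'label': top1_label, 'prob': top1_prob}).head())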