megadetector-5.0.6-py3-none-any.whl → megadetector-5.0.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- api/batch_processing/data_preparation/manage_local_batch.py +297 -202
- api/batch_processing/data_preparation/manage_video_batch.py +7 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
- api/batch_processing/postprocessing/compare_batch_results.py +111 -61
- api/batch_processing/postprocessing/convert_output_format.py +24 -6
- api/batch_processing/postprocessing/load_api_results.py +56 -72
- api/batch_processing/postprocessing/md_to_labelme.py +119 -51
- api/batch_processing/postprocessing/merge_detections.py +30 -5
- api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
- api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- classification/prepare_classification_script.py +191 -191
- data_management/cct_json_utils.py +7 -2
- data_management/coco_to_labelme.py +263 -0
- data_management/coco_to_yolo.py +72 -48
- data_management/databases/integrity_check_json_db.py +75 -64
- data_management/databases/subset_json_db.py +1 -1
- data_management/generate_crops_from_cct.py +1 -1
- data_management/get_image_sizes.py +44 -26
- data_management/importers/animl_results_to_md_results.py +3 -5
- data_management/importers/noaa_seals_2019.py +2 -2
- data_management/importers/zamba_results_to_md_results.py +2 -2
- data_management/labelme_to_coco.py +264 -127
- data_management/labelme_to_yolo.py +96 -53
- data_management/lila/create_lila_blank_set.py +557 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +1 -1
- data_management/lila/download_lila_subset.py +138 -45
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +16 -10
- data_management/lila/lila_common.py +15 -42
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/read_exif.py +65 -16
- data_management/remap_coco_categories.py +84 -0
- data_management/resize_coco_dataset.py +14 -31
- data_management/wi_download_csv_to_coco.py +239 -0
- data_management/yolo_output_to_md_output.py +40 -13
- data_management/yolo_to_coco.py +313 -100
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +1 -1
- detection/run_detector.py +73 -18
- detection/run_detector_batch.py +116 -27
- detection/run_inference_with_yolov5_val.py +135 -27
- detection/run_tiled_inference.py +153 -43
- detection/tf_detector.py +2 -1
- detection/video_utils.py +4 -2
- md_utils/ct_utils.py +101 -6
- md_utils/md_tests.py +264 -17
- md_utils/path_utils.py +326 -47
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +66 -3
- md_utils/write_html_image_list.py +12 -2
- md_visualization/visualization_utils.py +380 -74
- md_visualization/visualize_db.py +41 -10
- md_visualization/visualize_detector_output.py +185 -104
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- md_visualization/visualize_megadb.py +0 -183
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
--- api/batch_processing/postprocessing/subset_json_detector_output.py (5.0.6)
+++ api/batch_processing/postprocessing/subset_json_detector_output.py (5.0.8)
@@ -17,6 +17,11 @@
 # rather than an output path. All images in the folder blah\foo\bar will end up
 # in a .json file called blah_foo_bar.json.
 #
+# Can also apply a confidence threshold.
+#
+# Can also subset by categories above a threshold (programmatic invocation only, this is
+# not supported at the command line yet).
+#
 ###
 #
 # Sample invocations (splitting into multiple json's):
@@ -65,6 +70,7 @@ from tqdm import tqdm
 
 from md_utils.ct_utils import args_to_object
 from md_utils.ct_utils import get_max_conf
+from md_utils.ct_utils import invert_dictionary
 
 
 #%% Helper classes
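For reference, `invert_dictionary` is used below to turn the `detection_categories` map (category ID → name) into a name → ID lookup. A minimal sketch of the behavior implied by that usage; the real implementation lives in `md_utils/ct_utils.py`, so this inline version is an assumption for illustration only:

```python
def invert_dictionary(d):
    # Swap keys and values; assumes values are unique, as they are for
    # MegaDetector category names
    return {v: k for k, v in d.items()}

detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}
assert invert_dictionary(detection_categories)['person'] == '2'
```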
@@ -118,6 +124,17 @@ class SubsetJsonDetectorOutputOptions:
     # Should we remove failed images?
     remove_failed_images = False
 
+    # Either a list of category IDs (as string-ints, not names), or a dictionary mapping category
+    # *IDs* (as string-ints) to thresholds. Removes non-matching detections, does not
+    # remove images. Not technically mutually exclusive with category_names_to_keep, but it's an esoteric
+    # scenario indeed where you would want to specify both.
+    categories_to_keep = None
+
+    # Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
+    # Removes non-matching detections, does not remove images. Not technically mutually exclusive with
+    # categories_to_keep, but it's an esoteric scenario indeed where you would want to specify both.
+    category_names_to_keep = None
+
     debug_max_images = -1
 
 
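In other words, each new option accepts either a list (keep every detection in those categories) or a dict (keep detections above a per-category threshold). A hedged sketch of the two forms, assuming the options class above is in scope and using the standard MegaDetector category IDs:

```python
options = SubsetJsonDetectorOutputOptions()

# List form: keep detections whose category ID is '1' (animal) or '2' (person),
# at any confidence
options.categories_to_keep = ['1', '2']

# Dict form (the alternative, keyed by category *name* rather than ID):
# options.category_names_to_keep = {'animal': 0.2, 'person': 0.5}
```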
@@ -151,7 +168,7 @@ def subset_json_detector_output_by_confidence(data, options):
     Remove all detections below options.confidence_threshold, update max confidences accordingly.
     """
 
-    if
+    if options.confidence_threshold is None:
         return data
 
     images_in = data['images']
@@ -161,10 +178,13 @@
 
     n_max_changes = 0
 
-    #
-    for
+    # im = images_in[0]
+    for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
 
+        # Always keep failed images; if the caller wants to remove these, they
+        # will use remove_failed_images
         if ('detections' not in im) or (im['detections'] is None):
+            images_out.append(im)
             continue
 
         p_orig = get_max_conf(im)
@@ -182,7 +202,7 @@
 
         # Otherwise find the max confidence
         else:
-            p = max(d['conf'] for d in detections)
+            p = max([d['conf'] for d in detections])
 
         im['detections'] = detections
 
@@ -190,11 +210,13 @@
         if abs(p_orig - p) > 0.00001:
 
             # We should only be *lowering* max confidence values (i.e., making them negative)
-            assert (p_orig <= 0) or (p < p_orig),
+            assert (p_orig <= 0) or (p < p_orig), \
+                'Confidence changed from {} to {}'.format(p_orig, p)
             n_max_changes += 1
 
         if 'max_detection_conf' in im:
             im['max_detection_conf'] = p
+
         images_out.append(im)
 
     # ...for each image
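Net effect of the changes above: failed images now pass through confidence subsetting untouched, detections below the threshold are dropped, and the max-confidence bookkeeping asserts with a readable message. A small self-contained illustration (a sketch; the dict layout follows the MegaDetector batch output format, and the options class and function from this module are assumed to be in scope):

```python
options = SubsetJsonDetectorOutputOptions()
options.confidence_threshold = 0.5

data = {
    'images': [
        # Normal image: the 0.1-confidence detection will be dropped
        {'file': 'a.jpg',
         'detections': [{'category': '1', 'conf': 0.9, 'bbox': [0.0, 0.0, 0.1, 0.1]},
                        {'category': '2', 'conf': 0.1, 'bbox': [0.0, 0.0, 0.1, 0.1]}]},
        # Failed image: passes through unchanged; use remove_failed_images to drop it
        {'file': 'b.jpg', 'detections': None, 'failure': 'Failure image access'}
    ]
}

data = subset_json_detector_output_by_confidence(data, options)
assert len(data['images'][0]['detections']) == 1
assert data['images'][1]['detections'] is None
```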
@@ -208,6 +230,107 @@
 # ...subset_json_detector_output_by_confidence()
 
 
+def subset_json_detector_output_by_categories(data, options):
+    """
+    Remove all detections that are not in the specified categories or fall below the per-category thresholds.
+    """
+
+    # If categories_to_keep is supplied as a list, convert to a dict
+    if options.categories_to_keep is not None:
+        if not isinstance(options.categories_to_keep, dict):
+            dict_categories_to_keep = {}
+            for category_id in options.categories_to_keep:
+                # Set unspecified thresholds to a silly negative value
+                dict_categories_to_keep[category_id] = -100000.0
+            options.categories_to_keep = dict_categories_to_keep
+
+    # If category_names_to_keep is supplied as a list, convert to a dict
+    if options.category_names_to_keep is not None:
+        if not isinstance(options.category_names_to_keep, dict):
+            dict_category_names_to_keep = {}
+            for category_name in options.category_names_to_keep:
+                # Set unspecified thresholds to a silly negative value
+                dict_category_names_to_keep[category_name] = -100000.0
+            options.category_names_to_keep = dict_category_names_to_keep
+
+    category_name_to_category_id = invert_dictionary(data['detection_categories'])
+
+    # If some categories are supplied as names, convert all to IDs and add to "categories_to_keep"
+    if options.category_names_to_keep is not None:
+        if options.categories_to_keep is None:
+            options.categories_to_keep = {}
+        for category_name in options.category_names_to_keep:
+            assert category_name in category_name_to_category_id, \
+                'Category {} not in detection categories'.format(category_name)
+            category_id = category_name_to_category_id[category_name]
+            assert category_id not in options.categories_to_keep, \
+                'Category {} ({}) specified as both a name and an ID'.format(
+                    category_name,category_id)
+            options.categories_to_keep[category_id] = options.category_names_to_keep[category_name]
+
+    if options.categories_to_keep is None:
+        return data
+
+    images_in = data['images']
+    images_out = []
+
+    print('Subsetting by categories (keeping {} categories):'.format(
+        len(options.categories_to_keep)))
+
+    for category_id in sorted(list(options.categories_to_keep.keys())):
+        if category_id not in data['detection_categories']:
+            print('Warning: category ID {} not in category map in this file'.format(category_id))
+        else:
+            print('{} ({}) (threshold {})'.format(
+                category_id,
+                data['detection_categories'][category_id],
+                options.categories_to_keep[category_id]))
+
+    n_detections_in = 0
+    n_detections_kept = 0
+
+    # im = images_in[0]
+    for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
+
+        # Always keep failed images; if the caller wants to remove these, they
+        # will use remove_failed_images
+        if ('detections' not in im) or (im['detections'] is None):
+            images_out.append(im)
+            continue
+
+        n_detections_in += len(im['detections'])
+
+        # Find all matching detections for this image
+        detections = []
+        for d in im['detections']:
+            if (d['category'] in options.categories_to_keep) and \
+               (d['conf'] > options.categories_to_keep[d['category']]):
+                detections.append(d)
+
+        im['detections'] = detections
+
+        if 'max_detection_conf' in im:
+            if len(detections) == 0:
+                p = 0
+            else:
+                p = max([d['conf'] for d in detections])
+            im['max_detection_conf'] = p
+
+        n_detections_kept += len(im['detections'])
+
+        images_out.append(im)
+
+    # ...for each image
+
+    data['images'] = images_out
+    print('done, kept {} detections (of {})'.format(
+        n_detections_kept,n_detections_in))
+
+    return data
+
+# ...subset_json_detector_output_by_categories()
+
+
 def remove_failed_images(data,options):
     """
     Remove failed images from [data]
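As a quick illustration of the new function's behavior (a sketch; the in-memory dicts follow the same MegaDetector output format as above, and the options class and function are assumed to be in scope):

```python
options = SubsetJsonDetectorOutputOptions()
options.category_names_to_keep = {'animal': 0.2}

data = {
    'detection_categories': {'1': 'animal', '2': 'person', '3': 'vehicle'},
    'images': [
        {'file': 'a.jpg',
         'detections': [{'category': '1', 'conf': 0.85, 'bbox': [0.0, 0.0, 0.2, 0.2]},
                        {'category': '2', 'conf': 0.90, 'bbox': [0.0, 0.0, 0.2, 0.2]},
                        {'category': '1', 'conf': 0.05, 'bbox': [0.0, 0.0, 0.2, 0.2]}]}
    ]
}

data = subset_json_detector_output_by_categories(data, options)

# Only the 0.85-confidence 'animal' detection survives: 'person' is not a kept
# category, and 0.05 is below the 0.2 threshold for 'animal'
assert len(data['images'][0]['detections']) == 1
```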
@@ -399,6 +522,10 @@ def subset_json_detector_output(input_filename, output_filename, options, data=None):
 
     data = subset_json_detector_output_by_confidence(data, options)
 
+    if (options.categories_to_keep is not None) or (options.category_names_to_keep is not None):
+
+        data = subset_json_detector_output_by_categories(data, options)
+
     if not options.split_folders:
 
         write_detection_results(data, output_filename, options)
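Putting it together, a complete programmatic invocation of the new category subsetting (per the header comments, there is no command-line support for this yet) might look like the following sketch; the import path is assumed from the module's location in this package, and the file names are hypothetical:

```python
from api.batch_processing.postprocessing.subset_json_detector_output import (
    SubsetJsonDetectorOutputOptions, subset_json_detector_output)

options = SubsetJsonDetectorOutputOptions()
options.confidence_threshold = 0.1
options.category_names_to_keep = ['animal']

# Hypothetical input/output files
subset_json_detector_output('md_results.json',
                            'md_results_animals_only.json',
                            options)
```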
--- classification/prepare_classification_script.py (5.0.6)
+++ classification/prepare_classification_script.py (5.0.8)
@@ -1,191 +1,191 @@
(Every line of this file was removed and re-added with textually identical content, likely an invisible change such as line endings; the content is shown once below.)
 ########
 #
 # prepare_classification_script.py
 #
 # Notebook-y script used to prepare a series of shell commands to run a classifier
 # (other than MegaClassifier) on a MegaDetector result set.
 #
 # Differs from prepare_classification_script_mc.py only in the final class mapping step.
 #
 ########
 
 #%% Job options
 
 import os
 
 organization_name = 'idfg'
 job_name = 'idfg-2022-01-27-EOE2021S_Group6'
 input_filename = 'idfg-2022-01-27-EOE2021S_Group6_detections.filtered_rde_0.60_0.85_30_0.20.json'
 image_base = '/datadrive/idfg/EOE2021S_Group6'
 crop_path = os.path.join(os.path.expanduser('~/crops'),job_name + '_crops')
 device_id = 1
 
 working_dir_base = os.path.join(os.path.expanduser('~/postprocessing'),
                                 organization_name,
                                 job_name)
 
 output_base = os.path.join(working_dir_base,'combined_api_outputs')
 
 assert os.path.isdir(working_dir_base)
 assert os.path.isdir(output_base)
 
 output_file = os.path.join(working_dir_base,'run_idfgclassifier_' + job_name + '.sh')
 
 input_files = [
     os.path.join(
         os.path.expanduser('~/postprocessing'),
         organization_name,
         job_name,
         'combined_api_outputs',
         input_filename
     )
 ]
 
 for fn in input_files:
     assert os.path.isfile(fn)
 
 
 #%% Constants
 
 include_cropping = False
 
 classifier_base = os.path.expanduser('~/models/camera_traps/idfg_classifier/idfg_classifier_20200905_042558')
 assert os.path.isdir(classifier_base)
 
 checkpoint_path = os.path.join(classifier_base,'idfg_classifier_ckpt_14_compiled.pt')
 assert os.path.isfile(checkpoint_path)
 
 classifier_categories_path = os.path.join(classifier_base,'label_index.json')
 assert os.path.isfile(classifier_categories_path)
 
 classifier_output_suffix = '_idfg_classifier_output.csv.gz'
 final_output_suffix = '_idfgclassifier.json'
 
 threshold_str = '0.65'
 n_threads_str = '50'
 image_size_str = '300'
 batch_size_str = '64'
 num_workers_str = '8'
 logdir = working_dir_base
 
 classification_threshold_str = '0.05'
 
 # This is just passed along to the metadata in the output file, it has no impact
 # on how the classification scripts run.
 typical_classification_threshold_str = '0.75'
 
 classifier_name = 'idfg4'
 
 
 #%% Set up environment
 
 commands = []
 # commands.append('cd MegaDetector/classification\n')
 # commands.append('conda activate cameratraps-classifier\n')
 
 
 #%% Crop images
 
 if include_cropping:
 
     commands.append('\n### Cropping ###\n')
 
     # fn = input_files[0]
     for fn in input_files:
 
         input_file_path = fn
         crop_cmd = ''
 
         crop_comment = '\n# Cropping {}\n'.format(fn)
         crop_cmd += crop_comment
 
         crop_cmd += "python crop_detections.py \\\n" + \
             input_file_path + ' \\\n' + \
             crop_path + ' \\\n' + \
             '--images-dir "' + image_base + '"' + ' \\\n' + \
             '--threshold "' + threshold_str + '"' + ' \\\n' + \
             '--square-crops ' + ' \\\n' + \
             '--threads "' + n_threads_str + '"' + ' \\\n' + \
             '--logdir "' + logdir + '"' + ' \\\n' + \
             '\n'
         crop_cmd = '{}'.format(crop_cmd)
         commands.append(crop_cmd)
 
 
 #%% Run classifier
 
 commands.append('\n### Classifying ###\n')
 
 # fn = input_files[0]
 for fn in input_files:
 
     input_file_path = fn
     classifier_output_path = crop_path + classifier_output_suffix
 
     classify_cmd = ''
 
     classify_comment = '\n# Classifying {}\n'.format(fn)
     classify_cmd += classify_comment
 
     classify_cmd += "python run_classifier.py \\\n" + \
         checkpoint_path + ' \\\n' + \
         crop_path + ' \\\n' + \
         classifier_output_path + ' \\\n' + \
         '--detections-json "' + input_file_path + '"' + ' \\\n' + \
         '--classifier-categories "' + classifier_categories_path + '"' + ' \\\n' + \
         '--image-size "' + image_size_str + '"' + ' \\\n' + \
         '--batch-size "' + batch_size_str + '"' + ' \\\n' + \
         '--num-workers "' + num_workers_str + '"' + ' \\\n'
 
     if device_id is not None:
         classify_cmd += '--device {}'.format(device_id)
 
     classify_cmd += '\n\n'
     classify_cmd = '{}'.format(classify_cmd)
     commands.append(classify_cmd)
 
 
 #%% Merge classification and detection outputs
 
 commands.append('\n### Merging ###\n')
 
 # fn = input_files[0]
 for fn in input_files:
 
     input_file_path = fn
     classifier_output_path = crop_path + classifier_output_suffix
     final_output_path = os.path.join(output_base,
                                      os.path.basename(classifier_output_path)).\
                                      replace(classifier_output_suffix,
                                              final_output_suffix)
     final_output_path = final_output_path.replace('_detections','')
     final_output_path = final_output_path.replace('_crops','')
 
     merge_cmd = ''
 
     merge_comment = '\n# Merging {}\n'.format(fn)
     merge_cmd += merge_comment
 
     merge_cmd += "python merge_classification_detection_output.py \\\n" + \
         classifier_output_path + ' \\\n' + \
         classifier_categories_path + ' \\\n' + \
         '--output-json "' + final_output_path + '"' + ' \\\n' + \
         '--detection-json "' + input_file_path + '"' + ' \\\n' + \
         '--classifier-name "' + classifier_name + '"' + ' \\\n' + \
         '--threshold "' + classification_threshold_str + '"' + ' \\\n' + \
         '--typical-confidence-threshold "' + typical_classification_threshold_str + '"' + ' \\\n' + \
         '\n'
     merge_cmd = '{}'.format(merge_cmd)
     commands.append(merge_cmd)
 
 
 #%% Write everything out
 
 with open(output_file,'w') as f:
     for s in commands:
         f.write('{}'.format(s))
 
 import stat
 st = os.stat(output_file)
 os.chmod(output_file, st.st_mode | stat.S_IEXEC)
 
--- data_management/cct_json_utils.py (5.0.6)
+++ data_management/cct_json_utils.py (5.0.8)
@@ -142,7 +142,8 @@ class IndexedJsonDb:
     def __init__(self, json_filename: Union[str, JSONObject],
                  b_normalize_paths: bool = False,
                  filename_replacements: Optional[Mapping[str, str]] = None,
-                 b_convert_classes_to_lower: bool = True):
+                 b_convert_classes_to_lower: bool = True,
+                 b_force_forward_slashes: bool = True):
         """
         json_filename can also be an existing json db
         """
@@ -162,11 +163,15 @@ class IndexedJsonDb:
             for c in self.db['categories']:
                 c['name'] = c['name'].lower()
 
+        # Normalize paths to simplify comparisons later
         if b_normalize_paths:
-            # Normalize paths to simplify comparisons later
             for im in self.db['images']:
                 im['file_name'] = os.path.normpath(im['file_name'])
 
+        if b_force_forward_slashes:
+            for im in self.db['images']:
+                im['file_name'] = im['file_name'].replace('\\','/')
+
         if filename_replacements is not None:
             for s in filename_replacements:
                 # Make custom replacements in filenames, typically used to
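The net effect of the two blocks above is that, with the new default `b_force_forward_slashes=True`, file names compare consistently across platforms. A standalone sketch of the combined transformation (the helper name is hypothetical; only the two steps are taken from the diff):

```python
import os

def _normalize_cct_file_name(fn, b_normalize_paths=True, b_force_forward_slashes=True):
    # Platform-specific normalization first (collapses '..', redundant separators)...
    if b_normalize_paths:
        fn = os.path.normpath(fn)
    # ...then force forward slashes so Windows and POSIX paths compare equal
    if b_force_forward_slashes:
        fn = fn.replace('\\', '/')
    return fn

# Same result on Windows and POSIX
assert _normalize_cct_file_name('blah\\foo\\bar.jpg') == 'blah/foo/bar.jpg'
```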
|