megadetector-5.0.20-py3-none-any.whl → megadetector-5.0.22-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/data_management/cct_json_utils.py +143 -7
- megadetector/data_management/cct_to_md.py +12 -5
- megadetector/data_management/databases/integrity_check_json_db.py +83 -77
- megadetector/data_management/importers/osu-small-animals-to-json.py +4 -4
- megadetector/data_management/importers/raic_csv_to_md_results.py +416 -0
- megadetector/data_management/importers/zamba_results_to_md_results.py +1 -2
- megadetector/data_management/lila/create_lila_test_set.py +25 -11
- megadetector/data_management/lila/download_lila_subset.py +9 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +5 -1
- megadetector/data_management/read_exif.py +10 -14
- megadetector/data_management/rename_images.py +1 -1
- megadetector/data_management/yolo_output_to_md_output.py +18 -5
- megadetector/detection/process_video.py +14 -3
- megadetector/detection/pytorch_detector.py +15 -3
- megadetector/detection/run_detector.py +4 -3
- megadetector/detection/run_inference_with_yolov5_val.py +121 -13
- megadetector/detection/video_utils.py +40 -17
- megadetector/postprocessing/classification_postprocessing.py +1 -1
- megadetector/postprocessing/combine_api_outputs.py +1 -1
- megadetector/postprocessing/compare_batch_results.py +931 -142
- megadetector/postprocessing/detector_calibration.py +565 -0
- megadetector/postprocessing/md_to_coco.py +85 -19
- megadetector/postprocessing/postprocess_batch_results.py +32 -21
- megadetector/postprocessing/validate_batch_results.py +174 -64
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -12
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
- megadetector/utils/ct_utils.py +64 -2
- megadetector/utils/md_tests.py +15 -13
- megadetector/utils/path_utils.py +153 -37
- megadetector/utils/process_utils.py +9 -3
- megadetector/utils/write_html_image_list.py +21 -6
- megadetector/visualization/visualization_utils.py +329 -102
- megadetector/visualization/visualize_db.py +104 -63
- {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/LICENSE +0 -0
- {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/METADATA +143 -142
- {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/RECORD +40 -39
- {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/WHEEL +1 -1
- {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
megadetector/data_management/importers/raic_csv_to_md_results.py

@@ -0,0 +1,416 @@
+"""
+
+raic_csv_to_md_results.py
+
+Converts classification+detection results in the .csv format provided to the Snapshot
+Serengeti program by the RAIC team to the MD results format.
+
+The input format is two .csv files:
+
+* One with results, with columns [unnamed], filename, category, x_center, y_center,
+  width, height, confidence, datetime
+
+* One with class IDs and names, with columns CLASS, SPECIES
+
+Filenames are relative paths to .txt files, but with slashes replaced by underscores, e.g. this
+file:
+
+B04_R1/I__00122.JPG
+
+...appears in the .csv file as:
+
+B04_R1_I__00122.txt
+
+Image coordinates are in absolute floating-point units, with an upper-left origin.
+
+Unknowns at the time I'm writing this:
+
+* I don't know what the unnamed column is, but it looks like an ID I can safely ignore.
+
+* I believe that MegaDetector was run, then a classifier was run, but there is a
+  single "confidence" column in the output. I am writing out the results as if they were a
+  single multi-class detector. This is suspicious given the lack of a human class, which suggests
+  that this is intended to be run in conjunction with MD.
+
+* There is no concept of "empty" in this file format, so by default I assume that images with
+  no annotations in the .csv file were processed and determined to have no detections above some
+  (unknown) threshold.
+
+* I'm not currently handling EXIF rotations, as part of the effort to simplify this file
+  for conversion to R (see below).
+
+Note to self: this file should not take dependencies on other components of the MD
+repo, at the risk of creating some redundancy. I am going to convert this to R,
+which will be easier if it's not using any non-standard libraries. Anything in the
+"interactive driver" cells gets a pass.
+
+"""
+
+#%% Imports and constants
+
+import os
+import glob
+import json
+import sys
+import argparse
+
+import pandas as pd
+from PIL import Image
+
+
+#%% Functions from the MD python package
+
+# ...that I'm choosing to copy and paste to facilitate a conversion of this
+# script to R.
+
+# Should all be lower-case
+IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
+
+def _is_image_file(s, img_extensions=IMG_EXTENSIONS):
+    """
+    Checks a file's extension against a hard-coded set of image file
+    extensions. Uses case-insensitive comparison.
+
+    Does not check whether the file exists, only determines whether the filename
+    implies it's an image file.
+
+    Args:
+        s (str): filename to evaluate for image-ness
+        img_extensions (list, optional): list of known image file extensions
+
+    Returns:
+        bool: True if [s] appears to be an image file, else False
+    """
+
+    ext = os.path.splitext(s)[1]
+    return ext.lower() in img_extensions
+
+
+def _find_image_strings(strings):
+    """
+    Given a list of strings that are potentially image file names, looks for
+    strings that actually look like image file names (based on extension).
+
+    Args:
+        strings (list): list of filenames to check for image-ness
+
+    Returns:
+        list: the subset of [strings] that appear to be image filenames
+    """
+
+    return [s for s in strings if _is_image_file(s)]
+
+
+def _find_images(dirname,
+                 recursive=False,
+                 return_relative_paths=False,
+                 convert_slashes=True):
+    """
+    Finds all files in a directory that look like image file names. Returns
+    absolute paths unless return_relative_paths is set. Uses the OS-native
+    path separator unless convert_slashes is set, in which case will always
+    use '/'.
+
+    Args:
+        dirname (str): the folder to search for images
+        recursive (bool, optional): whether to search recursively
+        return_relative_paths (str, optional): return paths that are relative
+            to [dirname], rather than absolute paths
+        convert_slashes (bool, optional): force forward slashes in return values
+
+    Returns:
+        list: list of image filenames found in [dirname]
+    """
+
+    assert os.path.isdir(dirname), '{} is not a folder'.format(dirname)
+
+    if recursive:
+        strings = glob.glob(os.path.join(dirname, '**', '*.*'), recursive=True)
+    else:
+        strings = glob.glob(os.path.join(dirname, '*.*'))
+
+    image_files = _find_image_strings(strings)
+
+    if return_relative_paths:
+        image_files = [os.path.relpath(fn,dirname) for fn in image_files]
+
+    image_files = sorted(image_files)
+
+    if convert_slashes:
+        image_files = [fn.replace('\\', '/') for fn in image_files]
+
+    return image_files
+
+
+#%% Main conversion function
+
+def raic_csv_to_md_results(result_csv_file,
+                           class_mapping_csv_file,
+                           image_folder,
+                           output_file=None,
+                           unannotated_image_handling='empty'):
+    """
+    Converts a pair of .csv files (see file header for details) to MD results format.
+
+    Currently errors if image filenames are ambiguous, or if any images referred to in
+    the results are not available.
+
+    Args:
+        result_csv_file (str): the results file to read (.csv)
+        class_mapping_csv_file (str): the class mapping file (.csv)
+        image_folder (str): the folder containing all the images referred to in
+            [result_csv_file]
+        output_file (str, optional): the .json file to which we should write results. Defaults
+            to [result_csv_file].json
+        unannotated_image_handling (str, optional): can be "empty" (default) to assume
+            that images without annotations are empty, "warning", "error", or "skip"
+
+    Returns:
+        str: the output file written, identical to [output_file] if [output_file] was not None
+    """
+
+    # Validate arguments
+    assert os.path.isfile(result_csv_file), \
+        'Result file {} not found'.format(result_csv_file)
+    assert os.path.isfile(class_mapping_csv_file), \
+        'Class mapping file {} not found'.format(class_mapping_csv_file)
+    assert os.path.isdir(image_folder), \
+        'Image folder {} not found'.format(image_folder)
+
+    if output_file is None:
+        output_file = result_csv_file + '.json'
+
+    image_files_relative = _find_images(image_folder,
+                                        recursive=True,
+                                        return_relative_paths=True,
+                                        convert_slashes=True)
+    image_file_base_flattened_to_image_file_relative = {}
+    for fn in image_files_relative:
+        # Convert, e.g. B04_R1/I__00108.JPG to B04_R1_I__00108
+        fn_flattened = fn.replace('/','_')
+        fn_flattened_base = os.path.splitext(fn_flattened)[0]
+        image_file_base_flattened_to_image_file_relative[fn_flattened_base] = \
+            fn
+
+    # Read the .csv files
+    df_results = pd.read_csv(result_csv_file)
+    df_class_mapping = pd.read_csv(class_mapping_csv_file)
+
+    assert 'CLASS' in df_class_mapping.columns and 'SPECIES' in df_class_mapping.columns, \
+        'Unexpected column names in class mapping file {}'.format(class_mapping_csv_file)
+
+    category_id_to_name = {}
+    for i_row,row in df_class_mapping.iterrows():
+        class_id = int(row['CLASS'])
+        assert class_id not in category_id_to_name, \
+            'Class ID {} occurs more than once in class mapping file {}'.format(
+                class_id,class_mapping_csv_file)
+        category_id_to_name[class_id] = row['SPECIES']
+
+    if len(category_id_to_name) != len(set(category_id_to_name.values())):
+        print('Warning: one or more categories are used more than once in class mapping file {}'.format(
+            class_mapping_csv_file))
+
+    # Convert results
+
+    fn_relative_to_im = {}
+
+    # i_row = 0; row = df_results.iloc[i_row]
+    for i_row,row in df_results.iterrows():
+
+        # Map the .txt filename base to a relative path
+        bn = row['filename']
+        assert bn.lower().endswith('.txt')
+        bn_no_ext = os.path.splitext(bn)[0]
+        assert bn_no_ext in image_file_base_flattened_to_image_file_relative, \
+            'No image found for result row {}'.format(row['filename'])
+
+        image_fn_relative = image_file_base_flattened_to_image_file_relative[bn_no_ext]
+
+        # Have we seen another detection for this image?
+        if image_fn_relative in fn_relative_to_im:
+
+            im = fn_relative_to_im[image_fn_relative]
+
+        # If not, load this image so we can read its size
+        else:
+
+            image_fn_abs = os.path.join(image_folder,image_fn_relative)
+            image = Image.open(image_fn_abs)
+            w = image.size[0]
+            h = image.size[1]
+
+            im = {}
+            im['file'] = image_fn_relative
+            im['width'] = w
+            im['height'] = h
+            im['detections'] = []
+            im['datetime'] = str(row['datetime'])
+            fn_relative_to_im[image_fn_relative] = im
+
+        # Convert annotation
+        x_center_abs = row['x_center']
+        y_center_abs = row['y_center']
+        box_width_abs = row['width']
+        box_height_abs = row['height']
+
+        # Convert to relative coordinates
+        box_left_abs = x_center_abs - (box_width_abs/2.0)
+        box_top_abs = y_center_abs - (box_height_abs/2.0)
+        bbox_normalized = [box_left_abs/im['width'],
+                           box_top_abs/im['height'],
+                           box_width_abs/im['width'],
+                           box_height_abs/im['height']]
+
+        category_id = str(int(row['category']))
+        confidence = row['confidence']
+        assert isinstance(confidence,float) and confidence <= 1.0 and confidence >= 0.0
+
+        det = {}
+        im['detections'].append(det)
+        det['category'] = category_id
+        det['conf'] = confidence
+        det['bbox'] = bbox_normalized
+
+    # ...for each row
+
+    n_empty_images = 0
+
+    # Handle images without annotations
+    for fn_relative in image_files_relative:
+
+        if fn_relative not in fn_relative_to_im:
+            if unannotated_image_handling == 'empty':
+                im = {}
+                im['file'] = fn_relative
+                im['detections'] = []
+                fn_relative_to_im[fn_relative] = im
+                n_empty_images += 1
+                # Don't bother to read width and height here
+            elif unannotated_image_handling == 'warning':
+                print('Warning: image {} is not represented in the .csv results file'.format(fn_relative))
+            elif unannotated_image_handling == 'error':
+                raise ValueError('Image {} is not represented in the .csv results file'.format(fn_relative))
+            elif unannotated_image_handling == 'skip':
+                continue
+
+    # ...for each image file
+
+    if n_empty_images > 0:
+        print('Warning: assuming {} of {} images without annotations are empty'.format(
+            n_empty_images,len(image_files_relative)))
+
+    images = list(fn_relative_to_im.values())
+
+    # The MD output format uses string-ints for category IDs, right now we have ints
+    detection_categories = {}
+    for category_id_int in category_id_to_name:
+        detection_categories[str(category_id_int)] = category_id_to_name[category_id_int]
+
+    info = {}
+    info['format_version'] = '1.4'
+    info['detector'] = 'RAIC .csv converter'
+
+    d = {}
+    d['images'] = images
+    d['detection_categories'] = detection_categories
+    d['info'] = info
+
+    with open(output_file,'w') as f:
+        json.dump(d,f,indent=1)
+
+    return output_file
+
+# ...def raic_csv_to_md_results(...)
+
+
+#%% Interactive driver
+
+if False:
+
+    pass
+
+    #%% Test conversion
+
+    base_folder = r'G:\temp\S24_B04_R1_output_annotations_for_Dan'
+    result_csv_file = os.path.join(base_folder,'S24_B04_R1_output_annotations_for_Dan.csv')
+    class_mapping_csv_file = os.path.join(base_folder,'categories_key.csv')
+
+    # This is wrong, B04_R1 has to be part of the image paths
+    # image_folder = os.path.join(base_folder,'B04_R1')
+
+    image_folder = base_folder
+
+    output_file = None
+    unannotated_image_handling='empty'
+
+    output_file = raic_csv_to_md_results(result_csv_file=result_csv_file,
+                                         class_mapping_csv_file=class_mapping_csv_file,
+                                         image_folder=image_folder,
+                                         output_file=output_file,
+                                         unannotated_image_handling=unannotated_image_handling)
+
+    #%% Validate results file
+
+    from megadetector.postprocessing.validate_batch_results import \
+        ValidateBatchResultsOptions, validate_batch_results
+
+    validation_options = ValidateBatchResultsOptions()
+    validation_options.check_image_existence = True
+    validation_options.relative_path_base = image_folder
+    validation_options.return_data = True
+
+    results = validate_batch_results(output_file,validation_options)
+    assert len(results['validation_results']['errors']) == 0
+    assert len(results['validation_results']['warnings']) == 0
+
+
+    #%% Preview results
+
+    from megadetector.postprocessing.postprocess_batch_results import \
+        PostProcessingOptions, process_batch_results
+
+    postprocessing_options = PostProcessingOptions()
+
+    postprocessing_options.md_results_file = output_file
+    postprocessing_options.output_dir = r'g:\temp\serengeti-conversion-preview'
+    postprocessing_options.image_base_dir = image_folder
+    postprocessing_options.confidence_threshold = 0.2
+    postprocessing_options.num_images_to_sample = None
+    postprocessing_options.viz_target_width = 1280
+    postprocessing_options.line_thickness = 4
+    postprocessing_options.parallelize_rendering_n_cores = 10
+    postprocessing_options.parallelize_rendering_with_threads = True
+
+    postprocessing_results = process_batch_results(postprocessing_options)
+
+    from megadetector.utils.path_utils import open_file
+    open_file(postprocessing_results.output_html_file)
+
+
+#%% Command-line driver
+
+def main():
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('result_csv_file', type=str,
+                        help='csv file containing AI results')
+    parser.add_argument('class_mapping_csv_file', type=str,
+                        help='csv file containing class mappings (with columns CLASS, SPECIES)')
+    parser.add_argument('image_folder', type=str,
+                        help='folder containing the images referred to in [result_csv_file]')
+    parser.add_argument('--output_file', type=str, default=None,
+                        help='.json file to which we should write results (defaults to [result_csv_file].json)')
+
+    if len(sys.argv[1:])==0:
+        parser.print_help()
+        parser.exit()
+
+    args = parser.parse_args()
+    raic_csv_to_md_results(result_csv_file=args.result_csv_file,
+                           class_mapping_csv_file=args.class_mapping_csv_file,
+                           image_folder=args.image_folder,
+                           output_file=args.output_file)
+
+if __name__ == '__main__':
+    main()
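
Worth pulling out of the new importer: the box math. The .csv stores absolute, center-based coordinates, while the MD results format wants normalized [x_min, y_min, width, height] with an upper-left origin. A minimal standalone sketch of that conversion (the function name is illustrative, not part of the package):

    def center_abs_to_md_bbox(x_center, y_center, box_w, box_h, img_w, img_h):
        # Shift from the box center to the upper-left corner...
        x_min = x_center - (box_w / 2.0)
        y_min = y_center - (box_h / 2.0)
        # ...then normalize everything by the image dimensions
        return [x_min / img_w, y_min / img_h, box_w / img_w, box_h / img_h]

    # E.g., a 100x50 box centered at (320, 240) in a 640x480 image:
    # center_abs_to_md_bbox(320, 240, 100, 50, 640, 480)
    # -> [0.421875, ~0.4479, 0.15625, ~0.1042]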
megadetector/data_management/importers/zamba_results_to_md_results.py

@@ -14,8 +14,7 @@
     corrected_label
 
 Because the MD results file fundamentally stores detections, what we'll
-actually do is
-coordinates are not currently used in Timelapse video video anyway.
+actually do is create bogus detections that fill the entire image.
 
 There is no special handling of empty/blank categories; because these results are
 based on a classifier, rather than a detector (where "blank" would be the absence of
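
The "bogus detections" approach maps whole-image classifier labels onto the detection-centric MD schema: in normalized coordinates, [0, 0, 1, 1] covers the entire image. A hedged sketch of what one such record looks like (the category ID and confidence values are illustrative):

    # A whole-image "detection" carrying a classifier label
    bogus_detection = {
        'category': '1',               # string-int category ID, per the MD format
        'conf': 0.95,                  # illustrative classifier confidence
        'bbox': [0.0, 0.0, 1.0, 1.0]   # [x_min, y_min, width, height], normalized
    }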
megadetector/data_management/lila/create_lila_test_set.py

@@ -16,8 +16,6 @@ import random
 from megadetector.data_management.lila.lila_common import \
     read_lila_metadata, read_metadata_file_for_dataset
 
-from megadetector.utils.url_utils import download_url
-
 n_empty_images_per_dataset = 1
 n_non_empty_images_per_dataset = 1
 
@@ -48,6 +46,8 @@ for ds_name in metadata_table.keys():
 
 #%% Choose images from each dataset
 
+# Takes ~60 seconds
+
 # ds_name = (list(metadata_table.keys()))[0]
 for ds_name in metadata_table.keys():
 
@@ -102,10 +102,12 @@ for ds_name in metadata_table.keys():
 
 #%% Convert to URLs
 
+preferred_cloud = 'gcp'
+
 # ds_name = (list(metadata_table.keys()))[0]
 for ds_name in metadata_table.keys():
 
-    base_url = metadata_table[ds_name]['
+    base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
 
     # Retrieve image file names
@@ -123,14 +125,14 @@ for ds_name in metadata_table.keys():
 # ...for each dataset
 
 
-#%% Download
+#%% Download image files (prep)
+
+url_to_target_file = {}
 
-# TODO: trivially parallelizable
-#
 # ds_name = (list(metadata_table.keys()))[0]
 for ds_name in metadata_table.keys():
 
-    base_url = metadata_table[ds_name]['
+    base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
     base_url += '/'
 
@@ -142,11 +144,23 @@ for ds_name in metadata_table.keys():
         assert base_url in url
         output_file_relative = ds_name.lower().replace(' ','_') + '_' + url.replace(base_url,'').replace('/','_').replace('\\','_')
         output_file_absolute = os.path.join(output_dir,output_file_relative)
-
-        download_url(url, destination_filename=output_file_absolute, force_download=False, verbose=True)
-    except Exception as e:
-        print('\n*** Error downloading {} ***\n{}'.format(url,str(e)))
+        url_to_target_file[url] = output_file_absolute
 
     # ...for each url
 
 # ...for each dataset
+
+
+#%% Download image files (execution)
+
+from megadetector.utils.url_utils import parallel_download_urls
+
+download_results = parallel_download_urls(url_to_target_file,
+                                          verbose=False,
+                                          overwrite=False,
+                                          n_workers=20,
+                                          pool_type='thread')
+
+# r = download_results[0]
+for r in download_results:
+    assert r['status'] in ('skipped','success')
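
The refactor above replaces a serial per-URL download loop with a collect-then-batch pattern: accumulate the complete url-to-filename mapping first, then hand the whole dict to a thread pool (downloads are I/O-bound, so threads fit better than processes). A minimal generic sketch of the same pattern, assuming a hypothetical download_one(url, filename) helper; parallel_download_urls is the package's real implementation:

    from concurrent.futures import ThreadPoolExecutor

    def download_all(url_to_target_file, download_one, n_workers=20):
        # Submit every (url, filename) pair to the pool, then wait for all results
        with ThreadPoolExecutor(max_workers=n_workers) as pool:
            futures = [pool.submit(download_one, url, fn)
                       for url, fn in url_to_target_file.items()]
            return [f.result() for f in futures]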
megadetector/data_management/lila/download_lila_subset.py

@@ -22,7 +22,8 @@ for s in lila_base_urls.values():
     assert s.endswith('/')
 
 # If any of these strings appear in the common name of a species, we'll download that image
-species_of_interest = ['grey fox','gray fox','cape fox','red fox','kit fox']
+# species_of_interest = ['grey fox','gray fox','cape fox','red fox','kit fox']
+species_of_interest = ['bear']
 
 # We'll write images, metadata downloads, and temporary files here
 lila_local_base = os.path.expanduser('~/lila')
@@ -45,7 +46,7 @@ random.seed(0)
 
 #%% Download and open the giant table of image URLs and labels
 
-# Takes ~
+# Takes ~2 minutes to download, unzip, and open
 df = read_lila_all_images_file(metadata_dir)
 
 
@@ -144,6 +145,12 @@ download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
                                           pool_type='thread')
 
 
+#%% Open output folder
+
+from megadetector.utils.path_utils import open_file
+open_file(output_dir)
+
+
 #%% Scrap
 
 if False:
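
Per the comment in the first hunk above, species_of_interest works by substring match against common names. A hedged sketch of that kind of filter against the big LILA table; the 'common_name' column name is an assumption for illustration:

    matching_rows = df[df['common_name'].fillna('').str.lower().apply(
        lambda name: any(s in name for s in species_of_interest))]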
megadetector/data_management/lila/generate_lila_per_image_labels.py

@@ -57,6 +57,7 @@ ds_name_to_annotation_level['Channel Islands Camera Traps'] = 'image'
 ds_name_to_annotation_level['WCS Camera Traps'] = 'sequence'
 ds_name_to_annotation_level['Wellington Camera Traps'] = 'sequence'
 ds_name_to_annotation_level['NACTI'] = 'unknown'
+ds_name_to_annotation_level['Seattle(ish) Camera Traps'] = 'image'
 
 known_unmapped_labels = set(['WCS Camera Traps:#ref!'])
 
@@ -103,7 +104,7 @@ for i_row,row in taxonomy_df.iterrows():
 
 #%% Process annotations for each dataset
 
-# Takes
+# Takes a few hours
 
 # The order of these headers needs to match the order in which fields are added later in this cell;
 # don't mess with this order.
@@ -173,7 +174,7 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
     expected_annotation_level = None
 
     # im = images[10]
-    for i_image,im in enumerate(images):
+    for i_image,im in tqdm(enumerate(images),total=len(images)):
 
         if (debug_max_images_per_dataset is not None) and (debug_max_images_per_dataset > 0) \
             and (i_image >= debug_max_images_per_dataset):
megadetector/data_management/lila/test_lila_metadata_urls.py

@@ -52,6 +52,8 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))
 
 #%% Download and extract metadata and MD results for each dataset
 
+# Takes ~60 seconds if everything needs to be downloaded and unzipped
+
 for ds_name in metadata_table.keys():
 
     # Download the main metadata file for this dataset
@@ -73,10 +75,12 @@ for ds_name in metadata_table.keys():
                                  json_url=md_results_url,
                                  force_download=force_download)
 
+# ...for each dataset
+
 
 #%% Build up a list of URLs to test
 
-# Takes ~15 mins, since it has to open all the giant .json files
+# Takes ~15 mins, since it has to open all the giant .json files.
 
 url_to_source = {}
 
megadetector/data_management/read_exif.py

@@ -16,7 +16,7 @@ path. No attempt is made to be consistent in format across the two approaches.
 import os
 import subprocess
 import json
-from datetime import
+from datetime import datetime
 
 from multiprocessing.pool import ThreadPool as ThreadPool
 from multiprocessing.pool import Pool as Pool
@@ -27,6 +27,7 @@ from PIL import Image, ExifTags
 from megadetector.utils.path_utils import find_images, is_executable
 from megadetector.utils.ct_utils import args_to_object
 from megadetector.utils.ct_utils import image_file_to_camera_folder
+from megadetector.data_management.cct_json_utils import write_object_with_serialized_datetimes
 
 debug_max_images = None
 
@@ -88,8 +89,8 @@ class ReadExifOptions:
 
 class ExifResultsToCCTOptions:
     """
-    Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
-    extracted by read_exif_from_folder().
+    Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
+    information) extracted by read_exif_from_folder().
     """
 
     def __init__(self):
@@ -730,9 +731,11 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
 
     Args:
         exif_results (str or list): the filename (or loaded list) containing the results
-
-
-
+            from read_exif_from_folder
+        cct_output_file (str,optional): the filename to which we should write
+            COCO-Camera-Traps-formatted data
+        options (ExifResultsToCCTOptions, optional): options guiding the generation
+            of the CCT file, particularly location mapping
 
     Returns:
         dict: a COCO Camera Traps dict (with no annotations).
@@ -811,15 +814,8 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
     d['annotations'] = []
     d['categories'] = []
 
-    def json_serialize_datetime(obj):
-        if isinstance(obj, (datetime, date)):
-            return obj.isoformat()
-        raise TypeError('Object {} (type {}) not serializable'.format(
-            str(obj),type(obj)))
-
     if cct_output_file is not None:
-
-        json.dump(d,f,indent=1,default=json_serialize_datetime)
+        write_object_with_serialized_datetimes(d,cct_output_file)
 
     return d
 
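
The final hunk is a small serialization cleanup: json.dump() can't handle datetime objects natively, so the old code passed a default= hook converting them to ISO-8601 strings, and the new code centralizes that in cct_json_utils.write_object_with_serialized_datetimes. A minimal sketch of such a helper, reconstructed from the deleted inline version (the real implementation in cct_json_utils may differ):

    import json
    from datetime import date, datetime

    def write_object_with_serialized_datetimes(obj, output_file):

        def _serialize_datetime(o):
            # Fall back to ISO-8601 strings for datetime/date values
            if isinstance(o, (datetime, date)):
                return o.isoformat()
            raise TypeError('Object {} (type {}) not serializable'.format(str(o), type(o)))

        with open(output_file, 'w') as f:
            json.dump(obj, f, indent=1, default=_serialize_datetime)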