megadetector 5.0.24__py3-none-any.whl → 5.0.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of megadetector might be problematic.
- megadetector/data_management/cct_json_utils.py +15 -2
- megadetector/data_management/coco_to_yolo.py +53 -31
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
- megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
- megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
- megadetector/data_management/remap_coco_categories.py +60 -11
- megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
- megadetector/data_management/yolo_to_coco.py +45 -15
- megadetector/detection/run_detector.py +1 -0
- megadetector/detection/run_detector_batch.py +5 -4
- megadetector/postprocessing/classification_postprocessing.py +788 -524
- megadetector/postprocessing/compare_batch_results.py +176 -9
- megadetector/postprocessing/create_crop_folder.py +420 -0
- megadetector/postprocessing/load_api_results.py +4 -1
- megadetector/postprocessing/md_to_coco.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +158 -44
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/postprocessing/separate_detections_into_folders.py +20 -4
- megadetector/postprocessing/subset_json_detector_output.py +180 -15
- megadetector/postprocessing/validate_batch_results.py +13 -5
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
- megadetector/taxonomy_mapping/species_lookup.py +45 -2
- megadetector/utils/ct_utils.py +76 -3
- megadetector/utils/directory_listing.py +4 -4
- megadetector/utils/gpu_test.py +21 -3
- megadetector/utils/md_tests.py +142 -49
- megadetector/utils/path_utils.py +342 -19
- megadetector/utils/wi_utils.py +1286 -212
- megadetector/visualization/visualization_utils.py +16 -4
- megadetector/visualization/visualize_db.py +1 -1
- megadetector/visualization/visualize_detector_output.py +1 -4
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
megadetector/data_management/cct_json_utils.py

@@ -300,7 +300,10 @@ class SequenceOptions:
     def __init__(self):
         #: Images separated by <= this duration will be grouped into the same sequence.
         self.episode_interval_seconds = 60.0
-
+
+        #: How to handle invalid datetimes: 'error' or 'none'
+        self.datetime_conversion_failure_behavior = 'none'
+
 
 #%% Functions
 
@@ -445,7 +448,17 @@ def create_sequences(image_info,options=None):
         raise ValueError('Unrecognized type for [image_info]')
 
     # Modifies the images in place
-    _ = parse_datetimes_from_cct_image_list(image_info)
+    _ = parse_datetimes_from_cct_image_list(image_info,
+        conversion_failure_behavior=options.datetime_conversion_failure_behavior)
+
+    n_invalid_datetimes = 0
+    for im in image_info:
+        if not isinstance(im['datetime'],datetime.datetime):
+            assert im['datetime'] is None, 'At this point, datetimes should be valid or None'
+            n_invalid_datetimes += 1
+    if n_invalid_datetimes > 0:
+        print('Warning: {} of {} images have invalid datetimes'.format(
+            n_invalid_datetimes,len(image_info)))
 
     # Find all unique locations
     locations = set()
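For orientation, a minimal usage sketch (not part of the diff) of the new option, assuming the cct_json_utils API shown above; the image_info contents are hypothetical:

from megadetector.data_management.cct_json_utils import SequenceOptions, create_sequences

options = SequenceOptions()
options.episode_interval_seconds = 60.0
# New in 5.0.26: unparseable datetimes become None instead of raising an error
options.datetime_conversion_failure_behavior = 'none'

# image_info: a list of CCT-style image dicts with 'file_name', 'location',
# and 'datetime' fields; create_sequences() groups them in place
# create_sequences(image_info, options=options)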
megadetector/data_management/coco_to_yolo.py

@@ -47,6 +47,9 @@ def write_yolo_dataset_file(yolo_dataset_file,
         class_list (list or str): an ordered list of class names (the first item will be class 0,
             etc.), or the name of a text file containing an ordered list of class names (one per
             line, starting from class zero).
+        train_folder_relative (str, optional): train folder name, used only to populate dataset.yaml
+        val_folder_relative (str, optional): val folder name, used only to populate dataset.yaml
+        test_folder_relative (str, optional): test folder name, used only to populate dataset.yaml
     """
 
     # Read class names
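A hypothetical call illustrating the new options; only write_yolo_dataset_file, class_list, and the three *_folder_relative parameters are confirmed by this diff, and the remaining arguments (including the assumed dataset base folder positional) are guesses:

from megadetector.data_management.coco_to_yolo import write_yolo_dataset_file

write_yolo_dataset_file('/datasets/camera-traps/dataset.yaml',
                        '/datasets/camera-traps',   # assumed: dataset base folder
                        ['animal', 'person', 'vehicle'],
                        train_folder_relative='train',
                        val_folder_relative='val',
                        test_folder_relative='test')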
@@ -97,7 +100,7 @@ def coco_to_yolo(input_image_folder,
                  category_names_to_exclude=None,
                  category_names_to_include=None,
                  write_output=True,
-                 flatten_paths=
+                 flatten_paths=False):
     """
     Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
     dataset to a single folder in the process.
@@ -116,17 +119,21 @@ def coco_to_yolo(input_image_folder,
             images are left alone.
         source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'. The only difference
             is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
-            annotation
-
-
-
-
+            annotation as a special case, i.e. that's how an empty image is indicated. The original
+            COCO standard is a little ambiguous on this issue. If source_format is 'coco', we
+            either treat images as empty or error, depending on the value of [allow_empty_annotations].
+            [allow_empty_annotations] has no effect if source_format is 'coco_camera_traps'.
+        overwrite_images (bool, optional): over-write images in the output folder if they exist
         create_image_and_label_folder (bool, optional): whether to create separate folders called 'images' and
             'labels' in the YOLO output folder. If create_image_and_label_folders is False,
             a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
             be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
             images/a#b#c#image001.jpg, and the corresponding text file will be
             labels/a#b#c#image001.txt.
+        class_file_name (str, optional): .txt file (relative to the output folder) that we should
+            populate with a list of classes (or None to omit)
+        allow_empty_annotations (bool, optional): if this is False and [source_format] is 'coco',
+            we'll error on annotations that have no 'bbox' field
         clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1] before
             converting to YOLO xywh format
         image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we will write
@@ -139,12 +146,14 @@ def coco_to_yolo(input_image_folder,
         category_names_to_exclude (str, optional): category names that should not be represented in the
             YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
             you would want to specify this and [category_names_to_include].
-        category_names_to_include (str, optional): allow-list of category names that should be represented
-            YOLO output; only impacts annotations, does not prevent copying images. There's almost
-            you would want to specify this and [category_names_to_exclude].
+        category_names_to_include (str, optional): allow-list of category names that should be represented
+            in the YOLO output; only impacts annotations, does not prevent copying images. There's almost
+            no reason you would want to specify this and [category_names_to_exclude].
         write_output (bool, optional): determines whether we actually copy images and write annotations;
             setting this to False mostly puts this function in "dry run" "mode. The class list
             file is written regardless of the value of write_output.
+        flatten_paths (bool, optional): replace /'s in image filenames with [path_replacement_char],
+            which ensures that the output folder is a single flat folder.
 
     Returns:
         dict: information about the coco --> yolo mapping, containing at least the fields:
@@ -313,9 +322,9 @@ def coco_to_yolo(input_image_folder,
 
         elif source_format == 'coco_camera_traps':
 
-            # We allow empty bbox lists in COCO camera traps; this is typically a
-            # example in a dataset that has bounding boxes, and 0 is typically
-            # category.
+            # We allow empty bbox lists in COCO camera traps files; this is typically a
+            # negative example in a dataset that has bounding boxes, and 0 is typically
+            # the empty category, which is typically 0.
             if ann['category_id'] != 0:
                 if not printed_empty_annotation_warning:
                     printed_empty_annotation_warning = True
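To make the convention concrete, a hypothetical COCO Camera Traps annotation marking an empty image carries no 'bbox' field and points at the empty category:

empty_annotation = {
    'id': 'ann_0001',        # hypothetical IDs
    'image_id': 'img_0001',
    'category_id': 0         # the 'empty' category by convention; note the absent 'bbox' key
}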
@@ -429,13 +438,14 @@ def coco_to_yolo(input_image_folder,
 
     print('Generating class list')
 
-
-
-
-
-
-
-
+    if class_file_name is not None:
+        class_list_filename = os.path.join(output_folder,class_file_name)
+        with open(class_list_filename, 'w') as f:
+            print('Writing class list to {}'.format(class_list_filename))
+            for i_class in range(0,len(yolo_id_to_name)):
+                # Category IDs should range from 0..N-1
+                assert i_class in yolo_id_to_name
+                f.write(yolo_id_to_name[i_class] + '\n')
 
     if image_id_to_output_image_json_file is not None:
         print('Writing image ID mapping to {}'.format(image_id_to_output_image_json_file))
@@ -457,6 +467,9 @@ def coco_to_yolo(input_image_folder,
 
     source_image_to_dest_image = {}
 
+    label_files_written = []
+    n_boxes_written = 0
+
     # TODO: parallelize this loop
     #
     # output_info = images_to_copy[0]
@@ -471,6 +484,7 @@ def coco_to_yolo(input_image_folder,
 
         source_image_to_dest_image[source_image] = dest_image
 
+        # Copy the image if necessary
         if write_output:
 
             os.makedirs(os.path.dirname(dest_image),exist_ok=True)
@@ -482,17 +496,24 @@ def coco_to_yolo(input_image_folder,
             if (not os.path.isfile(dest_image)) or (overwrite_images):
                 shutil.copyfile(source_image,dest_image)
 
-
+        bboxes = output_info['bboxes']
+
+        # Write the annotation file if necessary
+        #
+        # Only write an annotation file if there are bounding boxes. Images with
+        # no .txt files are treated as hard negatives, at least by YOLOv5:
+        #
+        # https://github.com/ultralytics/yolov5/issues/3218
+        #
+        # I think this is also true for images with empty .txt files, but
+        # I'm using the convention suggested on that issue, i.e. hard
+        # negatives are expressed as images without .txt files.
+        if len(bboxes) > 0:
 
-
-
-
-
-        #
-        # I think this is also true for images with empty .txt files, but
-        # I'm using the convention suggested on that issue, i.e. hard
-        # negatives are expressed as images without .txt files.
-        if len(bboxes) > 0:
+            n_boxes_written += len(bboxes)
+            label_files_written.append(dest_txt)
+
+            if write_output:
 
                 with open(dest_txt,'w') as f:
 
@@ -501,8 +522,7 @@ def coco_to_yolo(input_image_folder,
                         assert len(bbox) == 5
                         s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
                         f.write(s + '\n')
-
-            # ...if we're actually writing output
+
 
     # ...for each image
 
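Each line written above follows the standard YOLO label format, 'class x_center y_center width height', with coordinates normalized to [0,1]; for example (hypothetical values):

0 0.5120 0.4300 0.2100 0.1800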
@@ -510,6 +530,8 @@ def coco_to_yolo(input_image_folder,
     coco_to_yolo_info['class_list_filename'] = class_list_filename
     coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
     coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
+    coco_to_yolo_info['label_files_written'] = label_files_written
+    coco_to_yolo_info['n_boxes_written'] = n_boxes_written
 
     return coco_to_yolo_info
 
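A sketch of an end-to-end call, assuming the signature documented above; the positional argument order and the paths are assumptions, not confirmed by this diff:

from megadetector.data_management.coco_to_yolo import coco_to_yolo

info = coco_to_yolo('/datasets/coco-ct/images',     # input_image_folder
                    '/datasets/yolo-ct',            # assumed: output folder
                    '/datasets/coco-ct/ann.json',   # assumed: input COCO .json
                    source_format='coco_camera_traps',
                    class_file_name='classes.txt',
                    flatten_paths=False)

# New in 5.0.26: the returned dict reports what was actually written
print('Wrote {} boxes across {} label files'.format(
    info['n_boxes_written'], len(info['label_files_written'])))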
megadetector/data_management/databases/combine_coco_camera_traps_files.py

@@ -24,8 +24,10 @@ import sys
 
 #%% Merge functions
 
-def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
-
+def combine_cct_files(input_files,
+                      output_file=None,
+                      require_uniqueness=True,
+                      filename_prefixes=None):
     """
     Merges the list of COCO Camera Traps files [input_files] into a single
     dictionary, optionally writing the result to [output_file].
@@ -33,8 +35,10 @@ def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
     Args:
         input_files (list): paths to CCT .json files
         output_file (str, optional): path to write merged .json file
-        require_uniqueness (bool): whether to require that the images in
+        require_uniqueness (bool, optional): whether to require that the images in
             each input_dict be unique
+        filename_prefixes (dict, optional): dict mapping input filenames to strings
+            that should be prepended to image filenames from that source
 
     Returns:
         dict: the merged COCO-formatted .json dict
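A sketch of the extended merge call, assuming the signature above; the file names and prefixes are hypothetical:

from megadetector.data_management.databases.combine_coco_camera_traps_files import combine_cct_files

merged = combine_cct_files(['site_a.json', 'site_b.json'],
                           output_file='merged.json',
                           require_uniqueness=True,
                           # disambiguates image filenames that collide across sources
                           filename_prefixes={'site_a.json': 'site_a/',
                                              'site_b.json': 'site_b/'})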
megadetector/data_management/databases/integrity_check_json_db.py

@@ -327,7 +327,7 @@ def integrity_check_json_db(jsonFile, options=None):
 
     for i_image,result in enumerate(results):
         if result is not None:
-            validation_errors.append(images[i_image]['file_name'],result)
+            validation_errors.append((images[i_image]['file_name'],result))
 
     # ...for each image
 
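This is a straightforward bug fix: list.append takes exactly one argument, so the old two-argument call raised a TypeError ('append() takes exactly one argument') whenever a validation error was recorded. The fix stores the pair as a single tuple:

errors = []
errors.append(('img_0001.jpg', 'missing width'))   # one argument: a (file, error) tuple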
@@ -393,7 +393,7 @@ def integrity_check_json_db(jsonFile, options=None):
     elif image['_count'] > 1:
         nMultiAnnotated += 1
 
-    print('
+    print('\nFound {} unannotated images, {} images with multiple annotations'.format(
         nUnannotated,nMultiAnnotated))
 
     if (len(base_dir) > 0) and options.bFindUnusedImages:
megadetector/data_management/lila/add_locations_to_island_camera_traps.py

@@ -20,78 +20,82 @@ preview_folder = os.path.expanduser('~/tmp/island_conservation_preview')
 image_directory = os.path.expanduser('~/data/icct/public/')
 
 
-#%%
+#%% Prevent imports during testing
 
-
-    d = json.load(f)
-
-d['info']
-d['info']['version'] = '1.01'
-
-
-#%% Find locations
-
-images = d['images']
-
-locations = set()
+if False:
 
-
-    tokens_fn = im['file_name'].split('/')
-    tokens_id = im['id'].split('_')
-    assert tokens_fn[0] == tokens_id[0]
-    assert tokens_fn[1] == tokens_id[1]
-    location = tokens_fn[0] + '_' + tokens_fn[1]
-    im['location'] = location
-    locations.add(location)
-
-locations = sorted(list(locations))
+    #%% Read input file
 
-
-
+    with open(input_fn,'r') as f:
+        d = json.load(f)
+
+    d['info']
+    d['info']['version'] = '1.01'
 
 
-#%%
-
-with open(output_fn,'w') as f:
-    json.dump(d,f,indent=1)
+    #%% Find locations
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    images = d['images']
+
+    locations = set()
+
+    for i_image,im in tqdm(enumerate(images),total=len(images)):
+        tokens_fn = im['file_name'].split('/')
+        tokens_id = im['id'].split('_')
+        assert tokens_fn[0] == tokens_id[0]
+        assert tokens_fn[1] == tokens_id[1]
+        location = tokens_fn[0] + '_' + tokens_fn[1]
+        im['location'] = location
+        locations.add(location)
+
+    locations = sorted(list(locations))
+
+    for s in locations:
+        print(s)
+
+
+    #%% Write output file
+
+    with open(output_fn,'w') as f:
+        json.dump(d,f,indent=1)
+
+
+    #%% Validate .json files
+
+    from megadetector.data_management.databases import integrity_check_json_db
+
+    options = integrity_check_json_db.IntegrityCheckOptions()
+    options.baseDir = image_directory
+    options.bCheckImageSizes = False
+    options.bCheckImageExistence = True
+    options.bFindUnusedImages = True
+
+    sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_fn, options)
+
+
+    #%% Preview labels
+
+    from megadetector.visualization import visualize_db
+
+    viz_options = visualize_db.DbVizOptions()
+    viz_options.num_to_visualize = 2000
+    viz_options.trim_to_images_with_bboxes = False
+    viz_options.add_search_links = False
+    viz_options.sort_by_filename = False
+    viz_options.parallelize_rendering = True
+    viz_options.classes_to_exclude = ['test']
+    html_output_file, image_db = visualize_db.visualize_db(db_path=output_fn,
+                                                           output_dir=preview_folder,
+                                                           image_base_dir=image_directory,
+                                                           options=viz_options)
+
+    from megadetector.utils import path_utils
+    path_utils.open_file(html_output_file)
+
+
+    #%% Zip output file
+
+    from megadetector.utils.path_utils import zip_file
+
+    zip_file(output_fn, verbose=True)
+    assert os.path.isfile(output_fn + '.zip')
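A worked example of the location parsing above, with a hypothetical ICCT-style file name (real file names follow the same island/camera layout that the asserts encode):

file_name = 'palau/cam03/2019/IMG_0001.JPG'
image_id = 'palau_cam03_2019_IMG_0001'
tokens_fn = file_name.split('/')   # ['palau', 'cam03', '2019', 'IMG_0001.JPG']
tokens_id = image_id.split('_')    # ['palau', 'cam03', '2019', 'IMG', '0001']
assert tokens_fn[0] == tokens_id[0] and tokens_fn[1] == tokens_id[1]
location = tokens_fn[0] + '_' + tokens_fn[1]   # 'palau_cam03'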
megadetector/data_management/lila/add_locations_to_nacti.py

@@ -21,127 +21,131 @@ input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
 output_file = r'g:\temp\nacti_metadata.1.14.json'
 
 
-#%%
+#%% Prevent execution during testing
 
-
-    d = json.load(f)
-
-assert d['info']['version'] == 1.13
-
-
-#%% Map images to locations (according to the metadata)
-
-file_name_to_original_location = {}
-
-# im = dataset_labels['images'][0]
-for im in tqdm(d['images']):
-    file_name_to_original_location[im['file_name']] = im['location']
-
-original_locations = set(file_name_to_original_location.values())
-
-print('Found {} locations in the original metadata:'.format(len(original_locations)))
-for loc in original_locations:
-    print('[{}]'.format(loc))
+if False:
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    #%% Read metadata
+
+    with open(input_file,'r') as f:
+        d = json.load(f)
+
+    assert d['info']['version'] == 1.13
+
+
+    #%% Map images to locations (according to the metadata)
+
+    file_name_to_original_location = {}
+
+    # im = dataset_labels['images'][0]
+    for im in tqdm(d['images']):
+        file_name_to_original_location[im['file_name']] = im['location']
+
+    original_locations = set(file_name_to_original_location.values())
+
+    print('Found {} locations in the original metadata:'.format(len(original_locations)))
+    for loc in original_locations:
+        print('[{}]'.format(loc))
+
+
+    #%% Map images to new locations
+
+    def path_to_location(relative_path):
+
+        relative_path = relative_path.replace('\\','/')
+        if relative_path in file_name_to_original_location:
+            location_name = file_name_to_original_location[relative_path]
+            if location_name == 'San Juan Mntns, Colorado':
+                # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[1].startswith('Unit')
+                location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
+            elif location_name == 'Lebec, California':
+                # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
+                location_name = 'lebec_{}'.format(tokens[0])
+            elif location_name == 'Archbold, FL':
+                # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[0].startswith('FL-')
+                location_name = 'archbold_{}'.format(tokens[0])
+            else:
+                assert location_name == ''
+                tokens = relative_path.split('/')[-1].split('_')
+                if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
+                    location_name = '{}'.format(tokens[0])
+
         else:
-    assert location_name == ''
-    tokens = relative_path.split('/')[-1].split('_')
-    if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
-        location_name = '{}'.format(tokens[0])
 
-
+            location_name = 'unknown'
 
-
+        # print('Returning location {} for file {}'.format(location_name,relative_path))
 
-
-
-return location_name
-
-file_name_to_updated_location = {}
-updated_location_to_count = defaultdict(int)
-for im in tqdm(d['images']):
+        return return_location_name_placeholder if False else location_name
 
-
-
-
-
-
-
-
-
-updated_locations = set(file_name_to_updated_location.values())
-
-print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
-for loc in updated_location_to_count:
-    print('{}: {}'.format(loc,updated_location_to_count[loc]))
-
-
-#%% Re-write metadata
-
-for im in d['images']:
-    im['location'] = file_name_to_updated_location[im['file_name']]
-d['info']['version'] = 1.14
-
-with open(output_file,'w') as f:
-    json.dump(d,f,indent=1)
+    file_name_to_updated_location = {}
+    updated_location_to_count = defaultdict(int)
+    for im in tqdm(d['images']):
+
+        updated_location = path_to_location(im['file_name'])
+        file_name_to_updated_location[im['file_name']] = updated_location
+        updated_location_to_count[updated_location] += 1
 
-
-
-
-input_base = r'd:\lila\nacti-unzipped'
-assert os.path.isdir(input_base)
-
-location_to_images = defaultdict(list)
-
-for im in d['images']:
-    location_to_images[im['location']].append(im)
+    updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
+                                                         key=lambda item: item[1],
+                                                         reverse=True)}
 
-
-import random
-random.seed(0)
-sampling_folder_base = r'g:\temp\nacti_samples'
-
-for location in tqdm(location_to_images):
+    updated_locations = set(file_name_to_updated_location.values())
 
-
-
-
-
-
+    print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
+    for loc in updated_location_to_count:
+        print('{}: {}'.format(loc,updated_location_to_count[loc]))
+
+
+    #%% Re-write metadata
+
+    for im in d['images']:
+        im['location'] = file_name_to_updated_location[im['file_name']]
+    d['info']['version'] = 1.14
+
+    with open(output_file,'w') as f:
+        json.dump(d,f,indent=1)
 
-
-
-
-
-
-
-
-
+
+    #%% For each location, sample some random images to make sure they look consistent
+
+    input_base = r'd:\lila\nacti-unzipped'
+    assert os.path.isdir(input_base)
+
+    location_to_images = defaultdict(list)
+
+    for im in d['images']:
+        location_to_images[im['location']].append(im)
 
-
+    n_to_sample = 10
+    import random
+    random.seed(0)
+    sampling_folder_base = r'g:\temp\nacti_samples'
 
-
+    for location in tqdm(location_to_images):
+
+        images_this_location = location_to_images[location]
+        if len(images_this_location) > n_to_sample:
+            images_this_location = random.sample(images_this_location,n_to_sample)
 
+        for i_image,im in enumerate(images_this_location):
+
+            fn_relative = im['file_name']
+            source_fn_abs = os.path.join(input_base,fn_relative)
+            assert os.path.isfile(source_fn_abs)
+            ext = os.path.splitext(fn_relative)[1]
+            target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
+                location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
+            os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
+            shutil.copyfile(source_fn_abs,target_fn_abs)
+
+        # ...for each image
+
+    # ...for each location
+
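A worked example of path_to_location() above, using the sample path quoted in the code comments (this assumes the file maps to the 'Lebec, California' branch):

relative_path = 'part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg'
tokens = relative_path.split('/')[-1].split('_')   # ['CA-03', '08', '13', '2015', 'CA-03', '0009738.jpg']
assert tokens[0].startswith('CA-')
location_name = 'lebec_{}'.format(tokens[0])       # 'lebec_CA-03'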
megadetector/data_management/lila/generate_lila_per_image_labels.py

@@ -349,7 +349,7 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
 
 # ...with open()
 
-print('
+print('\nProcessed {} datasets'.format(len(metadata_table)))
 
 
 #%% Read the .csv back
@@ -393,7 +393,7 @@ def check_row(row):
     dataset_name_to_locations[ds_name].add(row['location_id'])
 
 # Faster, but more annoying to debug
-if
+if True:
 
     df.progress_apply(check_row, axis=1)
 
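As an aside, df.progress_apply in the context line above is tqdm's drop-in replacement for pandas' apply; it becomes available after registering tqdm with pandas. A standard usage sketch (not from this diff), with hypothetical data:

import pandas as pd
from tqdm import tqdm

tqdm.pandas()   # adds DataFrame.progress_apply
df = pd.DataFrame({'location_id': ['loc_01', 'loc_02']})
df.progress_apply(lambda row: row['location_id'].upper(), axis=1)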