megadetector 5.0.23__py3-none-any.whl → 5.0.25__py3-none-any.whl
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +2 -3
- megadetector/classification/merge_classification_detection_output.py +2 -2
- megadetector/data_management/coco_to_labelme.py +2 -1
- megadetector/data_management/databases/integrity_check_json_db.py +15 -14
- megadetector/data_management/databases/subset_json_db.py +49 -21
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
- megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
- megadetector/data_management/mewc_to_md.py +340 -0
- megadetector/data_management/speciesnet_to_md.py +41 -0
- megadetector/data_management/yolo_output_to_md_output.py +15 -8
- megadetector/detection/process_video.py +24 -7
- megadetector/detection/pytorch_detector.py +841 -160
- megadetector/detection/run_detector.py +341 -146
- megadetector/detection/run_detector_batch.py +307 -70
- megadetector/detection/run_inference_with_yolov5_val.py +61 -4
- megadetector/detection/tf_detector.py +6 -1
- megadetector/postprocessing/{combine_api_outputs.py → combine_batch_outputs.py} +10 -13
- megadetector/postprocessing/compare_batch_results.py +236 -7
- megadetector/postprocessing/create_crop_folder.py +358 -0
- megadetector/postprocessing/md_to_labelme.py +7 -7
- megadetector/postprocessing/md_to_wi.py +40 -0
- megadetector/postprocessing/merge_detections.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +12 -5
- megadetector/postprocessing/separate_detections_into_folders.py +32 -4
- megadetector/postprocessing/validate_batch_results.py +9 -4
- megadetector/utils/ct_utils.py +236 -45
- megadetector/utils/directory_listing.py +3 -3
- megadetector/utils/gpu_test.py +125 -0
- megadetector/utils/md_tests.py +455 -116
- megadetector/utils/path_utils.py +43 -2
- megadetector/utils/wi_utils.py +2691 -0
- megadetector/visualization/visualization_utils.py +95 -18
- megadetector/visualization/visualize_db.py +25 -7
- megadetector/visualization/visualize_detector_output.py +60 -13
- {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/METADATA +11 -23
- {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/RECORD +39 -36
- {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/WHEEL +1 -1
- megadetector/detection/detector_training/__init__.py +0 -0
- megadetector/detection/detector_training/model_main_tf2.py +0 -114
- megadetector/utils/torch_test.py +0 -32
- {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/LICENSE +0 -0
- {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/top_level.txt +0 -0
(In the hunks below, `…` marks removed-line content that is truncated in the diff view; runs of fully truncated removed lines are collapsed into a bracketed count.)

megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py

```diff
@@ -14,9 +14,8 @@ import redis
 import argparse
 import PIL
 
-from …
-
-from detection.run_detector import load_detector, convert_to_tf_coords
+from detection.run_detector import load_detector
+from utils.ct_utils import convert_xywh_to_xyxy as convert_to_tf_coords
 import config
 import visualization.visualization_utils as vis_utils
 
```
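The second added import aliases `ct_utils.convert_xywh_to_xyxy` to the name the API already used. A minimal sketch of what an xywh-to-xyxy conversion does, assuming MegaDetector's `[x_min, y_min, width, height]` box convention (illustrative only; the real implementation lives in `megadetector.utils.ct_utils`):

```python
# Illustrative sketch only -- the real helper is
# megadetector.utils.ct_utils.convert_xywh_to_xyxy.
def convert_xywh_to_xyxy(box):
    """Convert [x_min, y_min, width, height] to [x_min, y_min, x_max, y_max]."""
    x_min, y_min, width, height = box
    return [x_min, y_min, x_min + width, y_min + height]

print(convert_xywh_to_xyxy([10, 20, 30, 40]))  # [10, 20, 40, 60]
```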
megadetector/classification/merge_classification_detection_output.py

```diff
@@ -70,7 +70,7 @@ from typing import Any
 import pandas as pd
 from tqdm import tqdm
 
-from megadetector.utils.ct_utils import …
+from megadetector.utils.ct_utils import round_float
 
 
 #%% Example usage
@@ -124,7 +124,7 @@ def row_to_classification_list(row: Mapping[str, Any],
 
     # filter out confidences below the threshold, and set precision to 4
     result = [
-        (k, …
+        (k, round_float(conf, precision=4))
         for k, conf in result if conf >= threshold
     ]
 
```
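A `round_float`-style helper just controls the precision of confidence values before serialization. A minimal sketch, assuming `precision` counts decimal places (the real helper is in `megadetector.utils.ct_utils` and its exact semantics may differ):

```python
# Sketch of a round_float-style helper; the real implementation is in
# megadetector.utils.ct_utils.
def round_float(value, precision=4):
    return round(value, precision)

threshold = 0.1
result = [('deer', 0.873412), ('elk', 0.04)]
# The comprehension from the hunk above: drop low confidences, round the rest
result = [(k, round_float(conf, precision=4)) for k, conf in result if conf >= threshold]
print(result)  # [('deer', 0.8734)]
```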
megadetector/data_management/coco_to_labelme.py

```diff
@@ -18,6 +18,7 @@ from tqdm import tqdm
 from collections import defaultdict
 
 from megadetector.visualization.visualization_utils import open_image
+from megadetector.detection.run_detector import FAILURE_IMAGE_OPEN
 
 
 #%% Functions
@@ -145,7 +146,7 @@ def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check
         except Exception:
             print('Warning: cannot open image {}'.format(im_full_path))
             if 'failure' not in im:
-                im['failure'] = …
+                im['failure'] = FAILURE_IMAGE_OPEN
 
     # ...if we need to read w/h information
 
```
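The new import lets the conversion script tag unreadable images with the same failure constant `run_detector` uses, rather than an ad hoc string. A sketch of the pattern (the constant's value below is an assumption, for illustration only):

```python
# FAILURE_IMAGE_OPEN is a module-level string constant in
# megadetector.detection.run_detector; the value here is assumed.
FAILURE_IMAGE_OPEN = 'Failure image access'

im = {'file_name': 'broken.jpg'}  # hypothetical image record
try:
    raise OSError('cannot open')  # stand-in for a failed open_image() call
except Exception:
    print('Warning: cannot open image {}'.format(im['file_name']))
    if 'failure' not in im:
        im['failure'] = FAILURE_IMAGE_OPEN
```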
megadetector/data_management/databases/integrity_check_json_db.py

```diff
@@ -86,7 +86,7 @@ def _check_image_existence_and_size(image,options=None):
         options (IntegrityCheckOptions): parameters impacting validation
 
     Returns:
-        …
+        str: None if this image passes validation, otherwise an error string
     """
 
     if options is None:
@@ -96,23 +96,23 @@ def _check_image_existence_and_size(image,options=None):
 
     filePath = os.path.join(options.baseDir,image['file_name'])
     if not os.path.isfile(filePath):
-        …
-        return
+        s = 'Image path {} does not exist'.format(filePath)
+        return s
 
     if options.bCheckImageSizes:
         if not ('height' in image and 'width' in image):
-            …
-            return
+            s = 'Missing image size in {}'.format(filePath)
+            return s
 
         # width, height = Image.open(filePath).size
         pil_im = open_image(filePath)
         width,height = pil_im.size
         if (not (width == image['width'] and height == image['height'])):
-            …
-                image['id'], filePath, image['width'], image['height'], width, height)
-            return
+            s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
+                image['id'], filePath, image['width'], image['height'], width, height)
+            return s
 
-    return
+    return None
 
 
 def integrity_check_json_db(jsonFile, options=None):
@@ -287,6 +287,7 @@ def integrity_check_json_db(jsonFile, options=None):
         if fn_relative not in image_paths_in_json:
             unused_files.append(fn_relative)
 
+    # List of (filename,error_string) tuples
     validation_errors = []
 
     # If we're checking image existence but not image size, we don't need to read the images
@@ -298,8 +299,8 @@ def integrity_check_json_db(jsonFile, options=None):
         image_paths_relative_set = set(image_paths_relative)
 
         for im in images:
-            if im['file_name'] not in image_paths_relative_set:
-                validation_errors.append(im['file_name'])
+            if im['file_name'] not in image_paths_relative_set:
+                validation_errors.append((im['file_name'],'not found in relative path list'))
 
     # If we're checking image size, we need to read the images
     if options.bCheckImageSizes:
@@ -321,12 +322,12 @@ def integrity_check_json_db(jsonFile, options=None):
             results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
         else:
             results = []
-            for im in tqdm(images):
+            for im in tqdm(images):
                 results.append(_check_image_existence_and_size(im,options))
 
         for i_image,result in enumerate(results):
-            if result is not None:
-                validation_errors.append(images[i_image]['file_name'],result)
+            if result is not None:
+                validation_errors.append(images[i_image]['file_name'],result)
 
         # ...for each image
 
```
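With this change, `validation_errors` carries `(filename, error_string)` tuples rather than bare filenames, so callers can see why each image failed. (Note that the first branch appends a tuple, while the batch branch's `append(images[i_image]['file_name'],result)` passes two separate arguments to `list.append`.) A sketch of consuming the new format, with hypothetical filenames:

```python
# Sketch: consuming the (filename, error_string) tuples that
# integrity_check_json_db now accumulates. Filenames are made up.
validation_errors = [
    ('cam01/img_0001.jpg', 'not found in relative path list'),
    ('cam02/img_0042.jpg', 'Image path /data/cam02/img_0042.jpg does not exist'),
]
for filename, error in validation_errors:
    print('{}: {}'.format(filename, error))
```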
megadetector/data_management/databases/subset_json_db.py

```diff
@@ -12,16 +12,18 @@ subset_json_detector_output.py.
 
 #%% Constants and imports
 
+import os
 import sys
 import json
 import argparse
 
 from tqdm import tqdm
+from copy import copy
 
 
 #%% Functions
 
-def subset_json_db(input_json, query, output_json=None, ignore_case=False):
+def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbose=False):
     """
     Given a json file (or dictionary already loaded from a json file), produce a new
     database containing only the images whose filenames contain the string 'query',
@@ -29,54 +31,80 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
 
     Args:
         input_json (str): COCO Camera Traps .json file to load, or an already-loaded dict
-        query (str): string to query for, only include images in the output whose filenames
-            contain this string.
+        query (str or list): string to query for, only include images in the output whose filenames
+            contain this string. If this is a list, test for exact matches.
         output_json (str, optional): file to write the resulting .json file to
         ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
+        verbose (bool, optional): enable additional debug output
 
     Returns:
-        dict: …
+        dict: CCT dictionary containing a subset of the images and annotations in the input dict
     """
-
-    if ignore_case:
-        query = query.lower()
 
     # Load the input file if necessary
     if isinstance(input_json,str):
         print('Loading input .json...')
         with open(input_json, 'r') as f:
-            …
+            input_data = json.load(f)
     else:
-        …
+        input_data = input_json
 
     # Find images matching the query
     images = []
-    image_ids = set()
 
-    …
-    …
+    if isinstance(query,str):
+
+        if ignore_case:
+            query = query.lower()
+
+        for im in tqdm(input_data['images']):
+            fn = im['file_name']
+            if ignore_case:
+                fn = fn.lower()
+            if query in fn:
+                images.append(im)
+
+    else:
+
+        query = set(query)
+
         if ignore_case:
-            …
-            …
-            …
-            …
+            query = set([s.lower() for s in query])
+
+        for im in input_data['images']:
+            fn = im['file_name']
+            if ignore_case:
+                fn = fn.lower()
+            if fn in query:
+                images.append(im)
+
+    image_ids = set([im['id'] for im in images])
 
     # Find annotations referring to those images
    annotations = []
 
-    for ann in …
+    for ann in input_data['annotations']:
         if ann['image_id'] in image_ids:
             annotations.append(ann)
 
-    output_data = …
+    output_data = copy(input_data)
     output_data['images'] = images
     output_data['annotations'] = annotations
 
     # Write the output file if requested
     if output_json is not None:
-        …
-        …
-        …
+        if verbose:
+            print('Writing output .json to {}'.format(output_json))
+        output_dir = os.path.dirname(output_json)
+        os.makedirs(output_dir,exist_ok=True)
+        with open(output_json,'w') as f:
+            json.dump(output_data,f,indent=1)
+
+        if verbose:
+            print('Keeping {} of {} images, {} of {} annotations'.format(
+                len(output_data['images']),len(input_data['images']),
+                len(output_data['annotations']),len(input_data['annotations'])))
+
     return output_data
 
 
```
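Putting the new signature together, a hypothetical usage sketch (paths are made up): a string query keeps filenames containing the substring, while a list query keeps exact filename matches.

```python
from megadetector.data_management.databases.subset_json_db import subset_json_db

# Substring mode: keep images whose filenames contain 'camera_01',
# case-insensitively, and write the subset to disk.
subset = subset_json_db('all_images.json', 'camera_01',
                        output_json='subsets/camera_01.json',
                        ignore_case=True, verbose=True)

# Exact-match mode: keep only the listed filenames.
subset = subset_json_db('all_images.json',
                        ['site_a/img_0001.jpg', 'site_a/img_0002.jpg'])
```

Note that `copy(input_data)` is a shallow copy: the returned dict gets new `images` and `annotations` lists, but any other top-level values are still shared with the input.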
megadetector/data_management/lila/add_locations_to_island_camera_traps.py

```diff
@@ -20,78 +20,82 @@ preview_folder = os.path.expanduser('~/tmp/island_conservation_preview')
 image_directory = os.path.expanduser('~/data/icct/public/')
 
 
-#%%
+#%% Prevent imports during testing
 
-…
-d = json.load(f)
-
-d['info']
-d['info']['version'] = '1.01'
-
-
-#%% Find locations
-
-images = d['images']
-
-locations = set()
+if False:
 
-…
-    tokens_fn = im['file_name'].split('/')
-    tokens_id = im['id'].split('_')
-    assert tokens_fn[0] == tokens_id[0]
-    assert tokens_fn[1] == tokens_id[1]
-    location = tokens_fn[0] + '_' + tokens_fn[1]
-    im['location'] = location
-    locations.add(location)
-
-locations = sorted(list(locations))
+    #%% Read input file
 
-…
-…
+    with open(input_fn,'r') as f:
+        d = json.load(f)
+
+    d['info']
+    d['info']['version'] = '1.01'
 
 
-#%%
-
-with open(output_fn,'w') as f:
-    json.dump(d,f,indent=1)
+    #%% Find locations
 
-… [40 removed lines, truncated in the diff view]
+    images = d['images']
+
+    locations = set()
+
+    for i_image,im in tqdm(enumerate(images),total=len(images)):
+        tokens_fn = im['file_name'].split('/')
+        tokens_id = im['id'].split('_')
+        assert tokens_fn[0] == tokens_id[0]
+        assert tokens_fn[1] == tokens_id[1]
+        location = tokens_fn[0] + '_' + tokens_fn[1]
+        im['location'] = location
+        locations.add(location)
+
+    locations = sorted(list(locations))
+
+    for s in locations:
+        print(s)
+
+
+    #%% Write output file
+
+    with open(output_fn,'w') as f:
+        json.dump(d,f,indent=1)
+
+
+    #%% Validate .json files
+
+    from megadetector.data_management.databases import integrity_check_json_db
+
+    options = integrity_check_json_db.IntegrityCheckOptions()
+    options.baseDir = image_directory
+    options.bCheckImageSizes = False
+    options.bCheckImageExistence = True
+    options.bFindUnusedImages = True
+
+    sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_fn, options)
+
+
+    #%% Preview labels
+
+    from megadetector.visualization import visualize_db
+
+    viz_options = visualize_db.DbVizOptions()
+    viz_options.num_to_visualize = 2000
+    viz_options.trim_to_images_with_bboxes = False
+    viz_options.add_search_links = False
+    viz_options.sort_by_filename = False
+    viz_options.parallelize_rendering = True
+    viz_options.classes_to_exclude = ['test']
+    html_output_file, image_db = visualize_db.visualize_db(db_path=output_fn,
+                                                           output_dir=preview_folder,
+                                                           image_base_dir=image_directory,
+                                                           options=viz_options)
+
+    from megadetector.utils import path_utils
+    path_utils.open_file(html_output_file)
+
+
+    #%% Zip output file
+
+    from megadetector.utils.path_utils import zip_file
+
+    zip_file(output_fn, verbose=True)
+    assert os.path.isfile(output_fn + '.zip')
```
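The `#%% Find locations` cell derives a location ID from the first two path components, cross-checked against the image ID. A tiny sketch with a hypothetical filename and ID:

```python
# Hypothetical example of the derivation in the '#%% Find locations' cell above
file_name = 'floreana/cam03/image0001.jpg'   # made-up path
image_id = 'floreana_cam03_image0001'        # made-up ID

tokens_fn = file_name.split('/')             # ['floreana', 'cam03', 'image0001.jpg']
tokens_id = image_id.split('_')              # ['floreana', 'cam03', 'image0001']
assert tokens_fn[0] == tokens_id[0] and tokens_fn[1] == tokens_id[1]
location = tokens_fn[0] + '_' + tokens_fn[1]
print(location)  # floreana_cam03
```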
megadetector/data_management/lila/add_locations_to_nacti.py

```diff
@@ -21,127 +21,131 @@ input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
 output_file = r'g:\temp\nacti_metadata.1.14.json'
 
 
-#%%
+#%% Prevent execution during testing
 
-…
-d = json.load(f)
-
-assert d['info']['version'] == 1.13
-
-
-#%% Map images to locations (according to the metadata)
-
-file_name_to_original_location = {}
-
-# im = dataset_labels['images'][0]
-for im in tqdm(d['images']):
-    file_name_to_original_location[im['file_name']] = im['location']
-
-original_locations = set(file_name_to_original_location.values())
-
-print('Found {} locations in the original metadata:'.format(len(original_locations)))
-for loc in original_locations:
-    print('[{}]'.format(loc))
+if False:
 
-… [23 removed lines, truncated in the diff view]
+    #%% Read metadata
+
+    with open(input_file,'r') as f:
+        d = json.load(f)
+
+    assert d['info']['version'] == 1.13
+
+
+    #%% Map images to locations (according to the metadata)
+
+    file_name_to_original_location = {}
+
+    # im = dataset_labels['images'][0]
+    for im in tqdm(d['images']):
+        file_name_to_original_location[im['file_name']] = im['location']
+
+    original_locations = set(file_name_to_original_location.values())
+
+    print('Found {} locations in the original metadata:'.format(len(original_locations)))
+    for loc in original_locations:
+        print('[{}]'.format(loc))
+
+
+    #%% Map images to new locations
+
+    def path_to_location(relative_path):
+
+        relative_path = relative_path.replace('\\','/')
+        if relative_path in file_name_to_original_location:
+            location_name = file_name_to_original_location[relative_path]
+            if location_name == 'San Juan Mntns, Colorado':
+                # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[1].startswith('Unit')
+                location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
+            elif location_name == 'Lebec, California':
+                # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
+                location_name = 'lebec_{}'.format(tokens[0])
+            elif location_name == 'Archbold, FL':
+                # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[0].startswith('FL-')
+                location_name = 'archbold_{}'.format(tokens[0])
+            else:
+                assert location_name == ''
+                tokens = relative_path.split('/')[-1].split('_')
+                if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
+                    location_name = '{}'.format(tokens[0])
+
         else:
-            assert location_name == ''
-            tokens = relative_path.split('/')[-1].split('_')
-            if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
-                location_name = '{}'.format(tokens[0])
 
-…
+            location_name = 'unknown'
 
-…
+        # print('Returning location {} for file {}'.format(location_name,relative_path))
 
-…
-…
-return location_name
-
-file_name_to_updated_location = {}
-updated_location_to_count = defaultdict(int)
-for im in tqdm(d['images']):
+        return location_name
 
-… [8 removed lines, truncated in the diff view]
+    file_name_to_updated_location = {}
+    updated_location_to_count = defaultdict(int)
+    for im in tqdm(d['images']):
+
+        updated_location = path_to_location(im['file_name'])
+        file_name_to_updated_location[im['file_name']] = updated_location
+        updated_location_to_count[updated_location] += 1
 
-
-
-
-input_base = r'd:\lila\nacti-unzipped'
-assert os.path.isdir(input_base)
-
-location_to_images = defaultdict(list)
-
-for im in d['images']:
-    location_to_images[im['location']].append(im)
+    updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
+                                                         key=lambda item: item[1],
+                                                         reverse=True)}
 
-
-import random
-random.seed(0)
-sampling_folder_base = r'g:\temp\nacti_samples'
-
-for location in tqdm(location_to_images):
+    updated_locations = set(file_name_to_updated_location.values())
 
-… [5 removed lines, truncated in the diff view]
+    print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
+    for loc in updated_location_to_count:
+        print('{}: {}'.format(loc,updated_location_to_count[loc]))
+
+
+    #%% Re-write metadata
+
+    for im in d['images']:
+        im['location'] = file_name_to_updated_location[im['file_name']]
+    d['info']['version'] = 1.14
+
+    with open(output_file,'w') as f:
+        json.dump(d,f,indent=1)
 
-… [8 removed lines, truncated in the diff view]
+
+    #%% For each location, sample some random images to make sure they look consistent
+
+    input_base = r'd:\lila\nacti-unzipped'
+    assert os.path.isdir(input_base)
+
+    location_to_images = defaultdict(list)
+
+    for im in d['images']:
+        location_to_images[im['location']].append(im)
 
-…
+    n_to_sample = 10
+    import random
+    random.seed(0)
+    sampling_folder_base = r'g:\temp\nacti_samples'
 
-…
+    for location in tqdm(location_to_images):
+
+        images_this_location = location_to_images[location]
+        if len(images_this_location) > n_to_sample:
+            images_this_location = random.sample(images_this_location,n_to_sample)
 
+        for i_image,im in enumerate(images_this_location):
+
+            fn_relative = im['file_name']
+            source_fn_abs = os.path.join(input_base,fn_relative)
+            assert os.path.isfile(source_fn_abs)
+            ext = os.path.splitext(fn_relative)[1]
+            target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
+                location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
+            os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
+            shutil.copyfile(source_fn_abs,target_fn_abs)
+
+        # ...for each image
+
+    # ...for each location
+
```
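Using the example paths quoted in the code's own comments, `path_to_location` maps filenames as follows, assuming each file appears in `file_name_to_original_location` with the matching original location string (this sketch runs inside the script, after the mapping cell defines `path_to_location`):

```python
# Expected mappings for the example paths quoted in the comments above.
for relative_path in [
        'part0/sub000/2010_Unit150_Ivan097_img0003.jpg',    # San Juan Mntns, Colorado
        'part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg',  # Lebec, California
        'part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg',  # Archbold, FL
        ]:
    print(path_to_location(relative_path))
# -> sanjuan_2010_Unit150_Ivan097
# -> lebec_CA-03
# -> archbold_FL-01
```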