megadetector 5.0.23__py3-none-any.whl → 5.0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (42)
  1. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +2 -3
  2. megadetector/classification/merge_classification_detection_output.py +2 -2
  3. megadetector/data_management/coco_to_labelme.py +2 -1
  4. megadetector/data_management/databases/integrity_check_json_db.py +15 -14
  5. megadetector/data_management/databases/subset_json_db.py +49 -21
  6. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
  7. megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
  8. megadetector/data_management/mewc_to_md.py +340 -0
  9. megadetector/data_management/speciesnet_to_md.py +41 -0
  10. megadetector/data_management/yolo_output_to_md_output.py +15 -8
  11. megadetector/detection/process_video.py +24 -7
  12. megadetector/detection/pytorch_detector.py +841 -160
  13. megadetector/detection/run_detector.py +341 -146
  14. megadetector/detection/run_detector_batch.py +307 -70
  15. megadetector/detection/run_inference_with_yolov5_val.py +61 -4
  16. megadetector/detection/tf_detector.py +6 -1
  17. megadetector/postprocessing/{combine_api_outputs.py → combine_batch_outputs.py} +10 -13
  18. megadetector/postprocessing/compare_batch_results.py +236 -7
  19. megadetector/postprocessing/create_crop_folder.py +358 -0
  20. megadetector/postprocessing/md_to_labelme.py +7 -7
  21. megadetector/postprocessing/md_to_wi.py +40 -0
  22. megadetector/postprocessing/merge_detections.py +1 -1
  23. megadetector/postprocessing/postprocess_batch_results.py +12 -5
  24. megadetector/postprocessing/separate_detections_into_folders.py +32 -4
  25. megadetector/postprocessing/validate_batch_results.py +9 -4
  26. megadetector/utils/ct_utils.py +236 -45
  27. megadetector/utils/directory_listing.py +3 -3
  28. megadetector/utils/gpu_test.py +125 -0
  29. megadetector/utils/md_tests.py +455 -116
  30. megadetector/utils/path_utils.py +43 -2
  31. megadetector/utils/wi_utils.py +2691 -0
  32. megadetector/visualization/visualization_utils.py +95 -18
  33. megadetector/visualization/visualize_db.py +25 -7
  34. megadetector/visualization/visualize_detector_output.py +60 -13
  35. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/METADATA +11 -23
  36. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/RECORD +39 -36
  37. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/WHEEL +1 -1
  38. megadetector/detection/detector_training/__init__.py +0 -0
  39. megadetector/detection/detector_training/model_main_tf2.py +0 -114
  40. megadetector/utils/torch_test.py +0 -32
  41. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/LICENSE +0 -0
  42. {megadetector-5.0.23.dist-info → megadetector-5.0.25.dist-info}/top_level.txt +0 -0
@@ -14,9 +14,8 @@ import redis
 import argparse
 import PIL
 
-from io import BytesIO
-
-from detection.run_detector import load_detector, convert_to_tf_coords
+from detection.run_detector import load_detector
+from utils.ct_utils import convert_xywh_to_xyxy as convert_to_tf_coords
 import config
 import visualization.visualization_utils as vis_utils
 
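
The replaced import keeps the old convert_to_tf_coords name but now points it at a generic box-format helper, ct_utils.convert_xywh_to_xyxy. As a rough sketch of what that kind of conversion does (an illustration only, assuming boxes in [x_min, y_min, width, height] form; the real helper lives in megadetector.utils.ct_utils):

    # Illustration: convert [x_min, y_min, width, height] to [x_min, y_min, x_max, y_max],
    # the conversion implied by the convert_xywh_to_xyxy name.
    def xywh_to_xyxy(box):
        x_min, y_min, w, h = box
        return [x_min, y_min, x_min + w, y_min + h]

    print(xywh_to_xyxy([10, 20, 100, 50]))  # [10, 20, 110, 70]
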
@@ -70,7 +70,7 @@ from typing import Any
 import pandas as pd
 from tqdm import tqdm
 
-from megadetector.utils.ct_utils import truncate_float
+from megadetector.utils.ct_utils import round_float
 
 
 #%% Example usage
@@ -124,7 +124,7 @@ def row_to_classification_list(row: Mapping[str, Any],
 
     # filter out confidences below the threshold, and set precision to 4
     result = [
-        (k, truncate_float(conf, precision=4))
+        (k, round_float(conf, precision=4))
        for k, conf in result if conf >= threshold
     ]
 
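
The truncate_float → round_float change (in the import above and in this list comprehension) switches from truncating classification confidences after the fourth decimal to rounding them to the nearest fourth decimal. A minimal illustration of the difference, using plain Python rather than the ct_utils implementations:

    import math

    conf = 0.123456

    # Truncate at 4 decimals (old truncate_float-style behavior, for non-negative values)
    truncated = math.floor(conf * 10**4) / 10**4    # 0.1234

    # Round to the nearest 4th decimal (new round_float-style behavior)
    rounded = round(conf, 4)                        # 0.1235
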
@@ -18,6 +18,7 @@ from tqdm import tqdm
 from collections import defaultdict
 
 from megadetector.visualization.visualization_utils import open_image
+from megadetector.detection.run_detector import FAILURE_IMAGE_OPEN
 
 
 #%% Functions
@@ -145,7 +146,7 @@ def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check
         except Exception:
             print('Warning: cannot open image {}'.format(im_full_path))
             if 'failure' not in im:
-                im['failure'] = 'Failure image access'
+                im['failure'] = FAILURE_IMAGE_OPEN
 
     # ...if we need to read w/h information
 
@@ -86,7 +86,7 @@ def _check_image_existence_and_size(image,options=None):
         options (IntegrityCheckOptions): parameters impacting validation
 
     Returns:
-        bool: whether this image passes validation
+        str: None if this image passes validation, otherwise an error string
     """
 
     if options is None:
@@ -96,23 +96,23 @@
 
     filePath = os.path.join(options.baseDir,image['file_name'])
     if not os.path.isfile(filePath):
-        # print('Image path {} does not exist'.format(filePath))
-        return False
+        s = 'Image path {} does not exist'.format(filePath)
+        return s
 
     if options.bCheckImageSizes:
         if not ('height' in image and 'width' in image):
-            print('Missing image size in {}'.format(filePath))
-            return False
+            s = 'Missing image size in {}'.format(filePath)
+            return s
 
         # width, height = Image.open(filePath).size
         pil_im = open_image(filePath)
         width,height = pil_im.size
         if (not (width == image['width'] and height == image['height'])):
-            print('Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
-                image['id'], filePath, image['width'], image['height'], width, height))
-            return False
+            s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
+                image['id'], filePath, image['width'], image['height'], width, height)
+            return s
 
-    return True
+    return None
 
 
 def integrity_check_json_db(jsonFile, options=None):
@@ -287,6 +287,7 @@ def integrity_check_json_db(jsonFile, options=None):
         if fn_relative not in image_paths_in_json:
             unused_files.append(fn_relative)
 
+    # List of (filename,error_string) tuples
     validation_errors = []
 
     # If we're checking image existence but not image size, we don't need to read the images
@@ -298,8 +299,8 @@
     image_paths_relative_set = set(image_paths_relative)
 
     for im in images:
-        if im['file_name'] not in image_paths_relative_set:
-            validation_errors.append(im['file_name'])
+        if im['file_name'] not in image_paths_relative_set:
+            validation_errors.append((im['file_name'],'not found in relative path list'))
 
     # If we're checking image size, we need to read the images
     if options.bCheckImageSizes:
@@ -321,12 +322,12 @@
         results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
     else:
         results = []
-        for im in tqdm(images):
+        for im in tqdm(images):
            results.append(_check_image_existence_and_size(im,options))
 
     for i_image,result in enumerate(results):
-        if result is not None:
-            validation_errors.append(images[i_image]['file_name'])
+        if result is not None:
+            validation_errors.append(images[i_image]['file_name'],result)
 
     # ...for each image
 
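
Taken together, the integrity_check_json_db hunks change the validation protocol: _check_image_existence_and_size now returns None on success and a human-readable error string on failure, and validation_errors is accumulated as (filename, error_string) tuples rather than bare filenames. A sketch of consuming results in that shape (the summarize_errors helper is hypothetical, not part of the package):

    def summarize_errors(validation_errors):
        # validation_errors: list of (filename, error_string) tuples, the format
        # documented by the new comment in integrity_check_json_db
        for filename, error in validation_errors:
            print('{}: {}'.format(filename, error))
        print('{} validation errors total'.format(len(validation_errors)))

    summarize_errors([('cam01/img_0001.jpg',
                       'Image path /data/cam01/img_0001.jpg does not exist')])
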
@@ -12,16 +12,18 @@ subset_json_detector_output.py.
 
 #%% Constants and imports
 
+import os
 import sys
 import json
 import argparse
 
 from tqdm import tqdm
+from copy import copy
 
 
 #%% Functions
 
-def subset_json_db(input_json, query, output_json=None, ignore_case=False):
+def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbose=False):
     """
     Given a json file (or dictionary already loaded from a json file), produce a new
     database containing only the images whose filenames contain the string 'query',
@@ -29,54 +31,80 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
 
     Args:
         input_json (str): COCO Camera Traps .json file to load, or an already-loaded dict
-        query (str): string to query for, only include images in the output whose filenames
-            contain this string.
+        query (str or list): string to query for, only include images in the output whose filenames
+            contain this string. If this is a list, test for exact matches.
         output_json (str, optional): file to write the resulting .json file to
         ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
+        verbose (bool, optional): enable additional debug output
 
     Returns:
-        dict: possibly-modified CCT dictionary
+        dict: CCT dictionary containing a subset of the images and annotations in the input dict
     """
-
-    if ignore_case:
-        query = query.lower()
 
     # Load the input file if necessary
     if isinstance(input_json,str):
         print('Loading input .json...')
         with open(input_json, 'r') as f:
-            data = json.load(f)
+            input_data = json.load(f)
     else:
-        data = input_json
+        input_data = input_json
 
     # Find images matching the query
     images = []
-    image_ids = set()
 
-    for im in tqdm(data['images']):
-        fn = im['file_name']
+    if isinstance(query,str):
+
+        if ignore_case:
+            query = query.lower()
+
+        for im in tqdm(input_data['images']):
+            fn = im['file_name']
+            if ignore_case:
+                fn = fn.lower()
+            if query in fn:
+                images.append(im)
+
+    else:
+
+        query = set(query)
+
         if ignore_case:
-            fn = fn.lower()
-        if query in fn:
-            images.append(im)
-            image_ids.add(im['id'])
+            query = set([s.lower() for s in query])
+
+        for im in input_data['images']:
+            fn = im['file_name']
+            if ignore_case:
+                fn = fn.lower()
+            if fn in query:
+                images.append(im)
+
+    image_ids = set([im['id'] for im in images])
 
     # Find annotations referring to those images
     annotations = []
 
-    for ann in tqdm(data['annotations']):
+    for ann in input_data['annotations']:
         if ann['image_id'] in image_ids:
             annotations.append(ann)
 
-    output_data = data
+    output_data = copy(input_data)
     output_data['images'] = images
     output_data['annotations'] = annotations
 
     # Write the output file if requested
     if output_json is not None:
-        print('Writing output .json...')
-        json.dump(output_data,open(output_json,'w'),indent=1)
-
+        if verbose:
+            print('Writing output .json to {}'.format(output_json))
+        output_dir = os.path.dirname(output_json)
+        os.makedirs(output_dir,exist_ok=True)
+        with open(output_json,'w') as f:
+            json.dump(output_data,f,indent=1)
+
+    if verbose:
+        print('Keeping {} of {} images, {} of {} annotations'.format(
+            len(output_data['images']),len(input_data['images']),
+            len(output_data['annotations']),len(input_data['annotations'])))
+
     return output_data
 
 
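
Per the updated docstring, subset_json_db now accepts either a substring query or a list of exact filenames, and can optionally write the subset and report what it kept. A usage sketch (the module path follows the file list above; the .json paths and filenames are placeholders):

    from megadetector.data_management.databases.subset_json_db import subset_json_db

    # Substring mode: keep images whose filenames contain 'camera_01'
    subset = subset_json_db('cct_all.json', 'camera_01',
                            output_json='cct_camera_01.json', ignore_case=True)

    # List mode: keep only these exact filenames
    subset = subset_json_db('cct_all.json',
                            ['camera_01/img_0001.jpg', 'camera_02/img_0042.jpg'],
                            verbose=True)
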
@@ -20,78 +20,82 @@ preview_folder = os.path.expanduser('~/tmp/island_conservation_preview')
 image_directory = os.path.expanduser('~/data/icct/public/')
 
 
-#%% Read input file
+#%% Prevent imports during testing
 
-with open(input_fn,'r') as f:
-    d = json.load(f)
-
-d['info']
-d['info']['version'] = '1.01'
-
-
-#%% Find locations
-
-images = d['images']
-
-locations = set()
+if False:
 
-for i_image,im in tqdm(enumerate(images),total=len(images)):
-    tokens_fn = im['file_name'].split('/')
-    tokens_id = im['id'].split('_')
-    assert tokens_fn[0] == tokens_id[0]
-    assert tokens_fn[1] == tokens_id[1]
-    location = tokens_fn[0] + '_' + tokens_fn[1]
-    im['location'] = location
-    locations.add(location)
-
-locations = sorted(list(locations))
+    #%% Read input file
 
-for s in locations:
-    print(s)
+    with open(input_fn,'r') as f:
+        d = json.load(f)
+
+    d['info']
+    d['info']['version'] = '1.01'
 
 
-#%% Write output file
-
-with open(output_fn,'w') as f:
-    json.dump(d,f,indent=1)
+    #%% Find locations
 
-
-#%% Validate .json files
-
-from megadetector.data_management.databases import integrity_check_json_db
-
-options = integrity_check_json_db.IntegrityCheckOptions()
-options.baseDir = image_directory
-options.bCheckImageSizes = False
-options.bCheckImageExistence = True
-options.bFindUnusedImages = True
-
-sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_fn, options)
-
-
-#%% Preview labels
-
-from megadetector.visualization import visualize_db
-
-viz_options = visualize_db.DbVizOptions()
-viz_options.num_to_visualize = 2000
-viz_options.trim_to_images_with_bboxes = False
-viz_options.add_search_links = False
-viz_options.sort_by_filename = False
-viz_options.parallelize_rendering = True
-viz_options.classes_to_exclude = ['test']
-html_output_file, image_db = visualize_db.visualize_db(db_path=output_fn,
-                                                       output_dir=preview_folder,
-                                                       image_base_dir=image_directory,
-                                                       options=viz_options)
-
-from megadetector.utils import path_utils
-path_utils.open_file(html_output_file)
-
-
-#%% Zip output file
-
-from megadetector.utils.path_utils import zip_file
-
-zip_file(output_fn, verbose=True)
-assert os.path.isfile(output_fn + '.zip')
+    images = d['images']
+
+    locations = set()
+
+    for i_image,im in tqdm(enumerate(images),total=len(images)):
+        tokens_fn = im['file_name'].split('/')
+        tokens_id = im['id'].split('_')
+        assert tokens_fn[0] == tokens_id[0]
+        assert tokens_fn[1] == tokens_id[1]
+        location = tokens_fn[0] + '_' + tokens_fn[1]
+        im['location'] = location
+        locations.add(location)
+
+    locations = sorted(list(locations))
+
+    for s in locations:
+        print(s)
+
+
+    #%% Write output file
+
+    with open(output_fn,'w') as f:
+        json.dump(d,f,indent=1)
+
+
+    #%% Validate .json files
+
+    from megadetector.data_management.databases import integrity_check_json_db
+
+    options = integrity_check_json_db.IntegrityCheckOptions()
+    options.baseDir = image_directory
+    options.bCheckImageSizes = False
+    options.bCheckImageExistence = True
+    options.bFindUnusedImages = True
+
+    sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_fn, options)
+
+
+    #%% Preview labels
+
+    from megadetector.visualization import visualize_db
+
+    viz_options = visualize_db.DbVizOptions()
+    viz_options.num_to_visualize = 2000
+    viz_options.trim_to_images_with_bboxes = False
+    viz_options.add_search_links = False
+    viz_options.sort_by_filename = False
+    viz_options.parallelize_rendering = True
+    viz_options.classes_to_exclude = ['test']
+    html_output_file, image_db = visualize_db.visualize_db(db_path=output_fn,
+                                                           output_dir=preview_folder,
+                                                           image_base_dir=image_directory,
+                                                           options=viz_options)
+
+    from megadetector.utils import path_utils
+    path_utils.open_file(html_output_file)
+
+
+    #%% Zip output file
+
+    from megadetector.utils.path_utils import zip_file
+
+    zip_file(output_fn, verbose=True)
+    assert os.path.isfile(output_fn + '.zip')
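
This hunk and the nacti hunk that follows apply the same refactor: the interactive #%% cells are wrapped in an if False: block, so importing the module (for example from the test suite) no longer executes the one-off dataset-editing code, while the cells can still be run one at a time in an IDE. Schematically (paths are placeholders):

    #%% Constants and imports (safe to execute on import)

    import json
    input_fn = '/path/to/metadata.json'

    #%% Prevent execution during testing

    if False:

        #%% The actual work, run cell by cell in an interactive session

        with open(input_fn, 'r') as f:
            d = json.load(f)
        d['info']['version'] = '1.01'
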
@@ -21,127 +21,131 @@ input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
 output_file = r'g:\temp\nacti_metadata.1.14.json'
 
 
-#%% Read metadata
+#%% Prevent execution during testing
 
-with open(input_file,'r') as f:
-    d = json.load(f)
-
-assert d['info']['version'] == 1.13
-
-
-#%% Map images to locations (according to the metadata)
-
-file_name_to_original_location = {}
-
-# im = dataset_labels['images'][0]
-for im in tqdm(d['images']):
-    file_name_to_original_location[im['file_name']] = im['location']
-
-original_locations = set(file_name_to_original_location.values())
-
-print('Found {} locations in the original metadata:'.format(len(original_locations)))
-for loc in original_locations:
-    print('[{}]'.format(loc))
+if False:
 
-
-#%% Map images to new locations
-
-def path_to_location(relative_path):
-
-    relative_path = relative_path.replace('\\','/')
-    if relative_path in file_name_to_original_location:
-        location_name = file_name_to_original_location[relative_path]
-        if location_name == 'San Juan Mntns, Colorado':
-            # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
-            tokens = relative_path.split('/')[-1].split('_')
-            assert tokens[1].startswith('Unit')
-            location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
-        elif location_name == 'Lebec, California':
-            # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
-            tokens = relative_path.split('/')[-1].split('_')
-            assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
-            location_name = 'lebec_{}'.format(tokens[0])
-        elif location_name == 'Archbold, FL':
-            # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
-            tokens = relative_path.split('/')[-1].split('_')
-            assert tokens[0].startswith('FL-')
-            location_name = 'archbold_{}'.format(tokens[0])
+    #%% Read metadata
+
+    with open(input_file,'r') as f:
+        d = json.load(f)
+
+    assert d['info']['version'] == 1.13
+
+
+    #%% Map images to locations (according to the metadata)
+
+    file_name_to_original_location = {}
+
+    # im = dataset_labels['images'][0]
+    for im in tqdm(d['images']):
+        file_name_to_original_location[im['file_name']] = im['location']
+
+    original_locations = set(file_name_to_original_location.values())
+
+    print('Found {} locations in the original metadata:'.format(len(original_locations)))
+    for loc in original_locations:
+        print('[{}]'.format(loc))
+
+
+    #%% Map images to new locations
+
+    def path_to_location(relative_path):
+
+        relative_path = relative_path.replace('\\','/')
+        if relative_path in file_name_to_original_location:
+            location_name = file_name_to_original_location[relative_path]
+            if location_name == 'San Juan Mntns, Colorado':
+                # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[1].startswith('Unit')
+                location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
+            elif location_name == 'Lebec, California':
+                # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
+                location_name = 'lebec_{}'.format(tokens[0])
+            elif location_name == 'Archbold, FL':
+                # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[0].startswith('FL-')
+                location_name = 'archbold_{}'.format(tokens[0])
+            else:
+                assert location_name == ''
+                tokens = relative_path.split('/')[-1].split('_')
+                if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
+                    location_name = '{}'.format(tokens[0])
+
         else:
-            assert location_name == ''
-            tokens = relative_path.split('/')[-1].split('_')
-            if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
-                location_name = '{}'.format(tokens[0])
 
-    else:
+            location_name = 'unknown'
 
-        location_name = 'unknown'
+        # print('Returning location {} for file {}'.format(location_name,relative_path))
 
-    # print('Returning location {} for file {}'.format(location_name,relative_path))
-
-    return location_name
-
-file_name_to_updated_location = {}
-updated_location_to_count = defaultdict(int)
-for im in tqdm(d['images']):
+        return location_name
 
-    updated_location = path_to_location(im['file_name'])
-    file_name_to_updated_location[im['file_name']] = updated_location
-    updated_location_to_count[updated_location] += 1
-
-updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
-                                                     key=lambda item: item[1],
-                                                     reverse=True)}
-
-updated_locations = set(file_name_to_updated_location.values())
-
-print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
-for loc in updated_location_to_count:
-    print('{}: {}'.format(loc,updated_location_to_count[loc]))
-
-
-#%% Re-write metadata
-
-for im in d['images']:
-    im['location'] = file_name_to_updated_location[im['file_name']]
-d['info']['version'] = 1.14
-
-with open(output_file,'w') as f:
-    json.dump(d,f,indent=1)
+    file_name_to_updated_location = {}
+    updated_location_to_count = defaultdict(int)
+    for im in tqdm(d['images']):
+
+        updated_location = path_to_location(im['file_name'])
+        file_name_to_updated_location[im['file_name']] = updated_location
+        updated_location_to_count[updated_location] += 1
 
-
-#%% For each location, sample some random images to make sure they look consistent
-
-input_base = r'd:\lila\nacti-unzipped'
-assert os.path.isdir(input_base)
-
-location_to_images = defaultdict(list)
-
-for im in d['images']:
-    location_to_images[im['location']].append(im)
+    updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
+                                                         key=lambda item: item[1],
+                                                         reverse=True)}
 
-n_to_sample = 10
-import random
-random.seed(0)
-sampling_folder_base = r'g:\temp\nacti_samples'
-
-for location in tqdm(location_to_images):
+    updated_locations = set(file_name_to_updated_location.values())
 
-    images_this_location = location_to_images[location]
-    if len(images_this_location) > n_to_sample:
-        images_this_location = random.sample(images_this_location,n_to_sample)
-
-    for i_image,im in enumerate(images_this_location):
+    print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
+    for loc in updated_location_to_count:
+        print('{}: {}'.format(loc,updated_location_to_count[loc]))
+
+
+    #%% Re-write metadata
+
+    for im in d['images']:
+        im['location'] = file_name_to_updated_location[im['file_name']]
+    d['info']['version'] = 1.14
+
+    with open(output_file,'w') as f:
+        json.dump(d,f,indent=1)
 
-        fn_relative = im['file_name']
-        source_fn_abs = os.path.join(input_base,fn_relative)
-        assert os.path.isfile(source_fn_abs)
-        ext = os.path.splitext(fn_relative)[1]
-        target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
-            location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
-        os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
-        shutil.copyfile(source_fn_abs,target_fn_abs)
+
+    #%% For each location, sample some random images to make sure they look consistent
+
+    input_base = r'd:\lila\nacti-unzipped'
+    assert os.path.isdir(input_base)
+
+    location_to_images = defaultdict(list)
+
+    for im in d['images']:
+        location_to_images[im['location']].append(im)
 
-    # ...for each image
+    n_to_sample = 10
+    import random
+    random.seed(0)
+    sampling_folder_base = r'g:\temp\nacti_samples'
 
-# ...for each location
+    for location in tqdm(location_to_images):
+
+        images_this_location = location_to_images[location]
+        if len(images_this_location) > n_to_sample:
+            images_this_location = random.sample(images_this_location,n_to_sample)
 
+        for i_image,im in enumerate(images_this_location):
+
+            fn_relative = im['file_name']
+            source_fn_abs = os.path.join(input_base,fn_relative)
+            assert os.path.isfile(source_fn_abs)
+            ext = os.path.splitext(fn_relative)[1]
+            target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
+                location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
+            os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
+            shutil.copyfile(source_fn_abs,target_fn_abs)
+
+        # ...for each image
+
+    # ...for each location
+