megadetector 5.0.24__py3-none-any.whl → 5.0.26__py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.

Files changed (41)
  1. megadetector/data_management/cct_json_utils.py +15 -2
  2. megadetector/data_management/coco_to_yolo.py +53 -31
  3. megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
  4. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  5. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
  6. megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
  7. megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
  8. megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
  9. megadetector/data_management/remap_coco_categories.py +60 -11
  10. megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
  11. megadetector/data_management/yolo_to_coco.py +45 -15
  12. megadetector/detection/run_detector.py +1 -0
  13. megadetector/detection/run_detector_batch.py +5 -4
  14. megadetector/postprocessing/classification_postprocessing.py +788 -524
  15. megadetector/postprocessing/compare_batch_results.py +176 -9
  16. megadetector/postprocessing/create_crop_folder.py +420 -0
  17. megadetector/postprocessing/load_api_results.py +4 -1
  18. megadetector/postprocessing/md_to_coco.py +1 -1
  19. megadetector/postprocessing/postprocess_batch_results.py +158 -44
  20. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
  21. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  22. megadetector/postprocessing/separate_detections_into_folders.py +20 -4
  23. megadetector/postprocessing/subset_json_detector_output.py +180 -15
  24. megadetector/postprocessing/validate_batch_results.py +13 -5
  25. megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
  26. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
  27. megadetector/taxonomy_mapping/species_lookup.py +45 -2
  28. megadetector/utils/ct_utils.py +76 -3
  29. megadetector/utils/directory_listing.py +4 -4
  30. megadetector/utils/gpu_test.py +21 -3
  31. megadetector/utils/md_tests.py +142 -49
  32. megadetector/utils/path_utils.py +342 -19
  33. megadetector/utils/wi_utils.py +1286 -212
  34. megadetector/visualization/visualization_utils.py +16 -4
  35. megadetector/visualization/visualize_db.py +1 -1
  36. megadetector/visualization/visualize_detector_output.py +1 -4
  37. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
  38. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
  39. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
  40. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
  41. {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
megadetector/data_management/cct_json_utils.py

@@ -300,7 +300,10 @@ class SequenceOptions:
     def __init__(self):
         #: Images separated by <= this duration will be grouped into the same sequence.
         self.episode_interval_seconds = 60.0
-
+
+        #: How to handle invalid datetimes: 'error' or 'none'
+        self.datetime_conversion_failure_behavior = 'none'
+
 
 #%% Functions
 
@@ -445,7 +448,17 @@ def create_sequences(image_info,options=None):
         raise ValueError('Unrecognized type for [image_info]')
 
     # Modifies the images in place
-    _ = parse_datetimes_from_cct_image_list(image_info)
+    _ = parse_datetimes_from_cct_image_list(image_info,
+        conversion_failure_behavior=options.datetime_conversion_failure_behavior)
+
+    n_invalid_datetimes = 0
+    for im in image_info:
+        if not isinstance(im['datetime'],datetime.datetime):
+            assert im['datetime'] is None, 'At this point, datetimes should be valid or None'
+            n_invalid_datetimes += 1
+    if n_invalid_datetimes > 0:
+        print('Warning: {} of {} images have invalid datetimes'.format(
+            n_invalid_datetimes,len(image_info)))
 
     # Find all unique locations
     locations = set()
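
Taken together, these two hunks make datetime-parsing failures survivable rather than fatal. A minimal usage sketch, assuming CCT-style image dicts (the records and datetime strings below are made up):

from megadetector.data_management.cct_json_utils import SequenceOptions, create_sequences

# Hypothetical CCT-style image records; the second datetime is unparseable
image_info = [
    {'id': 'im0', 'file_name': 'cam0/im0.jpg', 'location': 'cam0',
     'datetime': '2023-01-01 12:00:00'},
    {'id': 'im1', 'file_name': 'cam0/im1.jpg', 'location': 'cam0',
     'datetime': 'not a real datetime'},
]

options = SequenceOptions()

# 'none' converts unparseable datetimes to None (create_sequences then counts
# them and prints a warning, per the hunk above); 'error' raises instead
options.datetime_conversion_failure_behavior = 'none'

create_sequences(image_info, options=options)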
megadetector/data_management/coco_to_yolo.py

@@ -47,6 +47,9 @@ def write_yolo_dataset_file(yolo_dataset_file,
         class_list (list or str): an ordered list of class names (the first item will be class 0,
             etc.), or the name of a text file containing an ordered list of class names (one per
             line, starting from class zero).
+        train_folder_relative (str, optional): train folder name, used only to populate dataset.yaml
+        val_folder_relative (str, optional): val folder name, used only to populate dataset.yaml
+        test_folder_relative (str, optional): test folder name, used only to populate dataset.yaml
     """
 
     # Read class names
@@ -97,7 +100,7 @@ def coco_to_yolo(input_image_folder,
                  category_names_to_exclude=None,
                  category_names_to_include=None,
                  write_output=True,
-                 flatten_paths=True):
+                 flatten_paths=False):
     """
     Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
     dataset to a single folder in the process.
@@ -116,17 +119,21 @@ def coco_to_yolo(input_image_folder,
             images are left alone.
         source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'. The only difference
             is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
-            annotation with a category id of 0 as a special case, i.e. that's how an empty image
-            is indicated. The original COCO standard is a little ambiguous on this issue. If
-            source_format is 'coco', we either treat images as empty or error, depending on the value
-            of [allow_empty_annotations]. [allow_empty_annotations] has no effect if source_format is
-            'coco_camera_traps'.
+            annotation as a special case, i.e. that's how an empty image is indicated. The original
+            COCO standard is a little ambiguous on this issue. If source_format is 'coco', we
+            either treat images as empty or error, depending on the value of [allow_empty_annotations].
+            [allow_empty_annotations] has no effect if source_format is 'coco_camera_traps'.
+        overwrite_images (bool, optional): overwrite images in the output folder if they exist
         create_image_and_label_folder (bool, optional): whether to create separate folders called 'images' and
             'labels' in the YOLO output folder. If create_image_and_label_folder is False,
             a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
             be a#b#c#image001.txt. If create_image_and_label_folder is True, a/b/c/image001.jpg will become
             images/a#b#c#image001.jpg, and the corresponding text file will be
             labels/a#b#c#image001.txt.
+        class_file_name (str, optional): .txt file (relative to the output folder) that we should
+            populate with a list of classes (or None to omit)
+        allow_empty_annotations (bool, optional): if this is False and [source_format] is 'coco',
+            we'll error on annotations that have no 'bbox' field
         clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1] before
             converting to YOLO xywh format
         image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we will write
@@ -139,12 +146,14 @@ def coco_to_yolo(input_image_folder,
         category_names_to_exclude (str, optional): category names that should not be represented in the
             YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
             you would want to specify this and [category_names_to_include].
-        category_names_to_include (str, optional): allow-list of category names that should be represented in the
-            YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
-            you would want to specify this and [category_names_to_exclude].
+        category_names_to_include (str, optional): allow-list of category names that should be represented
+            in the YOLO output; only impacts annotations, does not prevent copying images. There's almost
+            no reason you would want to specify this and [category_names_to_exclude].
         write_output (bool, optional): determines whether we actually copy images and write annotations;
             setting this to False mostly puts this function in "dry run" mode. The class list
             file is written regardless of the value of write_output.
+        flatten_paths (bool, optional): replace /'s in image filenames with [path_replacement_char],
+            which ensures that the output folder is a single flat folder.
 
     Returns:
         dict: information about the coco --> yolo mapping, containing at least the fields:
@@ -313,9 +322,9 @@ def coco_to_yolo(input_image_folder,
 
     elif source_format == 'coco_camera_traps':
 
-        # We allow empty bbox lists in COCO camera traps; this is typically a negative
-        # example in a dataset that has bounding boxes, and 0 is typically the empty
-        # category.
+        # We allow empty bbox lists in COCO camera traps files; this is typically a
+        # negative example in a dataset that has bounding boxes, and 0 is typically
+        # the empty category.
         if ann['category_id'] != 0:
             if not printed_empty_annotation_warning:
                 printed_empty_annotation_warning = True
@@ -429,13 +438,14 @@ def coco_to_yolo(input_image_folder,
 
     print('Generating class list')
 
-    class_list_filename = os.path.join(output_folder,class_file_name)
-    with open(class_list_filename, 'w') as f:
-        print('Writing class list to {}'.format(class_list_filename))
-        for i_class in range(0,len(yolo_id_to_name)):
-            # Category IDs should range from 0..N-1
-            assert i_class in yolo_id_to_name
-            f.write(yolo_id_to_name[i_class] + '\n')
+    if class_file_name is not None:
+        class_list_filename = os.path.join(output_folder,class_file_name)
+        with open(class_list_filename, 'w') as f:
+            print('Writing class list to {}'.format(class_list_filename))
+            for i_class in range(0,len(yolo_id_to_name)):
+                # Category IDs should range from 0..N-1
+                assert i_class in yolo_id_to_name
+                f.write(yolo_id_to_name[i_class] + '\n')
 
     if image_id_to_output_image_json_file is not None:
         print('Writing image ID mapping to {}'.format(image_id_to_output_image_json_file))
@@ -457,6 +467,9 @@ def coco_to_yolo(input_image_folder,
 
     source_image_to_dest_image = {}
 
+    label_files_written = []
+    n_boxes_written = 0
+
     # TODO: parallelize this loop
     #
     # output_info = images_to_copy[0]
@@ -471,6 +484,7 @@ def coco_to_yolo(input_image_folder,
 
         source_image_to_dest_image[source_image] = dest_image
 
+        # Copy the image if necessary
         if write_output:
 
             os.makedirs(os.path.dirname(dest_image),exist_ok=True)
@@ -482,17 +496,24 @@ def coco_to_yolo(input_image_folder,
             if (not os.path.isfile(dest_image)) or (overwrite_images):
                 shutil.copyfile(source_image,dest_image)
 
-            bboxes = output_info['bboxes']
+        bboxes = output_info['bboxes']
+
+        # Write the annotation file if necessary
+        #
+        # Only write an annotation file if there are bounding boxes. Images with
+        # no .txt files are treated as hard negatives, at least by YOLOv5:
+        #
+        # https://github.com/ultralytics/yolov5/issues/3218
+        #
+        # I think this is also true for images with empty .txt files, but
+        # I'm using the convention suggested on that issue, i.e. hard
+        # negatives are expressed as images without .txt files.
+        if len(bboxes) > 0:
 
-            # Only write an annotation file if there are bounding boxes. Images with
-            # no .txt files are treated as hard negatives, at least by YOLOv5:
-            #
-            # https://github.com/ultralytics/yolov5/issues/3218
-            #
-            # I think this is also true for images with empty .txt files, but
-            # I'm using the convention suggested on that issue, i.e. hard
-            # negatives are expressed as images without .txt files.
-            if len(bboxes) > 0:
+            n_boxes_written += len(bboxes)
+            label_files_written.append(dest_txt)
+
+            if write_output:
 
                 with open(dest_txt,'w') as f:
 
@@ -501,8 +522,7 @@ def coco_to_yolo(input_image_folder,
                         assert len(bbox) == 5
                         s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
                         f.write(s + '\n')
-
-            # ...if we're actually writing output
+
 
     # ...for each image
 
@@ -510,6 +530,8 @@ def coco_to_yolo(input_image_folder,
     coco_to_yolo_info['class_list_filename'] = class_list_filename
     coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
     coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
+    coco_to_yolo_info['label_files_written'] = label_files_written
+    coco_to_yolo_info['n_boxes_written'] = n_boxes_written
 
     return coco_to_yolo_info
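
A sketch of how the updated function might be called. The folder paths are hypothetical, and only input_image_folder, output_folder, and the keyword arguments visible in the hunks above appear in this diff; the name and position of the COCO .json argument (input_file below) are assumptions:

from megadetector.data_management.coco_to_yolo import coco_to_yolo

# flatten_paths now defaults to False; pass True explicitly to keep the old
# flatten-to-one-folder behavior. class_file_name may now be None to skip
# writing a class list.
info = coco_to_yolo(input_image_folder='/data/coco_images',
                    output_folder='/data/yolo_out',
                    input_file='/data/coco.json',  # argument name assumed
                    source_format='coco_camera_traps',
                    flatten_paths=True)

# New bookkeeping fields in the returned dict
print('Wrote {} boxes across {} label files'.format(
    info['n_boxes_written'], len(info['label_files_written'])))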
 
megadetector/data_management/databases/combine_coco_camera_traps_files.py

@@ -24,8 +24,10 @@ import sys
 
 #%% Merge functions
 
-def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
-                      filename_prefixes=None):
+def combine_cct_files(input_files,
+                      output_file=None,
+                      require_uniqueness=True,
+                      filename_prefixes=None):
     """
     Merges the list of COCO Camera Traps files [input_files] into a single
     dictionary, optionally writing the result to [output_file].
@@ -33,8 +35,10 @@ def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
     Args:
         input_files (list): paths to CCT .json files
         output_file (str, optional): path to write merged .json file
-        require_uniqueness (bool): whether to require that the images in
+        require_uniqueness (bool, optional): whether to require that the images in
             each input_dict be unique
+        filename_prefixes (dict, optional): dict mapping input filenames to strings
+            that should be prepended to image filenames from that source
 
     Returns:
         dict: the merged COCO-formatted .json dict
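
A minimal sketch of the expanded signature, with hypothetical file names; the new filename_prefixes argument keeps image paths from different sources distinct in the merged database:

from megadetector.data_management.databases.combine_coco_camera_traps_files import combine_cct_files

# Prepend a per-source prefix to image filenames, so identically-named
# images from the two inputs can't collide in the merged file
merged = combine_cct_files(['site_a.json', 'site_b.json'],
                           output_file='merged.json',
                           require_uniqueness=False,
                           filename_prefixes={'site_a.json': 'site_a',
                                              'site_b.json': 'site_b'})

print('Merged database contains {} images'.format(len(merged['images'])))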
megadetector/data_management/databases/integrity_check_json_db.py

@@ -327,7 +327,7 @@ def integrity_check_json_db(jsonFile, options=None):
 
     for i_image,result in enumerate(results):
         if result is not None:
-            validation_errors.append(images[i_image]['file_name'],result)
+            validation_errors.append((images[i_image]['file_name'],result))
 
     # ...for each image
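
This fix repairs a real runtime bug: list.append takes exactly one argument, so the old two-argument call would have raised a TypeError instead of recording the error. A two-line illustration:

validation_errors = []
# validation_errors.append('img.jpg', 'bad size')   # TypeError: append() takes exactly one argument
validation_errors.append(('img.jpg', 'bad size'))   # appends one (filename, error) tuple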
 
@@ -393,7 +393,7 @@ def integrity_check_json_db(jsonFile, options=None):
         elif image['_count'] > 1:
             nMultiAnnotated += 1
 
-    print('Found {} unannotated images, {} images with multiple annotations'.format(
+    print('\nFound {} unannotated images, {} images with multiple annotations'.format(
         nUnannotated,nMultiAnnotated))
 
     if (len(base_dir) > 0) and options.bFindUnusedImages:
megadetector/data_management/lila/add_locations_to_island_camera_traps.py

@@ -20,78 +20,82 @@ preview_folder = os.path.expanduser('~/tmp/island_conservation_preview')
 image_directory = os.path.expanduser('~/data/icct/public/')
 
 
-#%% Read input file
+#%% Prevent imports during testing
 
-with open(input_fn,'r') as f:
-    d = json.load(f)
-
-d['info']
-d['info']['version'] = '1.01'
-
-
-#%% Find locations
-
-images = d['images']
-
-locations = set()
-
-for i_image,im in tqdm(enumerate(images),total=len(images)):
-    tokens_fn = im['file_name'].split('/')
-    tokens_id = im['id'].split('_')
-    assert tokens_fn[0] == tokens_id[0]
-    assert tokens_fn[1] == tokens_id[1]
-    location = tokens_fn[0] + '_' + tokens_fn[1]
-    im['location'] = location
-    locations.add(location)
-
-locations = sorted(list(locations))
-
-for s in locations:
-    print(s)
-
-
-#%% Write output file
-
-with open(output_fn,'w') as f:
-    json.dump(d,f,indent=1)
-
-
-#%% Validate .json files
-
-from megadetector.data_management.databases import integrity_check_json_db
-
-options = integrity_check_json_db.IntegrityCheckOptions()
-options.baseDir = image_directory
-options.bCheckImageSizes = False
-options.bCheckImageExistence = True
-options.bFindUnusedImages = True
-
-sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_fn, options)
-
-
-#%% Preview labels
-
-from megadetector.visualization import visualize_db
-
-viz_options = visualize_db.DbVizOptions()
-viz_options.num_to_visualize = 2000
-viz_options.trim_to_images_with_bboxes = False
-viz_options.add_search_links = False
-viz_options.sort_by_filename = False
-viz_options.parallelize_rendering = True
-viz_options.classes_to_exclude = ['test']
-html_output_file, image_db = visualize_db.visualize_db(db_path=output_fn,
-                                                       output_dir=preview_folder,
-                                                       image_base_dir=image_directory,
-                                                       options=viz_options)
-
-from megadetector.utils import path_utils
-path_utils.open_file(html_output_file)
-
-
-#%% Zip output file
-
-from megadetector.utils.path_utils import zip_file
-
-zip_file(output_fn, verbose=True)
-assert os.path.isfile(output_fn + '.zip')
+if False:
+
+    #%% Read input file
+
+    with open(input_fn,'r') as f:
+        d = json.load(f)
+
+    d['info']
+    d['info']['version'] = '1.01'
+
+
+    #%% Find locations
+
+    images = d['images']
+
+    locations = set()
+
+    for i_image,im in tqdm(enumerate(images),total=len(images)):
+        tokens_fn = im['file_name'].split('/')
+        tokens_id = im['id'].split('_')
+        assert tokens_fn[0] == tokens_id[0]
+        assert tokens_fn[1] == tokens_id[1]
+        location = tokens_fn[0] + '_' + tokens_fn[1]
+        im['location'] = location
+        locations.add(location)
+
+    locations = sorted(list(locations))
+
+    for s in locations:
+        print(s)
+
+
+    #%% Write output file
+
+    with open(output_fn,'w') as f:
+        json.dump(d,f,indent=1)
+
+
+    #%% Validate .json files
+
+    from megadetector.data_management.databases import integrity_check_json_db
+
+    options = integrity_check_json_db.IntegrityCheckOptions()
+    options.baseDir = image_directory
+    options.bCheckImageSizes = False
+    options.bCheckImageExistence = True
+    options.bFindUnusedImages = True
+
+    sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_fn, options)
+
+
+    #%% Preview labels
+
+    from megadetector.visualization import visualize_db
+
+    viz_options = visualize_db.DbVizOptions()
+    viz_options.num_to_visualize = 2000
+    viz_options.trim_to_images_with_bboxes = False
+    viz_options.add_search_links = False
+    viz_options.sort_by_filename = False
+    viz_options.parallelize_rendering = True
+    viz_options.classes_to_exclude = ['test']
+    html_output_file, image_db = visualize_db.visualize_db(db_path=output_fn,
+                                                           output_dir=preview_folder,
+                                                           image_base_dir=image_directory,
+                                                           options=viz_options)
+
+    from megadetector.utils import path_utils
+    path_utils.open_file(html_output_file)
+
+
+    #%% Zip output file
+
+    from megadetector.utils.path_utils import zip_file
+
+    zip_file(output_fn, verbose=True)
+    assert os.path.isfile(output_fn + '.zip')
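
The wrap-in-'if False:' pattern used here (and in add_locations_to_nacti.py below) is a common way to keep notebook-style #%% cells in a script from executing when the module is imported, e.g. during test collection, while still allowing them to be run cell-by-cell in an IDE. A minimal sketch of the pattern:

# Module-level guard: nothing below runs on import, but each #%% cell can
# still be executed interactively in an editor that understands cell markers
if False:

    #%% Some expensive, environment-specific cell

    print('only runs when executed cell-by-cell')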
megadetector/data_management/lila/add_locations_to_nacti.py

@@ -21,127 +21,131 @@ input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
 output_file = r'g:\temp\nacti_metadata.1.14.json'
 
 
-#%% Read metadata
+#%% Prevent execution during testing
 
-with open(input_file,'r') as f:
-    d = json.load(f)
-
-assert d['info']['version'] == 1.13
-
-
-#%% Map images to locations (according to the metadata)
-
-file_name_to_original_location = {}
-
-# im = dataset_labels['images'][0]
-for im in tqdm(d['images']):
-    file_name_to_original_location[im['file_name']] = im['location']
-
-original_locations = set(file_name_to_original_location.values())
-
-print('Found {} locations in the original metadata:'.format(len(original_locations)))
-for loc in original_locations:
-    print('[{}]'.format(loc))
-
-
-#%% Map images to new locations
-
-def path_to_location(relative_path):
-
-    relative_path = relative_path.replace('\\','/')
-    if relative_path in file_name_to_original_location:
-        location_name = file_name_to_original_location[relative_path]
-        if location_name == 'San Juan Mntns, Colorado':
-            # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
-            tokens = relative_path.split('/')[-1].split('_')
-            assert tokens[1].startswith('Unit')
-            location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
-        elif location_name == 'Lebec, California':
-            # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
-            tokens = relative_path.split('/')[-1].split('_')
-            assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
-            location_name = 'lebec_{}'.format(tokens[0])
-        elif location_name == 'Archbold, FL':
-            # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
-            tokens = relative_path.split('/')[-1].split('_')
-            assert tokens[0].startswith('FL-')
-            location_name = 'archbold_{}'.format(tokens[0])
-        else:
-            assert location_name == ''
-            tokens = relative_path.split('/')[-1].split('_')
-            if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
-                location_name = '{}'.format(tokens[0])
-
-    else:
-
-        location_name = 'unknown'
-
-    # print('Returning location {} for file {}'.format(location_name,relative_path))
-
-    return location_name
-
-file_name_to_updated_location = {}
-updated_location_to_count = defaultdict(int)
-for im in tqdm(d['images']):
-
-    updated_location = path_to_location(im['file_name'])
-    file_name_to_updated_location[im['file_name']] = updated_location
-    updated_location_to_count[updated_location] += 1
-
-updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
-                                                     key=lambda item: item[1],
-                                                     reverse=True)}
-
-updated_locations = set(file_name_to_updated_location.values())
-
-print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
-for loc in updated_location_to_count:
-    print('{}: {}'.format(loc,updated_location_to_count[loc]))
-
-
-#%% Re-write metadata
-
-for im in d['images']:
-    im['location'] = file_name_to_updated_location[im['file_name']]
-d['info']['version'] = 1.14
-
-with open(output_file,'w') as f:
-    json.dump(d,f,indent=1)
-
-
-#%% For each location, sample some random images to make sure they look consistent
-
-input_base = r'd:\lila\nacti-unzipped'
-assert os.path.isdir(input_base)
-
-location_to_images = defaultdict(list)
-
-for im in d['images']:
-    location_to_images[im['location']].append(im)
-
-n_to_sample = 10
-import random
-random.seed(0)
-sampling_folder_base = r'g:\temp\nacti_samples'
-
-for location in tqdm(location_to_images):
-
-    images_this_location = location_to_images[location]
-    if len(images_this_location) > n_to_sample:
-        images_this_location = random.sample(images_this_location,n_to_sample)
-
-    for i_image,im in enumerate(images_this_location):
-
-        fn_relative = im['file_name']
-        source_fn_abs = os.path.join(input_base,fn_relative)
-        assert os.path.isfile(source_fn_abs)
-        ext = os.path.splitext(fn_relative)[1]
-        target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
-            location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
-        os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
-        shutil.copyfile(source_fn_abs,target_fn_abs)
-
-    # ...for each image
-
-# ...for each location
+if False:
+
+    #%% Read metadata
+
+    with open(input_file,'r') as f:
+        d = json.load(f)
+
+    assert d['info']['version'] == 1.13
+
+
+    #%% Map images to locations (according to the metadata)
+
+    file_name_to_original_location = {}
+
+    # im = dataset_labels['images'][0]
+    for im in tqdm(d['images']):
+        file_name_to_original_location[im['file_name']] = im['location']
+
+    original_locations = set(file_name_to_original_location.values())
+
+    print('Found {} locations in the original metadata:'.format(len(original_locations)))
+    for loc in original_locations:
+        print('[{}]'.format(loc))
+
+
+    #%% Map images to new locations
+
+    def path_to_location(relative_path):
+
+        relative_path = relative_path.replace('\\','/')
+        if relative_path in file_name_to_original_location:
+            location_name = file_name_to_original_location[relative_path]
+            if location_name == 'San Juan Mntns, Colorado':
+                # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[1].startswith('Unit')
+                location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
+            elif location_name == 'Lebec, California':
+                # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
+                location_name = 'lebec_{}'.format(tokens[0])
+            elif location_name == 'Archbold, FL':
+                # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
+                tokens = relative_path.split('/')[-1].split('_')
+                assert tokens[0].startswith('FL-')
+                location_name = 'archbold_{}'.format(tokens[0])
+            else:
+                assert location_name == ''
+                tokens = relative_path.split('/')[-1].split('_')
+                if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
+                    location_name = '{}'.format(tokens[0])
+
+        else:
+
+            location_name = 'unknown'
+
+        # print('Returning location {} for file {}'.format(location_name,relative_path))
+
+        return location_name
+
+    file_name_to_updated_location = {}
+    updated_location_to_count = defaultdict(int)
+    for im in tqdm(d['images']):
+
+        updated_location = path_to_location(im['file_name'])
+        file_name_to_updated_location[im['file_name']] = updated_location
+        updated_location_to_count[updated_location] += 1
+
+    updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
+                                                         key=lambda item: item[1],
+                                                         reverse=True)}
+
+    updated_locations = set(file_name_to_updated_location.values())
+
+    print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
+    for loc in updated_location_to_count:
+        print('{}: {}'.format(loc,updated_location_to_count[loc]))
+
+
+    #%% Re-write metadata
+
+    for im in d['images']:
+        im['location'] = file_name_to_updated_location[im['file_name']]
+    d['info']['version'] = 1.14
+
+    with open(output_file,'w') as f:
+        json.dump(d,f,indent=1)
+
+
+    #%% For each location, sample some random images to make sure they look consistent
+
+    input_base = r'd:\lila\nacti-unzipped'
+    assert os.path.isdir(input_base)
+
+    location_to_images = defaultdict(list)
+
+    for im in d['images']:
+        location_to_images[im['location']].append(im)
+
+    n_to_sample = 10
+    import random
+    random.seed(0)
+    sampling_folder_base = r'g:\temp\nacti_samples'
+
+    for location in tqdm(location_to_images):
+
+        images_this_location = location_to_images[location]
+        if len(images_this_location) > n_to_sample:
+            images_this_location = random.sample(images_this_location,n_to_sample)
+
+        for i_image,im in enumerate(images_this_location):
+
+            fn_relative = im['file_name']
+            source_fn_abs = os.path.join(input_base,fn_relative)
+            assert os.path.isfile(source_fn_abs)
+            ext = os.path.splitext(fn_relative)[1]
+            target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
+                location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
+            os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
+            shutil.copyfile(source_fn_abs,target_fn_abs)
+
+        # ...for each image
+
+    # ...for each location
+
megadetector/data_management/lila/generate_lila_per_image_labels.py

@@ -349,7 +349,7 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
 
 # ...with open()
 
-print('Processed {} datasets'.format(len(metadata_table)))
+print('\nProcessed {} datasets'.format(len(metadata_table)))
 
 
 #%% Read the .csv back
@@ -393,7 +393,7 @@ def check_row(row):
     dataset_name_to_locations[ds_name].add(row['location_id'])
 
 # Faster, but more annoying to debug
-if False:
+if True:
 
     df.progress_apply(check_row, axis=1)
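
For reference, df.progress_apply is tqdm's drop-in replacement for pandas' DataFrame.apply, and it has to be registered before use. A minimal sketch with a stand-in DataFrame (check_row here is a toy version of the real validation function):

import pandas as pd
from tqdm import tqdm

# Registers progress_apply / progress_map on pandas objects
tqdm.pandas()

df = pd.DataFrame({'location_id': ['loc0', 'loc1', 'loc2']})

def check_row(row):
    # Stand-in for the real per-row validation
    assert isinstance(row['location_id'], str)

df.progress_apply(check_row, axis=1)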