megadetector-5.0.8-py3-none-any.whl → megadetector-5.0.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
@@ -1,10 +1,10 @@
- ########
- #
- # labelme_to_coco.py
- #
- # Converts a folder of labelme-formatted .json files to COCO format.
- #
- ########
+ """
+
+ labelme_to_coco.py
+
+ Converts a folder of labelme-formatted .json files to COCO.
+
+ """

  #%% Constants and imports

@@ -23,10 +23,10 @@ from tqdm import tqdm

  #%% Support functions

- def add_category(category_name,category_name_to_id,candidate_category_id=0):
+ def _add_category(category_name,category_name_to_id,candidate_category_id=0):
  """
- Add the category [category_name] to the dict [category_name_to_id], by default
- using the next available integer index.
+ Adds the category [category_name] to the dict [category_name_to_id], by default
+ using the next available integer index.
  """

  if category_name in category_name_to_id:
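
For orientation, a minimal sketch of what the renamed _add_category helper plausibly does, inferred from its docstring and the first body line shown above; the full body is not part of this hunk, so treat the details as assumptions:

    def _add_category(category_name, category_name_to_id, candidate_category_id=0):
        # If the category is already registered, reuse its existing ID
        if category_name in category_name_to_id:
            return category_name_to_id[category_name]
        # Otherwise assign the next available integer ID, starting from the candidate
        while candidate_category_id in category_name_to_id.values():
            candidate_category_id += 1
        category_name_to_id[category_name] = candidate_category_id
        return candidate_category_id
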
@@ -121,7 +121,7 @@ def _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
  if len(shapes) == 0:

  if allow_new_categories:
- category_id = add_category('empty',category_name_to_id)
+ category_id = _add_category('empty',category_name_to_id)
  else:
  assert 'empty' in category_name_to_id
  category_id = category_name_to_id['empty']
@@ -148,7 +148,7 @@ def _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
  category_name = shape['label']

  if allow_new_categories:
- category_id = add_category(category_name,category_name_to_id)
+ category_id = _add_category(category_name,category_name_to_id)
  else:
  assert category_name in category_name_to_id
  category_id = category_name_to_id[category_name]
@@ -202,7 +202,7 @@ def labelme_to_coco(input_folder,
  max_workers=1,
  use_threads=True):
  """
- Find all images in [input_folder] that have corresponding .json files, and convert
+ Finds all images in [input_folder] that have corresponding .json files, and converts
  to a COCO .json file.

  Currently only supports bounding box annotations and image-level flags (i.e., does not
@@ -224,11 +224,38 @@
  file. Empty images in the "lion" folder will still be given the label "empty" (or
  [empty_category_name]).

- no_json_handling can be:
+ Args:
+ input_folder (str): input folder to search for images and Labelme .json files
+ output_file (str, optional): output file to which we should write COCO-formatted data; if None
+ this function just returns the COCO-formatted dict
+ category_id_to_category_name (dict, optional): dict mapping category IDs to category names;
+ really used to map Labelme category names to COCO category IDs. IDs will be auto-generated
+ if this is None.
+ empty_category_id (int, optional): category ID to use for the not-very-COCO-like "empty" category;
+ also see the no_json_handling parameter.
+ info_struct (dict, optional): dict to stash in the "info" field of the resulting COCO dict
+ relative_paths_to_include (list, optional): allowlist of relative paths to include in the COCO
+ dict; there's no reason to specify this along with relative_paths_to_exclude.
+ relative_paths_to_exclude (list, optional): blocklist of relative paths to exclude from the COCO
+ dict; there's no reason to specify this along with relative_paths_to_include.
+ use_folders_as_labels (bool, optional): if this is True, class names will be pulled from folder names,
+ useful if you have images like a/b/cat/image001.jpg, a/b/dog/image002.jpg, etc.
+ recursive (bool, optional): whether to recurse into [input_folder]
+ no_json_handling (str, optional): how to deal with image files that have no corresponding .json files,
+ can be:
+
+ - 'skip': ignore image files with no corresponding .json files
+ - 'empty': treat image files with no corresponding .json files as empty
+ - 'error': throw an error when an image file has no corresponding .json file
+ validate_image_sizes (bool, optional): whether to load images to verify that the sizes specified
+ in the labelme files are correct
+ max_workers (int, optional): number of workers to use for parallelization, set to <=1 to disable
+ parallelization
+ use_threads (bool, optional): whether to use threads (True) or processes (False) for parallelization,
+ not relevant if max_workers <= 1

- * 'skip': ignore image files with no corresponding .json files
- * 'empty': treat image files with no corresponding .json files as empty
- * 'error': throw an error when an image file has no corresponding .json file
+ Returns:
+ dict: a COCO-formatted dictionary, identical to what's written to [output_file] if [output_file] is not None.
  """

  if max_workers > 1:
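
The expanded docstring above implies a call pattern along these lines; this is a hedged usage sketch (the folder and file paths are hypothetical, and the module path is assumed from this wheel's layout), not code from the package:

    from data_management.labelme_to_coco import labelme_to_coco

    # Convert a folder of Labelme .json files into one COCO .json file,
    # treating images without .json files as empty rather than skipping them
    coco_dict = labelme_to_coco('/data/camera-traps',
                                output_file='/data/camera-traps/coco.json',
                                recursive=True,
                                no_json_handling='empty',
                                validate_image_sizes=False,
                                max_workers=4,
                                use_threads=True)
    print('Wrote {} images'.format(len(coco_dict['images'])))
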
@@ -288,7 +315,7 @@ def labelme_to_coco(input_folder,
  raise ValueError('Category IDs must be ints or string-formatted ints')

  if empty_category_id is None:
- empty_category_id = add_category(empty_category_name,category_name_to_id)
+ empty_category_id = _add_category(empty_category_name,category_name_to_id)

  if max_workers <= 1:

@@ -366,12 +393,26 @@ def find_empty_labelme_files(input_folder,recursive=True):
  Returns a list of all image files in in [input_folder] associated with .json files that have
  no boxes in them. Also returns a list of images with no associated .json files. Specifically,
  returns a dict:
-
- {
- 'images_with_empty_json_files':[list],
- 'images_with_no_json_files':[list],
- 'images_with_non_empty_json_files':[list]
- }
+
+ .. code-block: none
+
+ {
+ 'images_with_empty_json_files':[list],
+ 'images_with_no_json_files':[list],
+ 'images_with_non_empty_json_files':[list]
+ }
+
+ Args:
+ input_folder (str): the folder to search for empty (i.e., box-less) Labelme .json files
+ recursive (bool, optional): whether to recurse into [input_folder]
+
+ Returns:
+ dict: a dict with fields:
+ - images_with_empty_json_files: a list of all image files in [input_folder] associated with
+ .json files that have no boxes in them
+ - images_with_no_json_files: a list of images in [input_folder] with no associated .json files
+ - images_with_non_empty_json_files: a list of images in [input_folder] associated with .json
+ files that have at least one box
  """
  image_filenames_relative = path_utils.find_images(input_folder,recursive=True,
  return_relative_paths=True)
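
Similarly, the new Args/Returns documentation for find_empty_labelme_files suggests usage along these lines (a sketch with a hypothetical path):

    from data_management.labelme_to_coco import find_empty_labelme_files

    results = find_empty_labelme_files('/data/camera-traps', recursive=True)
    # Each documented field is a list of image filenames
    for field in ('images_with_empty_json_files',
                  'images_with_no_json_files',
                  'images_with_non_empty_json_files'):
        print('{}: {}'.format(field, len(results[field])))
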
@@ -1,10 +1,10 @@
- ########
- #
- # labelme_to_yolo.py
- #
- # Create YOLO .txt files in a folder containing labelme .json files.
- #
- ########
+ """
+
+ labelme_to_yolo.py
+
+ Create YOLO .txt files in a folder containing labelme .json files.
+
+ """

  #%% Imports

@@ -77,7 +77,7 @@ def labelme_file_to_yolo_file(labelme_file,
  p0 = shape['points'][0]
  p1 = shape['points'][1]

- # LabelMe: [[x0,y0],[x1,y1]] (arbitrarily sorted) (absolute coordinates)
+ # Labelme: [[x0,y0],[x1,y1]] (arbitrarily sorted) (absolute coordinates)
  #
  # YOLO: [class, x_center, y_center, width, height] (normalized coordinates)
  minx_abs = min(p0[0],p1[0])
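
The comment pair above summarizes the conversion this function performs; here is a self-contained sketch of the implied box math (the function name and signature are assumptions for illustration, not the package's API):

    def labelme_box_to_yolo(p0, p1, image_width, image_height):
        # Labelme: two arbitrarily-ordered corners in absolute pixel coordinates
        minx_abs = min(p0[0], p1[0])
        maxx_abs = max(p0[0], p1[0])
        miny_abs = min(p0[1], p1[1])
        maxy_abs = max(p0[1], p1[1])
        # YOLO: normalized box center and size
        x_center = ((minx_abs + maxx_abs) / 2.0) / image_width
        y_center = ((miny_abs + maxy_abs) / 2.0) / image_height
        width = (maxx_abs - minx_abs) / image_width
        height = (maxy_abs - miny_abs) / image_height
        return x_center, y_center, width, height
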
@@ -1,12 +1,12 @@
- ########
- #
- # add_locations_to_island_camera_traps.py
- #
- # The Island Conservation Camera Traps dataset had unique camera identifiers embedded
- # in filenames, but not in the proper metadata fields. This script copies that information
- # to metadata.
- #
- ########
+ """
+
+ add_locations_to_island_camera_traps.py
+
+ The Island Conservation Camera Traps dataset had unique camera identifiers embedded
+ in filenames, but not in the proper metadata fields. This script copies that information
+ to metadata.
+
+ """

  #%% Imports and constants

@@ -1,147 +1,147 @@
- ########
- #
- # add_locations_to_nacti.py
- #
- # As of 10.2023, NACTI metadata only has very coarse location information (e.g. "Florida"),
- # but camera IDs are embedded in filenames. This script pulls that information from filenames
- # and adds it to metadata.
- #
- ########
-
- #%% Imports and constants
-
- import os
- import json
- import shutil
-
- from tqdm import tqdm
- from collections import defaultdict
-
- input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
- output_file = r'g:\temp\nacti_metadata.1.14.json'
-
-
- #%% Read metadata
-
- with open(input_file,'r') as f:
- d = json.load(f)
-
- assert d['info']['version'] == 1.13
-
-
- #%% Map images to locations (according to the metadata)
-
- file_name_to_original_location = {}
-
- # im = dataset_labels['images'][0]
- for im in tqdm(d['images']):
- file_name_to_original_location[im['file_name']] = im['location']
-
- original_locations = set(file_name_to_original_location.values())
-
- print('Found {} locations in the original metadata:'.format(len(original_locations)))
- for loc in original_locations:
- print('[{}]'.format(loc))
-
-
- #%% Map images to new locations
-
- def path_to_location(relative_path):
-
- relative_path = relative_path.replace('\\','/')
- if relative_path in file_name_to_original_location:
- location_name = file_name_to_original_location[relative_path]
- if location_name == 'San Juan Mntns, Colorado':
- # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
- tokens = relative_path.split('/')[-1].split('_')
- assert tokens[1].startswith('Unit')
- location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
- elif location_name == 'Lebec, California':
- # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
- tokens = relative_path.split('/')[-1].split('_')
- assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
- location_name = 'lebec_{}'.format(tokens[0])
- elif location_name == 'Archbold, FL':
- # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
- tokens = relative_path.split('/')[-1].split('_')
- assert tokens[0].startswith('FL-')
- location_name = 'archbold_{}'.format(tokens[0])
- else:
- assert location_name == ''
- tokens = relative_path.split('/')[-1].split('_')
- if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
- location_name = '{}'.format(tokens[0])
-
- else:
-
- location_name = 'unknown'
-
- # print('Returning location {} for file {}'.format(location_name,relative_path))
-
- return location_name
-
- file_name_to_updated_location = {}
- updated_location_to_count = defaultdict(int)
- for im in tqdm(d['images']):
-
- updated_location = path_to_location(im['file_name'])
- file_name_to_updated_location[im['file_name']] = updated_location
- updated_location_to_count[updated_location] += 1
-
- updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
- key=lambda item: item[1],
- reverse=True)}
-
- updated_locations = set(file_name_to_updated_location.values())
-
- print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
- for loc in updated_location_to_count:
- print('{}: {}'.format(loc,updated_location_to_count[loc]))
-
-
- #%% Re-write metadata
-
- for im in d['images']:
- im['location'] = file_name_to_updated_location[im['file_name']]
- d['info']['version'] = 1.14
-
- with open(output_file,'w') as f:
- json.dump(d,f,indent=1)
-
-
- #%% For each location, sample some random images to make sure they look consistent
-
- input_base = r'd:\lila\nacti-unzipped'
- assert os.path.isdir(input_base)
-
- location_to_images = defaultdict(list)
-
- for im in d['images']:
- location_to_images[im['location']].append(im)
-
- n_to_sample = 10
- import random
- random.seed(0)
- sampling_folder_base = r'g:\temp\nacti_samples'
-
- for location in tqdm(location_to_images):
-
- images_this_location = location_to_images[location]
- if len(images_this_location) > n_to_sample:
- images_this_location = random.sample(images_this_location,n_to_sample)
-
- for i_image,im in enumerate(images_this_location):
-
- fn_relative = im['file_name']
- source_fn_abs = os.path.join(input_base,fn_relative)
- assert os.path.isfile(source_fn_abs)
- ext = os.path.splitext(fn_relative)[1]
- target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
- location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
- os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
- shutil.copyfile(source_fn_abs,target_fn_abs)
-
- # ...for each image
-
- # ...for each location
-
+ """
+
+ add_locations_to_nacti.py
+
+ As of 10.2023, NACTI metadata only has very coarse location information (e.g. "Florida"),
+ but camera IDs are embedded in filenames. This script pulls that information from filenames
+ and adds it to metadata.
+
+ """
+
+ #%% Imports and constants
+
+ import os
+ import json
+ import shutil
+
+ from tqdm import tqdm
+ from collections import defaultdict
+
+ input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
+ output_file = r'g:\temp\nacti_metadata.1.14.json'
+
+
+ #%% Read metadata
+
+ with open(input_file,'r') as f:
+ d = json.load(f)
+
+ assert d['info']['version'] == 1.13
+
+
+ #%% Map images to locations (according to the metadata)
+
+ file_name_to_original_location = {}
+
+ # im = dataset_labels['images'][0]
+ for im in tqdm(d['images']):
+ file_name_to_original_location[im['file_name']] = im['location']
+
+ original_locations = set(file_name_to_original_location.values())
+
+ print('Found {} locations in the original metadata:'.format(len(original_locations)))
+ for loc in original_locations:
+ print('[{}]'.format(loc))
+
+
+ #%% Map images to new locations
+
+ def path_to_location(relative_path):
+
+ relative_path = relative_path.replace('\\','/')
+ if relative_path in file_name_to_original_location:
+ location_name = file_name_to_original_location[relative_path]
+ if location_name == 'San Juan Mntns, Colorado':
+ # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
+ tokens = relative_path.split('/')[-1].split('_')
+ assert tokens[1].startswith('Unit')
+ location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
+ elif location_name == 'Lebec, California':
+ # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
+ tokens = relative_path.split('/')[-1].split('_')
+ assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
+ location_name = 'lebec_{}'.format(tokens[0])
+ elif location_name == 'Archbold, FL':
+ # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
+ tokens = relative_path.split('/')[-1].split('_')
+ assert tokens[0].startswith('FL-')
+ location_name = 'archbold_{}'.format(tokens[0])
+ else:
+ assert location_name == ''
+ tokens = relative_path.split('/')[-1].split('_')
+ if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
+ location_name = '{}'.format(tokens[0])
+
+ else:
+
+ location_name = 'unknown'
+
+ # print('Returning location {} for file {}'.format(location_name,relative_path))
+
+ return location_name
+
+ file_name_to_updated_location = {}
+ updated_location_to_count = defaultdict(int)
+ for im in tqdm(d['images']):
+
+ updated_location = path_to_location(im['file_name'])
+ file_name_to_updated_location[im['file_name']] = updated_location
+ updated_location_to_count[updated_location] += 1
+
+ updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
+ key=lambda item: item[1],
+ reverse=True)}
+
+ updated_locations = set(file_name_to_updated_location.values())
+
+ print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
+ for loc in updated_location_to_count:
+ print('{}: {}'.format(loc,updated_location_to_count[loc]))
+
+
+ #%% Re-write metadata
+
+ for im in d['images']:
+ im['location'] = file_name_to_updated_location[im['file_name']]
+ d['info']['version'] = 1.14
+
+ with open(output_file,'w') as f:
+ json.dump(d,f,indent=1)
+
+
+ #%% For each location, sample some random images to make sure they look consistent
+
+ input_base = r'd:\lila\nacti-unzipped'
+ assert os.path.isdir(input_base)
+
+ location_to_images = defaultdict(list)
+
+ for im in d['images']:
+ location_to_images[im['location']].append(im)
+
+ n_to_sample = 10
+ import random
+ random.seed(0)
+ sampling_folder_base = r'g:\temp\nacti_samples'
+
+ for location in tqdm(location_to_images):
+
+ images_this_location = location_to_images[location]
+ if len(images_this_location) > n_to_sample:
+ images_this_location = random.sample(images_this_location,n_to_sample)
+
+ for i_image,im in enumerate(images_this_location):
+
+ fn_relative = im['file_name']
+ source_fn_abs = os.path.join(input_base,fn_relative)
+ assert os.path.isfile(source_fn_abs)
+ ext = os.path.splitext(fn_relative)[1]
+ target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
+ location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
+ os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
+ shutil.copyfile(source_fn_abs,target_fn_abs)
+
+ # ...for each image
+
+ # ...for each location
+
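
To make the filename parsing concrete, the example paths embedded in the comments above imply mappings like these (worked by hand from path_to_location, for illustration only):

    # 'San Juan Mntns, Colorado' + 'part0/sub000/2010_Unit150_Ivan097_img0003.jpg'
    #     -> 'sanjuan_2010_Unit150_Ivan097'
    # 'Lebec, California'        + 'part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg'
    #     -> 'lebec_CA-03'
    # 'Archbold, FL'             + 'part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg'
    #     -> 'archbold_FL-01'
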
@@ -1,16 +1,16 @@
- ########
- #
- # create_lila_blank_set.py
- #
- # Create a folder of blank images sampled from LILA. We'll aim for diversity, so less-common
- # locations will be oversampled relative to more common locations. We'll also run MegaDetector
- # (with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
- # blank set.
- #
- # We'll store location information for each image in a .json file, so we can split locations
- # into train/val in downstream tasks.
- #
- ########
+ """
+
+ create_lila_blank_set.py
+
+ Create a folder of blank images sampled from LILA. We'll aim for diversity, so less-common
+ locations will be oversampled relative to more common locations. We'll also run MegaDetector
+ (with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
+ blank set.
+
+ We'll store location information for each image in a .json file, so we can split locations
+ into train/val in downstream tasks.
+
+ """

  #%% Constants and imports

@@ -1,11 +1,11 @@
- ########
- #
- # create_lila_test_set.py
- #
- # Create a test set of camera trap images, containing N empty and N non-empty
- # images from each LILA data set.
- #
- ########
+ """
+
+ create_lila_test_set.py
+
+ Create a test set of camera trap images, containing N empty and N non-empty
+ images from each LILA data set.
+
+ """

  #%% Constants and imports