megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
@@ -14,9 +14,12 @@ import random
 
 from tqdm import tqdm
 from collections import defaultdict
+from copy import deepcopy
 
 from megadetector.data_management.lila.lila_common import \
     read_lila_all_images_file, is_empty, lila_base_urls
+from megadetector.utils.url_utils import parallel_download_urls
+from megadetector.utils.path_utils import open_file
 
 for s in lila_base_urls.values():
     assert s.endswith('/')
@@ -58,13 +61,13 @@ common_name_to_count = defaultdict(int)
 
 ds_name_to_urls = defaultdict(list)
 
-def find_items(row):
-
+def find_items(row): # noqa
+
     if is_empty(row['common_name']):
         return
-
+
     match = False
-
+
     # This is the only bit of this file that's specific to a particular query. In this case
     # we're checking whether each row is on a list of species of interest, but you do you.
     for species_name in species_of_interest:
@@ -72,7 +75,7 @@ def find_items(row):
             match = True
             common_name_to_count[species_name] += 1
             break
-
+
     if match:
         ds_name_to_urls[row['dataset_name']].append(row['url_' + preferred_provider])
 
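The hunk above (apparently from download_lila_subset.py, the LILA subset-download example script) matches each metadata row against a list of species of interest. A minimal, self-contained sketch of the same row-matching idea over a pandas DataFrame; the frame contents and the species list are invented for illustration, and url_gcp stands in for whatever 'url_' + preferred_provider resolves to:

```python
from collections import defaultdict

import pandas as pd

# Invented stand-ins for the script's metadata table and query
df = pd.DataFrame({
    'dataset_name': ['Caltech Camera Traps', 'ENA24', 'Caltech Camera Traps'],
    'common_name':  ['mountain lion', 'white-tailed deer', 'domestic dog'],
    'url_gcp':      ['https://example.org/1.jpg',
                     'https://example.org/2.jpg',
                     'https://example.org/3.jpg'],
})
species_of_interest = ['mountain lion', 'bobcat']

common_name_to_count = defaultdict(int)
ds_name_to_urls = defaultdict(list)

def find_items(row):
    # Substring match against the query list, as in the hunk above
    if not isinstance(row['common_name'], str) or len(row['common_name']) == 0:
        return
    for species_name in species_of_interest:
        if species_name in row['common_name']:
            common_name_to_count[species_name] += 1
            ds_name_to_urls[row['dataset_name']].append(row['url_gcp'])
            break

df.apply(find_items, axis=1)
print(dict(common_name_to_count))  # {'mountain lion': 1}
```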
@@ -86,8 +89,7 @@ print('Found {} matching URLs across {} datasets'.format(len(all_urls),len(ds_na
 
 for common_name in common_name_to_count:
     print('{}: {}'.format(common_name,common_name_to_count[common_name]))
-
-from copy import deepcopy
+
 ds_name_to_urls_raw = deepcopy(ds_name_to_urls)
 
 
@@ -104,19 +106,17 @@ else:
 
 #%% Choose target files for each URL
 
-from megadetector.data_management.lila.lila_common import lila_base_urls
-
 # We have a list of URLs per dataset, flatten that into a single list of URLs
 urls_to_download = set()
 for ds_name in ds_name_to_urls:
     for url in ds_name_to_urls[ds_name]:
         urls_to_download.add(url)
-urls_to_download = sorted(list(urls_to_download))
+urls_to_download = sorted(list(urls_to_download))
 
 # A URL might look like this:
 #
 # https://storage.googleapis.com/public-datasets-lila/wcs-unzipped/animals/0667/0302.jpg
-#
+#
 # We'll write that to an output file that looks like this (relative to output_dir):
 #
 # wcs-unzipped/animals/0667/0302.jpg
@@ -128,7 +128,7 @@ assert base_url.endswith('/')
 url_to_target_file = {}
 
 for url in urls_to_download:
-    assert url.startswith(base_url)
+    assert url.startswith(base_url)
     target_fn_relative = url.replace(base_url,'')
     target_fn_abs = os.path.join(output_dir,target_fn_relative)
     url_to_target_file[url] = target_fn_abs
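The two hunks above flatten the per-dataset URL lists and then map each URL to a path relative to the output directory by stripping a shared base URL. A worked example of that transformation, using the URL quoted in the comment; base_url and output_dir are assumed values for illustration:

```python
import os

base_url = 'https://storage.googleapis.com/public-datasets-lila/'  # assumed; must end with '/'
output_dir = os.path.expanduser('~/lila-downloads')                # assumed output location

url = 'https://storage.googleapis.com/public-datasets-lila/wcs-unzipped/animals/0667/0302.jpg'
assert url.startswith(base_url)

# Same string operations as in the hunk above
target_fn_relative = url.replace(base_url, '')
target_fn_abs = os.path.join(output_dir, target_fn_relative)

print(target_fn_relative)  # wcs-unzipped/animals/0667/0302.jpg
print(target_fn_abs)       # <output_dir>/wcs-unzipped/animals/0667/0302.jpg
```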
@@ -136,8 +136,6 @@ for url in urls_to_download:
 
 #%% Download image files
 
-from megadetector.utils.url_utils import parallel_download_urls
-
 download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
                                           verbose=False,
                                           overwrite=False,
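With the imports consolidated at the top of the script in this release, the download cell above calls parallel_download_urls directly. A hedged usage sketch: the keyword arguments shown here (url_to_target_file, verbose, overwrite, n_workers, pool_type) are the ones visible in these hunks, and the mapping is a placeholder:

```python
from megadetector.utils.url_utils import parallel_download_urls

# Placeholder mapping; in the script this is built from the LILA metadata table
url_to_target_file = {
    'https://storage.googleapis.com/public-datasets-lila/wcs-unzipped/animals/0667/0302.jpg':
        '/tmp/lila/wcs-unzipped/animals/0667/0302.jpg',
}

download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
                                          verbose=False,
                                          overwrite=False,
                                          n_workers=20,
                                          pool_type='thread')
# download_results summarizes per-URL outcomes (its structure is not shown in this diff)
```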
@@ -147,39 +145,38 @@ download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
 
 #%% Open output folder
 
-from megadetector.utils.path_utils import open_file
 open_file(output_dir)
 
 
 #%% Scrap
 
 if False:
-
+
     pass
 
     #%% Find all the reptiles on LILA
 
     reptile_rows = df.loc[df['class'] == 'reptilia']
-
+
     # i_row = 0; row = reptile_rows.iloc[i_row]
-
+
     common_name_to_count = defaultdict(int)
     dataset_to_count = defaultdict(int)
     for i_row,row in reptile_rows.iterrows():
         common_name_to_count[row['common_name']] += 1
         dataset_to_count[row['dataset_name']] += 1
-
+
     from megadetector.utils.ct_utils import sort_dictionary_by_value
-
+
     print('Found {} reptiles\n'.format(len(reptile_rows)))
-
+
    common_name_to_count = sort_dictionary_by_value(common_name_to_count,reverse=True)
    dataset_to_count = sort_dictionary_by_value(dataset_to_count,reverse=True)
-
+
     print('Common names by count:\n')
     for k in common_name_to_count:
         print('{} ({})'.format(k,common_name_to_count[k]))
-
-    print('\nDatasets by count:\n')
+
+    print('\nDatasets by count:\n')
     for k in dataset_to_count:
         print('{} ({})'.format(k,dataset_to_count[k]))
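The scrap cell above tallies reptile rows per common name and per dataset, then orders both dicts with ct_utils.sort_dictionary_by_value before printing. A standard-library equivalent of that sort-by-count step, with an invented count dict:

```python
counts = {'gopher tortoise': 12, 'green iguana': 40, 'gila monster': 3}

# Sort a count dictionary by value, largest first (what the scrap cell uses
# sort_dictionary_by_value(..., reverse=True) for)
counts_sorted = dict(sorted(counts.items(), key=lambda kv: kv[1], reverse=True))

for k in counts_sorted:
    print('{} ({})'.format(k, counts_sorted[k]))
```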
@@ -35,6 +35,7 @@ from megadetector.data_management.lila.lila_common import \
 from megadetector.utils import write_html_image_list
 from megadetector.utils.path_utils import zip_file
 from megadetector.utils.path_utils import open_file
+from megadetector.utils.url_utils import parallel_download_urls
 
 # We'll write images, metadata downloads, and temporary files here
 lila_local_base = os.path.expanduser('~/lila')
@@ -47,7 +48,7 @@ os.makedirs(metadata_dir,exist_ok=True)
 
 output_file = os.path.join(lila_local_base,'lila_image_urls_and_labels.csv')
 
-# Some datasets don't have "sequence_level_annotation" fields populated, but we know their
+# Some datasets don't have "sequence_level_annotation" fields populated, but we know their
 # annotation level
 ds_name_to_annotation_level = {}
 ds_name_to_annotation_level['Caltech Camera Traps'] = 'image'
@@ -79,11 +80,11 @@ if False:
 
 #%% Download and extract metadata for each dataset
 
-for ds_name in metadata_table.keys():
+for ds_name in metadata_table.keys():
     metadata_table[ds_name]['metadata_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
                                                        metadata_dir=metadata_dir,
                                                        metadata_table=metadata_table)
-
+
 #%% Load taxonomy data
 
 taxonomy_df = read_lila_taxonomy_mapping(metadata_dir)
@@ -95,12 +96,12 @@ ds_label_to_taxonomy = {}
 
 # i_row = 0; row = taxonomy_df.iloc[i_row]
 for i_row,row in taxonomy_df.iterrows():
-
+
     ds_label = row['dataset_name'] + ':' + row['query']
     assert ds_label.strip() == ds_label
     assert ds_label not in ds_label_to_taxonomy
     ds_label_to_taxonomy[ds_label] = row.to_dict()
-
+
 
 #%% Process annotations for each dataset
 
@@ -116,12 +117,12 @@ taxonomy_levels_to_include = \
     ['kingdom','phylum','subphylum','superclass','class','subclass','infraclass','superorder','order',
      'suborder','infraorder','superfamily','family','subfamily','tribe','genus','species','subspecies',
      'variety']
-
+
 header.extend(taxonomy_levels_to_include)
 
 missing_annotations = set()
 
-def clearnan(v):
+def _clearnan(v):
     if isinstance(v,float):
         assert np.isnan(v)
         v = ''
@@ -129,57 +130,57 @@ def clearnan(v):
     return v
 
 with open(output_file,'w',encoding='utf-8',newline='') as f:
-
+
     csv_writer = csv.writer(f)
     csv_writer.writerow(header)
-
+
     # ds_name = list(metadata_table.keys())[0]
     for ds_name in metadata_table.keys():
-
+
         if 'bbox' in ds_name:
             print('Skipping bbox dataset {}'.format(ds_name))
             continue
-
+
         print('Processing dataset {}'.format(ds_name))
-
+
         json_filename = metadata_table[ds_name]['metadata_filename']
         with open(json_filename, 'r') as f:
             data = json.load(f)
-
+
         categories = data['categories']
         category_ids = [c['id'] for c in categories]
         for c in categories:
             category_id_to_name = {c['id']:c['name'] for c in categories}
-
+
         annotations = data['annotations']
         images = data['images']
-
+
         image_id_to_annotations = defaultdict(list)
-
+
         # Go through annotations, marking each image with the categories that are present
         #
         # ann = annotations[0]
-        for ann in annotations:
+        for ann in annotations:
             image_id_to_annotations[ann['image_id']].append(ann)
-
+
         unannotated_images = []
-
+
         found_date = False
         found_location = False
         found_annotation_level = False
-
+
         if ds_name in ds_name_to_annotation_level:
             expected_annotation_level = ds_name_to_annotation_level[ds_name]
         else:
             expected_annotation_level = None
-
+
         # im = images[10]
         for i_image,im in tqdm(enumerate(images),total=len(images)):
-
+
             if (debug_max_images_per_dataset is not None) and (debug_max_images_per_dataset > 0) \
                 and (i_image >= debug_max_images_per_dataset):
                 break
-
+
             file_name = im['file_name'].replace('\\','/')
             base_url_gcp = metadata_table[ds_name]['image_base_url_gcp']
             base_url_aws = metadata_table[ds_name]['image_base_url_aws']
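These hunks appear to come from generate_lila_per_image_labels.py; the per-dataset loop above writes one CSV row per (image, category) pair, and the helper renamed here from clearnan to _clearnan converts NaN cells to empty strings on the way out. A tiny sketch of that helper's behavior, with invented sample values:

```python
import numpy as np

def clearnan(v):
    # In these columns a float is only ever NaN, i.e. a missing value
    if isinstance(v, float):
        assert np.isnan(v)
        v = ''
    return v

print(repr(clearnan(np.nan)))        # ''
print(repr(clearnan('Lynx rufus')))  # 'Lynx rufus'
```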
@@ -187,21 +188,21 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
             assert not base_url_gcp.endswith('/')
             assert not base_url_aws.endswith('/')
             assert not base_url_azure.endswith('/')
-
+
             url_gcp = base_url_gcp + '/' + file_name
             url_aws = base_url_aws + '/' + file_name
             url_azure = base_url_azure + '/' + file_name
-
+
             for k in im.keys():
                 if ('date' in k or 'time' in k) and (k not in ['datetime','date_captured']):
                     raise ValueError('Unrecognized datetime field')
-
+
             # This field name was only used for Caltech Camera Traps
             if 'date_captured' in im:
                 assert ds_name == 'Caltech Camera Traps'
                 im['datetime'] = im['date_captured']
-
-            def has_valid_datetime(im):
+
+            def _has_valid_datetime(im):
                 if 'datetime' not in im:
                     return False
                 v = im['datetime']
@@ -212,29 +213,29 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                 else:
                     assert isinstance(v,float) and np.isnan(v)
                     return False
-
-            dt_string = ''
-            if (has_valid_datetime(im)):
-
+
+            dt_string = ''
+            if (_has_valid_datetime(im)):
+
                 dt = dateparser.parse(im['datetime'])
-
+
                 if dt is None or dt.year < 1990 or dt.year > 2025:
-
+
                     # raise ValueError('Suspicious date parsing result')
-
-                    # Special case we don't want to print a warning about... this is
+
+                    # Special case we don't want to print a warning about... this is
                     # in invalid date that very likely originates on the camera, not at
                     # some intermediate processing step.
                     #
                     # print('Suspicious date for image {}: {} ({})'.format(
                     #    im['id'], im['datetime'], ds_name))
-                    pass
-
+                    pass
+
                 else:
-
+
                     found_date = True
                     dt_string = dt.strftime("%m-%d-%Y %H:%M:%S")
-
+
             # Location, sequence, and image IDs are only guaranteed to be unique within
             # a dataset, so for the output .csv file, include both
             if 'location' in im:
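The two hunks above rename has_valid_datetime to _has_valid_datetime and keep its logic: a non-empty string is a candidate timestamp, anything else must be NaN; candidates are parsed with dateparser and discarded when the year falls outside a plausible range. A self-contained sketch of that validation and sanity check, with invented records:

```python
import numpy as np
import dateparser

def has_valid_datetime(im):
    # Non-empty strings are candidates; NaN (a float) means "no timestamp"
    if 'datetime' not in im:
        return False
    v = im['datetime']
    if isinstance(v, str) and len(v) > 0:
        return True
    assert isinstance(v, float) and np.isnan(v)
    return False

sample_images = [{'datetime': '2013-07-04 11:22:33'},
                 {'datetime': '2088-01-01 00:00:00'},  # implausible camera date
                 {'datetime': np.nan}]

for im in sample_images:
    dt_string = ''
    if has_valid_datetime(im):
        dt = dateparser.parse(im['datetime'])
        # Same sanity window used in the hunk above
        if dt is not None and 1990 <= dt.year <= 2025:
            dt_string = dt.strftime('%m-%d-%Y %H:%M:%S')
    print(repr(dt_string))
```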
@@ -242,25 +243,25 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                 location_id = ds_name + ' : ' + str(im['location'])
             else:
                 location_id = ds_name
-
+
             image_id = ds_name + ' : ' + str(im['id'])
-
+
             if 'seq_id' in im:
                 sequence_id = ds_name + ' : ' + str(im['seq_id'])
             else:
                 sequence_id = ds_name + ' : ' + 'unknown'
-
+
             if 'frame_num' in im:
                 frame_num = im['frame_num']
             else:
                 frame_num = -1
-
+
             annotations_this_image = image_id_to_annotations[im['id']]
-
+
             categories_this_image = set()
-
+
             annotation_level = 'unknown'
-
+
             for ann in annotations_this_image:
                 assert ann['image_id'] == im['id']
                 categories_this_image.add(category_id_to_name[ann['category_id']])
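As the comment in this hunk notes, location, sequence, and image IDs are only unique within a dataset, so the output rows prefix each ID with the dataset name. A small illustration of that namespacing, with invented values:

```python
ds_name = 'Caltech Camera Traps'
im = {'id': '59f5fe2b-23d2-11e8-a6a3-ec086b02610b', 'location': 13, 'seq_id': '6f2159ad'}

if 'location' in im:
    location_id = ds_name + ' : ' + str(im['location'])
else:
    location_id = ds_name

image_id = ds_name + ' : ' + str(im['id'])

if 'seq_id' in im:
    sequence_id = ds_name + ' : ' + str(im['seq_id'])
else:
    sequence_id = ds_name + ' : ' + 'unknown'

print(location_id)  # Caltech Camera Traps : 13
print(image_id)     # Caltech Camera Traps : 59f5fe2b-23d2-11e8-a6a3-ec086b02610b
print(sequence_id)  # Caltech Camera Traps : 6f2159ad
```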
@@ -275,35 +276,35 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                     'Unexpected annotation level'
             elif expected_annotation_level is not None:
                 annotation_level = expected_annotation_level
-
+
             if len(categories_this_image) == 0:
                 unannotated_images.append(im)
                 continue
-
+
             # category_name = list(categories_this_image)[0]
             for category_name in categories_this_image:
-
+
                 ds_label = ds_name + ':' + category_name.lower()
-
+
                 if ds_label not in ds_label_to_taxonomy:
-
+
                     assert ds_label in known_unmapped_labels
-
+
                     # Only print a warning the first time we see an unmapped label
                     if ds_label not in missing_annotations:
                         print('Warning: {} not in taxonomy file'.format(ds_label))
                         missing_annotations.add(ds_label)
                     continue
-
+
                 taxonomy_labels = ds_label_to_taxonomy[ds_label]
-
+
                 """
-                header =
+                header =
                 ['dataset_name','url','image_id','sequence_id','location_id',
                  'frame_num','original_label','scientific_name','common_name',
                  'datetime','annotation_level']
                 """
-
+
                 row = []
                 row.append(ds_name)
                 row.append(url_gcp)
@@ -314,37 +315,37 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                 row.append(location_id)
                 row.append(frame_num)
                 row.append(taxonomy_labels['query'])
-                row.append(clearnan(taxonomy_labels['scientific_name']))
-                row.append(clearnan(taxonomy_labels['common_name']))
+                row.append(_clearnan(taxonomy_labels['scientific_name']))
+                row.append(_clearnan(taxonomy_labels['common_name']))
                 row.append(dt_string)
                 row.append(annotation_level)
-
+
                 for s in taxonomy_levels_to_include:
-                    row.append(clearnan(taxonomy_labels[s]))
-
+                    row.append(_clearnan(taxonomy_labels[s]))
+
                 assert len(row) == len(header)
-
+
                 csv_writer.writerow(row)
-
+
             # ...for each category that was applied at least once to this image
-
+
         # ...for each image in this dataset
-
+
         if not found_date:
             pass
             # print('Warning: no date information available for this dataset')
-
+
         if not found_location:
             pass
             # print('Warning: no location information available for this dataset')
-
+
         if not found_annotation_level and (ds_name not in ds_name_to_annotation_level):
             print('Warning: no annotation level information available for this dataset')
-
+
         if len(unannotated_images) > 0:
             print('Warning: {} of {} images are un-annotated\n'.\
                   format(len(unannotated_images),len(images)))
-
+
     # ...for each dataset
 
 # ...with open()
@@ -364,7 +365,7 @@ print('Read {} rows from {}'.format(len(df),output_file))
 
 tqdm.pandas()
 
-def isint(v):
+def _isint(v):
     return isinstance(v,int) or isinstance(v,np.int64)
 
 valid_annotation_levels = set(['sequence','image','unknown'])
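The _isint helper above accepts both Python ints and NumPy int64 values, which matters because pandas hands back np.int64 (not a Python int) for integer columns. A quick illustration with a toy Series:

```python
import numpy as np
import pandas as pd

def isint(v):
    return isinstance(v, int) or isinstance(v, np.int64)

frame_num = pd.Series([0, 1, 2])[0]  # numpy.int64, not a Python int
print(isinstance(frame_num, int))    # False: np.int64 is not a subclass of int
print(isint(frame_num))              # True
```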
@@ -373,8 +374,8 @@ valid_annotation_levels = set(['sequence','image','unknown'])
 # in the next cell to look for datasets that only have a single location
 dataset_name_to_locations = defaultdict(set)
 
-def check_row(row):
-
+def _check_row(row):
+
     assert row['dataset_name'] in metadata_table.keys()
     for url_column in ['url_gcp','url_aws','url_azure']:
         assert row[url_column].startswith('https://') or row[url_column].startswith('http://')
@@ -387,21 +388,21 @@ def check_row(row):
         assert np.isnan(row['frame_num'])
     else:
         # -1 is sometimes used for sequences of unknown length
-        assert isint(row['frame_num']) and row['frame_num'] >= -1
+        assert _isint(row['frame_num']) and row['frame_num'] >= -1
 
     ds_name = row['dataset_name']
     dataset_name_to_locations[ds_name].add(row['location_id'])
-
+
 # Faster, but more annoying to debug
 if True:
-
-    df.progress_apply(check_row, axis=1)
+
+    df.progress_apply(_check_row, axis=1)
 
 else:
-
+
     # i_row = 0; row = df.iloc[i_row]
     for i_row,row in tqdm(df.iterrows(),total=len(df)):
-        check_row(row)
+        _check_row(row)
 
 
 #%% Check for datasets that have only one location string (typically "unknown")
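The validation cell above runs the renamed _check_row over every row with df.progress_apply, the tqdm-instrumented version of DataFrame.apply that tqdm.pandas() installs. A minimal sketch of that pattern with an invented frame and a simplified check:

```python
import pandas as pd
from tqdm import tqdm

tqdm.pandas()  # adds DataFrame.progress_apply

df = pd.DataFrame({
    'dataset_name': ['Caltech Camera Traps', 'ENA24', 'Missouri Camera Traps'],
    'url_gcp': ['https://example.org/1.jpg',
                'https://example.org/2.jpg',
                'https://example.org/3.jpg'],
})

def check_row(row):
    # Assertion failures abort the run; the progress bar tracks validation progress
    assert isinstance(row['dataset_name'], str) and len(row['dataset_name']) > 0
    assert row['url_gcp'].startswith('https://') or row['url_gcp'].startswith('http://')

df.progress_apply(check_row, axis=1)
```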
@@ -428,19 +429,19 @@ images_to_download = []
 
 # ds_name = list(metadata_table.keys())[2]
 for ds_name in metadata_table.keys():
-
+
     if 'bbox' in ds_name:
         continue
-
+
     # Find all rows for this dataset
     ds_rows = df.loc[df['dataset_name'] == ds_name]
-
+
     print('{} rows available for {}'.format(len(ds_rows),ds_name))
     assert len(ds_rows) > 0
-
+
     empty_rows = ds_rows[ds_rows['scientific_name'].isnull()]
     non_empty_rows = ds_rows[~ds_rows['scientific_name'].isnull()]
-
+
     if len(empty_rows) == 0:
         print('No empty images available for {}'.format(ds_name))
     elif len(empty_rows) > n_empty_images_per_dataset:
@@ -452,7 +453,7 @@ for ds_name in metadata_table.keys():
     elif len(non_empty_rows) > n_non_empty_images_per_dataset:
         non_empty_rows = non_empty_rows.sample(n=n_non_empty_images_per_dataset)
     images_to_download.extend(non_empty_rows.to_dict('records'))
-
+
 # ...for each dataset
 
 print('Selected {} total images'.format(len(images_to_download)))
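The selection cells above split each dataset's rows into empty (no scientific_name) and non-empty rows, cap each group with DataFrame.sample, and accumulate the survivors as records. A condensed sketch of that selection logic; the frame and the per-dataset caps are invented:

```python
import pandas as pd

df = pd.DataFrame({
    'dataset_name': ['ds1'] * 6,
    'scientific_name': ['Lynx rufus', None, 'Canis latrans', None, None, 'Puma concolor'],
})

n_empty_images_per_dataset = 1      # invented caps
n_non_empty_images_per_dataset = 2
images_to_download = []

for ds_name in df['dataset_name'].unique():

    ds_rows = df.loc[df['dataset_name'] == ds_name]

    empty_rows = ds_rows[ds_rows['scientific_name'].isnull()]
    non_empty_rows = ds_rows[~ds_rows['scientific_name'].isnull()]

    if len(empty_rows) > n_empty_images_per_dataset:
        empty_rows = empty_rows.sample(n=n_empty_images_per_dataset)
    if len(non_empty_rows) > n_non_empty_images_per_dataset:
        non_empty_rows = non_empty_rows.sample(n=n_non_empty_images_per_dataset)

    images_to_download.extend(empty_rows.to_dict('records'))
    images_to_download.extend(non_empty_rows.to_dict('records'))

print('Selected {} total images'.format(len(images_to_download)))  # Selected 3 total images
```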
@@ -468,7 +469,7 @@ url_to_target_file = {}
 
 # i_image = 10; image = images_to_download[i_image]
 for i_image,image in tqdm(enumerate(images_to_download),total=len(images_to_download)):
-
+
     url = image['url_' + preferred_cloud]
     ext = os.path.splitext(url)[1]
     fn_relative = 'image_{}'.format(str(i_image).zfill(4)) + ext
@@ -476,11 +477,10 @@ for i_image,image in tqdm(enumerate(images_to_download),total=len(images_to_down
     image['relative_file'] = fn_relative
     image['url'] = url
     url_to_target_file[url] = fn_abs
-
+
 
 #%% Download images (execution)
 
-from megadetector.utils.url_utils import parallel_download_urls
 download_results = parallel_download_urls(url_to_target_file,verbose=False,overwrite=True,
                                           n_workers=20,pool_type='thread')
 
@@ -493,10 +493,10 @@ html_images = []
 
 # im = images_to_download[0]
 for im in images_to_download:
-
+
     if im['relative_file'] is None:
         continue
-
+
     output_im = {}
     output_im['filename'] = im['relative_file']
     output_im['linkTarget'] = im['url']
@@ -504,7 +504,7 @@ for im in images_to_download:
     output_im['imageStyle'] = 'width:600px;'
     output_im['textStyle'] = 'font-weight:normal;font-size:100%;'
     html_images.append(output_im)
-
+
 write_html_image_list.write_html_image_list(html_filename,html_images)
 
 open_file(html_filename)
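The preview cell above builds one dict per downloaded image, using the keys shown in these hunks (filename, linkTarget, imageStyle, textStyle), and hands the list to write_html_image_list. A hedged sketch of generating such a preview page; the output path and the image/URL pair are assumptions for illustration:

```python
import os

from megadetector.utils import write_html_image_list
from megadetector.utils.path_utils import open_file

html_filename = os.path.expanduser('~/lila/preview.html')  # assumed output location

html_images = []

# Assumed (relative filename, source URL) pairs for the downloaded previews
for fn_relative, url in [('image_0000.jpg', 'https://example.org/image_0000.jpg')]:
    output_im = {}
    output_im['filename'] = fn_relative  # path relative to the .html file
    output_im['linkTarget'] = url        # click-through target
    output_im['imageStyle'] = 'width:600px;'
    output_im['textStyle'] = 'font-weight:normal;font-size:100%;'
    html_images.append(output_im)

write_html_image_list.write_html_image_list(html_filename, html_images)
open_file(html_filename)
```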