megadetector-5.0.11-py3-none-any.whl → megadetector-5.0.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (201)
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
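
For context, the table above is a file-level diff of the two wheels. Below is a minimal sketch of how a listing like this can be reproduced locally; it assumes both wheels have been downloaded to the working directory (e.g. with "pip download megadetector==5.0.11 --no-deps", and likewise for 5.0.12), which is an assumption about your environment, not part of this diff.

# A minimal sketch of reproducing a file-level wheel diff like the table above.
# Assumes both wheels are present in the working directory.

import zipfile

old_wheel = 'megadetector-5.0.11-py3-none-any.whl'
new_wheel = 'megadetector-5.0.12-py3-none-any.whl'

with zipfile.ZipFile(old_wheel) as z:
    old_files = set(z.namelist())
with zipfile.ZipFile(new_wheel) as z:
    new_files = set(z.namelist())

# Print added and removed files, diff-style
for fn in sorted(new_files - old_files):
    print('+ ' + fn)
for fn in sorted(old_files - new_files):
    print('- ' + fn)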
@@ -0,0 +1,263 @@
+ """
+
+ wellington_to_json.py
+
+ Convert the .csv file provided for the Wellington data set to a
+ COCO-camera-traps .json file
+
+ """
+
+ #%% Constants and environment
+
+ import pandas as pd
+ import os
+ import glob
+ import json
+ import re
+ import uuid
+ import time
+ import ntpath
+ import humanfriendly
+ import PIL.Image
+
+ from tqdm import tqdm
+
+ input_metadata_file = os.path.expanduser('~/data/wct/wellington_camera_traps.csv')
+ output_file = os.path.expanduser('~/data/wct/wellington_camera_traps.json')
+ image_directory = os.path.expanduser('~/data/wct/images')
+ preview_dir = os.path.expanduser('~/data/wct/preview')
+
+ assert(os.path.isdir(image_directory))
+
+
+ #%% Read source data
+
+ input_metadata = pd.read_csv(input_metadata_file)
+
+ print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
+       len(input_metadata)))
+
+ # Filenames were provided as *.jpg, but images are *.JPG; converting here
+ input_metadata['file'] = input_metadata['file'].apply(lambda x: x.replace('.jpg','.JPG'))
+
+ print('Converted extensions to uppercase')
+
+
+ #%% Map filenames to rows, verify image existence
+
+ # Takes ~30 seconds, since it's checking the existence of ~270k images
+
+ start_time = time.time()
+ filenames_to_rows = {}
+ image_filenames = input_metadata.file
+
+ duplicate_rows = []
+
+ # Build up a map from filenames to a list of rows, checking image existence as we go
+ for i_file,fn in enumerate(image_filenames):
+
+     if (fn in filenames_to_rows):
+         duplicate_rows.append(i_file)
+         filenames_to_rows[fn].append(i_file)
+     else:
+         filenames_to_rows[fn] = [i_file]
+         image_path = os.path.join(image_directory,fn)
+         assert(os.path.isfile(image_path))
+
+ elapsed = time.time() - start_time
+ print('Finished verifying image existence in {}, found {} filenames with multiple labels'.format(
+     humanfriendly.format_timespan(elapsed),len(duplicate_rows)))
+
+ # I didn't expect this to be true a priori, but it appears to be true, and
+ # it saves us the trouble of checking consistency across multiple occurrences
+ # of an image.
+ assert(len(duplicate_rows) == 0)
+
+
+ #%% Check for images that aren't included in the metadata file
+
+ # Enumerate all images
+ image_full_paths = glob.glob(os.path.join(image_directory,'*.JPG'))
+
+ for i_image,image_path in enumerate(image_full_paths):
+
+     fn = ntpath.basename(image_path)
+     assert(fn in filenames_to_rows)
+
+ print('Finished checking {} images to make sure they\'re in the metadata'.format(
+     len(image_full_paths)))
+
+
+ #%% Create CCT dictionaries
+
+ # Also gets image sizes, so this takes ~6 minutes
+ #
+ # Implicitly checks images for overt corruptness, i.e. by not crashing.
+
+ images = []
+ annotations = []
+
+ # Map categories to integer IDs (that's what COCO likes)
+ next_category_id = 0
+ categories_to_category_id = {}
+ categories_to_counts = {}
+
+ # For each image...
+ #
+ # Because in practice images are 1:1 with annotations in this data set,
+ # this is also a loop over annotations.
+
+ start_time = time.time()
+
+ sequence_frame_ids = set()
+
+ # image_name = image_filenames[0]
+ for image_name in tqdm(image_filenames):
+
+     rows = filenames_to_rows[image_name]
+
+     # As per above, this is convenient and appears to be true; asserting to be safe
+     assert(len(rows) == 1)
+     i_row = rows[0]
+
+     row = input_metadata.iloc[i_row]
+
+     im = {}
+
+     # Filenames look like "290716114012001a1116.jpg"
+     im['id'] = image_name.split('.')[0]
+     im['file_name'] = image_name
+
+     # This gets imported as an int64
+     im['seq_id'] = str(row['sequence'])
+
+     # These appear as "image1", "image2", etc.
+     frame_id = row['image_sequence']
+     m = re.match(r'^image(\d+)$',frame_id)
+     assert m is not None
+     im['frame_num'] = int(m.group(1)) - 1
+
+     # Make sure we haven't seen this sequence/frame combination before
+     sequence_frame_id = im['seq_id'] + '_' + str(im['frame_num'])
+     assert sequence_frame_id not in sequence_frame_ids
+     sequence_frame_ids.add(sequence_frame_id)
+
+     # In the form "001a"
+     im['location'] = row['site']
+
+     # Can be in the form '111' or 's46'
+     im['camera'] = row['camera']
+
+     # In the form "7/29/2016 11:40"
+     im['datetime'] = row['date']
+
+     # Check image height and width
+     image_path = os.path.join(image_directory,image_name)
+     assert(os.path.isfile(image_path))
+     pil_image = PIL.Image.open(image_path)
+     width, height = pil_image.size
+     im['width'] = width
+     im['height'] = height
+
+     images.append(im)
+
+     category = row['label'].lower()
+
+     # Use 'empty' to be consistent with other data on LILA
+     if (category == 'nothinghere'):
+         category = 'empty'
+
+     # Have we seen this category before?
+     if category in categories_to_category_id:
+         category_id = categories_to_category_id[category]
+         categories_to_counts[category] += 1
+     else:
+         category_id = next_category_id
+         categories_to_category_id[category] = category_id
+         categories_to_counts[category] = 1
+         next_category_id += 1
+
+     # Create an annotation
+     ann = {}
+
+     # The Internet tells me this guarantees uniqueness to a reasonable extent, even
+     # beyond the sheer improbability of collisions.
+     ann['id'] = str(uuid.uuid1())
+     ann['image_id'] = im['id']
+     ann['category_id'] = category_id
+
+     annotations.append(ann)
+
+ # ...for each image
+
+ # Convert categories to a CCT-style dictionary
+
+ categories = []
+
+ for category in categories_to_counts:
+     print('Category {}, count {}'.format(category,categories_to_counts[category]))
+     category_id = categories_to_category_id[category]
+     cat = {}
+     cat['name'] = category
+     cat['id'] = category_id
+     categories.append(cat)
+
+ elapsed = time.time() - start_time
+ print('Finished creating CCT dictionaries in {}'.format(
+     humanfriendly.format_timespan(elapsed)))
+
+
+ #%% Create info struct
+
+ info = {}
+ info['year'] = 2018
+ info['version'] = '1.01'
+ info['description'] = 'Wellington Camera Traps'
+ info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
+ info['contributor'] = 'Victor Anton'
+
+
+ #%% Write output
+
+ json_data = {}
+ json_data['images'] = images
+ json_data['annotations'] = annotations
+ json_data['categories'] = categories
+ json_data['info'] = info
+
+ with open(output_file,'w') as f:
+     json.dump(json_data,f,indent=1)
+
+ print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
+     len(images),len(annotations),len(categories)))
+
+
+ #%% Validate the .json file
+
+ from megadetector.data_management.databases import integrity_check_json_db
+
+ options = integrity_check_json_db.IntegrityCheckOptions()
+ options.baseDir = image_directory
+ options.bCheckImageSizes = False
+ options.bCheckImageExistence = True
+ options.bFindUnusedImages = True
+
+ sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_file, options)
+
+
+ #%% Preview labels
+
+ from megadetector.visualization import visualize_db
+
+ viz_options = visualize_db.DbVizOptions()
+ viz_options.num_to_visualize = 2000
+ viz_options.trim_to_images_with_bboxes = False
+ viz_options.add_search_links = False
+ viz_options.sort_by_filename = False
+ viz_options.parallelize_rendering = True
+ viz_options.classes_to_exclude = ['test']
+
+ html_output_file, image_db = visualize_db.visualize_db(db_path=output_file,
+                                                        output_dir=preview_dir,
+                                                        image_base_dir=image_directory,
+                                                        options=viz_options)
+
+ from megadetector.utils import path_utils
+ path_utils.open_file(html_output_file)
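
Both importer scripts in this diff (wellington_to_json.py above, wi_to_json.py below) write the same top-level COCO Camera Traps (CCT) structure. For orientation, here is a minimal sketch of that layout; the field values are illustrative examples drawn from the comments in wellington_to_json.py, not real data.

# Illustrative sketch of the CCT .json layout produced by both importers in
# this diff; values are examples only.
cct_data = {
    'info': {'year': 2018, 'version': '1.01',
             'description': 'Wellington Camera Traps'},
    'images': [
        {'id': '290716114012001a1116', 'file_name': '290716114012001a1116.JPG',
         'seq_id': '1234', 'frame_num': 0, 'location': '001a',
         'datetime': '7/29/2016 11:40', 'width': 2048, 'height': 1536}
    ],
    'annotations': [
        # Species-level labels only; these scripts don't produce bounding boxes
        {'id': 'a-uuid-string', 'image_id': '290716114012001a1116',
         'category_id': 1}
    ],
    'categories': [{'id': 0, 'name': 'empty'}, {'id': 1, 'name': 'bird'}]
}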
@@ -0,0 +1,442 @@
+ """
+
+ wi_to_json.py
+
+ Prepares CCT-formatted metadata based on a Wildlife Insights data export.
+
+ Mostly assumes you have the images also, for validation/QA.
+
+ """
+
+ #%% Imports and constants
+
+ import os
+ import json
+ import pandas as pd
+ import shutil
+ import uuid
+ import datetime
+ import dateutil.parser
+ import sys
+ import subprocess
+ import copy
+
+ from collections import defaultdict
+ from tqdm import tqdm
+
+ from megadetector.visualization import visualize_db
+ from megadetector.data_management.databases import integrity_check_json_db
+
+ organization_name = 'organization'
+ input_base = os.path.expanduser('~/data/' + organization_name)
+ image_base = os.path.join(input_base,'deployment')
+ image_csv = os.path.join(input_base,'images.csv')
+ output_json_filename = os.path.join(input_base, organization_name + '_camera_traps.json')
+ preview_base = os.path.join(input_base,'preview')
+
+ assert os.path.isfile(image_csv)
+ assert os.path.isdir(image_base)
+
+ MISSING_COMMON_NAME_TOKEN = 'MISSING'
+
+ output_encoding = 'utf-8'
+
+ # Because WI filenames are GUIDs, it's not practical to page through sequences in an
+ # image viewer. So we're going to (optionally) create a copy of the data set where
+ # images are ordered.
+ create_ordered_dataset = False
+
+ ordered_image_base = os.path.join(input_base,'deployment-ordered')
+ ordered_json_filename = os.path.join(input_base, organization_name + '_camera_traps_ordered.json')
+ ordered_preview_base = os.path.join(input_base,'preview-ordered')
+
+ info = {}
+ info['year'] = 2020
+ info['version'] = '1.0'
+ info['description'] = organization_name + ' camera traps'
+ info['contributor'] = organization_name
+ info['date_created'] = str(datetime.date.today())
+
+ def open_file(filename):
+     if sys.platform == "win32":
+         os.startfile(filename)
+     else:
+         opener = "open" if sys.platform == "darwin" else "xdg-open"
+         subprocess.call([opener, filename])
+
+
+ #%% Load ground truth
+
+ images_df = pd.read_csv(image_csv)
+
+ print('Loaded {} ground truth annotations'.format(len(images_df)))
+
+
+ #%% Take everything out of Pandas
+
+ images = images_df.to_dict('records')
+
+
+ #%% Synthesize common names when they're not available
+
+ for im in images:
+
+     if not isinstance(im['common_name'],str):
+
+         # Blank rows should always have "Blank" as the common name, so a row
+         # with no common name should never be blank
+         assert im['is_blank'] == 0
+         assert isinstance(im['genus'],str) and isinstance(im['species'],str)
+         im['common_name'] = im['genus'].strip() + ' ' + im['species'].strip()
+
+
+ #%% Convert string timestamps to Python datetimes
+
+ all_locations = set()
+
+ # im = images[0]
+ for im in tqdm(images):
+
+     dt = dateutil.parser.isoparse(im['timestamp'])
+     assert dt.year >= 2019 and dt.year <= 2021
+     im['datetime'] = dt
+
+     # The field called "location" in the WI .csv file is a URL; we want to reclaim
+     # the "location" keyword for CCT output
+     im['url'] = im['location']
+     im['location'] = im['deployment_id']
+     all_locations.add(im['location'])
+
+
+ #%% Synthesize sequence information
+
+ locations = list(all_locations)
+ print('Found {} locations'.format(len(locations)))
+
+ sequences = set()
+ sequence_to_images = defaultdict(list)
+ max_seconds_within_sequence = 10
+
+ # Sort images by time within each location
+ # i_location = 0; location = locations[i_location]
+ for i_location,location in tqdm(enumerate(locations),total=len(locations)):
+
+     images_this_location = [im for im in images if im['location'] == location]
+     sorted_images_this_location = sorted(images_this_location, key=lambda im: im['datetime'])
+
+     current_sequence_id = None
+     next_frame_number = 0
+     previous_datetime = None
+
+     # previous_datetime = sorted_images_this_location[0]['datetime']
+     # im = sorted_images_this_location[1]
+     for i_image,im in enumerate(sorted_images_this_location):
+
+         # Timestamp for this image; may be None
+         dt = im['datetime']
+
+         # Time since the previous image; None if either timestamp is missing
+         if dt is None:
+             delta = None
+         elif previous_datetime is None:
+             delta = None
+         else:
+             assert isinstance(dt,datetime.datetime)
+             delta = (dt - previous_datetime).total_seconds()
+
+         # Start a new sequence if we couldn't compute a time delta, or if the
+         # gap since the previous image is longer than our threshold
+         if delta is None or delta > max_seconds_within_sequence:
+             next_frame_number = 0
+             current_sequence_id = str(uuid.uuid1())
+             sequences.add(current_sequence_id)
+         assert current_sequence_id is not None
+
+         im['seq_id'] = current_sequence_id
+         im['synthetic_frame_number'] = next_frame_number
+         next_frame_number = next_frame_number + 1
+         previous_datetime = dt
+         sequence_to_images[im['seq_id']].append(im)
+
+     # ...for each image in this location
+
+ # ...for each location
+
+
+ #%% Create category dict and category IDs
+
+ categories_to_counts = defaultdict(int)
+ category_mappings = {'blank':'empty',
+                      'mammal':'unknown_mammal',
+                      'bird':'unknown_bird',
+                      'unknown_species':'unknown'}
+
+ for c in category_mappings.values():
+     assert ' ' not in c
+
+ # im = images[0]
+ for im in tqdm(images):
+
+     category_name = im['common_name'].lower().replace("'",'').replace(' ','_')
+     if category_name in category_mappings:
+         category_name = category_mappings[category_name]
+     categories_to_counts[category_name] += 1
+     im['category_name'] = category_name
+
+ categories_to_counts_sorted = {k: v for k, v in sorted(categories_to_counts.items(),
+                                key=lambda item: item[1],reverse=True)}
+
+ print('\n')
+ for s in categories_to_counts_sorted.keys():
+     print('{}: {}'.format(s,categories_to_counts_sorted[s]))
+
+
+ #%% Count frames in each sequence
+
+ sequence_id_to_n_frames = defaultdict(int)
+
+ for im in tqdm(images):
+     sequence_id_to_n_frames[im['seq_id']] += 1
+
+ for im in tqdm(images):
+     im['seq_num_frames'] = sequence_id_to_n_frames[im['seq_id']]
+
+
+ #%% Build relative paths
+
+ missing_images = []
+
+ # im = images[0]
+ for i_image,im in enumerate(tqdm(images)):
+
+     # Sample URL:
+     #
+     # gs://project-asfasdfd/deployment/21444549/asdfasdfd-616a-4d10-a921-45ac456c568a.jpg
+     relative_path = im['url'].split('/deployment/')[1]
+     assert relative_path is not None and len(relative_path) > 0
+     im['relative_path'] = relative_path
+
+     if not os.path.isfile(os.path.join(image_base,relative_path)):
+         missing_images.append(im)
+
+ print('{} images are missing'.format(len(missing_images)))
+
+
+ #%% Double-check images with multiple annotations
+
+ filename_to_images = defaultdict(list)
+
+ # im = images[0]
+ for im in tqdm(images):
+     filename_to_images[im['relative_path']].append(im)
+
+ filenames_with_multiple_annotations = [fn for fn in filename_to_images.keys()
+                                        if len(filename_to_images[fn]) > 1]
+
+ print('\nFound {} filenames with multiple annotations'.format(len(filenames_with_multiple_annotations)))
+
+
+ #%% Assemble dictionaries
+
+ images_out = []
+ image_id_to_image = {}
+ annotations = []
+ categories = []
+
+ category_name_to_category = {}
+ category_id_to_category = {}
+
+ # Force the empty category to be ID 0
+ empty_category = {}
+ empty_category['name'] = 'empty'
+ empty_category['id'] = 0
+ empty_category['count'] = 0
+
+ category_id_to_category[0] = empty_category
+ category_name_to_category['empty'] = empty_category
+ categories.append(empty_category)
+ next_id = 1
+
+ # input_im = images[0]
+ for input_im in tqdm(images):
+
+     category_name = input_im['category_name'].lower().strip()
+
+     if category_name not in category_name_to_category:
+
+         category_id = next_id
+         next_id += 1
+         category = {}
+         category['id'] = category_id
+         category['name'] = category_name
+         category['count'] = 0
+         categories.append(category)
+         category_name_to_category[category_name] = category
+         category_id_to_category[category_id] = category
+
+     else:
+
+         category = category_name_to_category[category_name]
+
+     category_id = category['id']
+     category['count'] += 1
+
+     im = {}
+     im['id'] = input_im['relative_path'].replace('/','_')
+     im['datetime'] = str(input_im['datetime'])
+     im['file_name'] = input_im['relative_path']
+     im['seq_id'] = input_im['seq_id']
+     im['frame_num'] = input_im['synthetic_frame_number']
+     im['seq_num_frames'] = input_im['seq_num_frames']
+     im['location'] = input_im['location']
+
+     if im['id'] in image_id_to_image:
+         # This image has multiple annotations; only add it to the image list once
+         pass
+     else:
+         image_id_to_image[im['id']] = im
+         images_out.append(im)
+
+     ann = {}
+     ann['id'] = str(uuid.uuid1())
+     ann['image_id'] = im['id']
+     ann['category_id'] = category_id
+     ann['sequence_level_annotation'] = False
+     annotations.append(ann)
+
+ # ...for each image
+
+
+ #%% Write output .json
+
+ data = {}
+ data['info'] = info
+ data['images'] = images_out
+ data['annotations'] = annotations
+ data['categories'] = categories
+
+ with open(output_json_filename, 'w') as f:
+     json.dump(data, f, indent=1)
+
+ print('Finished writing json to {}'.format(output_json_filename))
+
+
+ #%% Validate .json file
+
+ options = integrity_check_json_db.IntegrityCheckOptions()
+ options.baseDir = image_base
+ options.bCheckImageSizes = False
+ options.bCheckImageExistence = True
+ options.bFindUnusedImages = True
+
+ _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_filename, options)
+
+
+ #%% Preview labels
+
+ viz_options = visualize_db.DbVizOptions()
+ viz_options.num_to_visualize = 300
+ viz_options.trim_to_images_with_bboxes = False
+ viz_options.add_search_links = True
+ viz_options.sort_by_filename = False
+ viz_options.parallelize_rendering = True
+ viz_options.include_filename_links = True
+
+ html_output_file, _ = visualize_db.visualize_db(db_path=output_json_filename,
+                                                 output_dir=preview_base,
+                                                 image_base_dir=image_base,
+                                                 options=viz_options)
+ open_file(html_output_file)
+ # open_file(os.path.join(image_base,'2100703/1141a545-88d2-498b-a684-7431f7aeb324.jpg'))
+
+
+ #%% Optionally create an ordered copy of the data set
+
+ if create_ordered_dataset:
+
+     pass
+
+     #%% Create ordered dataset
+
+     # Because WI filenames are GUIDs, it's not practical to page through sequences in an
+     # image viewer. So we're going to create a copy of the data set where images are
+     # ordered.
+
+     os.makedirs(ordered_image_base,exist_ok=True)
+
+     ordered_images = {}
+
+     # im = images_out[0]
+     for im in tqdm(images_out):
+         im_out = copy.deepcopy(im)
+         ordered_filename = im['location'] + '_' + im['seq_id'] + '_' + \
+             str(im['frame_num']) + '_' + os.path.basename(im['file_name'])
+         assert ordered_filename not in ordered_images
+         im_out['original_file'] = im_out['file_name']
+         im_out['file_name'] = ordered_filename
+         ordered_images[ordered_filename] = im_out
+
+     ordered_images = list(ordered_images.values())
+
+
+     #%% Create ordered .json
+
+     data_ordered = copy.copy(data)
+     data_ordered['images'] = ordered_images
+
+     with open(ordered_json_filename, 'w') as f:
+         json.dump(data_ordered, f, indent=1)
+
+     print('Finished writing json to {}'.format(ordered_json_filename))
+
+
+     #%% Copy files to their new locations
+
+     # im = ordered_images[0]
+     for im in tqdm(ordered_images):
+         output_file = os.path.join(ordered_image_base,im['file_name'])
+         input_file = os.path.join(image_base,im['original_file'])
+         if not os.path.isfile(input_file):
+             print('Warning: file {} is missing'.format(input_file))
+             continue
+         shutil.copyfile(input_file,output_file)
+
+     original_fn_to_ordered_fn = {}
+
+     # im = data_ordered['images'][0]
+     for im in data_ordered['images']:
+         original_fn_to_ordered_fn[im['original_file']] = im['file_name']
+
+
+     #%% Preview labels in the ordered dataset
+
+     viz_options = visualize_db.DbVizOptions()
+     viz_options.num_to_visualize = 300
+     viz_options.trim_to_images_with_bboxes = False
+     viz_options.add_search_links = True
+     viz_options.sort_by_filename = False
+     viz_options.parallelize_rendering = True
+     viz_options.include_filename_links = True
+
+     html_output_file, _ = visualize_db.visualize_db(db_path=ordered_json_filename,
+                                                     output_dir=ordered_preview_base,
+                                                     image_base_dir=ordered_image_base,
+                                                     options=viz_options)
+     open_file(html_output_file)
+
+
+     #%% Open an ordered filename from the unordered filename
+
+     unordered_filename = '2100557/54e5c751-28b4-42e3-b6d4-e8ee290228ae.jpg'
+     fn = os.path.join(ordered_image_base,original_fn_to_ordered_fn[unordered_filename])
+     open_file(fn)
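
To make the sequence-synthesis step in wi_to_json.py easier to follow in isolation, here is a self-contained toy sketch of the same time-gap grouping, using made-up timestamps and the script's 10-second threshold; the variable names mirror the script, but nothing below is part of the package.

# Toy example of the time-gap sequence grouping used in wi_to_json.py: images
# at one location, sorted by time, start a new sequence whenever the gap to
# the previous image exceeds the threshold. Timestamps are made up.
import datetime
import uuid

max_seconds_within_sequence = 10

timestamps = [
    datetime.datetime(2020, 6, 1, 12, 0, 0),
    datetime.datetime(2020, 6, 1, 12, 0, 4),   # 4s gap: same sequence
    datetime.datetime(2020, 6, 1, 12, 5, 0),   # 5min gap: new sequence
    datetime.datetime(2020, 6, 1, 12, 5, 2),
]

previous_dt = None
seq_id = None
frame_num = 0

for dt in sorted(timestamps):
    delta = None if previous_dt is None else (dt - previous_dt).total_seconds()
    if delta is None or delta > max_seconds_within_sequence:
        seq_id = str(uuid.uuid1())
        frame_num = 0
    print('{} -> sequence {} frame {}'.format(dt, seq_id[:8], frame_num))
    frame_num += 1
    previous_dt = dt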