megadetector 5.0.28-py3-none-any.whl → 5.0.29-py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.


This version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
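
The headline changes are the new megadetector/detection/change_detection.py module (+831 lines), substantial growth in the utils package (ct_utils.py, path_utils.py, string_utils.py, url_utils.py), and the removal of the entire megadetector/data_management/importers directory of one-off dataset conversion scripts, plus two lila location scripts. If code depends on any of the removed importers, one quick check after upgrading is whether that package still resolves; a minimal sketch, assuming megadetector 5.0.29 is installed in the current environment (importlib.util.find_spec is standard library):

    # Check whether the removed importers package still resolves.
    # Assumes megadetector 5.0.29 is installed in this environment.
    import importlib.util

    spec = importlib.util.find_spec('megadetector.data_management.importers')
    if spec is None:
        print('importers package removed, as expected in 5.0.29')
    else:
        print('importers package still present: {}'.format(spec.origin))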
megadetector/data_management/importers/wellington_to_json.py
@@ -1,263 +0,0 @@
- """
-
- wellington_to_json.py
-
- Convert the .csv file provided for the Wellington data set to a
- COCO-camera-traps .json file
-
- """
-
- #%% Constants and environment
-
- import pandas as pd
- import os
- import glob
- import json
- import re
- import uuid
- import time
- import ntpath
- import humanfriendly
- import PIL
-
- from tqdm import tqdm
-
- input_metadata_file = os.path.expanduser('~/data/wct/wellington_camera_traps.csv')
- output_file = os.path.expanduser('~/data/wct/wellington_camera_traps.json')
- image_directory = os.path.expanduser('~/data/wct/images')
- preview_dir = os.path.expanduser('~/data/wct/preview')
-
- assert(os.path.isdir(image_directory))
-
-
- #%% Read source data
-
- input_metadata = pd.read_csv(input_metadata_file)
-
- print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
-       len(input_metadata)))
-
- # Filenames were provided as *.jpg, but images were *.JPG; converting here
- input_metadata['file'] = input_metadata['file'].apply(lambda x: x.replace('.jpg','.JPG'))
-
- print('Converted extensions to uppercase')
-
-
- #%% Map filenames to rows, verify image existence
-
- # Takes ~30 seconds, since it's checking the existence of ~270k images
-
- start_time = time.time()
- filenames_to_rows = {}
- image_filenames = input_metadata.file
-
- duplicate_rows = []
-
- # Build up a map from filenames to a list of rows, checking image existence as we go
- for i_file,fn in enumerate(image_filenames):
-
-     if (fn in filenames_to_rows):
-         duplicate_rows.append(i_file)
-         filenames_to_rows[fn].append(i_file)
-     else:
-         filenames_to_rows[fn] = [i_file]
-         image_path = os.path.join(image_directory,fn)
-         assert(os.path.isfile(image_path))
-
- elapsed = time.time() - start_time
- print('Finished verifying image existence in {}, found {} filenames with multiple labels'.format(
-     humanfriendly.format_timespan(elapsed),len(duplicate_rows)))
-
- # I didn't expect this to be true a priori, but it appears to be true, and
- # it saves us the trouble of checking consistency across multiple occurrences
- # of an image.
- assert(len(duplicate_rows) == 0)
-
-
- #%% Check for images that aren't included in the metadata file
-
- # Enumerate all images
- image_full_paths = glob.glob(os.path.join(image_directory,'*.JPG'))
-
- for i_image,image_path in enumerate(image_full_paths):
-
-     fn = ntpath.basename(image_path)
-     assert(fn in filenames_to_rows)
-
- print('Finished checking {} images to make sure they\'re in the metadata'.format(
-     len(image_full_paths)))
-
-
- #%% Create CCT dictionaries
-
- # Also gets image sizes, so this takes ~6 minutes
- #
- # Implicitly checks images for overt corruptness, i.e. by not crashing.
-
- images = []
- annotations = []
-
- # Map categories to integer IDs (that's what COCO likes)
- next_category_id = 0
- categories_to_category_id = {}
- categories_to_counts = {}
-
- # For each image
- #
- # Because in practice images are 1:1 with annotations in this data set,
- # this is also a loop over annotations.
-
- start_time = time.time()
-
- sequence_frame_ids = set()
-
- # image_name = image_filenames[0]
- for image_name in tqdm(image_filenames):
-
-     rows = filenames_to_rows[image_name]
-
-     # As per above, this is convenient and appears to be true; asserting to be safe
-     assert(len(rows) == 1)
-     i_row = rows[0]
-
-     row = input_metadata.iloc[i_row]
-
-     im = {}
-     # Filenames look like "290716114012001a1116.jpg"
-     im['id'] = image_name.split('.')[0]
-     im['file_name'] = image_name
-
-     # This gets imported as an int64
-     im['seq_id'] = str(row['sequence'])
-
-     # These appear as "image1", "image2", etc.
-     frame_id = row['image_sequence']
-     m = re.match(r'^image(\d+)$',frame_id)
-     assert (m is not None)
-     im['frame_num'] = int(m.group(1))-1
-
-     # Make sure we haven't seen this sequence/frame combination before
-     sequence_frame_id = im['seq_id'] + '_' + str(im['frame_num'])
-     assert sequence_frame_id not in sequence_frame_ids
-     sequence_frame_ids.add(sequence_frame_id)
-
-     # In the form "001a"
-     im['location'] = row['site']
-
-     # Can be in the form '111' or 's46'
-     im['camera'] = row['camera']
-
-     # In the form "7/29/2016 11:40"
-     im['datetime'] = row['date']
-
-     # Check image height and width
-     image_path = os.path.join(image_directory,image_name)
-     assert(os.path.isfile(image_path))
-     pil_image = PIL.Image.open(image_path)
-     width, height = pil_image.size
-     im['width'] = width
-     im['height'] = height
-
-     images.append(im)
-
-     category = row['label'].lower()
-
-     # Use 'empty', to be consistent with other data on lila
-     if (category == 'nothinghere'):
-         category = 'empty'
-
-     # Have we seen this category before?
-     if category in categories_to_category_id:
-         category_id = categories_to_category_id[category]
-         categories_to_counts[category] += 1
-     else:
-         category_id = next_category_id
-         categories_to_category_id[category] = category_id
-         categories_to_counts[category] = 1
-         next_category_id += 1
-
-     # Create an annotation
-     ann = {}
-
-     # The Internet tells me this guarantees uniqueness to a reasonable extent, even
-     # beyond the sheer improbability of collisions.
-     ann['id'] = str(uuid.uuid1())
-     ann['image_id'] = im['id']
-     ann['category_id'] = category_id
-
-     annotations.append(ann)
-
- # ...for each image
-
- # Convert categories to a CCT-style dictionary
-
- categories = []
-
- for category in categories_to_counts:
-     print('Category {}, count {}'.format(category,categories_to_counts[category]))
-     category_id = categories_to_category_id[category]
-     cat = {}
-     cat['name'] = category
-     cat['id'] = category_id
-     categories.append(cat)
-
- elapsed = time.time() - start_time
- print('Finished creating CCT dictionaries in {}'.format(
-     humanfriendly.format_timespan(elapsed)))
-
-
- #%% Create info struct
-
- info = {}
- info['year'] = 2018
- info['version'] = '1.01'
- info['description'] = 'Wellington Camera Traps'
- info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
- info['contributor'] = 'Victor Anton'
-
-
- #%% Write output
-
- json_data = {}
- json_data['images'] = images
- json_data['annotations'] = annotations
- json_data['categories'] = categories
- json_data['info'] = info
- json.dump(json_data,open(output_file,'w'),indent=1)
-
- print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
-     len(images),len(annotations),len(categories)))
-
-
- #%% Validate .json files
-
- from megadetector.data_management.databases import integrity_check_json_db
-
- options = integrity_check_json_db.IntegrityCheckOptions()
- options.baseDir = image_directory
- options.bCheckImageSizes = False
- options.bCheckImageExistence = True
- options.bFindUnusedImages = True
-
- sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_file, options)
-
-
- #%% Preview labels
-
- from megadetector.visualization import visualize_db
-
- viz_options = visualize_db.DbVizOptions()
- viz_options.num_to_visualize = 2000
- viz_options.trim_to_images_with_bboxes = False
- viz_options.add_search_links = False
- viz_options.sort_by_filename = False
- viz_options.parallelize_rendering = True
- viz_options.classes_to_exclude = ['test']
- html_output_file, image_db = visualize_db.visualize_db(db_path=output_file,
-                                                        output_dir=os.path.join(
-                                                            preview_dir),
-                                                        image_base_dir=image_directory,
-                                                        options=viz_options)
-
- from megadetector.utils import path_utils
- path_utils.open_file(html_output_file)
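
For reference, the COCO Camera Traps structure that wellington_to_json.py wrote, and that integrity_check_json_db then validates, looks roughly like the sketch below; the field values are illustrative, taken from the comments in the script rather than from real dataset records:

    # Illustrative sketch of the CCT .json this script produced; values are
    # examples based on the script's comments, not actual dataset records.
    example_cct = {
        'info': {'year': 2018, 'version': '1.01',
                 'description': 'Wellington Camera Traps',
                 'contributor': 'Victor Anton'},
        'images': [{'id': '290716114012001a1116',
                    'file_name': '290716114012001a1116.JPG',
                    'seq_id': '1234', 'frame_num': 0,
                    'location': '001a', 'camera': '111',
                    'datetime': '7/29/2016 11:40',
                    'width': 2048, 'height': 1536}],
        'annotations': [{'id': '<uuid>',
                         'image_id': '290716114012001a1116',
                         'category_id': 0}],
        'categories': [{'id': 0, 'name': 'bird'}],
    }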
megadetector/data_management/importers/wi_to_json.py
@@ -1,442 +0,0 @@
- """
-
- wi_to_json
-
- Prepares CCT-formatted metadata based on a Wildlife Insights data export.
-
- Mostly assumes you have the images also, for validation/QA.
-
- """
-
- #%% Imports and constants
-
- import os
- import json
- import pandas as pd
- import shutil
- import uuid
- import datetime
- import dateutil.parser
- import sys
- import subprocess
- import copy
-
- from collections import defaultdict
- from tqdm import tqdm
-
- from megadetector.visualization import visualize_db
- from megadetector.data_management.databases import integrity_check_json_db
-
- organization_name = 'organization'
- input_base = os.path.expanduser('~/data/' + organization_name)
- image_base = os.path.join(input_base,'deployment')
- image_csv = os.path.join(input_base,'images.csv')
- output_json_filename = os.path.join(input_base, organization_name + '_camera_traps.json')
- preview_base = os.path.expanduser('~/data/' + organization_name + '/preview')
-
- assert os.path.isfile(image_csv)
- assert os.path.isdir(image_base)
-
- MISSING_COMMON_NAME_TOKEN = 'MISSING'
-
- output_encoding = 'utf-8'
-
- # Because WI filenames are GUIDs, it's not practical to page through sequences in an
- # image viewer. So we're going to (optionally) create a copy of the data set where
- # images are ordered.
- create_ordered_dataset = False
-
- ordered_image_base = os.path.join(input_base,'deployment-ordered')
- ordered_json_filename = os.path.join(input_base, organization_name + '_camera_traps_ordered.json')
- ordered_preview_base = os.path.expanduser('~/data/' + organization_name + '/preview-ordered')
-
- info = {}
- info['year'] = 2020
- info['version'] = '1.0'
- info['description'] = organization_name + ' camera traps'
- info['contributor'] = organization_name
- info['date_created'] = str(datetime.date.today())
-
- def open_file(filename):
-     if sys.platform == "win32":
-         os.startfile(filename)
-     else:
-         opener = "open" if sys.platform == "darwin" else "xdg-open"
-         subprocess.call([opener, filename])
-
-
- #%% Load ground truth
-
- images_df = pd.read_csv(image_csv)
-
- print('Loaded {} ground truth annotations'.format(
-     len(images_df)))
-
-
- #%% Take everything out of Pandas
-
- images = images_df.to_dict('records')
-
-
- #%% Synthesize common names when they're not available
-
- for im in images:
-
-     if not isinstance(im['common_name'],str):
-
-         # Blank rows should always have "Blank" as the common name
-         assert im['is_blank'] == 0
-         assert isinstance(im['genus'],str) and isinstance(im['species'],str)
-         # print('Warning: missing common name for row {} ({})'.format(i_row,row['filename']))
-         im['common_name'] = im['genus'].strip() + ' ' + im['species'].strip()
-
-
- #%% Convert string timestamps to Python datetimes
-
- all_locations = set()
-
- # im = ground_truth_dicts[0]
- for im in tqdm(images):
-     dt = dateutil.parser.isoparse(im['timestamp'])
-     assert dt.year >= 2019 and dt.year <= 2021
-     im['datetime'] = dt
-
-     # The field called "location" in the WI .csv file is a URL; we want to reclaim
-     # the "location" keyword for CCT output
-     im['url'] = im['location']
-
-     # Filenames look like, e.g., N36/100EK113/06040726.JPG
-     im['location'] = im['deployment_id']
-     all_locations.add(im['location'])
-
-
- #%% Synthesize sequence information
-
- locations = all_locations
- print('Found {} locations'.format(len(locations)))
-
- locations = list(locations)
-
- sequences = set()
- sequence_to_images = defaultdict(list)
- max_seconds_within_sequence = 10
-
- # Sort images by time within each location
- # i_location=0; location = locations[i_location]
- for i_location,location in tqdm(enumerate(locations),total=len(locations)):
-
-     images_this_location = [im for im in images if im['location'] == location]
-     sorted_images_this_location = sorted(images_this_location, key = lambda im: im['datetime'])
-
-     current_sequence_id = None
-     next_frame_number = 0
-     previous_datetime = None
-
-     # previous_datetime = sorted_images_this_location[0]['datetime']
-     # im = sorted_images_this_location[1]
-     for i_image,im in enumerate(sorted_images_this_location):
-
-         # Timestamp for this image, may be None
-         dt = im['datetime']
-
-         # Start a new sequence if:
-         #
-         # * This image has no timestamp
-         # * We have no previous image timestamp
-         # * The gap since the previous image exceeds our threshold
-         #
-         if dt is None:
-             delta = None
-         elif previous_datetime is None:
-             delta = None
-         else:
-             assert isinstance(dt,datetime.datetime)
-             delta = (dt - previous_datetime).total_seconds()
-
-         # Start a new sequence if necessary
-         if delta is None or delta > max_seconds_within_sequence:
-             next_frame_number = 0
-             current_sequence_id = str(uuid.uuid1())
-             sequences.add(current_sequence_id)
-         assert current_sequence_id is not None
-
-         im['seq_id'] = current_sequence_id
-         im['synthetic_frame_number'] = next_frame_number
-         next_frame_number = next_frame_number + 1
-         previous_datetime = dt
-         sequence_to_images[im['seq_id']].append(im)
-
-     # ...for each image in this location
-
- # ...for each location
-
-
- #%% Create category dict and category IDs
-
- categories_to_counts = defaultdict(int)
- category_mappings = {'blank':'empty',
-                      'mammal':'unknown_mammal',
-                      'bird':'unknown_bird',
-                      'unknown_species':'unknown'
-                      }
-
- for c in category_mappings.values():
-     assert ' ' not in c
-
- # im = images[0]
- for im in tqdm(images):
-
-     category_name = im['common_name'].lower().replace("'",'').replace(' ','_')
-     if category_name in category_mappings:
-         category_name = category_mappings[category_name]
-     categories_to_counts[category_name] += 1
-     im['category_name'] = category_name
-
- categories_to_counts_sorted = {k: v for k, v in sorted(categories_to_counts.items(),
-                                key=lambda item: item[1],reverse=True)}
-
- print('\n')
- for s in categories_to_counts_sorted.keys():
-     print('{}: {}'.format(s,categories_to_counts_sorted[s]))
-
-
- #%% Count frames in each sequence
-
- sequence_id_to_n_frames = defaultdict(int)
-
- for im in tqdm(images):
-     seq_id = im['seq_id']
-     sequence_id_to_n_frames[seq_id] = sequence_id_to_n_frames[seq_id] + 1
-
- for im in tqdm(images):
-     seq_id = im['seq_id']
-     im['seq_num_frames'] = sequence_id_to_n_frames[seq_id]
-
-
- #%% Build relative paths
-
- missing_images = []
-
- # im = images[0]
- for i_image,im in enumerate(tqdm(images)):
-     # Sample URL:
-     #
-     # gs://project-asfasdfd/deployment/21444549/asdfasdfd-616a-4d10-a921-45ac456c568a.jpg
-     relative_path = im['url'].split('/deployment/')[1]
-     assert relative_path is not None and len(relative_path) > 0
-     im['relative_path'] = relative_path
-
-     if not os.path.isfile(os.path.join(image_base,relative_path)):
-         missing_images.append(im)
-
- print('{} images are missing'.format(len(missing_images)))
-
-
- #%% Double check images with multiple annotations
-
- filename_to_images = defaultdict(list)
-
- # im = images[0]
- for im in tqdm(images):
-     filename_to_images[im['relative_path']].append(im)
-
- filenames_with_multiple_annotations = [fn for fn in filename_to_images.keys() if len(filename_to_images[fn]) > 1]
-
- print('\nFound {} filenames with multiple annotations'.format(len(filenames_with_multiple_annotations)))
-
-
- #%% Assemble dictionaries
-
- images_out = []
- image_id_to_image = {}
- annotations = []
- categories = []
-
- category_name_to_category = {}
- category_id_to_category = {}
-
- # Force the empty category to be ID 0
- empty_category = {}
- empty_category['name'] = 'empty'
- empty_category['id'] = 0
- empty_category['count'] = 0
-
- category_id_to_category[0] = empty_category
- category_name_to_category['empty'] = empty_category
- categories.append(empty_category)
- next_id = 1
-
- # input_im = images[0]
- for input_im in tqdm(images):
-
-     category_name = input_im['category_name'].lower().strip()
-
-     if category_name not in category_name_to_category:
-
-         category_id = next_id
-         next_id += 1
-         category = {}
-         category['id'] = category_id
-         category['name'] = category_name
-         category['count'] = 0
-         categories.append(category)
-         category_name_to_category[category_name] = category
-         category_id_to_category[category_id] = category
-
-     else:
-
-         category = category_name_to_category[category_name]
-
-     category_id = category['id']
-     category['count'] += 1
-
-     im = {}
-     im['id'] = input_im['relative_path'].replace('/','_')
-     im['datetime'] = str(input_im['datetime'])
-     im['file_name'] = input_im['relative_path']
-     im['seq_id'] = input_im['seq_id']
-     im['frame_num'] = input_im['synthetic_frame_number']
-     im['seq_num_frames'] = input_im['seq_num_frames']
-     im['location'] = input_im['location']
-
-     if im['id'] in image_id_to_image:
-         # print('Warning: image ID {} ({}) has multiple annotations'.format(im['id'],im['id'].replace('_','/')))
-         pass
-     else:
-         image_id_to_image[im['id']] = im
-         images_out.append(im)
-
-     ann = {}
-
-     ann['id'] = str(uuid.uuid1())
-     ann['image_id'] = im['id']
-     ann['category_id'] = category_id
-     ann['sequence_level_annotation'] = False
-     annotations.append(ann)
-
- # ...for each image
-
-
- #%% Write output .json
-
- data = {}
- data['info'] = info
- data['images'] = images_out
- data['annotations'] = annotations
- data['categories'] = categories
-
- with open(output_json_filename, 'w') as f:
-     json.dump(data, f, indent=1)
-
- print('Finished writing json to {}'.format(output_json_filename))
-
-
- #%% Validate .json file
-
- options = integrity_check_json_db.IntegrityCheckOptions()
- options.baseDir = image_base
- options.bCheckImageSizes = False
- options.bCheckImageExistence = True
- options.bFindUnusedImages = True
-
- _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_filename, options)
-
-
- #%% Preview labels
-
- viz_options = visualize_db.DbVizOptions()
- viz_options.num_to_visualize = 300
- viz_options.trim_to_images_with_bboxes = False
- viz_options.add_search_links = True
- viz_options.sort_by_filename = False
- viz_options.parallelize_rendering = True
- viz_options.include_filename_links = True
-
- html_output_file, _ = visualize_db.visualize_db(db_path=output_json_filename,
-                                                 output_dir=preview_base,
-                                                 image_base_dir=image_base,
-                                                 options=viz_options)
- open_file(html_output_file)
- # open_file(os.path.join(image_base,'2100703/1141a545-88d2-498b-a684-7431f7aeb324.jpg'))
-
-
- #%%
-
- if create_ordered_dataset:
-
-     pass
-
-     #%% Create ordered dataset
-
-     # Because WI filenames are GUIDs, it's not practical to page through sequences in an
-     # image viewer. So we're going to create a copy of the data set where images are
-     # ordered.
-
-     os.makedirs(ordered_image_base,exist_ok=True)
-
-     ordered_images = {}
-
-     # im = images_out[0]; im
-     for im in tqdm(images_out):
-         im_out = copy.deepcopy(im)
-         ordered_filename = im['location'] + '_' + im['seq_id'] + '_' +\
-             str(im['frame_num']) + '_' + os.path.basename(im['file_name'])
-         assert ordered_filename not in ordered_images
-         im_out['original_file'] = im_out['file_name']
-         im_out['file_name'] = ordered_filename
-         ordered_images[ordered_filename] = im_out
-
-     ordered_images = list(ordered_images.values())
-
-
-     #%% Create ordered .json
-
-     data_ordered = copy.copy(data)
-     data_ordered['images'] = ordered_images
-
-     with open(ordered_json_filename, 'w') as f:
-         json.dump(data_ordered, f, indent=1)
-
-     print('Finished writing json to {}'.format(ordered_json_filename))
-
-
-     #%% Copy files to their new locations
-
-     # im = ordered_images[0]
-     for im in tqdm(ordered_images):
-         output_file = os.path.join(ordered_image_base,im['file_name'])
-         input_file = os.path.join(image_base,im['original_file'])
-         if not os.path.isfile(input_file):
-             print('Warning: file {} is missing'.format(input_file))
-             continue
-         shutil.copyfile(input_file,output_file)
-
-     original_fn_to_ordered_fn = {}
-     # im = data_ordered['images'][0]
-     for im in data_ordered['images']:
-         original_fn_to_ordered_fn[im['original_file']] = im['file_name']
-
-
-     #%% Preview labels in the ordered dataset
-
-     viz_options = visualize_db.DbVizOptions()
-     viz_options.num_to_visualize = 300
-     viz_options.trim_to_images_with_bboxes = False
-     viz_options.add_search_links = True
-     viz_options.sort_by_filename = False
-     viz_options.parallelize_rendering = True
-     viz_options.include_filename_links = True
-
-     html_output_file, _ = visualize_db.visualize_db(db_path=ordered_json_filename,
-                                                     output_dir=ordered_preview_base,
-                                                     image_base_dir=ordered_image_base,
-                                                     options=viz_options)
-     open_file(html_output_file)
-     # open_file(os.path.join(image_base,'2100703/1141a545-88d2-498b-a684-7431f7aeb324.jpg'))
-
-
-     #%% Open an ordered filename from the unordered filename
-
-     unordered_filename = '2100557/54e5c751-28b4-42e3-b6d4-e8ee290228ae.jpg'
-     fn = os.path.join(ordered_image_base,original_fn_to_ordered_fn[unordered_filename])
-     open_file(fn)
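
The sequence-synthesis cell in wi_to_json.py is the most reusable piece of the removed script: images at one location are grouped into a sequence whenever consecutive timestamps fall within a 10-second window. A standalone sketch of that grouping logic, assuming each record is a dict with a 'location' key and a datetime-valued 'datetime' key (field names mirror the script above):

    # Standalone sketch of the sequence-grouping logic: images at the same
    # location share a sequence as long as consecutive timestamps are within
    # max_seconds_within_sequence of each other.
    import uuid

    def assign_sequences(images, max_seconds_within_sequence=10):
        for location in sorted(set(im['location'] for im in images)):
            images_this_location = sorted(
                [im for im in images if im['location'] == location],
                key=lambda im: im['datetime'])
            previous_datetime = None
            current_sequence_id = None
            next_frame_number = 0
            for im in images_this_location:
                dt = im['datetime']
                delta = (None if previous_datetime is None
                         else (dt - previous_datetime).total_seconds())
                # Start a new sequence on the first image at each location, or
                # when the gap since the previous image exceeds the threshold
                if delta is None or delta > max_seconds_within_sequence:
                    next_frame_number = 0
                    current_sequence_id = str(uuid.uuid1())
                im['seq_id'] = current_sequence_id
                im['synthetic_frame_number'] = next_frame_number
                next_frame_number += 1
                previous_datetime = dt
        return images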