megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the package registry's advisory page for more details.

Files changed (226)
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,263 +0,0 @@
1
- """
2
-
3
- wellington_to_json.py
4
-
5
- Convert the .csv file provided for the Wellington data set to a
6
- COCO-camera-traps .json file
7
-
8
- """
9
-
10
- #%% Constants and environment
11
-
12
- import pandas as pd
13
- import os
14
- import glob
15
- import json
16
- import re
17
- import uuid
18
- import time
19
- import ntpath
20
- import humanfriendly
21
- import PIL
22
-
23
- from tqdm import tqdm
24
-
25
- input_metadata_file = os.path.expanduser('~/data/wct/wellington_camera_traps.csv')
26
- output_file = os.path.expanduser('~/data/wct/wellington_camera_traps.json')
27
- image_directory = os.path.expanduser('~/data/wct/images')
28
- preview_dir = os.path.expanduser('~/data/wct/preview')
29
-
30
- assert(os.path.isdir(image_directory))
31
-
32
-
33
- #%% Read source data
34
-
35
- input_metadata = pd.read_csv(input_metadata_file)
36
-
37
- print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
38
- len(input_metadata)))
39
-
40
- # Filenames were provided as *.jpg, but images were *.JPG, converting here
41
- input_metadata['file'] = input_metadata['file'].apply(lambda x: x.replace('.jpg','.JPG'))
42
-
43
- print('Converted extensions to uppercase')
44
-
45
-
46
- #%% Map filenames to rows, verify image existence
47
-
48
- # Takes ~30 seconds, since it's checking the existence of ~270k images
49
-
50
- start_time = time.time()
51
- filenames_to_rows = {}
52
- image_filenames = input_metadata.file
53
-
54
- duplicate_rows = []
55
-
56
- # Build up a map from filenames to a list of rows, checking image existence as we go
57
- for i_file,fn in enumerate(image_filenames):
58
-
59
- if (fn in filenames_to_rows):
60
- duplicate_rows.append(i_file)
61
- filenames_to_rows[fn].append(i_file)
62
- else:
63
- filenames_to_rows[fn] = [i_file]
64
- image_path = os.path.join(image_directory,fn)
65
- assert(os.path.isfile(image_path))
66
-
67
- elapsed = time.time() - start_time
68
- print('Finished verifying image existence in {}, found {} filenames with multiple labels'.format(
69
- humanfriendly.format_timespan(elapsed),len(duplicate_rows)))
70
-
71
- # I didn't expect this to be true a priori, but it appears to be true, and
72
- # it saves us the trouble of checking consistency across multiple occurrences
73
- # of an image.
74
- assert(len(duplicate_rows) == 0)
75
-
76
-
77
- #%% Check for images that aren't included in the metadata file
78
-
79
- # Enumerate all images
80
- image_full_paths = glob.glob(os.path.join(image_directory,'*.JPG'))
81
-
82
- for i_image,image_path in enumerate(image_full_paths):
83
-
84
- fn = ntpath.basename(image_path)
85
- assert(fn in filenames_to_rows)
86
-
87
- print('Finished checking {} images to make sure they\'re in the metadata'.format(
88
- len(image_full_paths)))
89
-
90
-
91
- #%% Create CCT dictionaries
92
-
93
- # Also gets image sizes, so this takes ~6 minutes
94
- #
95
- # Implicitly checks images for overt corruptness, i.e. by not crashing.
96
-
97
- images = []
98
- annotations = []
99
-
100
- # Map categories to integer IDs (that's what COCO likes)
101
- next_category_id = 0
102
- categories_to_category_id = {}
103
- categories_to_counts = {}
104
-
105
- # For each image
106
- #
107
- # Because in practice images are 1:1 with annotations in this data set,
108
- # this is also a loop over annotations.
109
-
110
- start_time = time.time()
111
-
112
- sequence_frame_ids = set()
113
-
114
- # image_name = image_filenames[0]
115
- for image_name in tqdm(image_filenames):
116
-
117
- rows = filenames_to_rows[image_name]
118
-
119
- # As per above, this is convenient and appears to be true; asserting to be safe
120
- assert(len(rows) == 1)
121
- i_row = rows[0]
122
-
123
- row = input_metadata.iloc[i_row]
124
-
125
- im = {}
126
- # Filenames look like "290716114012001a1116.jpg"
127
- im['id'] = image_name.split('.')[0]
128
- im['file_name'] = image_name
129
-
130
- # This gets imported as an int64
131
- im['seq_id'] = str(row['sequence'])
132
-
133
- # These appear as "image1", "image2", etc.
134
- frame_id = row['image_sequence']
135
- m = re.match('^image(\d+)$',frame_id)
136
- assert (m is not None)
137
- im['frame_num'] = int(m.group(1))-1
138
-
139
- # Make sure we haven't seen this sequence before
140
- sequence_frame_id = im['seq_id'] + '_' + str(im['frame_num'])
141
- assert sequence_frame_id not in sequence_frame_ids
142
- sequence_frame_ids.add(sequence_frame_id)
143
-
144
- # In the form "001a"
145
- im['location'] = row['site']
146
-
147
- # Can be in the form '111' or 's46'
148
- im['camera'] = row['camera']
149
-
150
- # In the form "7/29/2016 11:40"
151
- im['datetime'] = row['date']
152
-
153
- # Check image height and width
154
- image_path = os.path.join(image_directory,fn)
155
- assert(os.path.isfile(image_path))
156
- pil_image = PIL.Image.open(image_path)
157
- width, height = pil_image.size
158
- im['width'] = width
159
- im['height'] = height
160
-
161
- images.append(im)
162
-
163
- category = row['label'].lower()
164
-
165
- # Use 'empty', to be consistent with other data on lila
166
- if (category == 'nothinghere'):
167
- category = 'empty'
168
-
169
- # Have we seen this category before?
170
- if category in categories_to_category_id:
171
- category_id = categories_to_category_id[category]
172
- categories_to_counts[category] += 1
173
- else:
174
- category_id = next_category_id
175
- categories_to_category_id[category] = category_id
176
- categories_to_counts[category] = 0
177
- next_category_id += 1
178
-
179
- # Create an annotation
180
- ann = {}
181
-
182
- # The Internet tells me this guarantees uniqueness to a reasonable extent, even
183
- # beyond the sheer improbability of collisions.
184
- ann['id'] = str(uuid.uuid1())
185
- ann['image_id'] = im['id']
186
- ann['category_id'] = category_id
187
-
188
- annotations.append(ann)
189
-
190
- # ...for each image
191
-
192
- # Convert categories to a CCT-style dictionary
193
-
194
- categories = []
195
-
196
- for category in categories_to_counts:
197
- print('Category {}, count {}'.format(category,categories_to_counts[category]))
198
- category_id = categories_to_category_id[category]
199
- cat = {}
200
- cat['name'] = category
201
- cat['id'] = category_id
202
- categories.append(cat)
203
-
204
- elapsed = time.time() - start_time
205
- print('Finished creating CCT dictionaries in {}'.format(
206
- humanfriendly.format_timespan(elapsed)))
207
-
208
-
209
- #%% Create info struct
210
-
211
- info = {}
212
- info['year'] = 2018
213
- info['version'] = '1.01'
214
- info['description'] = 'Wellington Camera Traps'
215
- info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
216
- info['contributor'] = 'Victor Anton'
217
-
218
-
219
- #%% Write output
220
-
221
- json_data = {}
222
- json_data['images'] = images
223
- json_data['annotations'] = annotations
224
- json_data['categories'] = categories
225
- json_data['info'] = info
226
- json.dump(json_data,open(output_file,'w'),indent=1)
227
-
228
- print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
229
- len(images),len(annotations),len(categories)))
230
-
231
-
232
- #%% Validate .json files
233
-
234
- from data_management.databases import integrity_check_json_db
235
-
236
- options = integrity_check_json_db.IntegrityCheckOptions()
237
- options.baseDir = image_directory
238
- options.bCheckImageSizes = False
239
- options.bCheckImageExistence = True
240
- options.bFindUnusedImages = True
241
-
242
- sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_file, options)
243
-
244
-
245
- #%% Preview labels
246
-
247
- from md_visualization import visualize_db
248
-
249
- viz_options = visualize_db.DbVizOptions()
250
- viz_options.num_to_visualize = 2000
251
- viz_options.trim_to_images_with_bboxes = False
252
- viz_options.add_search_links = False
253
- viz_options.sort_by_filename = False
254
- viz_options.parallelize_rendering = True
255
- viz_options.classes_to_exclude = ['test']
256
- html_output_file, image_db = visualize_db.visualize_db(db_path=output_file,
257
- output_dir=os.path.join(
258
- preview_dir),
259
- image_base_dir=image_directory,
260
- options=viz_options)
261
-
262
- from md_utils import path_utils
263
- path_utils.open_file(html_output_file)
@@ -1,441 +0,0 @@
1
- """
2
-
3
- wi_to_json
4
-
5
- Prepares CCT-formatted metadata based on a Wildlife Insights data export.
6
-
7
- Mostly assumes you have the images also, for validation/QA.
8
-
9
- """
10
-
11
- #%% Imports and constants
12
-
13
- import os
14
- import json
15
- import pandas as pd
16
- import shutil
17
- import uuid
18
- import datetime
19
- import dateutil.parser
20
- import sys
21
- import subprocess
22
- import copy
23
-
24
- from collections import defaultdict
25
- from tqdm import tqdm
26
- from md_visualization import visualize_db
27
- from data_management.databases import integrity_check_json_db
28
-
29
- organization_name = 'organization'
30
- input_base = os.path.expanduser('~/data/' + organization_name)
31
- image_base = os.path.join(input_base,'deployment')
32
- image_csv = os.path.join(input_base,'images.csv')
33
- output_json_filename = os.path.join(input_base, organization_name + '_camera_traps.json')
34
- preview_base = os.path.expanduser('~/data/' + organization_name + '/preview')
35
-
36
- assert os.path.isfile(image_csv)
37
- assert os.path.isdir(image_base)
38
-
39
- MISSING_COMMON_NAME_TOKEN = 'MISSING'
40
-
41
- output_encoding = 'utf-8'
42
-
43
- # Because WI filenames are GUIDs, it's not practical to page through sequences in an
44
- # image viewer. So we're going to (optionally) create a copy of the data set where
45
- # images are ordered.
46
- create_ordered_dataset = False
47
-
48
- ordered_image_base = os.path.join(input_base,'deployment-ordered')
49
- ordered_json_filename = os.path.join(input_base, organization_name + '_camera_traps_ordered.json')
50
- ordered_preview_base = os.path.expanduser('~/data/' + organization_name + '/preview-ordered')
51
-
52
- info = {}
53
- info['year'] = 2020
54
- info['version'] = '1.0'
55
- info['description'] = organization_name + ' camera traps)'
56
- info['contributor'] = organization_name
57
- info['date_created'] = str(datetime.date.today())
58
-
59
- def open_file(filename):
60
- if sys.platform == "win32":
61
- os.startfile(filename)
62
- else:
63
- opener = "open" if sys.platform == "darwin" else "xdg-open"
64
- subprocess.call([opener, filename])
65
-
66
-
67
- #%% Load ground truth
68
-
69
- images_df = pd.read_csv(image_csv)
70
-
71
- print('Loaded {} ground truth annotations'.format(
72
- len(images_df)))
73
-
74
-
75
- #%% Take everything out of Pandas
76
-
77
- images = images_df.to_dict('records')
78
-
79
-
80
- #%% Synthesize common names when they're not available
81
-
82
- for im in images:
83
-
84
- if not isinstance(im['common_name'],str):
85
-
86
- # Blank rows should always have "Blank" as the common name
87
- assert im['is_blank'] == 0
88
- assert isinstance(im['genus'],str) and isinstance(im['species'],str)
89
- # print('Warning: missing common name for row {} ({})'.format(i_row,row['filename']))
90
- im['common_name'] = im['genus'].strip() + ' ' + im['species'].strip()
91
-
92
-
93
- #%% Convert string timestamps to Python datetimes
94
-
95
- all_locations = set()
96
-
97
- # im = ground_truth_dicts[0]
98
- for im in tqdm(images):
99
- dt = dateutil.parser.isoparse(im['timestamp'])
100
- assert dt.year >= 2019 and dt.year <= 2021
101
- im['datetime'] = dt
102
-
103
- # The field called "location" in the WI .csv file is a URL, we want to reclaim
104
- # the "location" keyword for CCT output
105
- im['url'] = im['location']
106
-
107
- # Filenames look like, e.g., N36/100EK113/06040726.JPG
108
- im['location'] = im['deployment_id']
109
- all_locations.add(im['location'])
110
-
111
-
112
- #%% Synthesize sequence information
113
-
114
- locations = all_locations
115
- print('Found {} locations'.format(len(locations)))
116
-
117
- locations = list(locations)
118
-
119
- sequences = set()
120
- sequence_to_images = defaultdict(list)
121
- max_seconds_within_sequence = 10
122
-
123
- # Sort images by time within each location
124
- # i_location=0; location = locations[i_location]
125
- for i_location,location in tqdm(enumerate(locations),total=len(locations)):
126
-
127
- images_this_location = [im for im in images if im['location'] == location]
128
- sorted_images_this_location = sorted(images_this_location, key = lambda im: im['datetime'])
129
-
130
- current_sequence_id = None
131
- next_frame_number = 0
132
- previous_datetime = None
133
-
134
- # previous_datetime = sorted_images_this_location[0]['datetime']
135
- # im = sorted_images_this_camera[1]
136
- for i_image,im in enumerate(sorted_images_this_location):
137
-
138
- # Timestamp for this image, may be None
139
- dt = im['datetime']
140
-
141
- # Start a new sequence if:
142
- #
143
- # * This image has no timestamp
144
- # * This image has a frame number of zero
145
- # * We have no previous image timestamp
146
- #
147
- if dt is None:
148
- delta = None
149
- elif previous_datetime is None:
150
- delta = None
151
- else:
152
- assert isinstance(dt,datetime.datetime)
153
- delta = (dt - previous_datetime).total_seconds()
154
-
155
- # Start a new sequence if necessary
156
- if delta is None or delta > max_seconds_within_sequence:
157
- next_frame_number = 0
158
- current_sequence_id = str(uuid.uuid1())
159
- sequences.add(current_sequence_id)
160
- assert current_sequence_id is not None
161
-
162
- im['seq_id'] = current_sequence_id
163
- im['synthetic_frame_number'] = next_frame_number
164
- next_frame_number = next_frame_number + 1
165
- previous_datetime = dt
166
- sequence_to_images[im['seq_id']].append(im)
167
-
168
- # ...for each image in this location
169
-
170
- # ...for each location
171
-
172
-
173
- #%% Create category dict and category IDs
174
-
175
- categories_to_counts = defaultdict(int)
176
- category_mappings = {'blank':'empty',
177
- 'mammal':'unknown_mammal',
178
- 'bird':'unknown_bird',
179
- 'unknown_species':'unknown'
180
- }
181
-
182
- for c in category_mappings.values():
183
- assert ' ' not in c
184
-
185
- # im = images[0]
186
- for im in tqdm(images):
187
-
188
- category_name = im['common_name'].lower().replace("'",'').replace(' ','_')
189
- if category_name in category_mappings:
190
- category_name = category_mappings[category_name]
191
- categories_to_counts[category_name] += 1
192
- im['category_name'] = category_name
193
-
194
- categories_to_counts_sorted = {k: v for k, v in sorted(categories_to_counts.items(),
195
- key=lambda item: item[1],reverse=True)}
196
-
197
- print('\n')
198
- for s in categories_to_counts_sorted.keys():
199
- print('{}: {}'.format(s,categories_to_counts_sorted[s]))
200
-
201
-
202
- #%% Count frames in each sequence
203
-
204
- sequence_id_to_n_frames = defaultdict(int)
205
-
206
- for im in tqdm(images):
207
- seq_id = im['seq_id']
208
- sequence_id_to_n_frames[seq_id] = sequence_id_to_n_frames[seq_id] + 1
209
-
210
- for im in tqdm(images):
211
- seq_id = im['seq_id']
212
- im['seq_num_frames'] = sequence_id_to_n_frames[seq_id]
213
-
214
-
215
- #%% Build relative paths
216
-
217
- missing_images = []
218
-
219
- # im = images[0]
220
- for i_image,im in enumerate(tqdm(images)):
221
- # Sample URL:
222
- #
223
- # gs://project-asfasdfd/deployment/21444549/asdfasdfd-616a-4d10-a921-45ac456c568a.jpg'
224
- relative_path = im['url'].split('/deployment/')[1]
225
- assert relative_path is not None and len(relative_path) > 0
226
- im['relative_path'] = relative_path
227
-
228
- if not os.path.isfile(os.path.join(image_base,relative_path)):
229
- missing_images.append(im)
230
-
231
- print('{} images are missing'.format(len(missing_images)))
232
-
233
-
234
- #%% Double check images with multiple annotations
235
-
236
- filename_to_images = defaultdict(list)
237
-
238
- # im = images[0]
239
- for im in tqdm(images):
240
- filename_to_images[im['relative_path']].append(im)
241
-
242
- filenames_with_multiple_annotations = [fn for fn in filename_to_images.keys() if len(filename_to_images[fn]) > 1]
243
-
244
- print('\nFound {} filenames with multiple annotations'.format(len(filenames_with_multiple_annotations)))
245
-
246
-
247
- #%% Assemble dictionaries
248
-
249
- images_out = []
250
- image_id_to_image = {}
251
- annotations = []
252
- categories = []
253
-
254
- category_name_to_category = {}
255
- category_id_to_category = {}
256
-
257
- # Force the empty category to be ID 0
258
- empty_category = {}
259
- empty_category['name'] = 'empty'
260
- empty_category['id'] = 0
261
- empty_category['count'] = 0
262
-
263
- category_id_to_category[0] = empty_category
264
- category_name_to_category['empty'] = empty_category
265
- categories.append(empty_category)
266
- next_id = 1
267
-
268
- # input_im = images[0]
269
- for input_im in tqdm(images):
270
-
271
- category_name = input_im['category_name'].lower().strip()
272
-
273
- if category_name not in category_name_to_category:
274
-
275
- category_id = next_id
276
- next_id += 1
277
- category = {}
278
- category['id'] = category_id
279
- category['name'] = category_name
280
- category['count'] = 0
281
- categories.append(category)
282
- category_name_to_category[category_name] = category
283
- category_id_to_category[category_id] = category
284
-
285
- else:
286
-
287
- category = category_name_to_category[category_name]
288
-
289
- category_id = category['id']
290
- category['count'] += 1
291
-
292
- im = {}
293
- im['id'] = input_im['relative_path'].replace('/','_')
294
- im['datetime'] = str(input_im['datetime'])
295
- im['file_name'] = input_im['relative_path']
296
- im['seq_id'] = input_im['seq_id']
297
- im['frame_num'] = input_im['synthetic_frame_number']
298
- im['seq_num_frames'] = input_im['seq_num_frames']
299
- im['location'] = input_im['location']
300
-
301
- if im['id'] in image_id_to_image:
302
- # print('Warning: image ID {} ({}) has multiple annotations'.format(im['id'],im['id'].replace('_','/')))
303
- pass
304
- else:
305
- image_id_to_image[im['id']] = im
306
- images_out.append(im)
307
-
308
- ann = {}
309
-
310
- ann['id'] = str(uuid.uuid1())
311
- ann['image_id'] = im['id']
312
- ann['category_id'] = category_id
313
- ann['sequence_level_annotation'] = False
314
- annotations.append(ann)
315
-
316
- # ...for each image
317
-
318
-
319
- #%% Write output .json
320
-
321
- data = {}
322
- data['info'] = info
323
- data['images'] = images_out
324
- data['annotations'] = annotations
325
- data['categories'] = categories
326
-
327
- with open(output_json_filename, 'w') as f:
328
- json.dump(data, f, indent=1)
329
-
330
- print('Finished writing json to {}'.format(output_json_filename))
331
-
332
-
333
- #%% Validate .json file
334
-
335
- options = integrity_check_json_db.IntegrityCheckOptions()
336
- options.baseDir = image_base
337
- options.bCheckImageSizes = False
338
- options.bCheckImageExistence = True
339
- options.bFindUnusedImages = True
340
-
341
- _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_filename, options)
342
-
343
-
344
- #%% Preview labels
345
-
346
- viz_options = visualize_db.DbVizOptions()
347
- viz_options.num_to_visualize = 300
348
- viz_options.trim_to_images_with_bboxes = False
349
- viz_options.add_search_links = True
350
- viz_options.sort_by_filename = False
351
- viz_options.parallelize_rendering = True
352
- viz_options.include_filename_links = True
353
-
354
- html_output_file, _ = visualize_db.visualize_db(db_path=output_json_filename,
355
- output_dir=preview_base,
356
- image_base_dir=image_base,
357
- options=viz_options)
358
- open_file(html_output_file)
359
- # open_file(os.path.join(image_base,'2100703/1141a545-88d2-498b-a684-7431f7aeb324.jpg'))
360
-
361
-
362
- #%%
363
-
364
- if create_ordered_dataset:
365
-
366
- pass
367
-
368
- #%% Create ordered dataset
369
-
370
- # Because WI filenames are GUIDs, it's not practical to page through sequences in an
371
- # image viewer. So we're going to create a copy of the data set where images are
372
- # ordered.
373
-
374
- os.makedirs(ordered_image_base,exist_ok=True)
375
-
376
- ordered_images = {}
377
-
378
- # im = images_out[0]; im
379
- for im in tqdm(images_out):
380
- im_out = copy.deepcopy(im)
381
- ordered_filename = im['location'] + '_' + im['seq_id'] + '_' +\
382
- str(im['frame_num']) + '_' + os.path.basename(im['file_name'])
383
- assert ordered_filename not in ordered_images
384
- im_out['original_file'] = im_out['file_name']
385
- im_out['file_name'] = ordered_filename
386
- ordered_images[ordered_filename] = im_out
387
-
388
- ordered_images = list(ordered_images.values())
389
-
390
-
391
- #%% Create ordered .json
392
-
393
- data_ordered = copy.copy(data)
394
- data_ordered['images'] = ordered_images
395
-
396
- with open(ordered_json_filename, 'w') as f:
397
- json.dump(data_ordered, f, indent=1)
398
-
399
- print('Finished writing json to {}'.format(ordered_json_filename))
400
-
401
-
402
- #%% Copy files to their new locations
403
-
404
- # im = ordered_images[0]
405
- for im in tqdm(ordered_images):
406
- output_file = os.path.join(ordered_image_base,im['file_name'])
407
- input_file = os.path.join(image_base,im['original_file'])
408
- if not os.path.isfile(input_file):
409
- print('Warning: file {} is missing'.format(input_file))
410
- continue
411
- shutil.copyfile(input_file,output_file)
412
-
413
- original_fn_to_ordered_fn = {}
414
- # im = data_ordered['images'][0]
415
- for im in data_ordered['images']:
416
- original_fn_to_ordered_fn[im['original_file']] = im['file_name']
417
-
418
-
419
- #%% Preview labels in the ordered dataset
420
-
421
- viz_options = visualize_db.DbVizOptions()
422
- viz_options.num_to_visualize = 300
423
- viz_options.trim_to_images_with_bboxes = False
424
- viz_options.add_search_links = True
425
- viz_options.sort_by_filename = False
426
- viz_options.parallelize_rendering = True
427
- viz_options.include_filename_links = True
428
-
429
- html_output_file, _ = visualize_db.visualize_db(db_path=ordered_json_filename,
430
- output_dir=ordered_preview_base,
431
- image_base_dir=ordered_image_base,
432
- options=viz_options)
433
- open_file(html_output_file)
434
- # open_file(os.path.join(image_base,'2100703/1141a545-88d2-498b-a684-7431f7aeb324.jpg'))
435
-
436
-
437
- #%% Open an ordered filename from the unordered filename
438
-
439
- unordered_filename = '2100557/54e5c751-28b4-42e3-b6d4-e8ee290228ae.jpg'
440
- fn = os.path.join(ordered_image_base,original_fn_to_ordered_fn[unordered_filename])
441
- open_file(fn)