megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the release advisory for more details.

Files changed (201):
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
"""

idfg_iwildcam_lila_prep.py

Adding class labels (from the private test .csv) to the iWildCam 2019 IDFG
test set, in preparation for release on LILA.

This version works with the public iWildCam release images.

"""

#%% ############ Take one, from iWildCam .json files ############

#%% Imports and constants

import uuid
import json
import os

from tqdm import tqdm

base_folder = r'h:\iWildCam_2019_IDFG'
input_json = os.path.join(base_folder,'iWildCam_2019_IDFG_info.json')
input_csv = os.path.join(base_folder,'IDFG_eval_public_v_private.csv')
output_json = os.path.join(base_folder,'idaho_camera_traps.json')

assert os.path.isfile(input_json)
assert os.path.isfile(input_csv)


#%% Read input files

with open(input_json,'r') as f:
    input_data = json.load(f)

with open(input_csv,'r') as f:
    private_csv_lines = f.readlines()

private_csv_lines = [s.strip() for s in private_csv_lines]

# Remove the header line
assert private_csv_lines[0] == 'Id,Category,Usage'
private_csv_lines = private_csv_lines[1:]

print('Read {} annotations for {} images'.format(len(private_csv_lines),len(input_data['images'])))

# This data set has exactly one label per image
assert len(private_csv_lines) == len(input_data['images'])
n_images = len(input_data['images'])


#%% Parse annotations

# Maps each image ID to a single (integer) category ID
image_id_to_category_id = {}

for line in tqdm(private_csv_lines):

    # Lines look like:
    #
    # b005e5b2-2c0b-11e9-bcad-06f1011196c4,1,Private

    tokens = line.split(',')
    assert len(tokens) == 3
    assert tokens[2] in ['Private','Public']
    image_id_to_category_id[tokens[0]] = int(tokens[1])

# Verify that image IDs in the .csv file were unique
assert len(image_id_to_category_id) == n_images


#%% Minor cleanup re: images

for im in tqdm(input_data['images']):
    # Strip the release-specific folder prefix from filenames
    im['file_name'] = im['file_name'].replace('iWildCam_IDFG_images/','')
    # CCT convention is string-valued location IDs
    assert isinstance(im['location'],int)
    im['location'] = str(im['location'])


#%% Create annotations

annotations = []

for image_id,category_id in tqdm(image_id_to_category_id.items()):
    ann = {}
    # uuid1 gives us a reasonably collision-proof annotation ID
    ann['id'] = str(uuid.uuid1())
    ann['image_id'] = image_id
    ann['category_id'] = category_id
    annotations.append(ann)


#%% Prepare info

info = input_data['info']
info['contributor'] = 'Images acquired by the Idaho Department of Fish and Game, dataset curated by Sara Beery'
info['description'] = 'Idaho Camera traps'
info['version'] = '2021.07.19'


#%% Minor adjustments to categories

input_categories = input_data['categories']

category_id_to_name = {cat['id']:cat['name'] for cat in input_categories}
category_name_to_id = {cat['name']:cat['id'] for cat in input_categories}
assert category_id_to_name[0] == 'empty'

# Count annotations per category, so we can drop unused categories below
category_names_to_counts = {cat['name']:0 for cat in input_categories}

for ann in annotations:
    category_name = category_id_to_name[ann['category_id']]
    category_names_to_counts[category_name] += 1

categories = []

for category_name,count in category_names_to_counts.items():

    # Remove unused categories
    if count == 0:
        continue

    category_id = category_name_to_id[category_name]

    # Name adjustments
    if category_name == 'prongs':
        category_name = 'pronghorn'

    categories.append({'id':category_id,'name':category_name})


#%% Create output

output_data = {}
output_data['images'] = input_data['images']
output_data['annotations'] = annotations
output_data['categories'] = categories
output_data['info'] = info


#%% Write output

with open(output_json,'w') as f:
    json.dump(output_data,f,indent=2)


#%% Validate .json file

from megadetector.data_management.databases import integrity_check_json_db

options = integrity_check_json_db.IntegrityCheckOptions()
options.baseDir = os.path.join(base_folder,'images'); assert os.path.isdir(options.baseDir)
options.bCheckImageSizes = False
options.bCheckImageExistence = False
options.bFindUnusedImages = False

_, _, _ = integrity_check_json_db.integrity_check_json_db(output_json, options)


#%% Preview labels

from megadetector.visualization import visualize_db

viz_options = visualize_db.DbVizOptions()
viz_options.num_to_visualize = 100
viz_options.trim_to_images_with_bboxes = False
viz_options.add_search_links = False
viz_options.sort_by_filename = False
viz_options.parallelize_rendering = True
viz_options.include_filename_links = True

# viz_options.classes_to_exclude = ['test']
html_output_file, _ = visualize_db.visualize_db(db_path=output_json,
                                                output_dir=os.path.join(base_folder,'preview'),
                                                image_base_dir=os.path.join(base_folder,'images'),
                                                options=viz_options)
os.startfile(html_output_file)
#%% ############ Take two, from pre-iWildCam .json files created from IDFG .csv files ############

#%% Imports and constants

import json
import os

base_folder = r'h:\idaho-camera-traps'
input_json_sl = os.path.join(base_folder,'iWildCam_IDFG.json')
input_json = os.path.join(base_folder,'iWildCam_IDFG_ml.json')
output_json = os.path.join(base_folder,'idaho_camera_traps.json')
remote_image_base_dir = r'z:\idfg'


#%% One-time line break addition
#
# Re-serialize the single-line source .json with indentation, so it's
# human-readable.
#
# Bug fix: the original script asserted input_json's existence *before* this
# cell, which made this bootstrap step unreachable when the script is run
# top-to-bottom; the assert now follows the conversion.

if not os.path.isfile(input_json):

    # The multi-line file doesn't exist yet; the single-line source must
    assert os.path.isfile(input_json_sl)

    with open(input_json_sl,'r') as f:
        d = json.load(f)
    with open(input_json,'w') as f:
        json.dump(d,f,indent=2)

assert os.path.isfile(input_json)


#%% Read input files

with open(input_json,'r') as f:
    input_data = json.load(f)

print('Read {} annotations for {} images'.format(len(input_data['annotations']),len(input_data['images'])))


#%% Prepare info

info = {}
info['contributor'] = 'Images acquired by the Idaho Department of Fish and Game, dataset curated by Sara Beery'
info['description'] = 'Idaho Camera traps'
info['version'] = '2021.07.19'


#%% Minor adjustments to categories

input_categories = input_data['categories']
output_categories = []

for c in input_categories:
    category_name = c['name']
    category_id = c['id']
    # Name adjustments
    if category_name == 'prong':
        category_name = 'pronghorn'
    category_name = category_name.lower()
    output_categories.append({'name':category_name,'id':category_id})


#%% Minor adjustments to annotations

# CCT convention is string-valued annotation IDs
for ann in input_data['annotations']:
    ann['id'] = str(ann['id'])


#%% Create output

output_data = {}
output_data['images'] = input_data['images']
output_data['annotations'] = input_data['annotations']
output_data['categories'] = output_categories
output_data['info'] = info


#%% Write output

with open(output_json,'w') as f:
    json.dump(output_data,f,indent=2)


#%% Validate .json file

from megadetector.data_management.databases import integrity_check_json_db

options = integrity_check_json_db.IntegrityCheckOptions()
options.baseDir = remote_image_base_dir
options.bCheckImageSizes = False
options.bCheckImageExistence = False
options.bFindUnusedImages = False

_, _, _ = integrity_check_json_db.integrity_check_json_db(output_json, options)


#%% Preview labels

from megadetector.visualization import visualize_db

viz_options = visualize_db.DbVizOptions()
viz_options.num_to_visualize = 100
viz_options.trim_to_images_with_bboxes = False
viz_options.add_search_links = False
viz_options.sort_by_filename = False
viz_options.parallelize_rendering = True
viz_options.include_filename_links = True

# viz_options.classes_to_exclude = ['test']
html_output_file, _ = visualize_db.visualize_db(db_path=output_json,
                                                output_dir=os.path.join(base_folder,'preview'),
                                                image_base_dir=remote_image_base_dir,
                                                options=viz_options)
os.startfile(html_output_file)
"""

jb_csv_to_json.py

Convert a particular .csv file to CCT format. Images were not available at
the time I wrote this script, so this is much shorter than other scripts
in this folder.

"""

#%% Constants and environment

import pandas as pd
import uuid
import json

input_metadata_file = r'd:\temp\pre_bounding_box.csv'
output_file = r'd:\temp\pre_bounding_box.json'
filename_col = 'filename'
label_col = 'category'


#%% Read source data

input_metadata = pd.read_csv(input_metadata_file)

print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
                                                              len(input_metadata)))


#%% Confirm filename uniqueness (this data set has one label per image)

imageFilenames = input_metadata[filename_col]

duplicateRows = []
filenamesToRows = {}

# Build up a map from filenames to a list of rows, checking image existence as we go
for iFile,fn in enumerate(imageFilenames):

    if (fn in filenamesToRows):
        duplicateRows.append(iFile)
        filenamesToRows[fn].append(iFile)
    else:
        filenamesToRows[fn] = [iFile]

# One label per image: no filename should have appeared on multiple rows
assert(len(duplicateRows) == 0)


#%% Create CCT dictionaries

images = []
annotations = []

# Map categories to integer IDs (that's what COCO likes)
nextCategoryID = 1
categories = []
categoryNamesToCategories = {}

# Category 0 is reserved for 'empty', per CCT convention
cat = {}
cat['name'] = 'empty'
cat['id'] = 0
categories.append(cat)
categoryNamesToCategories['empty'] = cat

# For each image
#
# Because in practice images are 1:1 with annotations in this data set,
# this is also a loop over annotations.

# imageName = imageFilenames[0]
for imageName in imageFilenames:

    rows = filenamesToRows[imageName]

    # As per above, this is convenient and appears to be true; asserting to be safe
    assert(len(rows) == 1)
    iRow = rows[0]

    row = input_metadata.iloc[iRow]

    im = {}
    # Filenames look like "290716114012001a1116.jpg"
    im['id'] = imageName.split('.')[0]
    im['file_name'] = imageName
    im['seq_id'] = '-1'

    images.append(im)

    categoryName = row[label_col].lower()

    # Have we seen this category before?
    if categoryName in categoryNamesToCategories:
        categoryID = categoryNamesToCategories[categoryName]['id']
    else:
        cat = {}
        categoryID = nextCategoryID
        cat['name'] = categoryName
        cat['id'] = nextCategoryID
        categories.append(cat)
        categoryNamesToCategories[categoryName] = cat
        nextCategoryID += 1

    # Create an annotation
    ann = {}

    # The Internet tells me this guarantees uniqueness to a reasonable extent, even
    # beyond the sheer improbability of collisions.
    ann['id'] = str(uuid.uuid1())
    ann['image_id'] = im['id']
    ann['category_id'] = categoryID

    annotations.append(ann)

# ...for each image

print('Finished creating dictionaries')


#%% Create info struct

info = {}
info['year'] = 2019
info['version'] = 1
info['description'] = 'COCO style database'
info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
info['contributor'] = ''


#%% Write output

json_data = {}
json_data['images'] = images
json_data['annotations'] = annotations
json_data['categories'] = categories
json_data['info'] = info

# Bug fix: the original passed a bare open() handle to json.dump, which was
# never closed (no guaranteed flush); use a context manager instead.
with open(output_file,'w') as f:
    json.dump(json_data, f, indent=4)

print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
    len(images),len(annotations),len(categories)))


#%% Validate

from megadetector.data_management.databases import integrity_check_json_db

options = integrity_check_json_db.IntegrityCheckOptions()
sortedCategories,data = integrity_check_json_db.integrity_check_json_db(output_file, options)
"""

mcgill_to_json.py

Convert the .csv file provided for the McGill test data set to a
COCO-camera-traps .json file

"""

#%% Constants and environment

import pandas as pd
import os
import glob
import json
import uuid
import time
import ntpath
import humanfriendly
import PIL
import math

baseDir = r'D:\wildlife_data\mcgill_test'
input_metadata_file = os.path.join(baseDir, 'dan_500_photos_metadata.csv')
output_file = os.path.join(baseDir, 'mcgill_test.json')
image_directory = baseDir

assert(os.path.isdir(image_directory))
assert(os.path.isfile(input_metadata_file))


#%% Read source data

input_metadata = pd.read_csv(input_metadata_file)

print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
                                                              len(input_metadata)))


#%% Map filenames to rows, verify image existence

# Create an additional column for concatenated filenames
input_metadata['relative_path'] = ''
input_metadata['full_path'] = ''

startTime = time.time()

# Maps relative filenames to rows
filenamesToRows = {}

duplicateRows = []

# Build up a map from filenames to a list of rows, checking image existence as we go
# row = input_metadata.iloc[0]
for iFile,row in input_metadata.iterrows():

    relativePath = os.path.join(row['site'],row['date_range'],str(row['camera']),
                                str(row['folder']),row['filename'])
    fullPath = os.path.join(baseDir,relativePath)

    if (relativePath in filenamesToRows):
        duplicateRows.append(iFile)
        filenamesToRows[relativePath].append(iFile)
    else:
        filenamesToRows[relativePath] = [iFile]
        assert(os.path.isfile(fullPath))

    row['relative_path'] = relativePath
    row['full_path'] = fullPath

    # iterrows() yields a copy, so write the modified row back to the frame
    input_metadata.iloc[iFile] = row

elapsed = time.time() - startTime
print('Finished verifying image existence in {}, found {} filenames with multiple labels'.format(
    humanfriendly.format_timespan(elapsed),len(duplicateRows)))

# I didn't expect this to be true a priori, but it appears to be true, and
# it saves us the trouble of checking consistency across multiple occurrences
# of an image.
assert(len(duplicateRows) == 0)


#%% Check for images that aren't included in the metadata file

# Enumerate all images
imageFullPaths = glob.glob(os.path.join(image_directory,'**/*.JPG'), recursive=True)

for iImage,imagePath in enumerate(imageFullPaths):

    imageRelPath = ntpath.relpath(imagePath, image_directory)
    assert(imageRelPath in filenamesToRows)

print('Finished checking {} images to make sure they\'re in the metadata'.format(
    len(imageFullPaths)))


#%% Create CCT dictionaries

# Also gets image sizes, so this takes ~6 minutes
#
# Implicitly checks images for overt corruptness, i.e. by not crashing.

images = []
annotations = []
categories = []

emptyCategory = {}
emptyCategory['id'] = 0
emptyCategory['name'] = 'empty'
emptyCategory['latin'] = 'empty'
emptyCategory['count'] = 0
categories.append(emptyCategory)

# Map categories to integer IDs (that's what COCO likes)
nextCategoryID = 1
labelToCategory = {'empty':emptyCategory}

# For each image
#
# Because in practice images are 1:1 with annotations in this data set,
# this is also a loop over annotations.

startTime = time.time()

# row = input_metadata.iloc[0]
for iFile,row in input_metadata.iterrows():

    relPath = row['relative_path'].replace('\\','/')
    im = {}
    # Build a unique, filesystem-safe image ID from the relative path
    im['id'] = relPath.replace('/','_').replace(' ','_')

    im['file_name'] = relPath

    im['seq_id'] = -1
    im['frame_num'] = -1

    # In the form "001a"
    im['site']= row['site']

    # Can be in the form '111' or 's46'
    im['camera'] = row['camera']

    # In the form "7/29/2016 11:40"
    im['datetime'] = row['timestamp']

    otherFields = ['motion','temp_F','n_present','n_waterhole','n_contact','notes']

    for s in otherFields:
        im[s] = row[s]

    # Check image height and width
    #
    # Bug fix: the original never closed the PIL image, leaking a file handle
    # per image; Image.open is a context manager, so use it as one.
    fullPath = row['full_path']
    assert(os.path.isfile(fullPath))
    with PIL.Image.open(fullPath) as pilImage:
        width, height = pilImage.size
    im['width'] = width
    im['height'] = height

    images.append(im)

    label = row['species']
    if not isinstance(label,str):
        # NaN is the only thing we should see that's not a string
        assert math.isnan(label)
        label = 'empty'
    else:
        label = label.lower()

    latin = row['binomial']
    if not isinstance(latin,str):
        # NaN is the only thing we should see that's not a string
        assert math.isnan(latin)
        latin = 'empty'
    else:
        latin = latin.lower()

    # Keep the latin name consistent with the common-name label for the
    # 'empty' and 'unknown' sentinel values
    if label == 'empty':
        if latin != 'empty':
            latin = 'empty'

    if label == 'unknown':
        if latin != 'unknown':
            latin = 'unknown'

    if label not in labelToCategory:
        print('Adding category {} ({})'.format(label,latin))
        category = {}
        categoryID = nextCategoryID
        category['id'] = categoryID
        nextCategoryID += 1
        category['name'] = label
        category['latin'] = latin
        category['count'] = 1
        labelToCategory[label] = category
        categories.append(category)
    else:
        category = labelToCategory[label]
        category['count'] = category['count'] + 1
        categoryID = category['id']

    # Create an annotation
    ann = {}

    # The Internet tells me this guarantees uniqueness to a reasonable extent, even
    # beyond the sheer improbability of collisions.
    ann['id'] = str(uuid.uuid1())
    ann['image_id'] = im['id']
    ann['category_id'] = categoryID

    annotations.append(ann)

# ...for each image

# Print a per-category summary
for category in categories:
    print('Category {}, count {}'.format(category['name'],category['count']))

elapsed = time.time() - startTime
print('Finished creating CCT dictionaries in {}'.format(
    humanfriendly.format_timespan(elapsed)))


#%% Create info struct

info = {}
info['year'] = 2019
info['version'] = 1
info['description'] = 'COCO style database'
info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
info['contributor'] = 'McGill University'


#%% Write output

json_data = {}
json_data['images'] = images
json_data['annotations'] = annotations
json_data['categories'] = categories
json_data['info'] = info

# Bug fix: the original passed a bare open() handle to json.dump, which was
# never closed (no guaranteed flush); use a context manager instead.
with open(output_file,'w') as f:
    json.dump(json_data, f, indent=4)

print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
    len(images),len(annotations),len(categories)))