megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (226) hide show
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,507 +0,0 @@
1
- """
2
-
3
- umn_to_json.py
4
-
5
- Prepare images and metadata for the Orinoquía Camera Traps dataset.
6
-
7
- """
8
-
9
- #%% Imports and constants
10
-
11
- import os
12
- import json
13
- import pandas as pd
14
- import shutil
15
- import uuid
16
- import datetime
17
- import dateutil.parser
18
-
19
- from collections import defaultdict
20
- from tqdm import tqdm
21
- from pathlib import Path
22
- from multiprocessing.pool import ThreadPool
23
-
24
- input_base = "f:\\"
25
- image_base = os.path.join(input_base,'2021.11.24-images\jan2020')
26
- ground_truth_file = os.path.join(input_base,'images_hv_jan2020_reviewed_force_nonblank.csv')
27
-
28
- # For two deployments, we're only processing images in the "detections" subfolder
29
- detection_only_deployments = ['N23','N32']
30
- deployments_to_ignore = ['N18','N28']
31
-
32
- MISSING_COMMON_NAME_TOKEN = 'MISSING'
33
-
34
- assert os.path.isfile(ground_truth_file)
35
- assert os.path.isdir(image_base)
36
-
37
-
38
- #%% Enumerate deployment folders
39
-
40
- deployment_folders = os.listdir(image_base)
41
- deployment_folders = [s for s in deployment_folders if os.path.isdir(os.path.join(image_base,s))]
42
- deployment_folders = set(deployment_folders)
43
- print('Listed {} deployment folders'.format(len(deployment_folders)))
44
-
45
-
46
- #%% Load ground truth
47
-
48
- ground_truth_df = pd.read_csv(ground_truth_file)
49
-
50
- print('Loaded {} ground truth annotations'.format(
51
- len(ground_truth_df)))
52
-
53
- # i_row = 0; row = ground_truth_df.iloc[i_row]
54
- for i_row,row in tqdm(ground_truth_df.iterrows()):
55
- if not isinstance(row['common_name'],str):
56
- print('Warning: missing common name for {}'.format(row['filename']))
57
- row['common_name'] = MISSING_COMMON_NAME_TOKEN
58
-
59
-
60
- #%% Create relative paths for ground truth data
61
-
62
- # Some deployment folders have no subfolders, e.g. this is a valid file name:
63
- #
64
- # M00/01010132.JPG
65
- #
66
- # But some deployment folders have subfolders, e.g. this is also a valid file name:
67
- #
68
- # N17/100EK113/07160020.JPG
69
- #
70
- # So we can't find files by just concatenating folder and file names; we have to enumerate and explicitly
71
- # map what will appear in the ground truth as "folder/filename" to complete relative paths.
72
-
73
- deployment_name_to_file_mappings = {}
74
-
75
- n_filenames_ignored = 0
76
- n_deployments_ignored = 0
77
-
78
- # deployment_name = list(deployment_folders)[0]
79
- for deployment_name in tqdm(deployment_folders):
80
-
81
- file_mappings = {}
82
-
83
- if deployment_name in deployments_to_ignore:
84
- print('Ignoring deployment {}'.format(deployment_name))
85
- n_deployments_ignored += 1
86
- continue
87
-
88
- # Enumerate all files in this folder
89
- absolute_deployment_folder = os.path.join(image_base,deployment_name)
90
- assert os.path.isdir(absolute_deployment_folder)
91
-
92
- files = list(Path(absolute_deployment_folder).rglob('*'))
93
- files = [p for p in files if not p.is_dir()]
94
- files = [str(s) for s in files]
95
- files = [s.replace('\\','/') for s in files]
96
- # print('Enumerated {} files for deployment {}'.format(len(files),deployment_name))
97
-
98
- # filename = files[100]
99
- for filename in files:
100
-
101
- if deployment_name in detection_only_deployments and 'detection' not in filename:
102
- n_filenames_ignored += 1
103
- continue
104
-
105
- if '.DS_Store' in filename:
106
- n_filenames_ignored += 1
107
- continue
108
-
109
- relative_path = os.path.relpath(filename,absolute_deployment_folder).replace('\\','/')
110
- image_name = relative_path.split('/')[-1]
111
- assert image_name not in file_mappings, 'Redundant image name {} in deployment {}'.format(
112
- image_name,deployment_name)
113
- assert '\\' not in relative_path
114
- file_mappings[image_name] = relative_path
115
-
116
- # ...for each file in this deployment
117
-
118
- deployment_name_to_file_mappings[deployment_name] = file_mappings
119
-
120
- # ...for each deployment
121
-
122
- print('Processed deployments, ignored {} deployments and {} files'.format(
123
- n_deployments_ignored,n_filenames_ignored))
124
-
125
-
126
- #%% Add relative paths to our ground truth table
127
-
128
- ground_truth_df['relative_path'] = None
129
-
130
- # i_row = 0; row = ground_truth_df.iloc[i_row]
131
- for i_row,row in tqdm(ground_truth_df.iterrows(),total=len(ground_truth_df)):
132
-
133
- # row['filename'] looks like, e.g. A01/01080001.JPG. This is not actually a path, it's
134
- # just the deployment ID and the image name, separated by a slash.
135
-
136
- deployment_name = row['filename'].split('/')[0]
137
-
138
- assert deployment_name in deployment_folders, 'Could not find deployment folder {}'.format(deployment_name)
139
- assert deployment_name in deployment_name_to_file_mappings, 'Could not find deployment folder {}'.format(deployment_name)
140
-
141
- file_mappings = deployment_name_to_file_mappings[deployment_name]
142
-
143
- # Find the relative path for this image
144
- image_name = row['filename'].split('/')[-1]
145
- assert image_name in file_mappings, 'No mappings for image {} in deployment {}'.format(
146
- image_name,deployment_name)
147
- relative_path = os.path.join(deployment_name,file_mappings[image_name]).replace('\\','/')
148
-
149
- # Make sure this image file exists
150
- absolute_path = os.path.join(image_base,relative_path)
151
- assert os.path.isfile(absolute_path), 'Could not find file {}'.format(absolute_path)
152
-
153
- ground_truth_df.loc[i_row,'relative_path'] = relative_path
154
-
155
- # ...for each row in the ground truth table
156
-
157
-
158
- #%% Take everything out of Pandas
159
-
160
- ground_truth_dicts = ground_truth_df.to_dict('records')
161
-
162
-
163
- #%% Convert string timestamps to Python datetimes
164
-
165
- all_locations = set()
166
-
167
- # im = ground_truth_dicts[0]
168
- for im in tqdm(ground_truth_dicts):
169
- dt = dateutil.parser.isoparse(im['timestamp'])
170
- assert dt.year == 2020
171
- im['datetime'] = dt
172
-
173
- # Filenames look like, e.g., N36/100EK113/06040726.JPG
174
- im['location'] = im['relative_path'].split('/')[0]
175
- assert len(im['location']) == 3
176
- all_locations.add(im['location'])
177
-
178
-
179
- #%% Synthesize sequence information
180
-
181
- locations = all_locations
182
- print('Found {} locations'.format(len(locations)))
183
-
184
- locations = list(locations)
185
-
186
- sequences = set()
187
- sequence_to_images = defaultdict(list)
188
- images = ground_truth_dicts
189
- max_seconds_within_sequence = 10
190
-
191
- # Sort images by time within each location
192
- # i_location=0; location = locations[i_location]
193
- for i_location,location in tqdm(enumerate(locations)):
194
-
195
- images_this_location = [im for im in images if im['location'] == location]
196
- sorted_images_this_location = sorted(images_this_location, key = lambda im: im['datetime'])
197
-
198
- current_sequence_id = None
199
- next_frame_number = 0
200
- previous_datetime = None
201
-
202
- # previous_datetime = sorted_images_this_location[0]['datetime']
203
- # im = sorted_images_this_location[1]
204
- for i_image,im in enumerate(sorted_images_this_location):
205
-
206
- # Timestamp for this image, may be None
207
- dt = im['datetime']
208
-
209
- # Start a new sequence if:
210
- #
211
- # * This image has no timestamp
212
- # * More than max_seconds_within_sequence seconds have elapsed since the previous image
213
- # * We have no previous image timestamp
214
- #
215
- if dt is None:
216
- delta = None
217
- elif previous_datetime is None:
218
- delta = None
219
- else:
220
- assert isinstance(dt,datetime.datetime)
221
- delta = (dt - previous_datetime).total_seconds()
222
-
223
- # Start a new sequence if necessary
224
- if delta is None or delta > max_seconds_within_sequence:
225
- next_frame_number = 0
226
- current_sequence_id = str(uuid.uuid1())
227
- sequences.add(current_sequence_id)
228
- assert current_sequence_id is not None
229
-
230
- im['seq_id'] = current_sequence_id
231
- im['synthetic_frame_number'] = next_frame_number
232
- next_frame_number = next_frame_number + 1
233
- previous_datetime = dt
234
- sequence_to_images[im['seq_id']].append(im)
235
-
236
- # ...for each image in this location
237
-
238
- # ...for each location
239
-
240
-
241
- #%% Create category dict and category IDs
242
-
243
- categories_to_counts = defaultdict(int)
244
- category_mappings = {'blank':'empty',
245
- 'mammal':'unknown_mammal',
246
- 'dasypus_species':'unknown_armadillo',
247
- 'bird':'unknown_bird',
248
- 'bos_species':'cattle',
249
- 'possum_family':'unknown_possum',
250
- 'cervidae_family':'unknown_cervid',
251
- 'unknown_species':'unknown',
252
- 'lizards_and_snakes':'unknown_reptile',
253
- 'caprimulgidae_family':'unknown_nightjar',
254
- 'turtle_order':'unknown_turtle',
255
- 'ornate_tití_monkey':'ornate_titi_monkey',
256
- 'saimiri_species':'unknown_squirrel_monkey',
257
- 'peccary_family':'unknown_peccary',
258
- 'pecari_species':'unknown_peccary',
259
- 'alouatta_species':'unknown_howler_monkey',
260
- 'human-camera_trapper':'human',
261
- 'weasel_family':'unknown_weasel',
262
- 'motorcycle':'human',
263
- 'eira_species':'unknown_tayra',
264
- 'sapajus_species':'unknown_capuchin_monkey',
265
- 'red_brocket':'red_brocket_deer'
266
- }
267
-
268
- for c in category_mappings.values():
269
- assert ' ' not in c
270
-
271
- # im = images[0]
272
- for im in tqdm(images):
273
-
274
- category_name = im['common_name'].lower().replace("'",'').replace(' ','_')
275
- if category_name in category_mappings:
276
- category_name = category_mappings[category_name]
277
- categories_to_counts[category_name] += 1
278
- im['category_name'] = category_name
279
-
280
-
281
- categories_to_counts_sorted = {k: v for k, v in sorted(categories_to_counts.items(),
282
- key=lambda item: item[1],reverse=True)}
283
-
284
- for s in categories_to_counts_sorted.keys():
285
- print('{}: {}'.format(s,categories_to_counts_sorted[s]))
286
-
287
-
288
- #%% Imports and constants (.json generation)
289
-
290
- import os
291
- import uuid
292
- import datetime
293
- from tqdm import tqdm
294
-
295
- from data_management.databases import integrity_check_json_db
296
-
297
- output_base = 'f:\orinoquia_camera_traps'
298
- output_image_base = os.path.join(output_base,'images')
299
- os.makedirs(output_image_base,exist_ok=True)
300
-
301
- output_json_filename = os.path.join(output_base, 'orinoquia_camera_traps.json')
302
- output_encoding = 'utf-8'
303
- read_image_sizes = False
304
-
305
- info = {}
306
- info['year'] = 2020
307
- info['version'] = '1.0'
308
- info['description'] = 'Orinoquia Camera Traps'
309
- info['contributor'] = 'University of Minnesota'
310
- info['date_created'] = str(datetime.date.today())
311
-
312
-
313
- #%% Count frames in each sequence
314
-
315
- sequence_id_to_n_frames = defaultdict(int)
316
-
317
- for im in tqdm(images):
318
- seq_id = im['seq_id']
319
- sequence_id_to_n_frames[seq_id] = sequence_id_to_n_frames[seq_id] + 1
320
-
321
- for im in tqdm(images):
322
- seq_id = im['seq_id']
323
- im['seq_num_frames'] = sequence_id_to_n_frames[seq_id]
324
-
325
-
326
- #%% Double check images with multiple annotations
327
-
328
- filename_to_images = defaultdict(list)
329
-
330
- # im = images[0]
331
- for im in tqdm(images):
332
- fn = im['relative_path']
333
- filename_to_images[fn].append(im)
334
-
335
- filenames_with_multiple_annotations = [fn for fn in filename_to_images.keys() if len(filename_to_images[fn]) > 1]
336
-
337
- print('Found {} filenames with multiple annotations'.format(len(filenames_with_multiple_annotations)))
338
-
339
- for fn in filenames_with_multiple_annotations:
340
- images_this_file = filename_to_images[fn]
341
- print(fn + ': ')
342
- for im in images_this_file:
343
- print(im['category_name'])
344
- print('')
345
-
346
-
347
- #%% Assemble dictionaries
348
-
349
- images_out = []
350
- image_id_to_image = {}
351
- annotations = []
352
- categories = []
353
-
354
- category_name_to_category = {}
355
- category_id_to_category = {}
356
-
357
- # Force the empty category to be ID 0
358
- empty_category = {}
359
- empty_category['name'] = 'empty'
360
- empty_category['id'] = 0
361
- empty_category['count'] = 0
362
-
363
- category_id_to_category[0] = empty_category
364
- category_name_to_category['empty'] = empty_category
365
- categories.append(empty_category)
366
- next_id = 1
367
-
368
- # input_im = images[0]
369
- for input_im in tqdm(images):
370
-
371
- category_name = input_im['category_name'].lower().strip()
372
-
373
- if category_name not in category_name_to_category:
374
-
375
- category_id = next_id
376
- next_id += 1
377
- category = {}
378
- category['id'] = category_id
379
- category['name'] = category_name
380
- category['count'] = 0
381
- categories.append(category)
382
- category_name_to_category[category_name] = category
383
- category_id_to_category[category_id] = category
384
-
385
- else:
386
-
387
- category = category_name_to_category[category_name]
388
-
389
- category_id = category['id']
390
- category['count'] += 1
391
-
392
- im = {}
393
- im['id'] = input_im['relative_path'].replace('/','_')
394
- im['datetime'] = str(input_im['datetime'])
395
- im['file_name'] = input_im['relative_path']
396
- im['seq_id'] = input_im['seq_id']
397
- im['frame_num'] = input_im['synthetic_frame_number']
398
- im['seq_num_frames'] = input_im['seq_num_frames']
399
- im['location'] = input_im['location']
400
-
401
- if im['id'] in image_id_to_image:
402
- print('Warning: image ID {} ({}) has multiple annotations'.format(im['id'],im['id'].replace('_','/')))
403
- else:
404
- image_id_to_image[im['id']] = im
405
- images_out.append(im)
406
-
407
- ann = {}
408
-
409
- ann['id'] = str(uuid.uuid1())
410
- ann['image_id'] = im['id']
411
- ann['category_id'] = category_id
412
- ann['sequence_level_annotation'] = False
413
- annotations.append(ann)
414
-
415
- # ...for each image
416
-
417
-
418
- #%% Write output .json
419
-
420
- data = {}
421
- data['info'] = info
422
- data['images'] = images_out
423
- data['annotations'] = annotations
424
- data['categories'] = categories
425
-
426
- with open(output_json_filename, 'w') as f:
427
- json.dump(data, f, indent=1)
428
-
429
- print('Finished writing json to {}'.format(output_json_filename))
430
-
431
-
432
- #%% Validate .json file
433
-
434
- options = integrity_check_json_db.IntegrityCheckOptions()
435
- options.baseDir = output_base
436
- options.bCheckImageSizes = False
437
- options.bCheckImageExistence = False
438
- options.bFindUnusedImages = False
439
-
440
- _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_filename, options)
441
-
442
-
443
- #%% Map relative paths to annotation categories
444
-
445
- category_id_to_category_names = {c['id']:c['name'] for c in data['categories']}
446
- image_id_to_category_names = defaultdict(list)
447
-
448
- # ann = data['annotations'][0]
449
- for ann in data['annotations']:
450
- category_name = category_id_to_category_names[ann['category_id']]
451
- image_id_to_category_names[ann['image_id']].append(category_name)
452
-
453
-
454
- #%% Copy images to output
455
-
456
- # Images labeled "human" or "missing" are copied to a "private" folder; all others go to "public"
457
-
458
- # im = data['images'][0]
459
- def copy_image(im):
460
-
461
- image_id = im['id']
462
- category_names_this_image = image_id_to_category_names[image_id]
463
- assert len(category_names_this_image) > 0
464
- if ('human' in category_names_this_image) or ('missing' in category_names_this_image):
465
- prefix = 'private'
466
- else:
467
- prefix = 'public'
468
- input_fn_absolute = os.path.join(image_base,im['file_name'])
469
- output_fn_absolute = os.path.join(output_image_base,prefix,im['file_name'])
470
- dirname = os.path.dirname(output_fn_absolute)
471
- os.makedirs(dirname,exist_ok=True)
472
- shutil.copy(input_fn_absolute,output_fn_absolute)
473
-
474
- n_threads = 10
475
-
476
- # im = images[0]
477
- if n_threads == 1:
478
- for im in tqdm(data['images']):
479
- copy_image(im)
480
- else:
481
- pool = ThreadPool(n_threads)
482
- with tqdm(total=len(data['images'])) as pbar:
483
- for i,_ in enumerate(pool.imap_unordered(copy_image,data['images'])):
484
- pbar.update()
485
-
486
-
487
- #%% Preview labels
488
-
489
- from md_visualization import visualize_db
490
-
491
- viz_options = visualize_db.DbVizOptions()
492
- viz_options.num_to_visualize = 100
493
- viz_options.trim_to_images_with_bboxes = False
494
- viz_options.add_search_links = False
495
- viz_options.sort_by_filename = False
496
- viz_options.parallelize_rendering = True
497
- viz_options.include_filename_links = True
498
-
499
- # viz_options.classes_to_exclude = ['test']
500
- html_output_file, _ = visualize_db.visualize_db(db_path=output_json_filename,
501
- output_dir=os.path.join(
502
- output_base,'preview'),
503
- image_base_dir=os.path.join(output_image_base,'public'),
504
- options=viz_options)
505
- os.startfile(html_output_file)
506
-
507
-