megadetector-5.0.11-py3-none-any.whl → megadetector-5.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (203)
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +97 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +149 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +88 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +263 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +607 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +237 -0
  58. megadetector/data_management/cct_json_utils.py +404 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +283 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +493 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +793 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +870 -0
  129. megadetector/data_management/read_exif.py +809 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/rename_images.py +187 -0
  133. megadetector/data_management/resize_coco_dataset.py +189 -0
  134. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  135. megadetector/data_management/yolo_output_to_md_output.py +446 -0
  136. megadetector/data_management/yolo_to_coco.py +676 -0
  137. megadetector/detection/__init__.py +0 -0
  138. megadetector/detection/detector_training/__init__.py +0 -0
  139. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  140. megadetector/detection/process_video.py +846 -0
  141. megadetector/detection/pytorch_detector.py +355 -0
  142. megadetector/detection/run_detector.py +779 -0
  143. megadetector/detection/run_detector_batch.py +1219 -0
  144. megadetector/detection/run_inference_with_yolov5_val.py +1087 -0
  145. megadetector/detection/run_tiled_inference.py +934 -0
  146. megadetector/detection/tf_detector.py +192 -0
  147. megadetector/detection/video_utils.py +698 -0
  148. megadetector/postprocessing/__init__.py +0 -0
  149. megadetector/postprocessing/add_max_conf.py +64 -0
  150. megadetector/postprocessing/categorize_detections_by_size.py +165 -0
  151. megadetector/postprocessing/classification_postprocessing.py +716 -0
  152. megadetector/postprocessing/combine_api_outputs.py +249 -0
  153. megadetector/postprocessing/compare_batch_results.py +966 -0
  154. megadetector/postprocessing/convert_output_format.py +396 -0
  155. megadetector/postprocessing/load_api_results.py +195 -0
  156. megadetector/postprocessing/md_to_coco.py +310 -0
  157. megadetector/postprocessing/md_to_labelme.py +330 -0
  158. megadetector/postprocessing/merge_detections.py +412 -0
  159. megadetector/postprocessing/postprocess_batch_results.py +1908 -0
  160. megadetector/postprocessing/remap_detection_categories.py +170 -0
  161. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  162. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  163. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  164. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1635 -0
  165. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  166. megadetector/postprocessing/subset_json_detector_output.py +700 -0
  167. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  168. megadetector/taxonomy_mapping/__init__.py +0 -0
  169. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  170. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  171. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  172. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +588 -0
  173. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  174. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  175. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  176. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  177. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  178. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  179. megadetector/utils/__init__.py +0 -0
  180. megadetector/utils/azure_utils.py +178 -0
  181. megadetector/utils/ct_utils.py +613 -0
  182. megadetector/utils/directory_listing.py +246 -0
  183. megadetector/utils/md_tests.py +1164 -0
  184. megadetector/utils/path_utils.py +1045 -0
  185. megadetector/utils/process_utils.py +160 -0
  186. megadetector/utils/sas_blob_utils.py +509 -0
  187. megadetector/utils/split_locations_into_train_val.py +228 -0
  188. megadetector/utils/string_utils.py +92 -0
  189. megadetector/utils/url_utils.py +323 -0
  190. megadetector/utils/write_html_image_list.py +225 -0
  191. megadetector/visualization/__init__.py +0 -0
  192. megadetector/visualization/plot_utils.py +293 -0
  193. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  194. megadetector/visualization/visualization_utils.py +1536 -0
  195. megadetector/visualization/visualize_db.py +552 -0
  196. megadetector/visualization/visualize_detector_output.py +405 -0
  197. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
  198. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
  199. megadetector-5.0.13.dist-info/RECORD +201 -0
  200. megadetector-5.0.13.dist-info/top_level.txt +1 -0
  201. megadetector-5.0.11.dist-info/RECORD +0 -5
  202. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  203. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0
megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py

@@ -0,0 +1,180 @@
+ '''
+ copy_and_unzip_emammal.py
+
+ Siyu Yang
+
+ Script to copy all deployments in the eMammal container (mounted on the VM or not) to the
+ data disk at /datadrive and unzip them, deleting the copied zip file.
+
+ Exception handling still needs to be added.
+ '''
+
+ #%% Imports and constants
+
+ from datetime import datetime
+ import itertools
+ import json
+ import multiprocessing
+ from multiprocessing.dummy import Pool as ThreadPool  # a thread pool, despite the module name
+ import os
+ from shutil import copy, copyfile
+ from tqdm import tqdm
+ from typing import Optional
+ import zipfile
+
+ from azure.storage.blob import BlobServiceClient
+
+
+ # Configuration and paths
+ log_folder = '/home/lynx/logs'
+ dest_folder = '/datadrive/emammal_robertlong'  # attached data disk where data is stored
+ origin = 'cloud'  # 'cloud' or 'mounted'
+
+
+ #%% Helper functions
+
+ def _copy_unzip(source_path, dest_folder):
+     """
+     Copy one deployment .zip to dest_folder, extract it, and delete the copied .zip.
+
+     Returns 'exists' if the deployment was already extracted, None on success,
+     or the source path on failure.
+     """
+
+     try:
+         dest_subfolder = os.path.join(dest_folder, os.path.basename(source_path).split('.zip')[0])
+         if os.path.exists(dest_subfolder):
+             print('{} exists.'.format(dest_subfolder))
+             return 'exists'
+
+         print('Copying...')
+         # dest_path = copy(source_path, dest_folder)
+         dest_path = os.path.join(dest_folder, os.path.basename(source_path))
+         copyfile(source_path, dest_path)
+
+         with zipfile.ZipFile(dest_path, 'r') as zip_ref:
+             zip_ref.extractall(dest_subfolder)
+
+         os.remove(dest_path)
+         print('{} copied and extracted'.format(dest_subfolder))
+         return None
+
+     except Exception:
+         # Retry once before giving up
+         try:
+             print('Retrying...')
+             dest_path = os.path.join(dest_folder, os.path.basename(source_path))
+             copyfile(source_path, dest_path)
+             with zipfile.ZipFile(dest_path, 'r') as zip_ref:
+                 zip_ref.extractall(dest_subfolder)
+             os.remove(dest_path)
+             print('{} copied and extracted'.format(dest_subfolder))
+             return None
+         except Exception as e:
+             print('WARNING {} did not get copied. Exception: {}'.format(source_path, str(e)))
+             return source_path
+
+
+ def copy_from_mounted_container(source_folder, dest_folder):
+     """
+     Copy and extract every deployment .zip found under the mounted container.
+     """
+
+     sources = []
+
+     collections = sorted(os.listdir(source_folder))
+
+     for collection in collections:
+         collection_folder = os.path.join(source_folder, collection)
+         if not os.path.isdir(collection_folder):
+             continue
+
+         print('Processing collection {}'.format(collection))
+
+         for file in tqdm(sorted(os.listdir(collection_folder))):
+             source_path = os.path.join(collection_folder, file)
+             sources.append(source_path)
+
+     # num_workers = multiprocessing.cpu_count()
+     # pool = ThreadPool(num_workers)
+     # results = pool.starmap(_copy_unzip, zip(sources, itertools.repeat(dest_folder)))
+     #
+     # print('Waiting for processes to finish...')
+     # pool.close()
+     # pool.join()
+
+     # Sequential version
+     results = []
+     for source_path in sources:
+         result = _copy_unzip(source_path, dest_folder)
+         results.append(result)
+
+     cur_time = datetime.now().strftime('%Y%m%d-%H%M%S')
+     with open(os.path.join(log_folder, 'copy_unzip_results_{}.json'.format(cur_time)), 'w') as f:
+         json.dump(results, f)
+
+
+ def _download_unzip(blob_service: BlobServiceClient,
+                     container: str,
+                     blob_path: str,
+                     dest_path: str) -> Optional[str]:
+     try:
+         with open(dest_path, 'wb') as f:
+             cc = blob_service.get_container_client(container)
+             print('Downloading...')
+             cc.download_blob(blob_path).readinto(f)
+
+         dest_subfolder = dest_path.split('.zip')[0]
+
+         with zipfile.ZipFile(dest_path, 'r') as zip_ref:
+             zip_ref.extractall(dest_subfolder)
+
+         os.remove(dest_path)
+         print('{} copied and extracted'.format(dest_subfolder))
+         return None
+
+     except Exception as e:
+         print('ERROR while downloading or unzipping {}. Exception: {}'.format(blob_path, str(e)))
+         return blob_path
+
+
+ def download_from_container(dest_folder: str,
+                             blob_service: BlobServiceClient,
+                             container: str = 'emammal',
+                             desired_blob_prefix: str = '') -> None:
+     generator = blob_service.get_container_client(container).list_blobs()
+     desired_blobs = [blob.name for blob in generator
+                      if blob.name.startswith(desired_blob_prefix)]
+
+     print('desired_blobs', desired_blobs)
+
+     results = []
+     for blob_path in tqdm(desired_blobs):
+         # E.g. 'emammal/0Robert Long/deployment.zip' -> 'deployment.zip'
+         blob_name = blob_path.split('/')[2]
+         print('blob_name', blob_name)
+         dest_path = os.path.join(dest_folder, blob_name)
+         print('dest_path', dest_path)
+         result = _download_unzip(blob_service, container, blob_path, dest_path)
+         results.append(result)
+
+     cur_time = datetime.now().strftime('%Y%m%d-%H%M%S')
+     with open(os.path.join(log_folder, 'download_unzip_results_{}.json'.format(cur_time)), 'w') as f:
+         json.dump(results, f)
+
+
+ #%% Command-line driver
+
+ if __name__ == '__main__':
+
+     if origin == 'cloud':
+         container = 'wpz'
+         desired_blob_prefix = 'emammal/0Robert Long/'
+
+     print('Start timing...')
+     start_time = datetime.now()
+
+     if origin == 'mounted':
+         # If the blob container is already mounted on the VM...
+         source_folder = '/home/yasiyu/mnt/wildlifeblobssc/emammal'  # mounted blob container
+         copy_from_mounted_container(source_folder, dest_folder)
+
+     elif origin == 'cloud':
+         # ...or download with the Azure Storage Python SDK; the storage account
+         # key is read from the environment variable AZ_STORAGE_KEY
+         blob_service = BlobServiceClient(
+             account_url='https://wildlifeblobssc.blob.core.windows.net',
+             credential=os.environ['AZ_STORAGE_KEY'])
+         download_from_container(dest_folder, blob_service, container=container,
+                                 desired_blob_prefix=desired_blob_prefix)
+
+     print('Process took {}.'.format(datetime.now() - start_time))
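
For orientation, a minimal sketch of driving the cloud path above from another script. The account URL, container name, and blob prefix are the values used in the script itself; treating the file as an importable module named copy_and_unzip_emammal is an assumption.

    import os
    from azure.storage.blob import BlobServiceClient

    import copy_and_unzip_emammal as cau  # assumed importable; hypothetical usage

    # The storage account key is expected in the AZ_STORAGE_KEY environment
    # variable, exactly as in the script's __main__ block.
    blob_service = BlobServiceClient(
        account_url='https://wildlifeblobssc.blob.core.windows.net',
        credential=os.environ['AZ_STORAGE_KEY'])

    # Download every blob under the prefix into the destination folder,
    # extracting each .zip and deleting the archive afterwards.
    cau.download_from_container('/datadrive/emammal_robertlong', blob_service,
                                container='wpz',
                                desired_blob_prefix='emammal/0Robert Long/')
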
megadetector/data_management/importers/eMammal/eMammal_helpers.py

@@ -0,0 +1,249 @@
+ #
+ # eMammal_helpers.py
+ #
+ # Support functions for processing eMammal metadata
+ #
+
+ #%% Constants and imports
+
+ from PIL import Image, ImageDraw
+ import matplotlib.pyplot as plt
+ import operator
+ from datetime import datetime
+
+
+ #%% Support functions
+
+ def clean_species_name(common_name):
+     """
+     Converts various forms of "human" to the token "human", and various forms
+     of "empty" to the token "empty".
+     """
+
+     _people_tags = {
+         'Bicycle',
+         'Calibration Photos',
+         'Camera Trapper',
+         'camera trappper',
+         'camera trapper',
+         'Homo sapien',
+         'Homo sapiens',
+         'Human, non staff',
+         'Human, non-staff',
+         'camera trappe',
+         'Human non-staff',
+         'Setup Pickup',
+         'Vehicle'
+     }
+     PEOPLE_TAGS = {x.lower() for x in _people_tags}
+
+     _no_animal_tags = {'No Animal', 'no animal', 'Time Lapse', 'Camera Misfire', 'False trigger', 'Blank'}
+     NO_ANIMAL_TAGS = {x.lower() for x in _no_animal_tags}
+
+     common_name = common_name.lower().strip()
+     if common_name in PEOPLE_TAGS:
+         return 'human'
+
+     if common_name in NO_ANIMAL_TAGS:
+         return 'empty'
+
+     return common_name
+
+
+ def clean_frame_number(img_frame):
+
+     # Pad to a total of 3 digits if < 1000, leave 4-digit frame numbers as-is.
+     # img_frame is a string from the xml tree.
+     length = len(img_frame)
+
+     assert length > 0
+     assert length < 5
+
+     # img_frame needs length 3 when it's < 1000 so it can be matched to the
+     # iMerit labels.
+     if length == 1:
+         return '00' + img_frame
+     elif length == 2:
+         return '0' + img_frame
+     else:  # for '100' and '1000'
+         return img_frame
+
+
+ def clean_frame_number_4_digit(img_frame):
+
+     # Pad to a total of 4 digits.
+     # img_frame is a string from the xml tree.
+     length = len(img_frame)
+
+     assert length > 0
+     assert length < 5
+
+     if length == 1:
+         return '000' + img_frame
+     elif length == 2:
+         return '00' + img_frame
+     elif length == 3:
+         return '0' + img_frame
+     else:  # for '1000'
+         return img_frame
+
+
+ def get_img_size(img_path):
+     """
+     There are ways to size an image without loading it into memory, by reading
+     its headers (https://github.com/scardine/image_size), but they seem less reliable.
+
+     Returns (-1, -1) if PIL could not open the image.
+     """
+
+     try:
+         im = Image.open(img_path)
+         width, height = im.size
+     except Exception:
+         return (-1, -1)
+     return (width, height)
+
+
+ def get_total_from_distribution(d):
+
+     # Given a histogram dict mapping value -> count, return the weighted total
+     total = 0
+     for key, count in d.items():
+         total += int(key) * count
+     return total
+
+
+ def sort_dict_val_desc(d, percent=False):
+     """ Sort a dictionary by its values in descending order. Returns a list of tuples. """
+
+     sorted_d = sorted(d.items(), key=operator.itemgetter(1), reverse=True)
+
+     if percent:
+         with_percent = []
+         total = sum([t[1] for t in sorted_d])
+         for k, v in sorted_d:
+             p = '{:.1f}%'.format(100 * float(v) / total)
+             with_percent.append((k, v, p))
+         return with_percent
+
+     return sorted_d
+
+
+ def plot_distribution(d, title='', top=15):
+
+     if top is None or top > len(d):
+         top = len(d)
+
+     sorted_d = sort_dict_val_desc(d)
+
+     top_d = sorted_d[:top]
+     x = [t[0] for t in top_d]
+     y = [t[1] for t in top_d]
+
+     # "others" column
+     others_d = sorted_d[top:]
+     others_sum = sum([t[1] for t in others_d])
+     x.append('others')
+     y.append(others_sum)
+
+     plt.bar(range(len(x)), y, align='center', facecolor='#57BC90', edgecolor=None)
+     plt.xticks(range(len(x)), x, rotation=90)
+     plt.title(title)
+     plt.show()
+
+
+ def plot_histogram(l, title='', max_val=None, bins='auto'):
+
+     if max_val:
+         l = [x for x in l if x < max_val]
+
+     plt.hist(l, bins=bins, facecolor='#57BC90', edgecolor=None)
+     plt.title(title)
+     plt.show()
+
+
+ def draw_bboxes(image, bboxes, classes, thickness=4, show_label=False):
+     """
+     Draw bounding boxes on top of an image.
+
+     Args:
+         image: Path to an image, or a loaded PIL image
+         bboxes: A list of bboxes to draw on the image, each as
+             [top left x, top left y, width, height] in relative coordinates
+         classes: A list of classes corresponding to the bboxes
+         thickness: Thickness of the line to draw, minimum is 1
+
+     Returns:
+         Image object with the bboxes and class labels annotated
+     """
+
+     if type(image) is str:
+         img = Image.open(image)
+     else:
+         img = image.copy()
+
+     draw = ImageDraw.Draw(img)
+     img_width, img_height = img.size
+
+     for i in range(len(bboxes)):
+         x_rel, y_rel, w_rel, h_rel = bboxes[i]
+         x = x_rel * img_width  # x and y are the top left
+         y = y_rel * img_height
+         w = w_rel * img_width
+         h = h_rel * img_height
+
+         if show_label:
+             c = classes[i]
+             draw.text((x + 15, y + 15), str(c), fill=(255, 0, 0, 255))
+
+         for j in range(thickness):
+             draw.rectangle(((x + j, y + j), (x + w + j, y + h + j)), outline='red')
+
+     return img
+
+
+ def is_daytime(date_time):
+     """
+     Returns True if the input timestamp falls during the day; a rough
+     decision based on two seasons.
+     """
+
+     # Summer day hours: 6am - 7pm
+     # Other day hours: 7am - 6pm
+
+     is_summer = date_time.month in [5, 6, 7, 8, 9]
+     if is_summer:
+         return 6 <= date_time.hour <= 19
+     else:
+         return 7 <= date_time.hour <= 18
+
+
+ def parse_timestamp(time_str):
+     """
+     There are three datetime string formats in eMammal, and some records have
+     an empty field.
+
+     Args:
+         time_str: text in the tag ImageDateTime
+
+     Returns:
+         datetime object, error (None if no error)
+     """
+
+     if time_str == '' or time_str is None:
+         return '', 'empty or None'
+     try:
+         res = datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%S')
+         return res, None
+     except Exception:
+         try:
+             res = datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S')
+             return res, None
+         except Exception:
+             try:
+                 res = datetime.strptime(time_str, '%m/%d/%Y %H:%M')
+                 return res, None
+             except Exception:
+                 print('WARNING, time_str cannot be parsed {}.'.format(time_str))
+                 return time_str, 'cannot be parsed {}'.format(time_str)  # return original string
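
A few illustrative calls against the helpers above; a sketch that assumes eMammal_helpers is importable, with hypothetical input values.

    import eMammal_helpers as helpers

    helpers.clean_species_name('Homo sapiens')    # -> 'human'
    helpers.clean_species_name('Camera Misfire')  # -> 'empty'

    # Frame-number padding; for the 1-4 digit inputs these functions accept,
    # clean_frame_number(s) behaves like s.zfill(3), and the 4-digit variant
    # like s.zfill(4).
    helpers.clean_frame_number('7')               # -> '007'
    helpers.clean_frame_number_4_digit('42')      # -> '0042'

    ts, err = helpers.parse_timestamp('2015-06-01T14:30:00')
    assert err is None
    assert helpers.is_daytime(ts)  # June at 14:30 falls within summer day hours
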
megadetector/data_management/importers/eMammal/make_eMammal_json.py

@@ -0,0 +1,223 @@
+ #
+ # make_eMammal_json.py
+ #
+ # Produces the COCO-formatted .json database for an eMammal dataset, i.e. a
+ # collection of folders, each of which contains a deployment_manifest.xml file.
+ #
+ # In this process, each image needs to be loaded to determine its size.
+ #
+ # To add bounding box annotations to the resulting database, use
+ # add_annotations_to_eMammal_json.py.
+ #
+
+ #%% Constants and imports
+
+ # Either add the eMammal directory to your path, or run from there
+ # os.chdir(r'd:\git\MegaDetector\database_tools\eMammal')
+
+ import json
+ import multiprocessing
+ import os
+ # import warnings
+ import eMammal_helpers as helpers
+
+ from datetime import datetime, date
+ from multiprocessing.dummy import Pool as ThreadPool
+ from lxml import etree
+ from tqdm import tqdm
+
+ # Ignore all "PIL cannot read EXIF metainfo for the images" warnings
+ # warnings.filterwarnings('ignore')
+
+ # Should we run the image size retrieval in parallel?
+ run_parallel = False
+
+ output_dir_path = r'd:\path'
+ deployments_path = r'd:\other_path'
+ db_filename = 'apr.json'
+ corrupt_images_db_filename = 'apr_corrupt.json'
+
+ description = 'description'
+ version = '1.0'
+ contributor = 'contributor'
+ curator = '.json created by curator'
+
+
+ #%% Support functions
+
+ def _add_image(entry, full_img_path):
+     """
+     Open the image to get size information, and add height and width to the image entry.
+     """
+
+     img_width, img_height = helpers.get_img_size(full_img_path)
+     if img_width == -1 or img_height == -1:
+         corrupt_images.append(full_img_path)
+         return None
+     entry['width'] = img_width
+     entry['height'] = img_height
+     pbar.update(1)
+     return entry
+
+
+ #%% Main loop (metadata processing; image sizes are retrieved later)
+
+ print('Creating tasks to get all images...')
+ start_time = datetime.now()
+ tasks = []
+ folders = os.listdir(deployments_path)
+
+ all_species_strings = set()
+
+ # deployment = folders[0]
+ for deployment in tqdm(folders):
+
+     deployment_path = os.path.join(deployments_path, deployment)
+     manifest_path = os.path.join(deployment_path, 'deployment_manifest.xml')
+
+     assert os.path.isfile(manifest_path)
+
+     with open(manifest_path, 'r') as f:
+         tree = etree.parse(f)
+
+     root = tree.getroot()
+     project_id = root.findtext('ProjectId')
+     deployment_id = root.findtext('CameraDeploymentID')
+     deployment_location = root.findtext('CameraSiteName')
+
+     image_sequences = root.findall('ImageSequence')
+
+     # sequence = image_sequences[0]
+     for sequence in image_sequences:
+
+         seq_id = sequence.findtext('ImageSequenceId')
+
+         # Get species info for this sequence
+         researcher_identifications = sequence.findall('ResearcherIdentifications')
+         species = set()
+
+         for researcher_id in researcher_identifications:
+             identifications = researcher_id.findall('Identification')
+             for id in identifications:
+                 species_common_name = helpers.clean_species_name(id.findtext('SpeciesCommonName'))
+                 species.add(species_common_name)
+
+         species_str = ';'.join(sorted(list(species)))
+         all_species_strings.add(species_str)
+
+         # Add each image's info to the database
+         images = sequence.findall('Image')
+         seq_num_frames = len(images)  # total number of frames in this sequence
+         assert isinstance(seq_num_frames, int) and seq_num_frames > 0  # historical quirk
+
+         # img = images[0]
+         for img in images:
+
+             img_id = img.findtext('ImageId')
+             img_file_name = img.findtext('ImageFileName')
+             assert img_file_name.lower().endswith('.jpg')
+
+             img_frame = img.findtext('ImageOrder')
+             if img_frame == '' or img_frame is None:
+                 # Some manifests don't have the ImageOrder info, but the info
+                 # is in the file name
+                 img_frame = img_file_name.split('i')[1].split('.')[0]
+
+             # full_img_id has no frame info
+             #
+             # The frame number is only used in requests to iMerit, for ordering
+             full_img_id = 'datasetemammal.project{}.deployment{}.seq{}.img{}'.format(
+                 project_id, deployment_id, seq_id, img_id)
+             full_img_path = os.path.join(deployment_path, img_file_name)
+
+             img_datetime, datetime_err = helpers.parse_timestamp(img.findtext('ImageDateTime'))
+             if datetime_err:
+                 print('WARNING datetime parsing error for image {}. Error: {}'.format(
+                     full_img_path, datetime_err))
+
+             entry = {
+                 'id': full_img_id,
+                 'width': 0,  # placeholders
+                 'height': 0,
+                 'file_name': os.path.join(deployment, img_file_name),
+                 'location': deployment_location,
+                 'datetime': str(img_datetime),
+                 'seq_id': seq_id,
+                 'frame_num': int(img_frame),
+                 'seq_num_frames': seq_num_frames,
+                 'label': species_str  # extra field for eMammal
+             }
+
+             tasks.append((entry, full_img_path))
+
+         # ...for each image
+
+     # ...for each sequence
+
+ # ...for each deployment
+
+ print('Finished creating tasks to get images.')
+
+
+ #%% Get image sizes
+
+ # 'tasks' is currently a list of 2-tuples, with each entry as (image dictionary, path).
+ #
+ # Go through that list and copy just the image dictionaries to 'db_images', adding size
+ # information to each entry. Takes a couple of hours.
+
+ db_images = []
+ corrupt_images = []
+ pbar = tqdm(total=len(tasks))
+
+ if run_parallel:
+     # Opening each image seems too fast for this multi-threaded version to be
+     # faster than the sequential code.
+     num_workers = multiprocessing.cpu_count()
+     pool = ThreadPool(num_workers)
+     db_images = pool.starmap(_add_image, tasks)
+     print('Waiting for image processes to finish...')
+     pool.close()
+     pool.join()
+ else:
+     print('Finding image sizes sequentially')
+     for entry, full_img_path in tasks:
+         db_images.append(_add_image(entry, full_img_path))
+
+ db_images = [i for i in db_images if i is not None]
+
+ print('{} images could not be opened:'.format(len(corrupt_images)))
+ print(corrupt_images)
+ print('Done getting image sizes')
+
+
+ #%% Assemble top-level dictionaries
+
+ db_info = {
+     'year': 'unknown',
+     'version': version,
+     'description': description,
+     'contributor': contributor,
+     'curator': curator,
+     'date_created': str(date.today())
+ }
+
+ coco_formatted_json = {
+     'info': db_info,
+     'images': db_images
+ }
+
+
+ #%% Write out .json
+
+ print('Saving the json database to disk...')
+ with open(os.path.join(output_dir_path, db_filename), 'w') as f:
+     json.dump(coco_formatted_json, f, indent=4, sort_keys=True)
+ print('...done')
+
+ print('Saving list of corrupt images...')
+ with open(os.path.join(output_dir_path, corrupt_images_db_filename), 'w') as f:
+     json.dump(corrupt_images, f, indent=4)
+ print('...done')
+
+ print('Running the script took {}.'.format(datetime.now() - start_time))
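
To sanity-check the database after a run, a minimal sketch; the paths reuse the placeholder values from the script, and the field names match the entry dict assembled above.

    import json
    import os

    output_dir_path = r'd:\path'  # placeholder path, as in the script
    with open(os.path.join(output_dir_path, 'apr.json')) as f:
        db = json.load(f)

    print(db['info']['version'], db['info']['date_created'])
    print('{} images'.format(len(db['images'])))

    # Every image entry carries the fields built in the main loop
    first = db['images'][0]
    for k in ('id', 'file_name', 'width', 'height', 'location',
              'datetime', 'seq_id', 'frame_num', 'seq_num_frames', 'label'):
        print(k, '=', first[k])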