megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (226):
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,180 +0,0 @@
1
- '''
2
- copy_and_unzip_emammal.py
3
-
4
- Siyu Yang
5
-
6
- Script to copy all deployments in the emammal container (mounted on the VM or not) to data
7
- disk at /datadrive and unzip them, deleting the copied zip file.
8
-
9
- Need to add exception handling.
10
- '''
11
-
12
- #%% Imports and constants
13
-
14
- from datetime import datetime
15
- import itertools
16
- import json
17
- import multiprocessing
18
- from multiprocessing.dummy import Pool as ThreadPool # this functions like threading
19
- import os
20
- from shutil import copy, copyfile
21
- from tqdm import tqdm
22
- from typing import Optional
23
- import zipfile
24
-
25
- from azure.storage.blob import BlobServiceClient
26
-
27
-
28
- # configurations and paths
29
- log_folder = '/home/lynx/logs'
30
- dest_folder = '/datadrive/emammal_robertlong' # data disk attached where data is stored
31
- origin = 'cloud' # 'cloud' or 'mounted'
32
-
33
-
34
- #%% Helper functions
35
-
36
- def _copy_unzip(source_path, dest_folder):
37
-
38
- try:
39
- dest_subfolder = os.path.join(dest_folder, os.path.basename(source_path).split('.zip')[0])
40
- if os.path.exists(dest_subfolder):
41
- print('{} exists.'.format(dest_subfolder))
42
- return('exists')
43
-
44
- print('Copying...')
45
- # dest_path = copy(source_path, dest_folder)
46
- dest_path = os.path.join(dest_folder, os.path.basename(source_path))
47
- copyfile(source_path, dest_path)
48
-
49
- with zipfile.ZipFile(dest_path, 'r') as zip_ref:
50
- zip_ref.extractall(dest_subfolder)
51
-
52
- os.remove(dest_path)
53
- print('{} copied and extracted'.format(dest_subfolder))
54
- return None
55
-
56
- except Exception:
57
- try:
58
- print('Retrying...')
59
- dest_path = os.path.join(dest_folder, os.path.basename(source_path))
60
- copyfile(source_path, dest_path)
61
- with zipfile.ZipFile(dest_path, 'r') as zip_ref:
62
- zip_ref.extractall(dest_subfolder)
63
- os.remove(dest_path)
64
- print('{} copied and extracted'.format(dest_subfolder))
65
- return (None)
66
- except Exception as e:
67
- print('WARNING {} did not get copied. Exception: {}'.format(source_path, str(e)))
68
- return source_path
69
-
70
-
71
def copy_from_mounted_container(source_folder, dest_folder):
    """
    Copy every deployment zipfile from each collection folder under
    [source_folder] (the blob container mounted on the VM) into [dest_folder],
    extracting each via _copy_unzip().

    Writes a timestamped JSON log of per-file results (see _copy_unzip's
    return values) to the module-level [log_folder].
    """

    # Enumerate every file in every collection folder
    sources = []
    for collection in sorted(os.listdir(source_folder)):
        collection_folder = os.path.join(source_folder, collection)
        if not os.path.isdir(collection_folder):
            continue

        print('Processing collection {}'.format(collection))

        for file in tqdm(sorted(os.listdir(collection_folder))):
            sources.append(os.path.join(collection_folder, file))

    # Copy/extract sequentially (a commented-out thread-pool variant was
    # removed here as dead code)
    results = [_copy_unzip(source_path, dest_folder) for source_path in sources]

    cur_time = datetime.now().strftime('%Y%m%d-%H%M%S')
    with open(os.path.join(log_folder, 'copy_unzip_results_{}.json'.format(cur_time)), 'w') as f:
        json.dump(results, f)
107
def _download_unzip(blob_service: BlobServiceClient,
                    container: str,
                    blob_path: str,
                    dest_path: str) -> Optional[str]:
    """
    Download the blob [blob_path] from [container] to the local file
    [dest_path], extract it to a sibling folder named after the zipfile,
    and delete the downloaded zipfile.

    Returns None on success, or [blob_path] on any failure.
    """
    try:
        with open(dest_path, 'wb') as output_file:
            container_client = blob_service.get_container_client(container)
            print('Downloading...')
            container_client.download_blob(blob_path).readinto(output_file)

        extraction_folder = dest_path.split('.zip')[0]
        with zipfile.ZipFile(dest_path, 'r') as archive:
            archive.extractall(extraction_folder)
        os.remove(dest_path)

        print('{} copied and extracted'.format(extraction_folder))
        return None

    except Exception as e:
        print('ERROR while downloading or unzipping {}. Exception: {}'.format(blob_path, str(e)))
        return blob_path
131
def download_from_container(dest_folder: str,
                            blob_service: BlobServiceClient,
                            container: str = 'emammal',
                            desired_blob_prefix: str = '') -> None:
    """
    Download and extract every blob in [container] whose name starts with
    [desired_blob_prefix], writing a JSON log of per-blob results (None for
    success, the blob path for failure) to the module-level [log_folder].
    """

    # Bug fix: this previously called get_containre_client(), a misspelling
    # that does not exist on BlobServiceClient and raised AttributeError.
    generator = blob_service.get_container_client(container).list_blobs()
    desired_blobs = [blob.name for blob in generator
                     if blob.name.startswith(desired_blob_prefix)]

    print('desired_blobs', desired_blobs)

    results = []
    for blob_path in tqdm(desired_blobs):
        # Assumes blob paths look like 'emammal/<collection>/<name>.zip', so
        # the third component is the local filename -- TODO confirm
        blob_name = blob_path.split('/')[2]
        print('blob_name', blob_name)
        dest_path = os.path.join(dest_folder, blob_name)
        print('dest_path', dest_path)
        result = _download_unzip(blob_service, container, blob_path, dest_path)
        results.append(result)

    cur_time = datetime.now().strftime('%Y%m%d-%H%M%S')
    with open(os.path.join(log_folder, 'download_unzip_results_{}.json'.format(cur_time)), 'w') as f:
        json.dump(results, f)
155
#%% Command-line driver

if __name__ == '__main__':

    # These are only used in the 'cloud' branch below
    if origin == 'cloud':
        container = 'wpz'
        desired_blob_prefix = 'emammal/0Robert Long/'

    print('Start timing...')
    start_time = datetime.now()

    if origin == 'mounted':
        # if the blob container is already mounted on the VM
        source_folder = '/home/yasiyu/mnt/wildlifeblobssc/emammal' # blob container mounted
        copy_from_mounted_container(source_folder, dest_folder)

    elif origin == 'cloud':
        # or you can download them using the storage Python SDK
        # store storage account key in environment variable AZ_STORAGE_KEY
        #
        # NOTE(review): account_url has no 'https://' scheme prefix here;
        # confirm the SDK accepts a bare hostname
        blob_service = BlobServiceClient(
            account_url='wildlifeblobssc.blob.core.windows.net',
            credential=os.environ["AZ_STORAGE_KEY"])
        download_from_container(dest_folder, blob_service, container=container,
                                desired_blob_prefix=desired_blob_prefix)

    print('Process took {}.'.format(datetime.now() - start_time))
@@ -1,249 +0,0 @@
1
- #
2
- # eMammal_helpers.py
3
- #
4
- # Support functions for processing eMammal metadata
5
- #
6
-
7
- #%% Constants and imports
8
-
9
- from PIL import Image, ImageDraw
10
- import matplotlib.pyplot as plt
11
- import operator
12
- from datetime import datetime
13
-
14
-
15
- #%% Support functions
16
-
17
# Tags that all map to the canonical 'human' label; lowercased once at import
# time rather than rebuilt on every call (these sets were previously
# reconstructed inside the function for each invocation).
_PEOPLE_TAGS = frozenset(x.lower() for x in {
    'Bicycle',
    'Calibration Photos',
    'Camera Trapper',
    'camera trappper',
    'camera trapper',
    'Homo sapien',
    'Homo sapiens',
    'Human, non staff',
    'Human, non-staff',
    'camera trappe',
    'Human non-staff',
    'Setup Pickup',
    'Vehicle'
})

# Tags that all map to the canonical 'empty' label
_NO_ANIMAL_TAGS = frozenset(x.lower() for x in {
    'No Animal', 'no animal', 'Time Lapse', 'Camera Misfire', 'False trigger', 'Blank'})


def clean_species_name(common_name):
    """
    Converts various forms of "human" to the token "human", and various forms
    of "empty" to the token "empty".  All other names are lowercased and
    stripped of surrounding whitespace.
    """

    common_name = common_name.lower().strip()
    if common_name in _PEOPLE_TAGS:
        return 'human'

    if common_name in _NO_ANIMAL_TAGS:
        return 'empty'

    return common_name
53
def clean_frame_number(img_frame):
    """
    Left-pad a frame-number string to a total of 3 digits if < 1000; 4-digit
    frame numbers are returned as-is.

    img_frame is a string from the xml tree.  We need to make sure img_frame
    has length 3 when it's < 1000 so we can match it to the iMerit labels.
    """

    length = len(img_frame)

    assert length > 0
    assert length < 5

    # str.zfill pads to width 3 and leaves longer strings (e.g. '1000') alone,
    # replacing the previous manual '0'-prefix chain
    return img_frame.zfill(3)
72
def clean_frame_number_4_digit(img_frame):
    """
    Left-pad a frame-number string to a total of 4 digits.

    img_frame is a string from the xml tree; 4-digit frame numbers are
    returned as-is.
    """

    length = len(img_frame)

    assert length > 0
    assert length < 5

    # str.zfill pads to width 4, replacing the previous manual '0'-prefix chain
    return img_frame.zfill(4)
93
def get_img_size(img_path):
    """
    There are ways to size the image without loading it into memory by reading its headers
    (https://github.com/scardine/image_size), but seems less reliable.

    Returns (-1, -1) if PIL could not open the image
    """

    try:
        # Use a context manager so the file handle is released promptly (the
        # image was previously never closed), and catch Exception rather than
        # using a bare except (which would also swallow KeyboardInterrupt).
        with Image.open(img_path) as im:
            width, height = im.size
    except Exception:
        return (-1, -1)
    return (width, height)
109
def get_total_from_distribution(d):
    """
    Given a histogram mapping value (as a string) to count, return the
    weighted total, i.e. sum over all entries of value * count.
    """

    return sum(int(value) * count for value, count in d.items())
117
def sort_dict_val_desc(d, percent=False):
    """ Sort a dictionary by the values in descending order. Returns a list of tuples. """

    ranked = sorted(d.items(), key=operator.itemgetter(1), reverse=True)

    if not percent:
        return ranked

    # Append each entry's share of the grand total as a formatted percentage
    total = sum(count for _, count in ranked)
    return [(key, count, '{:.1f}%'.format(100 * float(count) / total))
            for key, count in ranked]
133
def plot_distribution(d, title='', top=15):
    """
    Bar-plot the [top] largest entries of dict [d] (sorted descending by
    value), with all remaining entries lumped into a final 'others' column.
    """

    if top is None or top > len(d):
        top = len(d)

    ranked = sort_dict_val_desc(d)
    leading = ranked[:top]
    remainder = ranked[top:]

    labels = [item[0] for item in leading]
    counts = [item[1] for item in leading]

    # others column
    labels.append('others')
    counts.append(sum(item[1] for item in remainder))

    plt.bar(range(len(labels)), counts, align='center', facecolor='#57BC90', edgecolor=None)
    plt.xticks(range(len(labels)), labels, rotation=90)
    plt.title(title)
    plt.show()
156
def plot_histogram(l, title='', max_val=None, bins='auto'):
    """
    Histogram the values in list [l]; when [max_val] is truthy, values >=
    max_val are discarded first.
    """

    values = [v for v in l if v < max_val] if max_val else l

    plt.hist(values, bins=bins, facecolor='#57BC90', edgecolor=None)
    plt.title(title)
    plt.show()
166
def draw_bboxes(image, bboxes, classes, thickness=4, show_label=False):
    """
    Draw bounding boxes on top of an image
    Args:
        image : Path to image or a loaded PIL image
        bboxes: A list of bboxes to draw on the image, each bbox is [top left x, top left y, width, height] in relative coordinates
        classes: A list of classes corresponding to the bboxes
        thickness: Thickness of the line to draw, minimum is 1
    Outputs:
        Image object with the bboxes and class labels annotated
    """

    # Load from disk if given a path; otherwise leave the caller's image intact
    if type(image) is str:
        img = Image.open(image)
    else:
        img = image.copy()

    draw = ImageDraw.Draw(img)
    img_width, img_height = img.size

    for i, box in enumerate(bboxes):

        x_rel, y_rel, w_rel, h_rel = box

        # Convert relative [x, y, w, h] to absolute pixel coordinates; (x, y)
        # is the top-left corner
        x = x_rel * img_width
        y = y_rel * img_height
        w = w_rel * img_width
        h = h_rel * img_height

        if show_label:
            draw.text((x + 15, y + 15), str(classes[i]), fill=(255, 0, 0, 255))

        # PIL rectangles are one pixel wide; emulate line thickness by drawing
        # [thickness] nested rectangles at one-pixel offsets
        for offset in range(thickness):
            draw.rectangle(((x + offset, y + offset), (x + w + offset, y + h + offset)), outline='red')

    return img
202
def is_daytime(date_time):
    """
    Returns True if daytime as determined by the input timestamp, a rough
    decision based on two seasons.

    Summer (May-September) day hours are 6am-7pm inclusive; otherwise day
    hours are 7am-6pm inclusive.
    """

    # Replaced the 'True if ... else False' anti-idiom and the nested
    # if/else returns with direct chained comparisons
    is_summer = date_time.month in (5, 6, 7, 8, 9)
    if is_summer:
        return 6 <= date_time.hour <= 19
    return 7 <= date_time.hour <= 18
224
def parse_timestamp(time_str):
    """
    There are three datetime string formats in eMammal, and some have an empty field.
    Args:
        time_str: text in the tag ImageDateTime

    Returns:
        datetime object, error (None if no error).  If the string cannot be
        parsed at all, the original string is returned along with an error
        message.
    """

    if time_str == '' or time_str is None:
        return '', 'empty or None'

    # The three formats observed in eMammal manifests, tried in order;
    # replaces the previous three-deep try/except pyramid, and narrows the
    # bare 'except:' to the ValueError that strptime actually raises on a
    # format mismatch
    known_formats = ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S', '%m/%d/%Y %H:%M')
    for fmt in known_formats:
        try:
            return datetime.strptime(time_str, fmt), None
        except ValueError:
            continue

    print('WARNING, time_str cannot be parsed {}.'.format(time_str))
    return time_str, 'cannot be parsed {}'.format(time_str) # return original string
@@ -1,223 +0,0 @@
1
- #
2
- # make_eMammal_json.py
3
- #
4
- # Produces the COCO-formatted json database for an eMammal dataset, i.e. a
5
- # collection of folders, each of which contains a deployment_manifest.xml file.
6
- #
7
- # In this process, each image needs to be loaded to size it.
8
- #
9
- # To add bounding box annotations to the resulting database, use
10
- # add_annotations_to_eMammal_json.py.
11
- #
12
-
13
- #%% Constants and imports
14
-
15
- # Either add the eMammal directory to your path, or run from there
16
- # os.chdir(r'd:\git\MegaDetector\database_tools\eMammal')
17
-
18
- import json
19
- import multiprocessing
20
- import os
21
- # import warnings
22
- import eMammal_helpers as helpers
23
-
24
- from datetime import datetime, date
25
- from multiprocessing.dummy import Pool as ThreadPool
26
- from lxml import etree
27
- from tqdm import tqdm
28
-
29
- # ignoring all "PIL cannot read EXIF metainfo for the images" warnings
30
- # warnings.filterwarnings('ignore')
31
-
32
# Should we run the image size retrieval in parallel?
run_parallel = False

# Folder the output .json files are written to, and the folder containing one
# subfolder per deployment (each with a deployment_manifest.xml)
output_dir_path = r'd:\path'
deployments_path = r'd:\other_path'
db_filename = 'apr.json'
# List of images PIL could not open is written here
corrupt_images_db_filename = 'apr_corrupt.json'

# Metadata written into the 'info' dict of the output COCO .json
description = 'description'
version = '1.0'
contributor = 'contributor'
curator = '.json created by curator'


#%% Support functions
48
def _add_image(entry, full_img_path):
    """
    Open the image to get size information and add height and width to the image entry.

    Returns the updated entry, or None if the image could not be opened (in
    which case the path is recorded in the module-level corrupt_images list).
    """

    width, height = helpers.get_img_size(full_img_path)
    if width == -1 or height == -1:
        corrupt_images.append(full_img_path)
        return None

    entry['width'] = width
    entry['height'] = height

    # pbar is the module-level tqdm progress bar
    pbar.update(1)
    return entry
63
#%% Main loop (metadata processing; image sizes are retrieved later)

# Walk every deployment folder, parse its deployment_manifest.xml, and build
# one COCO-style image entry per image.  Entries are accumulated in 'tasks'
# as (entry, full_img_path) tuples; sizes are filled in by a later cell.

print('Creating tasks to get all images...')
start_time = datetime.now()
tasks = []
folders = os.listdir(deployments_path)

all_species_strings = set()

# deployment = folders[0]
for deployment in tqdm(folders):

    deployment_path = os.path.join(deployments_path, deployment)
    manifest_path = os.path.join(deployment_path, 'deployment_manifest.xml')

    assert os.path.isfile(manifest_path)

    with open(manifest_path, 'r') as f:
        tree = etree.parse(f)

    root = tree.getroot()
    project_id = root.findtext('ProjectId')
    deployment_id = root.findtext('CameraDeploymentID')
    deployment_location = root.findtext('CameraSiteName')

    image_sequences = root.findall('ImageSequence')

    # sequence = image_sequences[0]
    for sequence in image_sequences:

        seq_id = sequence.findtext('ImageSequenceId')

        # get species info for this sequence
        researcher_identifications = sequence.findall('ResearcherIdentifications')
        species = set()

        for researcher_id in researcher_identifications:
            identifications = researcher_id.findall('Identification')
            # NOTE(review): the loop variable 'id' shadows the builtin id()
            for id in identifications:
                species_common_name = helpers.clean_species_name(id.findtext('SpeciesCommonName'))
                species.add(species_common_name)

        # Semicolon-delimited, sorted species list becomes the image label
        species_str = ';'.join(sorted(list(species)))
        all_species_strings.add(species_str)

        # add each image's info to database
        images = sequence.findall('Image')
        seq_num_frames = len(images) # total number of frames in this sequence
        assert isinstance(seq_num_frames,int) and seq_num_frames > 0 # historical quirk

        # img = images[0]
        for img in images:

            img_id = img.findtext('ImageId')
            img_file_name = img.findtext('ImageFileName')
            assert img_file_name.lower().endswith('.jpg')

            img_frame = img.findtext('ImageOrder')
            if img_frame == '' or img_frame is None:
                # some manifests don't have the ImageOrder info, but the info is in the file name
                # (assumes filenames look like '...i<frame>.jpg' -- TODO confirm)
                img_frame = img_file_name.split('i')[1].split('.')[0]

            # full_img_id has no frame info
            #
            # frame number only used in requests to iMerit for ordering
            full_img_id = 'datasetemammal.project{}.deployment{}.seq{}.img{}'.format(
                project_id, deployment_id, seq_id, img_id)
            full_img_path = os.path.join(deployment_path, img_file_name)

            img_datetime, datetime_err = helpers.parse_timestamp(img.findtext('ImageDateTime'))
            if datetime_err:
                print('WARNING datetime parsing error for image {}. Error: {}'.format(
                    full_img_path, datetime_err))

            entry = {
                'id': full_img_id,
                'width': 0, # place holders
                'height': 0,
                'file_name': os.path.join(deployment, img_file_name),
                'location': deployment_location,
                'datetime': str(img_datetime),
                'seq_id': seq_id,
                'frame_num': int(img_frame),
                'seq_num_frames': seq_num_frames,
                'label': species_str # extra field for eMammal
            }

            tasks.append((entry, full_img_path))

        # ...for each image

    # ...for each sequence

# ...for each deployment

print('Finished creating tasks to get images.')
161
#%% Get image sizes

# 'tasks' is currently a list of 2-tuples, with each entry as [image dictionary,path].
#
# Go through that and copy just the image dictionaries to 'db_images', adding size
# information to each entry. Takes a couple hours.

db_images = []
corrupt_images = []
# Module-level progress bar; _add_image() updates it once per image
pbar = tqdm(total=len(tasks))

if run_parallel:
    # opening each image seems too fast for this multi-threaded version to be faster than sequential code.
    num_workers = multiprocessing.cpu_count()
    pool = ThreadPool(num_workers)
    db_images = pool.starmap(_add_image, tasks)
    print('Waiting for image processes to finish...')
    pool.close()
    pool.join()
else:
    print('Finding image size sequentially')
    for entry, full_img_path in tasks:
        db_images.append(_add_image(entry, full_img_path))

# _add_image() returns None for unreadable images; drop those entries
db_images = [i for i in db_images if i is not None]

print('{} images could not be opened:'.format(len(corrupt_images)))
print(corrupt_images)
print('Done getting image sizes')
192
#%% Assemble top-level dictionaries

# COCO-style 'info' block; version/description/contributor/curator come from
# the configuration constants at the top of the file
db_info = {
    'year': 'unknown',
    'version': version,
    'description': description,
    'contributor': contributor,
    'curator': curator,
    'date_created': str(date.today())
}

# No 'annotations' or 'categories' here; boxes are added later by
# add_annotations_to_eMammal_json.py (see file header)
coco_formatted_json = {
    'info': db_info,
    'images': db_images
}
209
#%% Write out .json

# Write the COCO-formatted database and the list of unreadable images to
# output_dir_path
print('Saving the json database to disk...')
with open(os.path.join(output_dir_path, db_filename), 'w') as f:
    json.dump(coco_formatted_json, f, indent=4, sort_keys=True)
print('...done')

print('Saving list of corrupt images...')
with open(os.path.join(output_dir_path, corrupt_images_db_filename), 'w') as f:
    json.dump(corrupt_images, f, indent=4)
print('...done')

print('Running the script took {}.'.format(datetime.now() - start_time))