megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (201)
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
megadetector/api/batch_processing/api_core/server.py
@@ -0,0 +1,294 @@
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # Licensed under the MIT License.
+
+ import logging
+ import string
+ import uuid
+ import threading
+ from datetime import datetime, timedelta
+
+ import sas_blob_utils
+ from flask import Flask, request, jsonify
+
+ import server_api_config as api_config
+ from server_app_config import AppConfig
+ from server_batch_job_manager import BatchJobManager
+ from server_orchestration import create_batch_job, monitor_batch_job
+ from server_job_status_table import JobStatusTable
+ from server_utils import *
+
+ # %% Flask app
+ app = Flask(__name__)
+
+ # reference: https://trstringer.com/logging-flask-gunicorn-the-manageable-way/
+ if __name__ != '__main__':
+     gunicorn_logger = logging.getLogger('gunicorn.error')
+     app.logger.handlers = gunicorn_logger.handlers
+     app.logger.setLevel(gunicorn_logger.level)
+
+
+ API_PREFIX = api_config.API_PREFIX
+ app.logger.info('server, created Flask application...')
+
+
+ # %% Helper classes
+
+ app_config = AppConfig()
+ job_status_table = JobStatusTable()
+ batch_job_manager = BatchJobManager()
+ app.logger.info('server, finished instantiating helper classes')
+
+
+ # %% Flask endpoints
+
+ @app.route(f'{API_PREFIX}/')
+ def hello():
+     return f'Camera traps batch processing API. Instance: {api_config.API_INSTANCE_NAME}'
+
+
+ @app.route(f'{API_PREFIX}/request_detections', methods=['POST'])
+ def request_detections():
+     """
+     Checks that the input parameters to this endpoint are valid, starts a thread
+     to launch the batch processing job, and returns the job_id/request_id to the user.
+     """
+     if not request.is_json:
+         msg = 'Body needs to have a JSON mimetype (e.g., application/json).'
+         return make_error(415, msg)
+
+     try:
+         post_body = request.get_json()
+     except Exception as e:
+         return make_error(415, f'Error occurred reading POST request body: {e}.')
+
+     app.logger.info(f'server, request_detections, post_body: {post_body}')
+
+     # required params
+
+     caller_id = post_body.get('caller', None)
+     if caller_id is None or caller_id not in app_config.get_allowlist():
+         msg = ('Parameter caller is not supplied or is not on our allowlist. '
+                'Please email cameratraps@lila.science to request access.')
+         return make_error(401, msg)
+
+     use_url = post_body.get('use_url', False)
+     if use_url and isinstance(use_url, str):  # in case it is included but is intended to be False
+         if use_url.lower() in ['false', 'f', 'no', 'n']:
+             use_url = False
+
+     input_container_sas = post_body.get('input_container_sas', None)
+     if not input_container_sas and not use_url:
+         msg = ('input_container_sas with read and list access is a required '
+                'field when not using image URLs.')
+         return make_error(400, msg)
+
+     if input_container_sas is not None:
+         if not sas_blob_utils.is_container_uri(input_container_sas):
+             return make_error(400, 'input_container_sas provided is not for a container.')
+
+         result = check_data_container_sas(input_container_sas)
+         if result is not None:
+             return make_error(result[0], result[1])
+
+     # can be a URL to a file not hosted in an Azure blob storage container
+     images_requested_json_sas = post_body.get('images_requested_json_sas', None)
+
+     if images_requested_json_sas is not None:
+         if not images_requested_json_sas.startswith(('http://', 'https://')):
+             return make_error(400, 'images_requested_json_sas needs to be a URL.')
+
+     # if use_url, then images_requested_json_sas is required
+     if use_url and images_requested_json_sas is None:
+         return make_error(400, 'images_requested_json_sas is required since use_url is true.')
+
+     # optional params
+
+     # check that model_version is among the available model versions
+     model_version = post_body.get('model_version', '')
+     if model_version != '':
+         model_version = str(model_version)  # in case the user passed an int
+         if model_version not in api_config.MD_VERSIONS_TO_REL_PATH:  # TODO use AppConfig to store model version info
+             return make_error(400, f'model_version {model_version} is not supported.')
+
+     # check that request_name contains only allowed characters
+     request_name = post_body.get('request_name', '')
+     if request_name != '':
+         if len(request_name) > 92:
+             return make_error(400, 'request_name is longer than 92 characters.')
+         allowed = set(string.ascii_letters + string.digits + '_' + '-')
+         if not set(request_name) <= allowed:
+             msg = ('request_name contains invalid characters (only letters, '
+                    'digits, - and _ are allowed).')
+             return make_error(400, msg)
+
+     # optional params for telemetry collection - logged to the status table for now as part of call_params
+     country = post_body.get('country', None)
+     organization_name = post_body.get('organization_name', None)
+
+     # All API instances / node pools share a quota on the total number of active Jobs;
+     # we cannot accept new Job submissions if we are at the quota
+     try:
+         num_active_jobs = batch_job_manager.get_num_active_jobs()
+         if num_active_jobs >= api_config.MAX_BATCH_ACCOUNT_ACTIVE_JOBS:
+             return make_error(503, 'Too many active jobs, please try again later.')
+     except Exception as e:
+         return make_error(500, f'Error checking number of active jobs: {e}')
+
+     try:
+         job_id = uuid.uuid4().hex
+         job_status_table.create_job_status(
+             job_id=job_id,
+             status=get_job_status('created', 'Request received. Listing images next...'),
+             call_params=post_body
+         )
+     except Exception as e:
+         return make_error(500, f'Error creating a job status entry: {e}')
+
+     try:
+         thread = threading.Thread(
+             target=create_batch_job,
+             name=f'job_{job_id}',
+             kwargs={'job_id': job_id, 'body': post_body}
+         )
+         thread.start()
+     except Exception as e:
+         return make_error(500, f'Error creating or starting the batch processing thread: {e}')
+
+     return {'request_id': job_id}
+
+
+ @app.route(f'{API_PREFIX}/cancel_request', methods=['POST'])
+ def cancel_request():
+     """
+     Cancels a request/job given the job_id and caller_id.
+     """
+     if not request.is_json:
+         msg = 'Body needs to have a JSON mimetype (e.g., application/json).'
+         return make_error(415, msg)
+     try:
+         post_body = request.get_json()
+     except Exception as e:
+         return make_error(415, f'Error occurred reading POST request body: {e}.')
+
+     app.logger.info(f'server, cancel_request received, body: {post_body}')
+
+     # required fields
+     job_id = post_body.get('request_id', None)
+     if job_id is None:
+         return make_error(400, 'request_id is a required field.')
+
+     caller_id = post_body.get('caller', None)
+     if caller_id is None or caller_id not in app_config.get_allowlist():
+         return make_error(401, 'Parameter caller is not supplied or is not on our allowlist.')
+
+     item_read = job_status_table.read_job_status(job_id)
+     if item_read is None:
+         return make_error(404, 'Task is not found.')
+     if 'status' not in item_read:
+         return make_error(404, 'Something went wrong. This task does not have a status field.')
+
+     request_status = item_read['status']['request_status']
+     if request_status not in ['running', 'problem']:
+         # request_status is either completed or failed
+         return make_error(400, f'Task has status {request_status} and cannot be canceled.')
+
+     try:
+         batch_job_manager.cancel_batch_job(job_id)
+         # the create_batch_job thread will stop the next time it wakes up
+     except Exception as e:
+         return make_error(500, f'Error when canceling the request: {e}')
+     else:
+         job_status_table.update_job_status(job_id, {
+             'request_status': 'canceled',
+             'message': 'Request has been canceled by the user.'
+         })
+         return 'Canceling signal has been sent. You can verify the status at the /task endpoint.'
+
+
+ @app.route(f'{API_PREFIX}/task/<job_id>')
+ def retrieve_job_status(job_id: str):
+     """
+     Does not require the "caller" field, to avoid checking the allowlist in App Configuration.
+     Retains the /task endpoint name to be compatible with previous versions.
+     """
+     # Fix for Zooniverse - delete any "-" characters in the job_id
+     job_id = job_id.replace('-', '')
+
+     item_read = job_status_table.read_job_status(job_id)  # just what the monitoring thread wrote to the DB
+     if item_read is None:
+         return make_error(404, 'Task is not found.')
+     if 'status' not in item_read or 'last_updated' not in item_read or 'call_params' not in item_read:
+         return make_error(404, 'Something went wrong. This task does not have a valid status.')
+
+     # If the status is running, it could be a Job submitted before the last restart of this
+     # API instance. If that is the case, we should start to monitor its progress again.
+     status = item_read['status']
+
+     last_updated = datetime.fromisoformat(item_read['last_updated'][:-1])  # get rid of the "Z" (required by Cosmos DB)
+     time_passed = datetime.utcnow() - last_updated
+     job_is_unmonitored = time_passed > timedelta(minutes=(api_config.MONITOR_PERIOD_MINUTES + 1))
+
+     if isinstance(status, dict) and \
+             'request_status' in status and \
+             status['request_status'] in ['running', 'problem'] and \
+             'num_tasks' in status and \
+             job_id not in get_thread_names() and \
+             job_is_unmonitored:
+         # WARNING: model_version could be wrong (a newer version number gets written to the output
+         # file) around the time that the model is updated, if this request was submitted before the
+         # model update and the API restart; this should be quite rare
+         model_version = item_read['call_params'].get('model_version', api_config.DEFAULT_MD_VERSION)
+
+         num_tasks = status['num_tasks']
+         job_name = item_read['call_params'].get('request_name', '')
+         job_submission_timestamp = item_read.get('job_submission_time', '')
+
+         thread = threading.Thread(
+             target=monitor_batch_job,
+             name=f'job_{job_id}',
+             kwargs={
+                 'job_id': job_id,
+                 'num_tasks': num_tasks,
+                 'model_version': model_version,
+                 'job_name': job_name,
+                 'job_submission_timestamp': job_submission_timestamp
+             }
+         )
+         thread.start()
+         app.logger.info(f'server, started a new thread to monitor job {job_id}')
+
+     # conform to previous schemes
+     if 'num_tasks' in status:
+         del status['num_tasks']
+     item_to_return = {
+         'Status': status,
+         'Endpoint': f'{API_PREFIX}/request_detections',
+         'TaskId': job_id,
+         'Timestamp': item_read['last_updated']
+     }
+     return item_to_return
+
+
+ @app.route(f'{API_PREFIX}/default_model_version')
+ def get_default_model_version() -> str:
+     return api_config.DEFAULT_MD_VERSION
+
+
+ @app.route(f'{API_PREFIX}/supported_model_versions')
+ def get_supported_model_versions() -> str:
+     return jsonify(sorted(list(api_config.MD_VERSIONS_TO_REL_PATH.keys())))
+
+
+ # %% undocumented endpoints
+
+ def get_thread_names() -> list:
+     thread_names = []
+     for thread in threading.enumerate():
+         if thread.name.startswith('job_'):
+             thread_names.append(thread.name.split('_')[1])
+     return sorted(thread_names)
+
+
+ @app.route(f'{API_PREFIX}/all_jobs')
+ def get_all_jobs():
+     """Lists all Jobs being monitored since this API instance started."""
+     thread_names = get_thread_names()
+     return jsonify(thread_names)
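
For orientation, here is what a typical interaction with the endpoints above might look like from a client. This is a minimal sketch using the `requests` library; the base URL and caller ID are hypothetical placeholders, and the container SAS URL is elided.

```python
# Hypothetical client for the endpoints above; 'example-caller' must be on
# the instance's allowlist, and the base URL depends on the deployment.
import time
import requests

API_BASE = 'http://localhost:5000/v4/camera-trap/detection-batch'

resp = requests.post(f'{API_BASE}/request_detections', json={
    'caller': 'example-caller',
    'input_container_sas': '<container SAS URL with read+list access>',
    'request_name': 'example_run',
    'model_version': '4.1',
})
resp.raise_for_status()
job_id = resp.json()['request_id']

# Poll the /task endpoint until the job is no longer running
while True:
    status = requests.get(f'{API_BASE}/task/{job_id}').json()
    if status['Status']['request_status'] not in ('running', 'problem'):
        break
    time.sleep(60)
print(status['Status'])
```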
megadetector/api/batch_processing/api_core/server_api_config.py
@@ -0,0 +1,98 @@
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # Licensed under the MIT License.
+
+ """
+ A module to hold the configurations specific to an instance of the API.
+ """
+
+ import os
+
+
+ #%% instance-specific API settings
+ # you likely need to modify these when deploying a new instance of the API
+
+ API_INSTANCE_NAME = 'cm'  # 'internal', 'cm', 'camelot', 'zooniverse'
+ POOL_ID = 'cm_1'  # name of the Batch pool created for this API instance
+
+ MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB = 4 * 1000 * 1000  # inclusive
+
+ # Azure Batch for batch processing
+ BATCH_ACCOUNT_NAME = 'cameratrapssc'
+ BATCH_ACCOUNT_URL = 'https://cameratrapssc.southcentralus.batch.azure.com'
+
+
+ #%% general API settings
+ API_PREFIX = '/v4/camera-trap/detection-batch'  # URL to root is http://127.0.0.1:5000/v4/camera-trap/detection-batch/
+
+ MONITOR_PERIOD_MINUTES = 10
+
+ # stop the monitoring thread once it has woken up to check this many times
+ MAX_MONITOR_CYCLES = 4 * 7 * int((60 * 24) / MONITOR_PERIOD_MINUTES)  # 4 weeks
+
+ IMAGE_SUFFIXES_ACCEPTED = ('.jpg', '.jpeg', '.png')  # case-insensitive
+ assert isinstance(IMAGE_SUFFIXES_ACCEPTED, tuple)
+
+ OUTPUT_FORMAT_VERSION = '1.1'
+
+ NUM_IMAGES_PER_TASK = 2000
+
+ OUTPUT_SAS_EXPIRATION_DAYS = 180
+
+ # quota of active Jobs in our Batch account, which all node pools (i.e. API instances) share;
+ # we cannot accept job submissions if there are this many active Jobs already
+ MAX_BATCH_ACCOUNT_ACTIVE_JOBS = 300
+
+
+ #%% MegaDetector info
+ DETECTION_CONF_THRESHOLD = 0.1
+
+ # relative to the `megadetector_copies` folder in the container `models`
+ # TODO add MD versions info to AppConfig
+ MD_VERSIONS_TO_REL_PATH = {
+     '4.1': 'megadetector_v4_1/md_v4.1.0.pb',
+     '3': 'megadetector_v3/megadetector_v3_tf19.pb',
+     '2': 'megadetector_v2/frozen_inference_graph.pb'
+ }
+ DEFAULT_MD_VERSION = '4.1'
+ assert DEFAULT_MD_VERSION in MD_VERSIONS_TO_REL_PATH
+
+ # copied from the TFDetector class in detection/run_detector.py
+ DETECTOR_LABEL_MAP = {
+     '1': 'animal',
+     '2': 'person',
+     '3': 'vehicle'
+ }
+
+
+ #%% Azure Batch settings
+ NUM_TASKS_PER_SUBMISSION = 20  # max for the Python SDK without the extension is 100
+
+ NUM_TASKS_PER_RESUBMISSION = 5
+
+
+ #%% env variables for service credentials, and info related to these services
+
+ # Cosmos DB `batch-api-jobs` table for job status
+ COSMOS_ENDPOINT = os.environ['COSMOS_ENDPOINT']
+ COSMOS_WRITE_KEY = os.environ['COSMOS_WRITE_KEY']
+
+ # Service principal of this "application", authorized to use Azure Batch
+ APP_TENANT_ID = os.environ['APP_TENANT_ID']
+ APP_CLIENT_ID = os.environ['APP_CLIENT_ID']
+ APP_CLIENT_SECRET = os.environ['APP_CLIENT_SECRET']
+
+ # Blob storage account for storing Batch tasks' outputs and the scoring script
+ STORAGE_ACCOUNT_NAME = os.environ['STORAGE_ACCOUNT_NAME']
+ STORAGE_ACCOUNT_KEY = os.environ['STORAGE_ACCOUNT_KEY']
+
+ # STORAGE_CONTAINER_MODELS = 'models'  # names of the two containers supporting Batch
+ STORAGE_CONTAINER_API = 'batch-api'
+
+ # Azure Container Registry for the Docker image used by our Batch node pools
+ REGISTRY_SERVER = os.environ['REGISTRY_SERVER']
+ REGISTRY_PASSWORD = os.environ['REGISTRY_PASSWORD']
+ CONTAINER_IMAGE_NAME = 'cameratracrsppftkje.azurecr.io/tensorflow:1.14.0-gpu-py3'
+
+ # Azure App Configuration instance to get configurations specific to
+ # this instance of the API
+ APP_CONFIG_CONNECTION_STR = os.environ['APP_CONFIG_CONNECTION_STR']
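
Since this module reads credentials from `os.environ` at import time, a missing variable crashes the app with a bare `KeyError` on startup. A small pre-flight check along the following lines can fail faster and more clearly; the variable names come from the module above, but the check itself is a sketch, not part of the package.

```python
# Hypothetical pre-flight check: fail fast with a clear message if any
# credential required by server_api_config is missing from the environment.
import os
import sys

REQUIRED_ENV_VARS = [
    'COSMOS_ENDPOINT', 'COSMOS_WRITE_KEY',
    'APP_TENANT_ID', 'APP_CLIENT_ID', 'APP_CLIENT_SECRET',
    'STORAGE_ACCOUNT_NAME', 'STORAGE_ACCOUNT_KEY',
    'REGISTRY_SERVER', 'REGISTRY_PASSWORD',
    'APP_CONFIG_CONNECTION_STR',
]

missing = [v for v in REQUIRED_ENV_VARS if v not in os.environ]
if missing:
    sys.exit(f'Missing environment variables: {", ".join(missing)}')
```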
megadetector/api/batch_processing/api_core/server_app_config.py
@@ -0,0 +1,55 @@
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # Licensed under the MIT License.
+
+ """
+ A class wrapping the Azure App Configuration client to get configurations
+ for each instance of the API.
+ """
+ import logging
+ import os
+
+ from server_api_config import APP_CONFIG_CONNECTION_STR, API_INSTANCE_NAME
+
+ from azure.appconfiguration import AzureAppConfigurationClient
+
+
+ log = logging.getLogger(os.environ['FLASK_APP'])
+
+
+ class AppConfig:
+     """Wrapper around the Azure App Configuration client"""
+
+     def __init__(self):
+         self.client = AzureAppConfigurationClient.from_connection_string(APP_CONFIG_CONNECTION_STR)
+
+         self.api_instance = API_INSTANCE_NAME
+
+         # sentinel should change if new configurations are available
+         self.sentinel = self._get_sentinel()  # get initial sentinel and allowlist values
+         self.allowlist = self._get_allowlist()
+
+     def _get_sentinel(self):
+         return self.client.get_configuration_setting(key='batch_api:sentinel').value
+
+     def _get_allowlist(self):
+         filtered_listed = self.client.list_configuration_settings(key_filter='batch_api_allow:*')
+         allowlist = []
+         for item in filtered_listed:
+             if item.value == self.api_instance:
+                 allowlist.append(item.key.split('batch_api_allow:')[1])
+         return allowlist
+
+     def get_allowlist(self):
+         try:
+             cur_sentinel = self._get_sentinel()
+             if cur_sentinel == self.sentinel:
+                 # configs have not changed
+                 return self.allowlist
+             else:
+                 self.sentinel = cur_sentinel
+                 self.allowlist = self._get_allowlist()
+                 return self.allowlist
+
+         except Exception as e:
+             log.error(f'AppConfig, get_allowlist, exception so using old allowlist: {e}')
+             return self.allowlist
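
The sentinel key is what makes this cache cheap: `get_allowlist` re-reads only `batch_api:sentinel` on each request and re-lists the `batch_api_allow:*` settings only when that one value changes (so operators are expected to bump the sentinel after editing the allowlist). A toy sketch of the same pattern, with a plain dict standing in for Azure App Configuration:

```python
# Toy sketch of the sentinel-based cache refresh used by AppConfig above;
# a plain dict stands in for the Azure App Configuration store.
store = {'sentinel': '1', 'allow:alice': 'cm', 'allow:bob': 'cm'}

class CachedAllowlist:
    def __init__(self):
        self.sentinel = store['sentinel']
        self.allowlist = self._list_allowlist()

    def _list_allowlist(self):
        # the "expensive" full listing, done only when the sentinel changes
        return sorted(k.split(':')[1] for k, v in store.items()
                      if k.startswith('allow:') and v == 'cm')

    def get(self):
        if store['sentinel'] != self.sentinel:  # cheap single-key check
            self.sentinel = store['sentinel']
            self.allowlist = self._list_allowlist()
        return self.allowlist

cache = CachedAllowlist()
assert cache.get() == ['alice', 'bob']

store['allow:carol'] = 'cm'   # edit the allowlist...
store['sentinel'] = '2'       # ...then bump the sentinel
assert cache.get() == ['alice', 'bob', 'carol']
```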
megadetector/api/batch_processing/api_core/server_batch_job_manager.py
@@ -0,0 +1,220 @@
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # Licensed under the MIT License.
+
+ """
+ A class wrapping the Azure Batch client.
+ """
+
+ import logging
+ import os
+ import math
+ from typing import Tuple
+ from datetime import datetime, timedelta
+
+ import sas_blob_utils
+ from azure.storage.blob import ContainerClient, ContainerSasPermissions, generate_container_sas
+ from azure.batch import BatchServiceClient
+ from azure.batch.models import *
+ from azure.common.credentials import ServicePrincipalCredentials
+
+ import server_api_config as api_config
+
+
+ # The Gunicorn logger handler will get attached if needed in server.py
+ log = logging.getLogger(os.environ['FLASK_APP'])
+
+
+ class BatchJobManager:
+     """Wrapper around the Azure Batch client"""
+
+     def __init__(self):
+         credentials = ServicePrincipalCredentials(
+             client_id=api_config.APP_CLIENT_ID,
+             secret=api_config.APP_CLIENT_SECRET,
+             tenant=api_config.APP_TENANT_ID,
+             resource='https://batch.core.windows.net/'
+         )
+         self.batch_client = BatchServiceClient(credentials=credentials,
+                                                batch_url=api_config.BATCH_ACCOUNT_URL)
+
+     def create_job(self, job_id: str, detector_model_rel_path: str,
+                    input_container_sas: str, use_url: bool):
+         log.info(f'BatchJobManager, create_job, job_id: {job_id}')
+         job = JobAddParameter(
+             id=job_id,
+             pool_info=PoolInformation(pool_id=api_config.POOL_ID),
+
+             # set for all tasks in the job
+             common_environment_settings=[
+                 EnvironmentSetting(name='DETECTOR_REL_PATH', value=detector_model_rel_path),
+                 EnvironmentSetting(name='API_INSTANCE_NAME', value=api_config.API_INSTANCE_NAME),
+                 EnvironmentSetting(name='JOB_CONTAINER_SAS', value=input_container_sas),
+                 EnvironmentSetting(name='JOB_USE_URL', value=str(use_url)),
+                 EnvironmentSetting(name='DETECTION_CONF_THRESHOLD', value=api_config.DETECTION_CONF_THRESHOLD)
+             ]
+         )
+         self.batch_client.job.add(job)
+
+     def submit_tasks(self, job_id: str, num_images: int) -> Tuple[int, list]:
+         """
+         Shards the images and submits each shard as a Task under the Job pointed to by this job_id.
+
+         Args:
+             job_id: ID of the Batch Job to submit the tasks to
+             num_images: total number of images to be processed in this Job
+
+         Returns:
+             num_tasks: total number of Tasks that should be in this Job
+             task_ids_failed_to_submit: which of the above Tasks failed to be submitted
+         """
+         log.info('BatchJobManager, submit_tasks')
+
+         # cannot execute the scoring script that is in the mounted directory; it has to be copied to the cwd
+         # no luck passing the command-line arguments via a formatted string - set as env vars instead
+         score_command = '/bin/bash -c \"cp $AZ_BATCH_NODE_MOUNTS_DIR/batch-api/scripts/score.py . && python score.py\" '
+
+         num_images_per_task = api_config.NUM_IMAGES_PER_TASK
+
+         # form shards of images and assign each shard to a Task
+         num_tasks = math.ceil(num_images / num_images_per_task)
+
+         # for persisting stdout and stderr
+         permissions = ContainerSasPermissions(read=True, write=True, list=True)
+         access_duration_hrs = api_config.MONITOR_PERIOD_MINUTES * api_config.MAX_MONITOR_CYCLES / 60
+         container_sas_token = generate_container_sas(
+             account_name=api_config.STORAGE_ACCOUNT_NAME,
+             container_name=api_config.STORAGE_CONTAINER_API,
+             account_key=api_config.STORAGE_ACCOUNT_KEY,
+             permission=permissions,
+             expiry=datetime.utcnow() + timedelta(hours=access_duration_hrs))
+         container_sas_url = sas_blob_utils.build_azure_storage_uri(
+             account=api_config.STORAGE_ACCOUNT_NAME,
+             container=api_config.STORAGE_CONTAINER_API,
+             sas_token=container_sas_token)
+
+         tasks = []
+         for task_id in range(num_tasks):
+             begin_index = task_id * num_images_per_task
+             end_index = begin_index + num_images_per_task
+
+             # persist stdout and stderr (they will be removed when the node is removed)
+             # paths are relative to the Task working directory
+             stderr_destination = OutputFileDestination(
+                 container=OutputFileBlobContainerDestination(
+                     container_url=container_sas_url,
+                     path=f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_logs/job_{job_id}_task_{task_id}_stderr.txt'
+                 )
+             )
+             stdout_destination = OutputFileDestination(
+                 container=OutputFileBlobContainerDestination(
+                     container_url=container_sas_url,
+                     path=f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_logs/job_{job_id}_task_{task_id}_stdout.txt'
+                 )
+             )
+             std_err_and_out = [
+                 OutputFile(
+                     file_pattern='../stderr.txt',  # stderr.txt is at the same level as wd
+                     destination=stderr_destination,
+                     upload_options=OutputFileUploadOptions(upload_condition=OutputFileUploadCondition.task_completion)
+                     # can also just upload on failure
+                 ),
+                 OutputFile(
+                     file_pattern='../stdout.txt',
+                     destination=stdout_destination,
+                     upload_options=OutputFileUploadOptions(upload_condition=OutputFileUploadCondition.task_completion)
+                 )
+             ]
+
+             task = TaskAddParameter(
+                 id=str(task_id),
+                 command_line=score_command,
+                 container_settings=TaskContainerSettings(
+                     image_name=api_config.CONTAINER_IMAGE_NAME,
+                     working_directory='taskWorkingDirectory'
+                 ),
+                 environment_settings=[
+                     EnvironmentSetting(name='TASK_BEGIN_INDEX', value=begin_index),
+                     EnvironmentSetting(name='TASK_END_INDEX', value=end_index),
+                 ],
+                 output_files=std_err_and_out
+             )
+             tasks.append(task)
+
+         # first try submitting the Tasks
+         task_ids_failed_to_submit = self._create_tasks(job_id, tasks, api_config.NUM_TASKS_PER_SUBMISSION, 1)
+
+         # retry submitting the Tasks that failed
+         if len(task_ids_failed_to_submit) > 0:
+             task_ids_failed_to_submit_set = set(task_ids_failed_to_submit)
+             tasks_to_retry = [t for t in tasks if t.id in task_ids_failed_to_submit_set]
+             task_ids_failed_to_submit = self._create_tasks(job_id,
+                                                            tasks_to_retry,
+                                                            api_config.NUM_TASKS_PER_RESUBMISSION,
+                                                            2)
+
+             if len(task_ids_failed_to_submit) > 0:
+                 log.info('BatchJobManager, submit_tasks, after retry, '
+                          f'len of task_ids_failed_to_submit: {len(task_ids_failed_to_submit)}')
+             else:
+                 log.info('BatchJobManager, submit_tasks, after retry, all Tasks submitted')
+         else:
+             log.info('BatchJobManager, submit_tasks, all Tasks submitted on the first try')
+
+         # Change the Job's on_all_tasks_complete option to 'terminateJob' so the Job's status changes
+         # automatically after all submitted tasks are done.
+         # This is so that we do not use up the quota for active Jobs in the Batch account.
+         job_patch_params = JobPatchParameter(
+             on_all_tasks_complete=OnAllTasksComplete.terminate_job
+         )
+         self.batch_client.job.patch(job_id, job_patch_params)
+
+         return num_tasks, task_ids_failed_to_submit
+
+     def _create_tasks(self, job_id, tasks, num_tasks_per_submission, n_th_try) -> list:
+         task_ids_failed_to_submit = []
+
+         for i in range(0, len(tasks), num_tasks_per_submission):
+             tasks_to_submit = tasks[i: i + num_tasks_per_submission]
+
+             # return type: TaskAddCollectionResult
+             collection_results = self.batch_client.task.add_collection(job_id, tasks_to_submit, threads=10)
+
+             for task_result in collection_results.value:
+                 if task_result.status is not TaskAddStatus.success:
+                     # actually we should probably only re-submit if it's a server_error
+                     task_ids_failed_to_submit.append(task_result.task_id)
+                     log.info(f'task {task_result.task_id} failed to submit after {n_th_try} try/tries, '
+                              f'status: {task_result.status}, error: {task_result.error}')
+
+         return task_ids_failed_to_submit
+
+     def get_num_completed_tasks(self, job_id: str) -> Tuple[int, int]:
+         """
+         Returns the number of completed Tasks for the Job with this job_id, as a tuple:
+         (number of succeeded Tasks, number of failed Tasks) - both count as "completed".
+         """
+         # docs: https://docs.microsoft.com/en-us/rest/api/batchservice/odata-filters-in-batch#list-tasks
+         tasks = self.batch_client.task.list(job_id,
+                                             task_list_options=TaskListOptions(
+                                                 filter='state eq \'completed\'',
+                                                 select='id, executionInfo'  # only these fields will be populated
+                                             ))
+         num_succeeded, num_failed = 0, 0
+         for task in tasks:
+             exit_code: int = task.execution_info.exit_code
+             if exit_code == 0:
+                 num_succeeded += 1
+             else:
+                 num_failed += 1
+         return num_succeeded, num_failed
+
+     def cancel_batch_job(self, job_id: str):
+         self.batch_client.job.terminate(job_id, terminate_reason='APIUserCanceled')
+
+     def get_num_active_jobs(self) -> int:
+         jobs_generator = self.batch_client.job.list(
+             job_list_options=JobListOptions(
+                 filter='state eq \'active\'',
+                 select='id'
+             ))
+         jobs_list = [j for j in jobs_generator]
+         return len(jobs_list)
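
To make the sharding in `submit_tasks` concrete: with the configured `NUM_IMAGES_PER_TASK = 2000`, a 5,000-image job produces three Tasks. A short worked sketch of the index arithmetic follows; the clamping comment is an assumption about the scoring script, which is not shown in this diff.

```python
# Worked example of the shard arithmetic in submit_tasks above.
import math

num_images = 5000
num_images_per_task = 2000  # api_config.NUM_IMAGES_PER_TASK

num_tasks = math.ceil(num_images / num_images_per_task)   # 3
shards = [(t * num_images_per_task, (t + 1) * num_images_per_task)
          for t in range(num_tasks)]
print(shards)  # [(0, 2000), (2000, 4000), (4000, 6000)]

# The last end index (6000) overshoots num_images; presumably score.py
# slices its image list (images[begin:end]), which clamps this naturally.
```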