megadetector-10.0.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
megadetector/classification/aggregate_classifier_probs.py
@@ -0,0 +1,108 @@
+ """
+
+ aggregate_classifier_probs.py
+
+ Aggregate probabilities from a classifier's outputs according to a mapping
+ from the desired (target) categories to the classifier's categories.
+
+ Using the mapping, create a new version of the classifier output CSV with
+ probabilities summed within each target category. Also output a new
+ "index-to-name" JSON file which identifies the sequential order of the target
+ categories.
+
+ """
+
+ #%% Imports
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+
+ import pandas as pd
+ from tqdm import tqdm
+
+ #%% Example usage
+
+ """
+ python aggregate_classifier_probs.py \
+     classifier_output.csv.gz \
+     --target-mapping target_to_classifier_labels.json \
+     --output-csv classifier_output_remapped.csv.gz \
+     --output-label-index label_index_remapped.json
+ """
+
+ #%% Main function
+
+ def main(classifier_results_csv_path: str,
+          target_mapping_json_path: str,
+          output_csv_path: str,
+          output_label_index_json_path: str) -> None:
+     """
+     Main function.
+
+     Because the output CSV is often very large, we process it in chunks of 1000
+     rows at a time.
+     """
+
+     chunked_df_iterator = pd.read_csv(
+         classifier_results_csv_path, chunksize=1000, float_precision='high',
+         index_col='path')
+
+     with open(target_mapping_json_path, 'r') as f:
+         target_mapping = json.load(f)
+     target_names = sorted(target_mapping.keys())
+
+     all_classifier_labels: set[str] = set()
+     for classifier_labels in target_mapping.values():
+         assert all_classifier_labels.isdisjoint(classifier_labels)
+         all_classifier_labels.update(classifier_labels)
+
+     for i, chunk_df in tqdm(enumerate(chunked_df_iterator)):
+         if i == 0:
+             assert set(chunk_df.columns) == all_classifier_labels
+             header, mode = True, 'w'
+         else:
+             header, mode = False, 'a'
+
+         agg_df = pd.DataFrame(
+             data=0., index=chunk_df.index, columns=target_names)
+         for target in target_names:
+             classifier_labels = target_mapping[target]
+             agg_df[target] = chunk_df[classifier_labels].sum(axis=1)
+
+         agg_df.to_csv(output_csv_path, index=True, header=header, mode=mode)
+
+     with open(output_label_index_json_path, 'w') as f:
+         json.dump(dict(enumerate(target_names)), f, indent=1)
+
+
+ #%% Command-line driver
+
+ def _parse_args() -> argparse.Namespace:
+
+     parser = argparse.ArgumentParser(
+         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+         description='Aggregate classifier probabilities to target classes.')
+     parser.add_argument(
+         'classifier_results_csv',
+         help='path to CSV with classifier probabilities')
+     parser.add_argument(
+         '-t', '--target-mapping', required=True,
+         help='path to JSON file mapping target categories to classifier labels')
+     parser.add_argument(
+         '-o', '--output-csv', required=True,
+         help='path to save output CSV with aggregated probabilities')
+     parser.add_argument(
+         '-i', '--output-label-index', required=True,
+         help='path to save output label index JSON')
+     return parser.parse_args()
+
+
+ if __name__ == '__main__':
+
+     args = _parse_args()
+     main(classifier_results_csv_path=args.classifier_results_csv,
+          target_mapping_json_path=args.target_mapping,
+          output_csv_path=args.output_csv,
+          output_label_index_json_path=args.output_label_index)
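
For readers skimming this diff, a minimal sketch of the data this script consumes and produces may help. Everything below is illustrative and not part of the package: the category names, file names, and the two-image DataFrame are invented; only the summing arithmetic mirrors the code above.

# Hedged sketch of aggregate_classifier_probs.py inputs and outputs.
# All category names and file names here are hypothetical.
import json
import pandas as pd

# target category -> list of classifier output columns whose probabilities are summed
target_mapping = {
    'deer': ['white_tailed_deer', 'mule_deer'],
    'empty': ['empty'],
}
# this is the shape of the file passed via --target-mapping
with open('target_to_classifier_labels.json', 'w') as f:
    json.dump(target_mapping, f, indent=1)

# a classifier output CSV has one row per image path and one probability column per label
df = pd.DataFrame(
    {'white_tailed_deer': [0.6, 0.1],
     'mule_deer': [0.2, 0.1],
     'empty': [0.2, 0.8]},
    index=pd.Index(['img1.jpg', 'img2.jpg'], name='path'))

# summing within each target category reproduces the aggregation step above
agg = pd.DataFrame({t: df[cols].sum(axis=1)
                    for t, cols in target_mapping.items()})
print(agg)  # 'deer' is 0.8 / 0.2, 'empty' is 0.2 / 0.8

# the emitted label index maps sequential indices to the sorted target names
print(dict(enumerate(sorted(target_mapping.keys()))))  # {0: 'deer', 1: 'empty'}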
megadetector/classification/analyze_failed_images.py
@@ -0,0 +1,227 @@
+ """
+
+ analyze_failed_images.py
+
+ """
+
+ #%% Imports and constants
+
+ import argparse
+ from collections.abc import Mapping, Sequence
+ from concurrent import futures
+ import json
+ from pprint import pprint
+ import threading
+ from typing import Any, Optional
+
+ from PIL import Image, ImageFile
+ import requests
+ from tqdm import tqdm
+
+ from megadetector.data_management.megadb.megadb_utils import MegadbUtils
+ from megadetector.utils import path_utils
+ from megadetector.utils import sas_blob_utils
+
+
+ #%% Example usage
+
+ """
+ python analyze_failed_images.py failed.json \
+     -a ACCOUNT -c CONTAINER -s SAS_TOKEN
+ """
+
+ ImageFile.LOAD_TRUNCATED_IMAGES = False
+
+
+ #%% Support functions
+
+ def check_image_condition(img_path: str,
+                           truncated_images_lock: threading.Lock,
+                           account: Optional[str] = None,
+                           container: Optional[str] = None,
+                           sas_token: Optional[str] = None,
+                           datasets_table: Optional[Mapping[str, Any]] = None
+                           ) -> tuple[str, str]:
+     """
+     Args:
+         img_path: str, either <blob_name> if datasets_table is None, or
+             <dataset>/<blob_name> if datasets_table is given
+         account: str, name of Azure Blob Storage account
+         container: str, name of Azure Blob Storage container
+         sas_token: str, optional SAS token (without leading '?') if the
+             container is not publicly accessible
+         datasets_table: dict, maps dataset name to dict of information
+
+     Returns: (img_file, status) tuple, where status is one of
+         'nonexistent': blob does not exist in the container
+         'non_image': img_file does not have valid file extension
+         'good': image exists and is able to be opened without setting
+             ImageFile.LOAD_TRUNCATED_IMAGES=True
+         'truncated': image exists but can only be opened by setting
+             ImageFile.LOAD_TRUNCATED_IMAGES=True
+         'bad': image exists, but cannot be opened even when setting
+             ImageFile.LOAD_TRUNCATED_IMAGES=True
+     """
+
+     if (account is None) or (container is None) or (datasets_table is not None):
+         assert account is None
+         assert container is None
+         assert sas_token is None
+         assert datasets_table is not None
+
+         dataset, img_file = img_path.split('/', maxsplit=1)
+         account = datasets_table[dataset]['storage_account']
+         container = datasets_table[dataset]['container']
+         sas_token = datasets_table[dataset]['container_sas_key']
+         if sas_token[0] == '?':  # strip leading '?' from SAS token
+             sas_token = sas_token[1:]
+     else:
+         img_file = img_path
+
+     if not path_utils.is_image_file(img_file):
+         return img_file, 'non_image'
+
+     blob_url = sas_blob_utils.build_azure_storage_uri(
+         account=account, container=container, sas_token=sas_token,
+         blob=img_file)
+     blob_exists = sas_blob_utils.check_blob_exists(blob_url)
+     if not blob_exists:
+         return img_file, 'nonexistent'
+
+     stream, _ = sas_blob_utils.download_blob_to_stream(blob_url)
+     stream.seek(0)
+     try:
+         with truncated_images_lock:
+             ImageFile.LOAD_TRUNCATED_IMAGES = False
+             with Image.open(stream) as img:
+                 img.load()
+         return img_file, 'good'
+     except OSError:  # PIL.UnidentifiedImageError is a subclass of OSError
+         try:
+             stream.seek(0)
+             with truncated_images_lock:
+                 ImageFile.LOAD_TRUNCATED_IMAGES = True
+                 with Image.open(stream) as img:
+                     img.load()
+             return img_file, 'truncated'
+         except Exception as e:  # pylint: disable=broad-except
+             exception_type = type(e).__name__
+             tqdm.write(f'Unable to load {img_file}. {exception_type}: {e}.')
+             return img_file, 'bad'
+
+
+ #%% Main function
+
+ def analyze_images(url_or_path: str, json_keys: Optional[Sequence[str]] = None,
+                    account: Optional[str] = None,
+                    container: Optional[str] = None,
+                    sas_token: Optional[str] = None) -> None:
+     """
+     Args:
+         url_or_path: str, URL or local path to a file containing a list
+             of image paths. Each image path is either <blob_name> if account and
+             container are given, or <dataset>/<blob_name> if account and
+             container are None. File can either be a list of image paths, or a
+             JSON file containing image paths.
+         json_keys: optional list of str, only relevant if url_or_path is a JSON
+             file. If json_keys=None, then the JSON file at url_or_path is
+             assumed to be a JSON list of image paths. If json_keys is not None,
+             then the JSON file should be a dict, whose values corresponding to
+             json_keys are lists of image paths.
+         account: str, name of Azure Blob Storage account
+         container: str, name of Azure Blob Storage container
+         sas_token: str, optional SAS token (without leading '?') if the
+             container is not publicly accessible
+     """
+
+     datasets_table = None
+     if (account is None) or (container is None):
+         assert account is None
+         assert container is None
+         assert sas_token is None
+         datasets_table = MegadbUtils().get_datasets_table()
+
+     is_json = ('.json' in url_or_path)
+     if url_or_path.startswith(('http://', 'https://')):
+         r = requests.get(url_or_path)
+         if is_json:
+             img_paths = r.json()
+         else:
+             img_paths = r.text.splitlines()
+     else:
+         with open(url_or_path, 'r') as f:
+             if is_json:
+                 img_paths = json.load(f)
+             else:
+                 img_paths = f.read().splitlines()  # strip trailing newlines
+
+     if is_json and json_keys is not None:
+         img_paths_json = img_paths
+         img_paths = []
+         for k in json_keys:
+             img_paths += img_paths_json[k]
+
+     mapping: dict[str, list[str]] = {
+         status: []
+         for status in ['good', 'nonexistent', 'non_image', 'truncated', 'bad']
+     }
+
+     pool = futures.ThreadPoolExecutor(max_workers=100)
+
+     # lock before changing ImageFile.LOAD_TRUNCATED_IMAGES
+     truncated_images_lock = threading.Lock()
+
+     futures_list = []
+     for img_path in tqdm(img_paths):
+         future = pool.submit(
+             check_image_condition, img_path, truncated_images_lock, account,
+             container, sas_token, datasets_table)
+         futures_list.append(future)
+
+     total = len(futures_list)
+     for future in tqdm(futures.as_completed(futures_list), total=total):
+         img_file, status = future.result()
+         mapping[status].append(img_file)
+
+     for status, img_list in mapping.items():
+         print(f'{status}: {len(img_list)}')
+         pprint(sorted(img_list))
+
+
+ #%% Command-line driver
+
+ def _parse_args() -> argparse.Namespace:
+
+     parser = argparse.ArgumentParser(
+         description='Analyze a list of images that failed to download or crop.')
+     parser.add_argument(
+         'failed_images', metavar='URL_OR_PATH',
+         help='URL or path to text or JSON file containing list of image paths')
+     parser.add_argument(
+         '-k', '--json-keys', nargs='*',
+         help='list of keys in JSON file containing image paths')
+     parser.add_argument(
+         '-a', '--account',
+         help='name of Azure Blob Storage account. If not given, then image '
+              'paths are assumed to start with the dataset name, so we can look '
+              'up the account from MegaDB.')
+     parser.add_argument(
+         '-c', '--container',
+         help='name of Azure Blob Storage container. If not given, then image '
+              'paths are assumed to start with the dataset name, so we can look '
+              'up the container from MegaDB.')
+     parser.add_argument(
+         '-s', '--sas-token',
+         help='optional SAS token (without leading "?") if the container is not '
+              'publicly accessible. If account and container not given, then '
+              'image paths are assumed to start with the dataset name, so we '
+              'can look up the SAS Token from MegaDB.')
+     return parser.parse_args()
+
+
+ if __name__ == '__main__':
+
+     args = _parse_args()
+     analyze_images(url_or_path=args.failed_images, json_keys=args.json_keys,
+                    account=args.account, container=args.container,
+                    sas_token=args.sas_token)
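
The core of check_image_condition is a two-pass PIL load: first with ImageFile.LOAD_TRUNCATED_IMAGES left at False, then, on failure, with it set to True. Below is a hedged, local-file sketch of that triage with no Azure and no locking; the function name local_image_status is invented here, and note that the real script guards the global flag with a threading.Lock because it runs the check across many worker threads.

# Hedged sketch: the good/truncated/bad triage applied to a local file.
from PIL import Image, ImageFile

def local_image_status(path: str) -> str:
    """Return 'good', 'truncated', or 'bad' for a local image file."""
    try:
        ImageFile.LOAD_TRUNCATED_IMAGES = False
        with Image.open(path) as img:
            img.load()          # force a full decode, not just the header
        return 'good'
    except OSError:             # UnidentifiedImageError is a subclass of OSError
        try:
            ImageFile.LOAD_TRUNCATED_IMAGES = True
            with Image.open(path) as img:
                img.load()      # succeeds only if the file is merely truncated
            return 'truncated'
        except Exception:
            return 'bad'
    finally:
        ImageFile.LOAD_TRUNCATED_IMAGES = False  # restore the global default

# example: print(local_image_status('some_image.jpg'))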
megadetector/classification/cache_batchapi_outputs.py
@@ -0,0 +1,198 @@
+ """
+
+ cache_batchapi_outputs.py
+
+ Script to cache Batch Detection API outputs.
+
+ This script can handle either the Batch Detection API JSON Response or the
+ detections JSON.
+
+ Batch Detection API Response format:
+
+ {
+     "Status": {
+         "request_status": "completed",
+         "message": {
+             "num_failed_shards": 0,
+             "output_file_urls": {
+                 "detections": "https://url/to/detections.json",
+                 "failed_images": "https://url/to/failed_images.json",
+                 "images": "https://url/to/images.json",
+             }
+         },
+     },
+     "Endpoint": "/v3/camera-trap/detection-batch/request_detections",
+     "TaskId": "ea26326e-7e0d-4524-a9ea-f57a5799d4ba"
+ }
+
+ Detections JSON format:
+
+ {
+     "info": {...},
+     "detection_categories": {...},
+     "classification_categories": {...},
+     "images": [
+         {
+             "file": "path/from/base/dir/image1.jpg",
+             "max_detection_conf": 0.926,
+             "detections": [{
+                 "category": "1",
+                 "conf": 0.061,
+                 "bbox": [0.0451, 0.1849, 0.3642, 0.4636]
+             }]
+         }
+     ]
+ }
+
+ Batch Detection API Output Format:
+
+ github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing#api-outputs
+
+ """
+
+ #%% Imports
+
+ from __future__ import annotations
+
+ import argparse
+ from collections.abc import Mapping
+ import json
+ import os
+ from typing import Any, Optional
+
+ import requests
+
+ from api.batch_processing.data_preparation.prepare_api_submission import (
+     TaskStatus, Task)
+ from api.batch_processing.postprocessing.combine_api_outputs import (
+     combine_api_output_dictionaries)
+ from megadetector.utils import ct_utils
+
+
+ #%% Support functions
+
+ def cache_json(json_path: str,
+                is_detections: bool,
+                dataset: str,
+                detector_output_cache_base_dir: str,
+                detector_version: Optional[str]) -> None:
+     """
+     Args:
+         json_path: str, path to JSON file
+         is_detections: bool, True if <json_path> is a detections JSON file,
+             False if <json_path> is an API response JSON file
+         dataset: str
+         detector_output_cache_base_dir: str
+         detector_version: str
+     """
+
+     with open(json_path, 'r') as f:
+         js = json.load(f)
+
+     if is_detections:
+         detections = js
+
+     else:
+         response = js
+
+         # task finished successfully
+         status = TaskStatus(response['Status']['request_status'])
+         assert status == TaskStatus.COMPLETED
+
+         # parse the task ID
+         task_id = response['TaskId']
+
+         message = response['Status']['message']
+         detections_url = message['output_file_urls']['detections']
+         assert detections_url.split('/')[-2] == task_id
+
+         # print info about missing and failed images
+         task = Task(name=task_id, task_id=task_id)
+         task.response = response
+         task.status = status
+         task.get_missing_images(verbose=True)
+
+         # get the detections
+         detections = requests.get(detections_url).json()
+
+     # add detections to the detections cache
+     api_det_version = detections['info']['detector'].rsplit('v', maxsplit=1)[1]
+     if detector_version is not None:
+         assert api_det_version == detector_version
+     detector_output_cache_dir = os.path.join(
+         detector_output_cache_base_dir, f'v{api_det_version}')
+     msg = cache_detections(
+         detections=detections, dataset=dataset,
+         detector_output_cache_dir=detector_output_cache_dir)
+     print(msg)
+
+
+ def cache_detections(detections: Mapping[str, Any], dataset: str,
+                      detector_output_cache_dir: str) -> str:
+     """
+     Args:
+         detections: dict, represents JSON output of detector
+         dataset: str, name of dataset
+         detector_output_cache_dir: str, path to folder where detector outputs
+             are cached, stored as 1 JSON file per dataset, directory must
+             already exist
+
+     Returns: str, message
+     """
+
+     # combine detections with cache
+     dataset_cache_path = os.path.join(
+         detector_output_cache_dir, f'{dataset}.json')
+     merged_dataset_cache: Mapping[str, Any]
+     if os.path.exists(dataset_cache_path):
+         with open(dataset_cache_path, 'r') as f:
+             dataset_cache = json.load(f)
+         merged_dataset_cache = combine_api_output_dictionaries(
+             input_dicts=[dataset_cache, detections], require_uniqueness=False)
+         msg = f'Merging detection output with {dataset_cache_path}'
+     else:
+         merged_dataset_cache = detections
+         msg = ('No cached detection outputs found. Saving detection output to '
+                f'{dataset_cache_path}')
+
+     # write combined detections back out to cache
+     ct_utils.write_json(dataset_cache_path, merged_dataset_cache)
+     return msg
+
+
+ #%% Command-line driver
+
+ def _parse_args() -> argparse.Namespace:
+
+     parser = argparse.ArgumentParser(
+         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+         description='Caches detector outputs.')
+     parser.add_argument(
+         'json_file',
+         help='path to JSON file containing response of Batch Detection API')
+     parser.add_argument(
+         '-f', '--format', choices=['response', 'detections'], required=True,
+         help='(required) whether <json_file> is a Batch API response or a '
+              'detections JSON file')
+     parser.add_argument(
+         '-d', '--dataset', required=True,
+         help='(required) name of dataset corresponding to the API task')
+     parser.add_argument(
+         '-c', '--detector-output-cache-dir', required=True,
+         help='(required) path to directory where detector outputs are cached')
+     parser.add_argument(
+         '-v', '--detector-version',
+         help='detector version string, e.g., "4.1", inferred from detections '
+              'file if not given')
+     return parser.parse_args()
+
+
+ if __name__ == '__main__':
+
+     args = _parse_args()
+     cache_json(
+         json_path=args.json_file,
+         is_detections=(args.format == 'detections'),
+         dataset=args.dataset,
+         detector_output_cache_base_dir=args.detector_output_cache_dir,
+         detector_version=args.detector_version)
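
As a quick illustration of the 'response' path through cache_json, the sketch below parses a made-up Batch Detection API response using only the field names documented in the docstring above (Status.request_status, Status.message.output_file_urls.detections, TaskId). The URL and task ID are invented; the real script additionally uses the Task/TaskStatus helpers, downloads the detections file, and merges it into the per-dataset cache under v{detector_version}/{dataset}.json.

# Hedged sketch of the response-parsing step; all values are invented.
import json

response_text = '''
{
  "Status": {
    "request_status": "completed",
    "message": {
      "num_failed_shards": 0,
      "output_file_urls": {
        "detections": "https://host/tasks/ea26326e/detections.json"
      }
    }
  },
  "Endpoint": "/v3/camera-trap/detection-batch/request_detections",
  "TaskId": "ea26326e"
}
'''
response = json.loads(response_text)

# the script only proceeds for completed tasks
assert response['Status']['request_status'] == 'completed'

task_id = response['TaskId']
detections_url = response['Status']['message']['output_file_urls']['detections']

# the detections URL is expected to live under a folder named for the task ID
assert detections_url.split('/')[-2] == task_id
print(task_id, detections_url)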