megadetector-5.0.9-py3-none-any.whl → megadetector-5.0.11-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0

api/synchronous/api_core/tests/load_test.py
@@ -1,110 +0,0 @@
-
- import os
- import json
- import io
- import random
- import requests
-
- from PIL import Image
- from multiprocessing import Pool
- from datetime import datetime
- from requests_toolbelt import MultipartEncoder
- from requests_toolbelt.multipart import decoder
-
-
- ip_address = '100.100.200.200'
- port = 5050
-
- base_url = 'http://{}:{}/v1/camera-trap/sync/'.format(ip_address, port)
-
-
- def call_api(args):
-     start = datetime.now()
-
-     index, url, params, data, headers = args['index'],args['url'], args['params'], args['data'], args['headers']
-     print('calling api: {} starttime: {}'.format(index, start))
-
-     response = requests.post(url, params=params, data=data, headers=headers)
-     elapsed_time = datetime.now() - start
-     print('\napi {} status code: {}, elapsed time in seconds {}'.format(index, response.status_code, elapsed_time.total_seconds()))
-
-     get_detections(response)
-     return response
-
- def get_detections(response):
-     results = decoder.MultipartDecoder.from_response(response)
-     text_results = {}
-     images = {}
-     for part in results.parts:
-         # part is a BodyPart object with b'Content-Type', and b'Content-Disposition', the later includes 'name' and 'filename' info
-         headers = {}
-         for k, v in part.headers.items():
-             headers[k.decode(part.encoding)] = v.decode(part.encoding)
-
-         if headers.get('Content-Type', None) == 'application/json':
-             text_result = json.loads(part.content.decode())
-
-             print(text_result)
-
-
- def test_load(num_requests, params, max_images=1):
-     requests = []
-
-     # read the images anew for each request
-     index = 0
-     for i in range(num_requests):
-         index += 1
-         files = {}
-         sample_input_dir = '../../../api/synchronous/sample_input/test_images'
-
-         image_files = os.listdir(sample_input_dir)
-         random.shuffle(image_files)
-
-         num_images = 0
-         for i, image_name in enumerate(image_files):
-             if not image_name.lower().endswith('.jpg'):
-                 continue
-
-             if num_images >= max_images:
-                 break
-             else:
-                 num_images += 1
-
-             img_path = os.path.join(sample_input_dir, image_name)
-             with open(img_path, 'rb') as f:
-                 content = f.read()
-             files[image_name] = (image_name, content, 'image/jpeg')
-
-         m = MultipartEncoder(fields=files)
-         args = {
-             'index': index,
-             'url': base_url + 'detect',
-             'params': params,
-             'data': m,
-             'headers': {'Content-Type': m.content_type}
-         }
-         requests.append(args)
-
-     print('starting', num_requests, 'threads...')
-     # images are read and in each request by the time we call the API in map()
-     with Pool(num_requests) as pool:
-         results = pool.map(call_api, requests)
-
-     return results
-
-
- if __name__ == "__main__":
-     params = {
-         'min_confidence': 0.05,
-         'min_rendering_confidence': 0.2,
-         'render': True
-     }
-
-     num_requests = 10
-     max_images = 1
-
-     start = datetime.now()
-     responses = test_load(num_requests, params, max_images=max_images)
-     end = datetime.now()
-     total_time = end - start
-     print('Total time for {} requests: {}'.format(num_requests, total_time))

classification/aggregate_classifier_probs.py
@@ -1,108 +0,0 @@
- """
-
- aggregate_classifier_probs.py
-
- Aggregate probabilities from a classifier's outputs according to a mapping
- from the desired (target) categories to the classifier's categories.
-
- Using the mapping, create a new version of the classifier output CSV with
- probabilities summed within each target category. Also output a new
- "index-to-name" JSON file which identifies the sequential order of the target
- categories.
-
- """
-
- #%% Imports
-
- from __future__ import annotations
-
- import argparse
- import json
-
- import pandas as pd
- from tqdm import tqdm
-
- #%% Example usage
-
- """
- python aggregate_classifier_probs.py \
-     classifier_output.csv.gz \
-     --target-mapping target_to_classifier_labels.json \
-     --output-csv classifier_output_remapped.csv.gz \
-     --output-label-index label_index_remapped.json
- """
-
- #%% Main function
-
- def main(classifier_results_csv_path: str,
-          target_mapping_json_path: str,
-          output_csv_path: str,
-          output_label_index_json_path: str) -> None:
-     """
-     Main function.
-
-     Because the output CSV is often very large, we process it in chunks of 1000
-     rows at a time.
-     """
-
-     chunked_df_iterator = pd.read_csv(
-         classifier_results_csv_path, chunksize=1000, float_precision='high',
-         index_col='path')
-
-     with open(target_mapping_json_path, 'r') as f:
-         target_mapping = json.load(f)
-     target_names = sorted(target_mapping.keys())
-
-     all_classifier_labels: set[str] = set()
-     for classifier_labels in target_mapping.values():
-         assert all_classifier_labels.isdisjoint(classifier_labels)
-         all_classifier_labels.update(classifier_labels)
-
-     for i, chunk_df in tqdm(enumerate(chunked_df_iterator)):
-         if i == 0:
-             assert set(chunk_df.columns) == all_classifier_labels
-             header, mode = True, 'w'
-         else:
-             header, mode = False, 'a'
-
-         agg_df = pd.DataFrame(
-             data=0., index=chunk_df.index, columns=target_names)
-         for target in target_names:
-             classifier_labels = target_mapping[target]
-             agg_df[target] = chunk_df[classifier_labels].sum(axis=1)
-
-         agg_df.to_csv(output_csv_path, index=True, header=header, mode=mode)
-
-     with open(output_label_index_json_path, 'w') as f:
-         json.dump(dict(enumerate(target_names)), f, indent=1)
-
-
- #%% Command-line driver
-
- def _parse_args() -> argparse.Namespace:
-
-     parser = argparse.ArgumentParser(
-         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-         description='Aggregate classifier probabilities to target classes.')
-     parser.add_argument(
-         'classifier_results_csv',
-         help='path to CSV with classifier probabilities')
-     parser.add_argument(
-         '-t', '--target-mapping', required=True,
-         help='path to JSON file mapping target categories to classifier labels')
-     parser.add_argument(
-         '-o', '--output-csv', required=True,
-         help='path to save output CSV with aggregated probabilities')
-     parser.add_argument(
-         '-i', '--output-label-index', required=True,
-         help='path to save output label index JSON')
-     return parser.parse_args()
-
-
- if __name__ == '__main__':
-
-     args = _parse_args()
-     main(classifier_results_csv_path=args.classifier_results_csv,
-          target_mapping_json_path=args.target_mapping,
-          output_csv_path=args.output_csv,
-          output_label_index_json_path=args.output_label_index)
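
Editor's note: the removed script above references a target-mapping JSON but never shows one. As a minimal sketch (the category and label names here are hypothetical), a mapping consistent with the assertions in main() — label lists disjoint across targets and jointly matching the classifier CSV's columns — could be produced like this:

```python
# Hypothetical target-to-classifier mapping for aggregate_classifier_probs.py.
# Keys are the desired (target) categories; values are lists of the
# classifier's original column names. The script asserts that the label lists
# are disjoint and that together they equal the set of classifier CSV columns.
import json

target_mapping = {
    'mammal': ['deer', 'elk', 'black_bear'],
    'bird': ['raven', 'sparrow'],
    'empty': ['empty'],
}

with open('target_to_classifier_labels.json', 'w') as f:
    json.dump(target_mapping, f, indent=1)
```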

classification/analyze_failed_images.py
@@ -1,227 +0,0 @@
- """
-
- analyze_failed_images.py
-
- """
-
- #%% Imports and constants
-
- import argparse
- from collections.abc import Mapping, Sequence
- from concurrent import futures
- import json
- from pprint import pprint
- import threading
- from typing import Any, Optional
-
- from PIL import Image, ImageFile
- import requests
- from tqdm import tqdm
-
- from data_management.megadb.megadb_utils import MegadbUtils
- from md_utils import path_utils
- from md_utils import sas_blob_utils
-
-
- #%% Example usage
-
- """
- python analyze_failed_images.py failed.json \
-     -a ACCOUNT -c CONTAINER -s SAS_TOKEN
- """
-
- ImageFile.LOAD_TRUNCATED_IMAGES = False
-
-
- #%% Support functions
-
- def check_image_condition(img_path: str,
-                           truncated_images_lock: threading.Lock,
-                           account: Optional[str] = None,
-                           container: Optional[str] = None,
-                           sas_token: Optional[str] = None,
-                           datasets_table: Optional[Mapping[str, Any]] = None
-                           ) -> tuple[str, str]:
-     """
-     Args:
-         img_path: str, either <blob_name> if datasets_table is None, or
-             <dataset>/<blob_name> if datasets_table is given
-         account: str, name of Azure Blob Storage account
-         container: str, name of Azure Blob Storage container
-         sas_token: str, optional SAS token (without leading '?') if the
-             container is not publicly accessible
-         datasets_table: dict, maps dataset name to dict of information
-
-     Returns: (img_file, status) tuple, where status is one of
-         'nonexistent': blob does not exist in the container
-         'non_image': img_file does not have valid file extension
-         'good': image exists and is able to be opened without setting
-             ImageFile.LOAD_TRUNCATED_IMAGES=True
-         'truncated': image exists but can only be opened by setting
-             ImageFile.LOAD_TRUNCATED_IMAGES=True
-         'bad': image exists, but cannot be opened even when setting
-             ImageFile.LOAD_TRUNCATED_IMAGES=True
-     """
-
-     if (account is None) or (container is None) or (datasets_table is not None):
-         assert account is None
-         assert container is None
-         assert sas_token is None
-         assert datasets_table is not None
-
-         dataset, img_file = img_path.split('/', maxsplit=1)
-         account = datasets_table[dataset]['storage_account']
-         container = datasets_table[dataset]['container']
-         sas_token = datasets_table[dataset]['container_sas_key']
-         if sas_token[0] == '?': # strip leading '?' from SAS token
-             sas_token = sas_token[1:]
-     else:
-         img_file = img_path
-
-     if not path_utils.is_image_file(img_file):
-         return img_file, 'non_image'
-
-     blob_url = sas_blob_utils.build_azure_storage_uri(
-         account=account, container=container, sas_token=sas_token,
-         blob=img_file)
-     blob_exists = sas_blob_utils.check_blob_exists(blob_url)
-     if not blob_exists:
-         return img_file, 'nonexistent'
-
-     stream, _ = sas_blob_utils.download_blob_to_stream(blob_url)
-     stream.seek(0)
-     try:
-         with truncated_images_lock:
-             ImageFile.LOAD_TRUNCATED_IMAGES = False
-             with Image.open(stream) as img:
-                 img.load()
-         return img_file, 'good'
-     except OSError: # PIL.UnidentifiedImageError is a subclass of OSError
-         try:
-             stream.seek(0)
-             with truncated_images_lock:
-                 ImageFile.LOAD_TRUNCATED_IMAGES = True
-                 with Image.open(stream) as img:
-                     img.load()
-             return img_file, 'truncated'
-         except Exception as e: # pylint: disable=broad-except
-             exception_type = type(e).__name__
-             tqdm.write(f'Unable to load {img_file}. {exception_type}: {e}.')
-             return img_file, 'bad'
-
-
- #%% Main function
-
- def analyze_images(url_or_path: str, json_keys: Optional[Sequence[str]] = None,
-                    account: Optional[str] = None,
-                    container: Optional[str] = None,
-                    sas_token: Optional[str] = None) -> None:
-     """
-     Args:
-         url_or_path: str, URL or local path to a file containing a list
-             of image paths. Each image path is either <blob_name> if account and
-             container are given, or <dataset>/<blob_name> if account and
-             container are None. File can either be a list of image paths, or a
-             JSON file containing image paths.
-         json_keys: optional list of str, only relevant if url_or_path is a JSON
-             file. If json_keys=None, then the JSON file at url_or_path is
-             assumed to be a JSON list of image paths. If json_keys is not None,
-             then the JSON file should be a dict, whose values corresponding to
-             json_keys are lists of image paths.
-         account: str, name of Azure Blob Storage account
-         container: str, name of Azure Blob Storage container
-         sas_token: str, optional SAS token (without leading '?') if the
-             container is not publicly accessible
-     """
-
-     datasets_table = None
-     if (account is None) or (container is None):
-         assert account is None
-         assert container is None
-         assert sas_token is None
-         datasets_table = MegadbUtils().get_datasets_table()
-
-     is_json = ('.json' in url_or_path)
-     if url_or_path.startswith(('http://', 'https://')):
-         r = requests.get(url_or_path)
-         if is_json:
-             img_paths = r.json()
-         else:
-             img_paths = r.text.splitlines()
-     else:
-         with open(url_or_path, 'r') as f:
-             if is_json:
-                 img_paths = json.load(f)
-             else:
-                 img_paths = f.readlines()
-
-     if is_json and json_keys is not None:
-         img_paths_json = img_paths
-         img_paths = []
-         for k in json_keys:
-             img_paths += img_paths_json[k]
-
-     mapping: dict[str, list[str]] = {
-         status: []
-         for status in ['good', 'nonexistent', 'non_image', 'truncated', 'bad']
-     }
-
-     pool = futures.ThreadPoolExecutor(max_workers=100)
-
-     # lock before changing ImageFile.LOAD_TRUNCATED_IMAGES
-     truncated_images_lock = threading.Lock()
-
-     futures_list = []
-     for img_path in tqdm(img_paths):
-         future = pool.submit(
-             check_image_condition, img_path, truncated_images_lock, account,
-             container, sas_token, datasets_table)
-         futures_list.append(future)
-
-     total = len(futures_list)
-     for future in tqdm(futures.as_completed(futures_list), total=total):
-         img_file, status = future.result()
-         mapping[status].append(img_file)
-
-     for status, img_list in mapping.items():
-         print(f'{status}: {len(img_list)}')
-         pprint(sorted(img_list))
-
-
- #%% Command-line driver
-
- def _parse_args() -> argparse.Namespace:
-
-     parser = argparse.ArgumentParser(
-         description='Analyze a list of images that failed to download or crop.')
-     parser.add_argument(
-         'failed_images', metavar='URL_OR_PATH',
-         help='URL or path to text or JSON file containing list of image paths')
-     parser.add_argument(
-         '-k', '--json-keys', nargs='*',
-         help='list of keys in JSON file containing image paths')
-     parser.add_argument(
-         '-a', '--account',
-         help='name of Azure Blob Storage account. If not given, then image '
-              'paths are assumed to start with the dataset name, so we can look '
-              'up the account from MegaDB.')
-     parser.add_argument(
-         '-c', '--container',
-         help='name of Azure Blob Storage container. If not given, then image '
-              'paths are assumed to start with the dataset name, so we can look '
-              'up the container from MegaDB.')
-     parser.add_argument(
-         '-s', '--sas-token',
-         help='optional SAS token (without leading "?") if the container is not '
-              'publicly accessible. If account and container not given, then '
-              'image paths are assumed to start with the dataset name, so we '
-              'can look up the SAS Token from MegaDB.')
-     return parser.parse_args()
-
-
- if __name__ == '__main__':
-
-     args = _parse_args()
-     analyze_images(url_or_path=args.failed_images, json_keys=args.json_keys,
-                    account=args.account, container=args.container,
-                    sas_token=args.sas_token)
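
Editor's note: for readers unfamiliar with the PIL pattern used in check_image_condition() above, here is a minimal local-file sketch of the good/truncated/bad distinction; the Azure blob download, existence check, and threading lock are omitted, and 'sample.jpg' is a placeholder path.

```python
# Minimal sketch of the good/truncated/bad check from check_image_condition(),
# applied to a local file. An image that only opens after setting
# ImageFile.LOAD_TRUNCATED_IMAGES=True is reported as 'truncated'.
from PIL import Image, ImageFile

def classify_local_image(path):
    try:
        ImageFile.LOAD_TRUNCATED_IMAGES = False
        with Image.open(path) as img:
            img.load()
        return 'good'
    except OSError:  # PIL.UnidentifiedImageError is a subclass of OSError
        try:
            ImageFile.LOAD_TRUNCATED_IMAGES = True
            with Image.open(path) as img:
                img.load()
            return 'truncated'
        except Exception:
            return 'bad'
        finally:
            ImageFile.LOAD_TRUNCATED_IMAGES = False

if __name__ == '__main__':
    print(classify_local_image('sample.jpg'))  # placeholder path
```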

classification/cache_batchapi_outputs.py
@@ -1,198 +0,0 @@
- """
-
- cache_batchapi_outputs.py
-
- Script to cache Batch Detection API outputs.
-
- This script can handle either the Batch Detection API JSON Response or the
- detections JSON.
-
- Batch Detection API Response format:
-
- {
-     "Status": {
-         "request_status": "completed",
-         "message": {
-             "num_failed_shards": 0,
-             "output_file_urls": {
-                 "detections": "https://url/to/detections.json",
-                 "failed_images": "https://url/to/failed_images.json",
-                 "images": https://url/to/images.json",
-             }
-         },
-     },
-     "Endpoint": "/v3/camera-trap/detection-batch/request_detections",
-     "TaskId": "ea26326e-7e0d-4524-a9ea-f57a5799d4ba"
- }
-
- Detections JSON format:
-
- {
-     "info": {...}
-     "detection_categories": {...}
-     "classification_categories": {...}
-     "images": [
-         {
-             "file": "path/from/base/dir/image1.jpg",
-             "max_detection_conf": 0.926,
-             "detections": [{
-                 "category": "1",
-                 "conf": 0.061,
-                 "bbox": [0.0451, 0.1849, 0.3642, 0.4636]
-             }]
-         }
-     ]
- }
-
- Batch Detection API Output Format:
-
- github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#api-outputs
-
- """
-
- #%% Imports
-
- from __future__ import annotations
-
- import argparse
- from collections.abc import Mapping
- import json
- import os
- from typing import Any, Optional
-
- import requests
-
- from api.batch_processing.data_preparation.prepare_api_submission import (
-     TaskStatus, Task)
- from api.batch_processing.postprocessing.combine_api_outputs import (
-     combine_api_output_dictionaries)
-
-
- #%% Support functions
-
- def cache_json(json_path: str,
-                is_detections: bool,
-                dataset: str,
-                detector_output_cache_base_dir: str,
-                detector_version: Optional[str]) -> None:
-     """
-     Args:
-         json_path: str, path to JSON file
-         is_detections: bool, True if <json_path> is a detections JSON file,
-             False if <json_path> is a API response JSON file
-         dataset: str
-         detector_output_cache_base_dir: str
-         detector_version: str
-     """
-
-     with open(json_path, 'r') as f:
-         js = json.load(f)
-
-     if is_detections:
-         detections = js
-
-     else:
-         response = js
-
-         # task finished successfully
-         status = TaskStatus(response['Status']['request_status'])
-         assert status == TaskStatus.COMPLETED
-
-         # parse the task ID
-         task_id = response['TaskId']
-
-         message = response['Status']['message']
-         detections_url = message['output_file_urls']['detections']
-         assert detections_url.split('/')[-2] == task_id
-
-         # print info about missing and failed images
-         task = Task(name=task_id, task_id=task_id)
-         task.response = response
-         task.status = status
-         task.get_missing_images(verbose=True)
-
-         # get the detections
-         detections = requests.get(detections_url).json()
-
-     # add detections to the detections cache
-     api_det_version = detections['info']['detector'].rsplit('v', maxsplit=1)[1]
-     if detector_version is not None:
-         assert api_det_version == detector_version
-     detector_output_cache_dir = os.path.join(
-         detector_output_cache_base_dir, f'v{api_det_version}')
-     msg = cache_detections(
-         detections=detections, dataset=dataset,
-         detector_output_cache_dir=detector_output_cache_dir)
-     print(msg)
-
-
- def cache_detections(detections: Mapping[str, Any], dataset: str,
-                      detector_output_cache_dir: str) -> str:
-     """
-     Args:
-         detections: dict, represents JSON output of detector
-         dataset: str, name of dataset
-         detector_output_cache_dir: str, path to folder where detector outputs
-             are cached, stored as 1 JSON file per dataset, directory must
-             already exist
-
-     Returns: str, message
-     """
-
-     # combine detections with cache
-     dataset_cache_path = os.path.join(
-         detector_output_cache_dir, f'{dataset}.json')
-     merged_dataset_cache: Mapping[str, Any]
-     if os.path.exists(dataset_cache_path):
-         with open(dataset_cache_path, 'r') as f:
-             dataset_cache = json.load(f)
-         merged_dataset_cache = combine_api_output_dictionaries(
-             input_dicts=[dataset_cache, detections], require_uniqueness=False)
-         msg = f'Merging detection output with {dataset_cache_path}'
-     else:
-         merged_dataset_cache = detections
-         msg = ('No cached detection outputs found. Saving detection output to '
-                f'{dataset_cache_path}')
-
-     # write combined detections back out to cache
-     with open(dataset_cache_path, 'w') as f:
-         json.dump(merged_dataset_cache, f, indent=1)
-     return msg
-
-
- #%% Command-line driver
-
- def _parse_args() -> argparse.Namespace:
-
-     parser = argparse.ArgumentParser(
-         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-         description='Caches detector outputs.')
-     parser.add_argument(
-         'json_file',
-         help='path to JSON file containing response of Batch Detection API')
-     parser.add_argument(
-         '-f', '--format', choices=['response', 'detections'], required=True,
-         help='(required) whether <json_file> is a Batch API response or a '
-              'detections JSON file')
-     parser.add_argument(
-         '-d', '--dataset', required=True,
-         help='(required) name of dataset corresponding to the API task')
-     parser.add_argument(
-         '-c', '--detector-output-cache-dir', required=True,
-         help='(required) path to directory where detector outputs are cached')
-     parser.add_argument(
-         '-v', '--detector-version',
-         help='detector version string, e.g., "4.1", inferred from detections '
-              'file if not given')
-     return parser.parse_args()
-
-
- if __name__ == '__main__':
-
-     args = _parse_args()
-     cache_json(
-         json_path=args.json_file,
-         is_detections=(args.format == 'detections'),
-         dataset=args.dataset,
-         detector_output_cache_base_dir=args.detector_output_cache_dir,
-         detector_version=args.detector_version)
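
Editor's note: cache_detections() above delegates the actual merge to combine_api_output_dictionaries(). As a simplified sketch only (not that function's real behavior), the core idea of unioning two MegaDetector-format results dicts on each image's 'file' field looks roughly like this:

```python
# Simplified illustration of merging a new detections dict into a cached one,
# keyed on each image's 'file' path. The real combine_api_output_dictionaries()
# also reconciles 'info' and category maps and can enforce per-image
# uniqueness; this sketch just keeps the first copy of any duplicated entry.
def merge_md_results(cached, new):
    merged = dict(cached)
    seen = {im['file'] for im in cached.get('images', [])}
    merged['images'] = cached.get('images', []) + [
        im for im in new.get('images', []) if im['file'] not in seen
    ]
    return merged

# Example: a previously cached result plus one new image.
cached = {'images': [{'file': 'a.jpg', 'detections': []}]}
new = {'images': [{'file': 'a.jpg', 'detections': []},
                  {'file': 'b.jpg', 'detections': []}]}
assert len(merge_md_results(cached, new)['images']) == 2
```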