megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.
Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
api/synchronous/api_core/tests/load_test.py

@@ -1,110 +1,110 @@

In this hunk, lines 1-109 are removed and re-added with textually identical content; the shared content is shown once below, and line 110 is unchanged context.


import os
import json
import io
import random
import requests

from PIL import Image
from multiprocessing import Pool
from datetime import datetime
from requests_toolbelt import MultipartEncoder
from requests_toolbelt.multipart import decoder


ip_address = '100.100.200.200'
port = 5050

base_url = 'http://{}:{}/v1/camera-trap/sync/'.format(ip_address, port)


def call_api(args):
    start = datetime.now()

    index, url, params, data, headers = args['index'],args['url'], args['params'], args['data'], args['headers']
    print('calling api: {} starttime: {}'.format(index, start))

    response = requests.post(url, params=params, data=data, headers=headers)
    elapsed_time = datetime.now() - start
    print('\napi {} status code: {}, elapsed time in seconds {}'.format(index, response.status_code, elapsed_time.total_seconds()))

    get_detections(response)
    return response

def get_detections(response):
    results = decoder.MultipartDecoder.from_response(response)
    text_results = {}
    images = {}
    for part in results.parts:
        # part is a BodyPart object with b'Content-Type', and b'Content-Disposition', the later includes 'name' and 'filename' info
        headers = {}
        for k, v in part.headers.items():
            headers[k.decode(part.encoding)] = v.decode(part.encoding)

        if headers.get('Content-Type', None) == 'application/json':
            text_result = json.loads(part.content.decode())

            print(text_result)


def test_load(num_requests, params, max_images=1):
    requests = []

    # read the images anew for each request
    index = 0
    for i in range(num_requests):
        index += 1
        files = {}
        sample_input_dir = '../../../api/synchronous/sample_input/test_images'

        image_files = os.listdir(sample_input_dir)
        random.shuffle(image_files)

        num_images = 0
        for i, image_name in enumerate(image_files):
            if not image_name.lower().endswith('.jpg'):
                continue

            if num_images >= max_images:
                break
            else:
                num_images += 1

            img_path = os.path.join(sample_input_dir, image_name)
            with open(img_path, 'rb') as f:
                content = f.read()
            files[image_name] = (image_name, content, 'image/jpeg')

        m = MultipartEncoder(fields=files)
        args = {
            'index': index,
            'url': base_url + 'detect',
            'params': params,
            'data': m,
            'headers': {'Content-Type': m.content_type}
        }
        requests.append(args)

    print('starting', num_requests, 'threads...')
    # images are read and in each request by the time we call the API in map()
    with Pool(num_requests) as pool:
        results = pool.map(call_api, requests)

    return results


if __name__ == "__main__":
    params = {
        'min_confidence': 0.05,
        'min_rendering_confidence': 0.2,
        'render': True
    }

    num_requests = 10
    max_images = 1

    start = datetime.now()
    responses = test_load(num_requests, params, max_images=max_images)
    end = datetime.now()
    total_time = end - start

    print('Total time for {} requests: {}'.format(num_requests, total_time))
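For reference, the request pattern exercised by this load test reduces to a single synchronous call. The sketch below is an illustrative, minimal version of that call based only on the code above; the host, port, image path, and parameter values are placeholders taken from the test script rather than from separate API documentation.

import requests
from requests_toolbelt import MultipartEncoder

# Placeholder endpoint; substitute the address of a running synchronous API instance
base_url = 'http://100.100.200.200:5050/v1/camera-trap/sync/'

# Wrap a single local image in a multipart body, as load_test.py does per request
image_path = 'sample_input/test_images/example.jpg'  # hypothetical path
with open(image_path, 'rb') as f:
    fields = {'example.jpg': ('example.jpg', f.read(), 'image/jpeg')}
m = MultipartEncoder(fields=fields)

params = {'min_confidence': 0.05, 'min_rendering_confidence': 0.2, 'render': True}
response = requests.post(base_url + 'detect',
                         params=params,
                         data=m,
                         headers={'Content-Type': m.content_type})
print(response.status_code)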
classification/aggregate_classifier_probs.py

@@ -1,27 +1,16 @@
-########
-#
-# aggregate_classifier_probs.py
-#
-# Aggregate probabilities from a classifier's outputs according to a mapping
-# from the desired (target) categories to the classifier's categories.
-#
-# Using the mapping, create a new version of the classifier output CSV with
-# probabilities summed within each target category. Also output a new
-# "index-to-name" JSON file which identifies the sequential order of the target
-# categories.
-#
-########
+"""
 
-#%% Example usage
+aggregate_classifier_probs.py
 
-"""
-python aggregate_classifier_probs.py \
-    classifier_output.csv.gz \
-    --target-mapping target_to_classifier_labels.json \
-    --output-csv classifier_output_remapped.csv.gz \
-    --output-label-index label_index_remapped.json
-"""
+Aggregate probabilities from a classifier's outputs according to a mapping
+from the desired (target) categories to the classifier's categories.
 
+Using the mapping, create a new version of the classifier output CSV with
+probabilities summed within each target category. Also output a new
+"index-to-name" JSON file which identifies the sequential order of the target
+categories.
+
+"""
 
 #%% Imports
 

@@ -33,6 +22,15 @@ import json
 import pandas as pd
 from tqdm import tqdm
 
+#%% Example usage
+
+"""
+python aggregate_classifier_probs.py \
+    classifier_output.csv.gz \
+    --target-mapping target_to_classifier_labels.json \
+    --output-csv classifier_output_remapped.csv.gz \
+    --output-label-index label_index_remapped.json
+"""
 
 #%% Main function
 

@@ -46,6 +44,7 @@ def main(classifier_results_csv_path: str,
     Because the output CSV is often very large, we process it in chunks of 1000
     rows at a time.
     """
+
     chunked_df_iterator = pd.read_csv(
         classifier_results_csv_path, chunksize=1000, float_precision='high',
         index_col='path')

@@ -81,9 +80,7 @@
 #%% Command-line driver
 
 def _parse_args() -> argparse.Namespace:
-    """
-    Parses arguments.
-    """
+
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         description='Aggregate classifier probabilities to target classes.')
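As the relocated docstring describes, this script sums the classifier's per-category probabilities into target categories using a target-to-classifier-label mapping, reading the (often very large) CSV in 1000-row chunks with 'path' as the index column. Below is a minimal sketch of that aggregation step; the mapping contents and file names mirror the example usage above, while the column handling is illustrative rather than a copy of the script's implementation.

import json
import pandas as pd

# Hypothetical mapping, e.g. {"deer": ["white_tailed_deer", "mule_deer"], "empty": ["empty"]}
with open('target_to_classifier_labels.json', 'r') as f:
    target_to_labels = json.load(f)

chunk_iterator = pd.read_csv('classifier_output.csv.gz', chunksize=1000,
                             float_precision='high', index_col='path')

aggregated_chunks = []
for chunk in chunk_iterator:
    out = pd.DataFrame(index=chunk.index)
    # Sum the classifier's probability columns within each target category
    for target, classifier_labels in target_to_labels.items():
        out[target] = chunk[classifier_labels].sum(axis=1)
    aggregated_chunks.append(out)

pd.concat(aggregated_chunks).to_csv('classifier_output_remapped.csv.gz')

# label_index_remapped.json: target categories in their sequential output order
label_index = {str(i): name for i, name in enumerate(target_to_labels.keys())}
with open('label_index_remapped.json', 'w') as f:
    json.dump(label_index, f, indent=1)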
classification/analyze_failed_images.py

@@ -1,20 +1,11 @@
-########
-#
-# analyze_failed_images.py
-#
-########
+"""
 
-#%% Example usage
+analyze_failed_images.py
 
-"""
-python analyze_failed_images.py failed.json \
-    -a ACCOUNT -c CONTAINER -s SAS_TOKEN
 """
 
 #%% Imports and constants
 
-from __future__ import annotations
-
 import argparse
 from collections.abc import Mapping, Sequence
 from concurrent import futures

@@ -31,6 +22,14 @@ from data_management.megadb.megadb_utils import MegadbUtils
 from md_utils import path_utils
 from md_utils import sas_blob_utils
 
+
+#%% Example usage
+
+"""
+python analyze_failed_images.py failed.json \
+    -a ACCOUNT -c CONTAINER -s SAS_TOKEN
+"""
+
 ImageFile.LOAD_TRUNCATED_IMAGES = False
 
 

@@ -191,8 +190,7 @@ def analyze_images(url_or_path: str, json_keys: Optional[Sequence[str]] = None,
 
 #%% Command-line driver
 
-def _parse_args() -> argparse.Namespace:
-
+def _parse_args() -> argparse.Namespace:
 
     parser = argparse.ArgumentParser(
         description='Analyze a list of images that failed to download or crop.')
classification/cache_batchapi_outputs.py

@@ -1,54 +1,54 @@
-########
-#
-# cache_batchapi_outputs.py
-#
-# Script to cache Batch Detection API outputs.
-#
-# This script can handle either the Batch Detection API JSON Response or the
-# detections JSON.
-#
-# Batch Detection API Response format:
-#
-# {
-#     "Status": {
-#         "request_status": "completed",
-#         "message": {
-#             "num_failed_shards": 0,
-#             "output_file_urls": {
-#                 "detections": "https://url/to/detections.json",
-#                 "failed_images": "https://url/to/failed_images.json",
-#                 "images": https://url/to/images.json",
-#             }
-#         },
-#     },
-#     "Endpoint": "/v3/camera-trap/detection-batch/request_detections",
-#     "TaskId": "ea26326e-7e0d-4524-a9ea-f57a5799d4ba"
-# }
-#
-# Detections JSON format:
-#
-# {
-#     "info": {...}
-#     "detection_categories": {...}
-#     "classification_categories": {...}
-#     "images": [
-#         {
-#             "file": "path/from/base/dir/image1.jpg",
-#             "max_detection_conf": 0.926,
-#             "detections": [{
-#                 "category": "1",
-#                 "conf": 0.061,
-#                 "bbox": [0.0451, 0.1849, 0.3642, 0.4636]
-#             }]
-#         }
-#     ]
-# }
-#
-# Batch Detection API Output Format:
-#
-# github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#api-outputs
-#
-########
+"""
+
+cache_batchapi_outputs.py
+
+Script to cache Batch Detection API outputs.
+
+This script can handle either the Batch Detection API JSON Response or the
+detections JSON.
+
+Batch Detection API Response format:
+
+{
+    "Status": {
+        "request_status": "completed",
+        "message": {
+            "num_failed_shards": 0,
+            "output_file_urls": {
+                "detections": "https://url/to/detections.json",
+                "failed_images": "https://url/to/failed_images.json",
+                "images": https://url/to/images.json",
+            }
+        },
+    },
+    "Endpoint": "/v3/camera-trap/detection-batch/request_detections",
+    "TaskId": "ea26326e-7e0d-4524-a9ea-f57a5799d4ba"
+}
+
+Detections JSON format:
+
+{
+    "info": {...}
+    "detection_categories": {...}
+    "classification_categories": {...}
+    "images": [
+        {
+            "file": "path/from/base/dir/image1.jpg",
+            "max_detection_conf": 0.926,
+            "detections": [{
+                "category": "1",
+                "conf": 0.061,
+                "bbox": [0.0451, 0.1849, 0.3642, 0.4636]
+            }]
+        }
+    ]
+}
+
+Batch Detection API Output Format:
+
+github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#api-outputs
+
+"""
 
 #%% Imports
 
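The detections JSON documented in this header follows the standard MegaDetector batch output format: per-image records with a 'file' path and a list of detections, each carrying a 'category' ID, a 'conf' score, and a normalized [x_min, y_min, width, height] 'bbox'. The following is a minimal sketch of reading such a file and keeping only confident detections; the file name and threshold are illustrative.

import json

CONF_THRESHOLD = 0.8  # illustrative threshold

with open('detections.json', 'r') as f:
    results = json.load(f)

category_id_to_name = results['detection_categories']

for im in results['images']:
    for d in im.get('detections', []):
        if d['conf'] < CONF_THRESHOLD:
            continue
        # bbox values are fractions of image width/height: [x_min, y_min, width, height]
        print(im['file'], category_id_to_name[d['category']], d['conf'], d['bbox'])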
classification/create_classification_dataset.py

@@ -1,73 +1,63 @@
-########
-#
-# create_classification_dataset.py
-#
-# Creates a classification dataset CSV with a corresponding JSON file determining
-# the train/val/test split.
-#
-# This script takes as input a "queried images" JSON file whose keys are paths to
-# images and values are dictionaries containing information relevant for training
-# a classifier, including labels and (optionally) ground-truth bounding boxes.
-# The image paths are in the format `<dataset-name>/<blob-name>` where we assume
-# that the dataset name does not contain '/'.
-#
-# {
-#     "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
-#         "dataset": "caltech",
-#         "location": 13,
-#         "class": "mountain_lion",  # class from dataset
-#         "bbox": [{"category": "animal",
-#                   "bbox": [0, 0.347, 0.237, 0.257]}],  # ground-truth bbox
-#         "label": ["monutain_lion"]  # labels to use in classifier
-#     },
-#     "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
-#         "dataset": "caltech",
-#         "location": 13,
-#         "class": "mountain_lion",  # class from dataset
-#         "label": ["monutain_lion"]  # labels to use in classifier
-#     },
-#     ...
-# }
-#
-# We assume that the tuple (dataset, location) identifies a unique location. In
-# other words, we assume that no two datasets have overlapping locations. This
-# probably isn't 100% true, but it's pretty much the best we can do in terms of
-# avoiding overlapping locations between the train/val/test splits.
-#
-# This script outputs 3 files to <output_dir>:
-#
-# 1) classification_ds.csv, contains columns:
-#
-# - 'path': str, path to cropped images
-# - 'dataset': str, name of dataset
-# - 'location': str, location that image was taken, as saved in MegaDB
-# - 'dataset_class': str, original class assigned to image, as saved in MegaDB
-# - 'confidence': float, confidence that this crop is of an actual animal,
-#   1.0 if the crop is a "ground truth bounding box" (i.e., from MegaDB),
-#   <= 1.0 if the bounding box was detected by MegaDetector
-# - 'label': str, comma-separated list of label(s) assigned to this crop for
-#   the sake of classification
-#
-# 2) label_index.json: maps integer to label name
-#
-# - keys are string representations of Python integers (JSON requires keys to
-#   be strings), numbered from 0 to num_labels-1
-# - values are strings, label names
-#
-# 3) splits.json: serialization of a Python dict that maps each split
-#   ['train', 'val', 'test'] to a list of length-2 lists, where each inner list
-#   is [<dataset>, <location>]
-#
-########
+"""
 
-#%% Example usage
+create_classification_dataset.py
+
+Creates a classification dataset CSV with a corresponding JSON file determining
+the train/val/test split.
+
+This script takes as input a "queried images" JSON file whose keys are paths to
+images and values are dictionaries containing information relevant for training
+a classifier, including labels and (optionally) ground-truth bounding boxes.
+The image paths are in the format `<dataset-name>/<blob-name>` where we assume
+that the dataset name does not contain '/'.
+
+{
+    "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
+        "dataset": "caltech",
+        "location": 13,
+        "class": "mountain_lion",  # class from dataset
+        "bbox": [{"category": "animal",
+                  "bbox": [0, 0.347, 0.237, 0.257]}],  # ground-truth bbox
+        "label": ["monutain_lion"]  # labels to use in classifier
+    },
+    "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
+        "dataset": "caltech",
+        "location": 13,
+        "class": "mountain_lion",  # class from dataset
+        "label": ["monutain_lion"]  # labels to use in classifier
+    },
+    ...
+}
+
+We assume that the tuple (dataset, location) identifies a unique location. In
+other words, we assume that no two datasets have overlapping locations. This
+probably isn't 100% true, but it's pretty much the best we can do in terms of
+avoiding overlapping locations between the train/val/test splits.
+
+This script outputs 3 files to <output_dir>:
+
+1) classification_ds.csv, contains columns:
+
+- 'path': str, path to cropped images
+- 'dataset': str, name of dataset
+- 'location': str, location that image was taken, as saved in MegaDB
+- 'dataset_class': str, original class assigned to image, as saved in MegaDB
+- 'confidence': float, confidence that this crop is of an actual animal,
+  1.0 if the crop is a "ground truth bounding box" (i.e., from MegaDB),
+  <= 1.0 if the bounding box was detected by MegaDetector
+- 'label': str, comma-separated list of label(s) assigned to this crop for
+  the sake of classification
+
+2) label_index.json: maps integer to label name
+
+- keys are string representations of Python integers (JSON requires keys to
+  be strings), numbered from 0 to num_labels-1
+- values are strings, label names
+
+3) splits.json: serialization of a Python dict that maps each split
+  ['train', 'val', 'test'] to a list of length-2 lists, where each inner list
+  is [<dataset>, <location>]
 
-"""
-python create_classification_dataset.py \
-    run_idfg2 \
-    --queried-images-json run_idfg2/queried_images.json \
-    --cropped-images-dir /ssd/crops_sq \
-    -d $HOME/classifier-training/mdcache -v "4.1" -t 0.8
 """
 
 #%% Imports and constants

@@ -87,6 +77,17 @@ from tqdm import tqdm
 from classification import detect_and_crop
 
 
+#%% Example usage
+
+"""
+python create_classification_dataset.py \
+    run_idfg2 \
+    --queried-images-json run_idfg2/queried_images.json \
+    --cropped-images-dir /ssd/crops_sq \
+    -d $HOME/classifier-training/mdcache -v "4.1" -t 0.8
+"""
+
+
 DATASET_FILENAME = 'classification_ds.csv'
 LABEL_INDEX_FILENAME = 'label_index.json'
 SPLITS_FILENAME = 'splits.json'
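The docstring above specifies three outputs: the dataset CSV, a label_index.json whose keys are stringified integers from 0 to num_labels-1, and a splits.json mapping 'train'/'val'/'test' to lists of [dataset, location] pairs, with (dataset, location) treated as the unit that must not straddle splits. The sketch below derives the latter two files from a queried-images dict of the shape shown in the docstring; the split fractions, random seed, and file names are illustrative and are not the script's actual logic.

import json
import random

with open('queried_images.json', 'r') as f:
    queried_images = json.load(f)

# Unique labels and unique (dataset, location) pairs; locations are coerced to
# strings here only to allow a stable sort
labels = sorted({lab for info in queried_images.values() for lab in info['label']})
locations = sorted({(info['dataset'], str(info['location']))
                    for info in queried_images.values()})

# label_index.json: stringified integer keys 0..num_labels-1
label_index = {str(i): name for i, name in enumerate(labels)}

# splits.json: assign whole locations to splits so no location straddles two splits
random.seed(0)
random.shuffle(locations)
n_train = int(0.7 * len(locations))
n_val = int(0.15 * len(locations))
splits = {
    'train': [list(loc) for loc in locations[:n_train]],
    'val':   [list(loc) for loc in locations[n_train:n_train + n_val]],
    'test':  [list(loc) for loc in locations[n_train + n_val:]],
}

with open('label_index.json', 'w') as f:
    json.dump(label_index, f, indent=1)
with open('splits.json', 'w') as f:
    json.dump(splits, f, indent=1)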