megadetector 5.0.8-py3-none-any.whl → 5.0.9-py3-none-any.whl

This diff shows the content of two publicly available package versions as released to a public registry. It is provided for informational purposes only.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
api/__init__.py ADDED
api/batch_processing/api_core/batch_service/score.py CHANGED
@@ -435,6 +435,5 @@ def main():
     with open(task_output_path, 'w', encoding='utf-8') as f:
         json.dump(detections, f, ensure_ascii=False)
 
-
 if __name__ == '__main__':
     main()
api/batch_processing/api_core/server_job_status_table.py CHANGED
@@ -148,6 +148,5 @@ class TestJobStatusTable(unittest.TestCase):
         item_read = table.read_job_status(job_id)
         self.assertIsNone(item_read)
 
-
 if __name__ == '__main__':
     unittest.main()
api/batch_processing/api_core_support/aggregate_results_manually.py CHANGED
@@ -42,6 +42,5 @@ def main():
     output_file_urls_str = json.dumps(output_file_urls)
     print(output_file_urls_str)
 
-
 if __name__ == '__main__':
     main()
api/batch_processing/api_support/summarize_daily_activity.py CHANGED
@@ -148,6 +148,5 @@ def main():
     send_message()
     time.sleep(24 * 60 * 60)
 
-
 if __name__ == '__main__':
     main()
api/batch_processing/data_preparation/manage_local_batch.py CHANGED
@@ -1,64 +1,64 @@
-########
-#
-# manage_local_batch.py
-#
-# Semi-automated process for managing a local MegaDetector job, including
-# standard postprocessing steps.
-#
-# This script is not intended to be run from top to bottom like a typical Python script,
-# it's a notebook disguised with a .py extension. It's the Bestest Most Awesome way to
-# run MegaDetector, but it's also pretty subtle; if you want to play with this, you might
-# want to check in with cameratraps@lila.science for some tips. Otherwise... YMMV.
-#
-# Some general notes on using this script, which I do in Spyder, though everything will be
-# the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
-# script):
-#
-# * Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
-# say I'm running a job for an organization called "bibblebop"; I have a big folder of
-# job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
-# (the filename doesn't matter, it just helps me keep these organized).
-#
-# * There are three variables you need to set in this script before you start running code:
-# "input_path", "organization_name_short", and "job_date". You will get a sensible error if you forget
-# to set any of these. In this case I might set those to "/data/bibblebobcamerastuff",
-# "bibblebop", and "2023-07-26", respectively.
-#
-# * The defaults assume you want to split the job into two tasks (this is the default because I have
-# two GPUs). Nothing bad will happen if you do this on a zero-GPU or single-GPU machine, but if you
-# want everything to run in one logical task, change "n_gpus" and "n_jobs" to 1 (instead of 2).
-#
-# * After setting the required variables, I run the first few cells - up to and including the one
-# called "Generate commands" - which collectively take basically zero seconds. After you run the
-# "Generate commands" cell, you will have a folder that looks something like:
-#
-# ~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
-#
-# On Windows, this means:
-#
-# ~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
-#
-# Everything related to this job - scripts, outputs, intermediate stuff - will be in this folder.
-# Specifically, after the "Generate commands" cell, you'll have scripts in that folder called something
-# like:
-#
-# run_chunk_000_gpu_00.sh (or .bat on Windows)
-#
-# Personally, I like to run that script directly in a command prompt (I just leave Spyder open, though
-# it's OK if Spyder gets shut down while MD is running).
-#
-# At this point, once you get the hang of it, you've invested about zero seconds of human time,
-# but possibly several days of unattended compute time, depending on the size of your job.
-#
-# * Then when the jobs are done, back to the interactive environment! I run the next few cells,
-# which make sure the job finished OK, and the cell called "Post-processing (pre-RDE)", which
-# generates an HTML preview of the results. You are very plausibly done at this point, and can ignore
-# all the remaining cells. If you want to do things like repeat detection elimination, or running
-# a classifier, or splitting your results file up in specialized ways, there are cells for all of those
-# things, but now you're in power-user territory, so I'm going to leave this guide here. Email
-# cameratraps@lila.science with questions about the fancy stuff.
-#
-########
+"""
+
+manage_local_batch.py
+
+Semi-automated process for managing a local MegaDetector job, including
+standard postprocessing steps.
+
+This script is not intended to be run from top to bottom like a typical Python script,
+it's a notebook disguised with a .py extension. It's the Bestest Most Awesome way to
+run MegaDetector, but it's also pretty subtle; if you want to play with this, you might
+want to check in with cameratraps@lila.science for some tips. Otherwise... YMMV.
+
+Some general notes on using this script, which I do in Spyder, though everything will be
+the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
+script):
+
+* Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
+say I'm running a job for an organization called "bibblebop"; I have a big folder of
+job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
+(the filename doesn't matter, it just helps me keep these organized).
+
+* There are three variables you need to set in this script before you start running code:
+"input_path", "organization_name_short", and "job_date". You will get a sensible error if you forget
+to set any of these. In this case I might set those to "/data/bibblebobcamerastuff",
+"bibblebop", and "2023-07-26", respectively.
+
+* The defaults assume you want to split the job into two tasks (this is the default because I have
+two GPUs). Nothing bad will happen if you do this on a zero-GPU or single-GPU machine, but if you
+want everything to run in one logical task, change "n_gpus" and "n_jobs" to 1 (instead of 2).
+
+* After setting the required variables, I run the first few cells - up to and including the one
+called "Generate commands" - which collectively take basically zero seconds. After you run the
+"Generate commands" cell, you will have a folder that looks something like:
+
+~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
+
+On Windows, this means:
+
+~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
+
+Everything related to this job - scripts, outputs, intermediate stuff - will be in this folder.
+Specifically, after the "Generate commands" cell, you'll have scripts in that folder called something
+like:
+
+run_chunk_000_gpu_00.sh (or .bat on Windows)
+
+Personally, I like to run that script directly in a command prompt (I just leave Spyder open, though
+it's OK if Spyder gets shut down while MD is running).
+
+At this point, once you get the hang of it, you've invested about zero seconds of human time,
+but possibly several days of unattended compute time, depending on the size of your job.
+
+* Then when the jobs are done, back to the interactive environment! I run the next few cells,
+which make sure the job finished OK, and the cell called "Post-processing (pre-RDE)", which
+generates an HTML preview of the results. You are very plausibly done at this point, and can ignore
+all the remaining cells. If you want to do things like repeat detection elimination, or running
+a classifier, or splitting your results file up in specialized ways, there are cells for all of those
+things, but now you're in power-user territory, so I'm going to leave this guide here. Email
+cameratraps@lila.science with questions about the fancy stuff.
+
+"""
 
 #%% Imports and constants
 
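The docstring above names the three variables a user has to set before running any cells. A minimal sketch of that first step, using the docstring's own example values:

input_path = '/data/bibblebobcamerastuff'
organization_name_short = 'bibblebop'
job_date = '2023-07-26'
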
@@ -803,7 +803,7 @@ if render_animals_only:
 os.makedirs(output_base, exist_ok=True)
 print('Processing to {}'.format(output_base))
 
-options.api_output_file = combined_api_output_file
+options.md_results_file = combined_api_output_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
@@ -939,7 +939,7 @@ os.makedirs(output_base, exist_ok=True)
 
 print('Processing post-RDE to {}'.format(output_base))
 
-options.api_output_file = filtered_output_filename
+options.md_results_file = filtered_output_filename
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
@@ -2014,7 +2014,7 @@ output_base = os.path.join(postprocessing_output_folder, folder_token + \
 os.makedirs(output_base, exist_ok=True)
 print('Processing {} to {}'.format(base_task_name, output_base))
 
-options.api_output_file = sequence_smoothed_classification_file
+options.md_results_file = sequence_smoothed_classification_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
@@ -2134,7 +2134,7 @@ output_base_large_boxes = os.path.join(postprocessing_output_folder,
 os.makedirs(output_base_large_boxes, exist_ok=True)
 print('Processing post-RDE, post-size-separation to {}'.format(output_base_large_boxes))
 
-options.api_output_file = size_separated_file
+options.md_results_file = size_separated_file
 options.output_dir = output_base_large_boxes
 
 ppresults = process_batch_results(options)
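The recurring change in the four hunks above is a field rename on the post-processing options object: api_output_file becomes md_results_file. A minimal sketch of calling code after the rename, assuming the PostProcessingOptions class name and import path (neither is shown in this diff):

from api.batch_processing.postprocessing.postprocess_batch_results import \
    PostProcessingOptions, process_batch_results

options = PostProcessingOptions()
# This attribute was called api_output_file in 5.0.8
options.md_results_file = 'combined-md-results.json'
options.output_dir = '/path/to/preview-folder'
ppresults = process_batch_results(options)
print(ppresults.output_html_file)
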
api/batch_processing/data_preparation/manage_video_batch.py CHANGED
@@ -1,11 +1,11 @@
-########
-#
-# manage_video_batch.py
-#
-# Notebook-esque script to manage the process of running a local batch of videos
-# through MD. Defers most of the heavy lifting to manage_local_batch.py .
-#
-########
+"""
+
+manage_video_batch.py
+
+Notebook-esque script to manage the process of running a local batch of videos
+through MD. Defers most of the heavy lifting to manage_local_batch.py .
+
+"""
 
 #%% Imports and constants
 
api/batch_processing/integration/digiKam/xmp_integration.py CHANGED
@@ -460,7 +460,6 @@ def main():
     else:
         process_input_data(options)
 
-
 if __name__ == '__main__':
 
     main()
api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py CHANGED
@@ -121,7 +121,6 @@ def main():
     update_data(sql)
     mysql_connection.commit()
 
-
 if __name__ == '__main__':
     main()
 
api/batch_processing/postprocessing/add_max_conf.py CHANGED
@@ -1,15 +1,15 @@
-########
-#
-# add_max_conf.py
-#
-# The MD output format included a "max_detection_conf" field with each image
-# up to and including version 1.2; it was removed as of version 1.3 (it's
-# redundant with the individual detection confidence values).
-#
-# Just in case someone took a dependency on that field, this script allows you
-# to add it back to an existing .json file.
-#
-########
+"""
+
+add_max_conf.py
+
+The MD output format included a "max_detection_conf" field with each image
+up to and including version 1.2; it was removed as of version 1.3 (it's
+redundant with the individual detection confidence values).
+
+Just in case someone took a dependency on that field, this script allows you
+to add it back to an existing .json file.
+
+"""
 
 #%% Imports and constants
 
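The docstring explains why the field was removed: max_detection_conf is derivable from the per-detection confidences. A minimal sketch of that derivation, assuming the standard MD results layout (an 'images' list with a 'conf' value per detection); this is an illustration, not the script's actual implementation, which doesn't appear in this diff:

import json

with open('md-results.json', 'r', encoding='utf-8') as f:  # hypothetical path
    results = json.load(f)

for im in results['images']:
    # Images that failed to process may carry no detections list
    detections = im.get('detections', None) or []
    im['max_detection_conf'] = max((d['conf'] for d in detections), default=0.0)
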
api/batch_processing/postprocessing/categorize_detections_by_size.py CHANGED
@@ -1,11 +1,11 @@
-########
-#
-# categorize_detections_by_size.py
-#
-# Given an API output .json file, creates a separate category for bounding boxes
-# above one or more size thresholds.
-#
-########
+"""
+
+categorize_detections_by_size.py
+
+Given a MegaDetector .json file, creates a separate category for bounding boxes
+above one or more size thresholds.
+
+"""
 
 #%% Constants and imports
 
@@ -18,24 +18,42 @@ from tqdm import tqdm
 #%% Support classes
 
 class SizeCategorizationOptions:
-
-    # Should be sorted from smallest to largest
+    """
+    Options used to parameterize categorize_detections_by_size().
+    """
+
+    #: Thresholds to use for separation, as a fraction of the image size.
+    #:
+    #: Should be sorted from smallest to largest.
     size_thresholds = [0.95]
 
-    # List of category numbers to use in separation; uses all categories if None
+    #: List of category numbers to use in separation; uses all categories if None
     categories_to_separate = None
 
-    # Can be "size", "width", or "height"
+    #: Dimension to use for thresholding; can be "size", "width", or "height"
     measurement = 'size'
 
-    # Should have the same length as "size_thresholds"
+    #: Categories to assign to thresholded ranges; should have the same length as
+    #: "size_thresholds".
     size_category_names = ['large_detection']
 
 
 #%% Main functions
 
 def categorize_detections_by_size(input_file,output_file=None,options=None):
-
+    """
+    Given a MegaDetector .json file, creates a separate category for bounding boxes
+    above one or more size thresholds, optionally writing results to [output_file].
+
+    Args:
+        input_file (str): file to process
+        output_file (str, optional): optional output file
+        options (SizeCategorizationOptions): categorization parameters
+
+    Returns:
+        dict: data loaded from [input_file], with the new size-based categories.
+        Identical to what's written to [output_file], if [output_file] is not None.
+    """
+
     if options is None:
         options = SizeCategorizationOptions()
 
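The new docstring fully specifies the call signature, so usage follows directly from it; a short sketch (file names hypothetical, import path assumed from the wheel's layout):

from api.batch_processing.postprocessing.categorize_detections_by_size import \
    SizeCategorizationOptions, categorize_detections_by_size

options = SizeCategorizationOptions()
options.size_thresholds = [0.9, 0.95]  # sorted from smallest to largest
options.size_category_names = ['large_detection', 'very_large_detection']
options.measurement = 'size'  # or 'width' / 'height'

# Writes the re-categorized results and also returns them as a dict
results = categorize_detections_by_size('md-results.json',
                                        output_file='md-results-by-size.json',
                                        options=options)
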
api/batch_processing/postprocessing/combine_api_outputs.py CHANGED
@@ -1,54 +1,56 @@
-########
-#
-# combine_api_outputs.py
-#
-# Merges two or more .json files in batch API output format, optionally
-# writing the results to another .json file.
-#
-# * Concatenates image lists, erroring if images are not unique.
-# * Errors if class lists are conflicting; errors on unrecognized fields.
-# * Checks compatibility in info structs, within reason.
-#
-# File format:
-#
-# https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
-#
-# Command-line use:
-#
-# combine_api_outputs input1.json input2.json ... inputN.json output.json
-#
-# Also see combine_api_shard_files() (not exposed via the command line yet) to
-# combine the intermediate files created by the API.
-#
-# This does no checking for redundancy; if you are looking to ensemble
-# the results of multiple model versions, see merge_detections.py.
-#
-########
+"""
+
+combine_api_outputs.py
+
+Merges two or more .json files in batch API output format, optionally
+writing the results to another .json file.
+
+* Concatenates image lists, erroring if images are not unique.
+* Errors if class lists are conflicting; errors on unrecognized fields.
+* Checks compatibility in info structs, within reason.
+
+File format:
+
+https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
+
+Command-line use:
+
+combine_api_outputs input1.json input2.json ... inputN.json output.json
+
+Also see combine_api_shard_files() (not exposed via the command line yet) to
+combine the intermediate files created by the API.
+
+This does no checking for redundancy; if you are looking to ensemble
+the results of multiple model versions, see merge_detections.py.
+
+"""
 
 #%% Constants and imports
 
 import argparse
 import sys
 import json
-from typing import Any, Dict, Iterable, Mapping, List, Optional
 
 
 #%% Merge functions
 
-def combine_api_output_files(input_files: List[str],
-                             output_file: Optional[str] = None,
-                             require_uniqueness: bool = True,
-                             verbose: bool = True
-                             ) -> Dict[str, Any]:
+def combine_api_output_files(input_files,
+                             output_file=None,
+                             require_uniqueness=True,
+                             verbose=True):
     """
-    Merges list of JSON API detection files *input_files* into a single
-    dictionary, optionally writing the result to *output_file*.
+    Merges the list of MD results files [input_files] into a single
+    dictionary, optionally writing the result to [output_file].
 
     Args:
-        input_files: list of str, paths to JSON detection files
-        output_file: optional str, path to write merged JSON
-        require_uniqueness: bool, whether to require that the images in
+        input_files (list of str): paths to JSON detection files
+        output_file (str, optional): path to write merged JSON
+        require_uniqueness (bool): whether to require that the images in
             each list of images be unique
+
+    Returns:
+        dict: merged dictionaries loaded from [input_files], identical to what's
+        written to [output_file] if [output_file] is not None
     """
 
     def print_if_verbose(s):
@@ -73,27 +75,27 @@ def combine_api_output_files(input_files,
     return merged_dict
 
 
-def combine_api_output_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
-                                    require_uniqueness: bool = True
-                                    ) -> Dict[str, Any]:
+def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
     """
-    Merges the list of API detection dictionaries *input_dicts*. See header
-    comment for details on merge rules.
+    Merges the list of MD results dictionaries [input_dicts] into a single dict.
+    See module header comment for details on merge rules.
 
     Args:
-        input_dicts: list of dicts, each dict is the JSON of the detections
-            output file from the Batch Processing API
-        require_uniqueness: bool, whether to require that the images in
-            each input dict be unique
-
-    Returns: dict, represents the merged JSON
+        input_dicts (list of dicts): list of dicts in which each dict represents the
+            contents of a MD output file
+        require_uniqueness (bool): whether to require that the images in
+            each input dict be unique; if this is True and image filenames are
+            not unique, an error is raised.
+
+    Returns
+        dict: merged MD results
     """
 
     # Map image filenames to detections, we'll convert to a list later
     images = {}
-    info: Dict[str, str] = {}
-    detection_categories: Dict[str, str] = {}
-    classification_categories: Dict[str, str] = {}
+    info = {}
+    detection_categories = {}
+    classification_categories = {}
     n_redundant_images = 0
     n_images = 0
 
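For callers that already have results loaded in memory, the dictionary-level function above can be used directly; a brief sketch (input file names hypothetical):

import json

with open('chunk_000.json') as f:
    dict_a = json.load(f)
with open('chunk_001.json') as f:
    dict_b = json.load(f)

# Raises an error if the same image appears in more than one input
merged = combine_api_output_dictionaries([dict_a, dict_b], require_uniqueness=True)
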
@@ -182,8 +184,20 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
 
 def combine_api_shard_files(input_files, output_file=None):
     """
-    Merges the list of .json-formatted API shard files *input_files* into a single
-    list of dictionaries, optionally writing the result to *output_file*.
+    Merges the list of .json-formatted API shard files [input_files] into a single
+    list of dictionaries, optionally writing the result to [output_file].
+
+    This operates on mostly-deprecated API shard files, not MegaDetector results files.
+    If you don't know what an API shard file is, you don't want this function.
+
+    Args:
+        input_files (list of str): files to merge
+        output_file (str, optiona): file to which we should write merged results
+
+    Returns:
+        dict: merged results
+
+    :meta private:
     """
 
     input_lists = []
@@ -215,6 +229,7 @@ def combine_api_shard_files(input_files, output_file=None):
 #%% Command-line driver
 
 def main():
+
     parser = argparse.ArgumentParser()
     parser.add_argument(
         'input_paths', nargs='+',
220
235
  'input_paths', nargs='+',
@@ -230,6 +245,5 @@ def main():
230
245
  args = parser.parse_args()
231
246
  combine_api_output_files(args.input_paths, args.output_path)
232
247
 
233
-
234
248
  if __name__ == '__main__':
235
249
  main()
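
Given the command-line form documented in the module docstring, the equivalent Python call is a single function invocation; a sketch (file names hypothetical, 'images' being the standard key for the image list in MD results):

from api.batch_processing.postprocessing.combine_api_outputs import combine_api_output_files

# Merges the inputs and writes the result; also returns the merged dict
merged = combine_api_output_files(['input1.json', 'input2.json'],
                                  output_file='output.json')
print('Merged results contain {} images'.format(len(merged['images'])))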