megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector has been flagged by the registry (details are available on the registry page).

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
api/__init__.py ADDED (empty file)
api/batch_processing/__init__.py ADDED (empty file)
api/batch_processing/api_core/__init__.py ADDED (empty file)
api/batch_processing/api_core/batch_service/__init__.py ADDED (empty file)
api/batch_processing/api_core/batch_service/score.py CHANGED
@@ -435,6 +435,5 @@ def main():
     with open(task_output_path, 'w', encoding='utf-8') as f:
         json.dump(detections, f, ensure_ascii=False)
 
-
 if __name__ == '__main__':
     main()
api/batch_processing/api_core/server_job_status_table.py CHANGED
@@ -148,6 +148,5 @@ class TestJobStatusTable(unittest.TestCase):
         item_read = table.read_job_status(job_id)
         self.assertIsNone(item_read)
 
-
 if __name__ == '__main__':
     unittest.main()
api/batch_processing/api_core_support/__init__.py ADDED (empty file)
api/batch_processing/api_core_support/aggregate_results_manually.py CHANGED
@@ -42,6 +42,5 @@ def main():
     output_file_urls_str = json.dumps(output_file_urls)
     print(output_file_urls_str)
 
-
 if __name__ == '__main__':
     main()
api/batch_processing/api_support/__init__.py ADDED (empty file)
api/batch_processing/api_support/summarize_daily_activity.py CHANGED
@@ -148,6 +148,5 @@ def main():
     send_message()
     time.sleep(24 * 60 * 60)
 
-
 if __name__ == '__main__':
     main()
api/batch_processing/data_preparation/__init__.py ADDED (empty file)
api/batch_processing/data_preparation/manage_local_batch.py CHANGED
@@ -1,64 +1,64 @@
-########
-#
-# manage_local_batch.py
-#
-# Semi-automated process for managing a local MegaDetector job, including
-# standard postprocessing steps.
-#
-# This script is not intended to be run from top to bottom like a typical Python script,
-# it's a notebook disguised with a .py extension. It's the Bestest Most Awesome way to
-# run MegaDetector, but it's also pretty subtle; if you want to play with this, you might
-# want to check in with cameratraps@lila.science for some tips. Otherwise... YMMV.
-#
-# Some general notes on using this script, which I do in Spyder, though everything will be
-# the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
-# script):
-#
-# * Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
-#   say I'm running a job for an organization called "bibblebop"; I have a big folder of
-#   job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
-#   (the filename doesn't matter, it just helps me keep these organized).
-#
-# * There are three variables you need to set in this script before you start running code:
-#   "input_path", "organization_name_short", and "job_date". You will get a sensible error if you forget
-#   to set any of these. In this case I might set those to "/data/bibblebobcamerastuff",
-#   "bibblebop", and "2023-07-26", respectively.
-#
-# * The defaults assume you want to split the job into two tasks (this is the default because I have
-#   two GPUs). Nothing bad will happen if you do this on a zero-GPU or single-GPU machine, but if you
-#   want everything to run in one logical task, change "n_gpus" and "n_jobs" to 1 (instead of 2).
-#
-# * After setting the required variables, I run the first few cells - up to and including the one
-#   called "Generate commands" - which collectively take basically zero seconds. After you run the
-#   "Generate commands" cell, you will have a folder that looks something like:
-#
-#   ~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
-#
-#   On Windows, this means:
-#
-#   ~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
-#
-#   Everything related to this job - scripts, outputs, intermediate stuff - will be in this folder.
-#   Specifically, after the "Generate commands" cell, you'll have scripts in that folder called something
-#   like:
-#
-#   run_chunk_000_gpu_00.sh (or .bat on Windows)
-#
-#   Personally, I like to run that script directly in a command prompt (I just leave Spyder open, though
-#   it's OK if Spyder gets shut down while MD is running).
-#
-#   At this point, once you get the hang of it, you've invested about zero seconds of human time,
-#   but possibly several days of unattended compute time, depending on the size of your job.
-#
-# * Then when the jobs are done, back to the interactive environment! I run the next few cells,
-#   which make sure the job finished OK, and the cell called "Post-processing (pre-RDE)", which
-#   generates an HTML preview of the results. You are very plausibly done at this point, and can ignore
-#   all the remaining cells. If you want to do things like repeat detection elimination, or running
-#   a classifier, or splitting your results file up in specialized ways, there are cells for all of those
-#   things, but now you're in power-user territory, so I'm going to leave this guide here. Email
-#   cameratraps@lila.science with questions about the fancy stuff.
-#
-########
+"""
+
+manage_local_batch.py
+
+Semi-automated process for managing a local MegaDetector job, including
+standard postprocessing steps.
+
+This script is not intended to be run from top to bottom like a typical Python script,
+it's a notebook disguised with a .py extension. It's the Bestest Most Awesome way to
+run MegaDetector, but it's also pretty subtle; if you want to play with this, you might
+want to check in with cameratraps@lila.science for some tips. Otherwise... YMMV.
+
+Some general notes on using this script, which I do in Spyder, though everything will be
+the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
+script):
+
+* Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
+  say I'm running a job for an organization called "bibblebop"; I have a big folder of
+  job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
+  (the filename doesn't matter, it just helps me keep these organized).
+
+* There are three variables you need to set in this script before you start running code:
+  "input_path", "organization_name_short", and "job_date". You will get a sensible error if you forget
+  to set any of these. In this case I might set those to "/data/bibblebobcamerastuff",
+  "bibblebop", and "2023-07-26", respectively.
+
+* The defaults assume you want to split the job into two tasks (this is the default because I have
+  two GPUs). Nothing bad will happen if you do this on a zero-GPU or single-GPU machine, but if you
+  want everything to run in one logical task, change "n_gpus" and "n_jobs" to 1 (instead of 2).
+
+* After setting the required variables, I run the first few cells - up to and including the one
+  called "Generate commands" - which collectively take basically zero seconds. After you run the
+  "Generate commands" cell, you will have a folder that looks something like:
+
+  ~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
+
+  On Windows, this means:
+
+  ~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
+
+  Everything related to this job - scripts, outputs, intermediate stuff - will be in this folder.
+  Specifically, after the "Generate commands" cell, you'll have scripts in that folder called something
+  like:
+
+  run_chunk_000_gpu_00.sh (or .bat on Windows)
+
+  Personally, I like to run that script directly in a command prompt (I just leave Spyder open, though
+  it's OK if Spyder gets shut down while MD is running).
+
+  At this point, once you get the hang of it, you've invested about zero seconds of human time,
+  but possibly several days of unattended compute time, depending on the size of your job.
+
+* Then when the jobs are done, back to the interactive environment! I run the next few cells,
+  which make sure the job finished OK, and the cell called "Post-processing (pre-RDE)", which
+  generates an HTML preview of the results. You are very plausibly done at this point, and can ignore
+  all the remaining cells. If you want to do things like repeat detection elimination, or running
+  a classifier, or splitting your results file up in specialized ways, there are cells for all of those
+  things, but now you're in power-user territory, so I'm going to leave this guide here. Email
+  cameratraps@lila.science with questions about the fancy stuff.
+
+"""
 
 #%% Imports and constants
 
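For quick reference, a minimal sketch of the three required variables described in the docstring above, using the example values given in the docstring itself:

    input_path = '/data/bibblebobcamerastuff'
    organization_name_short = 'bibblebop'
    job_date = '2023-07-26'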
@@ -86,6 +86,8 @@ from api.batch_processing.postprocessing.postprocess_batch_results import (
 from detection.run_detector import get_detector_version_from_filename
 from md_utils.ct_utils import image_file_to_camera_folder
 
+## Inference options
+
 # To specify a non-default confidence threshold for including detections in the .json file
 json_threshold = None
 
@@ -109,6 +111,11 @@ quiet_mode = True
 # will use its default size, which is 1280 * 1.3, which is almost always what you want.
 image_size = None
 
+# Should we include image size, timestamp, and/or EXIF data in MD output?
+include_image_size = False
+include_image_timestamp = False
+include_exif_data = False
+
 # Only relevant when running on CPU
 ncores = 1
 
@@ -187,7 +194,7 @@ augment = False
 
 ## Constants related to tiled inference
 
-use_tiled_inference = True
+use_tiled_inference = False
 
 # Should we delete tiles after each job? Only set this to False for debugging;
 # large jobs will take up a lot of space if you keep tiles around after each task.
@@ -234,7 +241,7 @@ checkpoint_frequency = 10000
 approx_images_per_second = estimate_md_images_per_second(model_file)
 
 # Rough estimate for the inference time cost of augmentation
-if augment:
+if augment and (approx_images_per_second is not None):
    approx_images_per_second = approx_images_per_second * 0.7
 
 base_task_name = organization_name_short + '-' + job_date + job_description_string + '-' + \
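This change guards the rough 0.7x augmentation slowdown against a missing throughput estimate. A minimal standalone sketch of the guarded logic, assuming (as the change implies) that estimate_md_images_per_second() can return None:

    def adjusted_images_per_second(approx_images_per_second, augment):
        # Apply the rough 0.7x augmentation penalty only when a throughput
        # estimate exists; otherwise pass None through unchanged
        if augment and (approx_images_per_second is not None):
            return approx_images_per_second * 0.7
        return approx_images_per_second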
@@ -268,6 +275,10 @@ filename_base = os.path.join(base_output_folder_name, base_task_name)
 combined_api_output_folder = os.path.join(filename_base, 'combined_api_outputs')
 postprocessing_output_folder = os.path.join(filename_base, 'preview')
 
+combined_api_output_file = os.path.join(
+    combined_api_output_folder,
+    '{}_detections.json'.format(base_task_name))
+
 os.makedirs(filename_base, exist_ok=True)
 os.makedirs(combined_api_output_folder, exist_ok=True)
 os.makedirs(postprocessing_output_folder, exist_ok=True)
@@ -494,7 +505,14 @@ for i_task,task in enumerate(task_info):
 
     overwrite_handling_string = '--overwrite_handling {}'.format(overwrite_handling)
     cmd = f'{cuda_string} python run_detector_batch.py "{model_file}" "{chunk_file}" "{output_fn}" {checkpoint_frequency_string} {checkpoint_path_string} {use_image_queue_string} {ncores_string} {quiet_string} {image_size_string} {confidence_threshold_string} {overwrite_handling_string}'
-
+
+    if include_image_size:
+        cmd += ' --include_image_size'
+    if include_image_timestamp:
+        cmd += ' --include_image_timestamp'
+    if include_exif_data:
+        cmd += ' --include_exif_data'
+
     cmd_file = os.path.join(filename_base,'run_chunk_{}_gpu_{}{}'.format(str(i_task).zfill(3),
                                                                          str(gpu_number).zfill(2),script_extension))
 
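The three new options map one-to-one onto run_detector_batch.py flags, as the hunk above shows. A self-contained sketch of the same flag-appending pattern; the model and file names here are hypothetical:

    include_image_size = True
    include_image_timestamp = False
    include_exif_data = False

    cmd = 'python run_detector_batch.py "md_v5a.0.0.pt" "chunk_000.json" "out_000.json"'
    if include_image_size:
        cmd += ' --include_image_size'
    if include_image_timestamp:
        cmd += ' --include_image_timestamp'
    if include_exif_data:
        cmd += ' --include_exif_data'
    print(cmd)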
@@ -747,10 +765,6 @@ for im in combined_results['images']:
     else:
         im['file'] = im['file'].replace(input_path + '/','',1)
 
-combined_api_output_file = os.path.join(
-    combined_api_output_folder,
-    '{}_detections.json'.format(base_task_name))
-
 with open(combined_api_output_file,'w') as f:
     json.dump(combined_results,f,indent=1)
 
@@ -789,11 +803,11 @@ if render_animals_only:
 os.makedirs(output_base, exist_ok=True)
 print('Processing to {}'.format(output_base))
 
-options.api_output_file = combined_api_output_file
+options.md_results_file = combined_api_output_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(html_output_file)
 
 
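The rename above (api_output_file to md_results_file) applies to the options object passed to process_batch_results(). A hedged usage sketch, assuming the usual PostProcessingOptions / process_batch_results imports from postprocess_batch_results and hypothetical paths:

    options = PostProcessingOptions()
    options.md_results_file = 'combined_detections.json'
    options.output_dir = 'preview/'
    ppresults = process_batch_results(options)
    print(ppresults.output_html_file)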
@@ -823,7 +837,7 @@ options.otherDetectionsThreshold = options.confidenceMin
 
 options.bRenderDetectionTiles = True
 options.maxOutputImageWidth = 2000
-options.detectionTilesMaxCrops = 300
+options.detectionTilesMaxCrops = 250
 
 # options.lineThickness = 5
 # options.boxExpansion = 8
@@ -925,12 +939,12 @@ os.makedirs(output_base, exist_ok=True)
 
 print('Processing post-RDE to {}'.format(output_base))
 
-options.api_output_file = filtered_output_filename
+options.md_results_file = filtered_output_filename
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
 
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(html_output_file)
 
 
@@ -2000,10 +2014,10 @@ output_base = os.path.join(postprocessing_output_folder, folder_token + \
 os.makedirs(output_base, exist_ok=True)
 print('Processing {} to {}'.format(base_task_name, output_base))
 
-options.api_output_file = sequence_smoothed_classification_file
+options.md_results_file = sequence_smoothed_classification_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
-path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 # import clipboard; clipboard.copy(ppresults.output_html_file)
 
 #% Zip .json files
@@ -2071,7 +2085,7 @@ for i, j in itertools.combinations(list(range(0,len(filenames))),2):
 results = compare_batch_results(options)
 
 from md_utils.path_utils import open_file
-open_file(results.html_output_file,attempt_to_open_in_wsl_host=True)
+open_file(results.html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 
 
 #%% Merge in high-confidence detections from another results file
@@ -2120,12 +2134,12 @@ output_base_large_boxes = os.path.join(postprocessing_output_folder,
 os.makedirs(output_base_large_boxes, exist_ok=True)
 print('Processing post-RDE, post-size-separation to {}'.format(output_base_large_boxes))
 
-options.api_output_file = size_separated_file
+options.md_results_file = size_separated_file
 options.output_dir = output_base_large_boxes
 
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 
 
 #%% .json splitting
@@ -2280,7 +2294,7 @@ import nbformat as nbf
 if os.name == 'nt':
     git_base = r'c:\git'
 else:
-    git_base = os.path.expanduer('~/git')
+    git_base = os.path.expanduser('~/git')
 
 input_py_file = git_base + '/MegaDetector/api/batch_processing/data_preparation/manage_local_batch.py'
 assert os.path.isfile(input_py_file)
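The last hunk fixes a typo that would raise AttributeError on non-Windows systems (os.path has no "expanduer"). The corrected cross-platform pattern, as a standalone sketch:

    import os

    if os.name == 'nt':
        git_base = r'c:\git'
    else:
        git_base = os.path.expanduser('~/git')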
api/batch_processing/data_preparation/manage_video_batch.py CHANGED
@@ -1,11 +1,11 @@
-########
-#
-# manage_video_batch.py
-#
-# Notebook-esque script to manage the process of running a local batch of videos
-# through MD. Defers most of the heavy lifting to manage_local_batch.py .
-#
-########
+"""
+
+manage_video_batch.py
+
+Notebook-esque script to manage the process of running a local batch of videos
+through MD. Defers most of the heavy lifting to manage_local_batch.py .
+
+"""
 
 #%% Imports and constants
 
api/batch_processing/integration/digiKam/xmp_integration.py CHANGED
@@ -460,7 +460,6 @@ def main():
     else:
         process_input_data(options)
 
-
 if __name__ == '__main__':
 
     main()
api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py CHANGED
@@ -121,7 +121,6 @@ def main():
     update_data(sql)
     mysql_connection.commit()
 
-
 if __name__ == '__main__':
     main()
 
api/batch_processing/postprocessing/__init__.py ADDED (empty file)
api/batch_processing/postprocessing/add_max_conf.py CHANGED
@@ -1,15 +1,15 @@
-########
-#
-# add_max_conf.py
-#
-# The MD output format included a "max_detection_conf" field with each image
-# up to and including version 1.2; it was removed as of version 1.3 (it's
-# redundant with the individual detection confidence values).
-#
-# Just in case someone took a dependency on that field, this script allows you
-# to add it back to an existing .json file.
-#
-########
+"""
+
+add_max_conf.py
+
+The MD output format included a "max_detection_conf" field with each image
+up to and including version 1.2; it was removed as of version 1.3 (it's
+redundant with the individual detection confidence values).
+
+Just in case someone took a dependency on that field, this script allows you
+to add it back to an existing .json file.
+
+"""
 
 #%% Imports and constants
 
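For context, "max_detection_conf" is just the maximum per-detection "conf" value for an image. A hedged sketch of the per-image computation this script performs, assuming standard MD output fields and treating images with no detections as 0:

    im = {'file': 'example.jpg',
          'detections': [{'conf': 0.92}, {'conf': 0.40}]}
    # Maximum detection confidence, 0 when an image has no detections
    im['max_detection_conf'] = max(
        (d['conf'] for d in im['detections']), default=0)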
api/batch_processing/postprocessing/categorize_detections_by_size.py CHANGED
@@ -1,11 +1,11 @@
-########
-#
-# categorize_detections_by_size.py
-#
-# Given an API output .json file, creates a separate category for bounding boxes
-# above one or more size thresholds.
-#
-########
+"""
+
+categorize_detections_by_size.py
+
+Given a MegaDetector .json file, creates a separate category for bounding boxes
+above one or more size thresholds.
+
+"""
 
 #%% Constants and imports
 
@@ -18,24 +18,42 @@ from tqdm import tqdm
 #%% Support classes
 
 class SizeCategorizationOptions:
-
-    # Should be sorted from smallest to largest
+    """
+    Options used to parameterize categorize_detections_by_size().
+    """
+
+    #: Thresholds to use for separation, as a fraction of the image size.
+    #:
+    #: Should be sorted from smallest to largest.
     size_thresholds = [0.95]
 
-    # List of category numbers to use in separation; uses all categories if None
+    #: List of category numbers to use in separation; uses all categories if None
     categories_to_separate = None
 
-    # Can be "size", "width", or "height"
+    #: Dimension to use for thresholding; can be "size", "width", or "height"
     measurement = 'size'
 
-    # Should have the same length as "size_thresholds"
+    #: Categories to assign to thresholded ranges; should have the same length as
+    #: "size_thresholds".
     size_category_names = ['large_detection']
 
 
 #%% Main functions
 
 def categorize_detections_by_size(input_file,output_file=None,options=None):
-
+    """
+    Given a MegaDetector .json file, creates a separate category for bounding boxes
+    above one or more size thresholds, optionally writing results to [output_file].
+
+    Args:
+        input_file (str): file to process
+        output_file (str, optional): optional output file
+        options (SizeCategorizationOptions): categorization parameters
+
+    Returns:
+        dict: data loaded from [input_file], with the new size-based categories.
+        Identical to what's written to [output_file], if [output_file] is not None.
+    """
+
     if options is None:
         options = SizeCategorizationOptions()
 
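A hedged usage sketch for the newly documented function, based on the signature and options above; file names are hypothetical:

    from api.batch_processing.postprocessing.categorize_detections_by_size import (
        SizeCategorizationOptions, categorize_detections_by_size)

    options = SizeCategorizationOptions()
    options.size_thresholds = [0.95]
    options.size_category_names = ['large_detection']
    options.measurement = 'size'

    # Adds a "large_detection" category for boxes covering >95% of the image
    results = categorize_detections_by_size('md_results.json',
                                            output_file='md_results_by_size.json',
                                            options=options)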
api/batch_processing/postprocessing/combine_api_outputs.py CHANGED
@@ -1,54 +1,56 @@
-########
-#
-# combine_api_outputs.py
-#
-# Merges two or more .json files in batch API output format, optionally
-# writing the results to another .json file.
-#
-# * Concatenates image lists, erroring if images are not unique.
-# * Errors if class lists are conflicting; errors on unrecognized fields.
-# * Checks compatibility in info structs, within reason.
-#
-# File format:
-#
-# https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
-#
-# Command-line use:
-#
-# combine_api_outputs input1.json input2.json ... inputN.json output.json
-#
-# Also see combine_api_shard_files() (not exposed via the command line yet) to
-# combine the intermediate files created by the API.
-#
-# This does no checking for redundancy; if you are looking to ensemble
-# the results of multiple model versions, see merge_detections.py.
-#
-########
+"""
+
+combine_api_outputs.py
+
+Merges two or more .json files in batch API output format, optionally
+writing the results to another .json file.
+
+* Concatenates image lists, erroring if images are not unique.
+* Errors if class lists are conflicting; errors on unrecognized fields.
+* Checks compatibility in info structs, within reason.
+
+File format:
+
+https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
+
+Command-line use:
+
+combine_api_outputs input1.json input2.json ... inputN.json output.json
+
+Also see combine_api_shard_files() (not exposed via the command line yet) to
+combine the intermediate files created by the API.
+
+This does no checking for redundancy; if you are looking to ensemble
+the results of multiple model versions, see merge_detections.py.
+
+"""
 
 #%% Constants and imports
 
 import argparse
 import sys
 import json
-from typing import Any, Dict, Iterable, Mapping, List, Optional
 
 
 #%% Merge functions
 
-def combine_api_output_files(input_files: List[str],
-                             output_file: Optional[str] = None,
-                             require_uniqueness: bool = True,
-                             verbose: bool = True
-                             ) -> Dict[str, Any]:
+def combine_api_output_files(input_files,
+                             output_file=None,
+                             require_uniqueness=True,
+                             verbose=True):
     """
-    Merges list of JSON API detection files *input_files* into a single
-    dictionary, optionally writing the result to *output_file*.
+    Merges the list of MD results files [input_files] into a single
+    dictionary, optionally writing the result to [output_file].
 
     Args:
-        input_files: list of str, paths to JSON detection files
-        output_file: optional str, path to write merged JSON
-        require_uniqueness: bool, whether to require that the images in
-            each input_dict be unique
+        input_files (list of str): paths to JSON detection files
+        output_file (str, optional): path to write merged JSON
+        require_uniqueness (bool): whether to require that the images in
+            each list of images be unique
+
+    Returns:
+        dict: merged dictionaries loaded from [input_files], identical to what's
+        written to [output_file] if [output_file] is not None
     """
 
     def print_if_verbose(s):
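A hedged usage sketch for the de-annotated combine_api_output_files() above; file names are hypothetical, and the returned dict follows the MD output format (an "images" list plus category maps):

    from api.batch_processing.postprocessing.combine_api_outputs import (
        combine_api_output_files)

    merged = combine_api_output_files(
        input_files=['task_000_detections.json', 'task_001_detections.json'],
        output_file='combined_detections.json',
        require_uniqueness=True)
    print('Merged {} images'.format(len(merged['images'])))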
@@ -73,27 +75,27 @@ def combine_api_output_files(input_files,
     return merged_dict
 
 
-def combine_api_output_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
-                                    require_uniqueness: bool = True
-                                    ) -> Dict[str, Any]:
+def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
     """
-    Merges the list of API detection dictionaries *input_dicts*. See header
-    comment for details on merge rules.
+    Merges the list of MD results dictionaries [input_dicts] into a single dict.
+    See module header comment for details on merge rules.
 
     Args:
-        input_dicts: list of dicts, each dict is the JSON of the detections
-            output file from the Batch Processing API
-        require_uniqueness: bool, whether to require that the images in
-            each input_dict be unique
-
-    Returns: dict, represents the merged JSON
+        input_dicts (list of dicts): list of dicts in which each dict represents the
+            contents of a MD output file
+        require_uniqueness (bool): whether to require that the images in
+            each input dict be unique; if this is True and image filenames are
+            not unique, an error is raised.
+
+    Returns:
+        dict: merged MD results
     """
 
     # Map image filenames to detections, we'll convert to a list later
     images = {}
-    info: Dict[str, str] = {}
-    detection_categories: Dict[str, str] = {}
-    classification_categories: Dict[str, str] = {}
+    info = {}
+    detection_categories = {}
+    classification_categories = {}
     n_redundant_images = 0
     n_images = 0
 
@@ -182,8 +184,20 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
 
 def combine_api_shard_files(input_files, output_file=None):
     """
-    Merges the list of .json-formatted API shard files *input_files* into a single
-    list of dictionaries, optionally writing the result to *output_file*.
+    Merges the list of .json-formatted API shard files [input_files] into a single
+    list of dictionaries, optionally writing the result to [output_file].
+
+    This operates on mostly-deprecated API shard files, not MegaDetector results files.
+    If you don't know what an API shard file is, you don't want this function.
+
+    Args:
+        input_files (list of str): files to merge
+        output_file (str, optional): file to which we should write merged results
+
+    Returns:
+        dict: merged results
+
+    :meta private:
     """
 
     input_lists = []
@@ -215,6 +229,7 @@ def combine_api_shard_files(input_files, output_file=None):
 #%% Command-line driver
 
 def main():
+
     parser = argparse.ArgumentParser()
     parser.add_argument(
         'input_paths', nargs='+',
@@ -230,6 +245,5 @@ def main():
     args = parser.parse_args()
     combine_api_output_files(args.input_paths, args.output_path)
 
-
 if __name__ == '__main__':
     main()