megadetector 5.0.8__py3-none-any.whl → 5.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (190) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
@@ -1,11 +1,20 @@
1
- ########
2
- #
3
- # process_video.py
4
- #
5
- # Split a video (or folder of videos) into frames, run the frames through run_detector_batch.py,
6
- # and optionally stitch together results into a new video with detection boxes.
7
- #
8
- ########
1
+ """
2
+
3
+ process_video.py
4
+
5
+ Splits a video (or folder of videos) into frames, runs the frames through run_detector_batch.py,
6
+ and optionally stitches together results into a new video with detection boxes.
7
+
8
+ Operates by separating the video into frames, typically sampling every Nth frame, and writing
9
+ those frames to disk, before running MD. This approach clearly has a downside: it requires
10
+ a bunch more disk space, compared to extracting frames and running MD on them without ever
11
+ writing them to disk. The upside, though, is that this approach allows you to run repeat
12
+ detection elimination after running MegaDetector, and it allows more efficient re-use
13
+ of frames if you end up running MD more than once, or running multiple versions of MD.
14
+
15
+ TODO: optionally skip writing frames to disk, and process frames in memory.
16
+
17
+ """
9
18
 
10
19
  #%% Imports
11
20
 
@@ -29,73 +38,113 @@ from uuid import uuid1
29
38
  from detection.video_utils import default_fourcc
30
39
 
31
40
 
32
- #%% Options classes
41
+ #%% Classes
33
42
 
34
43
  class ProcessVideoOptions:
35
-
36
- # Can be a model filename (.pt or .pb) or a model name (e.g. "MDV5A")
44
+ """
45
+ Options controlling the behavior of process_video()
46
+ """
47
+
48
+ #: Can be a model filename (.pt or .pb) or a model name (e.g. "MDV5A")
37
49
  model_file = 'MDV5A'
38
50
 
39
- # Can be a file or a folder
51
+ #: Video (or folder of videos) to process
40
52
  input_video_file = ''
41
53
 
54
+ #: .json file to which we should write results
42
55
  output_json_file = None
43
56
 
44
- # Only relevant if render_output_video is True
57
+ #: File to which we should write a video with boxes, only relevant if
58
+ #: render_output_video is True
45
59
  output_video_file = None
46
60
 
47
- # Folder to use for extracted frames
61
+ #: Folder to use for extracted frames; will use a folder in system temp space
62
+ #: if this is None
48
63
  frame_folder = None
49
64
 
50
- # Folder to use for rendered frames (if rendering output video)
65
+ #: Folder to use for rendered frames (if rendering output video); will use a folder
66
+ #: in system temp space if this is None
51
67
  frame_rendering_folder = None
52
68
 
53
- # Should we render a video with detection boxes?
54
- #
55
- # Only supported when processing a single video, not a folder.
69
+ #: Should we render a video with detection boxes?
70
+ #:
71
+ #: Only supported when processing a single video, not a folder.
56
72
  render_output_video = False
57
73
 
58
- # If we are rendering boxes to a new video, should we keep the temporary
59
- # rendered frames?
74
+ #: If we are rendering boxes to a new video, should we keep the temporary
75
+ #: rendered frames?
60
76
  keep_rendered_frames = False
61
77
 
62
- # Should we keep the extracted frames?
78
+ #: Should we keep the extracted frames?
63
79
  keep_extracted_frames = False
64
80
 
65
- # Should we delete the entire folder the extracted frames are written to?
66
- #
67
- # By default, we delete the frame files but leave the (probably-empty) folder in place.
81
+ #: Should we delete the entire folder the extracted frames are written to?
82
+ #:
83
+ #: By default, we delete the frame files but leave the (probably-empty) folder in place,
84
+ #: for no reason other than being paranoid about deleting folders.
68
85
  force_extracted_frame_folder_deletion = False
69
86
 
70
- # Should we delete the entire folder the rendered frames are written to?
71
- #
72
- # By default, we delete the frame files but leave the (probably-empty) folder in place.
87
+ #: Should we delete the entire folder the rendered frames are written to?
88
+ #:
89
+ #: By default, we delete the frame files but leave the (probably-empty) folder in place,
90
+ #: for no reason other than being paranoid about deleting folders.
73
91
  force_rendered_frame_folder_deletion = False
74
-
92
+
93
+ #: If we've already run MegaDetector on this video or folder of videos, i.e. if we
94
+ #: find a corresponding MD results file, should we re-use it? Defaults to reprocessing.
75
95
  reuse_results_if_available = False
96
+
97
+ #: If we've already split this video or folder of videos into frames, should we
98
+ #: re-use those extracted frames? Defaults to reprocessing.
76
99
  reuse_frames_if_available = False
77
100
 
101
+ #: If [input_video_file] is a folder, should we search for videos recursively?
78
102
  recursive = False
103
+
104
+ #: Enable additional debug console output
79
105
  verbose = False
80
106
 
107
+ #: fourcc code to use for writing videos; only relevant if render_output_video is True
81
108
  fourcc = None
82
109
 
110
+ #: Confidence threshold to use for writing videos with boxes, only relevant if
111
+ #: render_output_video is True. Defaults to choosing a reasonable threshold
112
+ #: based on the model version.
83
113
  rendering_confidence_threshold = None
114
+
115
+ #: Detections below this threshold will not be included in the output file.
84
116
  json_confidence_threshold = 0.005
117
+
118
+ #: Sample every Nth frame; set to None (default) or 1 to sample every frame. Typically
119
+ #: we sample down to around 3 fps, so for typical 30 fps videos, frame_sample=10 is a
120
+ #: typical value.
85
121
  frame_sample = None
86
122
 
123
+ #: Number of workers to use for parallelization; set to <= 1 to disable parallelization
87
124
  n_cores = 1
88
125
 
126
+ #: For debugging only, stop processing after a certain number of frames.
89
127
  debug_max_frames = -1
90
128
 
129
+ #: File containing non-standard categories, typically only used if you're running a non-MD
130
+ #: detector.
91
131
  class_mapping_filename = None
92
132
 
133
+ # ...class ProcessVideoOptions
134
+
93
135
 
94
136
  #%% Functions
95
137
 
96
138
  def process_video(options):
97
139
  """
98
- Process a single video
140
+ Process a single video through MD, optionally writing a new video with boxes
141
+
142
+ Args:
143
+ options (ProcessVideoOptions): all the parameters used to control this process,
144
+ including filenames; see ProcessVideoOptions for details
145
+
146
+ Returns:
147
+ dict: frame-level MegaDetector results, identical to what's in the output .json file
99
148
  """
100
149
 
101
150
  if options.output_json_file is None:
@@ -229,7 +278,11 @@ def process_video(options):
229
278
 
230
279
  def process_video_folder(options):
231
280
  """
232
- Process a folder of videos
281
+ Process a folder of videos through MD
282
+
283
+ Args:
284
+ options (ProcessVideoOptions): all the parameters used to control this process,
285
+ including filenames; see ProcessVideoOptions for details
233
286
  """
234
287
 
235
288
  ## Validate options
@@ -428,8 +481,7 @@ def process_video_folder(options):
428
481
  print('Warning: error deleting frames from folder {}:\n{}'.format(
429
482
  frame_output_folder,str(e)))
430
483
  pass
431
-
432
-
484
+
433
485
  # ...process_video_folder()
434
486
 
435
487
 
@@ -547,7 +599,7 @@ def main():
547
599
  default_options = ProcessVideoOptions()
548
600
 
549
601
  parser = argparse.ArgumentParser(description=(
550
- 'Run MegaDetector on each frame in a video (or every Nth frame), optionally '\
602
+ 'Run MegaDetector on each frame (or every Nth frame) in a video (or folder of videos), optionally '\
551
603
  'producing a new video with detections annotated'))
552
604
 
553
605
  parser.add_argument('model_file', type=str,
@@ -1,12 +1,12 @@
1
- ########
2
- #
3
- # pytorch_detector.py
4
- #
5
- # Module to run MegaDetector v5, a PyTorch YOLOv5 animal detection model.
6
- #
7
- ########
1
+ """
2
+
3
+ pytorch_detector.py
8
4
 
9
- #%% Imports
5
+ Module to run MegaDetector v5, a PyTorch YOLOv5 animal detection model.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
10
 
11
11
  import torch
12
12
  import numpy as np
@@ -104,12 +104,19 @@ print(f'Using PyTorch version {torch.__version__}')
104
104
 
105
105
  class PTDetector:
106
106
 
107
- IMAGE_SIZE = 1280 # image size used in training
107
+ #: Image size passed to YOLOv5's letterbox() function; 1280 means "1280 on the long side, preserving
108
+ #: aspect ratio"
109
+ #:
110
+ #: :meta private:
111
+ IMAGE_SIZE = 1280
112
+
113
+ #: Stride size passed to YOLOv5's letterbox() function
114
+ #:
115
+ #: :meta private:
108
116
  STRIDE = 64
109
117
 
110
- def __init__(self, model_path: str,
111
- force_cpu: bool = False,
112
- use_model_native_classes: bool = False):
118
+ def __init__(self, model_path, force_cpu=False, use_model_native_classes= False):
119
+
113
120
  self.device = 'cpu'
114
121
  if not force_cpu:
115
122
  if torch.cuda.is_available():
@@ -162,21 +169,26 @@ class PTDetector:
162
169
  detection_threshold=0.00001, image_size=None,
163
170
  skip_image_resizing=False):
164
171
  """
165
- Apply the detector to an image.
172
+ Applies the detector to an image.
166
173
 
167
174
  Args:
168
- img_original: the PIL Image object with EXIF rotation taken into account
169
- image_id: a path to identify the image; will be in the "file" field of the output object
170
- detection_threshold: confidence above which to include the detection proposal
171
- skip_image_resizing: whether to skip internal image resizing and rely on external resizing
175
+ img_original (Image): the PIL Image object with EXIF rotation taken into account
176
+ image_id (str, optional): a path to identify the image; will be in the "file" field
177
+ of the output object
178
+ detection_threshold (float, optional): only detections above this confidence threshold
179
+ will be included in the return value
180
+ image_size (tuple, optional): image size to use for inference, only mess with this
181
+ if (a) you're using a model other than MegaDetector or (b) you know what you're
182
+ doing
183
+ skip_image_resizing (bool, optional): whether to skip internal image resizing (and rely on external
184
+ resizing)
172
185
 
173
186
  Returns:
174
- A dict with the following fields, see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
175
- - 'file' (always present)
176
- - 'max_detection_conf' (removed from MegaDetector output by default, but generated here)
177
- - 'detections', which is a list of detection objects containing keys 'category',
178
- 'conf' and 'bbox'
179
- - 'failure'
187
+ dict: a dictionary with the following fields:
188
+ - 'file' (filename, always present)
189
+ - 'max_detection_conf' (removed from MegaDetector output files by default, but generated here)
190
+ - 'detections' (a list of detection objects containing keys 'category', 'conf', and 'bbox')
191
+ - 'failure' (a failure string, or None if everything went fine)
180
192
  """
181
193
 
182
194
  result = {
@@ -297,13 +309,19 @@ class PTDetector:
297
309
 
298
310
  return result
299
311
 
312
+ # ...def generate_detections_one_image(...)
313
+
314
+ # ...class PTDetector
315
+
300
316
 
301
317
  #%% Command-line driver
302
318
 
319
+ # For testing only... you don't really want to run this module directly.
320
+
303
321
  if __name__ == '__main__':
304
-
305
- # For testing only... you don't really want to run this module directly
306
322
 
323
+ pass
324
+
307
325
  #%%
308
326
 
309
327
  import md_visualization.visualization_utils as vis_utils
detection/run_detector.py CHANGED
@@ -1,40 +1,26 @@
1
- ########
2
- #
3
- # run_detector.py
4
- #
5
- # Module to run an animal detection model on images.
6
- #
7
- # The main function in this script also renders the predicted
8
- # bounding boxes on images and saves the resulting images (with bounding boxes).
9
- #
10
- # This script is not a good way to process lots of images (tens of thousands,
11
- # say). It does not facilitate checkpointing the results so if it crashes you
12
- # would have to start from scratch. If you want to run a detector (e.g., ours)
13
- # on lots of images, you should check out run_detector_batch.py.
14
- #
15
- # To run this script, we recommend you set up a conda virtual environment
16
- # following instructions in the Installation section on the main README, using
17
- # `environment-detector.yml` as the environment file where asked.
18
- #
19
- # This is a good way to test our detector on a handful of images and get
20
- # super-satisfying, graphical results. It's also a good way to see how fast a
21
- # detector model will run on a particular machine.
22
- #
23
- # If you would like to *not* use the GPU on the machine, set the environment
24
- # variable CUDA_VISIBLE_DEVICES to "-1".
25
- #
26
- # If no output directory is specified, writes detections for c:\foo\bar.jpg to
27
- # c:\foo\bar_detections.jpg.
28
- #
29
- # This script will only consider detections with > 0.005 confidence at all times.
30
- # The `threshold` you provide is only for rendering the results. If you need to
31
- # see lower-confidence detections, you can change
32
- # DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD.
33
- #
34
- # Reference:
35
- # https://github.com/tensorflow/models/blob/master/research/object_detection/inference/detection_inference.py
36
- #
37
- ########
1
+ """
2
+
3
+ run_detector.py
4
+
5
+ Module to run an animal detection model on images. The main function in this script also renders
6
+ the predicted bounding boxes on images and saves the resulting images (with bounding boxes).
7
+
8
+ **This script is not a good way to process lots of images**. It does not produce a useful
9
+ output format, and it does not facilitate checkpointing the results so if it crashes you
10
+ would have to start from scratch. **If you want to run a detector on lots of images, you should
11
+ check out run_detector_batch.py**.
12
+
13
+ That said, this script (run_detector.py) is a good way to test our detector on a handful of images
14
+ and get super-satisfying, graphical results.
15
+
16
+ If you would like to *not* use the GPU on the machine, set the environment
17
+ variable CUDA_VISIBLE_DEVICES to "-1".
18
+
19
+ This script will only consider detections with > 0.005 confidence at all times.
20
+ The threshold you provide is only for rendering the results. If you need to
21
+ see lower-confidence detections, you can change DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD.
22
+
23
+ """
38
24
 
39
25
  #%% Constants, imports, environment
40
26
 
@@ -163,9 +149,15 @@ device_token_to_mdv5_inference_speed = {
163
149
 
164
150
  def convert_to_tf_coords(array):
165
151
  """
166
- From [x1, y1, width, height] to [y1, x1, y2, x2], where x1 is x_min, x2 is x_max
167
-
168
- This is only used to keep the interface of the synchronous API.
152
+ Converts a bounding box from [x1, y1, width, height] to [y1, x1, y2, x2]. This
153
+ is mostly not helpful, this function only exists to maintain backwards compatibility
154
+ in the synchronous API, which possibly zero people in the world are using.
155
+
156
+ Args:
157
+ array (list): a bounding box in [x,y,w,h] format
158
+
159
+ Returns:
160
+ list: a bounding box in [y1,x1,y2,x2] format
169
161
  """
170
162
 
171
163
  x1 = array[0]
@@ -174,13 +166,21 @@ def convert_to_tf_coords(array):
174
166
  height = array[3]
175
167
  x2 = x1 + width
176
168
  y2 = y1 + height
169
+
177
170
  return [y1, x1, y2, x2]
178
171
 
179
172
 
180
173
  def get_detector_metadata_from_version_string(detector_version):
181
174
  """
182
- Given a MegaDetector version string (e.g. "v4.1.0"), return the metadata for
175
+ Given a MegaDetector version string (e.g. "v4.1.0"), returns the metadata for
183
176
  the model. Used for writing standard defaults to batch output files.
177
+
178
+ Args:
179
+ detector_version (str): a detection version string, e.g. "v4.1.0", which you
180
+ can extract from a filename using get_detector_version_from_filename()
181
+
182
+ Returns:
183
+ dict: metadata for this model, suitable for writing to a MD output file
184
184
  """
185
185
 
186
186
  if detector_version not in DETECTOR_METADATA:
@@ -196,20 +196,26 @@ def get_detector_metadata_from_version_string(detector_version):
196
196
 
197
197
 
198
198
  def get_detector_version_from_filename(detector_filename):
199
- """
200
- Get the version number component of the detector from the model filename.
199
+ r"""
200
+ Gets the version number component of the detector from the model filename.
201
201
 
202
- *detector_filename* will almost always end with one of the following:
202
+ [detector_filename] will almost always end with one of the following:
203
203
 
204
- megadetector_v2.pb
205
- megadetector_v3.pb
206
- megadetector_v4.1 (not produed by run_detector_batch.py, only found in Azure Batch API output files)
207
- md_v4.1.0.pb
208
- md_v5a.0.0.pt
209
- md_v5b.0.0.pt
210
-
211
- ...for which we identify the version number as "v2.0.0", "v3.0.0", "v4.1.0",
204
+ * megadetector_v2.pb
205
+ * megadetector_v3.pb
206
+ * megadetector_v4.1 (not produced by run_detector_batch.py, only found in output files from the deprecated Azure Batch API)
207
+ * md_v4.1.0.pb
208
+ * md_v5a.0.0.pt
209
+ * md_v5b.0.0.pt
210
+
211
+ This function identifies the version number as "v2.0.0", "v3.0.0", "v4.1.0",
212
212
  "v4.1.0", "v5a.0.0", and "v5b.0.0", respectively.
213
+
214
+ Args:
215
+ detector_filename (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt
216
+
217
+ Returns:
218
+ str: a detector version string, e.g. "v5a.0.0", or "multiple" if I'm confused
213
219
  """
214
220
 
215
221
  fn = os.path.basename(detector_filename).lower()
@@ -228,10 +234,20 @@ def get_detector_version_from_filename(detector_filename):
228
234
 
229
235
 
230
236
  def estimate_md_images_per_second(model_file, device_name=None):
231
- """
232
- Estimate how fast MegaDetector will run based on benchmarks. Defaults to querying
237
+ r"""
238
+ Estimates how fast MegaDetector will run, based on benchmarks. Defaults to querying
233
239
  the current device. Returns None if no data is available for the current card/model.
234
- Estimates only available for a small handful of GPUs.
240
+ Estimates only available for a small handful of GPUs. Uses an absurdly simple lookup
241
+ approach, e.g. if the string "4090" appears in the device name, congratulations,
242
+ you have an RTX 4090.
243
+
244
+ Args:
245
+ model_file (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt
246
+ device_name (str, optional): device name, e.g. blah-blah-4090-blah-blah
247
+
248
+ Returns:
249
+ float: the approximate number of images this model version can process on this
250
+ device per second
235
251
  """
236
252
 
237
253
  if device_name is None:
@@ -271,8 +287,14 @@ def estimate_md_images_per_second(model_file, device_name=None):
271
287
 
272
288
  def get_typical_confidence_threshold_from_results(results):
273
289
  """
274
- Given the .json data loaded from a MD results file, determine a typical confidence
290
+ Given the .json data loaded from a MD results file, returns a typical confidence
275
291
  threshold based on the detector version.
292
+
293
+ Args:
294
+ results (dict): a dict of MD results, as it would be loaded from a MD results .json file
295
+
296
+ Returns:
297
+ float: a sensible default threshold for this model
276
298
  """
277
299
 
278
300
  if 'detector_metadata' in results['info'] and \
@@ -293,10 +315,16 @@ def get_typical_confidence_threshold_from_results(results):
293
315
 
294
316
 
295
317
  def is_gpu_available(model_file):
296
- """
297
- Decide whether a GPU is available, importing PyTorch or TF depending on the extension
318
+ r"""
319
+ Determines whether a GPU is available, importing PyTorch or TF depending on the extension
298
320
  of model_file. Does not actually load model_file, just uses that to determine how to check
299
- for GPU availability.
321
+ for GPU availability (PT vs. TF).
322
+
323
+ Args:
324
+ model_file (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt
325
+
326
+ Returns:
327
+ bool: whether a GPU is available
300
328
  """
301
329
 
302
330
  if model_file.endswith('.pb'):
@@ -323,8 +351,14 @@ def is_gpu_available(model_file):
323
351
 
324
352
 
325
353
  def load_detector(model_file, force_cpu=False):
326
- """
327
- Load a TF or PT detector, depending on the extension of model_file.
354
+ r"""
355
+ Loads a TF or PT detector, depending on the extension of model_file.
356
+
357
+ Args:
358
+ model_file (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt
359
+
360
+ Returns:
361
+ object: loaded detector object
328
362
  """
329
363
 
330
364
  # Possibly automatically download the model
@@ -344,19 +378,41 @@ def load_detector(model_file, force_cpu=False):
344
378
  raise ValueError('Unrecognized model format: {}'.format(model_file))
345
379
  elapsed = time.time() - start_time
346
380
  print('Loaded model in {}'.format(humanfriendly.format_timespan(elapsed)))
381
+
347
382
  return detector
348
383
 
349
384
 
350
385
  #%% Main function
351
386
 
352
- def load_and_run_detector(model_file, image_file_names, output_dir,
387
+ def load_and_run_detector(model_file,
388
+ image_file_names,
389
+ output_dir,
353
390
  render_confidence_threshold=DEFAULT_RENDERING_CONFIDENCE_THRESHOLD,
354
- crop_images=False, box_thickness=DEFAULT_BOX_THICKNESS,
355
- box_expansion=DEFAULT_BOX_EXPANSION, image_size=None,
356
- label_font_size=DEFAULT_LABEL_FONT_SIZE
391
+ crop_images=False,
392
+ box_thickness=DEFAULT_BOX_THICKNESS,
393
+ box_expansion=DEFAULT_BOX_EXPANSION,
394
+ image_size=None,
395
+ label_font_size=DEFAULT_LABEL_FONT_SIZE
357
396
  ):
358
- """
359
- Load and run detector on target images, and visualize the results.
397
+ r"""
398
+ Loads and runs a detector on target images, and visualizes the results.
399
+
400
+ Args:
401
+ model_file (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt, or a known model
402
+ string, e.g. "MDV5A"
403
+ image_file_names (list): list of absolute paths to process
404
+ output_dir (str): folder to write visualized images to
405
+ render_confidence_threshold (float, optional): only render boxes for detections
406
+ above this threshold
407
+ crop_images (bool, optional): whether to crop detected objects to individual images
408
+ (default is to render images with boxes, rather than cropping)
409
+ box_thickness (float, optional): thickness in pixels for box rendering
410
+ box_expansion (float, optional): box expansion in pixels
411
+ image_size (tuple, optional): image size to use for inference, only mess with this
412
+ if (a) you're using a model other than MegaDetector or (b) you know what you're
413
+ doing
414
+ label_font_size (float, optional): font size to use for displaying class names
415
+ and confidence values in the rendered images
360
416
  """
361
417
 
362
418
  if len(image_file_names) == 0:
@@ -507,7 +563,12 @@ def load_and_run_detector(model_file, image_file_names, output_dir,
507
563
 
508
564
  def download_model(model_name,force_download=False):
509
565
  """
510
- Download one of the known models to local temp space if it hasn't already been downloaded
566
+ Downloads one of the known models to local temp space if it hasn't already been downloaded.
567
+
568
+ Args:
569
+ model_name (str): a known model string, e.g. "MDV5A"
570
+ force_download (bool, optional): whether to download the model even if the local target
571
+ file already exists
511
572
  """
512
573
 
513
574
  import tempfile
@@ -536,9 +597,17 @@ def download_model(model_name,force_download=False):
536
597
 
537
598
  def try_download_known_detector(detector_file):
538
599
  """
539
- Check whether detector_file is really the name of a known model, in which case we will
600
+ Checks whether detector_file is really the name of a known model, in which case we will
540
601
  either read the actual filename from the corresponding environment variable or download
541
602
  (if necessary) to local temp space. Otherwise just returns the input string.
603
+
604
+ Args:
605
+ detector_file (str): a known model string (e.g. "MDV5A"), or any other string (in which
606
+ case this function is a no-op)
607
+
608
+ Returns:
609
+ str: the local filename to which the model was downloaded, or the same string that
610
+ was passed in, if it's not recognized as a well-known model name
542
611
  """
543
612
 
544
613
  if detector_file in downloadable_models:
@@ -606,7 +675,7 @@ def main():
606
675
  parser.add_argument(
607
676
  '--crop',
608
677
  default=False,
609
- action="store_true",
678
+ action='store_true',
610
679
  help=('If set, produces separate output images for each crop, '
611
680
  'rather than adding bounding boxes to the original image'))
612
681
 
@@ -630,7 +699,14 @@ def main():
630
699
  default=DEFAULT_LABEL_FONT_SIZE,
631
700
  help=('Label font size (defaults to {})'.format(
632
701
  DEFAULT_LABEL_FONT_SIZE)))
633
-
702
+
703
+ parser.add_argument(
704
+ '--process_likely_output_images',
705
+ action='store_true',
706
+ help=('By default, we skip images that end in {}, because they probably came from this script. '\
707
+ .format(DETECTION_FILENAME_INSERT) + \
708
+ 'This option disables that behavior.'))
709
+
634
710
  if len(sys.argv[1:]) == 0:
635
711
  parser.print_help()
636
712
  parser.exit()
@@ -650,6 +726,16 @@ def main():
650
726
  else:
651
727
  image_file_names = path_utils.find_images(args.image_dir, args.recursive)
652
728
 
729
+ # Optionally skip images that were probably generated by this script
730
+ if not args.process_likely_output_images:
731
+ image_file_names_valid = []
732
+ for fn in image_file_names:
733
+ if os.path.splitext(fn)[0].endswith(DETECTION_FILENAME_INSERT):
734
+ print('Skipping likely output image {}'.format(fn))
735
+ else:
736
+ image_file_names_valid.append(fn)
737
+ image_file_names = image_file_names_valid
738
+
653
739
  print('Running detector on {} images...'.format(len(image_file_names)))
654
740
 
655
741
  if args.output_dir:
@@ -671,7 +757,6 @@ def main():
671
757
  image_size=args.image_size,
672
758
  label_font_size=args.label_font_size)
673
759
 
674
-
675
760
  if __name__ == '__main__':
676
761
  main()
677
762