megadetector 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (190) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
@@ -1,38 +1,43 @@
1
- ########
2
- #
3
- # run_detector_batch.py
4
- #
5
- # Module to run MegaDetector on lots of images, writing the results
6
- # to a file in the same format produced by our batch API:
7
- #
8
- # https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing
9
- #
10
- # This enables the results to be used in our post-processing pipeline; see
11
- # api/batch_processing/postprocessing/postprocess_batch_results.py .
12
- #
13
- # This script can save results to checkpoints intermittently, in case disaster
14
- # strikes. To enable this, set --checkpoint_frequency to n > 0, and results
15
- # will be saved as a checkpoint every n images. Checkpoints will be written
16
- # to a file in the same directory as the output_file, and after all images
17
- # are processed and final results file written to output_file, the temporary
18
- # checkpoint file will be deleted. If you want to resume from a checkpoint, set
19
- # the checkpoint file's path using --resume_from_checkpoint.
20
- #
21
- # The `threshold` you can provide as an argument is the confidence threshold above
22
- # which detections will be included in the output file.
23
- #
24
- # Has preliminary multiprocessing support for CPUs only; if a GPU is available, it will
25
- # use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
26
- # is not supported when using a GPU.
27
- #
28
- # Does not have a command-line option to bind the process to a particular GPU, but you can
29
- # prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
30
- #
31
- # CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
32
- #
33
- # You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
34
- #
35
- ########
1
+ """
2
+
3
+ run_detector_batch.py
4
+
5
+ Module to run MegaDetector on lots of images, writing the results
6
+ to a file in the MegaDetector results format.
7
+
8
+ https://github.com/agentmorris/MegaDetector/tree/main/api/batch_processing#megadetector-batch-output-format
9
+
10
+ This enables the results to be used in our post-processing pipeline; see
11
+ api/batch_processing/postprocessing/postprocess_batch_results.py .
12
+
13
+ This script can save results to checkpoints intermittently, in case disaster
14
+ strikes. To enable this, set --checkpoint_frequency to n > 0, and results
15
+ will be saved as a checkpoint every n images. Checkpoints will be written
16
+ to a file in the same directory as the output_file, and after all images
17
+ are processed and final results file written to output_file, the temporary
18
+ checkpoint file will be deleted. If you want to resume from a checkpoint, set
19
+ the checkpoint file's path using --resume_from_checkpoint.
20
+
21
+ The `threshold` you can provide as an argument is the confidence threshold above
22
+ which detections will be included in the output file.
23
+
24
+ Has multiprocessing support for CPUs only; if a GPU is available, it will
25
+ use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
26
+ is not supported when using a GPU.
27
+
28
+ The lack of GPU multiprocessing support might sound annoying, but in practice we
29
 + run MegaDetector on enormous numbers of images across multiple GPUs using this script; we just use
30
+ one GPU *per invocation of this script*. Dividing a big batch of images into one chunk
31
+ per GPU happens outside of this script.
32
+
33
+ Does not have a command-line option to bind the process to a particular GPU, but you can
34
+ prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
35
+
36
+ CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
37
+
38
+ You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
39
+
40
+ """
36
41
 
37
42
  #%% Constants, imports, environment
38
43
 
@@ -91,7 +96,7 @@ exif_options.byte_handling = 'convert_to_string'
91
96
 
92
97
  #%% Support functions for multiprocessing
93
98
 
94
- def producer_func(q,image_files):
99
+ def _producer_func(q,image_files):
95
100
  """
96
101
  Producer function; only used when using the (optional) image queue.
97
102
 
@@ -120,7 +125,7 @@ def producer_func(q,image_files):
120
125
  print('Finished image loading'); sys.stdout.flush()
121
126
 
122
127
 
123
- def consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
128
+ def _consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
124
129
  """
125
130
  Consumer function; only used when using the (optional) image queue.
126
131
 
@@ -177,15 +182,28 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
177
182
  when --use_image_queue is specified. Starts a reader process to read images from disk, but
178
183
  processes images in the process from which this function is called (i.e., does not currently
179
184
  spawn a separate consumer process).
185
+
186
+ Args:
187
+ image_files (str): list of absolute paths to images
188
+ model_file (str): filename or model identifier (e.g. "MDV5A")
189
+ confidence_threshold (float): minimum confidence detection to include in
190
+ output
191
+ quiet (bool, optional): suppress per-image console printouts
192
+ image_size (tuple, optional): image size to use for inference, only mess with this
193
+ if (a) you're using a model other than MegaDetector or (b) you know what you're
194
+ doing
195
+
196
+ Returns:
197
+ list: list of dicts in the format returned by process_image()
180
198
  """
181
199
 
182
200
  q = multiprocessing.JoinableQueue(max_queue_size)
183
201
  return_queue = multiprocessing.Queue(1)
184
202
 
185
203
  if use_threads_for_queue:
186
- producer = Thread(target=producer_func,args=(q,image_files,))
204
+ producer = Thread(target=_producer_func,args=(q,image_files,))
187
205
  else:
188
- producer = Process(target=producer_func,args=(q,image_files,))
206
+ producer = Process(target=_producer_func,args=(q,image_files,))
189
207
  producer.daemon = False
190
208
  producer.start()
191
209
 
@@ -199,15 +217,15 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
199
217
 
200
218
  if run_separate_consumer_process:
201
219
  if use_threads_for_queue:
202
- consumer = Thread(target=consumer_func,args=(q,return_queue,model_file,
220
+ consumer = Thread(target=_consumer_func,args=(q,return_queue,model_file,
203
221
  confidence_threshold,image_size,))
204
222
  else:
205
- consumer = Process(target=consumer_func,args=(q,return_queue,model_file,
223
+ consumer = Process(target=_consumer_func,args=(q,return_queue,model_file,
206
224
  confidence_threshold,image_size,))
207
225
  consumer.daemon = True
208
226
  consumer.start()
209
227
  else:
210
- consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
228
+ _consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
211
229
 
212
230
  producer.join()
213
231
  print('Producer finished')
@@ -226,13 +244,15 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
226
244
 
227
245
  #%% Other support functions
228
246
 
229
- def chunks_by_number_of_chunks(ls, n):
247
+ def _chunks_by_number_of_chunks(ls, n):
230
248
  """
231
249
  Splits a list into n even chunks.
250
+
251
+ External callers should use ct_utils.split_list_into_n_chunks().
232
252
 
233
- Args
234
- - ls: list
235
- - n: int, # of chunks
253
+ Args:
254
+ ls (list): list to break up into chunks
255
+ n (int): number of chunks
236
256
  """
237
257
 
238
258
  for i in range(0, n):
@@ -242,19 +262,31 @@ def chunks_by_number_of_chunks(ls, n):
242
262
  #%% Image processing functions
243
263
 
244
264
  def process_images(im_files, detector, confidence_threshold, use_image_queue=False,
245
- quiet=False, image_size=None, checkpoint_queue=None, include_image_size=False,
246
- include_image_timestamp=False, include_exif_data=False):
265
+ quiet=False, image_size=None, checkpoint_queue=None,
266
+ include_image_size=False, include_image_timestamp=False,
267
+ include_exif_data=False):
247
268
  """
248
- Runs MegaDetector over a list of image files. As of 3/2024, this entry point is used when the
249
- image queue is enabled, but not in the standard inference path (which loops over process_image()).
250
-
251
- Args
252
- - im_files: list of str, paths to image files
253
- - detector: loaded model or str (path to .pb/.pt model file)
254
- - confidence_threshold: float, only detections above this threshold are returned
269
+ Runs a detector (typically MegaDetector) over a list of image files.
270
+ As of 3/2024, this entry point is used when the image queue is enabled, but not in the
271
+ standard inference path (which instead loops over process_image()).
272
+
273
+ Args:
274
 + im_files (list): paths to image files
275
+ detector (str or detector object): loaded model or str; if this is a string, it can be a
276
+ path to a .pb/.pt model file or a known model identifier (e.g. "MDV5A")
277
+ confidence_threshold (float): only detections above this threshold are returned
278
+ use_image_queue (bool, optional): separate image loading onto a dedicated worker process
279
+ quiet (bool, optional): suppress per-image printouts
280
+ image_size (tuple, optional): image size to use for inference, only mess with this
281
+ if (a) you're using a model other than MegaDetector or (b) you know what you're
282
+ doing
283
 + checkpoint_queue (Queue, optional): internal parameter used to pass results to the checkpoint writer
284
+ include_image_size (bool, optional): should we include image size in the output for each image?
285
+ include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
286
+ include_exif_data (bool, optional): should we include EXIF data in the output for each image?
255
287
 
256
- Returns
257
- - results: list of dict, each dict represents detections on one image
288
+ Returns:
289
+ list: list of dicts, in which each dict represents detections on one image,
258
290
  see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
259
291
  """
260
292
 
@@ -293,17 +325,26 @@ def process_image(im_file, detector, confidence_threshold, image=None,
293
325
  include_image_timestamp=False, include_exif_data=False,
294
326
  skip_image_resizing=False):
295
327
  """
296
- Runs MegaDetector on a single image file.
297
-
298
- Args
299
- - im_file: str, path to image file
300
- - detector: loaded model
301
- - confidence_threshold: float, only detections above this threshold are returned
302
- - image: previously-loaded image, if available
303
- - skip_image_resizing: whether to skip internal image resizing and rely on external resizing
328
+ Runs a detector (typically MegaDetector) on a single image file.
329
+
330
+ Args:
331
+ im_file (str): path to image file
332
+ detector (detector object): loaded model, this can no longer be a string by the time
333
+ you get this far down the pipeline
334
+ confidence_threshold (float): only detections above this threshold are returned
335
+ image (Image, optional): previously-loaded image, if available, used when a worker
336
+ thread is handling image loads
337
+ quiet (bool, optional): suppress per-image printouts
338
+ image_size (tuple, optional): image size to use for inference, only mess with this
339
+ if (a) you're using a model other than MegaDetector or (b) you know what you're
340
+ doing
341
+ include_image_size (bool, optional): should we include image size in the output for each image?
342
+ include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
343
+ include_exif_data (bool, optional): should we include EXIF data in the output for each image?
344
+ skip_image_resizing (bool, optional): whether to skip internal image resizing and rely on external resizing
304
345
 
305
346
  Returns:
306
- - result: dict representing detections on one image
347
+ dict: dict representing detections on one image,
307
348
  see the 'images' key in
308
349
  https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
309
350
  """
@@ -351,7 +392,7 @@ def process_image(im_file, detector, confidence_threshold, image=None,
351
392
  # ...def process_image(...)
352
393
 
353
394
 
354
- def load_custom_class_mapping(class_mapping_filename):
395
+ def _load_custom_class_mapping(class_mapping_filename):
355
396
  """
356
397
  This is an experimental hack to allow the use of non-MD YOLOv5 models through
357
398
  the same infrastructure; it disables the code that enforces MDv5-like class lists.
@@ -389,34 +430,50 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
389
430
  class_mapping_filename=None, include_image_size=False,
390
431
  include_image_timestamp=False, include_exif_data=False):
391
432
  """
392
- Args
393
- - model_file: path to model file, or supported model string (e.g. "MDV5A")
394
- - image_file_names: list of strings (image filenames), a single image filename,
395
- a folder to recursively search for images in, or a .json or .txt file
396
- containing a list of images.
397
- - checkpoint_path: str, path to JSON checkpoint file
398
- - confidence_threshold: float, only detections above this threshold are returned
399
- - checkpoint_frequency: int, write results to JSON checkpoint file every N images
400
- - results: list of dict, existing results loaded from checkpoint
401
- - n_cores: int, # of CPU cores to use
402
- - class_mapping_filename: str, use a non-default class mapping supplied in a .json file
403
- or YOLOv5 dataset.yaml file.
404
-
405
- Returns
406
- - results: list of dicts; each dict represents detections on one image
433
+ Load a model file and run it on a list of images.
434
+
435
+ Args:
436
+
437
+ model_file (str): path to model file, or supported model string (e.g. "MDV5A")
438
+ image_file_names (list or str): list of strings (image filenames), a single image filename,
439
+ a folder to recursively search for images in, or a .json or .txt file containing a list
440
+ of images.
441
 + checkpoint_path (str, optional): path to use for checkpoints (if None, checkpointing
442
+ is disabled)
443
+ confidence_threshold (float, optional): only detections above this threshold are returned
444
+ checkpoint_frequency (int, optional): int, write results to JSON checkpoint file every N
445
+ images, -1 disabled checkpointing
446
+ results (list, optional): list of dicts, existing results loaded from checkpoint; generally
447
+ not useful if you're using this function outside of the CLI
448
 + n_cores (int, optional): number of parallel workers to use, ignored if we're running on a GPU
449
+ use_image_queue (bool, optional): use a dedicated worker for image loading
450
+ quiet (bool, optional): disable per-image console output
451
+ image_size (tuple, optional): image size to use for inference, only mess with this
452
+ if (a) you're using a model other than MegaDetector or (b) you know what you're
453
+ doing
454
 + class_mapping_filename (str, optional): use a non-default class mapping supplied in a .json
455
+ file or YOLOv5 dataset.yaml file
456
+ include_image_size (bool, optional): should we include image size in the output for each image?
457
+ include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
458
+ include_exif_data (bool, optional): should we include EXIF data in the output for each image?
459
+
460
+ Returns:
461
 + list: list of dicts; each dict represents detections on one image
407
462
  """
408
463
 
464
+ # Validate input arguments
409
465
  if n_cores is None:
410
466
  n_cores = 1
411
467
 
412
468
  if confidence_threshold is None:
413
469
  confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
414
-
415
- if checkpoint_frequency is None:
470
+
471
+ # Disable checkpointing if checkpoint_path is None
472
+ if checkpoint_frequency is None or checkpoint_path is None:
416
473
  checkpoint_frequency = -1
417
474
 
418
475
  if class_mapping_filename is not None:
419
- load_custom_class_mapping(class_mapping_filename)
476
+ _load_custom_class_mapping(class_mapping_filename)
420
477
 
421
478
  # Handle the case where image_file_names is not yet actually a list
422
479
  if isinstance(image_file_names,str):
@@ -451,7 +508,8 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
451
508
  list_file))
452
509
  else:
453
510
  raise ValueError(
454
- '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder')
511
+ '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'.format(
512
+ image_file_names))
455
513
 
456
514
  if results is None:
457
515
  results = []
@@ -515,12 +573,12 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
515
573
  results.append(result)
516
574
 
517
575
  # Write a checkpoint if necessary
518
- if checkpoint_frequency != -1 and count % checkpoint_frequency == 0:
576
+ if (checkpoint_frequency != -1) and ((count % checkpoint_frequency) == 0):
519
577
 
520
578
  print('Writing a new checkpoint after having processed {} images since '
521
579
  'last restart'.format(count))
522
580
 
523
- write_checkpoint(checkpoint_path, results)
581
+ _write_checkpoint(checkpoint_path, results)
524
582
 
525
583
  else:
526
584
 
@@ -540,7 +598,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
540
598
  len(already_processed),n_images_all))
541
599
 
542
600
  # Divide images into chunks; we'll send one chunk to each worker process
543
- image_batches = list(chunks_by_number_of_chunks(image_file_names, n_cores))
601
+ image_batches = list(_chunks_by_number_of_chunks(image_file_names, n_cores))
544
602
 
545
603
  pool = workerpool(n_cores)
546
604
 
@@ -553,7 +611,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
553
611
  # Pass the "results" array (which may already contain images loaded from an existing
554
612
  # checkpoint) to the checkpoint queue handler function, which will append results to
555
613
  # the list as they become available.
556
- checkpoint_thread = Thread(target=checkpoint_queue_handler,
614
+ checkpoint_thread = Thread(target=_checkpoint_queue_handler,
557
615
  args=(checkpoint_path, checkpoint_frequency,
558
616
  checkpoint_queue, results), daemon=True)
559
617
  checkpoint_thread.start()
@@ -597,7 +655,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
597
655
  # ...def load_and_run_detector_batch(...)
598
656
 
599
657
 
600
- def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
658
+ def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
601
659
  """
602
660
  Thread function to accumulate results and write checkpoints when checkpointing and
603
661
  multiprocessing are both enabled.
@@ -617,15 +675,15 @@ def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_q
617
675
  print('Writing a new checkpoint after having processed {} images since '
618
676
  'last restart'.format(result_count))
619
677
 
620
- write_checkpoint(checkpoint_path, results)
678
+ _write_checkpoint(checkpoint_path, results)
621
679
 
622
680
 
623
- def write_checkpoint(checkpoint_path, results):
681
+ def _write_checkpoint(checkpoint_path, results):
624
682
  """
625
683
  Writes the 'images' field in the dict 'results' to a json checkpoint file.
626
684
  """
627
685
 
628
- assert checkpoint_path is not None
686
+ assert checkpoint_path is not None
629
687
 
630
688
  # Back up any previous checkpoints, to protect against crashes while we're writing
631
689
  # the checkpoint file.
@@ -645,9 +703,14 @@ def write_checkpoint(checkpoint_path, results):
645
703
 
646
704
  def get_image_datetime(image):
647
705
  """
648
- Returns the EXIF datetime from [image] (a PIL Image object), if available, as a string.
706
+ Reads EXIF datetime from a PIL Image object.
649
707
 
650
- [im_file] is used only for error reporting.
708
+ Args:
709
+ image (Image): the PIL Image object from which we should read datetime information
710
+
711
+ Returns:
712
+ str: the EXIF datetime from [image] (a PIL Image object), if available, as a string;
713
+ returns None if EXIF datetime is not available.
651
714
  """
652
715
 
653
716
  exif_tags = read_exif.read_pil_exif(image,exif_options)
@@ -669,20 +732,24 @@ def write_results_to_file(results, output_file, relative_path_base=None,
669
732
 
670
733
  https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
671
734
 
672
- Args
673
- - results: list of dict, each dict represents detections on one image
674
- - output_file: str, path to JSON output file, should end in '.json'
675
- - relative_path_base: str, path to a directory as the base for relative paths
676
- - detector_file: filename of the detector used to generate these results, only
677
- used to pull out a version number for the "info" field
678
- - info: dictionary to use instead of the default "info" field
679
- - include_max_conf: old files (version 1.2 and earlier) included a "max_conf" field
680
- in each image; this was removed in version 1.3. Set this flag to force the inclusion
681
- of this field.
682
- - custom_metadata: additional data to include as info['custom_metadata']. Typically
683
- a dictionary, but no format checks are performed.
684
-
685
- Returns the complete output dictionary that was written to the output file.
735
+ Args:
736
+ results (list): list of dict, each dict represents detections on one image
737
+ output_file (str): path to JSON output file, should end in '.json'
738
+ relative_path_base (str, optional): path to a directory as the base for relative paths, can
739
+ be None if the paths in [results] are absolute
740
+ detector_file (str, optional): filename of the detector used to generate these results, only
741
+ used to pull out a version number for the "info" field
742
+ info (dict, optional): dictionary to put in the results file instead of the default "info" field
743
+ include_max_conf (bool, optional): old files (version 1.2 and earlier) included a "max_conf" field
744
+ in each image; this was removed in version 1.3. Set this flag to force the inclusion
745
+ of this field.
746
+ custom_metadata (object, optional): additional data to include as info['custom_metadata']; typically
747
+ a dictionary, but no type/format checks are performed
748
+ force_forward_slashes (bool, optional): convert all slashes in filenames within [results] to
749
+ forward slashes
750
+
751
+ Returns:
752
+ dict: the MD-formatted dictionary that was written to [output_file]
686
753
  """
687
754
 
688
755
  if relative_path_base is not None:
@@ -997,7 +1064,7 @@ def main():
997
1064
  assert not os.path.isdir(args.output_file), 'Specified output file is a directory'
998
1065
 
999
1066
  if args.class_mapping_filename is not None:
1000
- load_custom_class_mapping(args.class_mapping_filename)
1067
+ _load_custom_class_mapping(args.class_mapping_filename)
1001
1068
 
1002
1069
  # Load the checkpoint if available
1003
1070
  #
@@ -1146,8 +1213,7 @@ def main():
1146
1213
  os.remove(checkpoint_path)
1147
1214
  print('Deleted checkpoint file {}'.format(checkpoint_path))
1148
1215
 
1149
- print('Done!')
1150
-
1216
+ print('Done, thanks for MegaDetect\'ing!')
1151
1217
 
1152
1218
  if __name__ == '__main__':
1153
1219
  main()