megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of megadetector has been flagged as potentially problematic.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,38 +1,43 @@
-########
-#
-# run_detector_batch.py
-#
-# Module to run MegaDetector on lots of images, writing the results
-# to a file in the same format produced by our batch API:
-#
-# https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing
-#
-# This enables the results to be used in our post-processing pipeline; see
-# api/batch_processing/postprocessing/postprocess_batch_results.py .
-#
-# This script can save results to checkpoints intermittently, in case disaster
-# strikes. To enable this, set --checkpoint_frequency to n > 0, and results
-# will be saved as a checkpoint every n images. Checkpoints will be written
-# to a file in the same directory as the output_file, and after all images
-# are processed and final results file written to output_file, the temporary
-# checkpoint file will be deleted. If you want to resume from a checkpoint, set
-# the checkpoint file's path using --resume_from_checkpoint.
-#
-# The `threshold` you can provide as an argument is the confidence threshold above
-# which detections will be included in the output file.
-#
-# Has preliminary multiprocessing support for CPUs only; if a GPU is available, it will
-# use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
-# is not supported when using a GPU.
-#
-# Does not have a command-line option to bind the process to a particular GPU, but you can
-# prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
-#
-# CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
-#
-# You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
-#
-########
+"""
+
+run_detector_batch.py
+
+Module to run MegaDetector on lots of images, writing the results
+to a file in the MegaDetector results format.
+
+https://github.com/agentmorris/MegaDetector/tree/main/api/batch_processing#megadetector-batch-output-format
+
+This enables the results to be used in our post-processing pipeline; see
+api/batch_processing/postprocessing/postprocess_batch_results.py .
+
+This script can save results to checkpoints intermittently, in case disaster
+strikes. To enable this, set --checkpoint_frequency to n > 0, and results
+will be saved as a checkpoint every n images. Checkpoints will be written
+to a file in the same directory as the output_file, and after all images
+are processed and final results file written to output_file, the temporary
+checkpoint file will be deleted. If you want to resume from a checkpoint, set
+the checkpoint file's path using --resume_from_checkpoint.
+
+The `threshold` you can provide as an argument is the confidence threshold above
+which detections will be included in the output file.
+
+Has multiprocessing support for CPUs only; if a GPU is available, it will
+use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
+is not supported when using a GPU.
+
+The lack of GPU multiprocessing support might sound annoying, but in practice we
+run a gazillion images through MegaDetector on multiple GPUs using this script; we
+just use one GPU *per invocation of this script*. Dividing a big batch of images
+into one chunk per GPU happens outside of this script.
+
+Does not have a command-line option to bind the process to a particular GPU, but you can
+prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
+
+CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
+
+You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
+
+"""
 
 #%% Constants, imports, environment
 
@@ -91,7 +96,7 @@ exif_options.byte_handling = 'convert_to_string'
 
 #%% Support functions for multiprocessing
 
-def producer_func(q,image_files):
+def _producer_func(q,image_files):
     """
     Producer function; only used when using the (optional) image queue.
 
@@ -120,7 +125,7 @@ def producer_func(q,image_files):
     print('Finished image loading'); sys.stdout.flush()
 
 
-def consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
+def _consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
     """
     Consumer function; only used when using the (optional) image queue.
 
@@ -177,15 +182,28 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
     when --use_image_queue is specified. Starts a reader process to read images from disk, but
     processes images in the process from which this function is called (i.e., does not currently
     spawn a separate consumer process).
+
+    Args:
+        image_files (list): list of absolute paths to images
+        model_file (str): filename or model identifier (e.g. "MDV5A")
+        confidence_threshold (float): minimum detection confidence to include in
+            the output
+        quiet (bool, optional): suppress per-image console printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+
+    Returns:
+        list: list of dicts in the format returned by process_image()
     """
 
     q = multiprocessing.JoinableQueue(max_queue_size)
     return_queue = multiprocessing.Queue(1)
 
     if use_threads_for_queue:
-        producer = Thread(target=producer_func,args=(q,image_files,))
+        producer = Thread(target=_producer_func,args=(q,image_files,))
     else:
-        producer = Process(target=producer_func,args=(q,image_files,))
+        producer = Process(target=_producer_func,args=(q,image_files,))
     producer.daemon = False
     producer.start()
 
@@ -199,15 +217,15 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
 
     if run_separate_consumer_process:
         if use_threads_for_queue:
-            consumer = Thread(target=consumer_func,args=(q,return_queue,model_file,
+            consumer = Thread(target=_consumer_func,args=(q,return_queue,model_file,
                                                          confidence_threshold,image_size,))
         else:
-            consumer = Process(target=consumer_func,args=(q,return_queue,model_file,
+            consumer = Process(target=_consumer_func,args=(q,return_queue,model_file,
                                                           confidence_threshold,image_size,))
         consumer.daemon = True
         consumer.start()
     else:
-        consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
+        _consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
 
     producer.join()
     print('Producer finished')
@@ -226,13 +244,15 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
 
 #%% Other support functions
 
-def chunks_by_number_of_chunks(ls, n):
+def _chunks_by_number_of_chunks(ls, n):
     """
     Splits a list into n even chunks.
+
+    External callers should use ct_utils.split_list_into_n_chunks().
 
-    Args
-    - ls: list
-    - n: int, # of chunks
+    Args:
+        ls (list): list to break up into chunks
+        n (int): number of chunks
     """
 
     for i in range(0, n):
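The body of _chunks_by_number_of_chunks() is elided by the diff beyond the loop header; a typical generator that yields n contiguous, roughly-equal slices looks like this (a sketch of the usual idiom, not necessarily the exact code):

def chunks_by_number_of_chunks(ls, n):
    # Yield n contiguous chunks whose lengths differ by at most one
    for i in range(0, n):
        yield ls[i * len(ls) // n:(i + 1) * len(ls) // n]

# Example: list(chunks_by_number_of_chunks(list(range(10)), 3))
# -> [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]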
@@ -242,18 +262,31 @@ def chunks_by_number_of_chunks(ls, n):
 #%% Image processing functions
 
 def process_images(im_files, detector, confidence_threshold, use_image_queue=False,
-                   quiet=False, image_size=None, checkpoint_queue=None, include_image_size=False,
-                   include_image_timestamp=False, include_exif_data=False):
+                   quiet=False, image_size=None, checkpoint_queue=None,
+                   include_image_size=False, include_image_timestamp=False,
+                   include_exif_data=False):
     """
-    Runs MegaDetector over a list of image files.
-
-    Args
-    - im_files: list of str, paths to image files
-    - detector: loaded model or str (path to .pb/.pt model file)
-    - confidence_threshold: float, only detections above this threshold are returned
+    Runs a detector (typically MegaDetector) over a list of image files.
+    As of 3/2024, this entry point is used when the image queue is enabled, but not in the
+    standard inference path (which instead loops over process_image()).
+
+    Args:
+        im_files (list): paths to image files
+        detector (str or detector object): loaded model or str; if this is a string, it can be a
+            path to a .pb/.pt model file or a known model identifier (e.g. "MDV5A")
+        confidence_threshold (float): only detections above this threshold are returned
+        use_image_queue (bool, optional): separate image loading onto a dedicated worker process
+        quiet (bool, optional): suppress per-image printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        checkpoint_queue (Queue, optional): internal parameter used to pass image queues around
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
 
-    Returns
-    - results: list of dict, each dict represents detections on one image
+    Returns:
+        list: list of dicts, in which each dict represents detections on one image,
         see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
     """
 
@@ -269,7 +302,7 @@ def process_images(im_files, detector, confidence_threshold, use_image_queue=Fal
                                 include_image_size=include_image_size,
                                 include_image_timestamp=include_image_timestamp,
                                 include_exif_data=include_exif_data)
-    else:
+    else:
         results = []
         for im_file in im_files:
             result = process_image(im_file, detector, confidence_threshold,
@@ -292,17 +325,26 @@ def process_image(im_file, detector, confidence_threshold, image=None,
                   include_image_timestamp=False, include_exif_data=False,
                   skip_image_resizing=False):
     """
-    Runs MegaDetector on a single image file.
-
-    Args
-    - im_file: str, path to image file
-    - detector: loaded model
-    - confidence_threshold: float, only detections above this threshold are returned
-    - image: previously-loaded image, if available
-    - skip_image_resizing: whether to skip internal image resizing and rely on external resizing
+    Runs a detector (typically MegaDetector) on a single image file.
+
+    Args:
+        im_file (str): path to image file
+        detector (detector object): loaded model; this can no longer be a string by the time
+            you get this far down the pipeline
+        confidence_threshold (float): only detections above this threshold are returned
+        image (Image, optional): previously-loaded image, if available, used when a worker
+            thread is handling image loads
+        quiet (bool, optional): suppress per-image printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
+        skip_image_resizing (bool, optional): whether to skip internal image resizing and rely on external resizing
 
     Returns:
-    - result: dict representing detections on one image
+        dict: dict representing detections on one image,
         see the 'images' key in
         https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
     """
@@ -350,7 +392,7 @@ def process_image(im_file, detector, confidence_threshold, image=None,
 # ...def process_image(...)
 
 
-def load_custom_class_mapping(class_mapping_filename):
+def _load_custom_class_mapping(class_mapping_filename):
     """
     This is an experimental hack to allow the use of non-MD YOLOv5 models through
     the same infrastructure; it disables the code that enforces MDv5-like class lists.
@@ -388,34 +430,50 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
                                 class_mapping_filename=None, include_image_size=False,
                                 include_image_timestamp=False, include_exif_data=False):
     """
-    Args
-    - model_file: path to model file, or supported model string (e.g. "MDV5A")
-    - image_file_names: list of strings (image filenames), a single image filename,
-      a folder to recursively search for images in, or a .json or .txt file
-      containing a list of images.
-    - checkpoint_path: str, path to JSON checkpoint file
-    - confidence_threshold: float, only detections above this threshold are returned
-    - checkpoint_frequency: int, write results to JSON checkpoint file every N images
-    - results: list of dict, existing results loaded from checkpoint
-    - n_cores: int, # of CPU cores to use
-    - class_mapping_filename: str, use a non-default class mapping supplied in a .json file
-      or YOLOv5 dataset.yaml file.
-
-    Returns
-    - results: list of dicts; each dict represents detections on one image
+    Load a model file and run it on a list of images.
+
+    Args:
+
+        model_file (str): path to model file, or supported model string (e.g. "MDV5A")
+        image_file_names (list or str): list of strings (image filenames), a single image filename,
+            a folder to recursively search for images in, or a .json or .txt file containing a list
+            of images.
+        checkpoint_path (str, optional): path to use for checkpoints (if None, checkpointing
+            is disabled)
+        confidence_threshold (float, optional): only detections above this threshold are returned
+        checkpoint_frequency (int, optional): write results to JSON checkpoint file every N
+            images; -1 disables checkpointing
+        results (list, optional): list of dicts, existing results loaded from checkpoint; generally
+            not useful if you're using this function outside of the CLI
+        n_cores (int, optional): number of parallel workers to use, ignored if we're running on a GPU
+        use_image_queue (bool, optional): use a dedicated worker for image loading
+        quiet (bool, optional): disable per-image console output
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        class_mapping_filename (str, optional): use a non-default class mapping supplied in a .json
+            file or YOLOv5 dataset.yaml file
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
+
+    Returns:
+        list: list of dicts; each dict represents detections on one image
     """
 
+    # Validate input arguments
     if n_cores is None:
         n_cores = 1
 
     if confidence_threshold is None:
         confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
-
-    if checkpoint_frequency is None:
+
+    # Disable checkpointing if checkpoint_path is None
+    if checkpoint_frequency is None or checkpoint_path is None:
         checkpoint_frequency = -1
 
     if class_mapping_filename is not None:
-        load_custom_class_mapping(class_mapping_filename)
+        _load_custom_class_mapping(class_mapping_filename)
 
     # Handle the case where image_file_names is not yet actually a list
     if isinstance(image_file_names,str):
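Together with write_results_to_file() below, this documented signature supports a programmatic equivalent of the CLI, along these lines (a sketch; the import path is inferred from the wheel's layout, and the paths, model string, and checkpoint frequency are illustrative):

from detection.run_detector_batch import load_and_run_detector_batch, write_results_to_file

# Write a checkpoint every 1000 images; per the docstring, passing
# checkpoint_path=None would disable checkpointing entirely
results = load_and_run_detector_batch(model_file='MDV5A',
                                      image_file_names='/data/camera-traps',
                                      checkpoint_path='/data/md-checkpoint.json',
                                      checkpoint_frequency=1000)

write_results_to_file(results,
                      '/data/md-results.json',
                      relative_path_base='/data/camera-traps',
                      detector_file='MDV5A')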
@@ -450,7 +508,8 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
                 list_file))
         else:
             raise ValueError(
-                '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder')
+                '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'.format(
+                    image_file_names))
 
     if results is None:
         results = []
@@ -514,12 +573,12 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
             results.append(result)
 
             # Write a checkpoint if necessary
-            if checkpoint_frequency != -1 and count % checkpoint_frequency == 0:
+            if (checkpoint_frequency != -1) and ((count % checkpoint_frequency) == 0):
 
                 print('Writing a new checkpoint after having processed {} images since '
                       'last restart'.format(count))
 
-                write_checkpoint(checkpoint_path, results)
+                _write_checkpoint(checkpoint_path, results)
 
     else:
 
@@ -539,7 +598,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
               len(already_processed),n_images_all))
 
         # Divide images into chunks; we'll send one chunk to each worker process
-        image_batches = list(chunks_by_number_of_chunks(image_file_names, n_cores))
+        image_batches = list(_chunks_by_number_of_chunks(image_file_names, n_cores))
 
         pool = workerpool(n_cores)
 
@@ -552,7 +611,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
             # Pass the "results" array (which may already contain images loaded from an existing
             # checkpoint) to the checkpoint queue handler function, which will append results to
             # the list as they become available.
-            checkpoint_thread = Thread(target=checkpoint_queue_handler,
+            checkpoint_thread = Thread(target=_checkpoint_queue_handler,
                                        args=(checkpoint_path, checkpoint_frequency,
                                              checkpoint_queue, results), daemon=True)
             checkpoint_thread.start()
@@ -596,7 +655,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
 # ...def load_and_run_detector_batch(...)
 
 
-def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
+def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
     """
     Thread function to accumulate results and write checkpoints when checkpointing and
     multiprocessing are both enabled.
@@ -616,15 +675,15 @@ def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_q
             print('Writing a new checkpoint after having processed {} images since '
                   'last restart'.format(result_count))
 
-            write_checkpoint(checkpoint_path, results)
+            _write_checkpoint(checkpoint_path, results)
 
 
-def write_checkpoint(checkpoint_path, results):
+def _write_checkpoint(checkpoint_path, results):
     """
     Writes the 'images' field in the dict 'results' to a json checkpoint file.
     """
 
-    assert checkpoint_path is not None
+    assert checkpoint_path is not None
 
     # Back up any previous checkpoints, to protect against crashes while we're writing
     # the checkpoint file.
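The backup mentioned in that comment is the usual crash-safety idiom: keep the previous checkpoint on disk until the new one has been completely written. A sketch of the idiom (the module's actual backup logic may differ in details):

import json
import os

def write_checkpoint_safely(checkpoint_path, results):
    # Keep the old checkpoint until the new one is safely on disk
    backup_path = None
    if os.path.isfile(checkpoint_path):
        backup_path = checkpoint_path + '.bak'
        os.replace(checkpoint_path, backup_path)
    with open(checkpoint_path, 'w') as f:
        json.dump({'images': results}, f, indent=1)
    if backup_path is not None:
        os.remove(backup_path)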
@@ -644,9 +703,14 @@ def write_checkpoint(checkpoint_path, results):
 
 def get_image_datetime(image):
     """
-    Returns the EXIF datetime from [image] (a PIL Image object), if available, as a string.
+    Reads EXIF datetime from a PIL Image object.
 
-    [im_file] is used only for error reporting.
+    Args:
+        image (Image): the PIL Image object from which we should read datetime information
+
+    Returns:
+        str: the EXIF datetime from [image], if available, as a string;
+            returns None if EXIF datetime is not available.
     """
 
     exif_tags = read_exif.read_pil_exif(image,exif_options)
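get_image_datetime() delegates the EXIF lookup to the read_exif module; the underlying operation is equivalent to pulling the DateTime tags out of PIL's EXIF data, roughly like this (a sketch using Pillow directly, assuming a reasonably recent Pillow; not this module's actual code path):

from PIL import Image

def get_exif_datetime(image_path):
    # Returns a string like '2023:06:01 12:34:56', or None if unavailable
    exif = Image.open(image_path).getexif()
    exif_ifd = exif.get_ifd(0x8769)             # Exif sub-IFD
    # 36867 = DateTimeOriginal (capture time), 306 = DateTime
    return exif_ifd.get(36867) or exif.get(306)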
@@ -662,26 +726,30 @@ def get_image_datetime(image):
 
 def write_results_to_file(results, output_file, relative_path_base=None,
                           detector_file=None, info=None, include_max_conf=False,
-                          custom_metadata=None):
+                          custom_metadata=None, force_forward_slashes=True):
     """
     Writes list of detection results to JSON output file. Format matches:
 
     https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
 
-    Args
-    - results: list of dict, each dict represents detections on one image
-    - output_file: str, path to JSON output file, should end in '.json'
-    - relative_path_base: str, path to a directory as the base for relative paths
-    - detector_file: filename of the detector used to generate these results, only
-      used to pull out a version number for the "info" field
-    - info: dictionary to use instead of the default "info" field
-    - include_max_conf: old files (version 1.2 and earlier) included a "max_conf" field
-      in each image; this was removed in version 1.3. Set this flag to force the inclusion
-      of this field.
-    - custom_metadata: additional data to include as info['custom_metadata']. Typically
-      a dictionary, but no format checks are performed.
-
-    Returns the complete output dictionary that was written to the output file.
+    Args:
+        results (list): list of dicts; each dict represents detections on one image
+        output_file (str): path to JSON output file, should end in '.json'
+        relative_path_base (str, optional): path to a directory as the base for relative paths, can
+            be None if the paths in [results] are absolute
+        detector_file (str, optional): filename of the detector used to generate these results, only
+            used to pull out a version number for the "info" field
+        info (dict, optional): dictionary to put in the results file instead of the default "info" field
+        include_max_conf (bool, optional): old files (version 1.2 and earlier) included a "max_conf" field
+            in each image; this was removed in version 1.3. Set this flag to force the inclusion
+            of this field.
+        custom_metadata (object, optional): additional data to include as info['custom_metadata']; typically
+            a dictionary, but no type/format checks are performed
+        force_forward_slashes (bool, optional): convert all slashes in filenames within [results] to
+            forward slashes
+
+    Returns:
+        dict: the MD-formatted dictionary that was written to [output_file]
     """
 
     if relative_path_base is not None:
@@ -692,6 +760,14 @@ def write_results_to_file(results, output_file, relative_path_base=None,
             results_relative.append(r_relative)
         results = results_relative
 
+    if force_forward_slashes:
+        results_converted = []
+        for r in results:
+            r_converted = copy.copy(r)
+            r_converted['file'] = r_converted['file'].replace('\\','/')
+            results_converted.append(r_converted)
+        results = results_converted
+
     # The typical case: we need to build the 'info' struct
     if info is None:
 
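The dict assembled here follows the MegaDetector batch output format linked in the docstring above; for reference, an abbreviated example of the structure (field values illustrative; bounding boxes are normalized [x_min, y_min, width, height]):

md_results = {
    'images': [
        {
            'file': 'camera1/IMG_0001.JPG',
            'detections': [
                {'category': '1', 'conf': 0.92, 'bbox': [0.41, 0.33, 0.25, 0.31]}
            ]
        }
    ],
    'detection_categories': {'1': 'animal', '2': 'person', '3': 'vehicle'},
    'info': {'format_version': '1.3', 'detector': 'md_v5a.0.0.pt'}
}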
@@ -988,7 +1064,7 @@ def main():
     assert not os.path.isdir(args.output_file), 'Specified output file is a directory'
 
     if args.class_mapping_filename is not None:
-        load_custom_class_mapping(args.class_mapping_filename)
+        _load_custom_class_mapping(args.class_mapping_filename)
 
     # Load the checkpoint if available
     #
@@ -1137,8 +1213,7 @@ def main():
         os.remove(checkpoint_path)
         print('Deleted checkpoint file {}'.format(checkpoint_path))
 
-    print('Done!')
-
+    print('Done, thanks for MegaDetect\'ing!')
 
 if __name__ == '__main__':
     main()