megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (201) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1219 @@
1
+ """
2
+
3
+ run_detector_batch.py
4
+
5
+ Module to run MegaDetector on lots of images, writing the results
6
+ to a file in the MegaDetector results format.
7
+
8
+ https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing#megadetector-batch-output-format
9
+
10
+ This enables the results to be used in our post-processing pipeline; see postprocess_batch_results.py.
11
+
12
+ This script can save results to checkpoints intermittently, in case disaster
13
+ strikes. To enable this, set --checkpoint_frequency to n > 0, and results
14
+ will be saved as a checkpoint every n images. Checkpoints will be written
15
+ to a file in the same directory as the output_file, and after all images
16
+ are processed and final results file written to output_file, the temporary
17
+ checkpoint file will be deleted. If you want to resume from a checkpoint, set
18
+ the checkpoint file's path using --resume_from_checkpoint.
19
+
20
+ The `threshold` you can provide as an argument is the confidence threshold above
21
+ which detections will be included in the output file.
22
+
23
+ Has multiprocessing support for CPUs only; if a GPU is available, it will
24
+ use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
25
+ is not supported when using a GPU.
26
+
27
+ The lack of GPU multiprocessing support might sound annoying, but in practice we
28
+ run a gazillion MegaDetector images on multiple GPUs using this script; we just use only
29
+ one GPU *per invocation of this script*. Dividing a big batch of images into one chunk
30
+ per GPU happens outside of this script.
31
+
32
+ Does not have a command-line option to bind the process to a particular GPU, but you can
33
+ prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
34
+
35
+ CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
36
+
37
+ You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
38
+
39
+ """
40
+
41
+ #%% Constants, imports, environment
42
+
43
+ import argparse
44
+ import json
45
+ import os
46
+ import sys
47
+ import time
48
+ import copy
49
+ import shutil
50
+ import warnings
51
+ import itertools
52
+ import humanfriendly
53
+
54
+ from datetime import datetime
55
+ from functools import partial
56
+ from tqdm import tqdm
57
+
58
+ import multiprocessing
59
+ from threading import Thread
60
+ from multiprocessing import Process, Manager
61
+
62
+ # Multiprocessing uses processes, not threads... leaving this here (and commented out)
63
+ # to make sure I don't change this casually at some point, it changes a number of
64
+ # assumptions about interaction with PyTorch and TF.
65
+ # from multiprocessing.pool import ThreadPool as workerpool
66
+ from multiprocessing.pool import Pool as workerpool
67
+
68
+ from megadetector.detection import run_detector
69
+ from megadetector.detection.run_detector import \
70
+ is_gpu_available,\
71
+ load_detector,\
72
+ try_download_known_detector,\
73
+ get_detector_version_from_filename,\
74
+ get_detector_metadata_from_version_string
75
+
76
+ from megadetector.utils import path_utils
77
+ from megadetector.visualization import visualization_utils as vis_utils
78
+ from megadetector.data_management import read_exif
79
+ from megadetector.data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file
80
+
81
+ # Numpy FutureWarnings from tensorflow import
82
+ warnings.filterwarnings('ignore', category=FutureWarning)
83
+
84
+ # Number of images to pre-fetch
85
+ max_queue_size = 10
86
+
87
+ # How often should we print progress when using the image queue?
88
+ n_queue_print = 1000
89
+
90
+ use_threads_for_queue = False
91
+ verbose = False
92
+
93
+ exif_options = read_exif.ReadExifOptions()
94
+ exif_options.processing_library = 'pil'
95
+ exif_options.byte_handling = 'convert_to_string'
96
+
97
+
98
+ #%% Support functions for multiprocessing
99
+
100
def _producer_func(q,image_files):
    """
    Image-loading half of the (optional) image queue.

    Loads every file in [image_files] and puts a [filename,image] pair on [q] for each
    one.  When a file cannot be loaded, run_detector.FAILURE_IMAGE_OPEN is queued in
    place of the image.  After the last file, a single None is queued to mark the end
    of the stream.

    Args:
        q (queue): queue that receives the [filename,image] pairs and the trailing None
        image_files (list): filenames of the images to load
    """

    def _print_and_flush(message):
        # Flush immediately so output is not held back in a buffer
        print(message); sys.stdout.flush()

    if verbose:
        _print_and_flush('Producer starting')

    for image_path in image_files:

        try:
            if verbose:
                _print_and_flush('Loading image {}'.format(image_path))
            loaded_image = vis_utils.load_image(image_path)
        except Exception:
            # Report the failure and queue the failure marker instead of an image
            print('Producer process: image {} cannot be loaded.'.format(image_path))
            loaded_image = run_detector.FAILURE_IMAGE_OPEN

        if verbose:
            _print_and_flush('Queueing image {}'.format(image_path))
        q.put([image_path,loaded_image])

    # End-of-stream marker
    q.put(None)

    _print_and_flush('Finished image loading')
127
+
128
+
129
def _consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
    """
    Consumer function; only used when using the (optional) image queue.

    Pulls [filename,image] items from a blocking queue and processes them until a None
    item arrives, then puts the accumulated list of results on [return_queue] and returns.

    Args:
        q (queue): queue of [filename,image] items, terminated by a None item; task_done()
            is called once for every item taken from the queue
        return_queue (queue): receives the list of per-image results when the None item
            is reached
        model_file (str or detector object): if this is a string, it is passed to
            load_detector(); anything else is treated as an already-loaded detector and
            used as-is
        confidence_threshold (float): passed through to process_image()
        image_size (tuple, optional): passed through to process_image()
    """

    if verbose:
        print('Consumer starting'); sys.stdout.flush()

    start_time = time.time()

    # Callers such as process_images() may hand us a detector that is already loaded;
    # only strings need to go through load_detector().
    if isinstance(model_file,str):
        detector = load_detector(model_file)
        elapsed = time.time() - start_time
        print('Loaded model (before queueing) in {}, printing updates every {} images'.format(
            humanfriendly.format_timespan(elapsed),n_queue_print))
    else:
        detector = model_file
        print('Using pre-loaded model, printing updates every {} images'.format(
            n_queue_print))
    sys.stdout.flush()

    results = []

    n_images_processed = 0

    while True:

        r = q.get()

        # None is the end-of-stream marker
        if r is None:
            q.task_done()
            return_queue.put(results)
            return

        n_images_processed += 1
        im_file = r[0]
        image = r[1]

        if verbose or ((n_images_processed % n_queue_print) == 1):
            elapsed = time.time() - start_time
            # With a pre-loaded model the first image can arrive within the timer's
            # resolution, so don't divide by a zero elapsed time
            images_per_second = (n_images_processed / elapsed) if elapsed > 0 else 0.0
            print('De-queued image {} ({:.2f}/s) ({})'.format(n_images_processed,
                                                              images_per_second,
                                                              im_file))
            sys.stdout.flush()

        if isinstance(image,str):
            # This is how the producer function communicates read errors
            results.append({'file': im_file,
                            'failure': image})
        else:
            results.append(process_image(im_file=im_file,detector=detector,
                                         confidence_threshold=confidence_threshold,
                                         image=image,quiet=True,image_size=image_size))

        if verbose:
            print('Processed image {}'.format(im_file)); sys.stdout.flush()

        q.task_done()
177
+
178
+
179
def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
                                  quiet=False,image_size=None):
    """
    Entry point for the (optional) image queue; only used when --use_image_queue is
    specified.  A separate worker reads images from disk, while inference currently runs
    in the calling process (no separate consumer is spawned).

    Args:
        image_files (list): list of absolute paths to images
        model_file (str): filename or model identifier (e.g. "MDV5A")
        confidence_threshold (float): minimum confidence detection to include in
            output
        quiet (bool, optional): suppress per-image console printouts (accepted, but not
            referenced by this function)
        image_size (tuple, optional): image size to use for inference, only mess with this
            if (a) you're using a model other than MegaDetector or (b) you know what you're
            doing

    Returns:
        list: list of dicts in the format returned by process_image()
    """

    # Threads or processes, depending on the module-level switch
    worker_class = Thread if use_threads_for_queue else Process

    image_queue = multiprocessing.JoinableQueue(max_queue_size)
    results_queue = multiprocessing.Queue(1)

    consumer_args = (image_queue,results_queue,model_file,confidence_threshold,image_size)

    loader = worker_class(target=_producer_func,args=(image_queue,image_files))
    loader.daemon = False
    loader.start()

    # One worker for reading and one for inference would be the tidier design, and that
    # works fine on Windows.  But TF is imported at module load, so CUDA will only work
    # in the main process; for now the consumer therefore runs right here.
    #
    # Proper multi-GPU support may require moving the TF import into a separate module
    # that isn't loaded until very close to where inference actually happens.
    run_separate_consumer_process = False

    inference_worker = None

    if run_separate_consumer_process:
        inference_worker = worker_class(target=_consumer_func,args=consumer_args)
        inference_worker.daemon = True
        inference_worker.start()
    else:
        _consumer_func(*consumer_args)

    loader.join()
    print('Producer finished')

    if inference_worker is not None:
        inference_worker.join()
        print('Consumer finished')

    image_queue.join()
    print('Queue joined')

    return results_queue.get()
244
+
245
+
246
+ #%% Other support functions
247
+
248
+ def _chunks_by_number_of_chunks(ls, n):
249
+ """
250
+ Splits a list into n even chunks.
251
+
252
+ External callers should use ct_utils.split_list_into_n_chunks().
253
+
254
+ Args:
255
+ ls (list): list to break up into chunks
256
+ n (int): number of chunks
257
+ """
258
+
259
+ for i in range(0, n):
260
+ yield ls[i::n]
261
+
262
+
263
+ #%% Image processing functions
264
+
265
def process_images(im_files, detector, confidence_threshold, use_image_queue=False,
                   quiet=False, image_size=None, checkpoint_queue=None,
                   include_image_size=False, include_image_timestamp=False,
                   include_exif_data=False):
    """
    Runs a detector (typically MegaDetector) over a list of image files.
    As of 3/2024, this entry point is used when the image queue is enabled, but not in the
    standard inference path (which instead loops over process_image()).

    Args:
        im_files (list): paths to image files
        detector (str or detector object): loaded model or str; if this is a string, it can be a
            path to a .pb/.pt model file or a known model identifier (e.g. "MDV5A")
        confidence_threshold (float): only detections above this threshold are returned
        use_image_queue (bool, optional): separate image loading onto a dedicated worker process
        quiet (bool, optional): suppress per-image printouts
        image_size (tuple, optional): image size to use for inference, only mess with this
            if (a) you're using a model other than MegaDetector or (b) you know what you're
            doing
        checkpoint_queue (Queue, optional): internal parameter used to pass image queues around;
            not used when [use_image_queue] is True
        include_image_size (bool, optional): should we include image size in the output for each image?
            Ignored (with a warning) when [use_image_queue] is True.
        include_image_timestamp (bool, optional): should we include image timestamps in the output for
            each image?  Ignored (with a warning) when [use_image_queue] is True.
        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
            Ignored (with a warning) when [use_image_queue] is True.

    Returns:
        list: list of dicts, in which each dict represents detections on one image,
        see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing#batch-processing-api-output-format
    """

    if use_image_queue:

        # run_detector_with_image_queue() only accepts [quiet] and [image_size] as
        # options, so the per-image metadata flags can't be forwarded; tell the caller
        # rather than silently dropping them.
        if include_image_size or include_image_timestamp or include_exif_data:
            print('Warning: include_image_size, include_image_timestamp, and '
                  'include_exif_data are not supported with the image queue, ignoring')

        # The queue consumer loads the model itself, so [detector] is forwarded exactly
        # as it was passed in; loading it here first would load a string-specified
        # model twice.
        #
        # NOTE(review): a detector that is already loaded is forwarded as-is; confirm
        # that the queue consumer can handle a non-string model.
        return run_detector_with_image_queue(im_files, detector, confidence_threshold,
                                             quiet=quiet, image_size=image_size)

    if isinstance(detector, str):
        start_time = time.time()
        detector = load_detector(detector)
        elapsed = time.time() - start_time
        print('Loaded model (batch level) in {}'.format(humanfriendly.format_timespan(elapsed)))

    results = []

    for im_file in im_files:

        result = process_image(im_file, detector, confidence_threshold,
                               quiet=quiet, image_size=image_size,
                               include_image_size=include_image_size,
                               include_image_timestamp=include_image_timestamp,
                               include_exif_data=include_exif_data)

        # Each result is also handed to the checkpoint queue as soon as it is available
        if checkpoint_queue is not None:
            checkpoint_queue.put(result)
        results.append(result)

    return results
320
+
321
+ # ...def process_images(...)
322
+
323
+
324
def process_image(im_file, detector, confidence_threshold, image=None,
                  quiet=False, image_size=None, include_image_size=False,
                  include_image_timestamp=False, include_exif_data=False,
                  skip_image_resizing=False):
    """
    Runs a detector (typically MegaDetector) on one image.

    If the image cannot be loaded, or inference raises an exception, no exception is
    propagated; a dict with 'file' and 'failure' keys is returned instead.

    Args:
        im_file (str): path to image file
        detector (detector object): loaded model; by this point in the pipeline it can
            no longer be a string
        confidence_threshold (float): only detections above this threshold are returned
        image (Image, optional): previously-loaded image, if available, used when a worker
            thread is handling image loads; when None, the image is loaded from [im_file]
        quiet (bool, optional): suppress per-image printouts
        image_size (tuple, optional): image size to use for inference, only mess with this
            if (a) you're using a model other than MegaDetector or (b) you know what you're
            doing
        include_image_size (bool, optional): add 'width' and 'height' to the output?
        include_image_timestamp (bool, optional): add 'datetime' to the output?
        include_exif_data (bool, optional): add 'exif_metadata' to the output?
        skip_image_resizing (bool, optional): whether to skip internal image resizing and rely
            on external resizing

    Returns:
        dict: dict representing detections on one image,
        see the 'images' key in
        https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing#batch-processing-api-output-format
    """

    print_progress = (not quiet)

    if print_progress:
        print('Processing image {}'.format(im_file))

    # Load the image ourselves unless the caller already did
    if image is None:
        try:
            image = vis_utils.load_image(im_file)
        except Exception as load_error:
            if print_progress:
                print('Image {} cannot be loaded. Exception: {}'.format(im_file, load_error))
            return {'file': im_file, 'failure': run_detector.FAILURE_IMAGE_OPEN}

    try:
        detection_result = detector.generate_detections_one_image(
            image,
            im_file,
            detection_threshold=confidence_threshold,
            image_size=image_size,
            skip_image_resizing=skip_image_resizing)
    except Exception as inference_error:
        if print_progress:
            print('Image {} cannot be processed. Exception: {}'.format(im_file, inference_error))
        return {'file': im_file, 'failure': run_detector.FAILURE_INFER}

    # Optional per-image metadata
    if include_image_size:
        detection_result['width'] = image.width
        detection_result['height'] = image.height

    if include_image_timestamp:
        detection_result['datetime'] = get_image_datetime(image)

    if include_exif_data:
        detection_result['exif_metadata'] = read_exif.read_pil_exif(image,exif_options)

    return detection_result
392
+
393
+ # ...def process_image(...)
394
+
395
+
396
+ def _load_custom_class_mapping(class_mapping_filename):
397
+ """
398
+ This is an experimental hack to allow the use of non-MD YOLOv5 models through
399
+ the same infrastructure; it disables the code that enforces MDv5-like class lists.
400
+
401
+ Should be a .json file that maps int-strings to strings, or a YOLOv5 dataset.yaml file.
402
+ """
403
+
404
+ if class_mapping_filename is None:
405
+ return
406
+
407
+ run_detector.USE_MODEL_NATIVE_CLASSES = True
408
+ if class_mapping_filename.endswith('.json'):
409
+ with open(class_mapping_filename,'r') as f:
410
+ class_mapping = json.load(f)
411
+ elif (class_mapping_filename.endswith('.yml') or class_mapping_filename.endswith('.yaml')):
412
+ class_mapping = read_classes_from_yolo_dataset_file(class_mapping_filename)
413
+ # convert from ints to int-strings
414
+ class_mapping = {str(k):v for k,v in class_mapping.items()}
415
+ else:
416
+ raise ValueError('Unrecognized class mapping file {}'.format(class_mapping_filename))
417
+
418
+ print('Loaded custom class mapping:')
419
+ print(class_mapping)
420
+ run_detector.DEFAULT_DETECTOR_LABEL_MAP = class_mapping
421
+ return class_mapping
422
+
423
+
424
+ #%% Main function
425
+
426
def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=None,
                                confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD,
                                checkpoint_frequency=-1, results=None, n_cores=1,
                                use_image_queue=False, quiet=False, image_size=None,
                                class_mapping_filename=None, include_image_size=False,
                                include_image_timestamp=False, include_exif_data=False):
    """
    Load a model file and run it on a list of images.

    Inference runs in one of three modes: with a dedicated image-loading queue
    ([use_image_queue]), in this process on a single core, or across [n_cores] worker
    processes.  If a GPU is available or the image queue is enabled, [n_cores] is forced to 1.

    Args:

        model_file (str): path to model file, or supported model string (e.g. "MDV5A")
        image_file_names (list or str): list of strings (image filenames), a single image filename,
            a folder to recursively search for images in, or a .json or .txt file containing a list
            of images.
        checkpoint_path (str, optional), path to use for checkpoints (if None, checkpointing
            is disabled)
        confidence_threshold (float, optional): only detections above this threshold are returned
        checkpoint_frequency (int, optional): int, write results to JSON checkpoint file every N
            images, -1 disables checkpointing
        results (list, optional): list of dicts, existing results loaded from checkpoint; generally
            not useful if you're using this function outside of the CLI
        n_cores (int, optional): number of parallel workers to use, ignored if we're running on a GPU
        use_image_queue (bool, optional): use a dedicated worker for image loading
        quiet (bool, optional): disable per-image console output
        image_size (tuple, optional): image size to use for inference, only mess with this
            if (a) you're using a model other than MegaDetector or (b) you know what you're
            doing
        class_mapping_filename (str, optional), use a non-default class mapping supplied in a .json
            file or YOLOv5 dataset.yaml file
        include_image_size (bool, optional): should we include image size in the output for each image?
        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
        include_exif_data (bool, optional): should we include EXIF data in the output for each image?

    Returns:
        results: list of dicts; each dict represents detections on one image

    Raises:
        ValueError: if [image_file_names] is a string that is not a folder, a .json/.txt list
            file, or an image file
    """

    # Validate input arguments
    if n_cores is None:
        n_cores = 1

    if confidence_threshold is None:
        confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD

    # Disable checkpointing if checkpoint_path is None
    if checkpoint_frequency is None or checkpoint_path is None:
        checkpoint_frequency = -1

    if class_mapping_filename is not None:
        _load_custom_class_mapping(class_mapping_filename)

    # Handle the case where image_file_names is not yet actually a list
    if isinstance(image_file_names,str):

        # Find the images to score; images can be a directory, may need to recurse
        if os.path.isdir(image_file_names):
            image_dir = image_file_names
            image_file_names = path_utils.find_images(image_dir, True)
            print('{} image files found in folder {}'.format(len(image_file_names),image_dir))

        # A single file, or a list of image paths
        elif os.path.isfile(image_file_names):
            list_file = image_file_names
            if image_file_names.endswith('.json'):
                with open(list_file,'r') as f:
                    image_file_names = json.load(f)
                print('Loaded {} image filenames from .json list file {}'.format(
                    len(image_file_names),list_file))
            elif image_file_names.endswith('.txt'):
                with open(list_file,'r') as f:
                    image_file_names = f.readlines()
                    image_file_names = [s.strip() for s in image_file_names if len(s.strip()) > 0]
                print('Loaded {} image filenames from .txt list file {}'.format(
                    len(image_file_names),list_file))
            elif path_utils.is_image_file(image_file_names):
                image_file_names = [image_file_names]
                print('Processing image {}'.format(image_file_names[0]))
            else:
                raise ValueError(
                    'File {} supplied as [image_file_names] argument, but extension is neither .json nor .txt'\
                        .format(
                            list_file))
        else:
            raise ValueError(
                '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'.format(
                    image_file_names))

    if results is None:
        results = []

    already_processed = set([i['file'] for i in results])

    model_file = try_download_known_detector(model_file)

    # Query GPU availability once and re-use the answer below
    gpu_available = is_gpu_available(model_file)
    print('GPU available: {}'.format(gpu_available))

    if n_cores > 1 and gpu_available:

        print('Warning: multiple cores requested, but a GPU is available; parallelization across ' + \
              'GPUs is not currently supported, defaulting to one GPU')
        n_cores = 1

    if n_cores > 1 and use_image_queue:

        print('Warning: multiple cores requested, but the image queue is enabled; parallelization ' + \
              'with the image queue is not currently supported, defaulting to one worker')
        n_cores = 1

    if use_image_queue:

        assert checkpoint_frequency < 0, \
            'Using an image queue is not currently supported when checkpointing is enabled'
        assert len(results) == 0, \
            'Using an image queue with results loaded from a checkpoint is not currently supported'
        assert n_cores <= 1
        results = run_detector_with_image_queue(image_file_names, model_file,
                                                confidence_threshold, quiet,
                                                image_size=image_size)

    elif n_cores <= 1:

        # Load the detector
        start_time = time.time()
        detector = load_detector(model_file)
        elapsed = time.time() - start_time
        print('Loaded model in {}'.format(humanfriendly.format_timespan(elapsed)))

        # This is only used for console reporting, so it's OK that it doesn't
        # include images we might have loaded from a previous checkpoint
        count = 0

        for im_file in tqdm(image_file_names):

            # Will not add additional entries not in the starter checkpoint
            if im_file in already_processed:
                if not quiet:
                    print('Bypassing image {}'.format(im_file))
                continue

            count += 1

            result = process_image(im_file, detector,
                                   confidence_threshold, quiet=quiet,
                                   image_size=image_size, include_image_size=include_image_size,
                                   include_image_timestamp=include_image_timestamp,
                                   include_exif_data=include_exif_data)
            results.append(result)

            # Write a checkpoint if necessary
            if (checkpoint_frequency != -1) and ((count % checkpoint_frequency) == 0):

                print('Writing a new checkpoint after having processed {} images since '
                      'last restart'.format(count))

                _write_checkpoint(checkpoint_path, results)

    else:

        # Multiprocessing is enabled at this point

        # When using multiprocessing, tell the workers to load the model on each
        # process, by passing the model_file string as the "model" argument to
        # process_images.
        detector = model_file

        print('Creating pool with {} cores'.format(n_cores))

        if len(already_processed) > 0:
            n_images_all = len(image_file_names)
            image_file_names = [fn for fn in image_file_names if fn not in already_processed]
            print('Loaded {} of {} images from checkpoint'.format(
                len(already_processed),n_images_all))

        # Divide images into chunks; we'll send one chunk to each worker process
        image_batches = list(_chunks_by_number_of_chunks(image_file_names, n_cores))

        pool = workerpool(n_cores)

        try:

            if checkpoint_path is not None:

                # Multiprocessing and checkpointing are both enabled at this point

                checkpoint_queue = Manager().Queue()

                # Pass the "results" array (which may already contain images loaded from an existing
                # checkpoint) to the checkpoint queue handler function, which will append results to
                # the list as they become available.
                checkpoint_thread = Thread(target=_checkpoint_queue_handler,
                                           args=(checkpoint_path, checkpoint_frequency,
                                                 checkpoint_queue, results), daemon=True)
                checkpoint_thread.start()

                pool.map(partial(process_images, detector=detector,
                                 confidence_threshold=confidence_threshold,
                                 image_size=image_size,
                                 include_image_size=include_image_size,
                                 include_image_timestamp=include_image_timestamp,
                                 include_exif_data=include_exif_data,
                                 checkpoint_queue=checkpoint_queue),
                         image_batches)

                # Tell the handler thread there is nothing more to come, then wait for it to
                # drain the queue; without the join, "results" could be returned before the
                # last queued results have been appended to it.
                checkpoint_queue.put(None)
                checkpoint_thread.join()

            else:

                # Multiprocessing is enabled, but checkpointing is not

                new_results = pool.map(partial(process_images, detector=detector,
                                               confidence_threshold=confidence_threshold,image_size=image_size,
                                               include_image_size=include_image_size,
                                               include_image_timestamp=include_image_timestamp,
                                               include_exif_data=include_exif_data),
                                       image_batches)

                new_results = list(itertools.chain.from_iterable(new_results))

                # Append the results we just computed to "results", which is *usually* empty, but will
                # be non-empty if we resumed from a checkpoint
                results += new_results

            # ...if checkpointing is/isn't enabled

        finally:

            # pool.map() has either returned or raised by the time we get here, so the
            # workers have nothing useful left to do; release them rather than leaving
            # idle processes behind.
            pool.terminate()
            pool.join()

    # ...if we're running (1) with image queue, (2) on one core, (3) on multiple cores

    # 'results' may have been modified in place, but we also return it for
    # backwards-compatibility.
    return results
654
+
655
+ # ...def load_and_run_detector_batch(...)
656
+
657
+
658
+ def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
659
+ """
660
+ Thread function to accumulate results and write checkpoints when checkpointing and
661
+ multiprocessing are both enabled.
662
+ """
663
+
664
+ result_count = 0
665
+ while True:
666
+ result = checkpoint_queue.get()
667
+ if result is None:
668
+ break
669
+
670
+ result_count +=1
671
+ results.append(result)
672
+
673
+ if (checkpoint_frequency != -1) and (result_count % checkpoint_frequency == 0):
674
+
675
+ print('Writing a new checkpoint after having processed {} images since '
676
+ 'last restart'.format(result_count))
677
+
678
+ _write_checkpoint(checkpoint_path, results)
679
+
680
+
681
+ def _write_checkpoint(checkpoint_path, results):
682
+ """
683
+ Writes the 'images' field in the dict 'results' to a json checkpoint file.
684
+ """
685
+
686
+ assert checkpoint_path is not None
687
+
688
+ # Back up any previous checkpoints, to protect against crashes while we're writing
689
+ # the checkpoint file.
690
+ checkpoint_tmp_path = None
691
+ if os.path.isfile(checkpoint_path):
692
+ checkpoint_tmp_path = checkpoint_path + '_tmp'
693
+ shutil.copyfile(checkpoint_path,checkpoint_tmp_path)
694
+
695
+ # Write the new checkpoint
696
+ with open(checkpoint_path, 'w') as f:
697
+ json.dump({'images': results}, f, indent=1, default=str)
698
+
699
+ # Remove the backup checkpoint if it exists
700
+ if checkpoint_tmp_path is not None:
701
+ os.remove(checkpoint_tmp_path)
702
+
703
+
704
def get_image_datetime(image):
    """
    Looks up the EXIF 'DateTimeOriginal' tag of a PIL Image object.

    Args:
        image (Image): the PIL Image object whose EXIF tags should be read

    Returns:
        str: the 'DateTimeOriginal' value, unmodified, if it is present and parses as
        '%Y:%m:%d %H:%M:%S'; None otherwise.
    """

    tags = read_exif.read_pil_exif(image,exif_options)

    try:
        timestamp = tags['DateTimeOriginal']
        # Parsed only as a validity check; the parsed value itself is discarded
        time.strptime(timestamp, '%Y:%m:%d %H:%M:%S')
    except Exception:
        # Missing tag, unparseable value, or anything else: report "no datetime"
        return None

    return timestamp
725
+
726
+
727
def write_results_to_file(results, output_file, relative_path_base=None,
                          detector_file=None, info=None, include_max_conf=False,
                          custom_metadata=None, force_forward_slashes=True):
    """
    Writes list of detection results to JSON output file. Format matches:

    https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing#batch-processing-api-output-format

    The dicts in [results] are never modified: filename rewriting and removal of the
    'max_detection_conf' field are always applied to shallow copies.

    Args:
        results (list): list of dict, each dict represents detections on one image
        output_file (str): path to JSON output file, should end in '.json'
        relative_path_base (str, optional): path to a directory as the base for relative paths, can
            be None if the paths in [results] are absolute
        detector_file (str, optional): filename of the detector used to generate these results, only
            used to pull out a version number for the "info" field
        info (dict, optional): dictionary to put in the results file instead of the default "info" field
        include_max_conf (bool, optional): old files (version 1.2 and earlier) included a "max_conf" field
            in each image; this was removed in version 1.3.  Set this flag to force the inclusion
            of this field.
        custom_metadata (object, optional): additional data to include as info['custom_metadata']; typically
            a dictionary, but no type/format checks are performed
        force_forward_slashes (bool, optional): convert all slashes in filenames within [results] to
            forward slashes

    Returns:
        dict: the MD-formatted dictionary that was written to [output_file]
    """

    # Build the output records from shallow copies, so the caller's dicts are left alone
    # regardless of which options are enabled
    output_images = []

    for r in results:

        r_out = copy.copy(r)

        if relative_path_base is not None:
            r_out['file'] = os.path.relpath(r_out['file'], start=relative_path_base)

        if force_forward_slashes:
            r_out['file'] = r_out['file'].replace('\\','/')

        # The 'max_detection_conf' field used to be included by default, and it caused all kinds
        # of headaches, so it's no longer included unless the user explicitly requests it.
        if (not include_max_conf) and ('max_detection_conf' in r_out):
            del r_out['max_detection_conf']

        output_images.append(r_out)

    # The typical case: we need to build the 'info' struct
    if info is None:

        info = {
            'detection_completion_time': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
            'format_version': '1.3'
        }

        if detector_file is not None:
            detector_filename = os.path.basename(detector_file)
            detector_version = get_detector_version_from_filename(detector_filename)
            detector_metadata = get_detector_metadata_from_version_string(detector_version)
            info['detector'] = detector_filename
            info['detector_metadata'] = detector_metadata
        else:
            info['detector'] = 'unknown'
            info['detector_metadata'] = get_detector_metadata_from_version_string('unknown')

    # If the caller supplied the entire "info" struct
    else:

        if detector_file is not None:

            print('Warning (write_results_to_file): info struct and detector file ' + \
                  'supplied, ignoring detector file')

    if custom_metadata is not None:
        info['custom_metadata'] = custom_metadata

    final_output = {
        'images': output_images,
        'detection_categories': run_detector.DEFAULT_DETECTOR_LABEL_MAP,
        'info': info
    }

    with open(output_file, 'w') as f:
        json.dump(final_output, f, indent=1, default=str)
    print('Output file saved at {}'.format(output_file))

    return final_output
818
+
819
+ # ...def write_results_to_file(...)
820
+
821
+
822
+ #%% Interactive driver
823
+
824
# Scratch cells intended for interactive, cell-by-cell execution; the "if False:" guard
# means none of this runs when the module is imported or run as a script.
if False:

    pass

    #%%

    # Settings shared by the two cells below
    model_file = 'MDV5A'
    image_dir = r'g:\camera_traps\camera_trap_images'
    output_file = r'g:\temp\md-test.json'

    recursive = True
    output_relative_filenames = True
    include_max_conf = False
    quiet = True
    image_size = None
    use_image_queue = False
    confidence_threshold = 0.0001
    checkpoint_frequency = 5
    checkpoint_path = None
    resume_from_checkpoint = 'auto'
    allow_checkpoint_overwrite = False
    ncores = 1
    class_mapping_filename = None
    include_image_size = True
    include_image_timestamp = True
    include_exif_data = True
    overwrite_handling = None

    # Generate a command line
    cmd = 'python run_detector_batch.py "{}" "{}" "{}"'.format(
        model_file,image_dir,output_file)

    # Append one command-line option per setting; boolean settings become bare flags,
    # and settings left as None are omitted
    if recursive:
        cmd += ' --recursive'
    if output_relative_filenames:
        cmd += ' --output_relative_filenames'
    if include_max_conf:
        cmd += ' --include_max_conf'
    if quiet:
        cmd += ' --quiet'
    if image_size is not None:
        cmd += ' --image_size {}'.format(image_size)
    if use_image_queue:
        cmd += ' --use_image_queue'
    if confidence_threshold is not None:
        cmd += ' --threshold {}'.format(confidence_threshold)
    if checkpoint_frequency is not None:
        cmd += ' --checkpoint_frequency {}'.format(checkpoint_frequency)
    if checkpoint_path is not None:
        cmd += ' --checkpoint_path "{}"'.format(checkpoint_path)
    if resume_from_checkpoint is not None:
        cmd += ' --resume_from_checkpoint "{}"'.format(resume_from_checkpoint)
    if allow_checkpoint_overwrite:
        cmd += ' --allow_checkpoint_overwrite'
    if ncores is not None:
        cmd += ' --ncores {}'.format(ncores)
    if class_mapping_filename is not None:
        cmd += ' --class_mapping_filename "{}"'.format(class_mapping_filename)
    if include_image_size:
        cmd += ' --include_image_size'
    if include_image_timestamp:
        cmd += ' --include_image_timestamp'
    if include_exif_data:
        cmd += ' --include_exif_data'
    if overwrite_handling is not None:
        cmd += ' --overwrite_handling {}'.format(overwrite_handling)

    # Show the command and also place it on the clipboard
    print(cmd)
    import clipboard; clipboard.copy(cmd)


    #%% Run inference interactively

    # NOTE(review): this passes recursive=False, i.e. it does not use the "recursive"
    # setting defined in the cell above.
    image_file_names = path_utils.find_images(image_dir, recursive=False)
    results = None

    start_time = time.time()

    # NOTE(review): class_mapping_filename and the include_* settings from the cell above
    # are not forwarded here, so this call uses the function's defaults for them.
    results = load_and_run_detector_batch(model_file=model_file,
                                          image_file_names=image_file_names,
                                          checkpoint_path=checkpoint_path,
                                          confidence_threshold=confidence_threshold,
                                          checkpoint_frequency=checkpoint_frequency,
                                          results=results,
                                          n_cores=ncores,
                                          use_image_queue=use_image_queue,
                                          quiet=quiet,
                                          image_size=image_size)

    elapsed = time.time() - start_time

    print('Finished inference in {}'.format(humanfriendly.format_timespan(elapsed)))
916
+
917
+
918
+ #%% Command-line driver
919
+
920
def main():
    """
    Command-line driver: parses arguments, optionally restores results from a checkpoint,
    enumerates the images to process, runs load_and_run_detector_batch(), and writes the
    results with write_results_to_file().

    Returns early (without running inference) if the output file exists and
    --overwrite_handling is "skip", or if the input folder contains no images.
    """

    parser = argparse.ArgumentParser(
        description='Module to run a TF/PT animal detection model on lots of images')
    parser.add_argument(
        'detector_file',
        help='Path to detector model file (.pb or .pt).  Can also be the strings "MDV4", "MDV5A", or "MDV5B" to request automatic download.')
    parser.add_argument(
        'image_file',
        help=\
        'Path to a single image file, a .json or .txt file containing a list of paths to images, or a directory')
    parser.add_argument(
        'output_file',
        help='Path to output JSON results file, should end with a .json extension')
    parser.add_argument(
        '--recursive',
        action='store_true',
        help='Recurse into directories, only meaningful if image_file points to a directory')
    parser.add_argument(
        '--output_relative_filenames',
        action='store_true',
        help='Output relative file names, only meaningful if image_file points to a directory')
    parser.add_argument(
        '--include_max_conf',
        action='store_true',
        help='Include the "max_detection_conf" field in the output')
    parser.add_argument(
        '--quiet',
        action='store_true',
        help='Suppress per-image console output')
    parser.add_argument(
        '--image_size',
        type=int,
        default=None,
        help=('Force image resizing to a (square) integer size (not recommended to change this)'))
    parser.add_argument(
        '--use_image_queue',
        action='store_true',
        help='Pre-load images, may help keep your GPU busy; does not currently support ' + \
             'checkpointing.  Useful if you have a very fast GPU and a very slow disk.')
    parser.add_argument(
        '--threshold',
        type=float,
        default=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD,
        help="Confidence threshold between 0 and 1.0, don't include boxes below this " + \
            "confidence in the output file. Default is {}".format(
                run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD))
    parser.add_argument(
        '--checkpoint_frequency',
        type=int,
        default=-1,
        help='Write results to a temporary file every N images; default is -1, which ' + \
             'disables this feature')
    parser.add_argument(
        '--checkpoint_path',
        type=str,
        default=None,
        help='File name to which checkpoints will be written if checkpoint_frequency is > 0, ' + \
             'defaults to md_checkpoint_[date].json in the same folder as the output file')
    # The trailing space after "uses" keeps the concatenated help text readable
    parser.add_argument(
        '--resume_from_checkpoint',
        type=str,
        default=None,
        help='Path to a JSON checkpoint file to resume from, or "auto" to ' + \
             'find the most recent checkpoint in the same folder as the output file.  "auto" uses ' + \
             'checkpoint_path (rather than searching the output folder) if checkpoint_path is specified.')
    parser.add_argument(
        '--allow_checkpoint_overwrite',
        action='store_true',
        help='By default, this script will bail if the specified checkpoint file ' + \
              'already exists; this option allows it to overwrite existing checkpoints')
    parser.add_argument(
        '--ncores',
        type=int,
        default=0,
        help='Number of cores to use; only applies to CPU-based inference')
    # The trailing space after "mapping" keeps the concatenated help text readable
    parser.add_argument(
        '--class_mapping_filename',
        type=str,
        default=None,
        help='Use a non-default class mapping, supplied in a .json file with a dictionary mapping ' + \
            'int-strings to strings.  This will also disable the addition of "1" to all category ' + \
            'IDs, so your class mapping should start at zero.  Can also be a YOLOv5 dataset.yaml file.')
    parser.add_argument(
        '--include_image_size',
        action='store_true',
        help='Include image dimensions in output file'
    )
    parser.add_argument(
        '--include_image_timestamp',
        action='store_true',
        help='Include image datetime (if available) in output file'
    )
    parser.add_argument(
        '--include_exif_data',
        action='store_true',
        help='Include available EXIF data in output file'
    )
    parser.add_argument(
        '--overwrite_handling',
        type=str,
        default='overwrite',
        help='What should we do if the output file exists?  overwrite/skip/error (default overwrite)'
    )

    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # If the specified detector file is really the name of a known model, find
    # (and possibly download) that model
    args.detector_file = try_download_known_detector(args.detector_file)

    assert os.path.exists(args.detector_file), \
        'detector file {} does not exist'.format(args.detector_file)
    assert 0.0 <= args.threshold <= 1.0, 'Confidence threshold needs to be between 0 and 1'
    assert args.output_file.endswith('.json'), 'output_file specified needs to end with .json'
    if args.checkpoint_frequency != -1:
        assert args.checkpoint_frequency > 0, 'Checkpoint_frequency needs to be > 0 or == -1'
    if args.output_relative_filenames:
        assert os.path.isdir(args.image_file), \
            f'Could not find folder {args.image_file}, must supply a folder when ' + \
                '--output_relative_filenames is set'

    if os.path.exists(args.output_file):
        if args.overwrite_handling == 'overwrite':
            print('Warning: output file {} already exists and will be overwritten'.format(
                args.output_file))
        elif args.overwrite_handling == 'skip':
            print('Output file {} exists, returning'.format(
                args.output_file))
            return
        elif args.overwrite_handling == 'error':
            raise Exception('Output file {} exists'.format(args.output_file))
        else:
            raise ValueError('Illegal overwrite handling string {}'.format(args.overwrite_handling))

    # This is an empty string if the output file has no folder component
    output_dir = os.path.dirname(args.output_file)

    if len(output_dir) > 0:
        os.makedirs(output_dir,exist_ok=True)

    assert not os.path.isdir(args.output_file), 'Specified output file is a directory'

    if args.class_mapping_filename is not None:
        _load_custom_class_mapping(args.class_mapping_filename)

    # Load the checkpoint if available
    #
    # Relative file names are only output at the end; all file paths in the checkpoint are
    # still absolute paths.
    if args.resume_from_checkpoint is not None:

        if args.resume_from_checkpoint == 'auto':

            if args.checkpoint_path is not None:

                # As documented in the help text for --resume_from_checkpoint, "auto"
                # means "resume from checkpoint_path" when a checkpoint path is specified
                checkpoint_file = args.checkpoint_path

            else:

                # os.listdir('') raises, so search the current folder when the output
                # file was specified without a folder
                checkpoint_search_dir = output_dir if len(output_dir) > 0 else '.'

                checkpoint_files = os.listdir(checkpoint_search_dir)
                checkpoint_files = [fn for fn in checkpoint_files if \
                                    (fn.startswith('md_checkpoint') and fn.endswith('.json'))]

                if len(checkpoint_files) == 0:
                    raise ValueError('resume_from_checkpoint set to "auto", but no checkpoints found in {}'.format(
                        checkpoint_search_dir))

                if len(checkpoint_files) > 1:
                    print('Warning: found {} checkpoints in {}, using the latest'.format(
                        len(checkpoint_files),checkpoint_search_dir))

                # Default checkpoint names embed a timestamp, so the last name in sorted
                # order is the most recent
                checkpoint_files = sorted(checkpoint_files)
                checkpoint_file_relative = checkpoint_files[-1]
                checkpoint_file = os.path.join(checkpoint_search_dir,checkpoint_file_relative)

        else:

            checkpoint_file = args.resume_from_checkpoint

        assert os.path.exists(checkpoint_file), \
            'File at resume_from_checkpoint specified does not exist'
        with open(checkpoint_file) as f:
            print('Loading previous results from checkpoint file {}'.format(
                checkpoint_file))
            saved = json.load(f)
        assert 'images' in saved, \
            'The checkpoint file does not have the correct fields; cannot be restored'
        results = saved['images']
        print('Restored {} entries from the checkpoint'.format(len(results)))

    else:

        results = []

    # Find the images to score; images can be a directory, may need to recurse
    if os.path.isdir(args.image_file):
        image_file_names = path_utils.find_images(args.image_file, args.recursive)
        if len(image_file_names) > 0:
            print('{} image files found in the input directory'.format(len(image_file_names)))
        else:
            if (args.recursive):
                print('No image files found in directory {}, exiting'.format(args.image_file))
            else:
                print('No image files found in directory {}, did you mean to specify '
                      '--recursive?'.format(
                    args.image_file))
            return

    # A json list of image paths
    elif os.path.isfile(args.image_file) and args.image_file.endswith('.json'):
        with open(args.image_file) as f:
            image_file_names = json.load(f)
        print('Loaded {} image filenames from .json list file {}'.format(
            len(image_file_names),args.image_file))

    # A text list of image paths
    elif os.path.isfile(args.image_file) and args.image_file.endswith('.txt'):
        with open(args.image_file) as f:
            image_file_names = f.readlines()
            image_file_names = [fn.strip() for fn in image_file_names if len(fn.strip()) > 0]
        print('Loaded {} image filenames from .txt list file {}'.format(
            len(image_file_names),args.image_file))

    # A single image file
    elif os.path.isfile(args.image_file) and path_utils.is_image_file(args.image_file):
        image_file_names = [args.image_file]
        print('Processing image {}'.format(args.image_file))

    else:
        raise ValueError('image_file specified is not a directory, a json list, or an image file, '
                         '(or does not have recognizable extensions).')

    assert len(image_file_names) > 0, 'Specified image_file does not point to valid image files'
    assert os.path.exists(image_file_names[0]), \
        'The first image to be processed does not exist at {}'.format(image_file_names[0])

    # Test that we can write to the output_file's dir if checkpointing requested
    if args.checkpoint_frequency != -1:

        if args.checkpoint_path is not None:
            checkpoint_path = args.checkpoint_path
        else:
            checkpoint_path = os.path.join(output_dir,
                                           'md_checkpoint_{}.json'.format(
                                               datetime.utcnow().strftime("%Y%m%d%H%M%S")))

        # Don't overwrite existing checkpoint files, this is a sure-fire way to eventually
        # erase someone's checkpoint.
        if (checkpoint_path is not None) and (not args.allow_checkpoint_overwrite) \
            and (args.resume_from_checkpoint is None):

            assert not os.path.isfile(checkpoint_path), \
                f'Checkpoint path {checkpoint_path} already exists, delete or move it before ' + \
                're-using the same checkpoint path, or specify --allow_checkpoint_overwrite'


        # Confirm that we can write to the checkpoint path; this avoids issues where
        # we crash after several thousand images.
        #
        # But actually, commenting this out for now... the scenario where we are resuming from a
        # checkpoint, then immediately overwrite that checkpoint with empty data is higher-risk
        # than the annoyance of crashing a few minutes after starting a job.
        if False:
            with open(checkpoint_path, 'w') as f:
                json.dump({'images': []}, f)

        print('The checkpoint file will be written to {}'.format(checkpoint_path))

    else:

        if args.checkpoint_path is not None:
            print('Warning: checkpointing disabled because checkpoint_frequency is -1, ' + \
                  'but a checkpoint path was specified')
        checkpoint_path = None

    start_time = time.time()

    results = load_and_run_detector_batch(model_file=args.detector_file,
                                          image_file_names=image_file_names,
                                          checkpoint_path=checkpoint_path,
                                          confidence_threshold=args.threshold,
                                          checkpoint_frequency=args.checkpoint_frequency,
                                          results=results,
                                          n_cores=args.ncores,
                                          use_image_queue=args.use_image_queue,
                                          quiet=args.quiet,
                                          image_size=args.image_size,
                                          class_mapping_filename=args.class_mapping_filename,
                                          include_image_size=args.include_image_size,
                                          include_image_timestamp=args.include_image_timestamp,
                                          include_exif_data=args.include_exif_data)

    elapsed = time.time() - start_time
    images_per_second = len(results) / elapsed
    print('Finished inference for {} images in {} ({:.2f} images per second)'.format(
        len(results),humanfriendly.format_timespan(elapsed),images_per_second))

    relative_path_base = None
    if args.output_relative_filenames:
        relative_path_base = args.image_file
    write_results_to_file(results, args.output_file, relative_path_base=relative_path_base,
                          detector_file=args.detector_file,include_max_conf=args.include_max_conf)

    # The checkpoint is no longer needed once the final results have been written
    if checkpoint_path and os.path.isfile(checkpoint_path):
        os.remove(checkpoint_path)
        print('Deleted checkpoint file {}'.format(checkpoint_path))

    print('Done, thanks for MegaDetect\'ing!')
1217
+
1218
# Run the command-line driver only when this file is executed as a script
if __name__ == '__main__':
    main()