megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
detection/run_detector_batch.py
@@ -1,1219 +0,0 @@
- """
-
- run_detector_batch.py
-
- Module to run MegaDetector on lots of images, writing the results
- to a file in the MegaDetector results format.
-
- https://github.com/agentmorris/MegaDetector/tree/main/api/batch_processing#megadetector-batch-output-format
-
- This enables the results to be used in our post-processing pipeline; see
- api/batch_processing/postprocessing/postprocess_batch_results.py .
-
- This script can save results to checkpoints intermittently, in case disaster
- strikes. To enable this, set --checkpoint_frequency to n > 0, and results
- will be saved as a checkpoint every n images. Checkpoints will be written
- to a file in the same directory as the output_file, and after all images
- are processed and the final results file is written to output_file, the temporary
- checkpoint file will be deleted. If you want to resume from a checkpoint, set
- the checkpoint file's path using --resume_from_checkpoint.
-
- The `threshold` you can provide as an argument is the confidence threshold above
- which detections will be included in the output file.
-
- Has multiprocessing support for CPUs only; if a GPU is available, it will
- use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
- is not supported when using a GPU.
-
- The lack of GPU multiprocessing support might sound annoying, but in practice we
- run MegaDetector on gazillions of images across multiple GPUs using this script; we
- just use one GPU *per invocation of this script*. Dividing a big batch of images into
- one chunk per GPU happens outside of this script.
-
- Does not have a command-line option to bind the process to a particular GPU, but you can
- prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
-
- CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
-
- You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
-
- """
-
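For concreteness, the checkpointing workflow described in this docstring looks like the following on the command line (paths are hypothetical):

    python detection/run_detector_batch.py MDV5A ~/data ~/results.json --checkpoint_frequency 1000

    # ...and if the job dies partway through, resume from the most recent
    # md_checkpoint_*.json in the output folder:
    python detection/run_detector_batch.py MDV5A ~/data ~/results.json --checkpoint_frequency 1000 --resume_from_checkpoint auto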
- #%% Constants, imports, environment
-
- import argparse
- import json
- import os
- import sys
- import time
- import copy
- import shutil
- import warnings
- import itertools
- import humanfriendly
-
- from datetime import datetime
- from functools import partial
- from tqdm import tqdm
-
- import multiprocessing
- from threading import Thread
- from multiprocessing import Process, Manager
-
- # Multiprocessing uses processes, not threads... leaving this here (and commented out)
- # to make sure I don't change this casually at some point; it changes a number of
- # assumptions about interaction with PyTorch and TF.
- # from multiprocessing.pool import ThreadPool as workerpool
- from multiprocessing.pool import Pool as workerpool
-
- import detection.run_detector as run_detector
- from detection.run_detector import is_gpu_available,\
-     load_detector,\
-     try_download_known_detector,\
-     get_detector_version_from_filename,\
-     get_detector_metadata_from_version_string
-
- from md_utils import path_utils
- import md_visualization.visualization_utils as vis_utils
- from data_management import read_exif
-
- # Suppress numpy FutureWarnings triggered by the tensorflow import
- warnings.filterwarnings('ignore', category=FutureWarning)
-
- # Number of images to pre-fetch
- max_queue_size = 10
-
- # How often should we print progress when using the image queue?
- n_queue_print = 1000
-
- use_threads_for_queue = False
- verbose = False
-
- exif_options = read_exif.ReadExifOptions()
- exif_options.processing_library = 'pil'
- exif_options.byte_handling = 'convert_to_string'
-
-
- #%% Support functions for multiprocessing
-
- def _producer_func(q,image_files):
-     """
-     Producer function; only used when using the (optional) image queue.
-
-     Reads up to N images from disk and puts them on the blocking queue for processing.
-     """
-
-     if verbose:
-         print('Producer starting'); sys.stdout.flush()
-
-     for im_file in image_files:
-
-         try:
-             if verbose:
-                 print('Loading image {}'.format(im_file)); sys.stdout.flush()
-             image = vis_utils.load_image(im_file)
-         except Exception:
-             print('Producer process: image {} cannot be loaded.'.format(im_file))
-             image = run_detector.FAILURE_IMAGE_OPEN
-
-         if verbose:
-             print('Queueing image {}'.format(im_file)); sys.stdout.flush()
-         q.put([im_file,image])
-
-     q.put(None)
-
-     print('Finished image loading'); sys.stdout.flush()
-
-
- def _consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
-     """
-     Consumer function; only used when using the (optional) image queue.
-
-     Pulls images from a blocking queue and processes them.
-     """
-
-     if verbose:
-         print('Consumer starting'); sys.stdout.flush()
-
-     start_time = time.time()
-     detector = load_detector(model_file)
-     elapsed = time.time() - start_time
-     print('Loaded model (before queueing) in {}, printing updates every {} images'.format(
-         humanfriendly.format_timespan(elapsed),n_queue_print))
-     sys.stdout.flush()
-
-     results = []
-
-     n_images_processed = 0
-
-     while True:
-         r = q.get()
-         if r is None:
-             q.task_done()
-             return_queue.put(results)
-             return
-         n_images_processed += 1
-         im_file = r[0]
-         image = r[1]
-         if verbose or ((n_images_processed % n_queue_print) == 1):
-             elapsed = time.time() - start_time
-             images_per_second = n_images_processed / elapsed
-             print('De-queued image {} ({:.2f}/s) ({})'.format(n_images_processed,
-                                                               images_per_second,
-                                                               im_file))
-             sys.stdout.flush()
-         if isinstance(image,str):
-             # This is how the producer function communicates read errors
-             results.append({'file': im_file,
-                             'failure': image})
-         else:
-             results.append(process_image(im_file=im_file,detector=detector,
-                                          confidence_threshold=confidence_threshold,
-                                          image=image,quiet=True,image_size=image_size))
-         if verbose:
-             print('Processed image {}'.format(im_file)); sys.stdout.flush()
-         q.task_done()
-
-
- def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
-                                   quiet=False,image_size=None):
-     """
-     Driver function for the (optional) multiprocessing-based image queue; only used
-     when --use_image_queue is specified. Starts a reader process to read images from disk, but
-     processes images in the process from which this function is called (i.e., does not currently
-     spawn a separate consumer process).
-
-     Args:
-         image_files (list): list of absolute paths to images
-         model_file (str): filename or model identifier (e.g. "MDV5A")
-         confidence_threshold (float): minimum detection confidence to include in the
-             output
-         quiet (bool, optional): suppress per-image console printouts
-         image_size (tuple, optional): image size to use for inference, only mess with this
-             if (a) you're using a model other than MegaDetector or (b) you know what you're
-             doing
-
-     Returns:
-         list: list of dicts in the format returned by process_image()
-     """
-
-     q = multiprocessing.JoinableQueue(max_queue_size)
-     return_queue = multiprocessing.Queue(1)
-
-     if use_threads_for_queue:
-         producer = Thread(target=_producer_func,args=(q,image_files,))
-     else:
-         producer = Process(target=_producer_func,args=(q,image_files,))
-     producer.daemon = False
-     producer.start()
-
-     # The queue system is a little more elegant if we start one thread for reading and one
-     # for processing, and this works fine on Windows, but because we import TF at module load,
-     # CUDA will only work in the main process, so currently the consumer function runs here.
-     #
-     # To enable proper multi-GPU support, we may need to move the TF import to a separate module
-     # that isn't loaded until very close to where inference actually happens.
-     run_separate_consumer_process = False
-
-     if run_separate_consumer_process:
-         if use_threads_for_queue:
-             consumer = Thread(target=_consumer_func,args=(q,return_queue,model_file,
-                                                           confidence_threshold,image_size,))
-         else:
-             consumer = Process(target=_consumer_func,args=(q,return_queue,model_file,
-                                                            confidence_threshold,image_size,))
-         consumer.daemon = True
-         consumer.start()
-     else:
-         _consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
-
-     producer.join()
-     print('Producer finished')
-
-     if run_separate_consumer_process:
-         consumer.join()
-         print('Consumer finished')
-
-     q.join()
-     print('Queue joined')
-
-     results = return_queue.get()
-
-     return results
-
-
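Abstracted away from the detector specifics, the queue protocol used by the three functions above is: the producer enqueues [filename, image] pairs, communicates load failures by substituting a string for the image, and enqueues None as an end-of-work sentinel; the consumer drains the queue until it sees the sentinel, then ships its accumulated results back through a one-slot return queue. A minimal self-contained sketch (all names here are illustrative, not part of this module):

    import multiprocessing

    def _toy_producer(q, files):
        for fn in files:
            # In the real code, vis_utils.load_image runs here, and a failed
            # load is communicated by sending a failure string instead of an image
            q.put([fn, 'pixels for {}'.format(fn)])
        q.put(None)  # end-of-work sentinel

    def _toy_consumer(q, return_queue):
        results = []
        while True:
            r = q.get()
            if r is None:
                q.task_done()
                return_queue.put(results)
                return
            results.append({'file': r[0], 'data': r[1]})
            q.task_done()

    if __name__ == '__main__':
        q = multiprocessing.JoinableQueue(10)
        return_queue = multiprocessing.Queue(1)
        producer = multiprocessing.Process(target=_toy_producer, args=(q, ['a.jpg', 'b.jpg']))
        producer.start()
        _toy_consumer(q, return_queue)  # the consumer runs in the main process, as above
        producer.join(); q.join()
        print(return_queue.get())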
- #%% Other support functions
-
- def _chunks_by_number_of_chunks(ls, n):
-     """
-     Splits a list into n chunks of approximately equal size, by interleaving
-     elements (chunk i gets elements i, i+n, i+2n, ...).
-
-     External callers should use ct_utils.split_list_into_n_chunks().
-
-     Args:
-         ls (list): list to break up into chunks
-         n (int): number of chunks
-     """
-
-     for i in range(0, n):
-         yield ls[i::n]
-
-
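For example, the interleaved slicing above yields chunks whose lengths differ by at most one:

    >>> list(_chunks_by_number_of_chunks(list(range(7)), 3))
    [[0, 3, 6], [1, 4], [2, 5]]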
- #%% Image processing functions
-
- def process_images(im_files, detector, confidence_threshold, use_image_queue=False,
-                    quiet=False, image_size=None, checkpoint_queue=None,
-                    include_image_size=False, include_image_timestamp=False,
-                    include_exif_data=False):
-     """
-     Runs a detector (typically MegaDetector) over a list of image files.
-     As of 3/2024, this entry point is used when the image queue is enabled, but not in the
-     standard inference path (which instead loops over process_image()).
-
-     Args:
-         im_files (list): paths to image files
-         detector (str or detector object): loaded model or str; if this is a string, it can be a
-             path to a .pb/.pt model file or a known model identifier (e.g. "MDV5A")
-         confidence_threshold (float): only detections above this threshold are returned
-         use_image_queue (bool, optional): separate image loading onto a dedicated worker process
-         quiet (bool, optional): suppress per-image printouts
-         image_size (tuple, optional): image size to use for inference, only mess with this
-             if (a) you're using a model other than MegaDetector or (b) you know what you're
-             doing
-         checkpoint_queue (Queue, optional): internal parameter used to pass results to the
-             checkpoint writer
-         include_image_size (bool, optional): should we include image size in the output for each image?
-         include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
-         include_exif_data (bool, optional): should we include EXIF data in the output for each image?
-
-     Returns:
-         list: list of dicts, in which each dict represents detections on one image,
-         see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
-     """
-
-     if isinstance(detector, str):
-         start_time = time.time()
-         detector = load_detector(detector)
-         elapsed = time.time() - start_time
-         print('Loaded model (batch level) in {}'.format(humanfriendly.format_timespan(elapsed)))
-
-     if use_image_queue:
-         # The image queue path does not currently support the include_* metadata options
-         return run_detector_with_image_queue(im_files, detector, confidence_threshold,
-                                              quiet=quiet, image_size=image_size)
-     else:
-         results = []
-         for im_file in im_files:
-             result = process_image(im_file, detector, confidence_threshold,
-                                    quiet=quiet, image_size=image_size,
-                                    include_image_size=include_image_size,
-                                    include_image_timestamp=include_image_timestamp,
-                                    include_exif_data=include_exif_data)
-
-             if checkpoint_queue is not None:
-                 checkpoint_queue.put(result)
-             results.append(result)
-
-     return results
-
- # ...def process_images(...)
-
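For orientation, a single per-image result dict in the format linked above looks roughly like this (values are illustrative; in the default label map, category "1" is "animal", and bboxes are normalized [x_min, y_min, width, height]):

    {
     "file": "camera01/IMG_0001.JPG",
     "detections": [
      {"category": "1", "conf": 0.92, "bbox": [0.41, 0.33, 0.22, 0.18]}
     ]
    }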
-
- def process_image(im_file, detector, confidence_threshold, image=None,
-                   quiet=False, image_size=None, include_image_size=False,
-                   include_image_timestamp=False, include_exif_data=False,
-                   skip_image_resizing=False):
-     """
-     Runs a detector (typically MegaDetector) on a single image file.
-
-     Args:
-         im_file (str): path to image file
-         detector (detector object): loaded model; this can no longer be a string by the time
-             you get this far down the pipeline
-         confidence_threshold (float): only detections above this threshold are returned
-         image (Image, optional): previously-loaded image, if available, used when a worker
-             thread is handling image loads
-         quiet (bool, optional): suppress per-image printouts
-         image_size (tuple, optional): image size to use for inference, only mess with this
-             if (a) you're using a model other than MegaDetector or (b) you know what you're
-             doing
-         include_image_size (bool, optional): should we include image size in the output for each image?
-         include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
-         include_exif_data (bool, optional): should we include EXIF data in the output for each image?
-         skip_image_resizing (bool, optional): whether to skip internal image resizing and rely on external resizing
-
-     Returns:
-         dict: dict representing detections on one image,
-         see the 'images' key in
-         https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
-     """
-
-     if not quiet:
-         print('Processing image {}'.format(im_file))
-
-     if image is None:
-         try:
-             image = vis_utils.load_image(im_file)
-         except Exception as e:
-             if not quiet:
-                 print('Image {} cannot be loaded. Exception: {}'.format(im_file, e))
-             result = {
-                 'file': im_file,
-                 'failure': run_detector.FAILURE_IMAGE_OPEN
-             }
-             return result
-
-     try:
-         result = detector.generate_detections_one_image(
-             image, im_file, detection_threshold=confidence_threshold, image_size=image_size,
-             skip_image_resizing=skip_image_resizing)
-     except Exception as e:
-         if not quiet:
-             print('Image {} cannot be processed. Exception: {}'.format(im_file, e))
-         result = {
-             'file': im_file,
-             'failure': run_detector.FAILURE_INFER
-         }
-         return result
-
-     if include_image_size:
-         result['width'] = image.width
-         result['height'] = image.height
-
-     if include_image_timestamp:
-         result['datetime'] = get_image_datetime(image)
-
-     if include_exif_data:
-         result['exif_metadata'] = read_exif.read_pil_exif(image,exif_options)
-
-     return result
-
-
- # ...def process_image(...)
-
-
- def _load_custom_class_mapping(class_mapping_filename):
-     """
-     This is an experimental hack to allow the use of non-MD YOLOv5 models through
-     the same infrastructure; it disables the code that enforces MDv5-like class lists.
-
-     Should be a .json file that maps int-strings to strings, or a YOLOv5 dataset.yaml file.
-     """
-
-     if class_mapping_filename is None:
-         return
-
-     run_detector.USE_MODEL_NATIVE_CLASSES = True
-     if class_mapping_filename.endswith('.json'):
-         with open(class_mapping_filename,'r') as f:
-             class_mapping = json.load(f)
-     elif (class_mapping_filename.endswith('.yml') or class_mapping_filename.endswith('.yaml')):
-         from data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file
-         class_mapping = read_classes_from_yolo_dataset_file(class_mapping_filename)
-         # convert from ints to int-strings
-         class_mapping = {str(k):v for k,v in class_mapping.items()}
-     else:
-         raise ValueError('Unrecognized class mapping file {}'.format(class_mapping_filename))
-
-     print('Loaded custom class mapping:')
-     print(class_mapping)
-     run_detector.DEFAULT_DETECTOR_LABEL_MAP = class_mapping
-     return class_mapping
-
-
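As an illustration, a class mapping .json of the form described above might look like this (hypothetical classes; IDs start at zero, since USE_MODEL_NATIVE_CLASSES is in effect):

    {"0": "animal", "1": "person", "2": "vehicle"}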
- #%% Main function
-
- def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=None,
-                                 confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD,
-                                 checkpoint_frequency=-1, results=None, n_cores=1,
-                                 use_image_queue=False, quiet=False, image_size=None,
-                                 class_mapping_filename=None, include_image_size=False,
-                                 include_image_timestamp=False, include_exif_data=False):
-     """
-     Load a model file and run it on a list of images.
-
-     Args:
-
-         model_file (str): path to model file, or supported model string (e.g. "MDV5A")
-         image_file_names (list or str): list of strings (image filenames), a single image filename,
-             a folder to recursively search for images in, or a .json or .txt file containing a list
-             of images.
-         checkpoint_path (str, optional): path to use for checkpoints (if None, checkpointing
-             is disabled)
-         confidence_threshold (float, optional): only detections above this threshold are returned
-         checkpoint_frequency (int, optional): write results to a JSON checkpoint file every N
-             images; -1 disables checkpointing
-         results (list, optional): list of dicts, existing results loaded from checkpoint; generally
-             not useful if you're using this function outside of the CLI
-         n_cores (int, optional): number of parallel workers to use, ignored if we're running on a GPU
-         use_image_queue (bool, optional): use a dedicated worker for image loading
-         quiet (bool, optional): disable per-image console output
-         image_size (tuple, optional): image size to use for inference, only mess with this
-             if (a) you're using a model other than MegaDetector or (b) you know what you're
-             doing
-         class_mapping_filename (str, optional): use a non-default class mapping supplied in a .json
-             file or YOLOv5 dataset.yaml file
-         include_image_size (bool, optional): should we include image size in the output for each image?
-         include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
-         include_exif_data (bool, optional): should we include EXIF data in the output for each image?
-
-     Returns:
-         results: list of dicts; each dict represents detections on one image
-     """
-
-     # Validate input arguments
-     if n_cores is None:
-         n_cores = 1
-
-     if confidence_threshold is None:
-         confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
-
-     # Disable checkpointing if checkpoint_path is None
-     if checkpoint_frequency is None or checkpoint_path is None:
-         checkpoint_frequency = -1
-
-     if class_mapping_filename is not None:
-         _load_custom_class_mapping(class_mapping_filename)
-
-     # Handle the case where image_file_names is not yet actually a list
-     if isinstance(image_file_names,str):
-
-         # Find the images to score; images can be a directory, may need to recurse
-         if os.path.isdir(image_file_names):
-             image_dir = image_file_names
-             image_file_names = path_utils.find_images(image_dir, True)
-             print('{} image files found in folder {}'.format(len(image_file_names),image_dir))
-
-         # A single file, or a list of image paths
-         elif os.path.isfile(image_file_names):
-             list_file = image_file_names
-             if image_file_names.endswith('.json'):
-                 with open(list_file,'r') as f:
-                     image_file_names = json.load(f)
-                 print('Loaded {} image filenames from .json list file {}'.format(
-                     len(image_file_names),list_file))
-             elif image_file_names.endswith('.txt'):
-                 with open(list_file,'r') as f:
-                     image_file_names = f.readlines()
-                 image_file_names = [s.strip() for s in image_file_names if len(s.strip()) > 0]
-                 print('Loaded {} image filenames from .txt list file {}'.format(
-                     len(image_file_names),list_file))
-             elif path_utils.is_image_file(image_file_names):
-                 image_file_names = [image_file_names]
-                 print('Processing image {}'.format(image_file_names[0]))
-             else:
-                 raise ValueError(
-                     'File {} supplied as [image_file_names] argument, but extension is neither .json nor .txt'\
-                     .format(
-                     list_file))
-         else:
-             raise ValueError(
-                 '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'.format(
-                 image_file_names))
-
-     if results is None:
-         results = []
-
-     already_processed = set([i['file'] for i in results])
-
-     model_file = try_download_known_detector(model_file)
-
-     print('GPU available: {}'.format(is_gpu_available(model_file)))
-
-     if n_cores > 1 and is_gpu_available(model_file):
-
-         print('Warning: multiple cores requested, but a GPU is available; parallelization across ' + \
-               'GPUs is not currently supported, defaulting to one GPU')
-         n_cores = 1
-
-     if n_cores > 1 and use_image_queue:
-
-         print('Warning: multiple cores requested, but the image queue is enabled; parallelization ' + \
-               'with the image queue is not currently supported, defaulting to one worker')
-         n_cores = 1
-
-     if use_image_queue:
-
-         assert checkpoint_frequency < 0, \
-             'Using an image queue is not currently supported when checkpointing is enabled'
-         assert len(results) == 0, \
-             'Using an image queue with results loaded from a checkpoint is not currently supported'
-         assert n_cores <= 1
-         results = run_detector_with_image_queue(image_file_names, model_file,
-                                                 confidence_threshold, quiet,
-                                                 image_size=image_size)
-
-     elif n_cores <= 1:
-
-         # Load the detector
-         start_time = time.time()
-         detector = load_detector(model_file)
-         elapsed = time.time() - start_time
-         print('Loaded model in {}'.format(humanfriendly.format_timespan(elapsed)))
-
-         # This is only used for console reporting, so it's OK that it doesn't
-         # include images we might have loaded from a previous checkpoint
-         count = 0
-
-         for im_file in tqdm(image_file_names):
-
-             # Will not add additional entries not in the starter checkpoint
-             if im_file in already_processed:
-                 if not quiet:
-                     print('Bypassing image {}'.format(im_file))
-                 continue
-
-             count += 1
-
-             result = process_image(im_file, detector,
-                                    confidence_threshold, quiet=quiet,
-                                    image_size=image_size, include_image_size=include_image_size,
-                                    include_image_timestamp=include_image_timestamp,
-                                    include_exif_data=include_exif_data)
-             results.append(result)
-
-             # Write a checkpoint if necessary
-             if (checkpoint_frequency != -1) and ((count % checkpoint_frequency) == 0):
-
-                 print('Writing a new checkpoint after having processed {} images since '
-                       'last restart'.format(count))
-
-                 _write_checkpoint(checkpoint_path, results)
-
-     else:
-
-         # Multiprocessing is enabled at this point
-
-         # When using multiprocessing, tell the workers to load the model on each
-         # process, by passing the model_file string as the "model" argument to
-         # process_images.
-         detector = model_file
-
-         print('Creating pool with {} cores'.format(n_cores))
-
-         if len(already_processed) > 0:
-             n_images_all = len(image_file_names)
-             image_file_names = [fn for fn in image_file_names if fn not in already_processed]
-             print('Loaded {} of {} images from checkpoint'.format(
-                 len(already_processed),n_images_all))
-
-         # Divide images into chunks; we'll send one chunk to each worker process
-         image_batches = list(_chunks_by_number_of_chunks(image_file_names, n_cores))
-
-         pool = workerpool(n_cores)
-
-         if checkpoint_path is not None:
-
-             # Multiprocessing and checkpointing are both enabled at this point
-
-             checkpoint_queue = Manager().Queue()
-
-             # Pass the "results" array (which may already contain images loaded from an existing
-             # checkpoint) to the checkpoint queue handler function, which will append results to
-             # the list as they become available.
-             checkpoint_thread = Thread(target=_checkpoint_queue_handler,
-                                        args=(checkpoint_path, checkpoint_frequency,
-                                              checkpoint_queue, results), daemon=True)
-             checkpoint_thread.start()
-
-             pool.map(partial(process_images, detector=detector,
-                              confidence_threshold=confidence_threshold,
-                              image_size=image_size,
-                              include_image_size=include_image_size,
-                              include_image_timestamp=include_image_timestamp,
-                              include_exif_data=include_exif_data,
-                              checkpoint_queue=checkpoint_queue),
-                      image_batches)
-
-             checkpoint_queue.put(None)
-
-         else:
-
-             # Multiprocessing is enabled, but checkpointing is not
-
-             new_results = pool.map(partial(process_images, detector=detector,
-                                            confidence_threshold=confidence_threshold,image_size=image_size,
-                                            include_image_size=include_image_size,
-                                            include_image_timestamp=include_image_timestamp,
-                                            include_exif_data=include_exif_data),
-                                    image_batches)
-
-             new_results = list(itertools.chain.from_iterable(new_results))
-
-             # Append the results we just computed to "results", which is *usually* empty, but will
-             # be non-empty if we resumed from a checkpoint
-             results += new_results
-
-         # ...if checkpointing is/isn't enabled
-
-     # ...if we're running (1) with image queue, (2) on one core, (3) on multiple cores
-
-     # 'results' may have been modified in place, but we also return it for
-     # backwards-compatibility.
-     return results
-
- # ...def load_and_run_detector_batch(...)
-
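A minimal programmatic call, relying on the defaults above (paths hypothetical); per the docstring, image_file_names can be a list, a folder, a single image, or a .json/.txt list file:

    from detection.run_detector_batch import load_and_run_detector_batch
    results = load_and_run_detector_batch('MDV5A', '/path/to/image/folder')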
-
- def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
-     """
-     Thread function to accumulate results and write checkpoints when checkpointing and
-     multiprocessing are both enabled.
-     """
-
-     result_count = 0
-     while True:
-         result = checkpoint_queue.get()
-         if result is None:
-             break
-
-         result_count += 1
-         results.append(result)
-
-         if (checkpoint_frequency != -1) and (result_count % checkpoint_frequency == 0):
-
-             print('Writing a new checkpoint after having processed {} images since '
-                   'last restart'.format(result_count))
-
-             _write_checkpoint(checkpoint_path, results)
-
-
- def _write_checkpoint(checkpoint_path, results):
-     """
-     Writes the list 'results' to a .json checkpoint file, under the 'images' key.
-     """
-
-     assert checkpoint_path is not None
-
-     # Back up any previous checkpoint, to protect against crashes while we're writing
-     # the checkpoint file.
-     checkpoint_tmp_path = None
-     if os.path.isfile(checkpoint_path):
-         checkpoint_tmp_path = checkpoint_path + '_tmp'
-         shutil.copyfile(checkpoint_path,checkpoint_tmp_path)
-
-     # Write the new checkpoint
-     with open(checkpoint_path, 'w') as f:
-         json.dump({'images': results}, f, indent=1, default=str)
-
-     # Remove the backup checkpoint if it exists
-     if checkpoint_tmp_path is not None:
-         os.remove(checkpoint_tmp_path)
-
-
- def get_image_datetime(image):
-     """
-     Reads EXIF datetime from a PIL Image object.
-
-     Args:
-         image (Image): the PIL Image object from which we should read datetime information
-
-     Returns:
-         str: the EXIF datetime from [image] (a PIL Image object), if available, as a string;
-         returns None if EXIF datetime is not available.
-     """
-
-     exif_tags = read_exif.read_pil_exif(image,exif_options)
-
-     try:
-         datetime_str = exif_tags['DateTimeOriginal']
-         _ = time.strptime(datetime_str, '%Y:%m:%d %H:%M:%S')
-         return datetime_str
-
-     except Exception:
-         return None
-
-
- def write_results_to_file(results, output_file, relative_path_base=None,
-                           detector_file=None, info=None, include_max_conf=False,
-                           custom_metadata=None, force_forward_slashes=True):
-     """
-     Writes a list of detection results to a JSON output file. Format matches:
-
-     https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
-
-     Args:
-         results (list): list of dicts, each dict represents detections on one image
-         output_file (str): path to JSON output file, should end in '.json'
-         relative_path_base (str, optional): path to a directory as the base for relative paths, can
-             be None if the paths in [results] are absolute
-         detector_file (str, optional): filename of the detector used to generate these results, only
-             used to pull out a version number for the "info" field
-         info (dict, optional): dictionary to put in the results file instead of the default "info" field
-         include_max_conf (bool, optional): old files (version 1.2 and earlier) included a
-             "max_detection_conf" field in each image; this was removed in version 1.3. Set this
-             flag to force the inclusion of this field.
-         custom_metadata (object, optional): additional data to include as info['custom_metadata']; typically
-             a dictionary, but no type/format checks are performed
-         force_forward_slashes (bool, optional): convert all slashes in filenames within [results] to
-             forward slashes
-
-     Returns:
-         dict: the MD-formatted dictionary that was written to [output_file]
-     """
-
-     if relative_path_base is not None:
-         results_relative = []
-         for r in results:
-             r_relative = copy.copy(r)
-             r_relative['file'] = os.path.relpath(r_relative['file'], start=relative_path_base)
-             results_relative.append(r_relative)
-         results = results_relative
-
-     if force_forward_slashes:
-         results_converted = []
-         for r in results:
-             r_converted = copy.copy(r)
-             r_converted['file'] = r_converted['file'].replace('\\','/')
-             results_converted.append(r_converted)
-         results = results_converted
-
-     # The typical case: we need to build the 'info' struct
-     if info is None:
-
-         info = {
-             'detection_completion_time': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
-             'format_version': '1.3'
-         }
-
-         if detector_file is not None:
-             detector_filename = os.path.basename(detector_file)
-             detector_version = get_detector_version_from_filename(detector_filename)
-             detector_metadata = get_detector_metadata_from_version_string(detector_version)
-             info['detector'] = detector_filename
-             info['detector_metadata'] = detector_metadata
-         else:
-             info['detector'] = 'unknown'
-             info['detector_metadata'] = get_detector_metadata_from_version_string('unknown')
-
-     # If the caller supplied the entire "info" struct
-     else:
-
-         if detector_file is not None:
-
-             print('Warning (write_results_to_file): info struct and detector file ' + \
-                   'supplied, ignoring detector file')
-
-     if custom_metadata is not None:
-         info['custom_metadata'] = custom_metadata
-
-     # The 'max_detection_conf' field used to be included by default, and it caused all kinds
-     # of headaches, so it's no longer included unless the user explicitly requests it.
-     if not include_max_conf:
-         for im in results:
-             if 'max_detection_conf' in im:
-                 del im['max_detection_conf']
-
-     final_output = {
-         'images': results,
-         'detection_categories': run_detector.DEFAULT_DETECTOR_LABEL_MAP,
-         'info': info
-     }
-
-     with open(output_file, 'w') as f:
-         json.dump(final_output, f, indent=1, default=str)
-     print('Output file saved at {}'.format(output_file))
-
-     return final_output
-
- # ...def write_results_to_file(...)
-
-
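Putting the pieces together, the file written by write_results_to_file has this overall shape (abbreviated; the detector and timestamp values are illustrative):

    {
     "images": [ ...per-image dicts, as sketched after process_images()... ],
     "detection_categories": {"1": "animal", "2": "person", "3": "vehicle"},
     "info": {
      "detection_completion_time": "2024-01-01 12:00:00",
      "format_version": "1.3",
      "detector": "md_v5a.0.0.pt",
      "detector_metadata": { ... }
     }
    }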
- #%% Interactive driver
-
- if False:
-
-     pass
-
-     #%%
-
-     model_file = 'MDV5A'
-     image_dir = r'g:\camera_traps\camera_trap_images'
-     output_file = r'g:\temp\md-test.json'
-
-     recursive = True
-     output_relative_filenames = True
-     include_max_conf = False
-     quiet = True
-     image_size = None
-     use_image_queue = False
-     confidence_threshold = 0.0001
-     checkpoint_frequency = 5
-     checkpoint_path = None
-     resume_from_checkpoint = 'auto'
-     allow_checkpoint_overwrite = False
-     ncores = 1
-     class_mapping_filename = None
-     include_image_size = True
-     include_image_timestamp = True
-     include_exif_data = True
-     overwrite_handling = None
-
-     # Generate a command line
-     cmd = 'python run_detector_batch.py "{}" "{}" "{}"'.format(
-         model_file,image_dir,output_file)
-
-     if recursive:
-         cmd += ' --recursive'
-     if output_relative_filenames:
-         cmd += ' --output_relative_filenames'
-     if include_max_conf:
-         cmd += ' --include_max_conf'
-     if quiet:
-         cmd += ' --quiet'
-     if image_size is not None:
-         cmd += ' --image_size {}'.format(image_size)
-     if use_image_queue:
-         cmd += ' --use_image_queue'
-     if confidence_threshold is not None:
-         cmd += ' --threshold {}'.format(confidence_threshold)
-     if checkpoint_frequency is not None:
-         cmd += ' --checkpoint_frequency {}'.format(checkpoint_frequency)
-     if checkpoint_path is not None:
-         cmd += ' --checkpoint_path "{}"'.format(checkpoint_path)
-     if resume_from_checkpoint is not None:
-         cmd += ' --resume_from_checkpoint "{}"'.format(resume_from_checkpoint)
-     if allow_checkpoint_overwrite:
-         cmd += ' --allow_checkpoint_overwrite'
-     if ncores is not None:
-         cmd += ' --ncores {}'.format(ncores)
-     if class_mapping_filename is not None:
-         cmd += ' --class_mapping_filename "{}"'.format(class_mapping_filename)
-     if include_image_size:
-         cmd += ' --include_image_size'
-     if include_image_timestamp:
-         cmd += ' --include_image_timestamp'
-     if include_exif_data:
-         cmd += ' --include_exif_data'
-     if overwrite_handling is not None:
-         cmd += ' --overwrite_handling {}'.format(overwrite_handling)
-
-     print(cmd)
-     import clipboard; clipboard.copy(cmd)
-
-
-     #%% Run inference interactively
-
-     image_file_names = path_utils.find_images(image_dir, recursive=False)
-     results = None
-
-     start_time = time.time()
-
-     results = load_and_run_detector_batch(model_file=model_file,
-                                           image_file_names=image_file_names,
-                                           checkpoint_path=checkpoint_path,
-                                           confidence_threshold=confidence_threshold,
-                                           checkpoint_frequency=checkpoint_frequency,
-                                           results=results,
-                                           n_cores=ncores,
-                                           use_image_queue=use_image_queue,
-                                           quiet=quiet,
-                                           image_size=image_size)
-
-     elapsed = time.time() - start_time
-
-     print('Finished inference in {}'.format(humanfriendly.format_timespan(elapsed)))
-
-
- #%% Command-line driver
-
- def main():
-
-     parser = argparse.ArgumentParser(
-         description='Module to run a TF/PT animal detection model on lots of images')
-     parser.add_argument(
-         'detector_file',
-         help='Path to detector model file (.pb or .pt). Can also be the strings "MDV4", "MDV5A", or "MDV5B" to request automatic download.')
-     parser.add_argument(
-         'image_file',
-         help=\
-         'Path to a single image file, a .json or .txt file containing a list of paths to images, or a directory')
-     parser.add_argument(
-         'output_file',
-         help='Path to output JSON results file, should end with a .json extension')
-     parser.add_argument(
-         '--recursive',
-         action='store_true',
-         help='Recurse into directories, only meaningful if image_file points to a directory')
-     parser.add_argument(
-         '--output_relative_filenames',
-         action='store_true',
-         help='Output relative file names, only meaningful if image_file points to a directory')
-     parser.add_argument(
-         '--include_max_conf',
-         action='store_true',
-         help='Include the "max_detection_conf" field in the output')
-     parser.add_argument(
-         '--quiet',
-         action='store_true',
-         help='Suppress per-image console output')
-     parser.add_argument(
-         '--image_size',
-         type=int,
-         default=None,
-         help=('Force image resizing to a (square) integer size (not recommended to change this)'))
-     parser.add_argument(
-         '--use_image_queue',
-         action='store_true',
-         help='Pre-load images, may help keep your GPU busy; does not currently support ' + \
-              'checkpointing. Useful if you have a very fast GPU and a very slow disk.')
-     parser.add_argument(
-         '--threshold',
-         type=float,
-         default=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD,
-         help="Confidence threshold between 0 and 1.0; don't include boxes below this " + \
-              "confidence in the output file. Default is {}".format(
-              run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD))
-     parser.add_argument(
-         '--checkpoint_frequency',
-         type=int,
-         default=-1,
-         help='Write results to a temporary file every N images; default is -1, which ' + \
-              'disables this feature')
-     parser.add_argument(
-         '--checkpoint_path',
-         type=str,
-         default=None,
-         help='File name to which checkpoints will be written if checkpoint_frequency is > 0, ' + \
-              'defaults to md_checkpoint_[date].json in the same folder as the output file')
-     parser.add_argument(
-         '--resume_from_checkpoint',
-         type=str,
-         default=None,
-         help='Path to a JSON checkpoint file to resume from, or "auto" to ' + \
-              'find the most recent checkpoint in the same folder as the output file. "auto" uses ' + \
-              'checkpoint_path (rather than searching the output folder) if checkpoint_path is specified.')
-     parser.add_argument(
-         '--allow_checkpoint_overwrite',
-         action='store_true',
-         help='By default, this script will bail if the specified checkpoint file ' + \
-              'already exists; this option allows it to overwrite existing checkpoints')
-     parser.add_argument(
-         '--ncores',
-         type=int,
-         default=0,
-         help='Number of cores to use; only applies to CPU-based inference')
-     parser.add_argument(
-         '--class_mapping_filename',
-         type=str,
-         default=None,
-         help='Use a non-default class mapping, supplied in a .json file with a dictionary mapping ' + \
-              'int-strings to strings. This will also disable the addition of "1" to all category ' + \
-              'IDs, so your class mapping should start at zero. Can also be a YOLOv5 dataset.yaml file.')
-     parser.add_argument(
-         '--include_image_size',
-         action='store_true',
-         help='Include image dimensions in output file'
-     )
-     parser.add_argument(
-         '--include_image_timestamp',
-         action='store_true',
-         help='Include image datetime (if available) in output file'
-     )
-     parser.add_argument(
-         '--include_exif_data',
-         action='store_true',
-         help='Include available EXIF data in output file'
-     )
-     parser.add_argument(
-         '--overwrite_handling',
-         type=str,
-         default='overwrite',
-         help='What should we do if the output file exists? overwrite/skip/error (default overwrite)'
-     )
-
-     if len(sys.argv[1:]) == 0:
-         parser.print_help()
-         parser.exit()
-
-     args = parser.parse_args()
-
-     # If the specified detector file is really the name of a known model, find
-     # (and possibly download) that model
-     args.detector_file = try_download_known_detector(args.detector_file)
-
-     assert os.path.exists(args.detector_file), \
-         'detector file {} does not exist'.format(args.detector_file)
-     assert 0.0 <= args.threshold <= 1.0, 'Confidence threshold needs to be between 0 and 1'
-     assert args.output_file.endswith('.json'), 'output_file specified needs to end with .json'
-     if args.checkpoint_frequency != -1:
-         assert args.checkpoint_frequency > 0, 'Checkpoint_frequency needs to be > 0 or == -1'
-     if args.output_relative_filenames:
-         assert os.path.isdir(args.image_file), \
-             f'Could not find folder {args.image_file}, must supply a folder when ' + \
-             '--output_relative_filenames is set'
-
-     if os.path.exists(args.output_file):
-         if args.overwrite_handling == 'overwrite':
-             print('Warning: output file {} already exists and will be overwritten'.format(
-                 args.output_file))
-         elif args.overwrite_handling == 'skip':
-             print('Output file {} exists, returning'.format(
-                 args.output_file))
-             return
-         elif args.overwrite_handling == 'error':
-             raise Exception('Output file {} exists'.format(args.output_file))
-         else:
-             raise ValueError('Illegal overwrite handling string {}'.format(args.overwrite_handling))
-
-     output_dir = os.path.dirname(args.output_file)
-
-     if len(output_dir) > 0:
-         os.makedirs(output_dir,exist_ok=True)
-
-     assert not os.path.isdir(args.output_file), 'Specified output file is a directory'
-
-     if args.class_mapping_filename is not None:
-         _load_custom_class_mapping(args.class_mapping_filename)
-
-     # Load the checkpoint if available
-     #
-     # Relative file names are only output at the end; all file paths in the checkpoint are
-     # still absolute paths.
-     if args.resume_from_checkpoint is not None:
-         if args.resume_from_checkpoint == 'auto':
-             checkpoint_files = os.listdir(output_dir)
-             checkpoint_files = [fn for fn in checkpoint_files if \
-                                 (fn.startswith('md_checkpoint') and fn.endswith('.json'))]
-             if len(checkpoint_files) == 0:
-                 raise ValueError('resume_from_checkpoint set to "auto", but no checkpoints found in {}'.format(
-                     output_dir))
-             else:
-                 if len(checkpoint_files) > 1:
-                     print('Warning: found {} checkpoints in {}, using the latest'.format(
-                         len(checkpoint_files),output_dir))
-                 checkpoint_files = sorted(checkpoint_files)
-                 checkpoint_file_relative = checkpoint_files[-1]
-                 checkpoint_file = os.path.join(output_dir,checkpoint_file_relative)
-         else:
-             checkpoint_file = args.resume_from_checkpoint
-         assert os.path.exists(checkpoint_file), \
-             'File at resume_from_checkpoint specified does not exist'
-         with open(checkpoint_file) as f:
-             print('Loading previous results from checkpoint file {}'.format(
-                 checkpoint_file))
-             saved = json.load(f)
-         assert 'images' in saved, \
-             'The checkpoint file does not have the correct fields; cannot be restored'
-         results = saved['images']
-         print('Restored {} entries from the checkpoint'.format(len(results)))
-     else:
-         results = []
-
-     # Find the images to score; images can be a directory, may need to recurse
-     if os.path.isdir(args.image_file):
-         image_file_names = path_utils.find_images(args.image_file, args.recursive)
-         if len(image_file_names) > 0:
-             print('{} image files found in the input directory'.format(len(image_file_names)))
-         else:
-             if args.recursive:
-                 print('No image files found in directory {}, exiting'.format(args.image_file))
-             else:
-                 print('No image files found in directory {}, did you mean to specify '
-                       '--recursive?'.format(
-                       args.image_file))
-             return
-
-     # A json list of image paths
-     elif os.path.isfile(args.image_file) and args.image_file.endswith('.json'):
-         with open(args.image_file) as f:
-             image_file_names = json.load(f)
-         print('Loaded {} image filenames from .json list file {}'.format(
-             len(image_file_names),args.image_file))
-
-     # A text list of image paths
-     elif os.path.isfile(args.image_file) and args.image_file.endswith('.txt'):
-         with open(args.image_file) as f:
-             image_file_names = f.readlines()
-         image_file_names = [fn.strip() for fn in image_file_names if len(fn.strip()) > 0]
-         print('Loaded {} image filenames from .txt list file {}'.format(
-             len(image_file_names),args.image_file))
-
-     # A single image file
-     elif os.path.isfile(args.image_file) and path_utils.is_image_file(args.image_file):
-         image_file_names = [args.image_file]
-         print('Processing image {}'.format(args.image_file))
-
-     else:
-         raise ValueError('image_file specified is not a directory, a .json or .txt list, or an '
-                          'image file (or does not have a recognizable extension).')
-
-     assert len(image_file_names) > 0, 'Specified image_file does not point to valid image files'
-     assert os.path.exists(image_file_names[0]), \
-         'The first image to be processed does not exist at {}'.format(image_file_names[0])
-
-     # Test that we can write to the output_file's dir if checkpointing requested
-     if args.checkpoint_frequency != -1:
-
-         if args.checkpoint_path is not None:
-             checkpoint_path = args.checkpoint_path
-         else:
-             checkpoint_path = os.path.join(output_dir,
-                                            'md_checkpoint_{}.json'.format(
-                                            datetime.utcnow().strftime("%Y%m%d%H%M%S")))
-
-         # Don't overwrite existing checkpoint files; this is a sure-fire way to eventually
-         # erase someone's checkpoint.
-         if (checkpoint_path is not None) and (not args.allow_checkpoint_overwrite) \
-             and (args.resume_from_checkpoint is None):
-
-             assert not os.path.isfile(checkpoint_path), \
-                 f'Checkpoint path {checkpoint_path} already exists, delete or move it before ' + \
-                 're-using the same checkpoint path, or specify --allow_checkpoint_overwrite'
-
-         # Confirm that we can write to the checkpoint path; this avoids issues where
-         # we crash after several thousand images.
-         #
-         # But actually, commenting this out for now... the scenario where we are resuming from a
-         # checkpoint, then immediately overwrite that checkpoint with empty data, is higher-risk
-         # than the annoyance of crashing a few minutes after starting a job.
-         if False:
-             with open(checkpoint_path, 'w') as f:
-                 json.dump({'images': []}, f)
-
-         print('The checkpoint file will be written to {}'.format(checkpoint_path))
-
-     else:
-
-         if args.checkpoint_path is not None:
-             print('Warning: checkpointing disabled because checkpoint_frequency is -1, ' + \
-                   'but a checkpoint path was specified')
-
-         # Make sure checkpoint_path is always defined, so the cleanup step below is safe
-         checkpoint_path = None
-
-     start_time = time.time()
-
-     results = load_and_run_detector_batch(model_file=args.detector_file,
-                                           image_file_names=image_file_names,
-                                           checkpoint_path=checkpoint_path,
-                                           confidence_threshold=args.threshold,
-                                           checkpoint_frequency=args.checkpoint_frequency,
-                                           results=results,
-                                           n_cores=args.ncores,
-                                           use_image_queue=args.use_image_queue,
-                                           quiet=args.quiet,
-                                           image_size=args.image_size,
-                                           class_mapping_filename=args.class_mapping_filename,
-                                           include_image_size=args.include_image_size,
-                                           include_image_timestamp=args.include_image_timestamp,
-                                           include_exif_data=args.include_exif_data)
-
-     elapsed = time.time() - start_time
-     images_per_second = len(results) / elapsed
-     print('Finished inference for {} images in {} ({:.2f} images per second)'.format(
-         len(results),humanfriendly.format_timespan(elapsed),images_per_second))
-
-     relative_path_base = None
-     if args.output_relative_filenames:
-         relative_path_base = args.image_file
-     write_results_to_file(results, args.output_file, relative_path_base=relative_path_base,
-                           detector_file=args.detector_file,include_max_conf=args.include_max_conf)
-
-     if checkpoint_path and os.path.isfile(checkpoint_path):
-         os.remove(checkpoint_path)
-         print('Deleted checkpoint file {}'.format(checkpoint_path))
-
-     print('Done, thanks for MegaDetect\'ing!')
-
- if __name__ == '__main__':
-     main()
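Consistent with the module docstring, splitting a large batch across two GPUs means two invocations, each bound to one GPU and given its own chunk of images (paths hypothetical):

    CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py MDV5A ~/data/chunk0 ~/results_chunk0.json &
    CUDA_VISIBLE_DEVICES=1 python detection/run_detector_batch.py MDV5A ~/data/chunk1 ~/results_chunk1.json &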